9 Commits

Author SHA1 Message Date
github-actions[bot]
93b0c83381 chore: bump version to 1.1.2
Some checks failed
Merge / tests (push) Has been cancelled
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-08 23:24:17 +00:00
Jayden Pyles
9381ba9232 chore: update workflow 2025-06-08 18:17:03 -05:00
Jayden Pyles
20dccc5527 feat: edit ui + add return html option (#90)
* fix: restyle the element table

* chore: wip ui

* wip: edit styles

* feat: add html return

* fix: build

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: cypress test

* chore: update photo [skip ci]
2025-06-08 18:14:02 -05:00
Jayden Pyles
02619eb184 feat: update workflows [no bump]
Some checks failed
Merge / tests (push) Has been cancelled
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-05 22:19:41 -05:00
github-actions[bot]
58c6c09fc9 chore: bump version to 1.1.2 2025-06-06 03:18:03 +00:00
Jayden Pyles
bf896b4c6b feat: update workflows [no bump] 2025-06-05 22:09:49 -05:00
Jayden Pyles
e3b9c11ab7 feat: update workflows [no bump] 2025-06-05 21:59:54 -05:00
github-actions[bot]
32da3375b3 chore: bump version to 1.1.1 2025-06-06 02:56:42 +00:00
Jayden Pyles
b5131cbe4c feat: update workflows [no bump] 2025-06-05 21:47:55 -05:00
30 changed files with 681 additions and 300 deletions

View File

@@ -73,5 +73,8 @@ runs:
- name: Run Cypress tests
shell: bash
run: npm run cy:run
run: |
set -e
npm run cy:run

View File

@@ -18,15 +18,14 @@ jobs:
uses: ./.github/actions/run-cypress-tests
with:
openai_key: ${{ secrets.openai_key }}
continue-on-error: true
- name: Check container logs on failure
if: steps.run-tests.outcome == 'failure'
if: steps.run-tests.conclusion == 'failure'
run: |
echo "Cypress tests failed. Dumping container logs..."
docker logs scraperr_api || true
- name: Fail job if Cypress failed
if: steps.run-tests.outcome == 'failure'
if: steps.run-tests.conclusion == 'failure'
run: exit 1
- uses: actions/checkout@v4

View File

@@ -4,6 +4,10 @@ on:
push:
branches:
- master
pull_request:
types: [closed]
branches:
- master
jobs:
tests:
@@ -15,8 +19,11 @@ jobs:
version:
needs: tests
uses: ./.github/workflows/version.yml
secrets:
git_token: ${{ secrets.GPAT_TOKEN }}
build-and-deploy:
if: needs.version.outputs.version_bump == 'true'
needs: version
uses: ./.github/workflows/docker-image.yml
secrets:

View File

@@ -22,7 +22,6 @@ jobs:
uses: ./.github/actions/run-cypress-tests
with:
openai_key: ${{ secrets.openai_key }}
continue-on-error: true
success-message:
runs-on: ubuntu-latest

View File

@@ -2,10 +2,16 @@ name: Version
on:
workflow_call:
secrets:
git_token:
required: true
outputs:
version:
description: "The new version number"
value: ${{ jobs.version.outputs.version }}
version_bump:
description: "Whether the version was bumped"
value: ${{ jobs.version.outputs.version_bump }}
jobs:
version:
@@ -37,7 +43,27 @@ jobs:
echo "VERSION_TYPE=$VERSION_TYPE" >> $GITHUB_ENV
- name: Check for version bump
id: check_version_bump
run: |
COMMIT_MSG=$(git log -1 --pretty=%B)
if [[ $COMMIT_MSG =~ ^feat\(breaking\) ]]; then
echo "version_bump=true" >> $GITHUB_OUTPUT
elif [[ $COMMIT_MSG =~ .*\[no\ bump\].* ]]; then
echo "version_bump=false" >> $GITHUB_OUTPUT
fi
- name: Skip version bump
if: steps.check_version_bump.outputs.version_bump == 'false'
run: |
echo "Skipping version bump as requested"
gh run cancel ${{ github.run_id }}
exit 0
env:
GITHUB_TOKEN: ${{ secrets.git_token }}
- name: Set version
if: steps.check_version_bump.outputs.version_bump != 'false'
id: set_version
run: |
VERSION=$(./scripts/version.sh "$VERSION_TYPE")
@@ -48,6 +74,7 @@ jobs:
VERSION_TYPE: ${{ env.VERSION_TYPE }}
- name: Update chart file
if: steps.check_version_bump.outputs.version_bump != 'false'
run: |
sed -i "s/^version: .*/version: $VERSION/" helm/Chart.yaml

View File

@@ -63,7 +63,9 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
xpaths = parse_response(response)
captured_elements = await capture_elements(page, xpaths)
captured_elements = await capture_elements(
page, xpaths, agent_job["job_options"]["return_html"]
)
final_url = page.url

View File

@@ -206,7 +206,7 @@ def parse_next_page(text: str) -> str | None:
async def capture_elements(
page: Page, xpaths: list[dict[str, str]]
page: Page, xpaths: list[dict[str, str]], return_html: bool
) -> list[CapturedElement]:
captured_elements = []
seen_texts = set()
@@ -217,6 +217,23 @@ async def capture_elements(
count = await locator.count()
for i in range(count):
if return_html:
element_text = (
await page.locator(f"xpath={xpath['xpath']}")
.nth(i)
.inner_html()
)
seen_texts.add(element_text)
captured_elements.append(
CapturedElement(
name=xpath["name"],
text=element_text,
xpath=xpath["xpath"],
)
)
continue
element_text = ""
element_handle = await locator.nth(i).element_handle()

View File

@@ -25,3 +25,4 @@ class JobOptions(BaseModel):
site_map: Optional[SiteMap] = None
collect_media: bool = False
custom_cookies: list[dict[str, Any]] = []
return_html: bool = False

View File

@@ -110,7 +110,9 @@ async def make_site_request(
)
async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element]):
async def collect_scraped_elements(
page: tuple[str, str], xpaths: list[Element], return_html: bool
):
soup = BeautifulSoup(page[0], "lxml")
root = etree.HTML(str(soup))
@@ -120,6 +122,16 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])
el = sxpath(root, elem.xpath)
for e in el: # type: ignore
if return_html:
elements[elem.name] = [
CapturedElement(
xpath=elem.xpath,
text=page[0],
name=elem.name,
)
]
continue
text = (
" ".join(str(t) for t in e.itertext())
if isinstance(e, etree._Element)
@@ -161,6 +173,8 @@ async def scrape(
elements: list[dict[str, dict[str, list[CapturedElement]]]] = []
for page in pages:
elements.append(await collect_scraped_elements(page, xpaths))
elements.append(
await collect_scraped_elements(page, xpaths, job_options["return_html"])
)
return elements

View File

@@ -7,11 +7,10 @@ import {
} from "../utilities/job.utilities";
import { mockSubmitJob } from "../utilities/mocks";
describe.only("Agent", () => {
describe("Agent", () => {
beforeEach(() => {
mockSubmitJob();
login();
cy.visit("/agent");
});
afterEach(() => {
@@ -19,6 +18,9 @@ describe.only("Agent", () => {
});
it("should be able to scrape some data", () => {
cy.visit("/agent");
cy.wait(1000);
const url = "https://books.toscrape.com";
const prompt = "Collect all the links on the page";
buildAgentJob(url, prompt);

View File

@@ -4,7 +4,7 @@ export const cleanUpJobs = () => {
cy.wait("@retrieve", { timeout: 15000 });
cy.get("tbody tr", { timeout: 10000 }).should("have.length.at.least", 1);
cy.get("tbody tr", { timeout: 20000 }).should("have.length.at.least", 1);
const tryClickSelectAll = (attempt = 1, maxAttempts = 5) => {
cy.log(`Attempt ${attempt} to click Select All`);
@@ -100,13 +100,13 @@ export const waitForJobCompletion = (url: string) => {
};
export const enableMultiPageScraping = () => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
cy.get('[data-cy="multi-page-toggle"]').click();
cy.get("body").type("{esc}");
};
export const addCustomHeaders = (headers: Record<string, string>) => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
cy.get('[name="custom_headers"]').type(JSON.stringify(headers), {
parseSpecialCharSequences: false,
});
@@ -114,16 +114,17 @@ export const addCustomHeaders = (headers: Record<string, string>) => {
};
export const addCustomCookies = (cookies: Record<string, string>) => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
cy.get('[name="custom_cookies"]').type(JSON.stringify(cookies));
cy.get("body").type("{esc}");
};
export const openAdvancedJobOptions = () => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
};
/**
 * Runs `checkAiDisabled()` first, then clicks the first `select-job`
 * dropdown and the first option it lists. Each lookup waits up to ten
 * seconds for its element.
 */
export const selectJobFromSelector = () => {
  const timeout = 10000;

  checkAiDisabled();

  const jobSelect = cy.get("div[id='select-job']", { timeout }).first();
  jobSelect.click();

  const firstOption = cy.get("li[role='option']", { timeout }).first();
  firstOption.click();
};
@@ -162,15 +163,13 @@ export const addElement = (name: string, xpath: string) => {
};
export const checkAiDisabled = () => {
const disabledMessage = cy.contains(
/must set either OPENAI_KEY or OLLAMA_MODEL to use AI features/i
);
if (disabledMessage) {
throw new Error(
"Must set either OPENAI_KEY or OLLAMA_MODEL to use AI features."
cy.getAllLocalStorage().then((result) => {
const storage = JSON.parse(
result["http://localhost"]["persist:root"] as string
);
}
const settings = JSON.parse(storage.settings);
expect(settings.aiEnabled).to.equal(true);
});
};
export const buildAgentJob = (url: string, prompt: string) => {

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

After

Width:  |  Height:  |  Size: 67 KiB

View File

@@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.1.1
version: 1.1.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to

View File

@@ -1,7 +1,8 @@
import { Box, Link, Typography } from "@mui/material";
import { SetStateAction, Dispatch, useState } from "react";
import { AdvancedJobOptionsDialog } from "./dialog/advanced-job-options-dialog";
import { RawJobOptions } from "@/types";
import SettingsIcon from "@mui/icons-material/Settings";
import { Box, Button, Typography } from "@mui/material";
import { Dispatch, SetStateAction, useState } from "react";
import { AdvancedJobOptionsDialog } from "./dialog/advanced-job-options-dialog";
export type AdvancedJobOptionsProps = {
jobOptions: RawJobOptions;
@@ -17,26 +18,27 @@ export const AdvancedJobOptions = ({
const [open, setOpen] = useState(false);
return (
<Box sx={{ mb: 2 }}>
<Link
component="button"
variant="body2"
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
<Button
variant="outlined"
onClick={() => setOpen(true)}
startIcon={<SettingsIcon />}
sx={{
textDecoration: "none",
color: "primary.main",
textTransform: "none",
borderRadius: 2,
px: 2,
py: 1,
borderColor: "divider",
color: "text.secondary",
"&:hover": {
color: "primary.dark",
textDecoration: "underline",
borderColor: "primary.main",
color: "primary.main",
bgcolor: "action.hover",
},
paddingLeft: 1,
display: "inline-flex",
alignItems: "center",
gap: 0.5,
}}
>
<Typography variant="body2">Advanced Job Options</Typography>
</Link>
<Typography variant="body2">Advanced Options</Typography>
</Button>
<AdvancedJobOptionsDialog
open={open}

View File

@@ -46,15 +46,14 @@ export const AdvancedJobOptionsDialog = ({
const [localJobOptions, setLocalJobOptions] =
useState<RawJobOptions>(jobOptions);
// Update local state when prop changes
useEffect(() => {
setLocalJobOptions(jobOptions);
}, [jobOptions]);
const handleMultiPageScrapeChange = () => {
const handleCheckboxChange = (key: keyof RawJobOptions) => {
setLocalJobOptions((prevJobOptions) => ({
...prevJobOptions,
multi_page_scrape: !prevJobOptions.multi_page_scrape,
[key]: !prevJobOptions[key],
}));
};
@@ -65,15 +64,7 @@ export const AdvancedJobOptionsDialog = ({
}));
};
const handleCollectMediaChange = () => {
setLocalJobOptions((prevJobOptions) => ({
...prevJobOptions,
collect_media: !prevJobOptions.collect_media,
}));
};
const handleClose = () => {
// Save the local state back to the parent before closing
setJobOptions(localJobOptions);
onClose();
};
@@ -137,7 +128,7 @@ export const AdvancedJobOptionsDialog = ({
control={
<Checkbox
checked={localJobOptions.multi_page_scrape}
onChange={handleMultiPageScrapeChange}
onChange={() => handleCheckboxChange("multi_page_scrape")}
disabled={!multiPageScrapeEnabled}
/>
}
@@ -158,11 +149,12 @@ export const AdvancedJobOptionsDialog = ({
</Box>
}
/>
<FormControlLabel
control={
<Checkbox
checked={localJobOptions.collect_media}
onChange={handleCollectMediaChange}
onChange={() => handleCheckboxChange("collect_media")}
data-cy="collect-media-checkbox"
/>
}
@@ -177,6 +169,26 @@ export const AdvancedJobOptionsDialog = ({
</Box>
}
/>
<FormControlLabel
control={
<Checkbox
checked={localJobOptions.return_html}
onChange={() => handleCheckboxChange("return_html")}
data-cy="return-html-checkbox"
/>
}
label={
<Box sx={{ display: "flex", alignItems: "center" }}>
<Typography>Return HTML</Typography>
<Tooltip title="Return the HTML of the page">
<IconButton size="small">
<InfoOutlined fontSize="small" />
</IconButton>
</Tooltip>
</Box>
}
/>
</FormGroup>
</Box>

View File

@@ -12,6 +12,7 @@ export const Disabled = ({ message }: DisabledProps) => {
display="flex"
justifyContent="center"
alignItems="center"
data-testid="disabled-message"
>
<h4
style={{

View File

@@ -213,6 +213,7 @@ export const JobQueue = ({
query: {
url: row.url,
prompt: row.prompt,
job_options: JSON.stringify(row.job_options),
},
});
} else {

View File

@@ -1,14 +1,14 @@
"use client";
import React, { useEffect, useRef } from "react";
import { Container, Box } from "@mui/material";
import { useRouter } from "next/router";
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
import {
ErrorSnackbar,
JobNotifySnackbar,
} from "@/components/common/snackbars";
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
import { Box, Container } from "@mui/material";
import { useRouter } from "next/router";
import { useEffect, useRef } from "react";
export const Home = () => {
const {
@@ -50,19 +50,18 @@ export const Home = () => {
flexDirection="column"
justifyContent="center"
alignItems="center"
height="100%"
minHeight="100vh"
py={4}
>
<Container maxWidth="lg" className="overflow-y-auto max-h-full">
<JobSubmitter />
{submittedURL.length > 0 ? (
<Container maxWidth="lg" className="overflow-y-auto">
<Box className="flex flex-col gap-6">
<JobSubmitter />
<ElementTable
rows={rows}
setRows={setRows}
submittedURL={submittedURL}
/>
) : null}
</Box>
</Container>
{snackbarSeverity === "info" ? (

View File

@@ -1,24 +1,24 @@
"use client";
import React, { useState, Dispatch, SetStateAction } from "react";
import { Element } from "@/types";
import AddIcon from "@mui/icons-material/Add";
import DeleteIcon from "@mui/icons-material/Delete";
import {
Typography,
TextField,
Button,
Box,
Divider,
IconButton,
Paper,
Table,
TableBody,
TableContainer,
TableCell,
TableContainer,
TableHead,
TableRow,
Box,
IconButton,
TextField,
Tooltip,
useTheme,
Divider,
Typography,
} from "@mui/material";
import AddIcon from "@mui/icons-material/Add";
import { Element } from "@/types";
import { Dispatch, SetStateAction, useState } from "react";
import { SiteMap } from "../site-map";
interface Props {
@@ -28,7 +28,6 @@ interface Props {
}
export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
const theme = useTheme();
const [newRow, setNewRow] = useState<Element>({
name: "",
xpath: "",
@@ -42,142 +41,219 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
};
const handleDeleteRow = (elementName: string) => {
setRows(
rows.filter((r) => {
return elementName !== r.name;
})
);
setRows(rows.filter((r) => elementName !== r.name));
};
return (
<Box className="animate-fadeIn p-2" bgcolor="background.paper">
<Box className="text-center mb-4">
<Typography variant="h4" sx={{ marginBottom: 1 }}>
Elements to Scrape
</Typography>
<Paper
elevation={0}
sx={{
p: 4,
borderRadius: 2,
bgcolor: "background.paper",
border: 1,
borderColor: "divider",
"&:hover": {
boxShadow: "0 4px 20px rgba(0, 0, 0, 0.05)",
},
}}
>
<Box className="flex flex-col gap-6">
<Box>
<Typography
variant="h5"
sx={{
fontWeight: 600,
color: "text.primary",
mb: 1,
}}
>
Elements to Scrape
</Typography>
<Typography
variant="body2"
sx={{
color: "text.secondary",
}}
>
Add elements to scrape from the target URL using XPath selectors
</Typography>
</Box>
<TableContainer
component={Box}
sx={{ maxHeight: "50%", overflow: "auto" }}
sx={{
maxHeight: "400px",
overflow: "auto",
borderRadius: 2,
border: 1,
borderColor: "divider",
}}
>
<div className="rounded-lg shadow-md border border-gray-300 overflow-hidden">
<Table
stickyHeader
className="mb-4"
sx={{
tableLayout: "fixed",
width: "100%",
"& .MuiTableCell-root": {
borderBottom: "1px solid #e0e0e0",
},
}}
>
<TableHead>
<TableRow>
<TableCell>
<Typography sx={{ fontWeight: "bold" }}>Name</Typography>
</TableCell>
<TableCell>
<Typography sx={{ fontWeight: "bold" }}>XPath</Typography>
</TableCell>
<TableCell>
<Typography sx={{ fontWeight: "bold" }}>Actions</Typography>
</TableCell>
</TableRow>
</TableHead>
<TableBody>
<TableRow>
<TableCell>
<TextField
data-cy="name-field"
label="Name"
variant="outlined"
fullWidth
value={newRow.name}
onChange={(e) =>
setNewRow({ ...newRow, name: e.target.value })
}
/>
</TableCell>
<TableCell>
<TextField
data-cy="xpath-field"
label="XPath"
variant="outlined"
fullWidth
value={newRow.xpath}
onChange={(e) =>
setNewRow({ ...newRow, xpath: e.target.value })
}
/>
</TableCell>
<TableCell>
<Tooltip
title={
newRow.xpath.length > 0 && newRow.name.length > 0
? "Add Element"
: "Fill out all fields to add an element"
}
placement="top"
>
<span>
<IconButton
data-cy="add-button"
aria-label="add"
size="small"
onClick={handleAddRow}
sx={{
height: "40px",
width: "40px",
}}
disabled={
!(newRow.xpath.length > 0 && newRow.name.length > 0)
}
>
<AddIcon
fontSize="inherit"
sx={{
color:
theme.palette.mode === "light"
? "#000000"
: "#ffffff",
}}
/>
</IconButton>
</span>
</Tooltip>
</TableCell>
</TableRow>
{rows.map((row, index) => (
<TableRow key={index}>
<TableCell>
<Typography>{row.name}</Typography>
</TableCell>
<TableCell>
<Typography>{row.xpath}</Typography>
</TableCell>
<TableCell>
<Button
onClick={() => handleDeleteRow(row.name)}
className="!bg-red-500 bg-opacity-50 !text-white font-semibold rounded-md
transition-transform transform hover:scale-105 hover:bg-red-500"
<Table
stickyHeader
size="small"
sx={{
"& .MuiTableCell-root": {
borderBottom: "1px solid",
borderColor: "divider",
py: 1.5,
},
"& .MuiTableCell-head": {
bgcolor: "background.default",
fontWeight: 600,
},
}}
>
<TableHead>
<TableRow>
<TableCell width="30%">Name</TableCell>
<TableCell width="50%">XPath</TableCell>
<TableCell width="20%" align="center">
Actions
</TableCell>
</TableRow>
</TableHead>
<TableBody>
<TableRow>
<TableCell>
<TextField
data-cy="name-field"
placeholder="Enter element name"
variant="outlined"
fullWidth
size="small"
value={newRow.name}
onChange={(e) =>
setNewRow({ ...newRow, name: e.target.value })
}
sx={{
"& .MuiOutlinedInput-root": {
borderRadius: 2,
bgcolor: "background.default",
"&:hover": {
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "primary.main",
},
},
},
}}
/>
</TableCell>
<TableCell>
<TextField
data-cy="xpath-field"
placeholder="Enter XPath selector"
variant="outlined"
fullWidth
size="small"
value={newRow.xpath}
onChange={(e) =>
setNewRow({ ...newRow, xpath: e.target.value })
}
sx={{
"& .MuiOutlinedInput-root": {
borderRadius: 2,
bgcolor: "background.default",
"&:hover": {
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "primary.main",
},
},
},
}}
/>
</TableCell>
<TableCell align="center">
<Tooltip
title={
newRow.xpath.length > 0 && newRow.name.length > 0
? "Add Element"
: "Fill out all fields to add an element"
}
placement="top"
>
<span>
<IconButton
data-cy="add-button"
aria-label="add"
size="small"
onClick={handleAddRow}
disabled={
!(newRow.xpath.length > 0 && newRow.name.length > 0)
}
sx={{
bgcolor: "primary.main",
color: "primary.contrastText",
borderRadius: 2,
"&:hover": {
bgcolor: "primary.dark",
transform: "translateY(-1px)",
},
"&.Mui-disabled": {
bgcolor: "action.disabledBackground",
color: "action.disabled",
},
}}
>
Delete
</Button>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</div>
<AddIcon fontSize="small" />
</IconButton>
</span>
</Tooltip>
</TableCell>
</TableRow>
{rows.map((row, index) => (
<TableRow
key={index}
sx={{
"&:hover": {
bgcolor: "action.hover",
},
}}
>
<TableCell>
<Typography variant="body2" noWrap>
{row.name}
</Typography>
</TableCell>
<TableCell>
<Typography
variant="body2"
sx={{
fontFamily: "monospace",
fontSize: "0.875rem",
color: "text.secondary",
}}
noWrap
>
{row.xpath}
</Typography>
</TableCell>
<TableCell align="center">
<IconButton
onClick={() => handleDeleteRow(row.name)}
size="small"
color="error"
sx={{
"&:hover": {
bgcolor: "error.main",
color: "error.contrastText",
transform: "translateY(-1px)",
},
}}
>
<DeleteIcon fontSize="small" />
</IconButton>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</TableContainer>
<Divider sx={{ my: 2 }} />
<SiteMap />
</Box>
<Divider
sx={{
borderColor: theme.palette.mode === "dark" ? "#ffffff" : "0000000",
marginBottom: 2,
}}
/>
<SiteMap />
</Box>
</Paper>
);
};

View File

@@ -2,3 +2,14 @@
margin-bottom: 1rem;
text-align: center;
}
/* Left-aligned wrapper with a small gap below it. */
.container {
text-align: left;
margin-bottom: 8px;
}
/*
 * Semi-bold title text with a small gap below it.
 * NOTE(review): the colour reads the --mui-palette-text-primary custom
 * property, which is presumably emitted by the MUI theme - confirm that
 * CSS theme variables are enabled where this class is used.
 */
.title {
font-weight: 600;
color: var(--mui-palette-text-primary);
margin-bottom: 8px;
}

View File

@@ -1,6 +1,6 @@
import { Box, Typography } from "@mui/material";
import React, { ReactNode } from "react";
import { Typography } from "@mui/material";
import classes from "./job-submitter-header.module.css";
import styles from "./job-submitter-header.module.css";
interface JobSubmitterHeaderProps {
title?: string;
@@ -8,13 +8,15 @@ interface JobSubmitterHeaderProps {
}
export const JobSubmitterHeader: React.FC<JobSubmitterHeaderProps> = ({
title = "Scraping Made Easy",
title = "Scrape Webpage",
children,
}) => {
return (
<div className={classes.jobSubmitterHeader}>
<Typography variant="h3">{title}</Typography>
<Box className={styles.container}>
<Typography variant="h4" className={styles.title}>
{title}
</Typography>
{children}
</div>
</Box>
);
};

View File

@@ -0,0 +1,52 @@
/* Wrapper for the input row: children stack vertically by default. */
.container {
  display: flex;
  flex-direction: column;
  gap: 16px;
  align-items: stretch;
}

/* At 600px and wider, lay the children out side by side, vertically centred. */
@media (min-width: 600px) {
  .container {
    flex-direction: row;
    align-items: center;
  }
}

/* The text field takes the full available width. */
.input {
  width: 100%;
}

/* Round the outlined field and animate its property changes. */
.input :global(.MuiOutlinedInput-root) {
  border-radius: 16px;
  transition: all 0.2s ease-in-out;
}

/* Colour the field outline with the primary palette colour while hovered. */
.input
  :global(.MuiOutlinedInput-root:hover)
  :global(.MuiOutlinedInput-notchedOutline) {
  border-color: var(--mui-palette-primary-main);
}

/* Base submit button sizing; !important is used to override component defaults. */
.submitButton {
  height: 48px !important;
  border-radius: 16px;
  font-size: 1rem !important;
  font-weight: 500 !important;
}

/* Lift the button slightly and add a shadow on hover. */
.submitButton:hover {
  transform: translateY(-1px);
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}

/* A disabled button never shows the hover lift or shadow. */
.submitButton:disabled {
  transform: none;
  box-shadow: none;
}

/* At 600px and wider, make the button taller and give it a minimum width. */
@media (min-width: 600px) {
  .submitButton {
    min-width: 120px;
    /*
     * Must be !important: the base rule declares `height: 48px !important`,
     * and an important declaration beats a normal one regardless of source
     * order, so a plain `height: 56px` here would never take effect.
     */
    height: 56px !important;
  }
}

View File

@@ -1,6 +1,6 @@
import React from "react";
import { TextField, Button, CircularProgress } from "@mui/material";
import { Box, Button, CircularProgress, TextField } from "@mui/material";
import { useJobSubmitterProvider } from "../provider";
import styles from "./job-submitter-input.module.css";
export type JobSubmitterInputProps = {
urlError: string | null;
@@ -17,7 +17,7 @@ export const JobSubmitterInput = ({
useJobSubmitterProvider();
return (
<div className="flex flex-row space-x-4 items-center mb-2">
<Box className={styles.container}>
<TextField
data-cy="url-input"
label="URL"
@@ -27,19 +27,18 @@ export const JobSubmitterInput = ({
onChange={(e) => setSubmittedURL(e.target.value)}
error={!isValidURL}
helperText={!isValidURL ? urlError : ""}
className="rounded-md"
className={styles.input}
/>
<Button
data-cy="submit-button"
variant="contained"
size="small"
size="large"
onClick={handleSubmit}
disabled={!(rows.length > 0) || loading}
className={`bg-[#034efc] text-white font-semibold rounded-md
transition-transform transform hover:scale-105 disabled:opacity-50`}
className={styles.submitButton}
>
{loading ? <CircularProgress size={24} color="inherit" /> : "Submit"}
</Button>
</div>
</Box>
);
};

View File

@@ -4,6 +4,7 @@ import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
import { useSubmitJob } from "@/hooks/use-submit-job";
import { parseJobOptions } from "@/lib";
import { useUser } from "@/store/hooks";
import { Box, Paper } from "@mui/material";
import { useRouter } from "next/router";
import { useEffect } from "react";
import { JobSubmitterHeader } from "./job-submitter-header";
@@ -29,23 +30,34 @@ export const JobSubmitter = () => {
await submitJob(submittedURL, rows, user, jobOptions, siteMap, false, null);
};
console.log(jobOptions);
useEffect(() => {
console.log(jobOptions);
}, [jobOptions]);
return (
<div>
<JobSubmitterHeader />
<JobSubmitterInput
urlError={error}
handleSubmit={handleSubmit}
loading={loading}
/>
<AdvancedJobOptions
jobOptions={jobOptions}
setJobOptions={setJobOptions}
/>
</div>
<Paper
elevation={0}
sx={{
p: 4,
borderRadius: 2,
bgcolor: "background.paper",
border: 1,
borderColor: "divider",
"&:hover": {
boxShadow: "0 4px 20px rgba(0, 0, 0, 0.05)",
},
}}
>
<Box className="flex flex-col gap-6">
<JobSubmitterHeader />
<Box className="flex flex-col gap-4">
<JobSubmitterInput
urlError={error}
handleSubmit={handleSubmit}
loading={loading}
/>
<AdvancedJobOptions
jobOptions={jobOptions}
setJobOptions={setJobOptions}
/>
</Box>
</Box>
</Paper>
);
};

View File

@@ -1,17 +1,17 @@
import { useState } from "react";
import { useJobSubmitterProvider } from "../../provider";
import { ActionOption } from "@/types/job";
import {
Box,
Button,
Checkbox,
FormControl,
FormControlLabel,
InputLabel,
MenuItem,
Select,
TextField,
FormControl,
Button,
Checkbox,
FormControlLabel,
} from "@mui/material";
import { ActionOption } from "@/types/job";
import classes from "./site-map-input.module.css";
import { clsx } from "clsx";
import { useState } from "react";
import { useJobSubmitterProvider } from "../../provider";
export type SiteMapInputProps = {
disabled?: boolean;
@@ -28,7 +28,6 @@ export const SiteMapInput = ({
clickOnce,
input,
}: SiteMapInputProps) => {
console.log(clickOnce);
const [optionState, setOptionState] = useState<ActionOption>(
option || "click"
);
@@ -43,8 +42,6 @@ export const SiteMapInput = ({
const handleAdd = () => {
if (!siteMap) return;
console.log(optionState, xpathState, clickOnceState, inputState);
setSiteMap((prevSiteMap) => ({
...prevSiteMap,
actions: [
@@ -60,6 +57,7 @@ export const SiteMapInput = ({
}));
setXpathState("");
setInputState("");
};
const handleRemove = () => {
@@ -72,14 +70,22 @@ export const SiteMapInput = ({
};
return (
<div className="flex flex-col gap-2 w-full">
<div className="flex gap-2 items-center">
<FormControl className="w-1/4">
<Box
sx={{ display: "flex", flexDirection: "column", gap: 2, width: "100%" }}
>
<Box sx={{ display: "flex", gap: 2, alignItems: "center" }}>
<FormControl size="small" sx={{ minWidth: 120 }}>
<InputLabel>Action Type</InputLabel>
<Select
disabled={disabled}
displayEmpty
value={optionState}
label="Action Type"
onChange={(e) => setOptionState(e.target.value as ActionOption)}
sx={{
"& .MuiSelect-select": {
textTransform: "capitalize",
},
}}
>
<MenuItem value="click">Click</MenuItem>
<MenuItem value="input">Input</MenuItem>
@@ -88,23 +94,49 @@ export const SiteMapInput = ({
{optionState === "input" && (
<TextField
label="Input Text"
size="small"
fullWidth
value={inputState}
onChange={(e) => setInputState(e.target.value)}
disabled={disabled}
sx={{
"& .MuiOutlinedInput-root": {
bgcolor: "background.default",
},
}}
/>
)}
{!disabled && (
<TextField
label="XPath Selector"
size="small"
fullWidth
value={xpathState}
onChange={(e) => setXpathState(e.target.value)}
disabled={disabled}
sx={{
"& .MuiOutlinedInput-root": {
bgcolor: "background.default",
fontFamily: "monospace",
fontSize: "1rem",
},
}}
/>
)}
<TextField
label="XPath Selector"
fullWidth
value={xpathState}
onChange={(e) => setXpathState(e.target.value)}
disabled={disabled}
/>
{disabled ? (
<Button
onClick={handleRemove}
className={clsx(classes.button, classes.remove)}
size="small"
variant="outlined"
color="error"
sx={{
minWidth: "80px",
textTransform: "none",
"&:hover": {
bgcolor: "error.main",
color: "error.contrastText",
},
}}
>
Delete
</Button>
@@ -112,24 +144,41 @@ export const SiteMapInput = ({
<Button
onClick={handleAdd}
disabled={!xpathState}
className={clsx(classes.button, classes.add)}
size="small"
variant="contained"
color="primary"
sx={{
minWidth: "80px",
textTransform: "none",
"&.Mui-disabled": {
bgcolor: "action.disabledBackground",
color: "action.disabled",
},
}}
>
Add
</Button>
)}
</div>
</Box>
{!disabled && (
<FormControlLabel
label="Do Once"
control={
<Checkbox
size="small"
checked={clickOnceState}
disabled={disabled}
onChange={() => setClickOnceState(!clickOnceState)}
/>
}
sx={{
"& .MuiFormControlLabel-label": {
fontSize: "0.875rem",
color: "text.secondary",
},
}}
/>
)}
</div>
</Box>
);
};

View File

@@ -1,12 +1,22 @@
import {
Box,
Button,
Divider,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Typography,
} from "@mui/material";
import { useEffect, useState } from "react";
import { useJobSubmitterProvider } from "../provider";
import { Button, Divider, Typography, useTheme } from "@mui/material";
import { SiteMapInput } from "./site-map-input";
export const SiteMap = () => {
const { siteMap, setSiteMap } = useJobSubmitterProvider();
const [showSiteMap, setShowSiteMap] = useState<boolean>(false);
const theme = useTheme();
const handleCreateSiteMap = () => {
setSiteMap({ actions: [] });
@@ -25,46 +35,123 @@ export const SiteMap = () => {
}, [siteMap]);
return (
<div className="flex flex-col gap-4">
{siteMap ? (
<Button onClick={handleClearSiteMap}>Clear Site Map</Button>
<Box className="flex flex-col gap-4">
{!siteMap ? (
<Button
onClick={handleCreateSiteMap}
variant="contained"
color="primary"
sx={{
alignSelf: "flex-end",
textTransform: "none",
}}
>
Create Site Map
</Button>
) : (
<Button onClick={handleCreateSiteMap}>Create Site Map</Button>
)}
{showSiteMap && (
<div className="flex flex-col gap-4">
<Box className="flex flex-col gap-4">
<Box
sx={{
display: "flex",
justifyContent: "space-between",
alignItems: "center",
}}
>
<Typography variant="h6" sx={{ fontWeight: 500 }}>
Site Map Configuration
</Typography>
<Button
onClick={handleClearSiteMap}
variant="outlined"
color="error"
size="small"
sx={{
textTransform: "none",
"&:hover": {
bgcolor: "error.main",
color: "error.contrastText",
},
}}
>
Clear Site Map
</Button>
</Box>
<SiteMapInput />
{siteMap?.actions && siteMap?.actions.length > 0 && (
<>
<Divider
<Divider />
<TableContainer
sx={{
borderColor:
theme.palette.mode === "dark" ? "#ffffff" : "0000000",
maxHeight: "400px",
overflow: "auto",
borderRadius: 1,
border: 1,
borderColor: "divider",
}}
/>
<Typography className="w-full text-center" variant="h5">
Site Map Actions
</Typography>
>
<Table size="small" stickyHeader>
<TableHead>
<TableRow>
<TableCell width="10%">
<Typography sx={{ fontWeight: 600 }}>Action</Typography>
</TableCell>
<TableCell width="30%">
<Typography sx={{ fontWeight: 600 }}>Type</Typography>
</TableCell>
<TableCell width="40%">
<Typography sx={{ fontWeight: 600 }}>XPath</Typography>
</TableCell>
</TableRow>
</TableHead>
<TableBody>
{siteMap?.actions.reverse().map((action, index) => (
<TableRow
key={action.xpath}
sx={{
"&:hover": {
bgcolor: "action.hover",
},
}}
>
<TableCell>
<Typography variant="body2">{index + 1}</Typography>
</TableCell>
<TableCell>
<Typography
variant="body2"
sx={{
color:
action.type === "click"
? "primary.main"
: "warning.main",
fontWeight: 500,
}}
>
{action.type}
</Typography>
</TableCell>
<TableCell>
<Typography
variant="body2"
sx={{
fontFamily: "monospace",
fontSize: "0.875rem",
color: "text.secondary",
}}
noWrap
>
{action.xpath}
</Typography>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</TableContainer>
</>
)}
<ul className="flex flex-col gap-4">
{siteMap?.actions.reverse().map((action, index) => (
<li key={action.xpath} className="flex w-full items-center">
<Typography variant="h6" className="w-[10%] mr-2">
Action {index + 1}:
</Typography>
<SiteMapInput
disabled={Boolean(siteMap)}
xpath={action.xpath}
option={action.type}
clickOnce={action.do_once}
input={action.input}
/>
</li>
))}
</ul>
</div>
</Box>
)}
</div>
</Box>
);
};

View File

@@ -11,6 +11,7 @@ export const useAdvancedJobOptions = () => {
proxies: null,
collect_media: false,
custom_cookies: null,
return_html: false,
};
const router = useRouter();

View File

@@ -15,6 +15,7 @@ export const parseJobOptions = (
proxies: null,
collect_media: false,
custom_cookies: null,
return_html: false,
};
if (jsonOptions.collect_media) {
@@ -42,6 +43,10 @@ export const parseJobOptions = (
setSiteMap(jsonOptions.site_map);
}
if (jsonOptions.return_html) {
newJobOptions.return_html = true;
}
setJobOptions(newJobOptions);
}
};

View File

@@ -8,7 +8,7 @@ export type DeleteCronJobsParams = {
export const deleteCronJobs = async (params: DeleteCronJobsParams) => {
const token = Cookies.get("token");
const response = await fetch("/api/delete-cron-jobs", {
const response = await fetch("/api/delete-cron-job", {
method: "POST",
headers: {
"Content-Type": "application/json",

View File

@@ -27,6 +27,7 @@ export type RawJobOptions = {
proxies: string | null;
collect_media: boolean;
custom_cookies: string | null;
return_html: boolean;
};
export type ActionOption = "click" | "input";
@@ -58,6 +59,7 @@ export const initialJobOptions: RawJobOptions = {
proxies: null,
collect_media: false,
custom_cookies: null,
return_html: false,
};
export const COLOR_MAP: Record<string, string> = {