24 Commits

Author SHA1 Message Date
github-actions[bot]
24f4b57fea chore: bump version to 1.1.4
Some checks failed
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-18 23:06:20 +00:00
Gaurav Agnihotri
1c0dec6db6 fix: pin browserforge version to 1.2.1 (#93)
Co-authored-by: gauravagnihotri <gaagniho@mtu.edu>
2025-06-18 18:06:10 -05:00
github-actions[bot]
e9c60f6338 chore: bump version to 1.1.3
Some checks failed
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-12 23:06:34 +00:00
Jayden Pyles
5719a85491 chore: update chart version 2025-06-12 18:07:55 -05:00
github-actions[bot]
052d80de07 chore: bump version to 1.1.3 2025-06-12 23:03:26 +00:00
Jayden Pyles
7047a3c0e3 chore: update chart version 2025-06-12 18:04:47 -05:00
github-actions[bot]
71f603fc62 chore: bump version to 1.1.3 2025-06-12 23:01:57 +00:00
Jayden Pyles
86a77a27df chore: update chart version 2025-06-12 18:03:20 -05:00
github-actions[bot]
b11e263b93 chore: bump version to 1.1.3 2025-06-12 23:00:47 +00:00
Jayden Pyles
91dc13348d feat: add import/export for job configurations (#91)
* chore: wip add upload/import

* chore: wip add upload/import

* feat: update job rerunning

* fix: update workflow

* fix: update workflow

* chore: temp disable workflow
2025-06-12 18:00:39 -05:00
github-actions[bot]
93b0c83381 chore: bump version to 1.1.2
Some checks failed
Merge / tests (push) Has been cancelled
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-08 23:24:17 +00:00
Jayden Pyles
9381ba9232 chore: update workflow 2025-06-08 18:17:03 -05:00
Jayden Pyles
20dccc5527 feat: edit ui + add return html option (#90)
* fix: restyle the element table

* chore: wip ui

* wip: edit styles

* feat: add html return

* fix: build

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: workflow

* fix: cypress test

* chore: update photo [skip ci]
2025-06-08 18:14:02 -05:00
Jayden Pyles
02619eb184 feat: update workflows [no bump]
Some checks failed
Merge / tests (push) Has been cancelled
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-05 22:19:41 -05:00
github-actions[bot]
58c6c09fc9 chore: bump version to 1.1.2 2025-06-06 03:18:03 +00:00
Jayden Pyles
bf896b4c6b feat: update workflows [no bump] 2025-06-05 22:09:49 -05:00
Jayden Pyles
e3b9c11ab7 feat: update workflows [no bump] 2025-06-05 21:59:54 -05:00
github-actions[bot]
32da3375b3 chore: bump version to 1.1.1 2025-06-06 02:56:42 +00:00
Jayden Pyles
b5131cbe4c feat: update workflows [no bump] 2025-06-05 21:47:55 -05:00
github-actions[bot]
47c4c9a7d1 chore: bump version to 1.1.1
Some checks failed
Merge / tests (push) Has been cancelled
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-05 01:48:00 +00:00
Jayden Pyles
4352988666 feat: update workflows 2025-06-04 20:40:16 -05:00
Jayden Pyles
00759151e6 feat: update workflos 2025-06-04 20:35:06 -05:00
github-actions[bot]
bfae00ca72 chore: bump version to 1.1.1 2025-06-04 23:06:51 +00:00
Jayden Pyles
e810700569 chore: remove deprecated output version 2025-06-04 17:58:49 -05:00
49 changed files with 1131 additions and 387 deletions

View File

@@ -20,6 +20,16 @@ runs:
with:
node-version: 22
- name: Setup yarn
shell: bash
run: npm install -g yarn
- name: Install xvfb for headless testing
shell: bash
run: |
sudo apt-get update
sudo apt-get install -y xvfb libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 libgbm1 libasound2t64 libpango-1.0-0 libcairo2 libgtk-3-0 libgdk-pixbuf2.0-0 libx11-6 libx11-xcb1 libxcb1 libxss1 libxtst6 libnspr4
- name: Setup Docker project
shell: bash
run: |
@@ -63,5 +73,8 @@ runs:
- name: Run Cypress tests
shell: bash
run: npm run cy:run
run: |
set -e
npm run cy:run

31
.github/workflows/cypress-tests.yml vendored Normal file
View File

@@ -0,0 +1,31 @@
# Reusable workflow (workflow_call) that runs the Cypress suite through the
# local composite action and dumps the API container logs when the run fails.
name: Cypress Tests

on:
  workflow_call:
    secrets:
      openai_key:
        required: true

jobs:
  cypress-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Run Cypress Tests
        id: run-tests
        uses: ./.github/actions/run-cypress-tests
        with:
          openai_key: ${{ secrets.openai_key }}

      # An `if:` without a status-check function is implicitly AND-ed with
      # success(), so after a failing test step this step would be skipped.
      # failure() makes the log dump actually run when the tests fail.
      - name: Check container logs on failure
        if: failure() && steps.run-tests.conclusion == 'failure'
        run: |
          echo "Cypress tests failed. Dumping container logs..."
          docker logs scraperr_api || true

      # Kept as an explicit failure marker; it needs failure() for the same
      # reason as the step above.
      - name: Fail job if Cypress failed
        if: failure() && steps.run-tests.conclusion == 'failure'
        run: exit 1

View File

@@ -4,19 +4,26 @@ on:
push:
branches:
- master
pull_request:
types: [closed]
branches:
- master
jobs:
tests:
uses: ./.github/workflows/tests.yml
secrets:
openai_key: ${{ secrets.OPENAI_KEY }}
discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
# TODO: Re-enable once browserforge is fixed for camoufox; until then the tests can never pass
# tests:
# uses: ./.github/workflows/tests.yml
# secrets:
# openai_key: ${{ secrets.OPENAI_KEY }}
# discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
version:
needs: tests
uses: ./.github/workflows/version.yml
secrets:
git_token: ${{ secrets.GPAT_TOKEN }}
build-and-deploy:
if: needs.version.outputs.version_bump == 'true'
needs: version
uses: ./.github/workflows/docker-image.yml
secrets:

View File

@@ -8,11 +8,6 @@ on:
workflow_dispatch:
jobs:
checkout:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
tests:
uses: ./.github/workflows/tests.yml
secrets:

29
.github/workflows/pytest.yml vendored Normal file
View File

@@ -0,0 +1,29 @@
# Reusable workflow (invoked through workflow_call) that runs the backend
# pytest suite located under api/backend/tests.
name: Pytest
on:
workflow_call:
jobs:
unit-tests:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
# NOTE(review): Node is set up here although the remaining steps only call
# pip/pdm/pytest — confirm this step is still required.
- uses: actions/setup-node@v3
# Exports ENV=test to all following steps via GITHUB_ENV.
- name: Set env
run: echo "ENV=test" >> $GITHUB_ENV
- name: Install pdm
run: pip install pdm
- name: Install project dependencies
run: pdm install
# --with-deps also installs the operating-system packages the browsers need.
- name: Install playwright
run: pdm run playwright install --with-deps
# PYTHONPATH=. lets the tests import from the repository root.
- name: Run tests
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests

View File

@@ -10,26 +10,8 @@ on:
jobs:
unit-tests:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set env
run: echo "ENV=test" >> $GITHUB_ENV
- name: Install pdm
run: pip install pdm
- name: Install project dependencies
run: pdm install
- name: Install playwright
run: pdm run playwright install
- name: Run tests
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests
pytest:
uses: ./.github/workflows/pytest.yml
cypress-tests:
runs-on: ubuntu-latest
@@ -37,26 +19,14 @@ jobs:
- uses: actions/checkout@v4
- name: Run Cypress Tests
id: run-tests
uses: ./.github/actions/run-cypress-tests
with:
openai_key: ${{ secrets.OPENAI_KEY }}
continue-on-error: true
- name: Check container logs on failure
if: steps.run-tests.outcome == 'failure'
run: |
echo "Cypress tests failed. Dumping container logs..."
docker logs scraperr_api || true
- name: Fail job if Cypress failed
if: steps.run-tests.outcome == 'failure'
run: exit 1
openai_key: ${{ secrets.openai_key }}
success-message:
runs-on: ubuntu-latest
needs:
- unit-tests
- pytest
- cypress-tests
steps:
- name: Send Discord Message

View File

@@ -2,10 +2,16 @@ name: Version
on:
workflow_call:
secrets:
git_token:
required: true
outputs:
version:
description: "The new version number"
value: ${{ jobs.version.outputs.version }}
version_bump:
description: "Whether the version was bumped"
value: ${{ jobs.version.outputs.version_bump }}
jobs:
version:
@@ -13,6 +19,7 @@ jobs:
outputs:
version: ${{ steps.set_version.outputs.version }}
version_bump: ${{ steps.check_version_bump.outputs.version_bump }}
steps:
- name: Checkout
@@ -37,17 +44,39 @@ jobs:
echo "VERSION_TYPE=$VERSION_TYPE" >> $GITHUB_ENV
- name: Check for version bump
id: check_version_bump
run: |
COMMIT_MSG=$(git log -1 --pretty=%B)
if [[ $COMMIT_MSG =~ .*\[no\ bump\].* ]]; then
echo "version_bump=false" >> $GITHUB_OUTPUT
else
echo "version_bump=true" >> $GITHUB_OUTPUT
fi
- name: Skip version bump
if: steps.check_version_bump.outputs.version_bump == 'false'
run: |
echo "Skipping version bump as requested"
gh run cancel ${{ github.run_id }}
exit 0
env:
GITHUB_TOKEN: ${{ secrets.git_token }}
- name: Set version
if: steps.check_version_bump.outputs.version_bump != 'false'
id: set_version
run: |
VERSION=$(./scripts/version.sh "$VERSION_TYPE")
echo "VERSION=$VERSION" >> $GITHUB_ENV
echo "Version is $VERSION"
echo "::set-output name=version::$VERSION"
echo "version=$VERSION" >> $GITHUB_OUTPUT
env:
VERSION_TYPE: ${{ env.VERSION_TYPE }}
- name: Update chart file
if: steps.check_version_bump.outputs.version_bump != 'false'
run: |
sed -i "s/^version: .*/version: $VERSION/" helm/Chart.yaml

View File

@@ -7,6 +7,7 @@ from camoufox import AsyncCamoufox
from playwright.async_api import Page
# LOCAL
from api.backend.constants import RECORDINGS_ENABLED
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
from api.backend.job.models import CapturedElement
from api.backend.worker.logger import LOG
@@ -29,11 +30,13 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
LOG.info(f"Starting work for agent job: {agent_job}")
pages = set()
proxy = None
if agent_job["job_options"]["proxies"]:
proxy = random.choice(agent_job["job_options"]["proxies"])
LOG.info(f"Using proxy: {proxy}")
async with AsyncCamoufox(headless=True) as browser:
async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
page: Page = await browser.new_page()
await add_custom_items(
@@ -63,7 +66,9 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
xpaths = parse_response(response)
captured_elements = await capture_elements(page, xpaths)
captured_elements = await capture_elements(
page, xpaths, agent_job["job_options"].get("return_html", False)
)
final_url = page.url

View File

@@ -206,7 +206,7 @@ def parse_next_page(text: str) -> str | None:
async def capture_elements(
page: Page, xpaths: list[dict[str, str]]
page: Page, xpaths: list[dict[str, str]], return_html: bool
) -> list[CapturedElement]:
captured_elements = []
seen_texts = set()
@@ -217,6 +217,23 @@ async def capture_elements(
count = await locator.count()
for i in range(count):
if return_html:
element_text = (
await page.locator(f"xpath={xpath['xpath']}")
.nth(i)
.inner_html()
)
seen_texts.add(element_text)
captured_elements.append(
CapturedElement(
name=xpath["name"],
text=element_text,
xpath=xpath["xpath"],
)
)
continue
element_text = ""
element_handle = await locator.nth(i).element_handle()

View File

@@ -29,6 +29,7 @@ def insert(query: str, values: tuple[Any, ...]):
except sqlite3.Error as e:
LOG.error(f"An error occurred: {e}")
raise e
finally:
cursor.close()

View File

@@ -49,10 +49,15 @@ async def get_queued_job():
return res[0] if res else None
async def update_job(ids: list[str], field: str, value: Any):
query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
res = update(query, tuple([value] + ids))
LOG.info(f"Updated job: {res}")
async def update_job(ids: list[str], updates: dict[str, Any]):
if not updates:
return
set_clause = ", ".join(f"{field} = ?" for field in updates.keys())
query = f"UPDATE jobs SET {set_clause} WHERE id IN {format_list_for_query(ids)}"
values = list(updates.values()) + ids
res = update(query, tuple(values))
LOG.debug(f"Updated job: {res}")
async def delete_jobs(jobs: list[str]):

View File

@@ -1,5 +1,6 @@
# STL
import logging
import datetime
from typing import Any
# LOCAL
@@ -12,7 +13,23 @@ from api.backend.database.queries.job.job_queries import JOB_INSERT_QUERY
LOG = logging.getLogger("Job")
def insert(item: dict[str, Any]) -> None:
async def insert(item: dict[str, Any]) -> None:
if check_for_job_completion(item["id"]):
await multi_field_update_job(
item["id"],
{
"agent_mode": item["agent_mode"],
"prompt": item["prompt"],
"job_options": item["job_options"],
"elements": item["elements"],
"status": "Queued",
"result": [],
"time_created": datetime.datetime.now().isoformat(),
"chat": None,
},
)
return
common_insert(
JOB_INSERT_QUERY,
(
@@ -33,6 +50,12 @@ def insert(item: dict[str, Any]) -> None:
LOG.debug(f"Inserted item: {item}")
def check_for_job_completion(id: str) -> dict[str, Any]:
    """Look up a job row by its id.

    Args:
        id: Value bound to the ``id`` column in the ``jobs`` table lookup.

    Returns:
        The first row returned by ``common_query`` for that id, or an empty
        dict when the query yields no rows.

    NOTE(review): despite the name, nothing here inspects the job's status;
    the function only reports whether a row with this id exists. Confirm
    that callers expect an existence check.
    """
    # Plain string literal: the statement has no interpolated parts, the id is
    # passed as a bound parameter.
    query = "SELECT * FROM jobs WHERE id = ?"
    rows = common_query(query, (id,))
    return rows[0] if rows else {}
async def get_queued_job():
query = (
"SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
@@ -48,6 +71,12 @@ async def update_job(ids: list[str], field: str, value: Any):
LOG.debug(f"Updated job: {res}")
async def multi_field_update_job(id: str, fields: dict[str, Any]):
    """Update several columns of one job row with a single UPDATE statement.

    Args:
        id: Value matched against the ``id`` column of the ``jobs`` table.
        fields: Mapping of column name to new value. Column names are
            interpolated into the SQL text (they must come from trusted
            code, never from user input); values are bound as parameters.

    Returns:
        None. Does nothing when ``fields`` is empty.
    """
    if not fields:
        # An empty SET clause would produce invalid SQL, so there is nothing
        # to execute.
        return

    set_clause = ", ".join(f"{field} = ?" for field in fields)
    query = f"UPDATE jobs SET {set_clause} WHERE id = ?"
    res = common_update(query, tuple(list(fields.values()) + [id]))
    LOG.debug(f"Updated job: {res}")
async def delete_jobs(jobs: list[str]):
if not jobs:
LOG.debug("No jobs to delete.")

View File

@@ -43,10 +43,8 @@ job_router = APIRouter()
@job_router.post("/update")
@handle_exceptions(logger=LOG)
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
"""Used to update jobs"""
await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)
return JSONResponse(content={"message": "Jobs updated successfully."})
return {"message": "Jobs updated successfully"}
@job_router.post("/submit-scrape-job")
@@ -54,9 +52,11 @@ async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
async def submit_scrape_job(job: Job):
LOG.info(f"Recieved job: {job}")
job.id = uuid.uuid4().hex
if not job.id:
job.id = uuid.uuid4().hex
job_dict = job.model_dump()
insert(job_dict)
await insert(job_dict)
return JSONResponse(
content={"id": job.id, "message": "Job submitted successfully."}
@@ -70,7 +70,9 @@ async def retrieve_scrape_jobs(
):
LOG.info(f"Retrieving jobs for account: {user.email}")
ATTRIBUTES = "chat" if fetch_options.chat else "*"
job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
job_query = (
f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ? ORDER BY time_created ASC"
)
results = query(job_query, (user.email,))
return JSONResponse(content=jsonable_encoder(results[::-1]))

View File

@@ -25,3 +25,4 @@ class JobOptions(BaseModel):
site_map: Optional[SiteMap] = None
collect_media: bool = False
custom_cookies: list[dict[str, Any]] = []
return_html: bool = False

View File

@@ -110,7 +110,9 @@ async def make_site_request(
)
async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element]):
async def collect_scraped_elements(
page: tuple[str, str], xpaths: list[Element], return_html: bool
):
soup = BeautifulSoup(page[0], "lxml")
root = etree.HTML(str(soup))
@@ -120,6 +122,16 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])
el = sxpath(root, elem.xpath)
for e in el: # type: ignore
if return_html:
elements[elem.name] = [
CapturedElement(
xpath=elem.xpath,
text=page[0],
name=elem.name,
)
]
continue
text = (
" ".join(str(t) for t in e.itertext())
if isinstance(e, etree._Element)
@@ -161,6 +173,10 @@ async def scrape(
elements: list[dict[str, dict[str, list[CapturedElement]]]] = []
for page in pages:
elements.append(await collect_scraped_elements(page, xpaths))
elements.append(
await collect_scraped_elements(
page, xpaths, job_options.get("return_html", False)
)
)
return elements

View File

@@ -7,11 +7,10 @@ import {
} from "../utilities/job.utilities";
import { mockSubmitJob } from "../utilities/mocks";
describe.only("Agent", () => {
describe("Agent", () => {
beforeEach(() => {
mockSubmitJob();
login();
cy.visit("/agent");
});
afterEach(() => {
@@ -19,6 +18,9 @@ describe.only("Agent", () => {
});
it("should be able to scrape some data", () => {
cy.visit("/agent");
cy.wait(1000);
const url = "https://books.toscrape.com";
const prompt = "Collect all the links on the page";
buildAgentJob(url, prompt);

View File

@@ -4,7 +4,7 @@ export const cleanUpJobs = () => {
cy.wait("@retrieve", { timeout: 15000 });
cy.get("tbody tr", { timeout: 10000 }).should("have.length.at.least", 1);
cy.get("tbody tr", { timeout: 20000 }).should("have.length.at.least", 1);
const tryClickSelectAll = (attempt = 1, maxAttempts = 5) => {
cy.log(`Attempt ${attempt} to click Select All`);
@@ -100,13 +100,13 @@ export const waitForJobCompletion = (url: string) => {
};
export const enableMultiPageScraping = () => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
cy.get('[data-cy="multi-page-toggle"]').click();
cy.get("body").type("{esc}");
};
export const addCustomHeaders = (headers: Record<string, string>) => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
cy.get('[name="custom_headers"]').type(JSON.stringify(headers), {
parseSpecialCharSequences: false,
});
@@ -114,16 +114,17 @@ export const addCustomHeaders = (headers: Record<string, string>) => {
};
export const addCustomCookies = (cookies: Record<string, string>) => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
cy.get('[name="custom_cookies"]').type(JSON.stringify(cookies));
cy.get("body").type("{esc}");
};
export const openAdvancedJobOptions = () => {
cy.get("button").contains("Advanced Job Options").click();
cy.get("button").contains("Advanced Options").click();
};
/**
 * Runs checkAiDisabled() first, then clicks the first `div` whose id is
 * `select-job` and the first `li` with role `option`. Each lookup waits up
 * to 10 seconds for its element.
 */
export const selectJobFromSelector = () => {
checkAiDisabled();
cy.get("div[id='select-job']", { timeout: 10000 }).first().click();
cy.get("li[role='option']", { timeout: 10000 }).first().click();
};
@@ -161,7 +162,18 @@ export const addElement = (name: string, xpath: string) => {
cy.get('[data-cy="add-button"]').click();
};
/**
 * Reads the `persist:root` entry stored for `http://localhost` in local
 * storage, parses its nested `settings` JSON and asserts that `aiEnabled`
 * is `true`.
 *
 * NOTE(review): the name says "disabled" but the assertion requires AI to be
 * enabled — confirm which of the two is intended.
 */
export const checkAiDisabled = () => {
  cy.getAllLocalStorage().then((allStorage) => {
    const persistedRoot = allStorage["http://localhost"][
      "persist:root"
    ] as string;
    // The persisted root is JSON whose `settings` value is itself a JSON string.
    const { settings: rawSettings } = JSON.parse(persistedRoot);
    const { aiEnabled } = JSON.parse(rawSettings);

    expect(aiEnabled).to.equal(true);
  });
};
/**
 * Fills in the agent job form: runs checkAiDisabled(), enters the target URL
 * through enterJobUrl, then types the prompt into the element marked
 * `data-cy="prompt-input"`.
 *
 * @param url - URL handed to enterJobUrl.
 * @param prompt - Text typed into the prompt input.
 */
export const buildAgentJob = (url: string, prompt: string) => {
checkAiDisabled();
enterJobUrl(url);
cy.get("[data-cy='prompt-input']").type(prompt);
};

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

After

Width:  |  Height:  |  Size: 67 KiB

View File

@@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.1.1
version: 1.1.4
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to

2
next-env.d.ts vendored
View File

@@ -2,4 +2,4 @@
/// <reference types="next/image-types/global" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.
// see https://nextjs.org/docs/pages/building-your-application/configuring/typescript for more information.

8
pdm.lock generated
View File

@@ -5,7 +5,7 @@
groups = ["default", "dev"]
strategy = ["inherit_metadata"]
lock_version = "4.5.0"
content_hash = "sha256:1a65c1e288d2c6827fc6866d3bfe6a9b8707b2ca895d488f4a9b11cd579c4359"
content_hash = "sha256:222416fbd48d349e2ae777bf1d167b68e4342f38d5e20d04095cbbb594afb8f3"
[[metadata.targets]]
requires_python = ">=3.10"
@@ -459,7 +459,7 @@ files = [
[[package]]
name = "browserforge"
version = "1.2.3"
version = "1.2.1"
requires_python = "<4.0,>=3.8"
summary = "Intelligent browser header & fingerprint generator"
groups = ["default"]
@@ -468,8 +468,8 @@ dependencies = [
"typing-extensions; python_version < \"3.10\"",
]
files = [
{file = "browserforge-1.2.3-py3-none-any.whl", hash = "sha256:a6c71ed4688b2f1b0bee757ca82ddad0007cbba68a71eca66ca607dde382f132"},
{file = "browserforge-1.2.3.tar.gz", hash = "sha256:d5bec6dffd4748b30fbac9f9c1ef33b26c01a23185240bf90011843e174b7ecc"},
{file = "browserforge-1.2.1-py3-none-any.whl", hash = "sha256:b2813b4de80b9c48c88700c93e3dfa6a64694d04f3263545e28bb03dd95df27e"},
{file = "browserforge-1.2.1.tar.gz", hash = "sha256:7036d73fb066a4361a015b619079474c42d8b0ff415e1d874b62366de48d0b61"},
]
[[package]]

View File

@@ -43,6 +43,7 @@ dependencies = [
"camoufox>=0.4.11",
"html2text>=2025.4.15",
"proxy-py>=2.4.10",
"browserforge==1.2.1",
]
requires-python = ">=3.10"
readme = "README.md"

View File

@@ -1,7 +1,8 @@
import { Box, Link, Typography } from "@mui/material";
import { SetStateAction, Dispatch, useState } from "react";
import { AdvancedJobOptionsDialog } from "./dialog/advanced-job-options-dialog";
import { RawJobOptions } from "@/types";
import SettingsIcon from "@mui/icons-material/Settings";
import { Box, Button, Typography } from "@mui/material";
import { Dispatch, SetStateAction, useState } from "react";
import { AdvancedJobOptionsDialog } from "./dialog/advanced-job-options-dialog";
export type AdvancedJobOptionsProps = {
jobOptions: RawJobOptions;
@@ -17,26 +18,27 @@ export const AdvancedJobOptions = ({
const [open, setOpen] = useState(false);
return (
<Box sx={{ mb: 2 }}>
<Link
component="button"
variant="body2"
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
<Button
variant="outlined"
onClick={() => setOpen(true)}
startIcon={<SettingsIcon />}
sx={{
textDecoration: "none",
color: "primary.main",
textTransform: "none",
borderRadius: 2,
px: 2,
py: 1,
borderColor: "divider",
color: "text.secondary",
"&:hover": {
color: "primary.dark",
textDecoration: "underline",
borderColor: "primary.main",
color: "primary.main",
bgcolor: "action.hover",
},
paddingLeft: 1,
display: "inline-flex",
alignItems: "center",
gap: 0.5,
}}
>
<Typography variant="body2">Advanced Job Options</Typography>
</Link>
<Typography variant="body2">Advanced Options</Typography>
</Button>
<AdvancedJobOptionsDialog
open={open}

View File

@@ -1,4 +1,6 @@
import { ExpandedTableInput } from "@/components/common/expanded-table-input";
import { UploadFile } from "@/components/common/upload-file";
import { useImportJobConfig } from "@/hooks/use-import-job-config";
import { RawJobOptions } from "@/types";
import {
Code as CodeIcon,
@@ -26,6 +28,7 @@ import {
useTheme,
} from "@mui/material";
import { Dispatch, SetStateAction, useEffect, useState } from "react";
import { toast } from "react-toastify";
export type AdvancedJobOptionsDialogProps = {
open: boolean;
@@ -43,18 +46,18 @@ export const AdvancedJobOptionsDialog = ({
multiPageScrapeEnabled = true,
}: AdvancedJobOptionsDialogProps) => {
const theme = useTheme();
const { handleUploadFile } = useImportJobConfig();
const [localJobOptions, setLocalJobOptions] =
useState<RawJobOptions>(jobOptions);
// Update local state when prop changes
useEffect(() => {
setLocalJobOptions(jobOptions);
}, [jobOptions]);
const handleMultiPageScrapeChange = () => {
const handleCheckboxChange = (key: keyof RawJobOptions) => {
setLocalJobOptions((prevJobOptions) => ({
...prevJobOptions,
multi_page_scrape: !prevJobOptions.multi_page_scrape,
[key]: !prevJobOptions[key],
}));
};
@@ -65,19 +68,23 @@ export const AdvancedJobOptionsDialog = ({
}));
};
const handleCollectMediaChange = () => {
setLocalJobOptions((prevJobOptions) => ({
...prevJobOptions,
collect_media: !prevJobOptions.collect_media,
}));
};
const handleClose = () => {
// Save the local state back to the parent before closing
setJobOptions(localJobOptions);
onClose();
};
/**
 * Passes the chosen file to handleUploadFile, closes the dialog and shows a
 * toast that reflects whether the import reported an error.
 */
const onUploadFile = async (file: File) => {
  const uploadFailed = await handleUploadFile(file);

  // The dialog is closed in both outcomes; only the toast differs.
  handleClose();

  if (uploadFailed) {
    toast.error("Failed to upload job config");
    return;
  }

  toast.success("Job config uploaded successfully");
};
return (
<Dialog
open={open}
@@ -108,11 +115,18 @@ export const AdvancedJobOptionsDialog = ({
<Typography variant="h6" component="div">
Advanced Job Options
</Typography>
<Settings
sx={{
color: theme.palette.primary.contrastText,
}}
/>
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
<UploadFile
message="Upload Job Config"
fileTypes={["application/json"]}
onUploadFile={onUploadFile}
/>
<Settings
sx={{
color: theme.palette.primary.contrastText,
}}
/>
</Box>
</DialogTitle>
<DialogContent
@@ -137,7 +151,7 @@ export const AdvancedJobOptionsDialog = ({
control={
<Checkbox
checked={localJobOptions.multi_page_scrape}
onChange={handleMultiPageScrapeChange}
onChange={() => handleCheckboxChange("multi_page_scrape")}
disabled={!multiPageScrapeEnabled}
/>
}
@@ -158,11 +172,12 @@ export const AdvancedJobOptionsDialog = ({
</Box>
}
/>
<FormControlLabel
control={
<Checkbox
checked={localJobOptions.collect_media}
onChange={handleCollectMediaChange}
onChange={() => handleCheckboxChange("collect_media")}
data-cy="collect-media-checkbox"
/>
}
@@ -177,6 +192,26 @@ export const AdvancedJobOptionsDialog = ({
</Box>
}
/>
<FormControlLabel
control={
<Checkbox
checked={localJobOptions.return_html}
onChange={() => handleCheckboxChange("return_html")}
data-cy="return-html-checkbox"
/>
}
label={
<Box sx={{ display: "flex", alignItems: "center" }}>
<Typography>Return HTML</Typography>
<Tooltip title="Return the HTML of the page">
<IconButton size="small">
<InfoOutlined fontSize="small" />
</IconButton>
</Tooltip>
</Box>
}
/>
</FormGroup>
</Box>

View File

@@ -12,6 +12,7 @@ export const Disabled = ({ message }: DisabledProps) => {
display="flex"
justifyContent="center"
alignItems="center"
data-testid="disabled-message"
>
<h4
style={{

View File

@@ -0,0 +1 @@
export * from "./upload-file";

View File

@@ -0,0 +1,34 @@
import { Box, Button, Typography } from "@mui/material";
/** Props accepted by the UploadFile component. */
export type UploadFileProps = {
/** Text rendered inside the upload button. */
message: string;
/** Optional list of types; joined with "," to form the file input's `accept` attribute. */
fileTypes?: string[];
/** Called with the first file of the selection when the file input changes. */
onUploadFile: (file: File) => void;
};
/**
 * Button-styled file picker. The button acts as the label of a hidden
 * `<input type="file">`; when the user picks a file, the first selected file
 * is passed to `onUploadFile`.
 *
 * @param message - Text shown inside the button.
 * @param fileTypes - Optional list joined with "," into the input's `accept` attribute.
 * @param onUploadFile - Callback receiving the selected file.
 */
export const UploadFile = ({
  message,
  fileTypes,
  onUploadFile,
}: UploadFileProps) => {
  const handleUploadFile = (event: React.ChangeEvent<HTMLInputElement>) => {
    const input = event.target;
    const file = input.files?.[0];
    if (file) {
      onUploadFile(file);
    }
    // Clear the selection so that choosing the same file again still fires a
    // change event (e.g. retrying after a failed upload). The File object
    // already handed to the callback stays valid.
    input.value = "";
  };

  return (
    <Box>
      <Button variant="contained" component="label">
        <Typography>{message}</Typography>
        <input
          type="file"
          hidden
          onChange={handleUploadFile}
          accept={fileTypes?.join(",")}
        />
      </Button>
    </Box>
  );
};

View File

@@ -1,18 +1,18 @@
import React from "react";
import StarIcon from "@mui/icons-material/Star";
import {
Tooltip,
Box,
Button,
Checkbox,
IconButton,
Table,
TableBody,
TableCell,
TableHead,
TableRow,
Box,
Checkbox,
Button,
Tooltip,
} from "@mui/material";
import router from "next/router";
import { Job } from "../../types";
import StarIcon from "@mui/icons-material/Star";
interface stateProps {
selectedJobs: Set<string>;
@@ -21,7 +21,12 @@ interface stateProps {
interface Props {
onSelectJob: (job: string) => void;
onNavigate: (elements: Object[], url: string, options: any) => void;
onNavigate: (
id: string,
elements: Object[],
url: string,
options: any
) => void;
onFavorite: (ids: string[], field: string, value: any) => void;
stateProps: stateProps;
}
@@ -87,11 +92,29 @@ export const Favorites = ({
</TableCell>
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
<Button
onClick={() =>
onNavigate(row.elements, row.url, row.job_options)
}
onClick={() => {
if (row.agent_mode) {
router.push({
pathname: "/agent",
query: {
url: row.url,
prompt: row.prompt,
job_options: JSON.stringify(row.job_options),
id: row.id,
},
});
} else {
onNavigate(row.id, row.elements, row.url, row.job_options);
}
}}
size="small"
sx={{
minWidth: 0,
padding: "4px 8px",
fontSize: "0.625rem",
}}
>
Run
Rerun
</Button>
</TableCell>
</TableRow>

View File

@@ -1,5 +1,11 @@
"use client";
import { AutoAwesome, Image, VideoCameraBack } from "@mui/icons-material";
import { useExportJobConfig } from "@/hooks/use-export-job-config";
import {
AutoAwesome,
Image,
Settings,
VideoCameraBack,
} from "@mui/icons-material";
import StarIcon from "@mui/icons-material/Star";
import {
Box,
@@ -30,7 +36,12 @@ interface Props {
colors: stringMap;
onSelectJob: (job: string) => void;
onDownload: (job: string[]) => void;
onNavigate: (elements: Object[], url: string, options: any) => void;
onNavigate: (
id: string,
elements: Object[],
url: string,
options: any
) => void;
onFavorite: (ids: string[], field: string, value: any) => void;
onJobClick: (job: Job) => void;
stateProps: stateProps;
@@ -46,6 +57,7 @@ export const JobQueue = ({
onJobClick,
}: Props) => {
const { selectedJobs, filteredJobs } = stateProps;
const { exportJobConfig } = useExportJobConfig();
const router = useRouter();
return (
@@ -116,6 +128,17 @@ export const JobQueue = ({
</IconButton>
</span>
</Tooltip>
<Tooltip title="Export Job Configuration">
<span>
<IconButton
onClick={() => {
exportJobConfig(row);
}}
>
<Settings />
</IconButton>
</span>
</Tooltip>
{row.job_options.collect_media && (
<Tooltip title="View Media">
<span>
@@ -213,10 +236,17 @@ export const JobQueue = ({
query: {
url: row.url,
prompt: row.prompt,
job_options: JSON.stringify(row.job_options),
id: row.id,
},
});
} else {
onNavigate(row.elements, row.url, row.job_options);
onNavigate(
row.id,
row.elements,
row.url,
row.job_options
);
}
}}
size="small"

View File

@@ -47,10 +47,16 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
setJobDownloadDialogOpen(true);
};
const handleNavigate = (elements: Object[], url: string, options: any) => {
const handleNavigate = (
id: string,
elements: Object[],
url: string,
options: any
) => {
router.push({
pathname: "/",
query: {
id,
elements: JSON.stringify(elements),
url: url,
job_options: JSON.stringify(options),

View File

@@ -1,14 +1,14 @@
"use client";
import React, { useEffect, useRef } from "react";
import { Container, Box } from "@mui/material";
import { useRouter } from "next/router";
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
import {
ErrorSnackbar,
JobNotifySnackbar,
} from "@/components/common/snackbars";
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
import { Box, Container } from "@mui/material";
import { useRouter } from "next/router";
import { useEffect, useRef } from "react";
export const Home = () => {
const {
@@ -50,19 +50,18 @@ export const Home = () => {
flexDirection="column"
justifyContent="center"
alignItems="center"
height="100%"
minHeight="100vh"
py={4}
>
<Container maxWidth="lg" className="overflow-y-auto max-h-full">
<JobSubmitter />
{submittedURL.length > 0 ? (
<Container maxWidth="lg" className="overflow-y-auto">
<Box className="flex flex-col gap-6">
<JobSubmitter />
<ElementTable
rows={rows}
setRows={setRows}
submittedURL={submittedURL}
/>
) : null}
</Box>
</Container>
{snackbarSeverity === "info" ? (

View File

@@ -1,24 +1,24 @@
"use client";
import React, { useState, Dispatch, SetStateAction } from "react";
import { Element } from "@/types";
import AddIcon from "@mui/icons-material/Add";
import DeleteIcon from "@mui/icons-material/Delete";
import {
Typography,
TextField,
Button,
Box,
Divider,
IconButton,
Paper,
Table,
TableBody,
TableContainer,
TableCell,
TableContainer,
TableHead,
TableRow,
Box,
IconButton,
TextField,
Tooltip,
useTheme,
Divider,
Typography,
} from "@mui/material";
import AddIcon from "@mui/icons-material/Add";
import { Element } from "@/types";
import { Dispatch, SetStateAction, useState } from "react";
import { SiteMap } from "../site-map";
interface Props {
@@ -28,7 +28,6 @@ interface Props {
}
export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
const theme = useTheme();
const [newRow, setNewRow] = useState<Element>({
name: "",
xpath: "",
@@ -42,142 +41,219 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
};
const handleDeleteRow = (elementName: string) => {
setRows(
rows.filter((r) => {
return elementName !== r.name;
})
);
setRows(rows.filter((r) => elementName !== r.name));
};
return (
<Box className="animate-fadeIn p-2" bgcolor="background.paper">
<Box className="text-center mb-4">
<Typography variant="h4" sx={{ marginBottom: 1 }}>
Elements to Scrape
</Typography>
<Paper
elevation={0}
sx={{
p: 4,
borderRadius: 2,
bgcolor: "background.paper",
border: 1,
borderColor: "divider",
"&:hover": {
boxShadow: "0 4px 20px rgba(0, 0, 0, 0.05)",
},
}}
>
<Box className="flex flex-col gap-6">
<Box>
<Typography
variant="h5"
sx={{
fontWeight: 600,
color: "text.primary",
mb: 1,
}}
>
Elements to Scrape
</Typography>
<Typography
variant="body2"
sx={{
color: "text.secondary",
}}
>
Add elements to scrape from the target URL using XPath selectors
</Typography>
</Box>
<TableContainer
component={Box}
sx={{ maxHeight: "50%", overflow: "auto" }}
sx={{
maxHeight: "400px",
overflow: "auto",
borderRadius: 2,
border: 1,
borderColor: "divider",
}}
>
<div className="rounded-lg shadow-md border border-gray-300 overflow-hidden">
<Table
stickyHeader
className="mb-4"
sx={{
tableLayout: "fixed",
width: "100%",
"& .MuiTableCell-root": {
borderBottom: "1px solid #e0e0e0",
},
}}
>
<TableHead>
<TableRow>
<TableCell>
<Typography sx={{ fontWeight: "bold" }}>Name</Typography>
</TableCell>
<TableCell>
<Typography sx={{ fontWeight: "bold" }}>XPath</Typography>
</TableCell>
<TableCell>
<Typography sx={{ fontWeight: "bold" }}>Actions</Typography>
</TableCell>
</TableRow>
</TableHead>
<TableBody>
<TableRow>
<TableCell>
<TextField
data-cy="name-field"
label="Name"
variant="outlined"
fullWidth
value={newRow.name}
onChange={(e) =>
setNewRow({ ...newRow, name: e.target.value })
}
/>
</TableCell>
<TableCell>
<TextField
data-cy="xpath-field"
label="XPath"
variant="outlined"
fullWidth
value={newRow.xpath}
onChange={(e) =>
setNewRow({ ...newRow, xpath: e.target.value })
}
/>
</TableCell>
<TableCell>
<Tooltip
title={
newRow.xpath.length > 0 && newRow.name.length > 0
? "Add Element"
: "Fill out all fields to add an element"
}
placement="top"
>
<span>
<IconButton
data-cy="add-button"
aria-label="add"
size="small"
onClick={handleAddRow}
sx={{
height: "40px",
width: "40px",
}}
disabled={
!(newRow.xpath.length > 0 && newRow.name.length > 0)
}
>
<AddIcon
fontSize="inherit"
sx={{
color:
theme.palette.mode === "light"
? "#000000"
: "#ffffff",
}}
/>
</IconButton>
</span>
</Tooltip>
</TableCell>
</TableRow>
{rows.map((row, index) => (
<TableRow key={index}>
<TableCell>
<Typography>{row.name}</Typography>
</TableCell>
<TableCell>
<Typography>{row.xpath}</Typography>
</TableCell>
<TableCell>
<Button
onClick={() => handleDeleteRow(row.name)}
className="!bg-red-500 bg-opacity-50 !text-white font-semibold rounded-md
transition-transform transform hover:scale-105 hover:bg-red-500"
<Table
stickyHeader
size="small"
sx={{
"& .MuiTableCell-root": {
borderBottom: "1px solid",
borderColor: "divider",
py: 1.5,
},
"& .MuiTableCell-head": {
bgcolor: "background.default",
fontWeight: 600,
},
}}
>
<TableHead>
<TableRow>
<TableCell width="30%">Name</TableCell>
<TableCell width="50%">XPath</TableCell>
<TableCell width="20%" align="center">
Actions
</TableCell>
</TableRow>
</TableHead>
<TableBody>
<TableRow>
<TableCell>
<TextField
data-cy="name-field"
placeholder="Enter element name"
variant="outlined"
fullWidth
size="small"
value={newRow.name}
onChange={(e) =>
setNewRow({ ...newRow, name: e.target.value })
}
sx={{
"& .MuiOutlinedInput-root": {
borderRadius: 2,
bgcolor: "background.default",
"&:hover": {
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "primary.main",
},
},
},
}}
/>
</TableCell>
<TableCell>
<TextField
data-cy="xpath-field"
placeholder="Enter XPath selector"
variant="outlined"
fullWidth
size="small"
value={newRow.xpath}
onChange={(e) =>
setNewRow({ ...newRow, xpath: e.target.value })
}
sx={{
"& .MuiOutlinedInput-root": {
borderRadius: 2,
bgcolor: "background.default",
"&:hover": {
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "primary.main",
},
},
},
}}
/>
</TableCell>
<TableCell align="center">
<Tooltip
title={
newRow.xpath.length > 0 && newRow.name.length > 0
? "Add Element"
: "Fill out all fields to add an element"
}
placement="top"
>
<span>
<IconButton
data-cy="add-button"
aria-label="add"
size="small"
onClick={handleAddRow}
disabled={
!(newRow.xpath.length > 0 && newRow.name.length > 0)
}
sx={{
bgcolor: "primary.main",
color: "primary.contrastText",
borderRadius: 2,
"&:hover": {
bgcolor: "primary.dark",
transform: "translateY(-1px)",
},
"&.Mui-disabled": {
bgcolor: "action.disabledBackground",
color: "action.disabled",
},
}}
>
Delete
</Button>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</div>
<AddIcon fontSize="small" />
</IconButton>
</span>
</Tooltip>
</TableCell>
</TableRow>
{rows.map((row, index) => (
<TableRow
key={index}
sx={{
"&:hover": {
bgcolor: "action.hover",
},
}}
>
<TableCell>
<Typography variant="body2" noWrap>
{row.name}
</Typography>
</TableCell>
<TableCell>
<Typography
variant="body2"
sx={{
fontFamily: "monospace",
fontSize: "0.875rem",
color: "text.secondary",
}}
noWrap
>
{row.xpath}
</Typography>
</TableCell>
<TableCell align="center">
<IconButton
onClick={() => handleDeleteRow(row.name)}
size="small"
color="error"
sx={{
"&:hover": {
bgcolor: "error.main",
color: "error.contrastText",
transform: "translateY(-1px)",
},
}}
>
<DeleteIcon fontSize="small" />
</IconButton>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</TableContainer>
<Divider sx={{ my: 2 }} />
<SiteMap />
</Box>
<Divider
sx={{
borderColor: theme.palette.mode === "dark" ? "#ffffff" : "0000000",
marginBottom: 2,
}}
/>
<SiteMap />
</Box>
</Paper>
);
};

View File

@@ -2,3 +2,14 @@
margin-bottom: 1rem;
text-align: center;
}
/* Wrapper applied to the JobSubmitterHeader root element: left-aligns the
   header content and leaves a small gap beneath it. */
.container {
text-align: left;
margin-bottom: 8px;
}
/* Heading text of the job-submitter header: semi-bold, with a small gap
   beneath it. */
.title {
font-weight: 600;
/* Colour is read from a CSS custom property; presumably supplied by the MUI
   theme's CSS variables - confirm that the theme exposes it. */
color: var(--mui-palette-text-primary);
margin-bottom: 8px;
}

View File

@@ -1,6 +1,6 @@
import { Box, Typography } from "@mui/material";
import React, { ReactNode } from "react";
import { Typography } from "@mui/material";
import classes from "./job-submitter-header.module.css";
import styles from "./job-submitter-header.module.css";
interface JobSubmitterHeaderProps {
title?: string;
@@ -8,13 +8,15 @@ interface JobSubmitterHeaderProps {
}
export const JobSubmitterHeader: React.FC<JobSubmitterHeaderProps> = ({
title = "Scraping Made Easy",
title = "Scrape Webpage",
children,
}) => {
return (
<div className={classes.jobSubmitterHeader}>
<Typography variant="h3">{title}</Typography>
<Box className={styles.container}>
<Typography variant="h4" className={styles.title}>
{title}
</Typography>
{children}
</div>
</Box>
);
};

View File

@@ -0,0 +1,52 @@
/* Layout wrapper for the URL field and the submit button.
   Default: children are stacked vertically and stretched to full width. */
.container {
display: flex;
flex-direction: column;
gap: 16px;
align-items: stretch;
}
/* From 600px viewport width upwards the children sit side by side and are
   vertically centred. */
@media (min-width: 600px) {
.container {
flex-direction: row;
align-items: center;
}
}
/* URL text field: takes all the horizontal space the container offers. */
.input {
width: 100%;
}
/* `:global(...)` keeps the MUI class names un-hashed by CSS Modules so the
   selectors reach the elements rendered inside the text field. */
.input :global(.MuiOutlinedInput-root) {
border-radius: 16px;
transition: all 0.2s ease-in-out;
}
/* While the input root is hovered, tint its outline with the primary colour
   (read from a CSS custom property). */
.input
:global(.MuiOutlinedInput-root:hover)
:global(.MuiOutlinedInput-notchedOutline) {
border-color: var(--mui-palette-primary-main);
}
/* Submit button base appearance. Height, font size and font weight are
   declared `!important` - presumably to win over the component library's own
   button styles; confirm before removing. */
.submitButton {
height: 48px !important;
border-radius: 16px;
font-size: 1rem !important;
font-weight: 500 !important;
}
/* Hover feedback: lift the button by one pixel and add a soft shadow. */
.submitButton:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}
/* A disabled button gets neither the lift nor the shadow. */
.submitButton:disabled {
transform: none;
box-shadow: none;
}
/* From 600px viewport width upwards the submit button gets a minimum width
   and a taller height. */
@media (min-width: 600px) {
  .submitButton {
    min-width: 120px;
    /* The base `.submitButton` rule declares `height: 48px !important`, which
       beats any normal declaration regardless of source order. The override
       must therefore be `!important` as well, otherwise it never applies. */
    height: 56px !important;
  }
}

View File

@@ -1,6 +1,6 @@
import React from "react";
import { TextField, Button, CircularProgress } from "@mui/material";
import { Box, Button, CircularProgress, TextField } from "@mui/material";
import { useJobSubmitterProvider } from "../provider";
import styles from "./job-submitter-input.module.css";
export type JobSubmitterInputProps = {
urlError: string | null;
@@ -17,7 +17,7 @@ export const JobSubmitterInput = ({
useJobSubmitterProvider();
return (
<div className="flex flex-row space-x-4 items-center mb-2">
<Box className={styles.container}>
<TextField
data-cy="url-input"
label="URL"
@@ -27,19 +27,18 @@ export const JobSubmitterInput = ({
onChange={(e) => setSubmittedURL(e.target.value)}
error={!isValidURL}
helperText={!isValidURL ? urlError : ""}
className="rounded-md"
className={styles.input}
/>
<Button
data-cy="submit-button"
variant="contained"
size="small"
size="large"
onClick={handleSubmit}
disabled={!(rows.length > 0) || loading}
className={`bg-[#034efc] text-white font-semibold rounded-md
transition-transform transform hover:scale-105 disabled:opacity-50`}
className={styles.submitButton}
>
{loading ? <CircularProgress size={24} color="inherit" /> : "Submit"}
</Button>
</div>
</Box>
);
};

View File

@@ -4,6 +4,7 @@ import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
import { useSubmitJob } from "@/hooks/use-submit-job";
import { parseJobOptions } from "@/lib";
import { useUser } from "@/store/hooks";
import { Box, Paper } from "@mui/material";
import { useRouter } from "next/router";
import { useEffect } from "react";
import { JobSubmitterHeader } from "./job-submitter-header";
@@ -12,40 +13,74 @@ import { useJobSubmitterProvider } from "./provider";
export const JobSubmitter = () => {
const router = useRouter();
const { job_options } = router.query;
const { job_options, id } = router.query;
const { user } = useUser();
const { submitJob, loading, error } = useSubmitJob();
const { submittedURL, rows, siteMap, setSiteMap, jobOptions, setJobOptions } =
useJobSubmitterProvider();
const {
jobId,
setJobId,
submittedURL,
rows,
siteMap,
setSiteMap,
jobOptions,
setJobOptions,
} = useJobSubmitterProvider();
useEffect(() => {
if (job_options) {
parseJobOptions(job_options as string, setJobOptions, setSiteMap);
parseJobOptions(
id as string,
job_options as string,
setJobOptions,
setSiteMap,
setJobId
);
}
}, [job_options]);
const handleSubmit = async () => {
await submitJob(submittedURL, rows, user, jobOptions, siteMap, false, null);
await submitJob(
submittedURL,
rows,
user,
jobOptions,
siteMap,
false,
null,
jobId
);
};
console.log(jobOptions);
useEffect(() => {
console.log(jobOptions);
}, [jobOptions]);
return (
<div>
<JobSubmitterHeader />
<JobSubmitterInput
urlError={error}
handleSubmit={handleSubmit}
loading={loading}
/>
<AdvancedJobOptions
jobOptions={jobOptions}
setJobOptions={setJobOptions}
/>
</div>
<Paper
elevation={0}
sx={{
p: 4,
borderRadius: 2,
bgcolor: "background.paper",
border: 1,
borderColor: "divider",
"&:hover": {
boxShadow: "0 4px 20px rgba(0, 0, 0, 0.05)",
},
}}
>
<Box className="flex flex-col gap-6">
<JobSubmitterHeader />
<Box className="flex flex-col gap-4">
<JobSubmitterInput
urlError={error}
handleSubmit={handleSubmit}
loading={loading}
/>
<AdvancedJobOptions
jobOptions={jobOptions}
setJobOptions={setJobOptions}
/>
</Box>
</Box>
</Paper>
);
};

View File

@@ -10,6 +10,8 @@ import React, {
} from "react";
type JobSubmitterProviderType = {
jobId: string;
setJobId: Dispatch<React.SetStateAction<string>>;
submittedURL: string;
setSubmittedURL: Dispatch<React.SetStateAction<string>>;
rows: Element[];
@@ -36,6 +38,7 @@ const JobSubmitterProvider = createContext<JobSubmitterProviderType>(
);
export const Provider = ({ children }: PropsWithChildren) => {
const [jobId, setJobId] = useState<string>("");
const [submittedURL, setSubmittedURL] = useState<string>("");
const [rows, setRows] = useState<Element[]>([]);
const [results, setResults] = useState<Result>({});
@@ -55,6 +58,8 @@ export const Provider = ({ children }: PropsWithChildren) => {
const value: JobSubmitterProviderType = useMemo(
() => ({
jobId,
setJobId,
submittedURL,
setSubmittedURL,
rows,
@@ -76,6 +81,7 @@ export const Provider = ({ children }: PropsWithChildren) => {
closeSnackbar,
}),
[
jobId,
submittedURL,
rows,
results,

View File

@@ -1,17 +1,17 @@
import { useState } from "react";
import { useJobSubmitterProvider } from "../../provider";
import { ActionOption } from "@/types/job";
import {
Box,
Button,
Checkbox,
FormControl,
FormControlLabel,
InputLabel,
MenuItem,
Select,
TextField,
FormControl,
Button,
Checkbox,
FormControlLabel,
} from "@mui/material";
import { ActionOption } from "@/types/job";
import classes from "./site-map-input.module.css";
import { clsx } from "clsx";
import { useState } from "react";
import { useJobSubmitterProvider } from "../../provider";
export type SiteMapInputProps = {
disabled?: boolean;
@@ -28,7 +28,6 @@ export const SiteMapInput = ({
clickOnce,
input,
}: SiteMapInputProps) => {
console.log(clickOnce);
const [optionState, setOptionState] = useState<ActionOption>(
option || "click"
);
@@ -43,8 +42,6 @@ export const SiteMapInput = ({
const handleAdd = () => {
if (!siteMap) return;
console.log(optionState, xpathState, clickOnceState, inputState);
setSiteMap((prevSiteMap) => ({
...prevSiteMap,
actions: [
@@ -60,6 +57,7 @@ export const SiteMapInput = ({
}));
setXpathState("");
setInputState("");
};
const handleRemove = () => {
@@ -72,14 +70,22 @@ export const SiteMapInput = ({
};
return (
<div className="flex flex-col gap-2 w-full">
<div className="flex gap-2 items-center">
<FormControl className="w-1/4">
<Box
sx={{ display: "flex", flexDirection: "column", gap: 2, width: "100%" }}
>
<Box sx={{ display: "flex", gap: 2, alignItems: "center" }}>
<FormControl size="small" sx={{ minWidth: 120 }}>
<InputLabel>Action Type</InputLabel>
<Select
disabled={disabled}
displayEmpty
value={optionState}
label="Action Type"
onChange={(e) => setOptionState(e.target.value as ActionOption)}
sx={{
"& .MuiSelect-select": {
textTransform: "capitalize",
},
}}
>
<MenuItem value="click">Click</MenuItem>
<MenuItem value="input">Input</MenuItem>
@@ -88,23 +94,49 @@ export const SiteMapInput = ({
{optionState === "input" && (
<TextField
label="Input Text"
size="small"
fullWidth
value={inputState}
onChange={(e) => setInputState(e.target.value)}
disabled={disabled}
sx={{
"& .MuiOutlinedInput-root": {
bgcolor: "background.default",
},
}}
/>
)}
{!disabled && (
<TextField
label="XPath Selector"
size="small"
fullWidth
value={xpathState}
onChange={(e) => setXpathState(e.target.value)}
disabled={disabled}
sx={{
"& .MuiOutlinedInput-root": {
bgcolor: "background.default",
fontFamily: "monospace",
fontSize: "1rem",
},
}}
/>
)}
<TextField
label="XPath Selector"
fullWidth
value={xpathState}
onChange={(e) => setXpathState(e.target.value)}
disabled={disabled}
/>
{disabled ? (
<Button
onClick={handleRemove}
className={clsx(classes.button, classes.remove)}
size="small"
variant="outlined"
color="error"
sx={{
minWidth: "80px",
textTransform: "none",
"&:hover": {
bgcolor: "error.main",
color: "error.contrastText",
},
}}
>
Delete
</Button>
@@ -112,24 +144,41 @@ export const SiteMapInput = ({
<Button
onClick={handleAdd}
disabled={!xpathState}
className={clsx(classes.button, classes.add)}
size="small"
variant="contained"
color="primary"
sx={{
minWidth: "80px",
textTransform: "none",
"&.Mui-disabled": {
bgcolor: "action.disabledBackground",
color: "action.disabled",
},
}}
>
Add
</Button>
)}
</div>
</Box>
{!disabled && (
<FormControlLabel
label="Do Once"
control={
<Checkbox
size="small"
checked={clickOnceState}
disabled={disabled}
onChange={() => setClickOnceState(!clickOnceState)}
/>
}
sx={{
"& .MuiFormControlLabel-label": {
fontSize: "0.875rem",
color: "text.secondary",
},
}}
/>
)}
</div>
</Box>
);
};

View File

@@ -1,12 +1,22 @@
import {
Box,
Button,
Divider,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Typography,
} from "@mui/material";
import { useEffect, useState } from "react";
import { useJobSubmitterProvider } from "../provider";
import { Button, Divider, Typography, useTheme } from "@mui/material";
import { SiteMapInput } from "./site-map-input";
export const SiteMap = () => {
const { siteMap, setSiteMap } = useJobSubmitterProvider();
const [showSiteMap, setShowSiteMap] = useState<boolean>(false);
const theme = useTheme();
const handleCreateSiteMap = () => {
setSiteMap({ actions: [] });
@@ -25,46 +35,123 @@ export const SiteMap = () => {
}, [siteMap]);
return (
<div className="flex flex-col gap-4">
{siteMap ? (
<Button onClick={handleClearSiteMap}>Clear Site Map</Button>
<Box className="flex flex-col gap-4">
{!siteMap ? (
<Button
onClick={handleCreateSiteMap}
variant="contained"
color="primary"
sx={{
alignSelf: "flex-end",
textTransform: "none",
}}
>
Create Site Map
</Button>
) : (
<Button onClick={handleCreateSiteMap}>Create Site Map</Button>
)}
{showSiteMap && (
<div className="flex flex-col gap-4">
<Box className="flex flex-col gap-4">
<Box
sx={{
display: "flex",
justifyContent: "space-between",
alignItems: "center",
}}
>
<Typography variant="h6" sx={{ fontWeight: 500 }}>
Site Map Configuration
</Typography>
<Button
onClick={handleClearSiteMap}
variant="outlined"
color="error"
size="small"
sx={{
textTransform: "none",
"&:hover": {
bgcolor: "error.main",
color: "error.contrastText",
},
}}
>
Clear Site Map
</Button>
</Box>
<SiteMapInput />
{siteMap?.actions && siteMap?.actions.length > 0 && (
<>
<Divider
<Divider />
<TableContainer
sx={{
borderColor:
theme.palette.mode === "dark" ? "#ffffff" : "0000000",
maxHeight: "400px",
overflow: "auto",
borderRadius: 1,
border: 1,
borderColor: "divider",
}}
/>
<Typography className="w-full text-center" variant="h5">
Site Map Actions
</Typography>
>
<Table size="small" stickyHeader>
<TableHead>
<TableRow>
<TableCell width="10%">
<Typography sx={{ fontWeight: 600 }}>Action</Typography>
</TableCell>
<TableCell width="30%">
<Typography sx={{ fontWeight: 600 }}>Type</Typography>
</TableCell>
<TableCell width="40%">
<Typography sx={{ fontWeight: 600 }}>XPath</Typography>
</TableCell>
</TableRow>
</TableHead>
<TableBody>
{siteMap?.actions.reverse().map((action, index) => (
<TableRow
key={action.xpath}
sx={{
"&:hover": {
bgcolor: "action.hover",
},
}}
>
<TableCell>
<Typography variant="body2">{index + 1}</Typography>
</TableCell>
<TableCell>
<Typography
variant="body2"
sx={{
color:
action.type === "click"
? "primary.main"
: "warning.main",
fontWeight: 500,
}}
>
{action.type}
</Typography>
</TableCell>
<TableCell>
<Typography
variant="body2"
sx={{
fontFamily: "monospace",
fontSize: "0.875rem",
color: "text.secondary",
}}
noWrap
>
{action.xpath}
</Typography>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</TableContainer>
</>
)}
<ul className="flex flex-col gap-4">
{siteMap?.actions.reverse().map((action, index) => (
<li key={action.xpath} className="flex w-full items-center">
<Typography variant="h6" className="w-[10%] mr-2">
Action {index + 1}:
</Typography>
<SiteMapInput
disabled={Boolean(siteMap)}
xpath={action.xpath}
option={action.type}
clickOnce={action.do_once}
input={action.input}
/>
</li>
))}
</ul>
</div>
</Box>
)}
</div>
</Box>
);
};

View File

@@ -11,17 +11,18 @@ export const useAdvancedJobOptions = () => {
proxies: null,
collect_media: false,
custom_cookies: null,
return_html: false,
};
const router = useRouter();
const { job_options } = router.query;
const { job_options, job_id } = router.query;
const [jobOptions, setJobOptions] =
useState<RawJobOptions>(initialJobOptions);
useEffect(() => {
if (job_options) {
parseJobOptions(job_options as string, setJobOptions);
parseJobOptions(job_id as string, job_options as string, setJobOptions);
}
}, [job_options]);

View File

@@ -0,0 +1,27 @@
import { Job } from "@/types";
/**
 * Hook exposing `exportJobConfig`, which downloads a job's configuration as a
 * JSON file through the browser.
 */
export const useExportJobConfig = () => {
  /**
   * Serialises the `url`, `prompt`, `job_options`, `elements` and `agent_mode`
   * fields of `job` to JSON and triggers a download named `job_<id>.json`.
   *
   * @param job - The job whose configuration should be exported.
   * @returns A promise that settles once the download has been triggered.
   */
  const exportJobConfig = async (job: Job) => {
    const { url, prompt, job_options, elements, agent_mode } = job;
    const serialized = JSON.stringify({
      url,
      prompt,
      job_options,
      elements,
      agent_mode,
    });

    const objectUrl = window.URL.createObjectURL(
      new Blob([serialized], { type: "application/json" })
    );

    // A hidden, temporary anchor is the portable way to start a download
    // with a chosen file name.
    const anchor = document.createElement("a");
    anchor.style.display = "none";
    anchor.href = objectUrl;
    anchor.download = `job_${job.id}.json`;

    try {
      document.body.appendChild(anchor);
      anchor.click();
    } finally {
      // Always release the object URL and detach the anchor, even when
      // attaching or clicking it throws, so nothing leaks.
      window.URL.revokeObjectURL(objectUrl);
      anchor.remove();
    }
  };

  return { exportJobConfig };
};

View File

@@ -0,0 +1,83 @@
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
import { useRouter } from "next/router";
import { toast } from "react-toastify";
/**
 * Field names that must appear in an uploaded job config, in the order in
 * which they are checked and reported.
 */
const REQUIRED_JOB_CONFIG_FIELDS = [
  "url",
  "job_options",
  "elements",
  "site_map",
] as const;

/**
 * Hook exposing `handleUploadFile`, which loads a job configuration file into
 * the job-submitter state (job options, site map, URL and element rows).
 */
export const useImportJobConfig = () => {
  const router = useRouter();
  const { setJobOptions, setSiteMap, setSubmittedURL, setRows } =
    useJobSubmitterProvider();

  /**
   * Reads `file` as text, validates it and applies it to the provider state.
   * Every failure is reported to the user with an error toast.
   *
   * @param file - The uploaded job configuration file.
   * @returns A promise resolving to `true` when the import FAILED and to
   *   `false` when it succeeded. The promise never rejects.
   */
  const handleUploadFile = (file: File): Promise<boolean> => {
    return new Promise((resolve) => {
      // Single failure path: show the toast, then report the error state.
      const fail = (message: string) => {
        toast.error(message);
        resolve(true);
      };

      const reader = new FileReader();

      reader.onerror = () => fail("Failed to read file");

      reader.onload = (e) => {
        const result = e.target?.result;

        // Guard instead of force-casting: without it a non-string result
        // would throw inside this handler and leave the promise pending.
        if (typeof result !== "string") {
          fail("Failed to read file");
          return;
        }

        // NOTE(review): this is a substring test on the raw text, not a check
        // for top-level keys of the parsed object; kept as-is so files
        // accepted before are still accepted.
        const missingField = REQUIRED_JOB_CONFIG_FIELDS.find(
          (field) => !result.includes(field)
        );
        if (missingField !== undefined) {
          fail(`Invalid job config: missing ${missingField}`);
          return;
        }

        try {
          const jobConfig = JSON.parse(result);

          // Agent-mode configs navigate to the agent page. Execution then
          // deliberately continues, so the provider state is populated too.
          if (jobConfig.agent_mode) {
            router.push({
              pathname: "/agent",
              query: {
                url: jobConfig.url,
                prompt: jobConfig.prompt,
                job_options: JSON.stringify(jobConfig.job_options),
              },
            });
          }

          // A proxies array is replaced by an empty string - presumably
          // because the options form expects a string here; verify against
          // the form component.
          if (
            jobConfig.job_options &&
            Array.isArray(jobConfig.job_options.proxies)
          ) {
            jobConfig.job_options.proxies = "";
          }

          setJobOptions(jobConfig.job_options || {});
          setSiteMap(jobConfig.site_map);
          setSubmittedURL(jobConfig.url || "");
          setRows(jobConfig.elements || []);
          resolve(false);
        } catch {
          fail("Failed to parse job config");
        }
      };

      reader.readAsText(file);
    });
  };

  return { handleUploadFile };
};

View File

@@ -25,7 +25,8 @@ export const useSubmitJob = () => {
jobOptions: RawJobOptions,
siteMap: SiteMap | null,
agentMode: boolean,
prompt: string | null
prompt: string | null,
id?: string
) => {
if (!validateURL(submittedURL)) {
setIsValidUrl(false);
@@ -61,7 +62,8 @@ export const useSubmitJob = () => {
customCookies,
siteMap,
agentMode,
prompt || undefined
prompt || undefined,
id
)
.then(async (response) => {
if (!response.ok) {
@@ -80,7 +82,10 @@ export const useSubmitJob = () => {
setSnackbarOpen(true);
})
.catch((error) => {
setSnackbarMessage(error || "An error occurred.");
const errorMessage =
error instanceof Error ? error.message : "An error occurred.";
console.log(errorMessage);
setSnackbarMessage(errorMessage);
setSnackbarSeverity("error");
setSnackbarOpen(true);
})

View File

@@ -3,9 +3,11 @@ import { Dispatch, SetStateAction } from "react";
import { RawJobOptions, SiteMap } from "@/types";
export const parseJobOptions = (
id: string,
job_options: string,
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>,
setJobId?: Dispatch<SetStateAction<string>>
) => {
if (job_options) {
const jsonOptions = JSON.parse(job_options as string);
@@ -15,6 +17,7 @@ export const parseJobOptions = (
proxies: null,
collect_media: false,
custom_cookies: null,
return_html: false,
};
if (jsonOptions.collect_media) {
@@ -42,6 +45,14 @@ export const parseJobOptions = (
setSiteMap(jsonOptions.site_map);
}
if (jsonOptions.return_html) {
newJobOptions.return_html = true;
}
if (id && setJobId) {
setJobId(id);
}
setJobOptions(newJobOptions);
}
};

View File

@@ -21,15 +21,16 @@ export default async function handler(
}
);
if (!response.ok) {
throw new Error(`Error: ${response.statusText}`);
const result = await response.json();
if (response.status === 500) {
res.status(500).json({ error: result.error });
}
const result = await response.json();
res.status(200).json(result);
} catch (error) {
console.error("Error submitting scrape job:", error);
res.status(500).json({ error: "Internal Server Error" });
res.status(500).json({ error: error });
}
} else {
res.setHeader("Allow", ["POST"]);

View File

@@ -8,7 +8,7 @@ export type DeleteCronJobsParams = {
export const deleteCronJobs = async (params: DeleteCronJobsParams) => {
const token = Cookies.get("token");
const response = await fetch("/api/delete-cron-jobs", {
const response = await fetch("/api/delete-cron-job", {
method: "POST",
headers: {
"Content-Type": "application/json",

View File

@@ -9,14 +9,15 @@ export const submitJob = async (
customCookies: any,
siteMap: SiteMap | null,
agentMode: boolean = false,
prompt?: string
prompt?: string,
id?: string
) => {
console.log(user);
return await fetch(`/api/submit-scrape-job`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
data: {
id,
url: submittedURL,
elements: rows,
user: user?.email,

View File

@@ -27,6 +27,7 @@ export type RawJobOptions = {
proxies: string | null;
collect_media: boolean;
custom_cookies: string | null;
return_html: boolean;
};
export type ActionOption = "click" | "input";
@@ -58,6 +59,7 @@ export const initialJobOptions: RawJobOptions = {
proxies: null,
collect_media: false,
custom_cookies: null,
return_html: false,
};
export const COLOR_MAP: Record<string, string> = {