mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-10-30 05:57:12 +00:00
Compare commits
24 Commits
v1.1.1
...
24f4b57fea
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
24f4b57fea | ||
|
|
1c0dec6db6 | ||
|
|
e9c60f6338 | ||
|
|
5719a85491 | ||
|
|
052d80de07 | ||
|
|
7047a3c0e3 | ||
|
|
71f603fc62 | ||
|
|
86a77a27df | ||
|
|
b11e263b93 | ||
|
|
91dc13348d | ||
|
|
93b0c83381 | ||
|
|
9381ba9232 | ||
|
|
20dccc5527 | ||
|
|
02619eb184 | ||
|
|
58c6c09fc9 | ||
|
|
bf896b4c6b | ||
|
|
e3b9c11ab7 | ||
|
|
32da3375b3 | ||
|
|
b5131cbe4c | ||
|
|
47c4c9a7d1 | ||
|
|
4352988666 | ||
|
|
00759151e6 | ||
|
|
bfae00ca72 | ||
|
|
e810700569 |
15
.github/actions/run-cypress-tests/action.yaml
vendored
15
.github/actions/run-cypress-tests/action.yaml
vendored
@@ -20,6 +20,16 @@ runs:
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Setup yarn
|
||||
shell: bash
|
||||
run: npm install -g yarn
|
||||
|
||||
- name: Install xvfb for headless testing
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y xvfb libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 libgbm1 libasound2t64 libpango-1.0-0 libcairo2 libgtk-3-0 libgdk-pixbuf2.0-0 libx11-6 libx11-xcb1 libxcb1 libxss1 libxtst6 libnspr4
|
||||
|
||||
- name: Setup Docker project
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -63,5 +73,8 @@ runs:
|
||||
|
||||
- name: Run Cypress tests
|
||||
shell: bash
|
||||
run: npm run cy:run
|
||||
run: |
|
||||
set -e
|
||||
npm run cy:run
|
||||
|
||||
|
||||
|
||||
31
.github/workflows/cypress-tests.yml
vendored
Normal file
31
.github/workflows/cypress-tests.yml
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
name: Cypress Tests
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
secrets:
|
||||
openai_key:
|
||||
required: true
|
||||
|
||||
|
||||
jobs:
|
||||
cypress-tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Run Cypress Tests
|
||||
id: run-tests
|
||||
uses: ./.github/actions/run-cypress-tests
|
||||
with:
|
||||
openai_key: ${{ secrets.openai_key }}
|
||||
|
||||
- name: Check container logs on failure
|
||||
if: steps.run-tests.conclusion == 'failure'
|
||||
run: |
|
||||
echo "Cypress tests failed. Dumping container logs..."
|
||||
docker logs scraperr_api || true
|
||||
|
||||
- name: Fail job if Cypress failed
|
||||
if: steps.run-tests.conclusion == 'failure'
|
||||
run: exit 1
|
||||
|
||||
19
.github/workflows/merge.yml
vendored
19
.github/workflows/merge.yml
vendored
@@ -4,19 +4,26 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
types: [closed]
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
uses: ./.github/workflows/tests.yml
|
||||
secrets:
|
||||
openai_key: ${{ secrets.OPENAI_KEY }}
|
||||
discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
||||
# TODO: Renable once browser forge is fixed for camoufox, or else tests will never pass
|
||||
# tests:
|
||||
# uses: ./.github/workflows/tests.yml
|
||||
# secrets:
|
||||
# openai_key: ${{ secrets.OPENAI_KEY }}
|
||||
# discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
||||
|
||||
version:
|
||||
needs: tests
|
||||
uses: ./.github/workflows/version.yml
|
||||
secrets:
|
||||
git_token: ${{ secrets.GPAT_TOKEN }}
|
||||
|
||||
build-and-deploy:
|
||||
if: needs.version.outputs.version_bump == 'true'
|
||||
needs: version
|
||||
uses: ./.github/workflows/docker-image.yml
|
||||
secrets:
|
||||
|
||||
5
.github/workflows/pr.yml
vendored
5
.github/workflows/pr.yml
vendored
@@ -8,11 +8,6 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
checkout:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
tests:
|
||||
uses: ./.github/workflows/tests.yml
|
||||
secrets:
|
||||
|
||||
29
.github/workflows/pytest.yml
vendored
Normal file
29
.github/workflows/pytest.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
name: Pytest
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
unit-tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-node@v3
|
||||
|
||||
- name: Set env
|
||||
run: echo "ENV=test" >> $GITHUB_ENV
|
||||
|
||||
- name: Install pdm
|
||||
run: pip install pdm
|
||||
|
||||
- name: Install project dependencies
|
||||
run: pdm install
|
||||
|
||||
- name: Install playwright
|
||||
run: pdm run playwright install --with-deps
|
||||
|
||||
- name: Run tests
|
||||
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests
|
||||
|
||||
38
.github/workflows/tests.yml
vendored
38
.github/workflows/tests.yml
vendored
@@ -10,26 +10,8 @@ on:
|
||||
|
||||
|
||||
jobs:
|
||||
unit-tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set env
|
||||
run: echo "ENV=test" >> $GITHUB_ENV
|
||||
|
||||
- name: Install pdm
|
||||
run: pip install pdm
|
||||
|
||||
- name: Install project dependencies
|
||||
run: pdm install
|
||||
|
||||
- name: Install playwright
|
||||
run: pdm run playwright install
|
||||
|
||||
- name: Run tests
|
||||
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests
|
||||
pytest:
|
||||
uses: ./.github/workflows/pytest.yml
|
||||
|
||||
cypress-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -37,26 +19,14 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Run Cypress Tests
|
||||
id: run-tests
|
||||
uses: ./.github/actions/run-cypress-tests
|
||||
with:
|
||||
openai_key: ${{ secrets.OPENAI_KEY }}
|
||||
continue-on-error: true
|
||||
|
||||
- name: Check container logs on failure
|
||||
if: steps.run-tests.outcome == 'failure'
|
||||
run: |
|
||||
echo "Cypress tests failed. Dumping container logs..."
|
||||
docker logs scraperr_api || true
|
||||
|
||||
- name: Fail job if Cypress failed
|
||||
if: steps.run-tests.outcome == 'failure'
|
||||
run: exit 1
|
||||
openai_key: ${{ secrets.openai_key }}
|
||||
|
||||
success-message:
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- unit-tests
|
||||
- pytest
|
||||
- cypress-tests
|
||||
steps:
|
||||
- name: Send Discord Message
|
||||
|
||||
31
.github/workflows/version.yml
vendored
31
.github/workflows/version.yml
vendored
@@ -2,10 +2,16 @@ name: Version
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
secrets:
|
||||
git_token:
|
||||
required: true
|
||||
outputs:
|
||||
version:
|
||||
description: "The new version number"
|
||||
value: ${{ jobs.version.outputs.version }}
|
||||
version_bump:
|
||||
description: "Whether the version was bumped"
|
||||
value: ${{ jobs.version.outputs.version_bump }}
|
||||
|
||||
jobs:
|
||||
version:
|
||||
@@ -13,6 +19,7 @@ jobs:
|
||||
|
||||
outputs:
|
||||
version: ${{ steps.set_version.outputs.version }}
|
||||
version_bump: ${{ steps.check_version_bump.outputs.version_bump }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -37,17 +44,39 @@ jobs:
|
||||
|
||||
echo "VERSION_TYPE=$VERSION_TYPE" >> $GITHUB_ENV
|
||||
|
||||
- name: Check for version bump
|
||||
id: check_version_bump
|
||||
run: |
|
||||
COMMIT_MSG=$(git log -1 --pretty=%B)
|
||||
|
||||
if [[ $COMMIT_MSG =~ .*\[no\ bump\].* ]]; then
|
||||
echo "version_bump=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "version_bump=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Skip version bump
|
||||
if: steps.check_version_bump.outputs.version_bump == 'false'
|
||||
run: |
|
||||
echo "Skipping version bump as requested"
|
||||
gh run cancel ${{ github.run_id }}
|
||||
exit 0
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.git_token }}
|
||||
|
||||
- name: Set version
|
||||
if: steps.check_version_bump.outputs.version_bump != 'false'
|
||||
id: set_version
|
||||
run: |
|
||||
VERSION=$(./scripts/version.sh "$VERSION_TYPE")
|
||||
echo "VERSION=$VERSION" >> $GITHUB_ENV
|
||||
echo "Version is $VERSION"
|
||||
echo "::set-output name=version::$VERSION"
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
VERSION_TYPE: ${{ env.VERSION_TYPE }}
|
||||
|
||||
- name: Update chart file
|
||||
if: steps.check_version_bump.outputs.version_bump != 'false'
|
||||
run: |
|
||||
sed -i "s/^version: .*/version: $VERSION/" helm/Chart.yaml
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from camoufox import AsyncCamoufox
|
||||
from playwright.async_api import Page
|
||||
|
||||
# LOCAL
|
||||
from api.backend.constants import RECORDINGS_ENABLED
|
||||
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
|
||||
from api.backend.job.models import CapturedElement
|
||||
from api.backend.worker.logger import LOG
|
||||
@@ -29,11 +30,13 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
|
||||
LOG.info(f"Starting work for agent job: {agent_job}")
|
||||
pages = set()
|
||||
|
||||
proxy = None
|
||||
|
||||
if agent_job["job_options"]["proxies"]:
|
||||
proxy = random.choice(agent_job["job_options"]["proxies"])
|
||||
LOG.info(f"Using proxy: {proxy}")
|
||||
|
||||
async with AsyncCamoufox(headless=True) as browser:
|
||||
async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
|
||||
page: Page = await browser.new_page()
|
||||
|
||||
await add_custom_items(
|
||||
@@ -63,7 +66,9 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
|
||||
|
||||
xpaths = parse_response(response)
|
||||
|
||||
captured_elements = await capture_elements(page, xpaths)
|
||||
captured_elements = await capture_elements(
|
||||
page, xpaths, agent_job["job_options"].get("return_html", False)
|
||||
)
|
||||
|
||||
final_url = page.url
|
||||
|
||||
|
||||
@@ -206,7 +206,7 @@ def parse_next_page(text: str) -> str | None:
|
||||
|
||||
|
||||
async def capture_elements(
|
||||
page: Page, xpaths: list[dict[str, str]]
|
||||
page: Page, xpaths: list[dict[str, str]], return_html: bool
|
||||
) -> list[CapturedElement]:
|
||||
captured_elements = []
|
||||
seen_texts = set()
|
||||
@@ -217,6 +217,23 @@ async def capture_elements(
|
||||
count = await locator.count()
|
||||
|
||||
for i in range(count):
|
||||
if return_html:
|
||||
element_text = (
|
||||
await page.locator(f"xpath={xpath['xpath']}")
|
||||
.nth(i)
|
||||
.inner_html()
|
||||
)
|
||||
|
||||
seen_texts.add(element_text)
|
||||
captured_elements.append(
|
||||
CapturedElement(
|
||||
name=xpath["name"],
|
||||
text=element_text,
|
||||
xpath=xpath["xpath"],
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
element_text = ""
|
||||
|
||||
element_handle = await locator.nth(i).element_handle()
|
||||
|
||||
@@ -29,6 +29,7 @@ def insert(query: str, values: tuple[Any, ...]):
|
||||
|
||||
except sqlite3.Error as e:
|
||||
LOG.error(f"An error occurred: {e}")
|
||||
raise e
|
||||
|
||||
finally:
|
||||
cursor.close()
|
||||
|
||||
@@ -49,10 +49,15 @@ async def get_queued_job():
|
||||
return res[0] if res else None
|
||||
|
||||
|
||||
async def update_job(ids: list[str], field: str, value: Any):
|
||||
query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
|
||||
res = update(query, tuple([value] + ids))
|
||||
LOG.info(f"Updated job: {res}")
|
||||
async def update_job(ids: list[str], updates: dict[str, Any]):
|
||||
if not updates:
|
||||
return
|
||||
|
||||
set_clause = ", ".join(f"{field} = ?" for field in updates.keys())
|
||||
query = f"UPDATE jobs SET {set_clause} WHERE id IN {format_list_for_query(ids)}"
|
||||
values = list(updates.values()) + ids
|
||||
res = update(query, tuple(values))
|
||||
LOG.debug(f"Updated job: {res}")
|
||||
|
||||
|
||||
async def delete_jobs(jobs: list[str]):
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# STL
|
||||
import logging
|
||||
import datetime
|
||||
from typing import Any
|
||||
|
||||
# LOCAL
|
||||
@@ -12,7 +13,23 @@ from api.backend.database.queries.job.job_queries import JOB_INSERT_QUERY
|
||||
LOG = logging.getLogger("Job")
|
||||
|
||||
|
||||
def insert(item: dict[str, Any]) -> None:
|
||||
async def insert(item: dict[str, Any]) -> None:
|
||||
if check_for_job_completion(item["id"]):
|
||||
await multi_field_update_job(
|
||||
item["id"],
|
||||
{
|
||||
"agent_mode": item["agent_mode"],
|
||||
"prompt": item["prompt"],
|
||||
"job_options": item["job_options"],
|
||||
"elements": item["elements"],
|
||||
"status": "Queued",
|
||||
"result": [],
|
||||
"time_created": datetime.datetime.now().isoformat(),
|
||||
"chat": None,
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
common_insert(
|
||||
JOB_INSERT_QUERY,
|
||||
(
|
||||
@@ -33,6 +50,12 @@ def insert(item: dict[str, Any]) -> None:
|
||||
LOG.debug(f"Inserted item: {item}")
|
||||
|
||||
|
||||
def check_for_job_completion(id: str) -> dict[str, Any]:
|
||||
query = f"SELECT * FROM jobs WHERE id = ?"
|
||||
res = common_query(query, (id,))
|
||||
return res[0] if res else {}
|
||||
|
||||
|
||||
async def get_queued_job():
|
||||
query = (
|
||||
"SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
|
||||
@@ -48,6 +71,12 @@ async def update_job(ids: list[str], field: str, value: Any):
|
||||
LOG.debug(f"Updated job: {res}")
|
||||
|
||||
|
||||
async def multi_field_update_job(id: str, fields: dict[str, Any]):
|
||||
query = f"UPDATE jobs SET {', '.join(f'{field} = ?' for field in fields.keys())} WHERE id = ?"
|
||||
res = common_update(query, tuple(list(fields.values()) + [id]))
|
||||
LOG.debug(f"Updated job: {res}")
|
||||
|
||||
|
||||
async def delete_jobs(jobs: list[str]):
|
||||
if not jobs:
|
||||
LOG.debug("No jobs to delete.")
|
||||
|
||||
@@ -43,10 +43,8 @@ job_router = APIRouter()
|
||||
@job_router.post("/update")
|
||||
@handle_exceptions(logger=LOG)
|
||||
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
|
||||
"""Used to update jobs"""
|
||||
await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)
|
||||
|
||||
return JSONResponse(content={"message": "Jobs updated successfully."})
|
||||
return {"message": "Jobs updated successfully"}
|
||||
|
||||
|
||||
@job_router.post("/submit-scrape-job")
|
||||
@@ -54,9 +52,11 @@ async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
|
||||
async def submit_scrape_job(job: Job):
|
||||
LOG.info(f"Recieved job: {job}")
|
||||
|
||||
job.id = uuid.uuid4().hex
|
||||
if not job.id:
|
||||
job.id = uuid.uuid4().hex
|
||||
|
||||
job_dict = job.model_dump()
|
||||
insert(job_dict)
|
||||
await insert(job_dict)
|
||||
|
||||
return JSONResponse(
|
||||
content={"id": job.id, "message": "Job submitted successfully."}
|
||||
@@ -70,7 +70,9 @@ async def retrieve_scrape_jobs(
|
||||
):
|
||||
LOG.info(f"Retrieving jobs for account: {user.email}")
|
||||
ATTRIBUTES = "chat" if fetch_options.chat else "*"
|
||||
job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
|
||||
job_query = (
|
||||
f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ? ORDER BY time_created ASC"
|
||||
)
|
||||
results = query(job_query, (user.email,))
|
||||
return JSONResponse(content=jsonable_encoder(results[::-1]))
|
||||
|
||||
|
||||
@@ -25,3 +25,4 @@ class JobOptions(BaseModel):
|
||||
site_map: Optional[SiteMap] = None
|
||||
collect_media: bool = False
|
||||
custom_cookies: list[dict[str, Any]] = []
|
||||
return_html: bool = False
|
||||
|
||||
@@ -110,7 +110,9 @@ async def make_site_request(
|
||||
)
|
||||
|
||||
|
||||
async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element]):
|
||||
async def collect_scraped_elements(
|
||||
page: tuple[str, str], xpaths: list[Element], return_html: bool
|
||||
):
|
||||
soup = BeautifulSoup(page[0], "lxml")
|
||||
root = etree.HTML(str(soup))
|
||||
|
||||
@@ -120,6 +122,16 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])
|
||||
el = sxpath(root, elem.xpath)
|
||||
|
||||
for e in el: # type: ignore
|
||||
if return_html:
|
||||
elements[elem.name] = [
|
||||
CapturedElement(
|
||||
xpath=elem.xpath,
|
||||
text=page[0],
|
||||
name=elem.name,
|
||||
)
|
||||
]
|
||||
continue
|
||||
|
||||
text = (
|
||||
" ".join(str(t) for t in e.itertext())
|
||||
if isinstance(e, etree._Element)
|
||||
@@ -161,6 +173,10 @@ async def scrape(
|
||||
elements: list[dict[str, dict[str, list[CapturedElement]]]] = []
|
||||
|
||||
for page in pages:
|
||||
elements.append(await collect_scraped_elements(page, xpaths))
|
||||
elements.append(
|
||||
await collect_scraped_elements(
|
||||
page, xpaths, job_options.get("return_html", False)
|
||||
)
|
||||
)
|
||||
|
||||
return elements
|
||||
|
||||
@@ -7,11 +7,10 @@ import {
|
||||
} from "../utilities/job.utilities";
|
||||
import { mockSubmitJob } from "../utilities/mocks";
|
||||
|
||||
describe.only("Agent", () => {
|
||||
describe("Agent", () => {
|
||||
beforeEach(() => {
|
||||
mockSubmitJob();
|
||||
login();
|
||||
cy.visit("/agent");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -19,6 +18,9 @@ describe.only("Agent", () => {
|
||||
});
|
||||
|
||||
it("should be able to scrape some data", () => {
|
||||
cy.visit("/agent");
|
||||
cy.wait(1000);
|
||||
|
||||
const url = "https://books.toscrape.com";
|
||||
const prompt = "Collect all the links on the page";
|
||||
buildAgentJob(url, prompt);
|
||||
|
||||
@@ -4,7 +4,7 @@ export const cleanUpJobs = () => {
|
||||
|
||||
cy.wait("@retrieve", { timeout: 15000 });
|
||||
|
||||
cy.get("tbody tr", { timeout: 10000 }).should("have.length.at.least", 1);
|
||||
cy.get("tbody tr", { timeout: 20000 }).should("have.length.at.least", 1);
|
||||
|
||||
const tryClickSelectAll = (attempt = 1, maxAttempts = 5) => {
|
||||
cy.log(`Attempt ${attempt} to click Select All`);
|
||||
@@ -100,13 +100,13 @@ export const waitForJobCompletion = (url: string) => {
|
||||
};
|
||||
|
||||
export const enableMultiPageScraping = () => {
|
||||
cy.get("button").contains("Advanced Job Options").click();
|
||||
cy.get("button").contains("Advanced Options").click();
|
||||
cy.get('[data-cy="multi-page-toggle"]').click();
|
||||
cy.get("body").type("{esc}");
|
||||
};
|
||||
|
||||
export const addCustomHeaders = (headers: Record<string, string>) => {
|
||||
cy.get("button").contains("Advanced Job Options").click();
|
||||
cy.get("button").contains("Advanced Options").click();
|
||||
cy.get('[name="custom_headers"]').type(JSON.stringify(headers), {
|
||||
parseSpecialCharSequences: false,
|
||||
});
|
||||
@@ -114,16 +114,17 @@ export const addCustomHeaders = (headers: Record<string, string>) => {
|
||||
};
|
||||
|
||||
export const addCustomCookies = (cookies: Record<string, string>) => {
|
||||
cy.get("button").contains("Advanced Job Options").click();
|
||||
cy.get("button").contains("Advanced Options").click();
|
||||
cy.get('[name="custom_cookies"]').type(JSON.stringify(cookies));
|
||||
cy.get("body").type("{esc}");
|
||||
};
|
||||
|
||||
export const openAdvancedJobOptions = () => {
|
||||
cy.get("button").contains("Advanced Job Options").click();
|
||||
cy.get("button").contains("Advanced Options").click();
|
||||
};
|
||||
|
||||
export const selectJobFromSelector = () => {
|
||||
checkAiDisabled();
|
||||
cy.get("div[id='select-job']", { timeout: 10000 }).first().click();
|
||||
cy.get("li[role='option']", { timeout: 10000 }).first().click();
|
||||
};
|
||||
@@ -161,7 +162,18 @@ export const addElement = (name: string, xpath: string) => {
|
||||
cy.get('[data-cy="add-button"]').click();
|
||||
};
|
||||
|
||||
export const checkAiDisabled = () => {
|
||||
cy.getAllLocalStorage().then((result) => {
|
||||
const storage = JSON.parse(
|
||||
result["http://localhost"]["persist:root"] as string
|
||||
);
|
||||
const settings = JSON.parse(storage.settings);
|
||||
expect(settings.aiEnabled).to.equal(true);
|
||||
});
|
||||
};
|
||||
|
||||
export const buildAgentJob = (url: string, prompt: string) => {
|
||||
checkAiDisabled();
|
||||
enterJobUrl(url);
|
||||
cy.get("[data-cy='prompt-input']").type(prompt);
|
||||
};
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 67 KiB |
@@ -15,7 +15,7 @@ type: application
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 1.1.1
|
||||
version: 1.1.4
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
|
||||
2
next-env.d.ts
vendored
2
next-env.d.ts
vendored
@@ -2,4 +2,4 @@
|
||||
/// <reference types="next/image-types/global" />
|
||||
|
||||
// NOTE: This file should not be edited
|
||||
// see https://nextjs.org/docs/basic-features/typescript for more information.
|
||||
// see https://nextjs.org/docs/pages/building-your-application/configuring/typescript for more information.
|
||||
|
||||
8
pdm.lock
generated
8
pdm.lock
generated
@@ -5,7 +5,7 @@
|
||||
groups = ["default", "dev"]
|
||||
strategy = ["inherit_metadata"]
|
||||
lock_version = "4.5.0"
|
||||
content_hash = "sha256:1a65c1e288d2c6827fc6866d3bfe6a9b8707b2ca895d488f4a9b11cd579c4359"
|
||||
content_hash = "sha256:222416fbd48d349e2ae777bf1d167b68e4342f38d5e20d04095cbbb594afb8f3"
|
||||
|
||||
[[metadata.targets]]
|
||||
requires_python = ">=3.10"
|
||||
@@ -459,7 +459,7 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "browserforge"
|
||||
version = "1.2.3"
|
||||
version = "1.2.1"
|
||||
requires_python = "<4.0,>=3.8"
|
||||
summary = "Intelligent browser header & fingerprint generator"
|
||||
groups = ["default"]
|
||||
@@ -468,8 +468,8 @@ dependencies = [
|
||||
"typing-extensions; python_version < \"3.10\"",
|
||||
]
|
||||
files = [
|
||||
{file = "browserforge-1.2.3-py3-none-any.whl", hash = "sha256:a6c71ed4688b2f1b0bee757ca82ddad0007cbba68a71eca66ca607dde382f132"},
|
||||
{file = "browserforge-1.2.3.tar.gz", hash = "sha256:d5bec6dffd4748b30fbac9f9c1ef33b26c01a23185240bf90011843e174b7ecc"},
|
||||
{file = "browserforge-1.2.1-py3-none-any.whl", hash = "sha256:b2813b4de80b9c48c88700c93e3dfa6a64694d04f3263545e28bb03dd95df27e"},
|
||||
{file = "browserforge-1.2.1.tar.gz", hash = "sha256:7036d73fb066a4361a015b619079474c42d8b0ff415e1d874b62366de48d0b61"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -43,6 +43,7 @@ dependencies = [
|
||||
"camoufox>=0.4.11",
|
||||
"html2text>=2025.4.15",
|
||||
"proxy-py>=2.4.10",
|
||||
"browserforge==1.2.1",
|
||||
]
|
||||
requires-python = ">=3.10"
|
||||
readme = "README.md"
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { Box, Link, Typography } from "@mui/material";
|
||||
import { SetStateAction, Dispatch, useState } from "react";
|
||||
import { AdvancedJobOptionsDialog } from "./dialog/advanced-job-options-dialog";
|
||||
import { RawJobOptions } from "@/types";
|
||||
import SettingsIcon from "@mui/icons-material/Settings";
|
||||
import { Box, Button, Typography } from "@mui/material";
|
||||
import { Dispatch, SetStateAction, useState } from "react";
|
||||
import { AdvancedJobOptionsDialog } from "./dialog/advanced-job-options-dialog";
|
||||
|
||||
export type AdvancedJobOptionsProps = {
|
||||
jobOptions: RawJobOptions;
|
||||
@@ -17,26 +18,27 @@ export const AdvancedJobOptions = ({
|
||||
const [open, setOpen] = useState(false);
|
||||
|
||||
return (
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Link
|
||||
component="button"
|
||||
variant="body2"
|
||||
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
|
||||
<Button
|
||||
variant="outlined"
|
||||
onClick={() => setOpen(true)}
|
||||
startIcon={<SettingsIcon />}
|
||||
sx={{
|
||||
textDecoration: "none",
|
||||
color: "primary.main",
|
||||
textTransform: "none",
|
||||
borderRadius: 2,
|
||||
px: 2,
|
||||
py: 1,
|
||||
borderColor: "divider",
|
||||
color: "text.secondary",
|
||||
"&:hover": {
|
||||
color: "primary.dark",
|
||||
textDecoration: "underline",
|
||||
borderColor: "primary.main",
|
||||
color: "primary.main",
|
||||
bgcolor: "action.hover",
|
||||
},
|
||||
paddingLeft: 1,
|
||||
display: "inline-flex",
|
||||
alignItems: "center",
|
||||
gap: 0.5,
|
||||
}}
|
||||
>
|
||||
<Typography variant="body2">Advanced Job Options</Typography>
|
||||
</Link>
|
||||
<Typography variant="body2">Advanced Options</Typography>
|
||||
</Button>
|
||||
|
||||
<AdvancedJobOptionsDialog
|
||||
open={open}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { ExpandedTableInput } from "@/components/common/expanded-table-input";
|
||||
import { UploadFile } from "@/components/common/upload-file";
|
||||
import { useImportJobConfig } from "@/hooks/use-import-job-config";
|
||||
import { RawJobOptions } from "@/types";
|
||||
import {
|
||||
Code as CodeIcon,
|
||||
@@ -26,6 +28,7 @@ import {
|
||||
useTheme,
|
||||
} from "@mui/material";
|
||||
import { Dispatch, SetStateAction, useEffect, useState } from "react";
|
||||
import { toast } from "react-toastify";
|
||||
|
||||
export type AdvancedJobOptionsDialogProps = {
|
||||
open: boolean;
|
||||
@@ -43,18 +46,18 @@ export const AdvancedJobOptionsDialog = ({
|
||||
multiPageScrapeEnabled = true,
|
||||
}: AdvancedJobOptionsDialogProps) => {
|
||||
const theme = useTheme();
|
||||
const { handleUploadFile } = useImportJobConfig();
|
||||
const [localJobOptions, setLocalJobOptions] =
|
||||
useState<RawJobOptions>(jobOptions);
|
||||
|
||||
// Update local state when prop changes
|
||||
useEffect(() => {
|
||||
setLocalJobOptions(jobOptions);
|
||||
}, [jobOptions]);
|
||||
|
||||
const handleMultiPageScrapeChange = () => {
|
||||
const handleCheckboxChange = (key: keyof RawJobOptions) => {
|
||||
setLocalJobOptions((prevJobOptions) => ({
|
||||
...prevJobOptions,
|
||||
multi_page_scrape: !prevJobOptions.multi_page_scrape,
|
||||
[key]: !prevJobOptions[key],
|
||||
}));
|
||||
};
|
||||
|
||||
@@ -65,19 +68,23 @@ export const AdvancedJobOptionsDialog = ({
|
||||
}));
|
||||
};
|
||||
|
||||
const handleCollectMediaChange = () => {
|
||||
setLocalJobOptions((prevJobOptions) => ({
|
||||
...prevJobOptions,
|
||||
collect_media: !prevJobOptions.collect_media,
|
||||
}));
|
||||
};
|
||||
|
||||
const handleClose = () => {
|
||||
// Save the local state back to the parent before closing
|
||||
setJobOptions(localJobOptions);
|
||||
onClose();
|
||||
};
|
||||
|
||||
const onUploadFile = async (file: File) => {
|
||||
const errorOccured = await handleUploadFile(file);
|
||||
if (errorOccured) {
|
||||
handleClose();
|
||||
toast.error("Failed to upload job config");
|
||||
return;
|
||||
} else {
|
||||
handleClose();
|
||||
toast.success("Job config uploaded successfully");
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Dialog
|
||||
open={open}
|
||||
@@ -108,11 +115,18 @@ export const AdvancedJobOptionsDialog = ({
|
||||
<Typography variant="h6" component="div">
|
||||
Advanced Job Options
|
||||
</Typography>
|
||||
<Settings
|
||||
sx={{
|
||||
color: theme.palette.primary.contrastText,
|
||||
}}
|
||||
/>
|
||||
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
|
||||
<UploadFile
|
||||
message="Upload Job Config"
|
||||
fileTypes={["application/json"]}
|
||||
onUploadFile={onUploadFile}
|
||||
/>
|
||||
<Settings
|
||||
sx={{
|
||||
color: theme.palette.primary.contrastText,
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</DialogTitle>
|
||||
|
||||
<DialogContent
|
||||
@@ -137,7 +151,7 @@ export const AdvancedJobOptionsDialog = ({
|
||||
control={
|
||||
<Checkbox
|
||||
checked={localJobOptions.multi_page_scrape}
|
||||
onChange={handleMultiPageScrapeChange}
|
||||
onChange={() => handleCheckboxChange("multi_page_scrape")}
|
||||
disabled={!multiPageScrapeEnabled}
|
||||
/>
|
||||
}
|
||||
@@ -158,11 +172,12 @@ export const AdvancedJobOptionsDialog = ({
|
||||
</Box>
|
||||
}
|
||||
/>
|
||||
|
||||
<FormControlLabel
|
||||
control={
|
||||
<Checkbox
|
||||
checked={localJobOptions.collect_media}
|
||||
onChange={handleCollectMediaChange}
|
||||
onChange={() => handleCheckboxChange("collect_media")}
|
||||
data-cy="collect-media-checkbox"
|
||||
/>
|
||||
}
|
||||
@@ -177,6 +192,26 @@ export const AdvancedJobOptionsDialog = ({
|
||||
</Box>
|
||||
}
|
||||
/>
|
||||
|
||||
<FormControlLabel
|
||||
control={
|
||||
<Checkbox
|
||||
checked={localJobOptions.return_html}
|
||||
onChange={() => handleCheckboxChange("return_html")}
|
||||
data-cy="return-html-checkbox"
|
||||
/>
|
||||
}
|
||||
label={
|
||||
<Box sx={{ display: "flex", alignItems: "center" }}>
|
||||
<Typography>Return HTML</Typography>
|
||||
<Tooltip title="Return the HTML of the page">
|
||||
<IconButton size="small">
|
||||
<InfoOutlined fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Box>
|
||||
}
|
||||
/>
|
||||
</FormGroup>
|
||||
</Box>
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ export const Disabled = ({ message }: DisabledProps) => {
|
||||
display="flex"
|
||||
justifyContent="center"
|
||||
alignItems="center"
|
||||
data-testid="disabled-message"
|
||||
>
|
||||
<h4
|
||||
style={{
|
||||
|
||||
1
src/components/common/upload-file/index.ts
Normal file
1
src/components/common/upload-file/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "./upload-file";
|
||||
34
src/components/common/upload-file/upload-file.tsx
Normal file
34
src/components/common/upload-file/upload-file.tsx
Normal file
@@ -0,0 +1,34 @@
|
||||
import { Box, Button, Typography } from "@mui/material";
|
||||
|
||||
export type UploadFileProps = {
|
||||
message: string;
|
||||
fileTypes?: string[];
|
||||
onUploadFile: (file: File) => void;
|
||||
};
|
||||
|
||||
export const UploadFile = ({
|
||||
message,
|
||||
fileTypes,
|
||||
onUploadFile,
|
||||
}: UploadFileProps) => {
|
||||
const handleUploadFile = (event: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const file = event.target.files?.[0];
|
||||
if (file) {
|
||||
onUploadFile(file);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<Button variant="contained" component="label">
|
||||
<Typography>{message}</Typography>
|
||||
<input
|
||||
type="file"
|
||||
hidden
|
||||
onChange={handleUploadFile}
|
||||
accept={fileTypes?.join(",")}
|
||||
/>
|
||||
</Button>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
@@ -1,18 +1,18 @@
|
||||
import React from "react";
|
||||
import StarIcon from "@mui/icons-material/Star";
|
||||
import {
|
||||
Tooltip,
|
||||
Box,
|
||||
Button,
|
||||
Checkbox,
|
||||
IconButton,
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Box,
|
||||
Checkbox,
|
||||
Button,
|
||||
Tooltip,
|
||||
} from "@mui/material";
|
||||
import router from "next/router";
|
||||
import { Job } from "../../types";
|
||||
import StarIcon from "@mui/icons-material/Star";
|
||||
|
||||
interface stateProps {
|
||||
selectedJobs: Set<string>;
|
||||
@@ -21,7 +21,12 @@ interface stateProps {
|
||||
|
||||
interface Props {
|
||||
onSelectJob: (job: string) => void;
|
||||
onNavigate: (elements: Object[], url: string, options: any) => void;
|
||||
onNavigate: (
|
||||
id: string,
|
||||
elements: Object[],
|
||||
url: string,
|
||||
options: any
|
||||
) => void;
|
||||
onFavorite: (ids: string[], field: string, value: any) => void;
|
||||
stateProps: stateProps;
|
||||
}
|
||||
@@ -87,11 +92,29 @@ export const Favorites = ({
|
||||
</TableCell>
|
||||
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
|
||||
<Button
|
||||
onClick={() =>
|
||||
onNavigate(row.elements, row.url, row.job_options)
|
||||
}
|
||||
onClick={() => {
|
||||
if (row.agent_mode) {
|
||||
router.push({
|
||||
pathname: "/agent",
|
||||
query: {
|
||||
url: row.url,
|
||||
prompt: row.prompt,
|
||||
job_options: JSON.stringify(row.job_options),
|
||||
id: row.id,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
onNavigate(row.id, row.elements, row.url, row.job_options);
|
||||
}
|
||||
}}
|
||||
size="small"
|
||||
sx={{
|
||||
minWidth: 0,
|
||||
padding: "4px 8px",
|
||||
fontSize: "0.625rem",
|
||||
}}
|
||||
>
|
||||
Run
|
||||
Rerun
|
||||
</Button>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
"use client";
|
||||
import { AutoAwesome, Image, VideoCameraBack } from "@mui/icons-material";
|
||||
import { useExportJobConfig } from "@/hooks/use-export-job-config";
|
||||
import {
|
||||
AutoAwesome,
|
||||
Image,
|
||||
Settings,
|
||||
VideoCameraBack,
|
||||
} from "@mui/icons-material";
|
||||
import StarIcon from "@mui/icons-material/Star";
|
||||
import {
|
||||
Box,
|
||||
@@ -30,7 +36,12 @@ interface Props {
|
||||
colors: stringMap;
|
||||
onSelectJob: (job: string) => void;
|
||||
onDownload: (job: string[]) => void;
|
||||
onNavigate: (elements: Object[], url: string, options: any) => void;
|
||||
onNavigate: (
|
||||
id: string,
|
||||
elements: Object[],
|
||||
url: string,
|
||||
options: any
|
||||
) => void;
|
||||
onFavorite: (ids: string[], field: string, value: any) => void;
|
||||
onJobClick: (job: Job) => void;
|
||||
stateProps: stateProps;
|
||||
@@ -46,6 +57,7 @@ export const JobQueue = ({
|
||||
onJobClick,
|
||||
}: Props) => {
|
||||
const { selectedJobs, filteredJobs } = stateProps;
|
||||
const { exportJobConfig } = useExportJobConfig();
|
||||
const router = useRouter();
|
||||
|
||||
return (
|
||||
@@ -116,6 +128,17 @@ export const JobQueue = ({
|
||||
</IconButton>
|
||||
</span>
|
||||
</Tooltip>
|
||||
<Tooltip title="Export Job Configuration">
|
||||
<span>
|
||||
<IconButton
|
||||
onClick={() => {
|
||||
exportJobConfig(row);
|
||||
}}
|
||||
>
|
||||
<Settings />
|
||||
</IconButton>
|
||||
</span>
|
||||
</Tooltip>
|
||||
{row.job_options.collect_media && (
|
||||
<Tooltip title="View Media">
|
||||
<span>
|
||||
@@ -213,10 +236,17 @@ export const JobQueue = ({
|
||||
query: {
|
||||
url: row.url,
|
||||
prompt: row.prompt,
|
||||
job_options: JSON.stringify(row.job_options),
|
||||
id: row.id,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
onNavigate(row.elements, row.url, row.job_options);
|
||||
onNavigate(
|
||||
row.id,
|
||||
row.elements,
|
||||
row.url,
|
||||
row.job_options
|
||||
);
|
||||
}
|
||||
}}
|
||||
size="small"
|
||||
|
||||
@@ -47,10 +47,16 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
|
||||
setJobDownloadDialogOpen(true);
|
||||
};
|
||||
|
||||
const handleNavigate = (elements: Object[], url: string, options: any) => {
|
||||
const handleNavigate = (
|
||||
id: string,
|
||||
elements: Object[],
|
||||
url: string,
|
||||
options: any
|
||||
) => {
|
||||
router.push({
|
||||
pathname: "/",
|
||||
query: {
|
||||
id,
|
||||
elements: JSON.stringify(elements),
|
||||
url: url,
|
||||
job_options: JSON.stringify(options),
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
"use client";
|
||||
|
||||
import React, { useEffect, useRef } from "react";
|
||||
import { Container, Box } from "@mui/material";
|
||||
import { useRouter } from "next/router";
|
||||
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
|
||||
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
|
||||
import {
|
||||
ErrorSnackbar,
|
||||
JobNotifySnackbar,
|
||||
} from "@/components/common/snackbars";
|
||||
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
|
||||
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
|
||||
import { Box, Container } from "@mui/material";
|
||||
import { useRouter } from "next/router";
|
||||
import { useEffect, useRef } from "react";
|
||||
|
||||
export const Home = () => {
|
||||
const {
|
||||
@@ -50,19 +50,18 @@ export const Home = () => {
|
||||
flexDirection="column"
|
||||
justifyContent="center"
|
||||
alignItems="center"
|
||||
height="100%"
|
||||
minHeight="100vh"
|
||||
py={4}
|
||||
>
|
||||
<Container maxWidth="lg" className="overflow-y-auto max-h-full">
|
||||
<JobSubmitter />
|
||||
|
||||
{submittedURL.length > 0 ? (
|
||||
<Container maxWidth="lg" className="overflow-y-auto">
|
||||
<Box className="flex flex-col gap-6">
|
||||
<JobSubmitter />
|
||||
<ElementTable
|
||||
rows={rows}
|
||||
setRows={setRows}
|
||||
submittedURL={submittedURL}
|
||||
/>
|
||||
) : null}
|
||||
</Box>
|
||||
</Container>
|
||||
|
||||
{snackbarSeverity === "info" ? (
|
||||
|
||||
@@ -1,24 +1,24 @@
|
||||
"use client";
|
||||
|
||||
import React, { useState, Dispatch, SetStateAction } from "react";
|
||||
import { Element } from "@/types";
|
||||
import AddIcon from "@mui/icons-material/Add";
|
||||
import DeleteIcon from "@mui/icons-material/Delete";
|
||||
import {
|
||||
Typography,
|
||||
TextField,
|
||||
Button,
|
||||
Box,
|
||||
Divider,
|
||||
IconButton,
|
||||
Paper,
|
||||
Table,
|
||||
TableBody,
|
||||
TableContainer,
|
||||
TableCell,
|
||||
TableContainer,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Box,
|
||||
IconButton,
|
||||
TextField,
|
||||
Tooltip,
|
||||
useTheme,
|
||||
Divider,
|
||||
Typography,
|
||||
} from "@mui/material";
|
||||
import AddIcon from "@mui/icons-material/Add";
|
||||
import { Element } from "@/types";
|
||||
import { Dispatch, SetStateAction, useState } from "react";
|
||||
import { SiteMap } from "../site-map";
|
||||
|
||||
interface Props {
|
||||
@@ -28,7 +28,6 @@ interface Props {
|
||||
}
|
||||
|
||||
export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
|
||||
const theme = useTheme();
|
||||
const [newRow, setNewRow] = useState<Element>({
|
||||
name: "",
|
||||
xpath: "",
|
||||
@@ -42,142 +41,219 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
|
||||
};
|
||||
|
||||
const handleDeleteRow = (elementName: string) => {
|
||||
setRows(
|
||||
rows.filter((r) => {
|
||||
return elementName !== r.name;
|
||||
})
|
||||
);
|
||||
setRows(rows.filter((r) => elementName !== r.name));
|
||||
};
|
||||
|
||||
return (
|
||||
<Box className="animate-fadeIn p-2" bgcolor="background.paper">
|
||||
<Box className="text-center mb-4">
|
||||
<Typography variant="h4" sx={{ marginBottom: 1 }}>
|
||||
Elements to Scrape
|
||||
</Typography>
|
||||
<Paper
|
||||
elevation={0}
|
||||
sx={{
|
||||
p: 4,
|
||||
borderRadius: 2,
|
||||
bgcolor: "background.paper",
|
||||
border: 1,
|
||||
borderColor: "divider",
|
||||
"&:hover": {
|
||||
boxShadow: "0 4px 20px rgba(0, 0, 0, 0.05)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<Box className="flex flex-col gap-6">
|
||||
<Box>
|
||||
<Typography
|
||||
variant="h5"
|
||||
sx={{
|
||||
fontWeight: 600,
|
||||
color: "text.primary",
|
||||
mb: 1,
|
||||
}}
|
||||
>
|
||||
Elements to Scrape
|
||||
</Typography>
|
||||
<Typography
|
||||
variant="body2"
|
||||
sx={{
|
||||
color: "text.secondary",
|
||||
}}
|
||||
>
|
||||
Add elements to scrape from the target URL using XPath selectors
|
||||
</Typography>
|
||||
</Box>
|
||||
|
||||
<TableContainer
|
||||
component={Box}
|
||||
sx={{ maxHeight: "50%", overflow: "auto" }}
|
||||
sx={{
|
||||
maxHeight: "400px",
|
||||
overflow: "auto",
|
||||
borderRadius: 2,
|
||||
border: 1,
|
||||
borderColor: "divider",
|
||||
}}
|
||||
>
|
||||
<div className="rounded-lg shadow-md border border-gray-300 overflow-hidden">
|
||||
<Table
|
||||
stickyHeader
|
||||
className="mb-4"
|
||||
sx={{
|
||||
tableLayout: "fixed",
|
||||
width: "100%",
|
||||
"& .MuiTableCell-root": {
|
||||
borderBottom: "1px solid #e0e0e0",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableCell>
|
||||
<Typography sx={{ fontWeight: "bold" }}>Name</Typography>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Typography sx={{ fontWeight: "bold" }}>XPath</Typography>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Typography sx={{ fontWeight: "bold" }}>Actions</Typography>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
<TableRow>
|
||||
<TableCell>
|
||||
<TextField
|
||||
data-cy="name-field"
|
||||
label="Name"
|
||||
variant="outlined"
|
||||
fullWidth
|
||||
value={newRow.name}
|
||||
onChange={(e) =>
|
||||
setNewRow({ ...newRow, name: e.target.value })
|
||||
}
|
||||
/>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<TextField
|
||||
data-cy="xpath-field"
|
||||
label="XPath"
|
||||
variant="outlined"
|
||||
fullWidth
|
||||
value={newRow.xpath}
|
||||
onChange={(e) =>
|
||||
setNewRow({ ...newRow, xpath: e.target.value })
|
||||
}
|
||||
/>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Tooltip
|
||||
title={
|
||||
newRow.xpath.length > 0 && newRow.name.length > 0
|
||||
? "Add Element"
|
||||
: "Fill out all fields to add an element"
|
||||
}
|
||||
placement="top"
|
||||
>
|
||||
<span>
|
||||
<IconButton
|
||||
data-cy="add-button"
|
||||
aria-label="add"
|
||||
size="small"
|
||||
onClick={handleAddRow}
|
||||
sx={{
|
||||
height: "40px",
|
||||
width: "40px",
|
||||
}}
|
||||
disabled={
|
||||
!(newRow.xpath.length > 0 && newRow.name.length > 0)
|
||||
}
|
||||
>
|
||||
<AddIcon
|
||||
fontSize="inherit"
|
||||
sx={{
|
||||
color:
|
||||
theme.palette.mode === "light"
|
||||
? "#000000"
|
||||
: "#ffffff",
|
||||
}}
|
||||
/>
|
||||
</IconButton>
|
||||
</span>
|
||||
</Tooltip>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
{rows.map((row, index) => (
|
||||
<TableRow key={index}>
|
||||
<TableCell>
|
||||
<Typography>{row.name}</Typography>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Typography>{row.xpath}</Typography>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Button
|
||||
onClick={() => handleDeleteRow(row.name)}
|
||||
className="!bg-red-500 bg-opacity-50 !text-white font-semibold rounded-md
|
||||
transition-transform transform hover:scale-105 hover:bg-red-500"
|
||||
<Table
|
||||
stickyHeader
|
||||
size="small"
|
||||
sx={{
|
||||
"& .MuiTableCell-root": {
|
||||
borderBottom: "1px solid",
|
||||
borderColor: "divider",
|
||||
py: 1.5,
|
||||
},
|
||||
"& .MuiTableCell-head": {
|
||||
bgcolor: "background.default",
|
||||
fontWeight: 600,
|
||||
},
|
||||
}}
|
||||
>
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableCell width="30%">Name</TableCell>
|
||||
<TableCell width="50%">XPath</TableCell>
|
||||
<TableCell width="20%" align="center">
|
||||
Actions
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
<TableRow>
|
||||
<TableCell>
|
||||
<TextField
|
||||
data-cy="name-field"
|
||||
placeholder="Enter element name"
|
||||
variant="outlined"
|
||||
fullWidth
|
||||
size="small"
|
||||
value={newRow.name}
|
||||
onChange={(e) =>
|
||||
setNewRow({ ...newRow, name: e.target.value })
|
||||
}
|
||||
sx={{
|
||||
"& .MuiOutlinedInput-root": {
|
||||
borderRadius: 2,
|
||||
bgcolor: "background.default",
|
||||
"&:hover": {
|
||||
"& .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "primary.main",
|
||||
},
|
||||
},
|
||||
},
|
||||
}}
|
||||
/>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<TextField
|
||||
data-cy="xpath-field"
|
||||
placeholder="Enter XPath selector"
|
||||
variant="outlined"
|
||||
fullWidth
|
||||
size="small"
|
||||
value={newRow.xpath}
|
||||
onChange={(e) =>
|
||||
setNewRow({ ...newRow, xpath: e.target.value })
|
||||
}
|
||||
sx={{
|
||||
"& .MuiOutlinedInput-root": {
|
||||
borderRadius: 2,
|
||||
bgcolor: "background.default",
|
||||
"&:hover": {
|
||||
"& .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "primary.main",
|
||||
},
|
||||
},
|
||||
},
|
||||
}}
|
||||
/>
|
||||
</TableCell>
|
||||
<TableCell align="center">
|
||||
<Tooltip
|
||||
title={
|
||||
newRow.xpath.length > 0 && newRow.name.length > 0
|
||||
? "Add Element"
|
||||
: "Fill out all fields to add an element"
|
||||
}
|
||||
placement="top"
|
||||
>
|
||||
<span>
|
||||
<IconButton
|
||||
data-cy="add-button"
|
||||
aria-label="add"
|
||||
size="small"
|
||||
onClick={handleAddRow}
|
||||
disabled={
|
||||
!(newRow.xpath.length > 0 && newRow.name.length > 0)
|
||||
}
|
||||
sx={{
|
||||
bgcolor: "primary.main",
|
||||
color: "primary.contrastText",
|
||||
borderRadius: 2,
|
||||
"&:hover": {
|
||||
bgcolor: "primary.dark",
|
||||
transform: "translateY(-1px)",
|
||||
},
|
||||
"&.Mui-disabled": {
|
||||
bgcolor: "action.disabledBackground",
|
||||
color: "action.disabled",
|
||||
},
|
||||
}}
|
||||
>
|
||||
Delete
|
||||
</Button>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</div>
|
||||
<AddIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</span>
|
||||
</Tooltip>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
{rows.map((row, index) => (
|
||||
<TableRow
|
||||
key={index}
|
||||
sx={{
|
||||
"&:hover": {
|
||||
bgcolor: "action.hover",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<TableCell>
|
||||
<Typography variant="body2" noWrap>
|
||||
{row.name}
|
||||
</Typography>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Typography
|
||||
variant="body2"
|
||||
sx={{
|
||||
fontFamily: "monospace",
|
||||
fontSize: "0.875rem",
|
||||
color: "text.secondary",
|
||||
}}
|
||||
noWrap
|
||||
>
|
||||
{row.xpath}
|
||||
</Typography>
|
||||
</TableCell>
|
||||
<TableCell align="center">
|
||||
<IconButton
|
||||
onClick={() => handleDeleteRow(row.name)}
|
||||
size="small"
|
||||
color="error"
|
||||
sx={{
|
||||
"&:hover": {
|
||||
bgcolor: "error.main",
|
||||
color: "error.contrastText",
|
||||
transform: "translateY(-1px)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<DeleteIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
|
||||
<Divider sx={{ my: 2 }} />
|
||||
<SiteMap />
|
||||
</Box>
|
||||
<Divider
|
||||
sx={{
|
||||
borderColor: theme.palette.mode === "dark" ? "#ffffff" : "0000000",
|
||||
marginBottom: 2,
|
||||
}}
|
||||
/>
|
||||
<SiteMap />
|
||||
</Box>
|
||||
</Paper>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -2,3 +2,14 @@
|
||||
margin-bottom: 1rem;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.container {
|
||||
text-align: left;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.title {
|
||||
font-weight: 600;
|
||||
color: var(--mui-palette-text-primary);
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Box, Typography } from "@mui/material";
|
||||
import React, { ReactNode } from "react";
|
||||
import { Typography } from "@mui/material";
|
||||
import classes from "./job-submitter-header.module.css";
|
||||
import styles from "./job-submitter-header.module.css";
|
||||
|
||||
interface JobSubmitterHeaderProps {
|
||||
title?: string;
|
||||
@@ -8,13 +8,15 @@ interface JobSubmitterHeaderProps {
|
||||
}
|
||||
|
||||
export const JobSubmitterHeader: React.FC<JobSubmitterHeaderProps> = ({
|
||||
title = "Scraping Made Easy",
|
||||
title = "Scrape Webpage",
|
||||
children,
|
||||
}) => {
|
||||
return (
|
||||
<div className={classes.jobSubmitterHeader}>
|
||||
<Typography variant="h3">{title}</Typography>
|
||||
<Box className={styles.container}>
|
||||
<Typography variant="h4" className={styles.title}>
|
||||
{title}
|
||||
</Typography>
|
||||
{children}
|
||||
</div>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/* Layout for the URL input row: stacked on narrow screens. */
.container {
  display: flex;
  flex-direction: column;
  gap: 16px;
  align-items: stretch;
}

/* From 600px up, place the input and the submit button side by side. */
@media (min-width: 600px) {
  .container {
    flex-direction: row;
    align-items: center;
  }
}

.input {
  width: 100%;
}

/* :global() targets MUI's own (non-module) class names nested in .input. */
.input :global(.MuiOutlinedInput-root) {
  border-radius: 16px;
  transition: all 0.2s ease-in-out;
}

/* Highlight the outline with the primary color while hovering the field. */
.input
  :global(.MuiOutlinedInput-root:hover)
  :global(.MuiOutlinedInput-notchedOutline) {
  border-color: var(--mui-palette-primary-main);
}

/* NOTE(review): the !important flags presumably exist to win over MUI's
   button styles - confirm before removing them. */
.submitButton {
  height: 48px !important;
  border-radius: 16px;
  font-size: 1rem !important;
  font-weight: 500 !important;
}

/* Slight lift effect on hover. */
.submitButton:hover {
  transform: translateY(-1px);
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}

/* Declared after :hover so a disabled button never shows the lift effect. */
.submitButton:disabled {
  transform: none;
  box-shadow: none;
}

@media (min-width: 600px) {
  .submitButton {
    min-width: 120px;
    /* The base rule declares height with !important; a normal declaration
       here would lose the cascade and the button would stay 48px tall. */
    height: 56px !important;
  }
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import React from "react";
|
||||
import { TextField, Button, CircularProgress } from "@mui/material";
|
||||
import { Box, Button, CircularProgress, TextField } from "@mui/material";
|
||||
import { useJobSubmitterProvider } from "../provider";
|
||||
import styles from "./job-submitter-input.module.css";
|
||||
|
||||
export type JobSubmitterInputProps = {
|
||||
urlError: string | null;
|
||||
@@ -17,7 +17,7 @@ export const JobSubmitterInput = ({
|
||||
useJobSubmitterProvider();
|
||||
|
||||
return (
|
||||
<div className="flex flex-row space-x-4 items-center mb-2">
|
||||
<Box className={styles.container}>
|
||||
<TextField
|
||||
data-cy="url-input"
|
||||
label="URL"
|
||||
@@ -27,19 +27,18 @@ export const JobSubmitterInput = ({
|
||||
onChange={(e) => setSubmittedURL(e.target.value)}
|
||||
error={!isValidURL}
|
||||
helperText={!isValidURL ? urlError : ""}
|
||||
className="rounded-md"
|
||||
className={styles.input}
|
||||
/>
|
||||
<Button
|
||||
data-cy="submit-button"
|
||||
variant="contained"
|
||||
size="small"
|
||||
size="large"
|
||||
onClick={handleSubmit}
|
||||
disabled={!(rows.length > 0) || loading}
|
||||
className={`bg-[#034efc] text-white font-semibold rounded-md
|
||||
transition-transform transform hover:scale-105 disabled:opacity-50`}
|
||||
className={styles.submitButton}
|
||||
>
|
||||
{loading ? <CircularProgress size={24} color="inherit" /> : "Submit"}
|
||||
</Button>
|
||||
</div>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -4,6 +4,7 @@ import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
|
||||
import { useSubmitJob } from "@/hooks/use-submit-job";
|
||||
import { parseJobOptions } from "@/lib";
|
||||
import { useUser } from "@/store/hooks";
|
||||
import { Box, Paper } from "@mui/material";
|
||||
import { useRouter } from "next/router";
|
||||
import { useEffect } from "react";
|
||||
import { JobSubmitterHeader } from "./job-submitter-header";
|
||||
@@ -12,40 +13,74 @@ import { useJobSubmitterProvider } from "./provider";
|
||||
|
||||
export const JobSubmitter = () => {
|
||||
const router = useRouter();
|
||||
const { job_options } = router.query;
|
||||
const { job_options, id } = router.query;
|
||||
const { user } = useUser();
|
||||
|
||||
const { submitJob, loading, error } = useSubmitJob();
|
||||
const { submittedURL, rows, siteMap, setSiteMap, jobOptions, setJobOptions } =
|
||||
useJobSubmitterProvider();
|
||||
const {
|
||||
jobId,
|
||||
setJobId,
|
||||
submittedURL,
|
||||
rows,
|
||||
siteMap,
|
||||
setSiteMap,
|
||||
jobOptions,
|
||||
setJobOptions,
|
||||
} = useJobSubmitterProvider();
|
||||
|
||||
useEffect(() => {
|
||||
if (job_options) {
|
||||
parseJobOptions(job_options as string, setJobOptions, setSiteMap);
|
||||
parseJobOptions(
|
||||
id as string,
|
||||
job_options as string,
|
||||
setJobOptions,
|
||||
setSiteMap,
|
||||
setJobId
|
||||
);
|
||||
}
|
||||
}, [job_options]);
|
||||
|
||||
const handleSubmit = async () => {
|
||||
await submitJob(submittedURL, rows, user, jobOptions, siteMap, false, null);
|
||||
await submitJob(
|
||||
submittedURL,
|
||||
rows,
|
||||
user,
|
||||
jobOptions,
|
||||
siteMap,
|
||||
false,
|
||||
null,
|
||||
jobId
|
||||
);
|
||||
};
|
||||
|
||||
console.log(jobOptions);
|
||||
useEffect(() => {
|
||||
console.log(jobOptions);
|
||||
}, [jobOptions]);
|
||||
|
||||
return (
|
||||
<div>
|
||||
<JobSubmitterHeader />
|
||||
<JobSubmitterInput
|
||||
urlError={error}
|
||||
handleSubmit={handleSubmit}
|
||||
loading={loading}
|
||||
/>
|
||||
<AdvancedJobOptions
|
||||
jobOptions={jobOptions}
|
||||
setJobOptions={setJobOptions}
|
||||
/>
|
||||
</div>
|
||||
<Paper
|
||||
elevation={0}
|
||||
sx={{
|
||||
p: 4,
|
||||
borderRadius: 2,
|
||||
bgcolor: "background.paper",
|
||||
border: 1,
|
||||
borderColor: "divider",
|
||||
"&:hover": {
|
||||
boxShadow: "0 4px 20px rgba(0, 0, 0, 0.05)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<Box className="flex flex-col gap-6">
|
||||
<JobSubmitterHeader />
|
||||
<Box className="flex flex-col gap-4">
|
||||
<JobSubmitterInput
|
||||
urlError={error}
|
||||
handleSubmit={handleSubmit}
|
||||
loading={loading}
|
||||
/>
|
||||
<AdvancedJobOptions
|
||||
jobOptions={jobOptions}
|
||||
setJobOptions={setJobOptions}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
</Paper>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -10,6 +10,8 @@ import React, {
|
||||
} from "react";
|
||||
|
||||
type JobSubmitterProviderType = {
|
||||
jobId: string;
|
||||
setJobId: Dispatch<React.SetStateAction<string>>;
|
||||
submittedURL: string;
|
||||
setSubmittedURL: Dispatch<React.SetStateAction<string>>;
|
||||
rows: Element[];
|
||||
@@ -36,6 +38,7 @@ const JobSubmitterProvider = createContext<JobSubmitterProviderType>(
|
||||
);
|
||||
|
||||
export const Provider = ({ children }: PropsWithChildren) => {
|
||||
const [jobId, setJobId] = useState<string>("");
|
||||
const [submittedURL, setSubmittedURL] = useState<string>("");
|
||||
const [rows, setRows] = useState<Element[]>([]);
|
||||
const [results, setResults] = useState<Result>({});
|
||||
@@ -55,6 +58,8 @@ export const Provider = ({ children }: PropsWithChildren) => {
|
||||
|
||||
const value: JobSubmitterProviderType = useMemo(
|
||||
() => ({
|
||||
jobId,
|
||||
setJobId,
|
||||
submittedURL,
|
||||
setSubmittedURL,
|
||||
rows,
|
||||
@@ -76,6 +81,7 @@ export const Provider = ({ children }: PropsWithChildren) => {
|
||||
closeSnackbar,
|
||||
}),
|
||||
[
|
||||
jobId,
|
||||
submittedURL,
|
||||
rows,
|
||||
results,
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
import { useState } from "react";
|
||||
import { useJobSubmitterProvider } from "../../provider";
|
||||
import { ActionOption } from "@/types/job";
|
||||
import {
|
||||
Box,
|
||||
Button,
|
||||
Checkbox,
|
||||
FormControl,
|
||||
FormControlLabel,
|
||||
InputLabel,
|
||||
MenuItem,
|
||||
Select,
|
||||
TextField,
|
||||
FormControl,
|
||||
Button,
|
||||
Checkbox,
|
||||
FormControlLabel,
|
||||
} from "@mui/material";
|
||||
import { ActionOption } from "@/types/job";
|
||||
import classes from "./site-map-input.module.css";
|
||||
import { clsx } from "clsx";
|
||||
import { useState } from "react";
|
||||
import { useJobSubmitterProvider } from "../../provider";
|
||||
|
||||
export type SiteMapInputProps = {
|
||||
disabled?: boolean;
|
||||
@@ -28,7 +28,6 @@ export const SiteMapInput = ({
|
||||
clickOnce,
|
||||
input,
|
||||
}: SiteMapInputProps) => {
|
||||
console.log(clickOnce);
|
||||
const [optionState, setOptionState] = useState<ActionOption>(
|
||||
option || "click"
|
||||
);
|
||||
@@ -43,8 +42,6 @@ export const SiteMapInput = ({
|
||||
const handleAdd = () => {
|
||||
if (!siteMap) return;
|
||||
|
||||
console.log(optionState, xpathState, clickOnceState, inputState);
|
||||
|
||||
setSiteMap((prevSiteMap) => ({
|
||||
...prevSiteMap,
|
||||
actions: [
|
||||
@@ -60,6 +57,7 @@ export const SiteMapInput = ({
|
||||
}));
|
||||
|
||||
setXpathState("");
|
||||
setInputState("");
|
||||
};
|
||||
|
||||
const handleRemove = () => {
|
||||
@@ -72,14 +70,22 @@ export const SiteMapInput = ({
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-2 w-full">
|
||||
<div className="flex gap-2 items-center">
|
||||
<FormControl className="w-1/4">
|
||||
<Box
|
||||
sx={{ display: "flex", flexDirection: "column", gap: 2, width: "100%" }}
|
||||
>
|
||||
<Box sx={{ display: "flex", gap: 2, alignItems: "center" }}>
|
||||
<FormControl size="small" sx={{ minWidth: 120 }}>
|
||||
<InputLabel>Action Type</InputLabel>
|
||||
<Select
|
||||
disabled={disabled}
|
||||
displayEmpty
|
||||
value={optionState}
|
||||
label="Action Type"
|
||||
onChange={(e) => setOptionState(e.target.value as ActionOption)}
|
||||
sx={{
|
||||
"& .MuiSelect-select": {
|
||||
textTransform: "capitalize",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<MenuItem value="click">Click</MenuItem>
|
||||
<MenuItem value="input">Input</MenuItem>
|
||||
@@ -88,23 +94,49 @@ export const SiteMapInput = ({
|
||||
{optionState === "input" && (
|
||||
<TextField
|
||||
label="Input Text"
|
||||
size="small"
|
||||
fullWidth
|
||||
value={inputState}
|
||||
onChange={(e) => setInputState(e.target.value)}
|
||||
disabled={disabled}
|
||||
sx={{
|
||||
"& .MuiOutlinedInput-root": {
|
||||
bgcolor: "background.default",
|
||||
},
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
{!disabled && (
|
||||
<TextField
|
||||
label="XPath Selector"
|
||||
size="small"
|
||||
fullWidth
|
||||
value={xpathState}
|
||||
onChange={(e) => setXpathState(e.target.value)}
|
||||
disabled={disabled}
|
||||
sx={{
|
||||
"& .MuiOutlinedInput-root": {
|
||||
bgcolor: "background.default",
|
||||
fontFamily: "monospace",
|
||||
fontSize: "1rem",
|
||||
},
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
<TextField
|
||||
label="XPath Selector"
|
||||
fullWidth
|
||||
value={xpathState}
|
||||
onChange={(e) => setXpathState(e.target.value)}
|
||||
disabled={disabled}
|
||||
/>
|
||||
{disabled ? (
|
||||
<Button
|
||||
onClick={handleRemove}
|
||||
className={clsx(classes.button, classes.remove)}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="error"
|
||||
sx={{
|
||||
minWidth: "80px",
|
||||
textTransform: "none",
|
||||
"&:hover": {
|
||||
bgcolor: "error.main",
|
||||
color: "error.contrastText",
|
||||
},
|
||||
}}
|
||||
>
|
||||
Delete
|
||||
</Button>
|
||||
@@ -112,24 +144,41 @@ export const SiteMapInput = ({
|
||||
<Button
|
||||
onClick={handleAdd}
|
||||
disabled={!xpathState}
|
||||
className={clsx(classes.button, classes.add)}
|
||||
size="small"
|
||||
variant="contained"
|
||||
color="primary"
|
||||
sx={{
|
||||
minWidth: "80px",
|
||||
textTransform: "none",
|
||||
"&.Mui-disabled": {
|
||||
bgcolor: "action.disabledBackground",
|
||||
color: "action.disabled",
|
||||
},
|
||||
}}
|
||||
>
|
||||
Add
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</Box>
|
||||
{!disabled && (
|
||||
<FormControlLabel
|
||||
label="Do Once"
|
||||
control={
|
||||
<Checkbox
|
||||
size="small"
|
||||
checked={clickOnceState}
|
||||
disabled={disabled}
|
||||
onChange={() => setClickOnceState(!clickOnceState)}
|
||||
/>
|
||||
}
|
||||
sx={{
|
||||
"& .MuiFormControlLabel-label": {
|
||||
fontSize: "0.875rem",
|
||||
color: "text.secondary",
|
||||
},
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -1,12 +1,22 @@
|
||||
import {
|
||||
Box,
|
||||
Button,
|
||||
Divider,
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableContainer,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Typography,
|
||||
} from "@mui/material";
|
||||
import { useEffect, useState } from "react";
|
||||
import { useJobSubmitterProvider } from "../provider";
|
||||
import { Button, Divider, Typography, useTheme } from "@mui/material";
|
||||
import { SiteMapInput } from "./site-map-input";
|
||||
|
||||
export const SiteMap = () => {
|
||||
const { siteMap, setSiteMap } = useJobSubmitterProvider();
|
||||
const [showSiteMap, setShowSiteMap] = useState<boolean>(false);
|
||||
const theme = useTheme();
|
||||
|
||||
const handleCreateSiteMap = () => {
|
||||
setSiteMap({ actions: [] });
|
||||
@@ -25,46 +35,123 @@ export const SiteMap = () => {
|
||||
}, [siteMap]);
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-4">
|
||||
{siteMap ? (
|
||||
<Button onClick={handleClearSiteMap}>Clear Site Map</Button>
|
||||
<Box className="flex flex-col gap-4">
|
||||
{!siteMap ? (
|
||||
<Button
|
||||
onClick={handleCreateSiteMap}
|
||||
variant="contained"
|
||||
color="primary"
|
||||
sx={{
|
||||
alignSelf: "flex-end",
|
||||
textTransform: "none",
|
||||
}}
|
||||
>
|
||||
Create Site Map
|
||||
</Button>
|
||||
) : (
|
||||
<Button onClick={handleCreateSiteMap}>Create Site Map</Button>
|
||||
)}
|
||||
{showSiteMap && (
|
||||
<div className="flex flex-col gap-4">
|
||||
<Box className="flex flex-col gap-4">
|
||||
<Box
|
||||
sx={{
|
||||
display: "flex",
|
||||
justifyContent: "space-between",
|
||||
alignItems: "center",
|
||||
}}
|
||||
>
|
||||
<Typography variant="h6" sx={{ fontWeight: 500 }}>
|
||||
Site Map Configuration
|
||||
</Typography>
|
||||
<Button
|
||||
onClick={handleClearSiteMap}
|
||||
variant="outlined"
|
||||
color="error"
|
||||
size="small"
|
||||
sx={{
|
||||
textTransform: "none",
|
||||
"&:hover": {
|
||||
bgcolor: "error.main",
|
||||
color: "error.contrastText",
|
||||
},
|
||||
}}
|
||||
>
|
||||
Clear Site Map
|
||||
</Button>
|
||||
</Box>
|
||||
<SiteMapInput />
|
||||
{siteMap?.actions && siteMap?.actions.length > 0 && (
|
||||
<>
|
||||
<Divider
|
||||
<Divider />
|
||||
<TableContainer
|
||||
sx={{
|
||||
borderColor:
|
||||
theme.palette.mode === "dark" ? "#ffffff" : "0000000",
|
||||
maxHeight: "400px",
|
||||
overflow: "auto",
|
||||
borderRadius: 1,
|
||||
border: 1,
|
||||
borderColor: "divider",
|
||||
}}
|
||||
/>
|
||||
<Typography className="w-full text-center" variant="h5">
|
||||
Site Map Actions
|
||||
</Typography>
|
||||
>
|
||||
<Table size="small" stickyHeader>
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableCell width="10%">
|
||||
<Typography sx={{ fontWeight: 600 }}>Action</Typography>
|
||||
</TableCell>
|
||||
<TableCell width="30%">
|
||||
<Typography sx={{ fontWeight: 600 }}>Type</Typography>
|
||||
</TableCell>
|
||||
<TableCell width="40%">
|
||||
<Typography sx={{ fontWeight: 600 }}>XPath</Typography>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
{siteMap?.actions.reverse().map((action, index) => (
|
||||
<TableRow
|
||||
key={action.xpath}
|
||||
sx={{
|
||||
"&:hover": {
|
||||
bgcolor: "action.hover",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<TableCell>
|
||||
<Typography variant="body2">{index + 1}</Typography>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Typography
|
||||
variant="body2"
|
||||
sx={{
|
||||
color:
|
||||
action.type === "click"
|
||||
? "primary.main"
|
||||
: "warning.main",
|
||||
fontWeight: 500,
|
||||
}}
|
||||
>
|
||||
{action.type}
|
||||
</Typography>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Typography
|
||||
variant="body2"
|
||||
sx={{
|
||||
fontFamily: "monospace",
|
||||
fontSize: "0.875rem",
|
||||
color: "text.secondary",
|
||||
}}
|
||||
noWrap
|
||||
>
|
||||
{action.xpath}
|
||||
</Typography>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
</>
|
||||
)}
|
||||
<ul className="flex flex-col gap-4">
|
||||
{siteMap?.actions.reverse().map((action, index) => (
|
||||
<li key={action.xpath} className="flex w-full items-center">
|
||||
<Typography variant="h6" className="w-[10%] mr-2">
|
||||
Action {index + 1}:
|
||||
</Typography>
|
||||
<SiteMapInput
|
||||
disabled={Boolean(siteMap)}
|
||||
xpath={action.xpath}
|
||||
option={action.type}
|
||||
clickOnce={action.do_once}
|
||||
input={action.input}
|
||||
/>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
</Box>
|
||||
)}
|
||||
</div>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -11,17 +11,18 @@ export const useAdvancedJobOptions = () => {
|
||||
proxies: null,
|
||||
collect_media: false,
|
||||
custom_cookies: null,
|
||||
return_html: false,
|
||||
};
|
||||
|
||||
const router = useRouter();
|
||||
const { job_options } = router.query;
|
||||
const { job_options, job_id } = router.query;
|
||||
|
||||
const [jobOptions, setJobOptions] =
|
||||
useState<RawJobOptions>(initialJobOptions);
|
||||
|
||||
useEffect(() => {
|
||||
if (job_options) {
|
||||
parseJobOptions(job_options as string, setJobOptions);
|
||||
parseJobOptions(job_id as string, job_options as string, setJobOptions);
|
||||
}
|
||||
}, [job_options]);
|
||||
|
||||
|
||||
27
src/hooks/use-export-job-config.ts
Normal file
27
src/hooks/use-export-job-config.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { Job } from "@/types";
|
||||
|
||||
/**
 * Hook exposing a helper that downloads a job's configuration as a JSON file.
 *
 * @returns An object containing `exportJobConfig`.
 */
export const useExportJobConfig = () => {
  /**
   * Serializes the job's `url`, `prompt`, `job_options`, `elements` and
   * `agent_mode` to JSON and triggers a browser download named
   * `job_<id>.json` through a temporary hidden anchor element.
   *
   * @param job - The job whose configuration should be exported.
   */
  const exportJobConfig = async (job: Job) => {
    const { url, prompt, job_options, elements, agent_mode } = job;
    const payload = JSON.stringify({
      url,
      prompt,
      job_options,
      elements,
      agent_mode,
    });

    const objectUrl = window.URL.createObjectURL(
      new Blob([payload], { type: "application/json" })
    );

    // Hidden temporary anchor: clicking it starts the download.
    const anchor = document.createElement("a");
    anchor.style.display = "none";
    anchor.href = objectUrl;
    anchor.download = `job_${job.id}.json`;

    document.body.appendChild(anchor);
    anchor.click();

    // Release the blob URL, then drop the temporary anchor from the page.
    window.URL.revokeObjectURL(objectUrl);
    document.body.removeChild(anchor);
  };

  return { exportJobConfig };
};
|
||||
83
src/hooks/use-import-job-config.ts
Normal file
83
src/hooks/use-import-job-config.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
|
||||
import { useRouter } from "next/router";
|
||||
import { toast } from "react-toastify";
|
||||
|
||||
/**
 * Hook exposing a helper that loads a previously exported job configuration
 * (a JSON file) into the job submitter state.
 *
 * @returns an object containing `handleUploadFile`.
 */
export const useImportJobConfig = () => {
  const router = useRouter();
  const { setJobOptions, setSiteMap, setSubmittedURL, setRows } =
    useJobSubmitterProvider();

  /**
   * Reads `file` as text, validates it as a job config and applies it.
   *
   * The config must be a JSON object with top-level `url`, `job_options` and
   * `elements` keys, and a `site_map` key either at the top level or inside
   * `job_options`.
   *
   * @param file - the uploaded JSON file.
   * @returns a promise that resolves to `true` when the import FAILED (a toast
   *   has been shown) and `false` when the config was applied. It never
   *   rejects.
   */
  const handleUploadFile = (file: File): Promise<boolean> => {
    return new Promise((resolve) => {
      const reader = new FileReader();

      // Single exit point for every failure: notify the user, report the error.
      const fail = (message: string) => {
        toast.error(message);
        resolve(true);
      };

      reader.onerror = () => fail("Failed to read file");

      reader.onload = (e) => {
        const result = e.target?.result;

        if (typeof result !== "string") {
          fail("Failed to read file");
          return;
        }

        let parsed: unknown;
        try {
          parsed = JSON.parse(result);
        } catch {
          fail("Failed to parse job config");
          return;
        }

        if (
          typeof parsed !== "object" ||
          parsed === null ||
          Array.isArray(parsed)
        ) {
          fail("Failed to parse job config");
          return;
        }

        // The payload is untrusted JSON; individual fields are checked below.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const jobConfig = parsed as Record<string, any>;

        // Check real keys rather than searching the raw text, so that a value
        // or a longer key that merely contains the word does not pass.
        for (const key of ["url", "job_options", "elements"]) {
          if (!(key in jobConfig)) {
            fail(`Invalid job config: missing ${key}`);
            return;
          }
        }

        const rawOptions = jobConfig.job_options;
        const nestedOptions =
          typeof rawOptions === "object" && rawOptions !== null
            ? rawOptions
            : null;

        const hasSiteMap =
          "site_map" in jobConfig ||
          (nestedOptions !== null && "site_map" in nestedOptions);

        if (!hasSiteMap) {
          fail("Invalid job config: missing site_map");
          return;
        }

        try {
          if (jobConfig.agent_mode) {
            void router.push({
              pathname: "/agent",
              query: {
                url: jobConfig.url,
                prompt: jobConfig.prompt,
                job_options: JSON.stringify(rawOptions),
              },
            });
          }

          // The form state stores proxies as a string, so an array of proxies
          // is reset to an empty string. A copy is used so the parsed config
          // itself is left untouched.
          const jobOptions =
            nestedOptions !== null && Array.isArray(nestedOptions.proxies)
              ? { ...nestedOptions, proxies: "" }
              : rawOptions || {};

          // Exported configs carry the site map inside job_options, so fall
          // back to that location when no top-level site_map is present.
          const siteMap =
            jobConfig.site_map ?? nestedOptions?.site_map ?? null;

          setJobOptions(jobOptions);
          setSiteMap(siteMap);
          setSubmittedURL(jobConfig.url || "");
          setRows(jobConfig.elements || []);
          resolve(false);
        } catch {
          fail("Failed to parse job config");
        }
      };

      reader.readAsText(file);
    });
  };

  return { handleUploadFile };
};
|
||||
@@ -25,7 +25,8 @@ export const useSubmitJob = () => {
|
||||
jobOptions: RawJobOptions,
|
||||
siteMap: SiteMap | null,
|
||||
agentMode: boolean,
|
||||
prompt: string | null
|
||||
prompt: string | null,
|
||||
id?: string
|
||||
) => {
|
||||
if (!validateURL(submittedURL)) {
|
||||
setIsValidUrl(false);
|
||||
@@ -61,7 +62,8 @@ export const useSubmitJob = () => {
|
||||
customCookies,
|
||||
siteMap,
|
||||
agentMode,
|
||||
prompt || undefined
|
||||
prompt || undefined,
|
||||
id
|
||||
)
|
||||
.then(async (response) => {
|
||||
if (!response.ok) {
|
||||
@@ -80,7 +82,10 @@ export const useSubmitJob = () => {
|
||||
setSnackbarOpen(true);
|
||||
})
|
||||
.catch((error) => {
|
||||
setSnackbarMessage(error || "An error occurred.");
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "An error occurred.";
|
||||
console.log(errorMessage);
|
||||
setSnackbarMessage(errorMessage);
|
||||
setSnackbarSeverity("error");
|
||||
setSnackbarOpen(true);
|
||||
})
|
||||
|
||||
@@ -3,9 +3,11 @@ import { Dispatch, SetStateAction } from "react";
|
||||
import { RawJobOptions, SiteMap } from "@/types";
|
||||
|
||||
export const parseJobOptions = (
|
||||
id: string,
|
||||
job_options: string,
|
||||
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
|
||||
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>
|
||||
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>,
|
||||
setJobId?: Dispatch<SetStateAction<string>>
|
||||
) => {
|
||||
if (job_options) {
|
||||
const jsonOptions = JSON.parse(job_options as string);
|
||||
@@ -15,6 +17,7 @@ export const parseJobOptions = (
|
||||
proxies: null,
|
||||
collect_media: false,
|
||||
custom_cookies: null,
|
||||
return_html: false,
|
||||
};
|
||||
|
||||
if (jsonOptions.collect_media) {
|
||||
@@ -42,6 +45,14 @@ export const parseJobOptions = (
|
||||
setSiteMap(jsonOptions.site_map);
|
||||
}
|
||||
|
||||
if (jsonOptions.return_html) {
|
||||
newJobOptions.return_html = true;
|
||||
}
|
||||
|
||||
if (id && setJobId) {
|
||||
setJobId(id);
|
||||
}
|
||||
|
||||
setJobOptions(newJobOptions);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -21,15 +21,16 @@ export default async function handler(
|
||||
}
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Error: ${response.statusText}`);
|
||||
const result = await response.json();
|
||||
|
||||
if (response.status === 500) {
|
||||
res.status(500).json({ error: result.error });
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
res.status(200).json(result);
|
||||
} catch (error) {
|
||||
console.error("Error submitting scrape job:", error);
|
||||
res.status(500).json({ error: "Internal Server Error" });
|
||||
res.status(500).json({ error: error });
|
||||
}
|
||||
} else {
|
||||
res.setHeader("Allow", ["POST"]);
|
||||
|
||||
@@ -8,7 +8,7 @@ export type DeleteCronJobsParams = {
|
||||
export const deleteCronJobs = async (params: DeleteCronJobsParams) => {
|
||||
const token = Cookies.get("token");
|
||||
|
||||
const response = await fetch("/api/delete-cron-jobs", {
|
||||
const response = await fetch("/api/delete-cron-job", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
|
||||
@@ -9,14 +9,15 @@ export const submitJob = async (
|
||||
customCookies: any,
|
||||
siteMap: SiteMap | null,
|
||||
agentMode: boolean = false,
|
||||
prompt?: string
|
||||
prompt?: string,
|
||||
id?: string
|
||||
) => {
|
||||
console.log(user);
|
||||
return await fetch(`/api/submit-scrape-job`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
data: {
|
||||
id,
|
||||
url: submittedURL,
|
||||
elements: rows,
|
||||
user: user?.email,
|
||||
|
||||
@@ -27,6 +27,7 @@ export type RawJobOptions = {
|
||||
proxies: string | null;
|
||||
collect_media: boolean;
|
||||
custom_cookies: string | null;
|
||||
return_html: boolean;
|
||||
};
|
||||
|
||||
export type ActionOption = "click" | "input";
|
||||
@@ -58,6 +59,7 @@ export const initialJobOptions: RawJobOptions = {
|
||||
proxies: null,
|
||||
collect_media: false,
|
||||
custom_cookies: null,
|
||||
return_html: false,
|
||||
};
|
||||
|
||||
export const COLOR_MAP: Record<string, string> = {
|
||||
|
||||
Reference in New Issue
Block a user