mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-12-12 18:56:17 +00:00
feat: add import/export for job configurations (#91)
* chore: wip add upload/import * chore: wip add upload/import * feat: update job rerunning * fix: update workflow * fix: update workflow * chore: temp disable workflow
This commit is contained in:
12
.github/workflows/merge.yml
vendored
12
.github/workflows/merge.yml
vendored
@@ -10,14 +10,14 @@ on:
|
|||||||
- master
|
- master
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
tests:
|
# TODO: Renable once browser forge is fixed for camoufox, or else tests will never pass
|
||||||
uses: ./.github/workflows/tests.yml
|
# tests:
|
||||||
secrets:
|
# uses: ./.github/workflows/tests.yml
|
||||||
openai_key: ${{ secrets.OPENAI_KEY }}
|
# secrets:
|
||||||
discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
# openai_key: ${{ secrets.OPENAI_KEY }}
|
||||||
|
# discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
||||||
|
|
||||||
version:
|
version:
|
||||||
needs: tests
|
|
||||||
uses: ./.github/workflows/version.yml
|
uses: ./.github/workflows/version.yml
|
||||||
secrets:
|
secrets:
|
||||||
git_token: ${{ secrets.GPAT_TOKEN }}
|
git_token: ${{ secrets.GPAT_TOKEN }}
|
||||||
|
|||||||
5
.github/workflows/pr.yml
vendored
5
.github/workflows/pr.yml
vendored
@@ -8,11 +8,6 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
checkout:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
tests:
|
tests:
|
||||||
uses: ./.github/workflows/tests.yml
|
uses: ./.github/workflows/tests.yml
|
||||||
secrets:
|
secrets:
|
||||||
|
|||||||
4
.github/workflows/pytest.yml
vendored
4
.github/workflows/pytest.yml
vendored
@@ -10,6 +10,8 @@ jobs:
|
|||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v3
|
||||||
|
|
||||||
- name: Set env
|
- name: Set env
|
||||||
run: echo "ENV=test" >> $GITHUB_ENV
|
run: echo "ENV=test" >> $GITHUB_ENV
|
||||||
|
|
||||||
@@ -20,7 +22,7 @@ jobs:
|
|||||||
run: pdm install
|
run: pdm install
|
||||||
|
|
||||||
- name: Install playwright
|
- name: Install playwright
|
||||||
run: pdm run playwright install
|
run: pdm run playwright install --with-deps
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests
|
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from camoufox import AsyncCamoufox
|
|||||||
from playwright.async_api import Page
|
from playwright.async_api import Page
|
||||||
|
|
||||||
# LOCAL
|
# LOCAL
|
||||||
|
from api.backend.constants import RECORDINGS_ENABLED
|
||||||
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
|
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
|
||||||
from api.backend.job.models import CapturedElement
|
from api.backend.job.models import CapturedElement
|
||||||
from api.backend.worker.logger import LOG
|
from api.backend.worker.logger import LOG
|
||||||
@@ -29,11 +30,13 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
|
|||||||
LOG.info(f"Starting work for agent job: {agent_job}")
|
LOG.info(f"Starting work for agent job: {agent_job}")
|
||||||
pages = set()
|
pages = set()
|
||||||
|
|
||||||
|
proxy = None
|
||||||
|
|
||||||
if agent_job["job_options"]["proxies"]:
|
if agent_job["job_options"]["proxies"]:
|
||||||
proxy = random.choice(agent_job["job_options"]["proxies"])
|
proxy = random.choice(agent_job["job_options"]["proxies"])
|
||||||
LOG.info(f"Using proxy: {proxy}")
|
LOG.info(f"Using proxy: {proxy}")
|
||||||
|
|
||||||
async with AsyncCamoufox(headless=True) as browser:
|
async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
|
||||||
page: Page = await browser.new_page()
|
page: Page = await browser.new_page()
|
||||||
|
|
||||||
await add_custom_items(
|
await add_custom_items(
|
||||||
@@ -64,7 +67,7 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
|
|||||||
xpaths = parse_response(response)
|
xpaths = parse_response(response)
|
||||||
|
|
||||||
captured_elements = await capture_elements(
|
captured_elements = await capture_elements(
|
||||||
page, xpaths, agent_job["job_options"]["return_html"]
|
page, xpaths, agent_job["job_options"].get("return_html", False)
|
||||||
)
|
)
|
||||||
|
|
||||||
final_url = page.url
|
final_url = page.url
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ def insert(query: str, values: tuple[Any, ...]):
|
|||||||
|
|
||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
LOG.error(f"An error occurred: {e}")
|
LOG.error(f"An error occurred: {e}")
|
||||||
|
raise e
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
|||||||
@@ -49,10 +49,15 @@ async def get_queued_job():
|
|||||||
return res[0] if res else None
|
return res[0] if res else None
|
||||||
|
|
||||||
|
|
||||||
async def update_job(ids: list[str], field: str, value: Any):
|
async def update_job(ids: list[str], updates: dict[str, Any]):
|
||||||
query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
|
if not updates:
|
||||||
res = update(query, tuple([value] + ids))
|
return
|
||||||
LOG.info(f"Updated job: {res}")
|
|
||||||
|
set_clause = ", ".join(f"{field} = ?" for field in updates.keys())
|
||||||
|
query = f"UPDATE jobs SET {set_clause} WHERE id IN {format_list_for_query(ids)}"
|
||||||
|
values = list(updates.values()) + ids
|
||||||
|
res = update(query, tuple(values))
|
||||||
|
LOG.debug(f"Updated job: {res}")
|
||||||
|
|
||||||
|
|
||||||
async def delete_jobs(jobs: list[str]):
|
async def delete_jobs(jobs: list[str]):
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
# STL
|
# STL
|
||||||
import logging
|
import logging
|
||||||
|
import datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
# LOCAL
|
# LOCAL
|
||||||
@@ -12,7 +13,23 @@ from api.backend.database.queries.job.job_queries import JOB_INSERT_QUERY
|
|||||||
LOG = logging.getLogger("Job")
|
LOG = logging.getLogger("Job")
|
||||||
|
|
||||||
|
|
||||||
def insert(item: dict[str, Any]) -> None:
|
async def insert(item: dict[str, Any]) -> None:
|
||||||
|
if check_for_job_completion(item["id"]):
|
||||||
|
await multi_field_update_job(
|
||||||
|
item["id"],
|
||||||
|
{
|
||||||
|
"agent_mode": item["agent_mode"],
|
||||||
|
"prompt": item["prompt"],
|
||||||
|
"job_options": item["job_options"],
|
||||||
|
"elements": item["elements"],
|
||||||
|
"status": "Queued",
|
||||||
|
"result": [],
|
||||||
|
"time_created": datetime.datetime.now().isoformat(),
|
||||||
|
"chat": None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
common_insert(
|
common_insert(
|
||||||
JOB_INSERT_QUERY,
|
JOB_INSERT_QUERY,
|
||||||
(
|
(
|
||||||
@@ -33,6 +50,12 @@ def insert(item: dict[str, Any]) -> None:
|
|||||||
LOG.debug(f"Inserted item: {item}")
|
LOG.debug(f"Inserted item: {item}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_for_job_completion(id: str) -> dict[str, Any]:
|
||||||
|
query = f"SELECT * FROM jobs WHERE id = ?"
|
||||||
|
res = common_query(query, (id,))
|
||||||
|
return res[0] if res else {}
|
||||||
|
|
||||||
|
|
||||||
async def get_queued_job():
|
async def get_queued_job():
|
||||||
query = (
|
query = (
|
||||||
"SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
|
"SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
|
||||||
@@ -48,6 +71,12 @@ async def update_job(ids: list[str], field: str, value: Any):
|
|||||||
LOG.debug(f"Updated job: {res}")
|
LOG.debug(f"Updated job: {res}")
|
||||||
|
|
||||||
|
|
||||||
|
async def multi_field_update_job(id: str, fields: dict[str, Any]):
|
||||||
|
query = f"UPDATE jobs SET {', '.join(f'{field} = ?' for field in fields.keys())} WHERE id = ?"
|
||||||
|
res = common_update(query, tuple(list(fields.values()) + [id]))
|
||||||
|
LOG.debug(f"Updated job: {res}")
|
||||||
|
|
||||||
|
|
||||||
async def delete_jobs(jobs: list[str]):
|
async def delete_jobs(jobs: list[str]):
|
||||||
if not jobs:
|
if not jobs:
|
||||||
LOG.debug("No jobs to delete.")
|
LOG.debug("No jobs to delete.")
|
||||||
|
|||||||
@@ -43,10 +43,8 @@ job_router = APIRouter()
|
|||||||
@job_router.post("/update")
|
@job_router.post("/update")
|
||||||
@handle_exceptions(logger=LOG)
|
@handle_exceptions(logger=LOG)
|
||||||
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
|
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
|
||||||
"""Used to update jobs"""
|
|
||||||
await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)
|
await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)
|
||||||
|
return {"message": "Jobs updated successfully"}
|
||||||
return JSONResponse(content={"message": "Jobs updated successfully."})
|
|
||||||
|
|
||||||
|
|
||||||
@job_router.post("/submit-scrape-job")
|
@job_router.post("/submit-scrape-job")
|
||||||
@@ -54,9 +52,11 @@ async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
|
|||||||
async def submit_scrape_job(job: Job):
|
async def submit_scrape_job(job: Job):
|
||||||
LOG.info(f"Recieved job: {job}")
|
LOG.info(f"Recieved job: {job}")
|
||||||
|
|
||||||
|
if not job.id:
|
||||||
job.id = uuid.uuid4().hex
|
job.id = uuid.uuid4().hex
|
||||||
|
|
||||||
job_dict = job.model_dump()
|
job_dict = job.model_dump()
|
||||||
insert(job_dict)
|
await insert(job_dict)
|
||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
content={"id": job.id, "message": "Job submitted successfully."}
|
content={"id": job.id, "message": "Job submitted successfully."}
|
||||||
@@ -70,7 +70,9 @@ async def retrieve_scrape_jobs(
|
|||||||
):
|
):
|
||||||
LOG.info(f"Retrieving jobs for account: {user.email}")
|
LOG.info(f"Retrieving jobs for account: {user.email}")
|
||||||
ATTRIBUTES = "chat" if fetch_options.chat else "*"
|
ATTRIBUTES = "chat" if fetch_options.chat else "*"
|
||||||
job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
|
job_query = (
|
||||||
|
f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ? ORDER BY time_created ASC"
|
||||||
|
)
|
||||||
results = query(job_query, (user.email,))
|
results = query(job_query, (user.email,))
|
||||||
return JSONResponse(content=jsonable_encoder(results[::-1]))
|
return JSONResponse(content=jsonable_encoder(results[::-1]))
|
||||||
|
|
||||||
|
|||||||
@@ -174,7 +174,9 @@ async def scrape(
|
|||||||
|
|
||||||
for page in pages:
|
for page in pages:
|
||||||
elements.append(
|
elements.append(
|
||||||
await collect_scraped_elements(page, xpaths, job_options["return_html"])
|
await collect_scraped_elements(
|
||||||
|
page, xpaths, job_options.get("return_html", False)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return elements
|
return elements
|
||||||
|
|||||||
2
next-env.d.ts
vendored
2
next-env.d.ts
vendored
@@ -2,4 +2,4 @@
|
|||||||
/// <reference types="next/image-types/global" />
|
/// <reference types="next/image-types/global" />
|
||||||
|
|
||||||
// NOTE: This file should not be edited
|
// NOTE: This file should not be edited
|
||||||
// see https://nextjs.org/docs/basic-features/typescript for more information.
|
// see https://nextjs.org/docs/pages/building-your-application/configuring/typescript for more information.
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
import { ExpandedTableInput } from "@/components/common/expanded-table-input";
|
import { ExpandedTableInput } from "@/components/common/expanded-table-input";
|
||||||
|
import { UploadFile } from "@/components/common/upload-file";
|
||||||
|
import { useImportJobConfig } from "@/hooks/use-import-job-config";
|
||||||
import { RawJobOptions } from "@/types";
|
import { RawJobOptions } from "@/types";
|
||||||
import {
|
import {
|
||||||
Code as CodeIcon,
|
Code as CodeIcon,
|
||||||
@@ -26,6 +28,7 @@ import {
|
|||||||
useTheme,
|
useTheme,
|
||||||
} from "@mui/material";
|
} from "@mui/material";
|
||||||
import { Dispatch, SetStateAction, useEffect, useState } from "react";
|
import { Dispatch, SetStateAction, useEffect, useState } from "react";
|
||||||
|
import { toast } from "react-toastify";
|
||||||
|
|
||||||
export type AdvancedJobOptionsDialogProps = {
|
export type AdvancedJobOptionsDialogProps = {
|
||||||
open: boolean;
|
open: boolean;
|
||||||
@@ -43,6 +46,7 @@ export const AdvancedJobOptionsDialog = ({
|
|||||||
multiPageScrapeEnabled = true,
|
multiPageScrapeEnabled = true,
|
||||||
}: AdvancedJobOptionsDialogProps) => {
|
}: AdvancedJobOptionsDialogProps) => {
|
||||||
const theme = useTheme();
|
const theme = useTheme();
|
||||||
|
const { handleUploadFile } = useImportJobConfig();
|
||||||
const [localJobOptions, setLocalJobOptions] =
|
const [localJobOptions, setLocalJobOptions] =
|
||||||
useState<RawJobOptions>(jobOptions);
|
useState<RawJobOptions>(jobOptions);
|
||||||
|
|
||||||
@@ -69,6 +73,18 @@ export const AdvancedJobOptionsDialog = ({
|
|||||||
onClose();
|
onClose();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const onUploadFile = async (file: File) => {
|
||||||
|
const errorOccured = await handleUploadFile(file);
|
||||||
|
if (errorOccured) {
|
||||||
|
handleClose();
|
||||||
|
toast.error("Failed to upload job config");
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
handleClose();
|
||||||
|
toast.success("Job config uploaded successfully");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Dialog
|
<Dialog
|
||||||
open={open}
|
open={open}
|
||||||
@@ -99,11 +115,18 @@ export const AdvancedJobOptionsDialog = ({
|
|||||||
<Typography variant="h6" component="div">
|
<Typography variant="h6" component="div">
|
||||||
Advanced Job Options
|
Advanced Job Options
|
||||||
</Typography>
|
</Typography>
|
||||||
|
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
|
||||||
|
<UploadFile
|
||||||
|
message="Upload Job Config"
|
||||||
|
fileTypes={["application/json"]}
|
||||||
|
onUploadFile={onUploadFile}
|
||||||
|
/>
|
||||||
<Settings
|
<Settings
|
||||||
sx={{
|
sx={{
|
||||||
color: theme.palette.primary.contrastText,
|
color: theme.palette.primary.contrastText,
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
|
</Box>
|
||||||
</DialogTitle>
|
</DialogTitle>
|
||||||
|
|
||||||
<DialogContent
|
<DialogContent
|
||||||
|
|||||||
1
src/components/common/upload-file/index.ts
Normal file
1
src/components/common/upload-file/index.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export * from "./upload-file";
|
||||||
34
src/components/common/upload-file/upload-file.tsx
Normal file
34
src/components/common/upload-file/upload-file.tsx
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import { Box, Button, Typography } from "@mui/material";
|
||||||
|
|
||||||
|
export type UploadFileProps = {
|
||||||
|
message: string;
|
||||||
|
fileTypes?: string[];
|
||||||
|
onUploadFile: (file: File) => void;
|
||||||
|
};
|
||||||
|
|
||||||
|
export const UploadFile = ({
|
||||||
|
message,
|
||||||
|
fileTypes,
|
||||||
|
onUploadFile,
|
||||||
|
}: UploadFileProps) => {
|
||||||
|
const handleUploadFile = (event: React.ChangeEvent<HTMLInputElement>) => {
|
||||||
|
const file = event.target.files?.[0];
|
||||||
|
if (file) {
|
||||||
|
onUploadFile(file);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Box>
|
||||||
|
<Button variant="contained" component="label">
|
||||||
|
<Typography>{message}</Typography>
|
||||||
|
<input
|
||||||
|
type="file"
|
||||||
|
hidden
|
||||||
|
onChange={handleUploadFile}
|
||||||
|
accept={fileTypes?.join(",")}
|
||||||
|
/>
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
);
|
||||||
|
};
|
||||||
@@ -1,18 +1,18 @@
|
|||||||
import React from "react";
|
import StarIcon from "@mui/icons-material/Star";
|
||||||
import {
|
import {
|
||||||
Tooltip,
|
Box,
|
||||||
|
Button,
|
||||||
|
Checkbox,
|
||||||
IconButton,
|
IconButton,
|
||||||
Table,
|
Table,
|
||||||
TableBody,
|
TableBody,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableHead,
|
TableHead,
|
||||||
TableRow,
|
TableRow,
|
||||||
Box,
|
Tooltip,
|
||||||
Checkbox,
|
|
||||||
Button,
|
|
||||||
} from "@mui/material";
|
} from "@mui/material";
|
||||||
|
import router from "next/router";
|
||||||
import { Job } from "../../types";
|
import { Job } from "../../types";
|
||||||
import StarIcon from "@mui/icons-material/Star";
|
|
||||||
|
|
||||||
interface stateProps {
|
interface stateProps {
|
||||||
selectedJobs: Set<string>;
|
selectedJobs: Set<string>;
|
||||||
@@ -21,7 +21,12 @@ interface stateProps {
|
|||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
onSelectJob: (job: string) => void;
|
onSelectJob: (job: string) => void;
|
||||||
onNavigate: (elements: Object[], url: string, options: any) => void;
|
onNavigate: (
|
||||||
|
id: string,
|
||||||
|
elements: Object[],
|
||||||
|
url: string,
|
||||||
|
options: any
|
||||||
|
) => void;
|
||||||
onFavorite: (ids: string[], field: string, value: any) => void;
|
onFavorite: (ids: string[], field: string, value: any) => void;
|
||||||
stateProps: stateProps;
|
stateProps: stateProps;
|
||||||
}
|
}
|
||||||
@@ -87,11 +92,29 @@ export const Favorites = ({
|
|||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
|
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
|
||||||
<Button
|
<Button
|
||||||
onClick={() =>
|
onClick={() => {
|
||||||
onNavigate(row.elements, row.url, row.job_options)
|
if (row.agent_mode) {
|
||||||
|
router.push({
|
||||||
|
pathname: "/agent",
|
||||||
|
query: {
|
||||||
|
url: row.url,
|
||||||
|
prompt: row.prompt,
|
||||||
|
job_options: JSON.stringify(row.job_options),
|
||||||
|
id: row.id,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
onNavigate(row.id, row.elements, row.url, row.job_options);
|
||||||
}
|
}
|
||||||
|
}}
|
||||||
|
size="small"
|
||||||
|
sx={{
|
||||||
|
minWidth: 0,
|
||||||
|
padding: "4px 8px",
|
||||||
|
fontSize: "0.625rem",
|
||||||
|
}}
|
||||||
>
|
>
|
||||||
Run
|
Rerun
|
||||||
</Button>
|
</Button>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
</TableRow>
|
</TableRow>
|
||||||
|
|||||||
@@ -1,5 +1,11 @@
|
|||||||
"use client";
|
"use client";
|
||||||
import { AutoAwesome, Image, VideoCameraBack } from "@mui/icons-material";
|
import { useExportJobConfig } from "@/hooks/use-export-job-config";
|
||||||
|
import {
|
||||||
|
AutoAwesome,
|
||||||
|
Image,
|
||||||
|
Settings,
|
||||||
|
VideoCameraBack,
|
||||||
|
} from "@mui/icons-material";
|
||||||
import StarIcon from "@mui/icons-material/Star";
|
import StarIcon from "@mui/icons-material/Star";
|
||||||
import {
|
import {
|
||||||
Box,
|
Box,
|
||||||
@@ -30,7 +36,12 @@ interface Props {
|
|||||||
colors: stringMap;
|
colors: stringMap;
|
||||||
onSelectJob: (job: string) => void;
|
onSelectJob: (job: string) => void;
|
||||||
onDownload: (job: string[]) => void;
|
onDownload: (job: string[]) => void;
|
||||||
onNavigate: (elements: Object[], url: string, options: any) => void;
|
onNavigate: (
|
||||||
|
id: string,
|
||||||
|
elements: Object[],
|
||||||
|
url: string,
|
||||||
|
options: any
|
||||||
|
) => void;
|
||||||
onFavorite: (ids: string[], field: string, value: any) => void;
|
onFavorite: (ids: string[], field: string, value: any) => void;
|
||||||
onJobClick: (job: Job) => void;
|
onJobClick: (job: Job) => void;
|
||||||
stateProps: stateProps;
|
stateProps: stateProps;
|
||||||
@@ -46,6 +57,7 @@ export const JobQueue = ({
|
|||||||
onJobClick,
|
onJobClick,
|
||||||
}: Props) => {
|
}: Props) => {
|
||||||
const { selectedJobs, filteredJobs } = stateProps;
|
const { selectedJobs, filteredJobs } = stateProps;
|
||||||
|
const { exportJobConfig } = useExportJobConfig();
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -116,6 +128,17 @@ export const JobQueue = ({
|
|||||||
</IconButton>
|
</IconButton>
|
||||||
</span>
|
</span>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
|
<Tooltip title="Export Job Configuration">
|
||||||
|
<span>
|
||||||
|
<IconButton
|
||||||
|
onClick={() => {
|
||||||
|
exportJobConfig(row);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Settings />
|
||||||
|
</IconButton>
|
||||||
|
</span>
|
||||||
|
</Tooltip>
|
||||||
{row.job_options.collect_media && (
|
{row.job_options.collect_media && (
|
||||||
<Tooltip title="View Media">
|
<Tooltip title="View Media">
|
||||||
<span>
|
<span>
|
||||||
@@ -214,10 +237,16 @@ export const JobQueue = ({
|
|||||||
url: row.url,
|
url: row.url,
|
||||||
prompt: row.prompt,
|
prompt: row.prompt,
|
||||||
job_options: JSON.stringify(row.job_options),
|
job_options: JSON.stringify(row.job_options),
|
||||||
|
id: row.id,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
onNavigate(row.elements, row.url, row.job_options);
|
onNavigate(
|
||||||
|
row.id,
|
||||||
|
row.elements,
|
||||||
|
row.url,
|
||||||
|
row.job_options
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
size="small"
|
size="small"
|
||||||
|
|||||||
@@ -47,10 +47,16 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
|
|||||||
setJobDownloadDialogOpen(true);
|
setJobDownloadDialogOpen(true);
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleNavigate = (elements: Object[], url: string, options: any) => {
|
const handleNavigate = (
|
||||||
|
id: string,
|
||||||
|
elements: Object[],
|
||||||
|
url: string,
|
||||||
|
options: any
|
||||||
|
) => {
|
||||||
router.push({
|
router.push({
|
||||||
pathname: "/",
|
pathname: "/",
|
||||||
query: {
|
query: {
|
||||||
|
id,
|
||||||
elements: JSON.stringify(elements),
|
elements: JSON.stringify(elements),
|
||||||
url: url,
|
url: url,
|
||||||
job_options: JSON.stringify(options),
|
job_options: JSON.stringify(options),
|
||||||
|
|||||||
@@ -13,21 +13,44 @@ import { useJobSubmitterProvider } from "./provider";
|
|||||||
|
|
||||||
export const JobSubmitter = () => {
|
export const JobSubmitter = () => {
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const { job_options } = router.query;
|
const { job_options, id } = router.query;
|
||||||
const { user } = useUser();
|
const { user } = useUser();
|
||||||
|
|
||||||
const { submitJob, loading, error } = useSubmitJob();
|
const { submitJob, loading, error } = useSubmitJob();
|
||||||
const { submittedURL, rows, siteMap, setSiteMap, jobOptions, setJobOptions } =
|
const {
|
||||||
useJobSubmitterProvider();
|
jobId,
|
||||||
|
setJobId,
|
||||||
|
submittedURL,
|
||||||
|
rows,
|
||||||
|
siteMap,
|
||||||
|
setSiteMap,
|
||||||
|
jobOptions,
|
||||||
|
setJobOptions,
|
||||||
|
} = useJobSubmitterProvider();
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (job_options) {
|
if (job_options) {
|
||||||
parseJobOptions(job_options as string, setJobOptions, setSiteMap);
|
parseJobOptions(
|
||||||
|
id as string,
|
||||||
|
job_options as string,
|
||||||
|
setJobOptions,
|
||||||
|
setSiteMap,
|
||||||
|
setJobId
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}, [job_options]);
|
}, [job_options]);
|
||||||
|
|
||||||
const handleSubmit = async () => {
|
const handleSubmit = async () => {
|
||||||
await submitJob(submittedURL, rows, user, jobOptions, siteMap, false, null);
|
await submitJob(
|
||||||
|
submittedURL,
|
||||||
|
rows,
|
||||||
|
user,
|
||||||
|
jobOptions,
|
||||||
|
siteMap,
|
||||||
|
false,
|
||||||
|
null,
|
||||||
|
jobId
|
||||||
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ import React, {
|
|||||||
} from "react";
|
} from "react";
|
||||||
|
|
||||||
type JobSubmitterProviderType = {
|
type JobSubmitterProviderType = {
|
||||||
|
jobId: string;
|
||||||
|
setJobId: Dispatch<React.SetStateAction<string>>;
|
||||||
submittedURL: string;
|
submittedURL: string;
|
||||||
setSubmittedURL: Dispatch<React.SetStateAction<string>>;
|
setSubmittedURL: Dispatch<React.SetStateAction<string>>;
|
||||||
rows: Element[];
|
rows: Element[];
|
||||||
@@ -36,6 +38,7 @@ const JobSubmitterProvider = createContext<JobSubmitterProviderType>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
export const Provider = ({ children }: PropsWithChildren) => {
|
export const Provider = ({ children }: PropsWithChildren) => {
|
||||||
|
const [jobId, setJobId] = useState<string>("");
|
||||||
const [submittedURL, setSubmittedURL] = useState<string>("");
|
const [submittedURL, setSubmittedURL] = useState<string>("");
|
||||||
const [rows, setRows] = useState<Element[]>([]);
|
const [rows, setRows] = useState<Element[]>([]);
|
||||||
const [results, setResults] = useState<Result>({});
|
const [results, setResults] = useState<Result>({});
|
||||||
@@ -55,6 +58,8 @@ export const Provider = ({ children }: PropsWithChildren) => {
|
|||||||
|
|
||||||
const value: JobSubmitterProviderType = useMemo(
|
const value: JobSubmitterProviderType = useMemo(
|
||||||
() => ({
|
() => ({
|
||||||
|
jobId,
|
||||||
|
setJobId,
|
||||||
submittedURL,
|
submittedURL,
|
||||||
setSubmittedURL,
|
setSubmittedURL,
|
||||||
rows,
|
rows,
|
||||||
@@ -76,6 +81,7 @@ export const Provider = ({ children }: PropsWithChildren) => {
|
|||||||
closeSnackbar,
|
closeSnackbar,
|
||||||
}),
|
}),
|
||||||
[
|
[
|
||||||
|
jobId,
|
||||||
submittedURL,
|
submittedURL,
|
||||||
rows,
|
rows,
|
||||||
results,
|
results,
|
||||||
|
|||||||
@@ -15,14 +15,14 @@ export const useAdvancedJobOptions = () => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const { job_options } = router.query;
|
const { job_options, job_id } = router.query;
|
||||||
|
|
||||||
const [jobOptions, setJobOptions] =
|
const [jobOptions, setJobOptions] =
|
||||||
useState<RawJobOptions>(initialJobOptions);
|
useState<RawJobOptions>(initialJobOptions);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (job_options) {
|
if (job_options) {
|
||||||
parseJobOptions(job_options as string, setJobOptions);
|
parseJobOptions(job_id as string, job_options as string, setJobOptions);
|
||||||
}
|
}
|
||||||
}, [job_options]);
|
}, [job_options]);
|
||||||
|
|
||||||
|
|||||||
27
src/hooks/use-export-job-config.ts
Normal file
27
src/hooks/use-export-job-config.ts
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import { Job } from "@/types";
|
||||||
|
|
||||||
|
export const useExportJobConfig = () => {
|
||||||
|
const exportJobConfig = async (job: Job) => {
|
||||||
|
const jobConfig = {
|
||||||
|
url: job.url,
|
||||||
|
prompt: job.prompt,
|
||||||
|
job_options: job.job_options,
|
||||||
|
elements: job.elements,
|
||||||
|
agent_mode: job.agent_mode,
|
||||||
|
};
|
||||||
|
|
||||||
|
const jobConfigString = JSON.stringify(jobConfig);
|
||||||
|
const blob = new Blob([jobConfigString], { type: "application/json" });
|
||||||
|
const url = window.URL.createObjectURL(blob);
|
||||||
|
const a = document.createElement("a");
|
||||||
|
a.style.display = "none";
|
||||||
|
a.href = url;
|
||||||
|
a.download = `job_${job.id}.json`;
|
||||||
|
document.body.appendChild(a);
|
||||||
|
a.click();
|
||||||
|
window.URL.revokeObjectURL(url);
|
||||||
|
document.body.removeChild(a);
|
||||||
|
};
|
||||||
|
|
||||||
|
return { exportJobConfig };
|
||||||
|
};
|
||||||
83
src/hooks/use-import-job-config.ts
Normal file
83
src/hooks/use-import-job-config.ts
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
|
||||||
|
import { useRouter } from "next/router";
|
||||||
|
import { toast } from "react-toastify";
|
||||||
|
|
||||||
|
export const useImportJobConfig = () => {
|
||||||
|
const router = useRouter();
|
||||||
|
const { setJobOptions, setSiteMap, setSubmittedURL, setRows } =
|
||||||
|
useJobSubmitterProvider();
|
||||||
|
|
||||||
|
const handleUploadFile = (file: File): Promise<boolean> => {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const reader = new FileReader();
|
||||||
|
|
||||||
|
reader.onerror = () => {
|
||||||
|
toast.error("Failed to read file");
|
||||||
|
resolve(true);
|
||||||
|
};
|
||||||
|
|
||||||
|
reader.onload = (e) => {
|
||||||
|
const result = e.target?.result as string;
|
||||||
|
|
||||||
|
if (!result.includes("url")) {
|
||||||
|
toast.error("Invalid job config: missing url");
|
||||||
|
resolve(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!result.includes("job_options")) {
|
||||||
|
toast.error("Invalid job config: missing job_options");
|
||||||
|
resolve(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!result.includes("elements")) {
|
||||||
|
toast.error("Invalid job config: missing elements");
|
||||||
|
resolve(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!result.includes("site_map")) {
|
||||||
|
toast.error("Invalid job config: missing site_map");
|
||||||
|
resolve(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const jobConfig = JSON.parse(result);
|
||||||
|
|
||||||
|
if (jobConfig.agent_mode) {
|
||||||
|
router.push({
|
||||||
|
pathname: "/agent",
|
||||||
|
query: {
|
||||||
|
url: jobConfig.url,
|
||||||
|
prompt: jobConfig.prompt,
|
||||||
|
job_options: JSON.stringify(jobConfig.job_options),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
jobConfig.job_options &&
|
||||||
|
Array.isArray(jobConfig.job_options.proxies)
|
||||||
|
) {
|
||||||
|
jobConfig.job_options.proxies = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
setJobOptions(jobConfig.job_options || {});
|
||||||
|
setSiteMap(jobConfig.site_map);
|
||||||
|
setSubmittedURL(jobConfig.url || "");
|
||||||
|
setRows(jobConfig.elements || []);
|
||||||
|
resolve(false);
|
||||||
|
} catch (error) {
|
||||||
|
toast.error("Failed to parse job config");
|
||||||
|
resolve(true);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
reader.readAsText(file);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
return { handleUploadFile };
|
||||||
|
};
|
||||||
@@ -25,7 +25,8 @@ export const useSubmitJob = () => {
|
|||||||
jobOptions: RawJobOptions,
|
jobOptions: RawJobOptions,
|
||||||
siteMap: SiteMap | null,
|
siteMap: SiteMap | null,
|
||||||
agentMode: boolean,
|
agentMode: boolean,
|
||||||
prompt: string | null
|
prompt: string | null,
|
||||||
|
id?: string
|
||||||
) => {
|
) => {
|
||||||
if (!validateURL(submittedURL)) {
|
if (!validateURL(submittedURL)) {
|
||||||
setIsValidUrl(false);
|
setIsValidUrl(false);
|
||||||
@@ -61,7 +62,8 @@ export const useSubmitJob = () => {
|
|||||||
customCookies,
|
customCookies,
|
||||||
siteMap,
|
siteMap,
|
||||||
agentMode,
|
agentMode,
|
||||||
prompt || undefined
|
prompt || undefined,
|
||||||
|
id
|
||||||
)
|
)
|
||||||
.then(async (response) => {
|
.then(async (response) => {
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
@@ -80,7 +82,10 @@ export const useSubmitJob = () => {
|
|||||||
setSnackbarOpen(true);
|
setSnackbarOpen(true);
|
||||||
})
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
setSnackbarMessage(error || "An error occurred.");
|
const errorMessage =
|
||||||
|
error instanceof Error ? error.message : "An error occurred.";
|
||||||
|
console.log(errorMessage);
|
||||||
|
setSnackbarMessage(errorMessage);
|
||||||
setSnackbarSeverity("error");
|
setSnackbarSeverity("error");
|
||||||
setSnackbarOpen(true);
|
setSnackbarOpen(true);
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ import { Dispatch, SetStateAction } from "react";
|
|||||||
import { RawJobOptions, SiteMap } from "@/types";
|
import { RawJobOptions, SiteMap } from "@/types";
|
||||||
|
|
||||||
export const parseJobOptions = (
|
export const parseJobOptions = (
|
||||||
|
id: string,
|
||||||
job_options: string,
|
job_options: string,
|
||||||
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
|
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
|
||||||
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>
|
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>,
|
||||||
|
setJobId?: Dispatch<SetStateAction<string>>
|
||||||
) => {
|
) => {
|
||||||
if (job_options) {
|
if (job_options) {
|
||||||
const jsonOptions = JSON.parse(job_options as string);
|
const jsonOptions = JSON.parse(job_options as string);
|
||||||
@@ -47,6 +49,10 @@ export const parseJobOptions = (
|
|||||||
newJobOptions.return_html = true;
|
newJobOptions.return_html = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (id && setJobId) {
|
||||||
|
setJobId(id);
|
||||||
|
}
|
||||||
|
|
||||||
setJobOptions(newJobOptions);
|
setJobOptions(newJobOptions);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -21,15 +21,16 @@ export default async function handler(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!response.ok) {
|
const result = await response.json();
|
||||||
throw new Error(`Error: ${response.statusText}`);
|
|
||||||
|
if (response.status === 500) {
|
||||||
|
res.status(500).json({ error: result.error });
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await response.json();
|
|
||||||
res.status(200).json(result);
|
res.status(200).json(result);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error submitting scrape job:", error);
|
console.error("Error submitting scrape job:", error);
|
||||||
res.status(500).json({ error: "Internal Server Error" });
|
res.status(500).json({ error: error });
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
res.setHeader("Allow", ["POST"]);
|
res.setHeader("Allow", ["POST"]);
|
||||||
|
|||||||
@@ -9,14 +9,15 @@ export const submitJob = async (
|
|||||||
customCookies: any,
|
customCookies: any,
|
||||||
siteMap: SiteMap | null,
|
siteMap: SiteMap | null,
|
||||||
agentMode: boolean = false,
|
agentMode: boolean = false,
|
||||||
prompt?: string
|
prompt?: string,
|
||||||
|
id?: string
|
||||||
) => {
|
) => {
|
||||||
console.log(user);
|
|
||||||
return await fetch(`/api/submit-scrape-job`, {
|
return await fetch(`/api/submit-scrape-job`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "content-type": "application/json" },
|
headers: { "content-type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
data: {
|
data: {
|
||||||
|
id,
|
||||||
url: submittedURL,
|
url: submittedURL,
|
||||||
elements: rows,
|
elements: rows,
|
||||||
user: user?.email,
|
user: user?.email,
|
||||||
|
|||||||
Reference in New Issue
Block a user