8 Commits

Author SHA1 Message Date
github-actions[bot]
e9c60f6338 chore: bump version to 1.1.3
Some checks failed
Merge / version (push) Has been cancelled
Merge / build-and-deploy (push) Has been cancelled
2025-06-12 23:06:34 +00:00
Jayden Pyles
5719a85491 chore: update chart version 2025-06-12 18:07:55 -05:00
github-actions[bot]
052d80de07 chore: bump version to 1.1.3 2025-06-12 23:03:26 +00:00
Jayden Pyles
7047a3c0e3 chore: update chart version 2025-06-12 18:04:47 -05:00
github-actions[bot]
71f603fc62 chore: bump version to 1.1.3 2025-06-12 23:01:57 +00:00
Jayden Pyles
86a77a27df chore: update chart version 2025-06-12 18:03:20 -05:00
github-actions[bot]
b11e263b93 chore: bump version to 1.1.3 2025-06-12 23:00:47 +00:00
Jayden Pyles
91dc13348d feat: add import/export for job configurations (#91)
* chore: wip add upload/import

* chore: wip add upload/import

* feat: update job rerunning

* fix: update workflow

* fix: update workflow

* chore: temp disable workflow
2025-06-12 18:00:39 -05:00
27 changed files with 377 additions and 68 deletions

View File

@@ -10,14 +10,14 @@ on:
- master
jobs:
tests:
uses: ./.github/workflows/tests.yml
secrets:
openai_key: ${{ secrets.OPENAI_KEY }}
discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
# TODO: Re-enable once browser forge is fixed for camoufox, or else tests will never pass
# tests:
# uses: ./.github/workflows/tests.yml
# secrets:
# openai_key: ${{ secrets.OPENAI_KEY }}
# discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
version:
needs: tests
uses: ./.github/workflows/version.yml
secrets:
git_token: ${{ secrets.GPAT_TOKEN }}

View File

@@ -8,11 +8,6 @@ on:
workflow_dispatch:
jobs:
checkout:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
tests:
uses: ./.github/workflows/tests.yml
secrets:

View File

@@ -10,6 +10,8 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-node@v3
- name: Set env
run: echo "ENV=test" >> $GITHUB_ENV
@@ -20,7 +22,7 @@ jobs:
run: pdm install
- name: Install playwright
run: pdm run playwright install
run: pdm run playwright install --with-deps
- name: Run tests
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests

View File

@@ -19,6 +19,7 @@ jobs:
outputs:
version: ${{ steps.set_version.outputs.version }}
version_bump: ${{ steps.check_version_bump.outputs.version_bump }}
steps:
- name: Checkout
@@ -47,10 +48,11 @@ jobs:
id: check_version_bump
run: |
COMMIT_MSG=$(git log -1 --pretty=%B)
if [[ $COMMIT_MSG =~ ^feat\(breaking\) ]]; then
echo "version_bump=true" >> $GITHUB_OUTPUT
elif [[ $COMMIT_MSG =~ .*\[no\ bump\].* ]]; then
if [[ $COMMIT_MSG =~ .*\[no\ bump\].* ]]; then
echo "version_bump=false" >> $GITHUB_OUTPUT
else
echo "version_bump=true" >> $GITHUB_OUTPUT
fi
- name: Skip version bump

View File

@@ -7,6 +7,7 @@ from camoufox import AsyncCamoufox
from playwright.async_api import Page
# LOCAL
from api.backend.constants import RECORDINGS_ENABLED
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
from api.backend.job.models import CapturedElement
from api.backend.worker.logger import LOG
@@ -29,11 +30,13 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
LOG.info(f"Starting work for agent job: {agent_job}")
pages = set()
proxy = None
if agent_job["job_options"]["proxies"]:
proxy = random.choice(agent_job["job_options"]["proxies"])
LOG.info(f"Using proxy: {proxy}")
async with AsyncCamoufox(headless=True) as browser:
async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
page: Page = await browser.new_page()
await add_custom_items(
@@ -64,7 +67,7 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
xpaths = parse_response(response)
captured_elements = await capture_elements(
page, xpaths, agent_job["job_options"]["return_html"]
page, xpaths, agent_job["job_options"].get("return_html", False)
)
final_url = page.url

View File

@@ -29,6 +29,7 @@ def insert(query: str, values: tuple[Any, ...]):
except sqlite3.Error as e:
LOG.error(f"An error occurred: {e}")
raise e
finally:
cursor.close()

View File

@@ -49,10 +49,15 @@ async def get_queued_job():
return res[0] if res else None
async def update_job(ids: list[str], field: str, value: Any):
query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
res = update(query, tuple([value] + ids))
LOG.info(f"Updated job: {res}")
async def update_job(ids: list[str], updates: dict[str, Any]):
if not updates:
return
set_clause = ", ".join(f"{field} = ?" for field in updates.keys())
query = f"UPDATE jobs SET {set_clause} WHERE id IN {format_list_for_query(ids)}"
values = list(updates.values()) + ids
res = update(query, tuple(values))
LOG.debug(f"Updated job: {res}")
async def delete_jobs(jobs: list[str]):

View File

@@ -1,5 +1,6 @@
# STL
import logging
import datetime
from typing import Any
# LOCAL
@@ -12,7 +13,23 @@ from api.backend.database.queries.job.job_queries import JOB_INSERT_QUERY
LOG = logging.getLogger("Job")
def insert(item: dict[str, Any]) -> None:
async def insert(item: dict[str, Any]) -> None:
if check_for_job_completion(item["id"]):
await multi_field_update_job(
item["id"],
{
"agent_mode": item["agent_mode"],
"prompt": item["prompt"],
"job_options": item["job_options"],
"elements": item["elements"],
"status": "Queued",
"result": [],
"time_created": datetime.datetime.now().isoformat(),
"chat": None,
},
)
return
common_insert(
JOB_INSERT_QUERY,
(
@@ -33,6 +50,12 @@ def insert(item: dict[str, Any]) -> None:
LOG.debug(f"Inserted item: {item}")
def check_for_job_completion(id: str) -> dict[str, Any]:
    """Look up the job row whose ``id`` column equals ``id``.

    Despite the name, no status column is inspected here: the function only
    reports whether a row with this id exists, returning it when it does.

    Args:
        id: Value bound to the ``id`` placeholder of the query.

    Returns:
        The first matching row, or an empty dict when the query yields no rows.
    """
    rows = common_query("SELECT * FROM jobs WHERE id = ?", (id,))
    if not rows:
        return {}
    return rows[0]
async def get_queued_job():
query = (
"SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
@@ -48,6 +71,12 @@ async def update_job(ids: list[str], field: str, value: Any):
LOG.debug(f"Updated job: {res}")
async def multi_field_update_job(id: str, fields: dict[str, Any]):
    """Update several columns of one job row with a single UPDATE statement.

    Args:
        id: Value matched against the ``id`` column of the ``jobs`` table.
        fields: Mapping of column name to new value. Column names are
            interpolated into the SQL text, so they must come from trusted
            code rather than user input; the values are bound as parameters.

    Returns:
        None. An empty ``fields`` mapping is a no-op.
    """
    # An empty mapping would render "UPDATE jobs SET  WHERE id = ?", which is
    # not valid SQL, so return before touching the database.
    if not fields:
        return

    set_clause = ", ".join(f"{field} = ?" for field in fields)
    query = f"UPDATE jobs SET {set_clause} WHERE id = ?"
    # Parameters follow placeholder order: one per column, then the row id.
    res = common_update(query, (*fields.values(), id))
    LOG.debug(f"Updated job: {res}")
async def delete_jobs(jobs: list[str]):
if not jobs:
LOG.debug("No jobs to delete.")

View File

@@ -43,10 +43,8 @@ job_router = APIRouter()
@job_router.post("/update")
@handle_exceptions(logger=LOG)
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
"""Used to update jobs"""
await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)
return JSONResponse(content={"message": "Jobs updated successfully."})
return {"message": "Jobs updated successfully"}
@job_router.post("/submit-scrape-job")
@@ -54,9 +52,11 @@ async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
async def submit_scrape_job(job: Job):
LOG.info(f"Recieved job: {job}")
job.id = uuid.uuid4().hex
if not job.id:
job.id = uuid.uuid4().hex
job_dict = job.model_dump()
insert(job_dict)
await insert(job_dict)
return JSONResponse(
content={"id": job.id, "message": "Job submitted successfully."}
@@ -70,7 +70,9 @@ async def retrieve_scrape_jobs(
):
LOG.info(f"Retrieving jobs for account: {user.email}")
ATTRIBUTES = "chat" if fetch_options.chat else "*"
job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
job_query = (
f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ? ORDER BY time_created ASC"
)
results = query(job_query, (user.email,))
return JSONResponse(content=jsonable_encoder(results[::-1]))

View File

@@ -174,7 +174,9 @@ async def scrape(
for page in pages:
elements.append(
await collect_scraped_elements(page, xpaths, job_options["return_html"])
await collect_scraped_elements(
page, xpaths, job_options.get("return_html", False)
)
)
return elements

View File

@@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.1.2
version: 1.1.3
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to

2
next-env.d.ts vendored
View File

@@ -2,4 +2,4 @@
/// <reference types="next/image-types/global" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.
// see https://nextjs.org/docs/pages/building-your-application/configuring/typescript for more information.

View File

@@ -1,4 +1,6 @@
import { ExpandedTableInput } from "@/components/common/expanded-table-input";
import { UploadFile } from "@/components/common/upload-file";
import { useImportJobConfig } from "@/hooks/use-import-job-config";
import { RawJobOptions } from "@/types";
import {
Code as CodeIcon,
@@ -26,6 +28,7 @@ import {
useTheme,
} from "@mui/material";
import { Dispatch, SetStateAction, useEffect, useState } from "react";
import { toast } from "react-toastify";
export type AdvancedJobOptionsDialogProps = {
open: boolean;
@@ -43,6 +46,7 @@ export const AdvancedJobOptionsDialog = ({
multiPageScrapeEnabled = true,
}: AdvancedJobOptionsDialogProps) => {
const theme = useTheme();
const { handleUploadFile } = useImportJobConfig();
const [localJobOptions, setLocalJobOptions] =
useState<RawJobOptions>(jobOptions);
@@ -69,6 +73,18 @@ export const AdvancedJobOptionsDialog = ({
onClose();
};
// Imports the selected job config file, closes the dialog, then reports the outcome.
// `handleUploadFile` resolves to a truthy value when the import failed.
const onUploadFile = async (file: File) => {
  const failed = await handleUploadFile(file);

  // The dialog is dismissed on both outcomes, before the toast is shown.
  handleClose();

  if (failed) {
    toast.error("Failed to upload job config");
  } else {
    toast.success("Job config uploaded successfully");
  }
};
return (
<Dialog
open={open}
@@ -99,11 +115,18 @@ export const AdvancedJobOptionsDialog = ({
<Typography variant="h6" component="div">
Advanced Job Options
</Typography>
<Settings
sx={{
color: theme.palette.primary.contrastText,
}}
/>
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
<UploadFile
message="Upload Job Config"
fileTypes={["application/json"]}
onUploadFile={onUploadFile}
/>
<Settings
sx={{
color: theme.palette.primary.contrastText,
}}
/>
</Box>
</DialogTitle>
<DialogContent

View File

@@ -0,0 +1 @@
export * from "./upload-file";

View File

@@ -0,0 +1,34 @@
import { Box, Button, Typography } from "@mui/material";
/** Props accepted by the `UploadFile` button. */
export type UploadFileProps = {
/** Text rendered inside the button. */
message: string;
/** Accepted file types; joined with "," and passed to the input's `accept` attribute. */
fileTypes?: string[];
/** Called with the first file the user selected. */
onUploadFile: (file: File) => void;
};
/**
 * Button that wraps a hidden file input and forwards the chosen file to the caller.
 *
 * @param message - Text rendered inside the button.
 * @param fileTypes - Optional accepted types, joined with "," for the input's `accept` attribute.
 * @param onUploadFile - Invoked with the first selected file; not called when nothing was selected.
 */
export const UploadFile = ({
  message,
  fileTypes,
  onUploadFile,
}: UploadFileProps) => {
  const handleUploadFile = (event: React.ChangeEvent<HTMLInputElement>) => {
    const input = event.target;
    const file = input.files?.[0];

    // Clear the selection so choosing the same file again still fires `change`.
    // The File reference captured above stays usable after the reset.
    input.value = "";

    if (file) {
      onUploadFile(file);
    }
  };

  return (
    <Box>
      {/* component="label" makes a click on the button open the hidden input's picker. */}
      <Button variant="contained" component="label">
        <Typography>{message}</Typography>
        <input
          type="file"
          hidden
          onChange={handleUploadFile}
          accept={fileTypes?.join(",")}
        />
      </Button>
    </Box>
  );
};

View File

@@ -1,18 +1,18 @@
import React from "react";
import StarIcon from "@mui/icons-material/Star";
import {
Tooltip,
Box,
Button,
Checkbox,
IconButton,
Table,
TableBody,
TableCell,
TableHead,
TableRow,
Box,
Checkbox,
Button,
Tooltip,
} from "@mui/material";
import router from "next/router";
import { Job } from "../../types";
import StarIcon from "@mui/icons-material/Star";
interface stateProps {
selectedJobs: Set<string>;
@@ -21,7 +21,12 @@ interface stateProps {
interface Props {
onSelectJob: (job: string) => void;
onNavigate: (elements: Object[], url: string, options: any) => void;
onNavigate: (
id: string,
elements: Object[],
url: string,
options: any
) => void;
onFavorite: (ids: string[], field: string, value: any) => void;
stateProps: stateProps;
}
@@ -87,11 +92,29 @@ export const Favorites = ({
</TableCell>
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
<Button
onClick={() =>
onNavigate(row.elements, row.url, row.job_options)
}
onClick={() => {
if (row.agent_mode) {
router.push({
pathname: "/agent",
query: {
url: row.url,
prompt: row.prompt,
job_options: JSON.stringify(row.job_options),
id: row.id,
},
});
} else {
onNavigate(row.id, row.elements, row.url, row.job_options);
}
}}
size="small"
sx={{
minWidth: 0,
padding: "4px 8px",
fontSize: "0.625rem",
}}
>
Run
Rerun
</Button>
</TableCell>
</TableRow>

View File

@@ -1,5 +1,11 @@
"use client";
import { AutoAwesome, Image, VideoCameraBack } from "@mui/icons-material";
import { useExportJobConfig } from "@/hooks/use-export-job-config";
import {
AutoAwesome,
Image,
Settings,
VideoCameraBack,
} from "@mui/icons-material";
import StarIcon from "@mui/icons-material/Star";
import {
Box,
@@ -30,7 +36,12 @@ interface Props {
colors: stringMap;
onSelectJob: (job: string) => void;
onDownload: (job: string[]) => void;
onNavigate: (elements: Object[], url: string, options: any) => void;
onNavigate: (
id: string,
elements: Object[],
url: string,
options: any
) => void;
onFavorite: (ids: string[], field: string, value: any) => void;
onJobClick: (job: Job) => void;
stateProps: stateProps;
@@ -46,6 +57,7 @@ export const JobQueue = ({
onJobClick,
}: Props) => {
const { selectedJobs, filteredJobs } = stateProps;
const { exportJobConfig } = useExportJobConfig();
const router = useRouter();
return (
@@ -116,6 +128,17 @@ export const JobQueue = ({
</IconButton>
</span>
</Tooltip>
<Tooltip title="Export Job Configuration">
<span>
<IconButton
onClick={() => {
exportJobConfig(row);
}}
>
<Settings />
</IconButton>
</span>
</Tooltip>
{row.job_options.collect_media && (
<Tooltip title="View Media">
<span>
@@ -214,10 +237,16 @@ export const JobQueue = ({
url: row.url,
prompt: row.prompt,
job_options: JSON.stringify(row.job_options),
id: row.id,
},
});
} else {
onNavigate(row.elements, row.url, row.job_options);
onNavigate(
row.id,
row.elements,
row.url,
row.job_options
);
}
}}
size="small"

View File

@@ -47,10 +47,16 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
setJobDownloadDialogOpen(true);
};
const handleNavigate = (elements: Object[], url: string, options: any) => {
const handleNavigate = (
id: string,
elements: Object[],
url: string,
options: any
) => {
router.push({
pathname: "/",
query: {
id,
elements: JSON.stringify(elements),
url: url,
job_options: JSON.stringify(options),

View File

@@ -13,21 +13,44 @@ import { useJobSubmitterProvider } from "./provider";
export const JobSubmitter = () => {
const router = useRouter();
const { job_options } = router.query;
const { job_options, id } = router.query;
const { user } = useUser();
const { submitJob, loading, error } = useSubmitJob();
const { submittedURL, rows, siteMap, setSiteMap, jobOptions, setJobOptions } =
useJobSubmitterProvider();
const {
jobId,
setJobId,
submittedURL,
rows,
siteMap,
setSiteMap,
jobOptions,
setJobOptions,
} = useJobSubmitterProvider();
useEffect(() => {
if (job_options) {
parseJobOptions(job_options as string, setJobOptions, setSiteMap);
parseJobOptions(
id as string,
job_options as string,
setJobOptions,
setSiteMap,
setJobId
);
}
}, [job_options]);
const handleSubmit = async () => {
await submitJob(submittedURL, rows, user, jobOptions, siteMap, false, null);
await submitJob(
submittedURL,
rows,
user,
jobOptions,
siteMap,
false,
null,
jobId
);
};
return (

View File

@@ -10,6 +10,8 @@ import React, {
} from "react";
type JobSubmitterProviderType = {
jobId: string;
setJobId: Dispatch<React.SetStateAction<string>>;
submittedURL: string;
setSubmittedURL: Dispatch<React.SetStateAction<string>>;
rows: Element[];
@@ -36,6 +38,7 @@ const JobSubmitterProvider = createContext<JobSubmitterProviderType>(
);
export const Provider = ({ children }: PropsWithChildren) => {
const [jobId, setJobId] = useState<string>("");
const [submittedURL, setSubmittedURL] = useState<string>("");
const [rows, setRows] = useState<Element[]>([]);
const [results, setResults] = useState<Result>({});
@@ -55,6 +58,8 @@ export const Provider = ({ children }: PropsWithChildren) => {
const value: JobSubmitterProviderType = useMemo(
() => ({
jobId,
setJobId,
submittedURL,
setSubmittedURL,
rows,
@@ -76,6 +81,7 @@ export const Provider = ({ children }: PropsWithChildren) => {
closeSnackbar,
}),
[
jobId,
submittedURL,
rows,
results,

View File

@@ -15,14 +15,14 @@ export const useAdvancedJobOptions = () => {
};
const router = useRouter();
const { job_options } = router.query;
const { job_options, job_id } = router.query;
const [jobOptions, setJobOptions] =
useState<RawJobOptions>(initialJobOptions);
useEffect(() => {
if (job_options) {
parseJobOptions(job_options as string, setJobOptions);
parseJobOptions(job_id as string, job_options as string, setJobOptions);
}
}, [job_options]);

View File

@@ -0,0 +1,27 @@
import { Job } from "@/types";
/**
 * Hook exposing `exportJobConfig`, which downloads a job's configuration as a JSON file.
 */
export const useExportJobConfig = () => {
  /**
   * Serialises the job's url, prompt, job_options, elements and agent_mode to JSON
   * and triggers a browser download named `job_<id>.json`.
   */
  const exportJobConfig = async (job: Job) => {
    const { url, prompt, job_options, elements, agent_mode } = job;
    const payload = JSON.stringify({
      url,
      prompt,
      job_options,
      elements,
      agent_mode,
    });

    const objectUrl = window.URL.createObjectURL(
      new Blob([payload], { type: "application/json" })
    );

    // A hidden, temporary anchor is clicked programmatically to start the download.
    const anchor = document.createElement("a");
    anchor.style.display = "none";
    anchor.href = objectUrl;
    anchor.download = `job_${job.id}.json`;

    document.body.appendChild(anchor);
    anchor.click();

    // Release the object URL and remove the temporary anchor again.
    window.URL.revokeObjectURL(objectUrl);
    document.body.removeChild(anchor);
  };

  return { exportJobConfig };
};

View File

@@ -0,0 +1,83 @@
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
import { useRouter } from "next/router";
import { toast } from "react-toastify";
/**
 * Hook exposing `handleUploadFile`, which loads a job configuration from a JSON file
 * into the job-submitter provider state.
 */
export const useImportJobConfig = () => {
  const router = useRouter();
  const { setJobOptions, setSiteMap, setSubmittedURL, setRows } =
    useJobSubmitterProvider();

  // True when `value` is a non-null object that has `key` as a property.
  const hasKey = (value: unknown, key: string): boolean =>
    typeof value === "object" && value !== null && key in value;

  /**
   * Reads `file` as text, validates the parsed config and applies it to the submitter state.
   *
   * @param file - The job config file selected by the user.
   * @returns A promise resolving to `true` when an error occurred (an error toast has
   *   already been shown) and `false` on success. The promise never rejects.
   */
  const handleUploadFile = (file: File): Promise<boolean> => {
    return new Promise((resolve) => {
      // Every failure path shows a toast and resolves with `true`.
      const fail = (message: string) => {
        toast.error(message);
        resolve(true);
      };

      const reader = new FileReader();

      reader.onerror = () => fail("Failed to read file");

      reader.onload = (e) => {
        const result = e.target?.result;

        // Guard the type so a non-string result cannot throw and leave the promise unsettled.
        if (typeof result !== "string") {
          fail("Failed to read file");
          return;
        }

        try {
          const jobConfig = JSON.parse(result);

          // Validate keys on the parsed object rather than substrings of the raw text,
          // so an unrelated occurrence such as "curl" no longer satisfies the "url" check.
          for (const key of ["url", "job_options", "elements"]) {
            if (!hasKey(jobConfig, key)) {
              fail(`Invalid job config: missing ${key}`);
              return;
            }
          }

          // The site map may sit at the top level or inside job_options.
          if (
            !hasKey(jobConfig, "site_map") &&
            !hasKey(jobConfig.job_options, "site_map")
          ) {
            fail("Invalid job config: missing site_map");
            return;
          }

          if (jobConfig.agent_mode) {
            router.push({
              pathname: "/agent",
              query: {
                url: jobConfig.url,
                prompt: jobConfig.prompt,
                job_options: JSON.stringify(jobConfig.job_options),
              },
            });
          }

          // NOTE(review): an imported proxies array is replaced by an empty string, so
          // imported proxies are dropped. Presumably the form state keeps proxies as a
          // string; confirm whether joining the array was intended instead.
          if (
            jobConfig.job_options &&
            Array.isArray(jobConfig.job_options.proxies)
          ) {
            jobConfig.job_options.proxies = "";
          }

          setJobOptions(jobConfig.job_options || {});
          // Fall back to the nested site map so the state never becomes `undefined`.
          setSiteMap(
            jobConfig.site_map ?? jobConfig.job_options?.site_map ?? null
          );
          setSubmittedURL(jobConfig.url || "");
          setRows(jobConfig.elements || []);

          resolve(false);
        } catch {
          fail("Failed to parse job config");
        }
      };

      reader.readAsText(file);
    });
  };

  return { handleUploadFile };
};

View File

@@ -25,7 +25,8 @@ export const useSubmitJob = () => {
jobOptions: RawJobOptions,
siteMap: SiteMap | null,
agentMode: boolean,
prompt: string | null
prompt: string | null,
id?: string
) => {
if (!validateURL(submittedURL)) {
setIsValidUrl(false);
@@ -61,7 +62,8 @@ export const useSubmitJob = () => {
customCookies,
siteMap,
agentMode,
prompt || undefined
prompt || undefined,
id
)
.then(async (response) => {
if (!response.ok) {
@@ -80,7 +82,10 @@ export const useSubmitJob = () => {
setSnackbarOpen(true);
})
.catch((error) => {
setSnackbarMessage(error || "An error occurred.");
const errorMessage =
error instanceof Error ? error.message : "An error occurred.";
console.log(errorMessage);
setSnackbarMessage(errorMessage);
setSnackbarSeverity("error");
setSnackbarOpen(true);
})

View File

@@ -3,9 +3,11 @@ import { Dispatch, SetStateAction } from "react";
import { RawJobOptions, SiteMap } from "@/types";
export const parseJobOptions = (
id: string,
job_options: string,
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>
setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>,
setJobId?: Dispatch<SetStateAction<string>>
) => {
if (job_options) {
const jsonOptions = JSON.parse(job_options as string);
@@ -47,6 +49,10 @@ export const parseJobOptions = (
newJobOptions.return_html = true;
}
if (id && setJobId) {
setJobId(id);
}
setJobOptions(newJobOptions);
}
};

View File

@@ -21,15 +21,16 @@ export default async function handler(
}
);
if (!response.ok) {
throw new Error(`Error: ${response.statusText}`);
const result = await response.json();
if (response.status === 500) {
res.status(500).json({ error: result.error });
}
const result = await response.json();
res.status(200).json(result);
} catch (error) {
console.error("Error submitting scrape job:", error);
res.status(500).json({ error: "Internal Server Error" });
res.status(500).json({ error: error });
}
} else {
res.setHeader("Allow", ["POST"]);

View File

@@ -9,14 +9,15 @@ export const submitJob = async (
customCookies: any,
siteMap: SiteMap | null,
agentMode: boolean = false,
prompt?: string
prompt?: string,
id?: string
) => {
console.log(user);
return await fetch(`/api/submit-scrape-job`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
data: {
id,
url: submittedURL,
elements: rows,
user: user?.email,