chore: wip add upload/import

Jayden Pyles
2025-06-09 20:29:55 -05:00
parent 87cdd809b6
commit 3cde89c493
12 changed files with 109 additions and 27 deletions

View File

@@ -64,7 +64,7 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
     xpaths = parse_response(response)
     captured_elements = await capture_elements(
-        page, xpaths, agent_job["job_options"]["return_html"]
+        page, xpaths, agent_job["job_options"].get("return_html", False)
     )
     final_url = page.url

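Switching from `job_options["return_html"]` to `.get("return_html", False)` keeps payloads that predate the option (for example, imported job configs) from raising `KeyError`; the same guard appears again in the scrape service below. A minimal sketch of the difference, with an illustrative payload:

job_options = {"multi_page_scrape": False}  # illustrative older config without "return_html"

# job_options["return_html"] would raise KeyError here
print(job_options.get("return_html", False))  # -> False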
View File

@@ -29,6 +29,7 @@ def insert(query: str, values: tuple[Any, ...]):
     except sqlite3.Error as e:
         LOG.error(f"An error occurred: {e}")
         raise e
+    finally:
         cursor.close()

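Moving `cursor.close()` into a `finally:` block releases the cursor on the error path as well, not just on success. A minimal sketch of the assumed shape of this helper (the module-level `connection` is an assumption made for the sake of a runnable example):

import sqlite3
import logging

LOG = logging.getLogger("Database")
connection = sqlite3.connect("data.db")  # assumed; the real module manages its own connection

def insert(query: str, values: tuple) -> None:
    cursor = connection.cursor()
    try:
        cursor.execute(query, values)
        connection.commit()
    except sqlite3.Error as e:
        LOG.error(f"An error occurred: {e}")
        raise e
    finally:
        cursor.close()  # runs whether execute() succeeded or raised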
View File

@@ -49,10 +49,15 @@ async def get_queued_job():
     return res[0] if res else None


-async def update_job(ids: list[str], field: str, value: Any):
-    query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
-    res = update(query, tuple([value] + ids))
-    LOG.info(f"Updated job: {res}")
+async def update_job(ids: list[str], updates: dict[str, Any]):
+    if not updates:
+        return
+
+    set_clause = ", ".join(f"{field} = ?" for field in updates.keys())
+    query = f"UPDATE jobs SET {set_clause} WHERE id IN {format_list_for_query(ids)}"
+    values = list(updates.values()) + ids
+    res = update(query, tuple(values))
+    LOG.debug(f"Updated job: {res}")


 async def delete_jobs(jobs: list[str]):

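The dict-based signature lets one call set several columns in a single UPDATE statement instead of one round trip per field. A hypothetical call site (field names and values are illustrative, not the full jobs schema):

# inside an async context:
await update_job(
    ["a1b2c3d4"],
    {"status": "Completed", "result": "[]"},
)
# builds: UPDATE jobs SET status = ?, result = ? WHERE id IN (?)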
View File

@@ -1,5 +1,6 @@
 # STL
 import logging
+import datetime
 from typing import Any

 # LOCAL
@@ -12,7 +13,19 @@ from api.backend.database.queries.job.job_queries import JOB_INSERT_QUERY
 LOG = logging.getLogger("Job")


-def insert(item: dict[str, Any]) -> None:
+async def insert(item: dict[str, Any]) -> None:
+    if check_for_job_completion(item["id"]):
+        await multi_field_update_job(
+            item["id"],
+            {
+                "status": "Queued",
+                "result": [],
+                "time_created": datetime.datetime.now().isoformat(),
+                "chat": None,
+            },
+        )
+        return
+
     common_insert(
         JOB_INSERT_QUERY,
         (
@@ -33,6 +46,12 @@ def insert(item: dict[str, Any]) -> None:
     LOG.debug(f"Inserted item: {item}")


+def check_for_job_completion(id: str) -> dict[str, Any]:
+    query = "SELECT * FROM jobs WHERE id = ?"
+    res = common_query(query, (id,))
+    return res[0] if res else {}
+
+
 async def get_queued_job():
     query = (
         "SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
@@ -48,6 +67,12 @@ async def update_job(ids: list[str], field: str, value: Any):
     LOG.debug(f"Updated job: {res}")


+async def multi_field_update_job(id: str, fields: dict[str, Any]):
+    query = f"UPDATE jobs SET {', '.join(f'{field} = ?' for field in fields.keys())} WHERE id = ?"
+    res = common_update(query, tuple(list(fields.values()) + [id]))
+    LOG.debug(f"Updated job: {res}")
+
+
 async def delete_jobs(jobs: list[str]):
     if not jobs:
         LOG.debug("No jobs to delete.")

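Taken together, these changes make `insert` idempotent on the job id: when `check_for_job_completion` finds an existing row, the job is re-queued in place (status, result, time_created, and chat are reset) rather than duplicated, which is what lets an uploaded or imported job be re-run. Sketch of the two paths (the payload variables are placeholders):

# inside an async context:
await insert(imported_job)   # its "id" matches a row -> multi_field_update_job(...) re-queues it
await insert(brand_new_job)  # "id" not found -> common_insert(...) creates the row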
View File

@@ -43,10 +43,8 @@ job_router = APIRouter()
 @job_router.post("/update")
 @handle_exceptions(logger=LOG)
 async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
-    """Used to update jobs"""
-    await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)
-    return JSONResponse(content={"message": "Jobs updated successfully."})
+    return {"message": "Jobs updated successfully"}


 @job_router.post("/submit-scrape-job")
@@ -54,9 +52,11 @@ async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
 async def submit_scrape_job(job: Job):
     LOG.info(f"Received job: {job}")
-    job.id = uuid.uuid4().hex
+
+    if not job.id:
+        job.id = uuid.uuid4().hex

     job_dict = job.model_dump()
-    insert(job_dict)
+    await insert(job_dict)

     return JSONResponse(
         content={"id": job.id, "message": "Job submitted successfully."}
@@ -70,7 +70,9 @@ async def retrieve_scrape_jobs(
 ):
     LOG.info(f"Retrieving jobs for account: {user.email}")
     ATTRIBUTES = "chat" if fetch_options.chat else "*"
-    job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
+    job_query = (
+        f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ? ORDER BY time_created ASC"
+    )
     results = query(job_query, (user.email,))
     return JSONResponse(content=jsonable_encoder(results[::-1]))

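At the API level, `submit-scrape-job` now honors a client-supplied id and only mints a fresh `uuid4` when none is sent, so an imported job re-submits under its original id; `retrieve_scrape_jobs` pairs `ORDER BY time_created ASC` with the existing `results[::-1]` reversal to return newest jobs first. A hypothetical request against a local instance (base URL, route prefix, and payload fields are assumptions):

import requests

resp = requests.post(
    "http://localhost:8000/submit-scrape-job",
    json={"id": "a1b2c3d4", "url": "https://example.com", "elements": []},
)
print(resp.json())  # {"id": "a1b2c3d4", "message": "Job submitted successfully."}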
View File

@@ -174,7 +174,9 @@ async def scrape(
     for page in pages:
         elements.append(
-            await collect_scraped_elements(page, xpaths, job_options["return_html"])
+            await collect_scraped_elements(
+                page, xpaths, job_options.get("return_html", False)
+            )
         )

     return elements

View File

@@ -13,21 +13,45 @@ import { useJobSubmitterProvider } from "./provider";
 export const JobSubmitter = () => {
   const router = useRouter();
-  const { job_options } = router.query;
+  const { job_options, id } = router.query;
+
+  console.log(id);

   const { user } = useUser();
   const { submitJob, loading, error } = useSubmitJob();
-  const { submittedURL, rows, siteMap, setSiteMap, jobOptions, setJobOptions } =
-    useJobSubmitterProvider();
+  const {
+    jobId,
+    setJobId,
+    submittedURL,
+    rows,
+    siteMap,
+    setSiteMap,
+    jobOptions,
+    setJobOptions,
+  } = useJobSubmitterProvider();

   useEffect(() => {
     if (job_options) {
-      parseJobOptions(job_options as string, setJobOptions, setSiteMap);
+      parseJobOptions(
+        id as string,
+        job_options as string,
+        setJobOptions,
+        setSiteMap,
+        setJobId
+      );
     }
   }, [job_options]);

   const handleSubmit = async () => {
-    await submitJob(submittedURL, rows, user, jobOptions, siteMap, false, null);
+    await submitJob(
+      submittedURL,
+      rows,
+      user,
+      jobOptions,
+      siteMap,
+      false,
+      null,
+      jobId
+    );
   };

   return (

View File

@@ -10,6 +10,8 @@ import React, {
 } from "react";

 type JobSubmitterProviderType = {
+  jobId: string;
+  setJobId: Dispatch<React.SetStateAction<string>>;
   submittedURL: string;
   setSubmittedURL: Dispatch<React.SetStateAction<string>>;
   rows: Element[];
@@ -36,6 +38,7 @@ const JobSubmitterProvider = createContext<JobSubmitterProviderType>(
 );

 export const Provider = ({ children }: PropsWithChildren) => {
+  const [jobId, setJobId] = useState<string>("");
   const [submittedURL, setSubmittedURL] = useState<string>("");
   const [rows, setRows] = useState<Element[]>([]);
   const [results, setResults] = useState<Result>({});
@@ -55,6 +58,8 @@ export const Provider = ({ children }: PropsWithChildren) => {
   const value: JobSubmitterProviderType = useMemo(
     () => ({
+      jobId,
+      setJobId,
       submittedURL,
       setSubmittedURL,
       rows,
@@ -76,6 +81,7 @@ export const Provider = ({ children }: PropsWithChildren) => {
       closeSnackbar,
     }),
     [
+      jobId,
      submittedURL,
      rows,
      results,

View File

@@ -23,10 +23,17 @@ export const useImportJobConfig = () => {
         });
       }

-      setJobOptions(jobConfig.job_options);
+      if (
+        jobConfig.job_options &&
+        Array.isArray(jobConfig.job_options.proxies)
+      ) {
+        jobConfig.job_options.proxies = "";
+      }
+
+      setJobOptions(jobConfig.job_options || {});
       setSiteMap(jobConfig.site_map);
-      setSubmittedURL(jobConfig.url);
-      setRows(jobConfig.elements);
+      setSubmittedURL(jobConfig.url || "");
+      setRows(jobConfig.elements || []);
     };

     reader.readAsText(file);

View File

@@ -82,7 +82,10 @@ export const useSubmitJob = () => {
         setSnackbarOpen(true);
       })
       .catch((error) => {
-        setSnackbarMessage(error || "An error occurred.");
+        const errorMessage =
+          error instanceof Error ? error.message : "An error occurred.";
+        console.log(errorMessage);
+        setSnackbarMessage(errorMessage);
         setSnackbarSeverity("error");
         setSnackbarOpen(true);
       })

View File

@@ -3,9 +3,11 @@ import { Dispatch, SetStateAction } from "react";
 import { RawJobOptions, SiteMap } from "@/types";

 export const parseJobOptions = (
+  id: string,
   job_options: string,
   setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
-  setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>
+  setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>,
+  setJobId?: Dispatch<SetStateAction<string>>
 ) => {
   if (job_options) {
     const jsonOptions = JSON.parse(job_options as string);
@@ -47,6 +49,10 @@ export const parseJobOptions = (
       newJobOptions.return_html = true;
     }

+    if (id && setJobId) {
+      setJobId(id);
+    }
+
     setJobOptions(newJobOptions);
   }
 };

View File

@@ -21,15 +21,16 @@ export default async function handler(
       }
     );

-    if (!response.ok) {
-      throw new Error(`Error: ${response.statusText}`);
-    }
+    const result = await response.json();

-    const result = await response.json();
+    if (response.status === 500) {
+      res.status(500).json({ error: result.error });
+      return;
+    }
+
     res.status(200).json(result);
   } catch (error) {
     console.error("Error submitting scrape job:", error);
-    res.status(500).json({ error: "Internal Server Error" });
+    res.status(500).json({ error: String(error) });
   }
 } else {
   res.setHeader("Allow", ["POST"]);