mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-11-25 02:26:37 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7f1bc295ac |
@@ -43,6 +43,14 @@ async def llama_chat(chat_messages: list[Message]) -> AsyncGenerator[str, None]:
|
||||
async def openai_chat(
|
||||
chat_messages: Iterable[ChatCompletionMessageParam],
|
||||
) -> AsyncGenerator[str, None]:
|
||||
if openai_client and not open_ai_model:
|
||||
LOG.error("OpenAI model is not set")
|
||||
yield "An error occurred while processing your request."
|
||||
|
||||
if not openai_client:
|
||||
LOG.error("OpenAI client is not set")
|
||||
yield "An error occurred while processing your request."
|
||||
|
||||
if openai_client and open_ai_model:
|
||||
try:
|
||||
response = openai_client.chat.completions.create(
|
||||
|
||||
36
api/backend/job/utils/clean_job_format.py
Normal file
36
api/backend/job/utils/clean_job_format.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from typing import Any
|
||||
|
||||
from api.backend.utils import clean_text
|
||||
|
||||
|
||||
def _iter_job_rows(job: dict[str, Any]):
    """
    Yield one row dictionary per captured value of ``job``.

    ``job["result"]`` is walked as a list of mappings of the form
    ``url -> element name -> list of captured values``; values whose
    cleaned text is empty are skipped.
    """
    for page in job["result"]:
        for url, elements in page.items():
            for element_name, captures in elements.items():
                for capture in captures:
                    cleaned = clean_text(capture.get("text", "")).strip()

                    # Captures without any usable text produce no row.
                    if not cleaned:
                        continue

                    yield {
                        "id": job.get("id", ""),
                        "url": url,
                        "element_name": element_name,
                        "xpath": capture.get("xpath", ""),
                        "text": cleaned,
                        "user": job.get("user", ""),
                        "time_created": job.get("time_created", ""),
                    }


def clean_job_format(jobs: list[dict[str, Any]]) -> dict[str, Any]:
    """
    Flatten the results of the given jobs into a table-like dictionary.

    Every captured value with non-empty cleaned text becomes one row that
    carries the owning job's ``id``, ``user`` and ``time_created`` next to
    the url, element name, xpath and text of the capture.

    Returns a dictionary holding the column names under ``"headers"`` and
    the collected row dictionaries under ``"rows"``.
    """
    collected: list[dict[str, Any]] = []

    for job in jobs:
        collected.extend(_iter_job_rows(job))

    return {
        "headers": [
            "id",
            "url",
            "element_name",
            "xpath",
            "text",
            "user",
            "time_created",
        ],
        "rows": collected,
    }
|
||||
@@ -39,6 +39,8 @@ from api.backend.job.cron_scheduling.cron_scheduling import (
|
||||
insert_job_from_cron_job,
|
||||
)
|
||||
|
||||
from api.backend.job.utils.clean_job_format import clean_job_format
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
job_router = APIRouter()
|
||||
@@ -145,6 +147,19 @@ async def download(download_job: DownloadJob):
|
||||
return {"error": str(e)}
|
||||
|
||||
|
||||
@job_router.get("/job/{id}/convert-to-csv")
async def convert_to_csv(id: str):
    """
    Return the stored result of one job flattened into headers and rows.

    The job is looked up by ``id`` through a parameterized query and the
    matching records are passed to ``clean_job_format``; its output is sent
    back as a JSON response.

    If anything raises, the exception is logged, the traceback is printed
    and a ``{"error": "<message>"}`` body is returned instead.
    """
    try:
        # Plain literal: nothing is interpolated into the statement, the id
        # is bound through the "?" placeholder.
        job_query = "SELECT * FROM jobs WHERE id = ?"
        results = query(job_query, (id,))

        return JSONResponse(content=clean_job_format(results))
    except Exception as e:
        LOG.error(f"Exception occurred: {e}")
        traceback.print_exc()
        return {"error": str(e)}
|
||||
|
||||
|
||||
@job_router.post("/delete-scrape-jobs")
|
||||
async def delete(delete_scrape_jobs: DeleteScrapeJobs):
|
||||
result = await delete_jobs(delete_scrape_jobs.ids)
|
||||
|
||||
165
src/components/common/csv-table/csv-table.tsx
Normal file
165
src/components/common/csv-table/csv-table.tsx
Normal file
@@ -0,0 +1,165 @@
|
||||
import React, { useState } from "react";
|
||||
import {
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableContainer,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Paper,
|
||||
Box,
|
||||
Typography,
|
||||
useTheme,
|
||||
alpha,
|
||||
} from "@mui/material";
|
||||
|
||||
// One table row: string keys mapped to string cell values.
export type CsvRow = {
  [key: string]: string;
};
|
||||
|
||||
// Props accepted by CsvTable.
export type CsvTableProps = {
  // Table data: the rows to render and the header labels shown above them.
  csv: {
    rows: CsvRow[];
    headers: string[];
  };
  // Optional class name forwarded to the outer Box.
  className?: string;
};
|
||||
|
||||
/**
 * Renders `csv` as a compact MUI table with a sticky header.
 *
 * - One header cell is rendered per entry of `csv.headers`.
 * - One body row is rendered per entry of `csv.rows`, with one cell per
 *   value of the row object.
 * - Clicking a row toggles a detail row underneath it that shows `row.text`
 *   with line breaks and tabs replaced by spaces ("No text available" when
 *   the row has no text). Only one row is expanded at a time.
 * - When there are no rows a "No data available" placeholder is shown.
 *
 * `className` is forwarded to the outer container.
 */
export const CsvTable: React.FC<CsvTableProps> = ({ csv, className }) => {
  const [expandedRow, setExpandedRow] = useState<number | null>(null);
  const theme = useTheme();

  // The data comes from a network call and may be null or lack these fields
  // when loading failed; fall back to empty arrays so rendering never throws
  // and the "No data available" placeholder is shown instead.
  const rows: CsvRow[] = Array.isArray(csv?.rows) ? csv.rows : [];
  const headers: string[] = Array.isArray(csv?.headers) ? csv.headers : [];

  // Clicking the already expanded row collapses it again.
  const handleRowClick = (rowIndex: number) => {
    setExpandedRow((prevRow) => (prevRow === rowIndex ? null : rowIndex));
  };

  return (
    <Box
      sx={{
        height: "100%",
        display: "flex",
        flexDirection: "column",
        overflow: "hidden",
        width: "100%",
      }}
      className={className}
    >
      {rows.length > 0 ? (
        <TableContainer
          sx={{
            flex: 1,
            overflow: "auto",
            borderRadius: theme.shape.borderRadius,
            boxShadow: theme.shadows[1],
          }}
        >
          <Table stickyHeader size="small" aria-label="csv data table">
            <TableHead>
              <TableRow>
                {headers.map((header, idx) => (
                  <TableCell
                    key={idx}
                    sx={{
                      fontWeight: "bold",
                      cursor: "pointer",
                      whiteSpace: "nowrap",
                      backgroundColor: theme.palette.background.paper,
                      color: theme.palette.text.primary,
                      "&:hover": {
                        backgroundColor: alpha(theme.palette.primary.main, 0.1),
                      },
                      p: { xs: 1, sm: 2 },
                    }}
                  >
                    {header}
                  </TableCell>
                ))}
              </TableRow>
            </TableHead>
            <TableBody>
              {rows.map((row, rowIndex) => (
                <React.Fragment key={rowIndex}>
                  <TableRow
                    onClick={() => handleRowClick(rowIndex)}
                    sx={{
                      "&:nth-of-type(odd)": {
                        backgroundColor: alpha(
                          theme.palette.primary.main,
                          0.02
                        ),
                      },
                      "&:hover": {
                        backgroundColor: alpha(
                          theme.palette.primary.main,
                          0.04
                        ),
                      },
                      cursor: "pointer",
                    }}
                  >
                    {Object.values(row).map((col, colIndex) => (
                      <TableCell
                        key={colIndex}
                        sx={{
                          whiteSpace: "nowrap",
                          maxWidth: { xs: "150px", sm: "200px", md: "200px" },
                          overflow: "hidden",
                          textOverflow: "ellipsis",
                          p: { xs: 1, sm: 2 },
                        }}
                      >
                        {col}
                      </TableCell>
                    ))}
                  </TableRow>

                  {/* Detail row with the full text of the expanded row. */}
                  {expandedRow === rowIndex && (
                    <TableRow>
                      <TableCell
                        colSpan={headers.length}
                        sx={{ padding: 2 }}
                      >
                        <Paper
                          sx={{
                            padding: 2,
                            backgroundColor: alpha(
                              theme.palette.background.paper,
                              0.5
                            ),
                          }}
                        >
                          <Typography variant="body2" color="text.secondary">
                            {row.text
                              ? row.text
                                  .replace(/(\r\n|\n|\r)/g, " ")
                                  .replace(/\t/g, " ")
                              : "No text available"}
                          </Typography>
                        </Paper>
                      </TableCell>
                    </TableRow>
                  )}
                </React.Fragment>
              ))}
            </TableBody>
          </Table>
        </TableContainer>
      ) : (
        <Paper
          sx={{
            p: 4,
            display: "flex",
            justifyContent: "center",
            alignItems: "center",
            height: "100%",
            borderRadius: theme.shape.borderRadius,
            backgroundColor: alpha(theme.palette.background.paper, 0.5),
            border: `1px dashed ${theme.palette.divider}`,
          }}
        >
          <Typography color="text.secondary">No data available</Typography>
        </Paper>
      )}
    </Box>
  );
};
|
||||
1
src/components/common/csv-table/index.ts
Normal file
1
src/components/common/csv-table/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "./csv-table";
|
||||
@@ -38,6 +38,7 @@ interface Props {
|
||||
onDownload: (job: string[]) => void;
|
||||
onNavigate: (elements: Object[], url: string, options: any) => void;
|
||||
onFavorite: (ids: string[], field: string, value: any) => void;
|
||||
onJobClick: (job: Job) => void;
|
||||
stateProps: stateProps;
|
||||
}
|
||||
|
||||
@@ -48,6 +49,7 @@ export const JobQueue = ({
|
||||
onDownload,
|
||||
onNavigate,
|
||||
onFavorite,
|
||||
onJobClick,
|
||||
}: Props) => {
|
||||
const { selectedJobs, filteredJobs } = stateProps;
|
||||
const router = useRouter();
|
||||
@@ -106,7 +108,14 @@ export const JobQueue = ({
|
||||
</Tooltip>
|
||||
</TableCell>
|
||||
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
|
||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.id}</Box>
|
||||
<Box
|
||||
sx={{
|
||||
maxHeight: 100,
|
||||
overflow: "auto",
|
||||
}}
|
||||
>
|
||||
{row.id}
|
||||
</Box>
|
||||
</TableCell>
|
||||
<TableCell sx={{ maxWidth: 200, overflow: "auto" }}>
|
||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.url}</Box>
|
||||
@@ -117,41 +126,24 @@ export const JobQueue = ({
|
||||
</Box>
|
||||
</TableCell>
|
||||
<TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}>
|
||||
<Accordion sx={{ margin: 0, padding: 0.5 }}>
|
||||
<AccordionSummary
|
||||
expandIcon={<ExpandMoreIcon />}
|
||||
aria-controls="panel1a-content"
|
||||
id="panel1a-header"
|
||||
<Box
|
||||
sx={{
|
||||
maxHeight: 100,
|
||||
overflow: "auto",
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
}}
|
||||
>
|
||||
<Button
|
||||
sx={{
|
||||
minHeight: 0,
|
||||
"&.Mui-expanded": { minHeight: 0 },
|
||||
fontSize: "0.875rem",
|
||||
}}
|
||||
onClick={() => onJobClick(row)}
|
||||
>
|
||||
<Box
|
||||
sx={{
|
||||
maxHeight: 150,
|
||||
overflow: "auto",
|
||||
width: "100%",
|
||||
}}
|
||||
>
|
||||
<Typography sx={{ fontSize: "0.875rem" }}>
|
||||
Show Result
|
||||
</Typography>
|
||||
</Box>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails sx={{ padding: 1 }}>
|
||||
<Box sx={{ maxHeight: 200, overflow: "auto" }}>
|
||||
<Typography
|
||||
sx={{
|
||||
fontSize: "0.875rem",
|
||||
whiteSpace: "pre-wrap",
|
||||
}}
|
||||
>
|
||||
{JSON.stringify(row.result, null, 2)}
|
||||
</Typography>
|
||||
</Box>
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
Show Result
|
||||
</Button>
|
||||
</Box>
|
||||
</TableCell>
|
||||
<TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
|
||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>
|
||||
|
||||
@@ -156,24 +156,8 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
|
||||
});
|
||||
};
|
||||
|
||||
const scrollbarStyles = {
|
||||
"&::-webkit-scrollbar": {
|
||||
width: "8px",
|
||||
height: "8px",
|
||||
},
|
||||
"&::-webkit-scrollbar-track": {
|
||||
backgroundColor: "rgba(0,0,0,0.05)",
|
||||
borderRadius: "8px",
|
||||
},
|
||||
"&::-webkit-scrollbar-thumb": {
|
||||
backgroundColor: "rgba(0,0,0,0.2)",
|
||||
borderRadius: "8px",
|
||||
"&:hover": {
|
||||
backgroundColor: "rgba(0,0,0,0.3)",
|
||||
},
|
||||
},
|
||||
scrollbarWidth: "thin",
|
||||
scrollbarColor: "rgba(0,0,0,0.2) rgba(0,0,0,0.05)",
|
||||
const handleJobClick = (job: Job) => {
|
||||
router.push(`/job/csv/${job.id}`);
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -190,7 +174,6 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
|
||||
maxWidth="100%"
|
||||
bgcolor="background.default"
|
||||
overflow="auto"
|
||||
sx={scrollbarStyles}
|
||||
>
|
||||
<Box
|
||||
className="flex flex-row justify-between p-2 w-full"
|
||||
@@ -275,6 +258,7 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
|
||||
onNavigate={handleNavigate}
|
||||
onSelectJob={handleSelectJob}
|
||||
onFavorite={favoriteJob}
|
||||
onJobClick={handleJobClick}
|
||||
></JobQueue>
|
||||
) : (
|
||||
<Favorites
|
||||
|
||||
35
src/components/pages/job/csv/id/get-server-side-props.ts
Normal file
35
src/components/pages/job/csv/id/get-server-side-props.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { GetServerSideProps } from "next";
|
||||
import { parseCookies } from "nookies";
|
||||
|
||||
/**
 * Server-side loader for the job CSV page.
 *
 * Reads the `id` route parameter and the `token` cookie, requests
 * `/api/job/{id}/convert-to-csv` from the API with the token as a bearer
 * credential, and passes the result to the page as the `csv` prop.
 *
 * The page always receives an object with `rows` and `headers` arrays: if the
 * request fails, the response status is not OK, or the body does not have
 * that shape (for example an `{ error }` body), the problem is logged and an
 * empty table is passed instead.
 */
export const getServerSideProps: GetServerSideProps = async (context) => {
  const { req, params } = context;
  const id = params?.id;

  const cookies = parseCookies({ req });
  const token = cookies.token;

  // Fallback handed to the page whenever the job data cannot be loaded.
  let csv: { rows: unknown[]; headers: unknown[] } = { rows: [], headers: [] };

  try {
    const csvResponse = await fetch(
      `${process.env.NEXT_PUBLIC_API_URL}/api/job/${id}/convert-to-csv`,
      {
        method: "GET",
        headers: {
          "content-type": "application/json",
          Authorization: `Bearer ${token}`,
        },
      }
    );

    // fetch only rejects on network failures; HTTP errors must be checked.
    if (!csvResponse.ok) {
      throw new Error(`Request failed with status ${csvResponse.status}`);
    }

    const payload = await csvResponse.json();

    // Only accept bodies that really look like a table; anything else
    // (such as an error object) would break the table component.
    if (Array.isArray(payload?.rows) && Array.isArray(payload?.headers)) {
      csv = payload;
    } else {
      console.error("Error fetching job: unexpected response body", payload);
    }
  } catch (error) {
    console.error("Error fetching job:", error);
  }

  return {
    props: {
      csv,
    },
  };
};
|
||||
10
src/components/pages/job/csv/id/id.tsx
Normal file
10
src/components/pages/job/csv/id/id.tsx
Normal file
@@ -0,0 +1,10 @@
|
||||
import { CsvRow, CsvTable } from "@/components/common/csv-table/csv-table";
|
||||
|
||||
// Shape of the `csv` prop of the job CSV page: table rows plus header labels.
export type Csv = {
  rows: CsvRow[];
  headers: string[];
};
|
||||
|
||||
// Page component for a single job: displays the given csv data in a CsvTable.
export const JobCsvId = ({ csv }: { csv: Csv }) => <CsvTable csv={csv} />;
|
||||
1
src/components/pages/job/csv/id/index.ts
Normal file
1
src/components/pages/job/csv/id/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "./id";
|
||||
2
src/pages/job/csv/[id].tsx
Normal file
2
src/pages/job/csv/[id].tsx
Normal file
@@ -0,0 +1,2 @@
|
||||
export { JobCsvId as default } from "@/components/pages/job/csv/id";
|
||||
export { getServerSideProps } from "@/components/pages/job/csv/id/get-server-side-props";
|
||||
@@ -5,6 +5,8 @@
|
||||
:root {
|
||||
--delete-red: #ef4444;
|
||||
--delete-red-hover: #ff6969;
|
||||
--primary-blue: #007bff;
|
||||
--primary-gray: #f8f9fa;
|
||||
}
|
||||
|
||||
#__next {
|
||||
@@ -20,3 +22,22 @@ body {
|
||||
.MuiPopover-paper {
|
||||
padding: 0 !important;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-track {
|
||||
background-color: rgba(0, 0, 0, 0.05);
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb {
|
||||
background-color: rgba(0, 0, 0, 0.2);
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb:hover {
|
||||
background-color: rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user