mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-12-17 05:05:49 +00:00
Feature: Allow Multiple Download Options (#75)
* feat: allow downloading in MD format * fix: unit tests * fix: deployments [skip ci] * fix: deployment
This commit is contained in:
2
.github/workflows/docker-image.yml
vendored
2
.github/workflows/docker-image.yml
vendored
@@ -8,7 +8,7 @@ on:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'master' }} || github.event.workflow_dispatch.inputs.branch == 'feat/add-helm-chart'
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' && github.ref == 'refs/heads/master' }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
24
api/backend/job/utils/stream_md_from_job_results.py
Normal file
24
api/backend/job/utils/stream_md_from_job_results.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from typing import Any
|
||||
|
||||
from api.backend.utils import clean_text
|
||||
|
||||
|
||||
def stream_md_from_job_results(jobs: list[dict[str, Any]]):
    """Yield a Markdown report of scrape-job results, one chunk at a time.

    Each job contributes a heading, three metadata bullets (URL, timestamp,
    ID) and, for every scraped URL in its ``"result"`` list, one bullet pair
    per captured element whose cleaned text is non-empty.

    Args:
        jobs: Job records as dictionaries. The keys read here are ``url``,
            ``time_created``, ``id`` and ``result``; missing metadata keys
            are rendered as ``N/A``.

    Yields:
        Consecutive string fragments of the Markdown document.
    """
    # The document title and the per-job heading used to be appended to a
    # local string that was never yielded, so neither reached the output.
    yield "# Job Results Summary\n\n"

    for i, job in enumerate(jobs, start=1):
        yield f"## Job #{i}\n"
        yield f"- **Job URL:** {job.get('url', 'N/A')}\n"
        yield f"- **Timestamp:** {job.get('time_created', 'N/A')}\n"
        yield f"- **ID:** {job.get('id', 'N/A')}\n"
        yield "### Extracted Results:\n"

        # `or []` also covers a job whose "result" key is present but None.
        for res in job.get("result") or []:
            for url, elements in res.items():
                yield f"\n#### URL: {url}\n"
                for element_name, values in elements.items():
                    for value in values:
                        text = clean_text(value.get("text", "")).strip()
                        # Skip captures that are empty after cleaning.
                        if text:
                            yield f"- **Element:** `{element_name}`\n"
                            yield f" - **Text:** {text}\n"

        # NOTE(review): indentation is not visible in this view; the rule is
        # emitted once per job as a divider between job sections - confirm.
        yield "\n---\n"
|
||||
@@ -1,5 +1,5 @@
|
||||
# STL
|
||||
from typing import Any, Optional, Union
|
||||
from typing import Any, Literal, Optional, Union
|
||||
from datetime import datetime
|
||||
|
||||
# LOCAL
|
||||
@@ -27,6 +27,7 @@ class RetrieveScrapeJobs(pydantic.BaseModel):
|
||||
|
||||
class DownloadJob(pydantic.BaseModel):
|
||||
ids: list[str]
|
||||
job_format: Literal["csv", "md"]
|
||||
|
||||
|
||||
class DeleteScrapeJobs(pydantic.BaseModel):
|
||||
|
||||
@@ -40,6 +40,7 @@ from api.backend.job.cron_scheduling.cron_scheduling import (
|
||||
)
|
||||
|
||||
from api.backend.job.utils.clean_job_format import clean_job_format
|
||||
from api.backend.job.utils.stream_md_from_job_results import stream_md_from_job_results
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@@ -106,10 +107,19 @@ async def download(download_job: DownloadJob):
|
||||
)
|
||||
results = query(job_query, tuple(download_job.ids))
|
||||
|
||||
if download_job.job_format == "csv":
|
||||
csv_buffer = StringIO()
|
||||
csv_writer = csv.writer(csv_buffer, quotechar='"', quoting=csv.QUOTE_ALL)
|
||||
|
||||
headers = ["id", "url", "element_name", "xpath", "text", "user", "time_created"]
|
||||
headers = [
|
||||
"id",
|
||||
"url",
|
||||
"element_name",
|
||||
"xpath",
|
||||
"text",
|
||||
"user",
|
||||
"time_created",
|
||||
]
|
||||
csv_writer.writerow(headers)
|
||||
|
||||
for result in results:
|
||||
@@ -141,6 +151,15 @@ async def download(download_job: DownloadJob):
|
||||
response.headers["Content-Disposition"] = "attachment; filename=export.csv"
|
||||
return response
|
||||
|
||||
elif download_job.job_format == "md":
|
||||
response = StreamingResponse(
|
||||
stream_md_from_job_results(results),
|
||||
media_type="text/markdown",
|
||||
)
|
||||
|
||||
response.headers["Content-Disposition"] = "attachment; filename=export.md"
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
LOG.error(f"Exception occurred: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
@@ -21,7 +21,7 @@ async def test_download(mock_randint: AsyncMock, mock_query: AsyncMock):
|
||||
mock_randint.return_value = mocked_random_int
|
||||
|
||||
# Create a DownloadJob instance
|
||||
download_job = DownloadJob(ids=[mocked_job["id"]])
|
||||
download_job = DownloadJob(ids=[mocked_job["id"]], job_format="csv")
|
||||
|
||||
# Make a POST request to the /download endpoint
|
||||
response = client.post("/download", json=download_job.model_dump())
|
||||
|
||||
@@ -30,4 +30,7 @@ EXPOSE 8000
|
||||
|
||||
WORKDIR /project/app
|
||||
|
||||
RUN mkdir -p /project/app/data
|
||||
RUN touch /project/app/data/database.db
|
||||
|
||||
CMD [ "supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf" ]
|
||||
@@ -15,7 +15,7 @@ type: application
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 1.0.12
|
||||
version: 1.0.13
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
|
||||
1
src/components/common/job-download-dialog/index.ts
Normal file
1
src/components/common/job-download-dialog/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "./job-download-dialog";
|
||||
@@ -0,0 +1,95 @@
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
Button,
|
||||
FormControl,
|
||||
RadioGroup,
|
||||
FormControlLabel,
|
||||
Radio,
|
||||
FormLabel,
|
||||
Typography,
|
||||
Box,
|
||||
} from "@mui/material";
|
||||
import { useState } from "react";
|
||||
|
||||
/** Props accepted by the job download dialog. */
export type JobDownloadDialogProps = {
  /** IDs of the jobs to include in the download. */
  ids: Array<string>;
  /** Whether the dialog is currently shown. */
  open: boolean;
  /** Invoked when the dialog requests to be closed. */
  onClose: () => void;
};
|
||||
|
||||
/**
 * Dialog that lets the user choose an export format (CSV or Markdown) for
 * the given jobs and then downloads the file produced by `/api/download`.
 *
 * The chosen format is sent as `job_format` alongside the job `ids`, and is
 * also used as the extension of the saved file (`job_<first id>.<format>`).
 */
export const JobDownloadDialog = ({
  open,
  onClose,
  ids,
}: JobDownloadDialogProps) => {
  const [jobFormat, setJobFormat] = useState<string>("csv");

  const handleDownload = async () => {
    // With no ids there is nothing to request, and the file name below
    // would otherwise become "job_undefined.<format>".
    if (ids.length === 0) {
      console.error("Failed to download the file.");
      return;
    }

    try {
      const response = await fetch("/api/download", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ data: { ids: ids, job_format: jobFormat } }),
      });

      if (!response.ok) {
        console.error("Failed to download the file.");
        return;
      }

      const blob = await response.blob();
      const url = window.URL.createObjectURL(blob);

      try {
        // Trigger the browser download through a temporary hidden anchor.
        const a = document.createElement("a");
        a.style.display = "none";
        a.href = url;
        a.download = `job_${ids[0]}.${jobFormat}`;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
      } finally {
        // Always release the object URL, even if the click path throws.
        window.URL.revokeObjectURL(url);
      }
    } catch (error: unknown) {
      // Network failures and body-read errors reject the promise; log them
      // instead of leaving an unhandled rejection from the click handler.
      console.error("Failed to download the file.", error);
    }
  };

  return (
    <Dialog open={open} onClose={onClose}>
      <DialogTitle>Download Job</DialogTitle>
      <DialogContent>
        <FormControl>
          <Typography variant="body1">
            You are about to download {ids.length} job(s). Please select the
            format that you would like to download them in.
          </Typography>
          <br />
          <Box
            sx={{
              display: "flex",
              flexDirection: "column",
              backgroundColor: "background.paper",
              padding: 2,
              border: "1px solid",
            }}
          >
            {/* The id is what the RadioGroup's aria-labelledby refers to. */}
            <FormLabel id="job-download-format-radio-buttons">Format</FormLabel>
            <hr style={{ width: "100%", margin: "10px 0" }} />
            <RadioGroup
              aria-labelledby="job-download-format-radio-buttons"
              name="job-download-format-radio-buttons"
              value={jobFormat}
              onChange={(e) => setJobFormat(e.target.value)}
            >
              <FormControlLabel value="csv" control={<Radio />} label="CSV" />
              <FormControlLabel
                value="md"
                control={<Radio />}
                label="Markdown"
              />
            </RadioGroup>
          </Box>
          <br />
          <Button
            onClick={handleDownload}
            size="small"
            disabled={ids.length === 0}
          >
            Download
          </Button>
        </FormControl>
      </DialogContent>
    </Dialog>
  );
};
|
||||
@@ -20,6 +20,7 @@ import { Favorites, JobQueue } from ".";
|
||||
import { Job } from "../../types";
|
||||
import Cookies from "js-cookie";
|
||||
import { useSearchParams } from "next/navigation";
|
||||
import { JobDownloadDialog } from "../common/job-download-dialog";
|
||||
|
||||
interface JobTableProps {
|
||||
jobs: Job[];
|
||||
@@ -47,31 +48,15 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
|
||||
const [searchQuery, setSearchQuery] = useState<string>(search || "");
|
||||
const [searchMode, setSearchMode] = useState<string>(type || "url");
|
||||
const [favoriteView, setFavoriteView] = useState<boolean>(false);
|
||||
const [jobDownloadDialogOpen, setJobDownloadDialogOpen] =
|
||||
useState<boolean>(false);
|
||||
|
||||
const token = Cookies.get("token");
|
||||
const router = useRouter();
|
||||
|
||||
const handleDownload = async (ids: string[]) => {
|
||||
const response = await fetch("/api/download", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ data: { ids: ids } }),
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
const blob = await response.blob();
|
||||
const url = window.URL.createObjectURL(blob);
|
||||
const a = document.createElement("a");
|
||||
a.style.display = "none";
|
||||
a.href = url;
|
||||
a.download = `job_${ids[0]}.csv`;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
window.URL.revokeObjectURL(url);
|
||||
document.body.removeChild(a);
|
||||
} else {
|
||||
console.error("Failed to download the file.");
|
||||
}
|
||||
const handleDownload = (ids: string[]) => {
|
||||
setSelectedJobs(new Set(ids));
|
||||
setJobDownloadDialogOpen(true);
|
||||
};
|
||||
|
||||
const handleNavigate = (elements: Object[], url: string, options: any) => {
|
||||
@@ -259,17 +244,22 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
|
||||
onSelectJob={handleSelectJob}
|
||||
onFavorite={favoriteJob}
|
||||
onJobClick={handleJobClick}
|
||||
></JobQueue>
|
||||
/>
|
||||
) : (
|
||||
<Favorites
|
||||
stateProps={{ selectedJobs, filteredJobs }}
|
||||
onNavigate={handleNavigate}
|
||||
onSelectJob={handleSelectJob}
|
||||
onFavorite={favoriteJob}
|
||||
></Favorites>
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
</Box>
|
||||
<JobDownloadDialog
|
||||
open={jobDownloadDialogOpen}
|
||||
onClose={() => setJobDownloadDialogOpen(false)}
|
||||
ids={Array.from(selectedJobs)}
|
||||
/>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user