Feature: Allow Multiple Download Options (#75)

* feat: allow downloading in MD format

* fix: unit tests

* fix: deployments [skip ci]

* fix: deployment
This commit is contained in:
Jayden Pyles
2025-05-13 18:23:59 -05:00
committed by GitHub
parent 267cc73657
commit 1b8c8c779a
10 changed files with 191 additions and 58 deletions

View File

@@ -8,7 +8,7 @@ on:
jobs: jobs:
build: build:
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'master' }} || github.event.workflow_dispatch.inputs.branch == 'feat/add-helm-chart' if: ${{ github.event.workflow_run.conclusion == 'success' && github.ref == 'refs/heads/master' }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout - name: Checkout

View File

@@ -0,0 +1,24 @@
from typing import Any
from api.backend.utils import clean_text
def stream_md_from_job_results(jobs: list[dict[str, Any]]):
    """Stream a Markdown report of scrape-job results, one fragment at a time.

    Args:
        jobs: Job records. ``url``, ``time_created`` and ``id`` are read with
            ``.get`` and fall back to ``"N/A"``. ``result`` is iterated as a
            list of ``{url: {element_name: [{"text": ...}, ...]}}`` mappings.

    Yields:
        Markdown fragments in document order: the report title, then for each
        job a numbered heading, its metadata bullets, one section per scraped
        URL with a bullet pair for every element whose cleaned text is
        non-empty, and a closing horizontal rule.
    """
    # The title and the per-job headings must be yielded, not accumulated in a
    # local string: only yielded fragments are part of the streamed document.
    yield "# Job Results Summary\n\n"

    for i, job in enumerate(jobs, start=1):
        yield f"## Job #{i}\n"
        yield f"- **Job URL:** {job.get('url', 'N/A')}\n"
        yield f"- **Timestamp:** {job.get('time_created', 'N/A')}\n"
        yield f"- **ID:** {job.get('id', 'N/A')}\n"
        yield "### Extracted Results:\n"

        # `or []` also covers a job whose "result" key is present but None.
        for res in job.get("result") or []:
            for url, elements in res.items():
                yield f"\n#### URL: {url}\n"
                for element_name, values in elements.items():
                    for value in values:
                        text = clean_text(value.get("text", "")).strip()
                        # Elements with no text after cleaning are omitted.
                        if text:
                            yield f"- **Element:** `{element_name}`\n"
                            # Two-space indent so the text renders as a child
                            # of the element bullet rather than a sibling.
                            yield f"  - **Text:** {text}\n"

        # Horizontal rule separating this job from the next one.
        yield "\n---\n"

View File

@@ -1,5 +1,5 @@
# STL # STL
from typing import Any, Optional, Union from typing import Any, Literal, Optional, Union
from datetime import datetime from datetime import datetime
# LOCAL # LOCAL
@@ -27,6 +27,7 @@ class RetrieveScrapeJobs(pydantic.BaseModel):
class DownloadJob(pydantic.BaseModel): class DownloadJob(pydantic.BaseModel):
ids: list[str] ids: list[str]
job_format: Literal["csv", "md"]
class DeleteScrapeJobs(pydantic.BaseModel): class DeleteScrapeJobs(pydantic.BaseModel):

View File

@@ -40,6 +40,7 @@ from api.backend.job.cron_scheduling.cron_scheduling import (
) )
from api.backend.job.utils.clean_job_format import clean_job_format from api.backend.job.utils.clean_job_format import clean_job_format
from api.backend.job.utils.stream_md_from_job_results import stream_md_from_job_results
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@@ -106,10 +107,19 @@ async def download(download_job: DownloadJob):
) )
results = query(job_query, tuple(download_job.ids)) results = query(job_query, tuple(download_job.ids))
if download_job.job_format == "csv":
csv_buffer = StringIO() csv_buffer = StringIO()
csv_writer = csv.writer(csv_buffer, quotechar='"', quoting=csv.QUOTE_ALL) csv_writer = csv.writer(csv_buffer, quotechar='"', quoting=csv.QUOTE_ALL)
headers = ["id", "url", "element_name", "xpath", "text", "user", "time_created"] headers = [
"id",
"url",
"element_name",
"xpath",
"text",
"user",
"time_created",
]
csv_writer.writerow(headers) csv_writer.writerow(headers)
for result in results: for result in results:
@@ -141,6 +151,15 @@ async def download(download_job: DownloadJob):
response.headers["Content-Disposition"] = "attachment; filename=export.csv" response.headers["Content-Disposition"] = "attachment; filename=export.csv"
return response return response
elif download_job.job_format == "md":
response = StreamingResponse(
stream_md_from_job_results(results),
media_type="text/markdown",
)
response.headers["Content-Disposition"] = "attachment; filename=export.md"
return response
except Exception as e: except Exception as e:
LOG.error(f"Exception occurred: {e}") LOG.error(f"Exception occurred: {e}")
traceback.print_exc() traceback.print_exc()

View File

@@ -21,7 +21,7 @@ async def test_download(mock_randint: AsyncMock, mock_query: AsyncMock):
mock_randint.return_value = mocked_random_int mock_randint.return_value = mocked_random_int
# Create a DownloadJob instance # Create a DownloadJob instance
download_job = DownloadJob(ids=[mocked_job["id"]]) download_job = DownloadJob(ids=[mocked_job["id"]], job_format="csv")
# Make a POST request to the /download endpoint # Make a POST request to the /download endpoint
response = client.post("/download", json=download_job.model_dump()) response = client.post("/download", json=download_job.model_dump())

View File

@@ -30,4 +30,7 @@ EXPOSE 8000
WORKDIR /project/app WORKDIR /project/app
RUN mkdir -p /project/app/data
RUN touch /project/app/data/database.db
CMD [ "supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf" ] CMD [ "supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf" ]

View File

@@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes # This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version. # to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/) # Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0.12 version: 1.0.13
# This is the version number of the application being deployed. This version number should be # This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to # incremented each time you make changes to the application. Versions are not expected to

View File

@@ -0,0 +1 @@
export * from "./job-download-dialog";

View File

@@ -0,0 +1,95 @@
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Button,
FormControl,
RadioGroup,
FormControlLabel,
Radio,
FormLabel,
Typography,
Box,
} from "@mui/material";
import { useState } from "react";
/** Props accepted by the `JobDownloadDialog` component. */
export type JobDownloadDialogProps = {
/** Whether the dialog is shown; passed straight to the MUI `Dialog` `open` prop. */
open: boolean;
/** Passed straight to the MUI `Dialog` `onClose` prop. */
onClose: () => void;
/** Job ids posted to `/api/download`; the first id is used in the saved file name. */
ids: string[];
};
/** Export formats offered by the dialog and sent as `job_format`. */
type JobFormat = "csv" | "md";

/** Narrows a raw radio-button value to a supported {@link JobFormat}. */
const isJobFormat = (value: string): value is JobFormat =>
  value === "csv" || value === "md";

/** Shared id linking the visible "Format" label to the radio group. */
const JOB_FORMAT_LABEL_ID = "job-download-format-label";

/**
 * Dialog that lets the user pick an export format and download the given jobs.
 *
 * On "Download" it POSTs `{ data: { ids, job_format } }` to `/api/download`
 * and saves the response body as `job_<first id>.<format>` through a
 * temporary anchor element. Failures are logged to the console.
 */
export const JobDownloadDialog = ({
  open,
  onClose,
  ids,
}: JobDownloadDialogProps) => {
  const [jobFormat, setJobFormat] = useState<JobFormat>("csv");

  const handleDownload = async (): Promise<void> => {
    // Nothing to request, and the file name is derived from the first id.
    if (ids.length === 0) {
      return;
    }

    try {
      const response = await fetch("/api/download", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ data: { ids: ids, job_format: jobFormat } }),
      });

      if (!response.ok) {
        console.error("Failed to download the file.");
        return;
      }

      const blob = await response.blob();
      const url = window.URL.createObjectURL(blob);
      const a = document.createElement("a");
      a.style.display = "none";
      a.href = url;
      a.download = `job_${ids[0]}.${jobFormat}`;
      document.body.appendChild(a);
      try {
        a.click();
      } finally {
        // Always release the object URL and detach the helper anchor.
        window.URL.revokeObjectURL(url);
        document.body.removeChild(a);
      }
    } catch (error: unknown) {
      // Network failures and body-read errors reject; without this the click
      // handler would surface them as unhandled promise rejections.
      console.error("Failed to download the file.", error);
    }
  };

  return (
    <Dialog open={open} onClose={onClose}>
      <DialogTitle>Download Job</DialogTitle>
      <DialogContent>
        <FormControl>
          <Typography variant="body1">
            You are about to download {ids.length} job(s). Please select the
            format that you would like to download them in.
          </Typography>
          <br />
          <Box
            sx={{
              display: "flex",
              flexDirection: "column",
              backgroundColor: "background.paper",
              padding: 2,
              border: "1px solid",
            }}
          >
            {/* The id is what the radio group's aria-labelledby resolves to. */}
            <FormLabel id={JOB_FORMAT_LABEL_ID}>Format</FormLabel>
            <hr style={{ width: "100%", margin: "10px 0" }} />
            <RadioGroup
              aria-labelledby={JOB_FORMAT_LABEL_ID}
              name="job-download-format-radio-buttons"
              value={jobFormat}
              onChange={(e) => {
                const next = e.target.value;
                if (isJobFormat(next)) {
                  setJobFormat(next);
                }
              }}
            >
              <FormControlLabel value="csv" control={<Radio />} label="CSV" />
              <FormControlLabel
                value="md"
                control={<Radio />}
                label="Markdown"
              />
            </RadioGroup>
          </Box>
          <br />
          <Button
            onClick={() => void handleDownload()}
            size="small"
            disabled={ids.length === 0}
          >
            Download
          </Button>
        </FormControl>
      </DialogContent>
    </Dialog>
  );
};

View File

@@ -20,6 +20,7 @@ import { Favorites, JobQueue } from ".";
import { Job } from "../../types"; import { Job } from "../../types";
import Cookies from "js-cookie"; import Cookies from "js-cookie";
import { useSearchParams } from "next/navigation"; import { useSearchParams } from "next/navigation";
import { JobDownloadDialog } from "../common/job-download-dialog";
interface JobTableProps { interface JobTableProps {
jobs: Job[]; jobs: Job[];
@@ -47,31 +48,15 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
const [searchQuery, setSearchQuery] = useState<string>(search || ""); const [searchQuery, setSearchQuery] = useState<string>(search || "");
const [searchMode, setSearchMode] = useState<string>(type || "url"); const [searchMode, setSearchMode] = useState<string>(type || "url");
const [favoriteView, setFavoriteView] = useState<boolean>(false); const [favoriteView, setFavoriteView] = useState<boolean>(false);
const [jobDownloadDialogOpen, setJobDownloadDialogOpen] =
useState<boolean>(false);
const token = Cookies.get("token"); const token = Cookies.get("token");
const router = useRouter(); const router = useRouter();
const handleDownload = async (ids: string[]) => { const handleDownload = (ids: string[]) => {
const response = await fetch("/api/download", { setSelectedJobs(new Set(ids));
method: "POST", setJobDownloadDialogOpen(true);
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ data: { ids: ids } }),
});
if (response.ok) {
const blob = await response.blob();
const url = window.URL.createObjectURL(blob);
const a = document.createElement("a");
a.style.display = "none";
a.href = url;
a.download = `job_${ids[0]}.csv`;
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
} else {
console.error("Failed to download the file.");
}
}; };
const handleNavigate = (elements: Object[], url: string, options: any) => { const handleNavigate = (elements: Object[], url: string, options: any) => {
@@ -259,17 +244,22 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
onSelectJob={handleSelectJob} onSelectJob={handleSelectJob}
onFavorite={favoriteJob} onFavorite={favoriteJob}
onJobClick={handleJobClick} onJobClick={handleJobClick}
></JobQueue> />
) : ( ) : (
<Favorites <Favorites
stateProps={{ selectedJobs, filteredJobs }} stateProps={{ selectedJobs, filteredJobs }}
onNavigate={handleNavigate} onNavigate={handleNavigate}
onSelectJob={handleSelectJob} onSelectJob={handleSelectJob}
onFavorite={favoriteJob} onFavorite={favoriteJob}
></Favorites> />
)} )}
</Box> </Box>
</Box> </Box>
<JobDownloadDialog
open={jobDownloadDialogOpen}
onClose={() => setJobDownloadDialogOpen(false)}
ids={Array.from(selectedJobs)}
/>
</Box> </Box>
); );
}; };