mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-10-30 05:57:12 +00:00
feat: add statistics page
This commit is contained in:
@@ -28,6 +28,10 @@ From the table, users can download an excel sheet of the job's results, along wi
|
||||
|
||||

|
||||
|
||||
- View a small statistics page summarizing the jobs that have been run
|
||||
|
||||

|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
@@ -3,6 +3,9 @@ import uuid
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from openpyxl import Workbook
|
||||
from typing import Any
|
||||
from datetime import datetime
|
||||
from bson import ObjectId
|
||||
|
||||
# PDM
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException
|
||||
@@ -15,9 +18,16 @@ import docker
|
||||
client = docker.from_env()
|
||||
|
||||
# LOCAL
|
||||
from api.backend.job import query, insert, delete_jobs
|
||||
from api.backend.job import (
|
||||
average_elements_per_link,
|
||||
get_jobs_per_day,
|
||||
query,
|
||||
insert,
|
||||
delete_jobs,
|
||||
)
|
||||
from api.backend.models import (
|
||||
DownloadJob,
|
||||
GetStatistics,
|
||||
SubmitScrapeJob,
|
||||
DeleteScrapeJobs,
|
||||
RetrieveScrapeJobs,
|
||||
@@ -64,7 +74,8 @@ async def submit_scrape_job(job: SubmitScrapeJob, background_tasks: BackgroundTa
|
||||
job.id = uuid.uuid4().hex
|
||||
|
||||
if job.user:
|
||||
await insert(jsonable_encoder(job))
|
||||
job_dict = job.model_dump()
|
||||
await insert(job_dict)
|
||||
|
||||
return JSONResponse(content=f"Job queued for scraping: {job.id}")
|
||||
except Exception as e:
|
||||
@@ -76,7 +87,7 @@ async def retrieve_scrape_jobs(retrieve: RetrieveScrapeJobs):
|
||||
LOG.info(f"Retrieving jobs for account: {retrieve.user}")
|
||||
try:
|
||||
results = await query({"user": retrieve.user})
|
||||
return JSONResponse(content=results[::-1])
|
||||
return JSONResponse(content=jsonable_encoder(results[::-1]))
|
||||
except Exception as e:
|
||||
LOG.error(f"Exception occurred: {e}")
|
||||
return JSONResponse(content={"error": str(e)}, status_code=500)
|
||||
@@ -184,3 +195,14 @@ async def get_own_logs():
|
||||
return StreamingResponse(log_generator(), media_type="text/event-stream")
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/api/statistics/get-average-element-per-link")
async def get_average_element_per_link(get_statistics: GetStatistics):
    """Serve the per-day element statistics for the user in the request body.

    The work is delegated entirely to ``average_elements_per_link``; whatever
    it returns is handed back to the framework unchanged.
    """
    stats = await average_elements_per_link(get_statistics.user)
    return stats
|
||||
|
||||
|
||||
@app.post("/api/statistics/get-average-jobs-per-day")
async def average_jobs_per_day(get_statistics: GetStatistics):
    """Serve the per-day completed-job counts for the user in the request body.

    Thin wrapper around ``get_jobs_per_day``; its result is returned as-is.
    """
    return await get_jobs_per_day(get_statistics.user)
|
||||
|
||||
@@ -49,3 +49,62 @@ async def delete_jobs(jobs: list[str]):
|
||||
LOG.info(f"RESULT: {result.deleted_count} documents deleted")
|
||||
|
||||
return True if result.deleted_count > 0 else False
|
||||
|
||||
|
||||
async def average_elements_per_link(user: str):
    """Summarize a user's completed jobs per creation day.

    Runs an aggregation over the job collection that keeps only documents
    whose ``status`` is ``"Completed"`` and whose ``user`` equals *user*,
    buckets them by the ``%Y-%m-%d`` rendering of ``time_created``, and for
    each bucket computes the mean size of the ``elements`` array together
    with the number of jobs in the bucket.

    Returns a list of ``{"date", "average_elements", "count"}`` dicts ordered
    by ascending date string.
    """
    # NOTE(review): $dateToString presumably requires time_created to be
    # stored as a date value rather than a string -- confirm against insert().
    creation_day = {"$dateToString": {"format": "%Y-%m-%d", "date": "$time_created"}}

    only_completed_for_user = {"$match": {"status": "Completed", "user": user}}
    day_and_element_count = {
        "$project": {
            "date": creation_day,
            "num_elements": {"$size": "$elements"},
        }
    }
    per_day_summary = {
        "$group": {
            "_id": "$date",
            "average_elements": {"$avg": "$num_elements"},
            "count": {"$sum": 1},
        }
    }
    oldest_first = {"$sort": {"_id": 1}}

    jobs = get_job_collection()
    daily_rows = jobs.aggregate(
        [only_completed_for_user, day_and_element_count, per_day_summary, oldest_first]
    )

    # The group key (_id) is the formatted day; expose it under "date".
    summary: list[dict[str, Any]] = [
        {
            "date": row["_id"],
            "average_elements": row["average_elements"],
            "count": row["count"],
        }
        async for row in daily_rows
    ]
    return summary
|
||||
|
||||
|
||||
async def get_jobs_per_day(user: str):
    """Count a user's completed jobs for each creation day.

    Aggregates job documents whose ``status`` is ``"Completed"`` and whose
    ``user`` equals *user*, grouping them by the ``%Y-%m-%d`` rendering of
    ``time_created``.

    Returns a list of ``{"date", "job_count"}`` dicts ordered by ascending
    date string.
    """
    # NOTE(review): $dateToString presumably requires time_created to be
    # stored as a date value rather than a string -- confirm against insert().
    creation_day = {"$dateToString": {"format": "%Y-%m-%d", "date": "$time_created"}}

    stages = [
        {"$match": {"status": "Completed", "user": user}},
        {"$project": {"date": creation_day}},
        {"$group": {"_id": "$date", "job_count": {"$sum": 1}}},
        {"$sort": {"_id": 1}},
    ]

    daily_rows = get_job_collection().aggregate(stages)

    # The group key (_id) is the formatted day; expose it under "date".
    counts: list[dict[str, Any]] = [
        {"date": row["_id"], "job_count": row["job_count"]}
        async for row in daily_rows
    ]
    return counts
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# STL
|
||||
from typing import Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
# PDM
|
||||
import pydantic
|
||||
@@ -27,7 +28,7 @@ class SubmitScrapeJob(pydantic.BaseModel):
|
||||
url: str
|
||||
elements: list[Element]
|
||||
user: Optional[str] = None
|
||||
time_created: Optional[str] = None
|
||||
time_created: Optional[datetime] = None
|
||||
result: Optional[dict[str, Any]] = None
|
||||
job_options: JobOptions
|
||||
status: str = "Queued"
|
||||
@@ -43,3 +44,7 @@ class DownloadJob(pydantic.BaseModel):
|
||||
|
||||
class DeleteScrapeJobs(pydantic.BaseModel):
|
||||
ids: list[str]
|
||||
|
||||
|
||||
# Request body accepted by the two /api/statistics/* POST endpoints.
class GetStatistics(pydantic.BaseModel):
|
||||
# Passed to the aggregation helpers, which match it against each job's "user" field.
user: str
|
||||
|
||||
@@ -41,7 +41,7 @@ async def main():
|
||||
LOG.info("Starting job worker...")
|
||||
while True:
|
||||
await process_job()
|
||||
await asyncio.sleep(5) # Sleep for 5 seconds before checking for new jobs
|
||||
await asyncio.sleep(5)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 44 KiB After Width: | Height: | Size: 46 KiB |
BIN
docs/stats_page.png
Normal file
BIN
docs/stats_page.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 73 KiB |
17
package-lock.json
generated
17
package-lock.json
generated
@@ -21,6 +21,7 @@
|
||||
"@testing-library/user-event": "^13.5.0",
|
||||
"axios": "^1.7.2",
|
||||
"bootstrap": "^5.3.0",
|
||||
"chart.js": "^4.4.3",
|
||||
"framer-motion": "^4.1.17",
|
||||
"next": "^14.2.4",
|
||||
"next-auth": "^4.24.7",
|
||||
@@ -4529,6 +4530,11 @@
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz",
|
||||
"integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw=="
|
||||
},
|
||||
"node_modules/@kurkle/color": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.2.tgz",
|
||||
"integrity": "sha512-fuscdXJ9G1qb7W8VdHi+IwRqij3lBkosAm4ydQtEmbY58OzHXqQhvlxqEkoz0yssNVn38bcpRWgA9PP+OGoisw=="
|
||||
},
|
||||
"node_modules/@leichtgewicht/ip-codec": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.4.tgz",
|
||||
@@ -8012,6 +8018,17 @@
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/chart.js": {
|
||||
"version": "4.4.3",
|
||||
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.4.3.tgz",
|
||||
"integrity": "sha512-qK1gkGSRYcJzqrrzdR6a+I0vQ4/R+SoODXyAjscQ/4mzuNzySaMCd+hyVxitSY1+L2fjPD1Gbn+ibNqRmwQeLw==",
|
||||
"dependencies": {
|
||||
"@kurkle/color": "^0.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"pnpm": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/check-types": {
|
||||
"version": "11.2.2",
|
||||
"resolved": "https://registry.npmjs.org/check-types/-/check-types-11.2.2.tgz",
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
"@testing-library/user-event": "^13.5.0",
|
||||
"axios": "^1.7.2",
|
||||
"bootstrap": "^5.3.0",
|
||||
"chart.js": "^4.4.3",
|
||||
"framer-motion": "^4.1.17",
|
||||
"next": "^14.2.4",
|
||||
"next-auth": "^4.24.7",
|
||||
|
||||
@@ -47,9 +47,9 @@ interface ColorMap {
|
||||
}
|
||||
|
||||
const COLOR_MAP: ColorMap = {
|
||||
Queued: "rgba(255,201,5,0.5)",
|
||||
Scraping: "rgba(3,104,255,0.5)",
|
||||
Completed: "rgba(5,255,51,0.5)",
|
||||
Queued: "rgba(255,201,5,0.25)",
|
||||
Scraping: "rgba(3,104,255,0.25)",
|
||||
Completed: "rgba(5,255,51,0.25)",
|
||||
};
|
||||
|
||||
const JobTable: React.FC<JobTableProps> = ({ jobs, fetchJobs }) => {
|
||||
|
||||
@@ -21,6 +21,7 @@ import HomeIcon from "@mui/icons-material/Home";
|
||||
import HttpIcon from "@mui/icons-material/Http";
|
||||
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
|
||||
import TerminalIcon from "@mui/icons-material/Terminal";
|
||||
import BarChart from "@mui/icons-material/BarChart";
|
||||
import { useRouter } from "next/router";
|
||||
import { useTheme } from "@mui/material/styles";
|
||||
|
||||
@@ -77,6 +78,15 @@ const NavDrawer: React.FC<NavDrawerProps> = ({ toggleTheme, isDarkMode }) => {
|
||||
</ListItemButton>
|
||||
</ListItem>
|
||||
<Divider />
|
||||
<ListItem>
|
||||
<ListItemButton onClick={() => router.push("/statistics")}>
|
||||
<ListItemIcon>
|
||||
<BarChart />
|
||||
</ListItemIcon>
|
||||
<ListItemText primary="Statistics" />
|
||||
</ListItemButton>
|
||||
</ListItem>
|
||||
<Divider />
|
||||
<ListItem>
|
||||
<ListItemButton onClick={() => router.push("/logs")}>
|
||||
<ListItemIcon>
|
||||
|
||||
214
src/pages/statistics.tsx
Normal file
214
src/pages/statistics.tsx
Normal file
@@ -0,0 +1,214 @@
|
||||
import React, { useEffect, useRef, useState } from "react";
|
||||
import { Chart, registerables } from "chart.js";
|
||||
import { Box, Typography, useTheme } from "@mui/material";
|
||||
import { useAuth } from "../contexts/AuthContext";
|
||||
|
||||
Chart.register(...registerables);
|
||||
|
||||
/** Static description of one statistics chart: its data source and how it is drawn. */
interface StatChartSpec {
  /** Backend route that returns an array of per-day rows. */
  endpoint: string;
  /** Property of each row that holds the plotted value. */
  valueKey: string;
  /** Dataset label shown in the chart legend. */
  label: string;
  /** Prefix logged to the console when loading or drawing fails. */
  errorMessage: string;
  /** "r, g, b" triple used when the theme is in light mode. */
  lightRgb: string;
  /** "r, g, b" triple used when the theme is not in light mode. */
  darkRgb: string;
}

// Module-level constants so their identity is stable across renders; the hook
// below lists the spec in its effect dependencies.
const ELEMENTS_CHART: StatChartSpec = {
  endpoint: "/api/statistics/get-average-element-per-link",
  valueKey: "average_elements",
  label: "Average Elements per Link",
  errorMessage: "Error fetching elements data:",
  lightRgb: "75, 192, 192",
  darkRgb: "255, 99, 132",
};

const JOBS_CHART: StatChartSpec = {
  endpoint: "/api/statistics/get-average-jobs-per-day",
  valueKey: "job_count",
  label: "Average Jobs per Day",
  errorMessage: "Error fetching jobs data:",
  lightRgb: "153, 102, 255",
  darkRgb: "54, 162, 235",
};

/**
 * Fetch one statistics series for `email` and draw it as a line chart on the
 * canvas behind `canvasRef`.
 *
 * The effect re-runs whenever the signed-in user or the theme changes, and its
 * cleanup destroys the chart it created. That fixes three problems with the
 * previous inline effects:
 *   - they did not depend on the user, so charts were never drawn when the
 *     user became available after the first render (the canvas is only
 *     mounted for a signed-in user);
 *   - they dereferenced `user.email` while `user` could still be null;
 *   - they had no cleanup, so charts leaked on unmount and overlapping runs
 *     could try to attach a second chart to a canvas that was still in use.
 */
const useStatisticsChart = (
  canvasRef: React.RefObject<HTMLCanvasElement>,
  spec: StatChartSpec,
  email: string | undefined,
  mode: string,
  tickColor: string
) => {
  useEffect(() => {
    // Nothing to query (and no canvas mounted) until a user is signed in.
    if (!email) {
      return;
    }

    let cancelled = false;
    let chart: Chart | null = null;

    const load = async () => {
      try {
        const response = await fetch(spec.endpoint, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({ user: email }),
        });
        if (!response.ok) {
          throw new Error(`Request failed with status ${response.status}`);
        }
        const data = await response.json();

        // The effect may have been torn down while the request was in flight;
        // drawing now would attach a chart nobody will ever destroy.
        const ctx = canvasRef.current?.getContext("2d");
        if (cancelled || !ctx) {
          return;
        }

        const rgb = mode === "light" ? spec.lightRgb : spec.darkRgb;

        chart = new Chart(ctx, {
          type: "line",
          data: {
            labels: data.map((item: any) => item.date),
            datasets: [
              {
                label: spec.label,
                data: data.map((item: any) => item[spec.valueKey]),
                backgroundColor: `rgba(${rgb}, 0.2)`,
                borderColor: `rgba(${rgb}, 1)`,
                borderWidth: 1,
              },
            ],
          },
          options: {
            scales: {
              y: {
                beginAtZero: true,
                ticks: { color: tickColor },
              },
              x: {
                ticks: { color: tickColor },
              },
            },
            responsive: true,
            maintainAspectRatio: false,
          },
        });
      } catch (error) {
        console.error(spec.errorMessage, error);
      }
    };

    load();

    return () => {
      cancelled = true;
      chart?.destroy();
    };
  }, [canvasRef, spec, email, mode, tickColor]);
};

/**
 * Statistics page: two line charts (average elements per link, jobs per day)
 * for the signed-in user, or a notice when nobody is signed in.
 */
const Statistics: React.FC = () => {
  const theme = useTheme();
  const elementsChartRef = useRef<HTMLCanvasElement>(null);
  const jobsChartRef = useRef<HTMLCanvasElement>(null);
  const { user } = useAuth();
  const email: string | undefined = user?.email;

  useStatisticsChart(
    elementsChartRef,
    ELEMENTS_CHART,
    email,
    theme.palette.mode,
    theme.palette.text.primary
  );
  useStatisticsChart(
    jobsChartRef,
    JOBS_CHART,
    email,
    theme.palette.mode,
    theme.palette.text.primary
  );

  return (
    <>
      {user ? (
        <div className="flex flex-col space-y-2 justify-center text-center h-full w-full">
          <div className="flex flex-row space-x-2 h-full m-0 w-full">
            <div className="w-full h-full flex flex-col justify-center space-y-2 text-center">
              <Typography variant="h5">Average Elements per Link</Typography>
              <div className="relative w-full h-full">
                <canvas
                  ref={elementsChartRef}
                  className="absolute top-0 left-0 w-full h-full"
                ></canvas>
              </div>
            </div>
            <div className="w-full h-full flex flex-col justify-center space-y-2 text-center">
              <Typography variant="h5">Average Jobs per Day</Typography>
              <div className="relative w-full h-full">
                <canvas
                  ref={jobsChartRef}
                  className="absolute top-0 left-0 w-full h-full"
                ></canvas>
              </div>
            </div>
          </div>
        </div>
      ) : (
        <Box
          bgcolor="background.default"
          minHeight="100vh"
          display="flex"
          justifyContent="center"
          alignItems="center"
        >
          <h4
            style={{
              color: "#fff",
              padding: "20px",
              borderRadius: "8px",
              background: "rgba(0, 0, 0, 0.6)",
              boxShadow: "0 4px 8px rgba(0, 0, 0, 0.2)",
            }}
          >
            Statistics for jobs not viewable unless logged in.
          </h4>
        </Box>
      )}
    </>
  );
};
|
||||
|
||||
export default Statistics;
|
||||
Reference in New Issue
Block a user