feat: add statistics page

This commit is contained in:
Jayden
2024-07-21 16:04:40 -05:00
parent 242056b648
commit ccbced2a1e
12 changed files with 340 additions and 8 deletions

View File

@@ -28,6 +28,10 @@ From the table, users can download an excel sheet of the job's results, along wi
![logs](https://github.com/jaypyles/www-scrape/blob/master/docs/log_page.png)
- View a small statistics page for the jobs that have been run
![statistics](https://github.com/jaypyles/www-scrape/blob/master/docs/stats_page.png)
## Installation
1. Clone the repository:

View File

@@ -3,6 +3,9 @@ import uuid
import logging
from io import BytesIO
from openpyxl import Workbook
from typing import Any
from datetime import datetime
from bson import ObjectId
# PDM
from fastapi import BackgroundTasks, FastAPI, HTTPException
@@ -15,9 +18,16 @@ import docker
client = docker.from_env()
# LOCAL
from api.backend.job import query, insert, delete_jobs
from api.backend.job import (
average_elements_per_link,
get_jobs_per_day,
query,
insert,
delete_jobs,
)
from api.backend.models import (
DownloadJob,
GetStatistics,
SubmitScrapeJob,
DeleteScrapeJobs,
RetrieveScrapeJobs,
@@ -64,7 +74,8 @@ async def submit_scrape_job(job: SubmitScrapeJob, background_tasks: BackgroundTa
job.id = uuid.uuid4().hex
if job.user:
await insert(jsonable_encoder(job))
job_dict = job.model_dump()
await insert(job_dict)
return JSONResponse(content=f"Job queued for scraping: {job.id}")
except Exception as e:
@@ -76,7 +87,7 @@ async def retrieve_scrape_jobs(retrieve: RetrieveScrapeJobs):
LOG.info(f"Retrieving jobs for account: {retrieve.user}")
try:
results = await query({"user": retrieve.user})
return JSONResponse(content=results[::-1])
return JSONResponse(content=jsonable_encoder(results[::-1]))
except Exception as e:
LOG.error(f"Exception occurred: {e}")
return JSONResponse(content={"error": str(e)}, status_code=500)
@@ -184,3 +195,14 @@ async def get_own_logs():
return StreamingResponse(log_generator(), media_type="text/event-stream")
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/statistics/get-average-element-per-link")
async def get_average_element_per_link(get_statistics: GetStatistics):
    """Return the per-day average element statistics for the requested user.

    The user identifier is read from the request body and passed straight to
    ``average_elements_per_link``; whatever that helper yields is returned
    unchanged as the response payload.
    """
    daily_averages = await average_elements_per_link(get_statistics.user)
    return daily_averages
@app.post("/api/statistics/get-average-jobs-per-day")
async def average_jobs_per_day(get_statistics: GetStatistics):
    """Return the per-day job counts for the requested user.

    The user identifier comes from the request body; the result of
    ``get_jobs_per_day`` is returned unchanged as the response payload.
    """
    return await get_jobs_per_day(get_statistics.user)

View File

@@ -49,3 +49,62 @@ async def delete_jobs(jobs: list[str]):
LOG.info(f"RESULT: {result.deleted_count} documents deleted")
return True if result.deleted_count > 0 else False
async def average_elements_per_link(user: str) -> list[dict[str, Any]]:
    """Return per-day element statistics for a user's completed jobs.

    Args:
        user: Value matched against the ``user`` field of job documents.

    Returns:
        One dict per calendar day, sorted by day ascending, with keys
        ``date`` (``YYYY-MM-DD`` string, or ``None`` when the job has no
        usable creation timestamp), ``average_elements`` (mean length of
        the ``elements`` array) and ``count`` (number of jobs that day).
    """
    collection = get_job_collection()

    # ``$dateToString`` raises when handed a plain string, and job documents
    # may carry ``time_created`` as a string rather than a BSON date.
    # ``$convert`` passes real dates through, parses date strings, and falls
    # back to null (instead of failing the whole aggregation) otherwise.
    created_at = {
        "$convert": {
            "input": "$time_created",
            "to": "date",
            "onError": None,
            "onNull": None,
        }
    }

    pipeline = [
        {"$match": {"status": "Completed", "user": user}},
        {
            "$project": {
                "date": {
                    "$dateToString": {"format": "%Y-%m-%d", "date": created_at}
                },
                "num_elements": {"$size": "$elements"},
            }
        },
        {
            "$group": {
                "_id": "$date",
                "average_elements": {"$avg": "$num_elements"},
                "count": {"$sum": 1},
            }
        },
        {"$sort": {"_id": 1}},
    ]

    return [
        {
            "date": document["_id"],
            "average_elements": document["average_elements"],
            "count": document["count"],
        }
        async for document in collection.aggregate(pipeline)
    ]
async def get_jobs_per_day(user: str) -> list[dict[str, Any]]:
    """Return the number of completed jobs per calendar day for a user.

    Args:
        user: Value matched against the ``user`` field of job documents.

    Returns:
        One dict per calendar day, sorted by day ascending, with keys
        ``date`` (``YYYY-MM-DD`` string, or ``None`` when the job has no
        usable creation timestamp) and ``job_count``.
    """
    collection = get_job_collection()

    # ``$dateToString`` raises when handed a plain string, and job documents
    # may carry ``time_created`` as a string rather than a BSON date.
    # ``$convert`` passes real dates through, parses date strings, and falls
    # back to null (instead of failing the whole aggregation) otherwise.
    created_at = {
        "$convert": {
            "input": "$time_created",
            "to": "date",
            "onError": None,
            "onNull": None,
        }
    }

    pipeline = [
        {"$match": {"status": "Completed", "user": user}},
        {
            "$project": {
                "date": {
                    "$dateToString": {"format": "%Y-%m-%d", "date": created_at}
                }
            }
        },
        {"$group": {"_id": "$date", "job_count": {"$sum": 1}}},
        {"$sort": {"_id": 1}},
    ]

    return [
        {"date": document["_id"], "job_count": document["job_count"]}
        async for document in collection.aggregate(pipeline)
    ]

View File

@@ -1,5 +1,6 @@
# STL
from typing import Any, Optional
from datetime import datetime
# PDM
import pydantic
@@ -27,7 +28,7 @@ class SubmitScrapeJob(pydantic.BaseModel):
url: str
elements: list[Element]
user: Optional[str] = None
time_created: Optional[str] = None
time_created: Optional[datetime] = None
result: Optional[dict[str, Any]] = None
job_options: JobOptions
status: str = "Queued"
@@ -43,3 +44,7 @@ class DownloadJob(pydantic.BaseModel):
# Request model holding a list of string ids.
# NOTE(review): presumably the ids of the scrape jobs to delete - confirm against the delete endpoint.
class DeleteScrapeJobs(pydantic.BaseModel):
ids: list[str]
# Request body for the statistics endpoints: `user` is the value the
# statistics queries match against the `user` field of stored jobs.
class GetStatistics(pydantic.BaseModel):
user: str

View File

@@ -41,7 +41,7 @@ async def main():
LOG.info("Starting job worker...")
while True:
await process_job()
await asyncio.sleep(5) # Sleep for 5 seconds before checking for new jobs
await asyncio.sleep(5)
if __name__ == "__main__":

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

After

Width:  |  Height:  |  Size: 46 KiB

BIN
docs/stats_page.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

17
package-lock.json generated
View File

@@ -21,6 +21,7 @@
"@testing-library/user-event": "^13.5.0",
"axios": "^1.7.2",
"bootstrap": "^5.3.0",
"chart.js": "^4.4.3",
"framer-motion": "^4.1.17",
"next": "^14.2.4",
"next-auth": "^4.24.7",
@@ -4529,6 +4530,11 @@
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz",
"integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw=="
},
"node_modules/@kurkle/color": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.2.tgz",
"integrity": "sha512-fuscdXJ9G1qb7W8VdHi+IwRqij3lBkosAm4ydQtEmbY58OzHXqQhvlxqEkoz0yssNVn38bcpRWgA9PP+OGoisw=="
},
"node_modules/@leichtgewicht/ip-codec": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.4.tgz",
@@ -8012,6 +8018,17 @@
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/chart.js": {
"version": "4.4.3",
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.4.3.tgz",
"integrity": "sha512-qK1gkGSRYcJzqrrzdR6a+I0vQ4/R+SoODXyAjscQ/4mzuNzySaMCd+hyVxitSY1+L2fjPD1Gbn+ibNqRmwQeLw==",
"dependencies": {
"@kurkle/color": "^0.3.0"
},
"engines": {
"pnpm": ">=8"
}
},
"node_modules/check-types": {
"version": "11.2.2",
"resolved": "https://registry.npmjs.org/check-types/-/check-types-11.2.2.tgz",

View File

@@ -16,6 +16,7 @@
"@testing-library/user-event": "^13.5.0",
"axios": "^1.7.2",
"bootstrap": "^5.3.0",
"chart.js": "^4.4.3",
"framer-motion": "^4.1.17",
"next": "^14.2.4",
"next-auth": "^4.24.7",

View File

@@ -47,9 +47,9 @@ interface ColorMap {
}
const COLOR_MAP: ColorMap = {
Queued: "rgba(255,201,5,0.5)",
Scraping: "rgba(3,104,255,0.5)",
Completed: "rgba(5,255,51,0.5)",
Queued: "rgba(255,201,5,0.25)",
Scraping: "rgba(3,104,255,0.25)",
Completed: "rgba(5,255,51,0.25)",
};
const JobTable: React.FC<JobTableProps> = ({ jobs, fetchJobs }) => {

View File

@@ -21,6 +21,7 @@ import HomeIcon from "@mui/icons-material/Home";
import HttpIcon from "@mui/icons-material/Http";
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
import TerminalIcon from "@mui/icons-material/Terminal";
import BarChart from "@mui/icons-material/BarChart";
import { useRouter } from "next/router";
import { useTheme } from "@mui/material/styles";
@@ -77,6 +78,15 @@ const NavDrawer: React.FC<NavDrawerProps> = ({ toggleTheme, isDarkMode }) => {
</ListItemButton>
</ListItem>
<Divider />
<ListItem>
<ListItemButton onClick={() => router.push("/statistics")}>
<ListItemIcon>
<BarChart />
</ListItemIcon>
<ListItemText primary="Statistics" />
</ListItemButton>
</ListItem>
<Divider />
<ListItem>
<ListItemButton onClick={() => router.push("/logs")}>
<ListItemIcon>

214
src/pages/statistics.tsx Normal file
View File

@@ -0,0 +1,214 @@
import React, { useEffect, useRef, useState } from "react";
import { Chart, registerables } from "chart.js";
import { Box, Typography, useTheme } from "@mui/material";
import { useAuth } from "../contexts/AuthContext";
Chart.register(...registerables);
type ThemeMode = "light" | "dark";

/** Static description of one line chart shown on the statistics page. */
interface StatisticsChartSpec {
  /** API route that returns the rows to plot. */
  endpoint: string;
  /** Dataset label shown in the chart legend. */
  label: string;
  /** Key of each returned row that holds the plotted value. */
  valueKey: string;
  /** Prefix logged when loading or drawing the chart fails. */
  errorMessage: string;
  fill: Record<ThemeMode, string>;
  stroke: Record<ThemeMode, string>;
}

const ELEMENTS_CHART: StatisticsChartSpec = {
  endpoint: "/api/statistics/get-average-element-per-link",
  label: "Average Elements per Link",
  valueKey: "average_elements",
  errorMessage: "Error fetching elements data:",
  fill: { light: "rgba(75, 192, 192, 0.2)", dark: "rgba(255, 99, 132, 0.2)" },
  stroke: { light: "rgba(75, 192, 192, 1)", dark: "rgba(255, 99, 132, 1)" },
};

const JOBS_CHART: StatisticsChartSpec = {
  endpoint: "/api/statistics/get-average-jobs-per-day",
  label: "Average Jobs per Day",
  valueKey: "job_count",
  errorMessage: "Error fetching jobs data:",
  fill: { light: "rgba(153, 102, 255, 0.2)", dark: "rgba(54, 162, 235, 0.2)" },
  stroke: { light: "rgba(153, 102, 255, 1)", dark: "rgba(54, 162, 235, 1)" },
};

/**
 * Fetches the rows for `spec` and draws them as a line chart on `canvasRef`.
 *
 * The chart instance is owned by the effect itself (not React state), so the
 * cleanup always destroys exactly the chart this run created. That keeps a
 * re-run (theme change, user change, StrictMode double invoke) from drawing
 * onto a canvas that still has a live chart attached, and releases the chart
 * on unmount.
 */
const useStatisticsChart = (
  canvasRef: React.RefObject<HTMLCanvasElement | null>,
  spec: StatisticsChartSpec,
  email: string | null | undefined,
  mode: ThemeMode,
  tickColor: string
): void => {
  useEffect(() => {
    // Nothing to request until a logged-in user is known.
    if (!email) {
      return;
    }

    let cancelled = false;
    let chart: Chart | undefined;

    const loadChart = async () => {
      try {
        const response = await fetch(spec.endpoint, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({ user: email }),
        });
        if (!response.ok) {
          throw new Error(`Request failed with status ${response.status}`);
        }

        const payload = await response.json();
        const rows: any[] = Array.isArray(payload) ? payload : [];

        // The effect may have been cleaned up while the request was in
        // flight; drawing now would leak a chart nobody destroys.
        const ctx = cancelled ? null : canvasRef.current?.getContext("2d");
        if (!ctx) {
          return;
        }

        chart = new Chart(ctx, {
          type: "line",
          data: {
            labels: rows.map((row) => row.date),
            datasets: [
              {
                label: spec.label,
                data: rows.map((row) => row[spec.valueKey]),
                backgroundColor: spec.fill[mode],
                borderColor: spec.stroke[mode],
                borderWidth: 1,
              },
            ],
          },
          options: {
            scales: {
              y: {
                beginAtZero: true,
                ticks: { color: tickColor },
              },
              x: {
                ticks: { color: tickColor },
              },
            },
            responsive: true,
            maintainAspectRatio: false,
          },
        });
      } catch (error) {
        console.error(spec.errorMessage, error);
      }
    };

    loadChart();

    return () => {
      cancelled = true;
      chart?.destroy();
    };
  }, [canvasRef, spec, email, mode, tickColor]);
};

/**
 * Statistics page: two line charts (average elements per link and jobs per
 * day) for the logged-in user, or a notice when nobody is logged in.
 */
const Statistics: React.FC = () => {
  const theme = useTheme();
  const elementsChartRef = useRef<HTMLCanvasElement>(null);
  const jobsChartRef = useRef<HTMLCanvasElement>(null);
  const { user } = useAuth();

  // `user` is absent while logged out; never dereference it blindly.
  const email = user?.email;
  const mode: ThemeMode = theme.palette.mode === "light" ? "light" : "dark";
  const tickColor = theme.palette.text.primary;

  useStatisticsChart(elementsChartRef, ELEMENTS_CHART, email, mode, tickColor);
  useStatisticsChart(jobsChartRef, JOBS_CHART, email, mode, tickColor);

  return (
    <>
      {user ? (
        <div className="flex flex-col space-y-2 justify-center text-center h-full w-full">
          <div className="flex flex-row space-x-2 h-full m-0 w-full">
            <div className="w-full h-full flex flex-col justify-center space-y-2 text-center">
              <Typography variant="h5">Average Elements per Link</Typography>
              <div className="relative w-full h-full">
                <canvas
                  ref={elementsChartRef}
                  className="absolute top-0 left-0 w-full h-full"
                ></canvas>
              </div>
            </div>
            <div className="w-full h-full flex flex-col justify-center space-y-2 text-center">
              <Typography variant="h5">Average Jobs per Day</Typography>
              <div className="relative w-full h-full">
                <canvas
                  ref={jobsChartRef}
                  className="absolute top-0 left-0 w-full h-full"
                ></canvas>
              </div>
            </div>
          </div>
        </div>
      ) : (
        <Box
          bgcolor="background.default"
          minHeight="100vh"
          display="flex"
          justifyContent="center"
          alignItems="center"
        >
          <h4
            style={{
              color: "#fff",
              padding: "20px",
              borderRadius: "8px",
              background: "rgba(0, 0, 0, 0.6)",
              boxShadow: "0 4px 8px rgba(0, 0, 0, 0.2)",
            }}
          >
            Statistics for jobs not viewable unless logged in.
          </h4>
        </Box>
      )}
    </>
  );
};
export default Statistics;