diff --git a/README.md b/README.md index 5e0851f..6f61990 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,10 @@ From the table, users can download an excel sheet of the job's results, along wi ![logs](https://github.com/jaypyles/www-scrape/blob/master/docs/log_page.png) +- View a small statistics view of jobs ran + +![statistics](https://github.com/jaypyles/www-scrape/blob/master/docs/stats_page.png) + ## Installation 1. Clone the repository: diff --git a/api/backend/app.py b/api/backend/app.py index d138e68..052bb19 100644 --- a/api/backend/app.py +++ b/api/backend/app.py @@ -3,6 +3,9 @@ import uuid import logging from io import BytesIO from openpyxl import Workbook +from typing import Any +from datetime import datetime +from bson import ObjectId # PDM from fastapi import BackgroundTasks, FastAPI, HTTPException @@ -15,9 +18,16 @@ import docker client = docker.from_env() # LOCAL -from api.backend.job import query, insert, delete_jobs +from api.backend.job import ( + average_elements_per_link, + get_jobs_per_day, + query, + insert, + delete_jobs, +) from api.backend.models import ( DownloadJob, + GetStatistics, SubmitScrapeJob, DeleteScrapeJobs, RetrieveScrapeJobs, @@ -64,7 +74,8 @@ async def submit_scrape_job(job: SubmitScrapeJob, background_tasks: BackgroundTa job.id = uuid.uuid4().hex if job.user: - await insert(jsonable_encoder(job)) + job_dict = job.model_dump() + await insert(job_dict) return JSONResponse(content=f"Job queued for scraping: {job.id}") except Exception as e: @@ -76,7 +87,7 @@ async def retrieve_scrape_jobs(retrieve: RetrieveScrapeJobs): LOG.info(f"Retrieving jobs for account: {retrieve.user}") try: results = await query({"user": retrieve.user}) - return JSONResponse(content=results[::-1]) + return JSONResponse(content=jsonable_encoder(results[::-1])) except Exception as e: LOG.error(f"Exception occurred: {e}") return JSONResponse(content={"error": str(e)}, status_code=500) @@ -184,3 +195,14 @@ async def get_own_logs(): return 
def _creation_day_expr() -> dict[str, object]:
    """Build the aggregation expression that buckets a job by creation day.

    The result is a ``YYYY-MM-DD`` string derived from ``time_created``.

    ``$dateToString`` only accepts real date values. Documents written
    before ``time_created`` became a native datetime may still hold it as a
    string (NOTE(review): confirm against existing data), so the field is
    first passed through ``$convert``. Values that are null or cannot be
    parsed become ``None`` instead of failing the whole aggregation; such
    jobs end up in a bucket whose ``_id`` is ``None``, which is also what a
    null ``time_created`` produced before.
    """
    return {
        "$dateToString": {
            "format": "%Y-%m-%d",
            "date": {
                "$convert": {
                    "input": "$time_created",
                    "to": "date",
                    "onError": None,
                    "onNull": None,
                }
            },
        }
    }


async def average_elements_per_link(user: str) -> list[dict[str, object]]:
    """Return, per day, the mean number of elements of a user's completed jobs.

    Args:
        user: Value matched against the ``user`` field of stored jobs.

    Returns:
        One dict per creation day, sorted by day ascending, with the keys
        ``date`` (``YYYY-MM-DD`` string or ``None``), ``average_elements``
        and ``count`` (number of completed jobs on that day).
    """
    collection = get_job_collection()
    pipeline = [
        {"$match": {"status": "Completed", "user": user}},
        {
            "$project": {
                "date": _creation_day_expr(),
                # ``$size`` raises when its argument is missing or not an
                # array, so count such jobs as having zero elements.
                "num_elements": {
                    "$cond": [
                        {"$isArray": "$elements"},
                        {"$size": "$elements"},
                        0,
                    ]
                },
            }
        },
        {
            "$group": {
                "_id": "$date",
                "average_elements": {"$avg": "$num_elements"},
                "count": {"$sum": 1},
            }
        },
        {"$sort": {"_id": 1}},
    ]

    return [
        {
            "date": document["_id"],
            "average_elements": document["average_elements"],
            "count": document["count"],
        }
        async for document in collection.aggregate(pipeline)
    ]


async def get_jobs_per_day(user: str) -> list[dict[str, object]]:
    """Return the number of completed jobs a user has per creation day.

    Args:
        user: Value matched against the ``user`` field of stored jobs.

    Returns:
        One dict per creation day, sorted by day ascending, with the keys
        ``date`` (``YYYY-MM-DD`` string or ``None``) and ``job_count``.
    """
    collection = get_job_collection()
    pipeline = [
        {"$match": {"status": "Completed", "user": user}},
        {"$project": {"date": _creation_day_expr()}},
        {"$group": {"_id": "$date", "job_count": {"$sum": 1}}},
        {"$sort": {"_id": 1}},
    ]

    return [
        {"date": document["_id"], "job_count": document["job_count"]}
        async for document in collection.aggregate(pipeline)
    ]
"https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz", "integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==" }, + "node_modules/@kurkle/color": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.2.tgz", + "integrity": "sha512-fuscdXJ9G1qb7W8VdHi+IwRqij3lBkosAm4ydQtEmbY58OzHXqQhvlxqEkoz0yssNVn38bcpRWgA9PP+OGoisw==" + }, "node_modules/@leichtgewicht/ip-codec": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.4.tgz", @@ -8012,6 +8018,17 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/chart.js": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.4.3.tgz", + "integrity": "sha512-qK1gkGSRYcJzqrrzdR6a+I0vQ4/R+SoODXyAjscQ/4mzuNzySaMCd+hyVxitSY1+L2fjPD1Gbn+ibNqRmwQeLw==", + "dependencies": { + "@kurkle/color": "^0.3.0" + }, + "engines": { + "pnpm": ">=8" + } + }, "node_modules/check-types": { "version": "11.2.2", "resolved": "https://registry.npmjs.org/check-types/-/check-types-11.2.2.tgz", diff --git a/package.json b/package.json index 9c8bb6d..aaf87bc 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "@testing-library/user-event": "^13.5.0", "axios": "^1.7.2", "bootstrap": "^5.3.0", + "chart.js": "^4.4.3", "framer-motion": "^4.1.17", "next": "^14.2.4", "next-auth": "^4.24.7", diff --git a/src/components/JobTable.tsx b/src/components/JobTable.tsx index 83710cd..ce3bc38 100644 --- a/src/components/JobTable.tsx +++ b/src/components/JobTable.tsx @@ -47,9 +47,9 @@ interface ColorMap { } const COLOR_MAP: ColorMap = { - Queued: "rgba(255,201,5,0.5)", - Scraping: "rgba(3,104,255,0.5)", - Completed: "rgba(5,255,51,0.5)", + Queued: "rgba(255,201,5,0.25)", + Scraping: "rgba(3,104,255,0.25)", + Completed: "rgba(5,255,51,0.25)", }; const JobTable: React.FC = ({ jobs, fetchJobs }) => { diff --git a/src/components/NavDrawer.tsx 
b/src/components/NavDrawer.tsx index 213c3e8..5bccca6 100644 --- a/src/components/NavDrawer.tsx +++ b/src/components/NavDrawer.tsx @@ -21,6 +21,7 @@ import HomeIcon from "@mui/icons-material/Home"; import HttpIcon from "@mui/icons-material/Http"; import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; import TerminalIcon from "@mui/icons-material/Terminal"; +import BarChart from "@mui/icons-material/BarChart"; import { useRouter } from "next/router"; import { useTheme } from "@mui/material/styles"; @@ -77,6 +78,15 @@ const NavDrawer: React.FC = ({ toggleTheme, isDarkMode }) => { + + router.push("/statistics")}> + + + + + + + router.push("/logs")}> diff --git a/src/pages/statistics.tsx b/src/pages/statistics.tsx new file mode 100644 index 0000000..ca83009 --- /dev/null +++ b/src/pages/statistics.tsx @@ -0,0 +1,214 @@ +import React, { useEffect, useRef, useState } from "react"; +import { Chart, registerables } from "chart.js"; +import { Box, Typography, useTheme } from "@mui/material"; +import { useAuth } from "../contexts/AuthContext"; + +Chart.register(...registerables); + +const Statistics: React.FC = () => { + const theme = useTheme(); + const elementsChartRef = useRef(null); + const jobsChartRef = useRef(null); + const [elementsChart, setElementsChart] = useState(null); + const [jobsChart, setJobsChart] = useState(null); + const { user } = useAuth(); + + useEffect(() => { + const fetchElementsData = async () => { + try { + const response = await fetch( + "/api/statistics/get-average-element-per-link", + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ user: user.email }), + } + ); + const data = await response.json(); + + const dates = data.map((item: any) => item.date); + const averages = data.map((item: any) => item.average_elements); + + if (elementsChartRef.current) { + const ctx = elementsChartRef.current.getContext("2d"); + + if (ctx) { + if (elementsChart) { + elementsChart.destroy(); + } + + const 
newChart = new Chart(ctx, { + type: "line", + data: { + labels: dates, + datasets: [ + { + label: "Average Elements per Link", + data: averages, + backgroundColor: + theme.palette.mode === "light" + ? "rgba(75, 192, 192, 0.2)" + : "rgba(255, 99, 132, 0.2)", + borderColor: + theme.palette.mode === "light" + ? "rgba(75, 192, 192, 1)" + : "rgba(255, 99, 132, 1)", + borderWidth: 1, + }, + ], + }, + options: { + scales: { + y: { + beginAtZero: true, + ticks: { + color: theme.palette.text.primary, + }, + }, + x: { + ticks: { + color: theme.palette.text.primary, + }, + }, + }, + responsive: true, + maintainAspectRatio: false, + }, + }); + + setElementsChart(newChart); + } + } + } catch (error) { + console.error("Error fetching elements data:", error); + } + }; + + fetchElementsData(); + }, [elementsChartRef, theme.palette.mode]); + + useEffect(() => { + const fetchJobsData = async () => { + try { + const response = await fetch( + "/api/statistics/get-average-jobs-per-day", + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ user: user.email }), + } + ); + const data = await response.json(); + + const dates = data.map((item: any) => item.date); + const jobCounts = data.map((item: any) => item.job_count); + + if (jobsChartRef.current) { + const ctx = jobsChartRef.current.getContext("2d"); + + if (ctx) { + if (jobsChart) { + jobsChart.destroy(); + } + + const newChart = new Chart(ctx, { + type: "line", + data: { + labels: dates, + datasets: [ + { + label: "Average Jobs per Day", + data: jobCounts, + backgroundColor: + theme.palette.mode === "light" + ? "rgba(153, 102, 255, 0.2)" + : "rgba(54, 162, 235, 0.2)", + borderColor: + theme.palette.mode === "light" + ? 
"rgba(153, 102, 255, 1)" + : "rgba(54, 162, 235, 1)", + borderWidth: 1, + }, + ], + }, + options: { + scales: { + y: { + beginAtZero: true, + ticks: { + color: theme.palette.text.primary, + }, + }, + x: { + ticks: { + color: theme.palette.text.primary, + }, + }, + }, + responsive: true, + maintainAspectRatio: false, + }, + }); + + setJobsChart(newChart); + } + } + } catch (error) { + console.error("Error fetching jobs data:", error); + } + }; + + fetchJobsData(); + }, [jobsChartRef, theme.palette.mode]); + + return ( + <> + {user ? ( +
+
+
+ Average Elements per Link +
+ +
+
+
+ Average Jobs per Day +
+ +
+
+
+
+ ) : ( + +

+ Statistics for jobs not viewable unless logged in. +

+
+ )} + + ); +}; + +export default Statistics;