Mirror of https://github.com/jaypyles/Scraperr.git (synced 2025-11-13 12:46:29 +00:00)

Compare commits — 33 commits:

8cd30599fa, a58212b214, a6ab6ec71d, c5c9427af4, e8d80c1a77, ee8047ac78,
e74c4f392c, 6b484952a3, 2283808605, ee5ada70f7, 56cc457e6e, 21a38181de,
3063bc0d53, f42e7ed531, c197f2becd, a534129702, 455ed049c9, de4ccfbf3a,
3475d66995, 186b4a0231, 0af0ebf5b5, ef35db00d7, d65e600ec3, 6fe145f649,
563ca2245e, d54fdbd405, 7169755cd2, 15b56b5704, bf6b740005, c339e75e06,
b6ed40e6cf, 3085f9d31a, 7d80ff5c7f

.github/actions/run-cypress-tests/action.yaml (vendored, new file, 58 lines)
@@ -0,0 +1,58 @@
name: Run Cypress Tests

description: Run Cypress tests

runs:
  using: "composite"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Setup Node
      uses: actions/setup-node@v4
      with:
        node-version: 22

    - name: Setup Docker project
      shell: bash
      run: make build up-dev

    - name: Install dependencies
      shell: bash
      run: npm install

    - name: Wait for frontend to be ready
      shell: bash
      run: |
        for i in {1..10}; do
          curl -s http://127.0.0.1:80 && echo "Frontend is ready" && exit 0
          echo "Waiting for frontend to be ready... attempt $i"
          sleep 1
        done
        echo "Frontend failed to be ready after 10 retries"
        exit 1

    - name: Wait for backend to be ready
      shell: bash
      run: |
        for i in {1..10}; do
          curl -s http://127.0.0.1:8000 && echo "Backend is ready" && exit 0
          echo "Waiting for backend to be ready... attempt $i"
          sleep 1
        done
        echo "Backend failed to be ready after 10 retries"
        exit 1

    - name: Show backend logs on failure
      if: failure()
      shell: bash
      run: |
        echo "== Docker Containers =="
        docker ps -a
        echo "== Backend Logs =="
        docker logs $(docker ps -a --filter "name=scraperr_api" --format "{{.Names}}") || echo "Could not get backend logs"

    - name: Run Cypress tests
      shell: bash
      run: npm run cy:run

.github/workflows/docker-image.yml (vendored, 20 lines changed)
@@ -4,10 +4,11 @@ on:
     workflows: ["Unit Tests"]
     types:
       - completed
   workflow_dispatch:

 jobs:
   build:
-    if: ${{ github.event.workflow_run.conclusion == 'success' && github.ref == 'refs/heads/master' && github.event_name != 'pull_request' }}
+    if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'master' }}
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -37,3 +38,20 @@ jobs:
           file: ./docker/api/Dockerfile
           push: true
           tags: ${{ secrets.DOCKERHUB_USERNAME }}/scraperr_api:latest
+
+  success-message:
+    runs-on: ubuntu-latest
+    needs:
+      - build
+    steps:
+      - name: Send Discord Message
+        uses: jaypyles/discord-webhook-action@v1.0.0
+        with:
+          webhook-url: ${{ secrets.DISCORD_WEBHOOK_URL }}
+          content: "Scraperr Successfully Built Docker Images"
+          username: "Scraperr CI"
+          embed-title: "✅ Deployment Status"
+          embed-description: "Scraperr successfully built docker images."
+          embed-color: 3066993 # Green
+          embed-footer-text: "Scraperr CI"
+          embed-timestamp: ${{ github.event.head_commit.timestamp }}

.github/workflows/unit-tests.yml (vendored, 33 lines changed)
@@ -4,9 +4,11 @@ on:
   push:
     branches:
       - master

   pull_request:
     branches:
       - master
+    types: [opened, synchronize, reopened]

   workflow_dispatch:

 jobs:
   unit-tests:
@@ -15,6 +17,9 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4

+      - name: Set env
+        run: echo "ENV=test" >> $GITHUB_ENV
+
       - name: Install pdm
         run: pip install pdm

@@ -23,3 +28,27 @@ jobs:
       - name: Run tests
         run: PYTHONPATH=. pdm run pytest api/backend/tests
+
+  cypress-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/run-cypress-tests
+
+  success-message:
+    runs-on: ubuntu-latest
+    needs:
+      - unit-tests
+      - cypress-tests
+    steps:
+      - name: Send Discord Message
+        uses: jaypyles/discord-webhook-action@v1.0.0
+        with:
+          webhook-url: ${{ secrets.DISCORD_WEBHOOK_URL }}
+          content: "Scraperr Successfully Passed Tests"
+          username: "Scraperr CI"
+          embed-title: "✅ Deployment Status"
+          embed-description: "Scraperr successfully passed all tests."
+          embed-color: 3066993 # Green
+          embed-footer-text: "Scraperr CI"
+          embed-timestamp: ${{ github.event.head_commit.timestamp }}

.gitignore (vendored, 2 lines added)
@@ -187,3 +187,5 @@ cython_debug/
 postgres_data
 .vscode
 ollama
+data
+media

.python-version (new file, 1 line)
@@ -0,0 +1 @@
3.10.12

Deleted file:
@@ -1,3 +0,0 @@
github_repo: https://github.com/jaypyles/webapp-template.git
deploy_path: /home/admin/site-test6
deploy_command: make pull up-prd

Deleted file:
@@ -1,10 +0,0 @@
- name: Deploy site
  hosts: all
  become: true
  vars_files:
    - ./config.yaml
  tasks:
    - name: Deploy
      command: "{{deploy_command}}"
      args:
        chdir: "{{deploy_path}}"

Deleted file:
@@ -1,6 +0,0 @@
all:
  hosts:
    host1:
      ansible_host: 192.168.0.1
      ansible_user: admin
      ansible_ssh_private_key_file: private_key.pem

Deleted file:
@@ -1,54 +0,0 @@
- name: Install Docker and run make pull up
  hosts: all
  become: true
  vars_files:
    - ./config.yaml
  tasks:
    - name: Update apt cache
      apt:
        update_cache: yes
    - name: Install required packages
      apt:
        name:
          - apt-transport-https
          - ca-certificates
          - curl
          - gnupg-agent
          - software-properties-common
          - rsync
          - make
        state: present
    - name: Add Docker’s official GPG key
      apt_key:
        url: https://download.docker.com/linux/ubuntu/gpg
        state: present
    - name: Add Docker APT repository
      apt_repository:
        repo: deb [arch=amd64] https://download.docker.com/linux/ubuntu focal stable
        state: present
    - name: Update apt cache again after adding Docker repo
      apt:
        update_cache: yes
    - name: Install Docker
      apt:
        name: docker-ce
        state: present
    - name: Start and enable Docker service
      systemd:
        name: docker
        enabled: yes
        state: started
    - name: Install Docker Compose
      apt:
        name: docker-compose-plugin
        state: present
    - name: Verify Docker is installed
      command: docker --version
      register: docker_version
    - name: Display Docker version
      debug:
        msg: "Docker version: {{ docker_version.stdout }}"
    - name: Clone repo
      ansible.builtin.git:
        repo: "{{github_repo}}"
        dest: "{{deploy_path}}"

@@ -1,9 +1,13 @@
 # STL
 import os
 import logging
+import apscheduler  # type: ignore

 # PDM
-from fastapi import FastAPI
+import apscheduler.schedulers
+import apscheduler.schedulers.background
+from fastapi import FastAPI, Request, status
+from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware

 # LOCAL
@@ -13,6 +17,11 @@ from api.backend.utils import get_log_level
 from api.backend.routers.job_router import job_router
 from api.backend.routers.log_router import log_router
 from api.backend.routers.stats_router import stats_router
+from api.backend.database.startup import init_database
+from fastapi.responses import JSONResponse
+
+from api.backend.job.cron_scheduling.cron_scheduling import start_cron_scheduler
+from api.backend.scheduler import scheduler

 log_level = os.getenv("LOG_LEVEL")
 LOG_LEVEL = get_log_level(log_level)
@@ -41,3 +50,28 @@ app.include_router(ai_router)
 app.include_router(job_router)
 app.include_router(log_router)
 app.include_router(stats_router)
+
+
+@app.on_event("startup")
+async def startup_event():
+    start_cron_scheduler(scheduler)
+    scheduler.start()
+
+    if os.getenv("ENV") != "test":
+        init_database()
+    LOG.info("Starting up...")
+
+
+@app.on_event("shutdown")
+def shutdown_scheduler():
+    scheduler.shutdown(wait=False)  # Set wait=False to not block shutdown
+
+
+@app.exception_handler(RequestValidationError)
+async def validation_exception_handler(request: Request, exc: RequestValidationError):
+    exc_str = f"{exc}".replace("\n", " ").replace("   ", " ")
+    logging.error(f"{request}: {exc_str}")
+    content = {"status_code": 10422, "message": exc_str, "data": None}
+    return JSONResponse(
+        content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
+    )

@@ -7,7 +7,6 @@ from fastapi.security import OAuth2PasswordRequestForm

 # LOCAL
 from api.backend.schemas import User, Token, UserCreate
-from api.backend.database import get_user_collection
 from api.backend.auth.auth_utils import (
     ACCESS_TOKEN_EXPIRE_MINUTES,
     get_current_user,
@@ -15,9 +14,14 @@ from api.backend.auth.auth_utils import (
     get_password_hash,
     create_access_token,
 )
+import logging
+
+from api.backend.database.common import update

 auth_router = APIRouter()

+LOG = logging.getLogger("auth_router")
+

 @auth_router.post("/auth/token", response_model=Token)
 async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
@@ -43,12 +47,14 @@ async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends(

 @auth_router.post("/auth/signup", response_model=User)
 async def create_user(user: UserCreate):
-    users_collection = get_user_collection()
     hashed_password = get_password_hash(user.password)
     user_dict = user.model_dump()
     user_dict["hashed_password"] = hashed_password
     del user_dict["password"]
-    _ = await users_collection.insert_one(user_dict)
+
+    query = "INSERT INTO users (email, hashed_password, full_name) VALUES (?, ?, ?)"
+    _ = update(query, (user_dict["email"], hashed_password, user_dict["full_name"]))

     return user_dict

@@ -1,7 +1,5 @@
 # STL
 import os
-from gc import disable
-from queue import Empty
 from typing import Any, Optional
 from datetime import datetime, timedelta
 import logging
@@ -15,7 +13,8 @@ from fastapi.security import OAuth2PasswordBearer

 # LOCAL
 from api.backend.schemas import User, UserInDB, TokenData
-from api.backend.database import get_user_collection
+
+from api.backend.database.common import query

 LOG = logging.getLogger(__name__)
@@ -40,8 +39,8 @@ def get_password_hash(password: str):


 async def get_user(email: str):
-    user_collection = get_user_collection()
-    user = await user_collection.find_one({"email": email})
+    user_query = "SELECT * FROM users WHERE email = ?"
+    user = query(user_query, (email,))[0]

     if not user:
         return
@@ -77,27 +76,42 @@ def create_access_token(


 async def get_current_user(token: str = Depends(oauth2_scheme)):
-    LOG.info(f"Getting current user with token: {token}")
+    LOG.debug(f"Getting current user with token: {token}")

     if not token:
+        LOG.debug("No token provided")
+        return EMPTY_USER
+
+    if len(token.split(".")) != 3:
+        LOG.error(f"Malformed token: {token}")
         return EMPTY_USER

     try:
+        LOG.debug(
+            f"Decoding token: {token} with secret key: {SECRET_KEY} and algorithm: {ALGORITHM}"
+        )
+
+        if token.startswith("Bearer "):
+            token = token.split(" ")[1]
+
         payload: Optional[dict[str, Any]] = jwt.decode(
             token, SECRET_KEY, algorithms=[ALGORITHM]
         )
+
+        if not payload:
+            LOG.error("No payload found in token")
+            return EMPTY_USER
+
         email = payload.get("sub")

         if email is None:
+            LOG.error("No email found in payload")
             return EMPTY_USER

         token_data = TokenData(email=email)

-    except JWTError:
+    except JWTError as e:
+        LOG.error(f"JWTError occurred: {e}")
         return EMPTY_USER

     except Exception as e:
@@ -105,7 +119,6 @@ async def get_current_user(token: str = Depends(oauth2_scheme)):
         return EMPTY_USER

     user = await get_user(email=token_data.email)

     if user is None:
         return EMPTY_USER

api/backend/constants.py (new file, 1 line)
@@ -0,0 +1 @@
DATABASE_PATH = "data/database.db"

Deleted file:
@@ -1,23 +0,0 @@
# STL
import os
from typing import Any

# PDM
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient

_ = load_dotenv()

MONGODB_URI = os.getenv("MONGODB_URI")


def get_user_collection():
    client: AsyncIOMotorClient[dict[str, Any]] = AsyncIOMotorClient(MONGODB_URI)
    db = client["scrape"]
    return db["users"]


def get_job_collection():
    client: AsyncIOMotorClient[dict[str, Any]] = AsyncIOMotorClient(MONGODB_URI)
    db = client["scrape"]
    return db["jobs"]

api/backend/database/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .common import insert, QUERIES, update

__all__ = ["insert", "QUERIES", "update"]

api/backend/database/common.py (new file, 92 lines)
@@ -0,0 +1,92 @@
import sqlite3
from typing import Any, Optional
from api.backend.constants import DATABASE_PATH
from api.backend.utils import format_json, format_sql_row_to_python
from api.backend.database.schema import INIT_QUERY
from api.backend.database.queries import JOB_INSERT_QUERY, DELETE_JOB_QUERY
import logging

LOG = logging.getLogger(__name__)


def connect():
    connection = sqlite3.connect(DATABASE_PATH)
    connection.set_trace_callback(print)
    cursor = connection.cursor()
    return cursor


def insert(query: str, values: tuple[Any, ...]):
    connection = sqlite3.connect(DATABASE_PATH)
    cursor = connection.cursor()
    copy = list(values)
    format_json(copy)

    try:
        _ = cursor.execute(query, copy)
        connection.commit()
    except sqlite3.Error as e:
        LOG.error(f"An error occurred: {e}")
    finally:
        cursor.close()
        connection.close()


def query(query: str, values: Optional[tuple[Any, ...]] = None):
    connection = sqlite3.connect(DATABASE_PATH)
    connection.row_factory = sqlite3.Row
    cursor = connection.cursor()
    rows = []
    try:
        if values:
            _ = cursor.execute(query, values)
        else:
            _ = cursor.execute(query)

        rows = cursor.fetchall()

    finally:
        cursor.close()
        connection.close()

    formatted_rows: list[dict[str, Any]] = []

    for row in rows:
        row = dict(row)
        formatted_row = format_sql_row_to_python(row)
        formatted_rows.append(formatted_row)

    return formatted_rows


def update(query: str, values: Optional[tuple[Any, ...]] = None):
    connection = sqlite3.connect(DATABASE_PATH)
    cursor = connection.cursor()

    copy = None

    if values:
        copy = list(values)
        format_json(copy)

    try:
        if copy:
            res = cursor.execute(query, copy)
        else:
            res = cursor.execute(query)
        connection.commit()
        return res.rowcount
    except sqlite3.Error as e:
        LOG.error(f"An error occurred: {e}")
    finally:
        cursor.close()
        connection.close()

    return 0


QUERIES = {
    "init": INIT_QUERY,
    "insert_job": JOB_INSERT_QUERY,
    "delete_job": DELETE_JOB_QUERY,
}
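
A minimal usage sketch of the helpers above (invented values; the users table comes from INIT_QUERY in schema.py below — the sketch itself is not part of the diff):

    from api.backend.database.common import insert, query, update

    # insert() JSON-encodes any dict/list values via format_json before executing.
    insert(
        "INSERT INTO users (email, hashed_password, full_name) VALUES (?, ?, ?)",
        ("a@b.com", "not-a-real-hash", "Ada"),
    )

    # query() returns list[dict]; string columns holding JSON are decoded back
    # to Python objects by format_sql_row_to_python.
    rows = query("SELECT * FROM users WHERE email = ?", ("a@b.com",))

    # update() returns the affected row count, or 0 if a sqlite3.Error was logged.
    changed = update("UPDATE users SET disabled = ? WHERE email = ?", (1, "a@b.com"))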

api/backend/database/queries/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .queries import JOB_INSERT_QUERY, DELETE_JOB_QUERY

__all__ = ["JOB_INSERT_QUERY", "DELETE_JOB_QUERY"]

api/backend/database/queries/queries.py (new file, 9 lines)
@@ -0,0 +1,9 @@
JOB_INSERT_QUERY = """
INSERT INTO jobs
(id, url, elements, user, time_created, result, status, chat, job_options)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

DELETE_JOB_QUERY = """
DELETE FROM jobs WHERE id IN ()
"""

api/backend/database/schema/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .schema import INIT_QUERY

__all__ = ["INIT_QUERY"]

api/backend/database/schema/schema.py (new file, 30 lines)
@@ -0,0 +1,30 @@
INIT_QUERY = """
CREATE TABLE IF NOT EXISTS jobs (
    id STRING PRIMARY KEY NOT NULL,
    url STRING NOT NULL,
    elements JSON NOT NULL,
    user STRING,
    time_created DATETIME NOT NULL,
    result JSON NOT NULL,
    status STRING NOT NULL,
    chat JSON,
    job_options JSON
);

CREATE TABLE IF NOT EXISTS users (
    email STRING PRIMARY KEY NOT NULL,
    hashed_password STRING NOT NULL,
    full_name STRING,
    disabled BOOLEAN
);

CREATE TABLE IF NOT EXISTS cron_jobs (
    id STRING PRIMARY KEY NOT NULL,
    user_email STRING NOT NULL,
    job_id STRING NOT NULL,
    cron_expression STRING NOT NULL,
    time_created DATETIME NOT NULL,
    time_updated DATETIME NOT NULL,
    FOREIGN KEY (job_id) REFERENCES jobs(id)
);
"""

api/backend/database/startup.py (new file, 15 lines)
@@ -0,0 +1,15 @@
from api.backend.database.common import connect, QUERIES
import logging

LOG = logging.getLogger(__name__)


def init_database():
    cursor = connect()

    for query in QUERIES["init"].strip().split(";"):
        if query.strip():
            LOG.info(f"Executing query: {query}")
            _ = cursor.execute(query)

    cursor.close()

Deleted file:
@@ -1,161 +0,0 @@
# STL
import logging
from typing import Any, Optional

# PDM
from pymongo import DESCENDING

# LOCAL
from api.backend.models import FetchOptions
from api.backend.database import get_job_collection

LOG = logging.getLogger(__name__)


async def insert(item: dict[str, Any]) -> None:
    collection = get_job_collection()
    i = await collection.insert_one(item)
    LOG.info(f"Inserted item: {i}")


async def get_queued_job():
    collection = get_job_collection()
    return await collection.find_one(
        {"status": "Queued"}, sort=[("created_at", DESCENDING)]
    )


async def query(
    filter: dict[str, Any], fetch_options: Optional[FetchOptions] = None
) -> list[dict[str, Any]]:
    collection = get_job_collection()
    cursor = collection.find(filter)
    results: list[dict[str, Any]] = []

    async for document in cursor:
        del document["_id"]

        if fetch_options and not fetch_options.chat and document.get("chat"):
            del document["chat"]

        results.append(document)

    return results


async def update_job(ids: list[str], field: str, value: Any):
    collection = get_job_collection()
    for id in ids:
        _ = await collection.update_one(
            {"id": id},
            {"$set": {field: value}},
        )


async def delete_jobs(jobs: list[str]):
    collection = get_job_collection()
    result = await collection.delete_many({"id": {"$in": jobs}})
    LOG.info(f"{result.deleted_count} documents deleted")

    return True if result.deleted_count > 0 else False


async def average_elements_per_link(user: str):
    collection = get_job_collection()
    pipeline = [
        {"$match": {"status": "Completed", "user": user}},
        {
            "$addFields": {
                "time_created_date": {
                    "$cond": {
                        "if": {"$eq": [{"$type": "$time_created"}, "date"]},
                        "then": "$time_created",
                        "else": {
                            "$convert": {
                                "input": "$time_created",
                                "to": "date",
                                "onError": None,
                                "onNull": None,
                            }
                        },
                    }
                }
            }
        },
        {
            "$project": {
                "date": {
                    "$dateToString": {
                        "format": "%Y-%m-%d",
                        "date": "$time_created_date",
                    }
                },
                "num_elements": {"$size": "$elements"},
            }
        },
        {
            "$group": {
                "_id": "$date",
                "average_elements": {"$avg": "$num_elements"},
                "count": {"$sum": 1},
            }
        },
        {"$sort": {"_id": 1}},
    ]
    cursor = collection.aggregate(pipeline)
    results: list[dict[str, Any]] = []

    async for document in cursor:
        results.append(
            {
                "date": document["_id"],
                "average_elements": document["average_elements"],
                "count": document["count"],
            }
        )

    return results


async def get_jobs_per_day(user: str):
    collection = get_job_collection()
    pipeline = [
        {"$match": {"status": "Completed", "user": user}},
        {
            "$addFields": {
                "time_created_date": {
                    "$cond": {
                        "if": {"$eq": [{"$type": "$time_created"}, "date"]},
                        "then": "$time_created",
                        "else": {
                            "$convert": {
                                "input": "$time_created",
                                "to": "date",
                                "onError": None,
                                "onNull": None,
                            }
                        },
                    }
                }
            }
        },
        {
            "$project": {
                "date": {
                    "$dateToString": {
                        "format": "%Y-%m-%d",
                        "date": "$time_created_date",
                    }
                }
            }
        },
        {"$group": {"_id": "$date", "job_count": {"$sum": 1}}},
        {"$sort": {"_id": 1}},
    ]
    cursor = collection.aggregate(pipeline)

    results: list[dict[str, Any]] = []
    async for document in cursor:
        results.append({"date": document["_id"], "job_count": document["job_count"]})

    return results

api/backend/job/__init__.py (new file, 17 lines)
@@ -0,0 +1,17 @@
from .job import (
    insert,
    update_job,
    delete_jobs,
    get_jobs_per_day,
    get_queued_job,
    average_elements_per_link,
)

__all__ = [
    "insert",
    "update_job",
    "delete_jobs",
    "get_jobs_per_day",
    "get_queued_job",
    "average_elements_per_link",
]

api/backend/job/cron_scheduling/cron_scheduling.py (new file, 100 lines)
@@ -0,0 +1,100 @@
import datetime
from typing import Any
import uuid
from api.backend.database.common import insert, query
from api.backend.models import CronJob
from apscheduler.schedulers.background import BackgroundScheduler  # type: ignore
from apscheduler.triggers.cron import CronTrigger  # type: ignore

from api.backend.job import insert as insert_job
import logging

LOG = logging.getLogger("Cron Scheduler")


def insert_cron_job(cron_job: CronJob):
    query = """
    INSERT INTO cron_jobs (id, user_email, job_id, cron_expression, time_created, time_updated)
    VALUES (?, ?, ?, ?, ?, ?)
    """
    values = (
        cron_job.id,
        cron_job.user_email,
        cron_job.job_id,
        cron_job.cron_expression,
        cron_job.time_created,
        cron_job.time_updated,
    )

    insert(query, values)

    return True


def delete_cron_job(id: str, user_email: str):
    query = """
    DELETE FROM cron_jobs
    WHERE id = ? AND user_email = ?
    """
    values = (id, user_email)
    insert(query, values)

    return True


def get_cron_jobs(user_email: str):
    cron_jobs = query("SELECT * FROM cron_jobs WHERE user_email = ?", (user_email,))

    return cron_jobs


def get_all_cron_jobs():
    cron_jobs = query("SELECT * FROM cron_jobs")

    return cron_jobs


def insert_job_from_cron_job(job: dict[str, Any]):
    insert_job(
        {
            **job,
            "id": uuid.uuid4().hex,
            "status": "Queued",
            "result": "",
            "chat": None,
            "time_created": datetime.datetime.now(),
            "time_updated": datetime.datetime.now(),
        }
    )


def get_cron_job_trigger(cron_expression: str):
    expression_parts = cron_expression.split()

    if len(expression_parts) != 5:
        print(f"Invalid cron expression: {cron_expression}")
        return None

    minute, hour, day, month, day_of_week = expression_parts

    return CronTrigger(
        minute=minute, hour=hour, day=day, month=month, day_of_week=day_of_week
    )


def start_cron_scheduler(scheduler: BackgroundScheduler):
    cron_jobs = get_all_cron_jobs()

    LOG.info(f"Cron jobs: {cron_jobs}")

    for job in cron_jobs:
        queried_job = query("SELECT * FROM jobs WHERE id = ?", (job["job_id"],))

        LOG.info(f"Adding job: {queried_job}")

        scheduler.add_job(
            insert_job_from_cron_job,
            get_cron_job_trigger(job["cron_expression"]),
            id=job["id"],
            args=[queried_job[0]],
        )
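
A quick illustration of get_cron_job_trigger above (the expression is invented for the example):

    # "0 6 * * 1" = 06:00 every Monday; five whitespace-separated fields.
    trigger = get_cron_job_trigger("0 6 * * 1")
    # Equivalent to CronTrigger(minute="0", hour="6", day="*", month="*", day_of_week="1").
    # Anything other than exactly five fields makes the function return None.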

api/backend/job/job.py (new file, 97 lines)
@@ -0,0 +1,97 @@
# STL
import logging
from typing import Any

# LOCAL
from api.backend.utils import format_list_for_query
from api.backend.database.common import (
    insert as common_insert,
    query as common_query,
    QUERIES,
    update as common_update,
)

LOG = logging.getLogger(__name__)


def insert(item: dict[str, Any]) -> None:
    common_insert(
        QUERIES["insert_job"],
        (
            item["id"],
            item["url"],
            item["elements"],
            item["user"],
            item["time_created"],
            item["result"],
            item["status"],
            item["chat"],
            item["job_options"],
        ),
    )
    LOG.info(f"Inserted item: {item}")


async def get_queued_job():
    query = (
        "SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
    )
    res = common_query(query)
    LOG.info(f"Got queued job: {res}")
    return res[0] if res else None


async def update_job(ids: list[str], field: str, value: Any):
    query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
    res = common_update(query, tuple([value] + ids))
    LOG.info(f"Updated job: {res}")


async def delete_jobs(jobs: list[str]):
    if not jobs:
        LOG.info("No jobs to delete.")
        return False

    query = f"DELETE FROM jobs WHERE id IN {format_list_for_query(jobs)}"
    res = common_update(query, tuple(jobs))

    return res > 0


async def average_elements_per_link(user: str):
    job_query = """
    SELECT
        DATE(time_created) AS date,
        AVG(json_array_length(elements)) AS average_elements,
        COUNT(*) AS count
    FROM
        jobs
    WHERE
        status = 'Completed' AND user = ?
    GROUP BY
        DATE(time_created)
    ORDER BY
        date ASC;
    """
    results = common_query(job_query, (user,))

    return results


async def get_jobs_per_day(user: str):
    job_query = """
    SELECT
        DATE(time_created) AS date,
        COUNT(*) AS job_count
    FROM
        jobs
    WHERE
        status = 'Completed' AND user = ?
    GROUP BY
        DATE(time_created)
    ORDER BY
        date ASC;
    """
    results = common_query(job_query, (user,))

    return results
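
For clarity, the SQL and parameter layout that update_job above produces (ids invented):

    # With ids=["a1", "b2"], field="status", value="Completed", update_job builds
    #   UPDATE jobs SET status = ? WHERE id IN (?,?)
    # and runs it with parameters ("Completed", "a1", "b2"):
    # the new value first, then the ids, matching placeholder order.
    await update_job(["a1", "b2"], field="status", value="Completed")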

api/backend/job/models/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .job_options import JobOptions

__all__ = ["JobOptions"]

api/backend/job/models/job_options.py (new file, 15 lines)
@@ -0,0 +1,15 @@
from pydantic import BaseModel
from typing import Any, Optional
from api.backend.job.models.site_map import SiteMap


class FetchOptions(BaseModel):
    chat: Optional[bool] = None


class JobOptions(BaseModel):
    multi_page_scrape: bool = False
    custom_headers: dict[str, Any] = {}
    proxies: list[str] = []
    site_map: Optional[SiteMap] = None
    collect_media: bool = False

api/backend/job/models/site_map.py (new file, 14 lines)
@@ -0,0 +1,14 @@
from pydantic import BaseModel
from typing import Literal


class Action(BaseModel):
    type: Literal["click", "input"]
    xpath: str
    name: str
    input: str = ""
    do_once: bool = True


class SiteMap(BaseModel):
    actions: list[Action]
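
Taken together, the two model files above describe a job's options; a construction sketch with invented values:

    options = JobOptions(
        multi_page_scrape=True,
        custom_headers={"User-Agent": "example"},
        proxies=["127.0.0.1:8080"],
        site_map=SiteMap(
            actions=[Action(type="click", xpath="//button[@id='next']", name="next")]
        ),
        collect_media=True,
    )
    print(options.model_dump())  # pydantic v2 serialization, as used elsewhere in this diff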

api/backend/job/scraping/collect_media.py (new file, 91 lines)
@@ -0,0 +1,91 @@
import os
import requests
from pathlib import Path
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
from urllib.parse import urlparse

from api.backend.utils import LOG


def collect_media(driver: webdriver.Chrome):
    media_types = {
        "images": "img",
        "videos": "video",
        "audio": "audio",
        "pdfs": 'a[href$=".pdf"]',
        "documents": 'a[href$=".doc"], a[href$=".docx"], a[href$=".txt"], a[href$=".rtf"]',
        "presentations": 'a[href$=".ppt"], a[href$=".pptx"]',
        "spreadsheets": 'a[href$=".xls"], a[href$=".xlsx"], a[href$=".csv"]',
    }

    base_dir = Path("media")
    base_dir.mkdir(exist_ok=True)

    media_urls = {}

    for media_type, selector in media_types.items():
        elements = driver.find_elements(By.CSS_SELECTOR, selector)
        urls: list[dict[str, str]] = []

        media_dir = base_dir / media_type
        media_dir.mkdir(exist_ok=True)

        for element in elements:
            if media_type == "images":
                url = element.get_attribute("src")
            elif media_type == "videos":
                url = element.get_attribute("src") or element.get_attribute("data-src")
            else:
                url = element.get_attribute("href")

            if url and url.startswith(("http://", "https://")):
                try:
                    filename = os.path.basename(urlparse(url).path)

                    if not filename:
                        filename = f"{media_type}_{len(urls)}"

                        if media_type == "images":
                            filename += ".jpg"
                        elif media_type == "videos":
                            filename += ".mp4"
                        elif media_type == "audio":
                            filename += ".mp3"
                        elif media_type == "pdfs":
                            filename += ".pdf"
                        elif media_type == "documents":
                            filename += ".doc"
                        elif media_type == "presentations":
                            filename += ".ppt"
                        elif media_type == "spreadsheets":
                            filename += ".xls"

                    response = requests.get(url, stream=True)
                    response.raise_for_status()

                    # Save the file
                    file_path = media_dir / filename
                    with open(file_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)

                    urls.append({"url": url, "local_path": str(file_path)})
                    LOG.info(f"Downloaded {filename} to {file_path}")

                except Exception as e:
                    LOG.error(f"Error downloading {url}: {str(e)}")
                    continue

        media_urls[media_type] = urls

    with open(base_dir / "download_summary.txt", "w") as f:
        for media_type, downloads in media_urls.items():
            if downloads:
                f.write(f"\n=== {media_type.upper()} ===\n")
                for download in downloads:
                    f.write(f"URL: {download['url']}\n")
                    f.write(f"Saved to: {download['local_path']}\n\n")

    return media_urls

api/backend/job/scraping/scraping_utils.py (new file, 41 lines)
@@ -0,0 +1,41 @@
import time
from typing import cast

from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from api.backend.utils import LOG

from api.backend.job.scraping.collect_media import collect_media as collect_media_utils


def scrape_content(
    driver: webdriver.Chrome, pages: set[tuple[str, str]], collect_media: bool
):
    _ = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "body"))
    )

    last_height = cast(str, driver.execute_script("return document.body.scrollHeight"))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        time.sleep(3)  # Wait for the page to load
        new_height = cast(
            str, driver.execute_script("return document.body.scrollHeight")
        )

        if new_height == last_height:
            break

        last_height = new_height

    pages.add((driver.page_source, driver.current_url))

    if collect_media:
        LOG.info("Collecting media")
        collect_media_utils(driver)

    return driver.page_source

api/backend/job/site_mapping/__init__.py (new, empty file)

api/backend/job/site_mapping/site_mapping.py (new file, 93 lines)
@@ -0,0 +1,93 @@
from api.backend.job.models.site_map import Action, SiteMap
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from typing import Any
import logging
import time
from copy import deepcopy

from api.backend.job.scraping.scraping_utils import scrape_content
from selenium.webdriver.support.ui import WebDriverWait
from seleniumwire.inspect import TimeoutException
from seleniumwire.webdriver import Chrome
from selenium.webdriver.support import expected_conditions as EC

LOG = logging.getLogger(__name__)


def clear_done_actions(site_map: dict[str, Any]):
    """Clear all actions that have been clicked."""
    cleared_site_map = deepcopy(site_map)

    cleared_site_map["actions"] = [
        action for action in cleared_site_map["actions"] if not action["do_once"]
    ]

    return cleared_site_map


def handle_input(action: Action, driver: webdriver.Chrome):
    try:
        element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, action.xpath))
        )
        LOG.info(f"Sending keys: {action.input} to element: {element}")

        element.send_keys(action.input)

    except NoSuchElementException:
        LOG.info(f"Element not found: {action.xpath}")
        return False

    except TimeoutException:
        LOG.info(f"Timeout waiting for element: {action.xpath}")
        return False

    except Exception as e:
        LOG.info(f"Error handling input: {e}")
        return False

    return True


def handle_click(action: Action, driver: webdriver.Chrome):
    try:
        element = driver.find_element(By.XPATH, action.xpath)
        LOG.info(f"Clicking element: {element}")

        element.click()

    except NoSuchElementException:
        LOG.info(f"Element not found: {action.xpath}")
        return False

    return True


ACTION_MAP = {
    "click": handle_click,
    "input": handle_input,
}


async def handle_site_mapping(
    site_map_dict: dict[str, Any],
    driver: Chrome,
    pages: set[tuple[str, str]],
):
    site_map = SiteMap(**site_map_dict)

    for action in site_map.actions:
        action_handler = ACTION_MAP[action.type]
        if not action_handler(action, driver):
            return

        time.sleep(2)

    _ = scrape_content(driver, pages)

    cleared_site_map_dict = clear_done_actions(site_map_dict)

    if cleared_site_map_dict["actions"]:
        await handle_site_mapping(cleared_site_map_dict, driver, pages)

@@ -2,14 +2,13 @@
 from typing import Any, Optional, Union
 from datetime import datetime

+# LOCAL
+from api.backend.job.models.job_options import JobOptions
+
 # PDM
 import pydantic


-class FetchOptions(pydantic.BaseModel):
-    chat: Optional[bool] = None
-
-
 class Element(pydantic.BaseModel):
     name: str
     xpath: str
@@ -22,12 +21,6 @@ class CapturedElement(pydantic.BaseModel):
     name: str


-class JobOptions(pydantic.BaseModel):
-    multi_page_scrape: bool = False
-    custom_headers: Optional[dict[str, Any]] = {}
-    proxies: Optional[list[str]] = []
-
-
 class RetrieveScrapeJobs(pydantic.BaseModel):
     user: str

@@ -64,3 +57,17 @@ class Job(pydantic.BaseModel):
     job_options: JobOptions
     status: str = "Queued"
     chat: Optional[str] = None
+
+
+class CronJob(pydantic.BaseModel):
+    id: Optional[str] = None
+    user_email: str
+    job_id: str
+    cron_expression: str
+    time_created: Optional[Union[datetime, str]] = None
+    time_updated: Optional[Union[datetime, str]] = None
+
+
+class DeleteCronJob(pydantic.BaseModel):
+    id: str
+    user_email: str

@@ -1,4 +1,5 @@
 # STL
+import datetime
 import uuid
 import traceback
 from io import StringIO
@@ -10,24 +11,33 @@ import random
 from fastapi import Depends, APIRouter
 from fastapi.encoders import jsonable_encoder
 from fastapi.responses import JSONResponse, StreamingResponse
+from api.backend.scheduler import scheduler
+from apscheduler.triggers.cron import CronTrigger  # type: ignore

 # LOCAL
-from api.backend.job import (
-    query,
-    insert,
-    update_job,
-    delete_jobs,
-)
+from api.backend.job import insert, update_job, delete_jobs
 from api.backend.models import (
+    DeleteCronJob,
     UpdateJobs,
     DownloadJob,
-    FetchOptions,
     DeleteScrapeJobs,
     Job,
+    CronJob,
 )
 from api.backend.schemas import User
 from api.backend.auth.auth_utils import get_current_user
-from api.backend.utils import clean_text
+from api.backend.utils import clean_text, format_list_for_query
+from api.backend.job.models.job_options import FetchOptions
+
+from api.backend.database.common import query
+
+from api.backend.job.cron_scheduling.cron_scheduling import (
+    delete_cron_job,
+    get_cron_job_trigger,
+    insert_cron_job,
+    get_cron_jobs,
+    insert_job_from_cron_job,
+)

 LOG = logging.getLogger(__name__)
@@ -47,10 +57,11 @@ async def submit_scrape_job(job: Job):
         job.id = uuid.uuid4().hex

         job_dict = job.model_dump()
-        await insert(job_dict)
+        insert(job_dict)

         return JSONResponse(content={"id": job.id})
     except Exception as e:
         LOG.error(f"Exception occurred: {traceback.format_exc()}")
         return JSONResponse(content={"error": str(e)}, status_code=500)
@@ -59,8 +70,11 @@ async def retrieve_scrape_jobs(
     fetch_options: FetchOptions, user: User = Depends(get_current_user)
 ):
     LOG.info(f"Retrieving jobs for account: {user.email}")
+    ATTRIBUTES = "chat" if fetch_options.chat else "*"

     try:
-        results = await query({"user": user.email}, fetch_options=fetch_options)
+        job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
+        results = query(job_query, (user.email,))
         return JSONResponse(content=jsonable_encoder(results[::-1]))
     except Exception as e:
         LOG.error(f"Exception occurred: {e}")
@@ -72,8 +86,8 @@ async def job(id: str, user: User = Depends(get_current_user)):
     LOG.info(f"Retrieving jobs for account: {user.email}")

     try:
-        filter = {"user": user.email, "id": id}
-        results = await query(filter)
+        job_query = "SELECT * FROM jobs WHERE user = ? AND id = ?"
+        results = query(job_query, (user.email, id))
         return JSONResponse(content=jsonable_encoder(results))
     except Exception as e:
         LOG.error(f"Exception occurred: {e}")
@@ -85,7 +99,10 @@ async def download(download_job: DownloadJob):
     LOG.info(f"Downloading job with ids: {download_job.ids}")

     try:
-        results = await query({"id": {"$in": download_job.ids}})
+        job_query = (
+            f"SELECT * FROM jobs WHERE id IN {format_list_for_query(download_job.ids)}"
+        )
+        results = query(job_query, tuple(download_job.ids))

         csv_buffer = StringIO()
         csv_writer = csv.writer(csv_buffer, quotechar='"', quoting=csv.QUOTE_ALL)
@@ -136,3 +153,47 @@ async def delete(delete_scrape_jobs: DeleteScrapeJobs):
         if result
         else JSONResponse({"error": "Jobs not deleted."})
     )
+
+
+@job_router.post("/schedule-cron-job")
+async def schedule_cron_job(cron_job: CronJob):
+    if not cron_job.id:
+        cron_job.id = uuid.uuid4().hex
+
+    if not cron_job.time_created:
+        cron_job.time_created = datetime.datetime.now()
+
+    if not cron_job.time_updated:
+        cron_job.time_updated = datetime.datetime.now()
+
+    insert_cron_job(cron_job)
+
+    queried_job = query("SELECT * FROM jobs WHERE id = ?", (cron_job.job_id,))
+
+    scheduler.add_job(
+        insert_job_from_cron_job,
+        get_cron_job_trigger(cron_job.cron_expression),
+        id=cron_job.id,
+        args=[queried_job[0]],
+    )
+
+    return JSONResponse(content={"message": "Cron job scheduled successfully."})
+
+
+@job_router.post("/delete-cron-job")
+async def delete_cron_job_request(request: DeleteCronJob):
+    if not request.id:
+        return JSONResponse(
+            content={"error": "Cron job id is required."}, status_code=400
+        )
+
+    delete_cron_job(request.id, request.user_email)
+    scheduler.remove_job(request.id)
+
+    return JSONResponse(content={"message": "Cron job deleted successfully."})
+
+
+@job_router.get("/cron-jobs")
+async def get_cron_jobs_request(user: User = Depends(get_current_user)):
+    cron_jobs = get_cron_jobs(user.email)
+    return JSONResponse(content=jsonable_encoder(cron_jobs))

api/backend/scheduler.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from apscheduler.schedulers.background import BackgroundScheduler  # type: ignore

scheduler = BackgroundScheduler()

@@ -1,19 +1,21 @@
 import logging
 from typing import Any, Optional
 import time
 import random

-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 from lxml import etree
 from seleniumwire import webdriver
-from lxml.etree import _Element  # type: ignore [reportPrivateImport]
+from lxml.etree import _Element
 from fake_useragent import UserAgent
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.chrome.options import Options as ChromeOptions
 from urllib.parse import urlparse, urljoin
 from api.backend.models import Element, CapturedElement
 from api.backend.job.site_mapping.site_mapping import (
     handle_site_mapping,
 )
 from selenium.webdriver.chrome.service import Service
 from webdriver_manager.chrome import ChromeDriverManager
+from api.backend.job.scraping.scraping_utils import scrape_content

 LOG = logging.getLogger(__name__)
@@ -69,21 +71,27 @@ def create_driver(proxies: Optional[list[str]] = []):
     chrome_options.add_argument(f"user-agent={ua.random}")

     sw_options = {}

     if proxies:
-        selected_proxy = proxies[random.randint(0, len(proxies) - 1)]
+        selected_proxy = random.choice(proxies)
         LOG.info(f"Using proxy: {selected_proxy}")

         sw_options = {
             "proxy": {
                 "https": f"https://{selected_proxy}",
                 "http": f"http://{selected_proxy}",
+                "no_proxy": "localhost,127.0.0.1",
             }
         }

+    service = Service(ChromeDriverManager().install())
+
     driver = webdriver.Chrome(
+        service=service,
         options=chrome_options,
         seleniumwire_options=sw_options,
     )

     return driver
@@ -95,6 +103,8 @@ async def make_site_request(
     pages: set[tuple[str, str]] = set(),
     original_url: str = "",
     proxies: Optional[list[str]] = [],
+    site_map: Optional[dict[str, Any]] = None,
+    collect_media: bool = False,
 ) -> None:
     """Make basic `GET` request to site using Selenium."""
     # Check if URL has already been visited
@@ -114,27 +124,16 @@ async def make_site_request(
         final_url = driver.current_url
         visited_urls.add(url)
         visited_urls.add(final_url)
-        _ = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located((By.TAG_NAME, "body"))
-        )
-
-        last_height = driver.execute_script("return document.body.scrollHeight")
-        while True:
-            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
-
-            time.sleep(3)  # Wait for the page to load
-            new_height = driver.execute_script("return document.body.scrollHeight")
-
-            if new_height == last_height:
-                break
-
-            last_height = new_height
-
-        final_height = driver.execute_script("return document.body.scrollHeight")
-
-        page_source = driver.page_source
-        LOG.debug(f"Page source for url: {url}\n{page_source}")
-        pages.add((page_source, final_url))
+        page_source = scrape_content(driver, pages, collect_media)
+
+        if site_map:
+            LOG.info("Site map: %s", site_map)
+            _ = await handle_site_mapping(
+                site_map,
+                driver,
+                pages,
+            )
     finally:
         driver.quit()
@@ -144,7 +143,10 @@ async def make_site_request(
     soup = BeautifulSoup(page_source, "html.parser")

     for a_tag in soup.find_all("a"):
-        link = a_tag.get("href")
+        if not isinstance(a_tag, Tag):
+            continue
+
+        link = str(a_tag.get("href", ""))

         if link:
             if not urlparse(link).netloc:
@@ -172,7 +174,10 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])
         el = sxpath(root, elem.xpath)

         for e in el:
-            text = "\t".join(str(t) for t in e.itertext())
+            if isinstance(e, etree._Element):  # type: ignore
+                text = "\t".join(str(t) for t in e.itertext())
+            else:
+                text = str(e)
             captured_element = CapturedElement(
                 xpath=elem.xpath, text=text, name=elem.name
             )
@@ -192,6 +197,8 @@ async def scrape(
     headers: Optional[dict[str, Any]],
     multi_page_scrape: bool = False,
     proxies: Optional[list[str]] = [],
+    site_map: Optional[dict[str, Any]] = None,
+    collect_media: bool = False,
 ):
     visited_urls: set[str] = set()
     pages: set[tuple[str, str]] = set()
@@ -204,6 +211,8 @@ async def scrape(
         pages=pages,
         original_url=url,
         proxies=proxies,
+        site_map=site_map,
+        collect_media=collect_media,
     )

     elements: list[dict[str, dict[str, list[CapturedElement]]]] = list()

@@ -1,15 +1,10 @@
 import pytest
 import logging
 from unittest.mock import AsyncMock, patch, MagicMock
 from api.backend.tests.factories.job_factory import create_job
 from api.backend.models import JobOptions
 from api.backend.scraping import create_driver


-mocked_job = create_job(
-    job_options=JobOptions(
-        multi_page_scrape=False, custom_headers={}, proxies=["127.0.0.1:8080"]
-    )
-).model_dump()
 logging.basicConfig(level=logging.DEBUG)
 LOG = logging.getLogger(__name__)


 @pytest.mark.asyncio
@@ -26,8 +21,7 @@ async def test_proxy(mock_get: AsyncMock):
     driver.get("http://example.com")
     response = driver.last_request

     # Check if the proxy header is set correctly
     if response:
         assert response.headers["Proxy"] == "127.0.0.1:8080"
         assert response.headers["Proxy-Connection"] == "keep-alive"

     driver.quit()

@@ -1,5 +1,8 @@
-from typing import Optional
+from typing import Any, Optional
 import logging
+import json
+
+LOG = logging.getLogger(__name__)


 def clean_text(text: str):
@@ -17,3 +20,30 @@ def get_log_level(level_name: Optional[str]) -> int:
     level = getattr(logging, level_name, logging.INFO)

     return level
+
+
+def format_list_for_query(ids: list[str]):
+    return (
+        f"({','.join(['?' for _ in ids])})"  # Returns placeholders, e.g., "(?, ?, ?)"
+    )
+
+
+def format_sql_row_to_python(row: dict[str, Any]):
+    new_row: dict[str, Any] = {}
+    for key, value in row.items():
+        if isinstance(value, str):
+            try:
+                new_row[key] = json.loads(value)
+            except json.JSONDecodeError:
+                new_row[key] = value
+        else:
+            new_row[key] = value
+
+    return new_row
+
+
+def format_json(items: list[Any]):
+    for idx, item in enumerate(items):
+        if isinstance(item, (dict, list)):
+            formatted_item = json.dumps(item)
+            items[idx] = formatted_item
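
A short sketch of how format_list_for_query above is combined with the query helpers elsewhere in this diff (ids invented):

    ids = ["job1", "job2", "job3"]
    clause = format_list_for_query(ids)            # "(?,?,?)"
    sql = f"DELETE FROM jobs WHERE id IN {clause}"
    # executed as common_update(sql, tuple(ids)) in api/backend/job/job.py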

@@ -8,6 +8,8 @@ import logging
 import sys
 import traceback

+from api.backend.database.startup import init_database
+
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 LOG = logging.getLogger(__name__)
@@ -24,6 +26,8 @@ async def process_job():
             job["job_options"]["custom_headers"],
             job["job_options"]["multi_page_scrape"],
             job["job_options"]["proxies"],
+            job["job_options"]["site_map"],
+            job["job_options"]["collect_media"],
         )
         LOG.info(
             f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
@@ -40,6 +44,9 @@ async def process_job():

 async def main():
     LOG.info("Starting job worker...")

+    init_database()
+
     while True:
         await process_job()
         await asyncio.sleep(5)

cypress/e2e/authentication.cy.ts (new file, 60 lines)
@@ -0,0 +1,60 @@
describe("Authentication", () => {
  it("should register", () => {
    cy.intercept("POST", "/api/signup").as("signup");

    cy.visit("/").then(() => {
      cy.get("button").contains("Login").click();
      cy.url().should("include", "/login");

      cy.get("form").should("be.visible");
      cy.get("button")
        .contains("No Account? Sign up")
        .should("be.visible")
        .click();

      cy.get("input[name='email']").type("test@test.com");
      cy.get("input[name='password']").type("password");
      cy.get("input[name='fullName']").type("John Doe");
      cy.get("button[type='submit']").contains("Signup").click();

      cy.wait("@signup").then((interception) => {
        if (!interception.response) {
          cy.log("No response received!");
          throw new Error("signup request did not return a response");
        }

        cy.log("Response status: " + interception.response.statusCode);
        cy.log("Response body: " + JSON.stringify(interception.response.body));

        expect(interception.response.statusCode).to.eq(200);
      });
    });
  });

  it("should login", () => {
    cy.intercept("POST", "/api/token").as("token");

    cy.visit("/").then(() => {
      cy.get("button")
        .contains("Login")
        .click()
        .then(() => {
          cy.get("input[name='email']").type("test@test.com");
          cy.get("input[name='password']").type("password");
          cy.get("button[type='submit']").contains("Login").click();

          cy.wait("@token").then((interception) => {
            if (!interception.response) {
              cy.log("No response received!");
              throw new Error("token request did not return a response");
            }

            cy.log("Response status: " + interception.response.statusCode);
            cy.log("Response body: " + JSON.stringify(interception.response.body));

            expect(interception.response.statusCode).to.eq(200);
          });
        });
    });
  });
});

@@ -1,19 +1,34 @@
-describe("Job", () => {
+describe.only("Job", () => {
   it("should create a job", () => {
+    cy.intercept("POST", "/api/submit-scrape-job").as("submitScrapeJob");
+
     cy.visit("/");

-    const input = cy.get('[data-cy="url-input"]');
-    input.type("https://example.com");
+    cy.get('[data-cy="url-input"]').type("https://example.com");
+    cy.get('[data-cy="name-field"]').type("example");
+    cy.get('[data-cy="xpath-field"]').type("//body");
+    cy.get('[data-cy="add-button"]').click();

-    const nameField = cy.get('[data-cy="name-field"]');
-    const xPathField = cy.get('[data-cy="xpath-field"]');
-    const addButton = cy.get('[data-cy="add-button"]');
+    cy.contains("Submit").click();

-    nameField.type("example");
-    xPathField.type("//body");
-    addButton.click();
+    cy.wait("@submitScrapeJob").then((interception) => {
+      if (!interception.response) {
+        cy.log("No response received!");
+        cy.log("Request body: " + JSON.stringify(interception.request?.body));
+        throw new Error("submitScrapeJob request did not return a response");
+      }

-    const submit = cy.contains("Submit");
-    submit.click();
+      cy.log("Response status: " + interception.response.statusCode);
+      cy.log("Response body: " + JSON.stringify(interception.response.body));
+
+      expect(interception.response.statusCode).to.eq(200);
+    });

+    cy.get("li").contains("Previous Jobs").click();
+
+    cy.contains("div", "https://example.com", { timeout: 10000 }).should(
+      "exist"
+    );
+    cy.contains("div", "Completed", { timeout: 20000 }).should("exist");
   });
 });
@@ -34,4 +34,4 @@
//       visit(originalFn: CommandOriginalFn, url: string, options: Partial<VisitOptions>): Chainable<Element>
//     }
//   }
// }
// }
@@ -10,5 +10,8 @@ services:
      - "$PWD/package-lock.json:/app/package-lock.json"
      - "$PWD/tsconfig.json:/app/tsconfig.json"
  scraperr_api:
    environment:
      - LOG_LEVEL=INFO
    volumes:
      - "$PWD/api:/project/api"
      - "$PWD/scraping:/project/scraping"
@@ -23,25 +23,17 @@ services:
      dockerfile: docker/api/Dockerfile
    environment:
      - LOG_LEVEL=INFO
      - MONGODB_URI=mongodb://root:example@webscrape-mongo:27017 # used to access MongoDB
      - SECRET_KEY=your_secret_key # used to encode authentication tokens (can be a random string)
      - SECRET_KEY=MRo9PfasPibnqFeK4Oswb6Z+PhFmjzdvxZzwdAkbf/Y= # used to encode authentication tokens (can be a random string)
      - ALGORITHM=HS256 # authentication encoding algorithm
      - ACCESS_TOKEN_EXPIRE_MINUTES=600 # access token expire minutes
    container_name: scraperr_api
    ports:
      - 8000:8000
    volumes:
      - "$PWD/data:/project/data"
      - "$PWD/media:/project/media"
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - web
  mongo:
    container_name: webscrape-mongo
    image: mongo
    restart: always
    environment:
      MONGO_INITDB_ROOT_USERNAME: root
      MONGO_INITDB_ROOT_PASSWORD: example
    networks:
      - web
networks:
  web:
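The compose change above swaps the your_secret_key placeholder for a concrete base64 value. As the inline comment notes, any random string works; a sketch of generating one with Node's built-in crypto module, equivalent to openssl rand -base64 32:

// Generate a random 256-bit key, base64-encoded, suitable for SECRET_KEY.
import { randomBytes } from "node:crypto";

const secretKey: string = randomBytes(32).toString("base64");
console.log(secretKey); // e.g. "MRo9PfasPibnqFeK4Oswb6Z+PhFmjzdvxZzwdAkbf/Y="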
@@ -1,5 +1,5 @@
# Build next dependencies
FROM node:latest
FROM node:23.1
WORKDIR /app

COPY package*.json ./
@@ -15,6 +15,4 @@ COPY src /app/src

RUN npm run build

EXPOSE 3000

# CMD [ "npm", "run" ]
EXPOSE 3000
@@ -1,4 +0,0 @@
tls:
  certificates:
    - certFile: /etc/certs/ssl-cert.pem
      keyFile: /etc/certs/ssl-cert.key
37
ipython.py
@@ -1,37 +0,0 @@
# STL
import os

# PDM
import boto3
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()


def test_insert_and_delete():
    # Get environment variables
    region_name = os.getenv("AWS_REGION")
    # Initialize DynamoDB resource
    dynamodb = boto3.resource("dynamodb", region_name=region_name)
    table = dynamodb.Table("scrape")

    # Item to insert
    item = {
        "id": "123",  # Replace with the appropriate id value
        "attribute1": "value1",
        "attribute2": "value2",
        # Add more attributes as needed
    }

    # Insert the item
    table.put_item(Item=item)
    print(f"Inserted item: {item}")

    # Delete the item
    table.delete_item(Key={"id": "123"})  # Replace with the appropriate id value
    print(f"Deleted item with id: {item['id']}")


if __name__ == "__main__":
    test_insert_and_delete()
15660
package-lock.json
generated
File diff suppressed because it is too large
12
package.json
@@ -19,6 +19,7 @@
    "bootstrap": "^5.3.0",
    "chart.js": "^4.4.3",
    "cookie": "^0.6.0",
    "dotenv": "^16.5.0",
    "framer-motion": "^4.1.17",
    "js-cookie": "^3.0.5",
    "next": "^14.2.4",
@@ -31,7 +32,6 @@
    "react-modal-image": "^2.6.0",
    "react-router": "^6.14.1",
    "react-router-dom": "^6.14.1",
    "react-scripts": "^5.0.1",
    "react-spinners": "^0.14.1",
    "typescript": "^4.9.5",
    "web-vitals": "^2.1.4"
@@ -63,12 +63,18 @@
    ]
  },
  "devDependencies": {
    "@types/cypress": "^0.1.6",
    "@types/cypress": "^1.1.6",
    "@types/js-cookie": "^3.0.6",
    "cypress": "^13.15.0",
    "autoprefixer": "^10.4.21",
    "cypress": "^13.17.0",
    "eslint": "^9.26.0",
    "postcss": "^8.5.3",
    "tailwindcss": "^3.3.5"
  },
  "overrides": {
    "react-refresh": "0.11.0"
  },
  "resolutions": {
    "postcss": "^8.4.31"
  }
}
@@ -2,9 +2,7 @@
name = "web-scrape"
version = "0.1.0"
description = ""
authors = [
    {name = "Jayden Pyles", email = "jpylesbuisness@gmail.com"},
]
authors = [{ name = "Jayden Pyles", email = "jpylesbuisness@gmail.com" }]
dependencies = [
    "uvicorn>=0.30.1",
    "fastapi>=0.111.0",
@@ -39,20 +37,19 @@ dependencies = [
    "exceptiongroup>=1.2.2",
    "Faker>=30.6.0",
    "pytest-asyncio>=0.24.0",
    "python-multipart>=0.0.12",
    "python-multipart>=0.0.1",
    "bcrypt==4.0.1",
    "apscheduler>=3.11.0",
]
requires-python = ">=3.10"
readme = "README.md"
license = {text = "MIT"}
license = { text = "MIT" }

[tool.pdm]
distribution = true

[tool.pdm.dev-dependencies]
dev = [
    "ipython>=8.26.0",
    "pytest>=8.3.3",
]
dev = ["ipython>=8.26.0", "pytest>=8.3.3"]
[tool.pyright]
include = ["./api/backend/"]
exclude = ["**/node_modules", "**/__pycache__"]
@@ -60,14 +57,42 @@ ignore = []
defineConstant = { DEBUG = true }
stubPath = ""

reportUnknownMemberType = false
reportMissingImports = true
reportMissingTypeStubs = false
reportAny = false
reportCallInDefaultInitializer = false
# Type checking strictness
typeCheckingMode = "strict" # Enables strict type checking mode
reportPrivateUsage = "none"
reportMissingTypeStubs = "none"
reportUntypedFunctionDecorator = "error"
reportUntypedClassDecorator = "error"
reportUntypedBaseClass = "error"
reportInvalidTypeVarUse = "error"
reportUnnecessaryTypeIgnoreComment = "information"
reportUnknownVariableType = "none"
reportUnknownMemberType = "none"
reportUnknownParameterType = "none"

pythonVersion = "3.9"
pythonPlatform = "Linux"
# Additional checks
reportImplicitStringConcatenation = "error"
reportInvalidStringEscapeSequence = "error"
reportMissingImports = "error"
reportMissingModuleSource = "error"
reportOptionalCall = "error"
reportOptionalIterable = "error"
reportOptionalMemberAccess = "error"
reportOptionalOperand = "error"
reportOptionalSubscript = "error"
reportTypedDictNotRequiredAccess = "error"

# Function return type checking
reportIncompleteStub = "error"
reportIncompatibleMethodOverride = "error"
reportInvalidStubStatement = "error"
reportInconsistentOverload = "error"

# Misc settings
pythonVersion = "3.10" # Matches your Python version from pyproject.toml
strictListInference = true
strictDictionaryInference = true
strictSetInference = true


[tool.isort]
@@ -2,7 +2,7 @@

import React from "react";
import { useAuth } from "../../../contexts/AuthContext";
import { Box, Drawer, Divider } from "@mui/material";
import { Box, Drawer } from "@mui/material";

import { QuickSettings } from "../../nav/quick-settings";
import { NavItems } from "./nav-items/nav-items";
@@ -7,6 +7,7 @@ import TerminalIcon from "@mui/icons-material/Terminal";
import BarChart from "@mui/icons-material/BarChart";
import AutoAwesomeIcon from "@mui/icons-material/AutoAwesome";
import { List } from "@mui/material";
import { Schedule } from "@mui/icons-material";

const items = [
  {
@@ -34,6 +35,11 @@ const items = [
    text: "View App Logs",
    href: "/logs",
  },
  {
    icon: <Schedule />,
    text: "Cron Jobs",
    href: "/cron-jobs",
  },
];

export const NavItems = () => {
@@ -15,6 +15,7 @@ import {
  Button,
  Tooltip,
  IconButton,
  TableContainer,
} from "@mui/material";
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
import StarIcon from "@mui/icons-material/Star";
@@ -52,145 +53,155 @@ export const JobQueue = ({
  const router = useRouter();

  return (
    <TableContainer component={Box} sx={{ maxHeight: "90dvh" }}>
      <Table sx={{ tableLayout: "fixed", width: "100%" }}>
        <TableHead>
          <TableRow>
            <TableCell>Select</TableCell>
            <TableCell>Id</TableCell>
            <TableCell>Url</TableCell>
            <TableCell>Elements</TableCell>
            <TableCell>Result</TableCell>
            <TableCell>Time Created</TableCell>
            <TableCell>Status</TableCell>
            <TableCell>Actions</TableCell>
          </TableRow>
        </TableHead>
        <TableBody sx={{ overflow: "auto" }}>
          {filteredJobs.map((row, index) => (
            <TableRow key={index}>
              <TableCell padding="checkbox">
                <Checkbox
                  checked={selectedJobs.has(row.id)}
                  onChange={() => onSelectJob(row.id)}
                />
                <Tooltip title="Chat with AI">
                  <span>
                    <IconButton
                      onClick={() => {
                        router.push({
                          pathname: "/chat",
                          query: {
                            job: row.id,
                          },
                        });
                      }}
                    >
                      <AutoAwesome />
                    </IconButton>
                  </span>
                </Tooltip>
                <Tooltip title="Favorite Job">
                  <span>
                    <IconButton
                      color={row.favorite ? "warning" : "default"}
                      onClick={() => {
                        onFavorite([row.id], "favorite", !row.favorite);
                        row.favorite = !row.favorite;
                      }}
                    >
                      <StarIcon />
                    </IconButton>
                  </span>
                </Tooltip>
              </TableCell>
              <TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
                <Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.id}</Box>
              </TableCell>
              <TableCell sx={{ maxWidth: 200, overflow: "auto" }}>
                <Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.url}</Box>
              </TableCell>
              <TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
                <Box sx={{ maxHeight: 100, overflow: "auto" }}>
                  {JSON.stringify(row.elements)}
                </Box>
              </TableCell>
              <TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}>
                <Accordion sx={{ margin: 0, padding: 0.5 }}>
                  <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    sx={{
                      minHeight: 0,
                      "&.Mui-expanded": { minHeight: 0 },
                    }}
                  >
                    <Box
                      sx={{
                        maxHeight: 150,
                        overflow: "auto",
                        width: "100%",
                      }}
                    >
                      <Typography sx={{ fontSize: "0.875rem" }}>
                        Show Result
                      </Typography>
                    </Box>
                  </AccordionSummary>
                  <AccordionDetails sx={{ padding: 1 }}>
                    <Box sx={{ maxHeight: 200, overflow: "auto" }}>
                      <Typography
                        sx={{
                          fontSize: "0.875rem",
                          whiteSpace: "pre-wrap",
                        }}
                      >
                        {JSON.stringify(row.result, null, 2)}
                      </Typography>
                    </Box>
                  </AccordionDetails>
                </Accordion>
              </TableCell>
              <TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
                <Box sx={{ maxHeight: 100, overflow: "auto" }}>
                  {new Date(row.time_created).toLocaleString()}
                </Box>
              </TableCell>
              <TableCell sx={{ maxWidth: 50, overflow: "auto" }}>
                <Box sx={{ maxHeight: 100, overflow: "auto" }}>
                  <Box
                    className="rounded-md p-2 text-center"
                    sx={{ bgcolor: colors[row.status] }}
                  >
                    {row.status}
                  </Box>
                </Box>
              </TableCell>
              <TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
                <Box sx={{ display: "flex", gap: 1 }}>
                  <Button
                    onClick={() => {
                      onDownload([row.id]);
                    }}
                    size="small"
                    sx={{
                      minWidth: 0,
                      padding: "4px 8px",
                      fontSize: "0.625rem",
                    }}
                  >
                    Download
                  </Button>
                  <Button
                    onClick={() =>
                      onNavigate(row.elements, row.url, row.job_options)
                    }
                    size="small"
                    sx={{
                      minWidth: 0,
                      padding: "4px 8px",
                      fontSize: "0.625rem",
                    }}
                  >
                    Rerun
                  </Button>
                </Box>
              </TableCell>
            </TableRow>
          ))}
        </TableBody>
      </Table>
    </TableContainer>
  );
};
@@ -0,0 +1,182 @@
import { Job } from "@/types";
import {
  Button,
  Dialog,
  DialogTitle,
  DialogContent,
  TextField,
  Snackbar,
  Alert,
} from "@mui/material";
import Cookies from "js-cookie";
import { useState } from "react";

export type CreateCronJobsProps = {
  availableJobs: Job[];
  user: any;
};

export const CreateCronJobs = ({
  availableJobs,
  user,
}: CreateCronJobsProps) => {
  const [open, setOpen] = useState(false);

  return (
    <>
      <Button
        variant="contained"
        color="primary"
        onClick={() => setOpen(true)}
        sx={{ borderRadius: 2 }}
      >
        Create Cron Job
      </Button>
      <CreateCronJobDialog
        open={open}
        onClose={() => setOpen(false)}
        availableJobs={availableJobs}
        user={user}
      />
    </>
  );
};

const CreateCronJobDialog = ({
  open,
  onClose,
  availableJobs,
  user,
}: {
  open: boolean;
  onClose: () => void;
  availableJobs: Job[];
  user: any;
}) => {
  const [cronExpression, setCronExpression] = useState("");
  const [jobId, setJobId] = useState("");
  const [successOpen, setSuccessOpen] = useState(false);
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [error, setError] = useState("");

  const handleSubmit = async () => {
    if (!cronExpression || !jobId) {
      setError("Please fill in all fields");
      return;
    }

    setIsSubmitting(true);
    const token = Cookies.get("token");

    try {
      const response = await fetch("/api/schedule-cron-job", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${token}`,
        },
        body: JSON.stringify({
          data: {
            cron_expression: cronExpression,
            job_id: jobId,
            user_email: user.email,
          },
        }),
      });

      if (!response.ok) {
        throw new Error("Failed to schedule job");
      }

      setSuccessOpen(true);
      setCronExpression("");
      setJobId("");
      setTimeout(() => {
        onClose();
      }, 1500);
      window.location.reload();
    } catch (error) {
      console.error(error);
      setError("Failed to create cron job");
    } finally {
      setIsSubmitting(false);
    }
  };

  const handleClose = () => {
    setSuccessOpen(false);
  };

  return (
    <>
      <Dialog
        open={open}
        onClose={onClose}
        PaperProps={{
          sx: { borderRadius: 2, minWidth: "400px" },
        }}
      >
        <DialogTitle sx={{ fontWeight: 500 }}>Create Cron Job</DialogTitle>
        <DialogContent>
          <div className="flex flex-col gap-1 mt0">
            <TextField
              label="Cron Expression"
              fullWidth
              value={cronExpression}
              onChange={(e) => setCronExpression(e.target.value)}
              variant="outlined"
              placeholder="* * * * *"
              margin="normal"
              helperText="Format: minute hour day month day-of-week"
            />

            <TextField
              label="Job ID"
              fullWidth
              value={jobId}
              onChange={(e) => setJobId(e.target.value)}
              variant="outlined"
              margin="normal"
            />

            {error && (
              <Alert severity="error" sx={{ mt: 2 }}>
                {error}
              </Alert>
            )}

            <div className="flex justify-end gap-2 mt-4">
              <Button
                variant="outlined"
                onClick={onClose}
                sx={{ borderRadius: 2 }}
              >
                Cancel
              </Button>
              <Button
                variant="contained"
                color="primary"
                onClick={handleSubmit}
                disabled={isSubmitting}
                sx={{ borderRadius: 2 }}
              >
                {isSubmitting ? "Submitting..." : "Create Job"}
              </Button>
            </div>
          </div>
        </DialogContent>
      </Dialog>

      <Snackbar
        open={successOpen}
        autoHideDuration={4000}
        onClose={handleClose}
        anchorOrigin={{ vertical: "bottom", horizontal: "right" }}
      >
        <Alert onClose={handleClose} severity="success" sx={{ width: "100%" }}>
          Cron job created successfully!
        </Alert>
      </Snackbar>
    </>
  );
};
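The dialog above sends whatever the user typed straight to /api/schedule-cron-job; only emptiness is checked. A hedged sketch of a client-side check that could run in handleSubmit before the fetch; the five-field rule is an assumption taken from the field's helper text, not a confirmed backend contract:

// Rough five-field cron check matching the "minute hour day month day-of-week"
// helper text; accepts numbers, "*", lists, ranges, and step values.
const CRON_FIELD = /^(\*|\d+)(-\d+)?(\/\d+)?(,(\*|\d+)(-\d+)?(\/\d+)?)*$/;

function looksLikeCronExpression(expression: string): boolean {
  const fields = expression.trim().split(/\s+/);
  return fields.length === 5 && fields.every((f) => CRON_FIELD.test(f));
}

console.log(looksLikeCronExpression("* * * * *")); // true
console.log(looksLikeCronExpression("0 3 * * 1-5")); // true
console.log(looksLikeCronExpression("every day")); // false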
1
src/components/pages/cron-jobs/create-cron-jobs/index.ts
Normal file
@@ -0,0 +1 @@
export * from "./create-cron-jobs";
0
src/components/pages/cron-jobs/cron-jobs.module.css
Normal file
104
src/components/pages/cron-jobs/cron-jobs.tsx
Normal file
@@ -0,0 +1,104 @@
import { Job, CronJob } from "@/types/job";
import { useState, useEffect } from "react";
import { CreateCronJobs } from "./create-cron-jobs";
import {
  Table,
  TableHead,
  TableRow,
  TableCell,
  TableBody,
  Button,
  Box,
  Typography,
} from "@mui/material";
import Cookies from "js-cookie";

export type CronJobsProps = {
  initialJobs: Job[];
  initialCronJobs: CronJob[];
  initialUser: any;
};

export const CronJobs = ({
  initialJobs,
  initialCronJobs,
  initialUser,
}: CronJobsProps) => {
  const [jobs, setJobs] = useState<Job[]>(initialJobs);
  const [cronJobs, setCronJobs] = useState<CronJob[]>(initialCronJobs);
  const [user, setUser] = useState<any>(initialUser);

  useEffect(() => {
    setJobs(initialJobs);
    setCronJobs(initialCronJobs);
    setUser(initialUser);
  }, [initialJobs, initialCronJobs, initialUser]);

  const handleDeleteCronJob = async (id: string) => {
    const token = Cookies.get("token");
    const response = await fetch("/api/delete-cron-job", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${token}`,
      },
      body: JSON.stringify({ data: { id, user_email: user.email } }),
    });

    if (response.ok) {
      console.log("Cron job deleted successfully");
      setCronJobs(cronJobs.filter((cronJob) => cronJob.id !== id));
    } else {
      console.error("Failed to delete cron job");
    }
  };

  if (!user) {
    return (
      <Box>
        <Typography variant="h6">
          Please login to view your cron jobs
        </Typography>
      </Box>
    );
  }

  return (
    <div>
      <CreateCronJobs availableJobs={jobs} user={user} />

      <Table>
        <TableHead>
          <TableRow>
            <TableCell>Cron Expression</TableCell>
            <TableCell>Job ID</TableCell>
            <TableCell>User Email</TableCell>
            <TableCell>Created At</TableCell>
            <TableCell>Updated At</TableCell>
            <TableCell>Actions</TableCell>
          </TableRow>
        </TableHead>
        <TableBody>
          {cronJobs.map((cronJob) => (
            <TableRow key={cronJob.id}>
              <TableCell>{cronJob.cron_expression}</TableCell>
              <TableCell>{cronJob.job_id}</TableCell>
              <TableCell>{cronJob.user_email}</TableCell>
              <TableCell>
                {new Date(cronJob.time_created).toLocaleString()}
              </TableCell>
              <TableCell>
                {new Date(cronJob.time_updated).toLocaleString()}
              </TableCell>
              <TableCell>
                <Button onClick={() => handleDeleteCronJob(cronJob.id)}>
                  Delete
                </Button>
              </TableCell>
            </TableRow>
          ))}
        </TableBody>
      </Table>
    </div>
  );
};
62
src/components/pages/cron-jobs/get-server-side-props.ts
Normal file
@@ -0,0 +1,62 @@
import axios from "axios";
import { GetServerSideProps } from "next";
import { parseCookies } from "nookies";
import { CronJob, Job } from "../../../types";

export const getServerSideProps: GetServerSideProps = async (context) => {
  const { req } = context;
  const cookies = parseCookies({ req });
  const token = cookies.token;
  let user = null;
  let initialJobs: Job[] = [];
  let initialCronJobs: CronJob[] = [];
  if (token) {
    try {
      const userResponse = await axios.get(
        `${process.env.NEXT_PUBLIC_API_URL}/api/auth/users/me`,
        {
          headers: { Authorization: `Bearer ${token}` },
        }
      );

      user = userResponse.data;

      const jobsResponse = await fetch(
        `${process.env.NEXT_PUBLIC_API_URL}/api/retrieve-scrape-jobs`,
        {
          method: "POST",
          body: JSON.stringify({ user: user.email }),
          headers: {
            "content-type": "application/json",
            Authorization: `Bearer ${token}`,
          },
        }
      );

      initialJobs = await jobsResponse.json();
      console.log(initialJobs);

      const cronJobsResponse = await fetch(
        `${process.env.NEXT_PUBLIC_API_URL}/api/cron-jobs`,
        {
          headers: {
            "content-type": "application/json",
            Authorization: `Bearer ${token}`,
          },
        }
      );

      initialCronJobs = await cronJobsResponse.json();
    } catch (error) {
      console.error("Error fetching user or jobs:", error);
    }
  }

  return {
    props: {
      initialJobs,
      initialUser: user,
      initialCronJobs,
    },
  };
};
1
src/components/pages/cron-jobs/index.ts
Normal file
@@ -0,0 +1 @@
export { CronJobs } from "./cron-jobs";
107
src/components/pages/home/home.tsx
Normal file
@@ -0,0 +1,107 @@
"use client";

import React, { useState, useEffect, useRef } from "react";
import { Button, Container, Box, Snackbar, Alert } from "@mui/material";
import { useRouter } from "next/router";
import { Element, Result } from "@/types";
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";

export const Home = () => {
  const {
    submittedURL,
    setSubmittedURL,
    rows,
    setRows,
    results,
    snackbarOpen,
    setSnackbarOpen,
    snackbarMessage,
    snackbarSeverity,
  } = useJobSubmitterProvider();
  const router = useRouter();
  const { elements, url } = router.query;

  const resultsRef = useRef<HTMLTableElement | null>(null);

  useEffect(() => {
    if (elements) {
      setRows(JSON.parse(elements as string));
    }
    if (url) {
      setSubmittedURL(url as string);
    }
  }, [elements, url]);

  useEffect(() => {
    if (results && resultsRef.current) {
      resultsRef.current.scrollIntoView({ behavior: "smooth" });
    }
  }, [results]);

  const handleCloseSnackbar = () => {
    setSnackbarOpen(false);
  };

  const ErrorSnackbar = () => {
    return (
      <Snackbar
        open={snackbarOpen}
        autoHideDuration={6000}
        onClose={handleCloseSnackbar}
      >
        <Alert onClose={handleCloseSnackbar} severity="error">
          {snackbarMessage}
        </Alert>
      </Snackbar>
    );
  };

  const NotifySnackbar = () => {
    const goTo = () => {
      router.push("/jobs");
    };

    const action = (
      <Button color="inherit" size="small" onClick={goTo}>
        Go To Job
      </Button>
    );

    return (
      <Snackbar
        open={snackbarOpen}
        autoHideDuration={6000}
        onClose={handleCloseSnackbar}
      >
        <Alert onClose={handleCloseSnackbar} severity="info" action={action}>
          {snackbarMessage}
        </Alert>
      </Snackbar>
    );
  };

  return (
    <Box
      bgcolor="background.default"
      display="flex"
      flexDirection="column"
      justifyContent="center"
      alignItems="center"
      height="100%"
      py={4}
    >
      <Container maxWidth="lg" className="overflow-y-auto max-h-full">
        <JobSubmitter />
        {submittedURL.length ? (
          <ElementTable
            rows={rows}
            setRows={setRows}
            submittedURL={submittedURL}
          />
        ) : null}
      </Container>
      {snackbarSeverity === "info" ? <NotifySnackbar /> : <ErrorSnackbar />}
    </Box>
  );
};
1
src/components/pages/home/index.ts
Normal file
@@ -0,0 +1 @@
export * from "./home";
@@ -1,2 +1 @@
export * from "./ElementTable";
export * from "./job-submitter";
@@ -15,9 +15,11 @@ import {
  IconButton,
  Tooltip,
  useTheme,
  Divider,
} from "@mui/material";
import AddIcon from "@mui/icons-material/Add";
import { Element } from "../../types";
import { Element } from "@/types";
import { SiteMap } from "../site-map";

interface Props {
  rows: Element[];
@@ -169,6 +171,13 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
        </div>
      </TableContainer>
    </Box>
    <Divider
      sx={{
        borderColor: theme.palette.mode === "dark" ? "#ffffff" : "0000000",
        marginBottom: 2,
      }}
    />
    <SiteMap />
  </Box>
);
};
@@ -0,0 +1 @@
export { ElementTable } from "./element-table";
@@ -1 +1,2 @@
export { JobSubmitter } from "./job-submitter";
export { ElementTable } from "./element-table";
@@ -1,26 +1,20 @@
import React from "react";
import { TextField, Button, CircularProgress } from "@mui/material";
import { useJobSubmitterProvider } from "../provider";

export type JobSubmitterInputProps = {
  urlError: string | null;
  handleSubmit: () => void;
  loading: boolean;
};

export const JobSubmitterInput = ({
  handleSubmit,
  loading,
  urlError,
}: JobSubmitterInputProps) => {
  const { submittedURL, setSubmittedURL, isValidURL, rows } =
    useJobSubmitterProvider();
  return (
    <div className="flex flex-row space-x-4 items-center mb-2">
      <TextField
@@ -14,9 +14,9 @@ export type JobSubmitterOptionsProps = {
export const JobSubmitterOptions = ({
  jobOptions,
  setJobOptions,
  customJSONSelected,
  setCustomJSONSelected,
  handleSelectProxies,
  proxiesSelected,
}: JobSubmitterOptionsProps) => {
  const handleMultiPageScrapeChange = () => {
@@ -42,6 +42,13 @@ export const JobSubmitterOptions = ({
    }));
  };

  const handleCollectMediaChange = () => {
    setJobOptions((prevJobOptions) => ({
      ...prevJobOptions,
      collect_media: !prevJobOptions.collect_media,
    }));
  };

  return (
    <Box bgcolor="background.paper" className="flex flex-col mb-2 rounded-md">
      <div id="options" className="p-2 flex flex-row space-x-2">
@@ -94,6 +101,15 @@ export const JobSubmitterOptions = ({
          />
        }
      ></FormControlLabel>
      <FormControlLabel
        label="Collect Media"
        control={
          <Checkbox
            checked={jobOptions.collect_media}
            onChange={handleCollectMediaChange}
          />
        }
      />
    </div>
    {customJSONSelected ? (
      <div id="custom-json" className="pl-2 pr-2 pb-2">
@@ -1,7 +1,6 @@
"use client";

import React, { useEffect, useState } from "react";
import { useAuth } from "@/contexts/AuthContext";
import { useRouter } from "next/router";
import { RawJobOptions } from "@/types/job";
@@ -10,29 +9,16 @@ import { JobSubmitterHeader } from "./job-submitter-header";
import { JobSubmitterInput } from "./job-submitter-input";
import { JobSubmitterOptions } from "./job-submitter-options";
import { ApiService } from "@/services";
import { useJobSubmitterProvider } from "./provider";

const initialJobOptions: RawJobOptions = {
  multi_page_scrape: false,
  custom_headers: null,
  proxies: null,
  collect_media: false,
};

export const JobSubmitter = () => {
  const { user } = useAuth();
  const router = useRouter();
  const { job_options } = router.query;
@@ -40,11 +26,13 @@ export const JobSubmitter = ({ stateProps }: Props) => {
  const {
    submittedURL,
    rows,
    siteMap,
    setIsValidUrl,
    setSnackbarMessage,
    setSnackbarOpen,
    setSnackbarSeverity,
    setSiteMap,
  } = useJobSubmitterProvider();

  const [urlError, setUrlError] = useState<string | null>(null);
  const [loading, setLoading] = useState<boolean>(false);
@@ -87,7 +75,8 @@ export const JobSubmitter = ({ stateProps }: Props) => {
        rows,
        user,
        jobOptions,
        customHeaders,
        siteMap
      )
        .then(async (response) => {
          if (!response.ok) {
@@ -120,31 +109,28 @@ export const JobSubmitter = ({ stateProps }: Props) => {
        job_options as string,
        setCustomJSONSelected,
        setProxiesSelected,
        setJobOptions,
        setSiteMap
      );
    }
  }, [job_options]);

  return (
    <div>
      <JobSubmitterHeader />
      <JobSubmitterInput
        urlError={urlError}
        handleSubmit={handleSubmit}
        loading={loading}
      />
      <JobSubmitterOptions
        jobOptions={jobOptions}
        setJobOptions={setJobOptions}
        customJSONSelected={customJSONSelected}
        setCustomJSONSelected={setCustomJSONSelected}
        handleSelectProxies={handleSelectProxies}
        proxiesSelected={proxiesSelected}
      />
    </div>
  );
};
84
src/components/submit/job-submitter/provider.tsx
Normal file
@@ -0,0 +1,84 @@
import React, {
  createContext,
  PropsWithChildren,
  useContext,
  useState,
  Dispatch,
  useMemo,
} from "react";
import { Element, Result, SiteMap } from "@/types";

type JobSubmitterProviderType = {
  submittedURL: string;
  setSubmittedURL: Dispatch<React.SetStateAction<string>>;
  rows: Element[];
  setRows: Dispatch<React.SetStateAction<Element[]>>;
  results: Result;
  setResults: Dispatch<React.SetStateAction<Result>>;
  snackbarOpen: boolean;
  setSnackbarOpen: Dispatch<React.SetStateAction<boolean>>;
  snackbarMessage: string;
  setSnackbarMessage: Dispatch<React.SetStateAction<string>>;
  snackbarSeverity: string;
  setSnackbarSeverity: Dispatch<React.SetStateAction<string>>;
  isValidURL: boolean;
  setIsValidUrl: Dispatch<React.SetStateAction<boolean>>;
  siteMap: SiteMap | null;
  setSiteMap: Dispatch<React.SetStateAction<SiteMap | null>>;
};

const JobSubmitterProvider = createContext<JobSubmitterProviderType>(
  {} as JobSubmitterProviderType
);

export const Provider = ({ children }: PropsWithChildren) => {
  const [submittedURL, setSubmittedURL] = useState<string>("");
  const [rows, setRows] = useState<Element[]>([]);
  const [results, setResults] = useState<Result>({});
  const [snackbarOpen, setSnackbarOpen] = useState<boolean>(false);
  const [snackbarMessage, setSnackbarMessage] = useState<string>("");
  const [snackbarSeverity, setSnackbarSeverity] = useState<string>("error");
  const [isValidURL, setIsValidUrl] = useState<boolean>(true);
  const [siteMap, setSiteMap] = useState<SiteMap | null>(null);

  const value: JobSubmitterProviderType = useMemo(
    () => ({
      submittedURL,
      setSubmittedURL,
      rows,
      setRows,
      results,
      setResults,
      snackbarOpen,
      setSnackbarOpen,
      snackbarMessage,
      setSnackbarMessage,
      snackbarSeverity,
      setSnackbarSeverity,
      isValidURL,
      setIsValidUrl,
      siteMap,
      setSiteMap,
    }),
    [
      submittedURL,
      rows,
      results,
      snackbarOpen,
      snackbarMessage,
      snackbarSeverity,
      isValidURL,
      siteMap,
    ]
  );

  return (
    <JobSubmitterProvider.Provider value={value}>
      {children}
    </JobSubmitterProvider.Provider>
  );
};

export const useJobSubmitterProvider = () => {
  return useContext(JobSubmitterProvider);
};
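Components consume this context through the useJobSubmitterProvider hook rather than receiving submitter state as props. A minimal sketch of the consumption pattern; the component here is illustrative, not part of the repo:

// Illustrative consumer: reads shared submitter state from the context.
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";

export const SubmittedUrlBadge = () => {
  const { submittedURL, isValidURL } = useJobSubmitterProvider();

  if (!submittedURL) return null;
  return <span>{isValidURL ? submittedURL : `invalid: ${submittedURL}`}</span>;
};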
1
src/components/submit/job-submitter/site-map/index.ts
Normal file
@@ -0,0 +1 @@
export * from "./site-map";
@@ -0,0 +1 @@
export * from "./site-map-input";
@@ -0,0 +1,22 @@
.button {
  height: 3rem;
  width: 2rem;

  color: #ffffff;
  font-weight: 600;
  border-radius: 0.375rem;
  transition: transform 0.2s ease-in-out;
  transform: scale(1);
}

.button:hover {
  transform: scale(1.05);
}

.remove {
  background-color: var(--delete-red) !important;
}

.remove:hover {
  background-color: var(--delete-red-hover) !important;
}
@@ -0,0 +1,135 @@
import { useState } from "react";
import { useJobSubmitterProvider } from "../../provider";
import {
  MenuItem,
  Select,
  TextField,
  FormControl,
  Button,
  Checkbox,
  FormControlLabel,
} from "@mui/material";
import { ActionOption } from "@/types/job";
import classes from "./site-map-input.module.css";
import { clsx } from "clsx";

export type SiteMapInputProps = {
  disabled?: boolean;
  xpath?: string;
  option?: ActionOption;
  clickOnce?: boolean;
  input?: string;
};

export const SiteMapInput = ({
  disabled,
  xpath,
  option,
  clickOnce,
  input,
}: SiteMapInputProps) => {
  console.log(clickOnce);
  const [optionState, setOptionState] = useState<ActionOption>(
    option || "click"
  );
  const [xpathState, setXpathState] = useState<string>(xpath || "");
  const [clickOnceState, setClickOnceState] = useState<boolean>(
    clickOnce || false
  );
  const [inputState, setInputState] = useState<string>(input || "");

  const { siteMap, setSiteMap } = useJobSubmitterProvider();

  const handleAdd = () => {
    if (!siteMap) return;

    console.log(optionState, xpathState, clickOnceState, inputState);

    setSiteMap((prevSiteMap) => ({
      ...prevSiteMap,
      actions: [
        {
          type: optionState,
          xpath: xpathState,
          name: "",
          do_once: clickOnceState,
          input: inputState,
        },
        ...(prevSiteMap?.actions || []),
      ],
    }));

    setXpathState("");
  };

  const handleRemove = () => {
    if (!siteMap) return;

    setSiteMap((prevSiteMap) => ({
      ...prevSiteMap,
      actions: (prevSiteMap?.actions || []).slice(0, -1),
    }));
  };

  return (
    <div className="flex flex-col gap-2 w-full">
      <div className="flex gap-2 items-center">
        <FormControl className="w-1/4">
          <Select
            disabled={disabled}
            displayEmpty
            value={optionState}
            onChange={(e) => setOptionState(e.target.value as ActionOption)}
          >
            <MenuItem value="click">Click</MenuItem>
            <MenuItem value="input">Input</MenuItem>
          </Select>
        </FormControl>
        {optionState === "input" && (
          <TextField
            label="Input Text"
            fullWidth
            value={inputState}
            onChange={(e) => setInputState(e.target.value)}
            disabled={disabled}
          />
        )}
        <TextField
          label="XPath Selector"
          fullWidth
          value={xpathState}
          onChange={(e) => setXpathState(e.target.value)}
          disabled={disabled}
        />
        {disabled ? (
          <Button
            onClick={handleRemove}
            className={clsx(classes.button, classes.remove)}
          >
            Delete
          </Button>
        ) : (
          <Button
            onClick={handleAdd}
            disabled={!xpathState}
            className={clsx(classes.button, classes.add)}
          >
            Add
          </Button>
        )}
      </div>
      {!disabled && (
        <FormControlLabel
          label="Do Once"
          control={
            <Checkbox
              checked={clickOnceState}
              disabled={disabled}
              onChange={() => setClickOnceState(!clickOnceState)}
            />
          }
        />
      )}
    </div>
  );
};
70
src/components/submit/job-submitter/site-map/site-map.tsx
Normal file
@@ -0,0 +1,70 @@
import { useEffect, useState } from "react";
import { useJobSubmitterProvider } from "../provider";
import { Button, Divider, Typography, useTheme } from "@mui/material";
import { SiteMapInput } from "./site-map-input";

export const SiteMap = () => {
  const { siteMap, setSiteMap } = useJobSubmitterProvider();
  const [showSiteMap, setShowSiteMap] = useState<boolean>(false);
  const theme = useTheme();

  const handleCreateSiteMap = () => {
    setSiteMap({ actions: [] });
    setShowSiteMap(true);
  };

  const handleClearSiteMap = () => {
    setSiteMap(null);
    setShowSiteMap(false);
  };

  useEffect(() => {
    if (siteMap) {
      setShowSiteMap(true);
    }
  }, [siteMap]);

  return (
    <div className="flex flex-col gap-4">
      {siteMap ? (
        <Button onClick={handleClearSiteMap}>Clear Site Map</Button>
      ) : (
        <Button onClick={handleCreateSiteMap}>Create Site Map</Button>
      )}
      {showSiteMap && (
        <div className="flex flex-col gap-4">
          <SiteMapInput />
          {siteMap?.actions && siteMap?.actions.length > 0 && (
            <>
              <Divider
                sx={{
                  borderColor:
                    theme.palette.mode === "dark" ? "#ffffff" : "0000000",
                }}
              />
              <Typography className="w-full text-center" variant="h5">
                Site Map Actions
              </Typography>
            </>
          )}
          <ul className="flex flex-col gap-4">
            {siteMap?.actions.reverse().map((action, index) => (
              <li key={action.xpath} className="flex w-full items-center">
                <Typography variant="h6" className="w-[10%] mr-2">
                  Action {index + 1}:
                </Typography>
                <SiteMapInput
                  disabled={Boolean(siteMap)}
                  xpath={action.xpath}
                  option={action.type}
                  clickOnce={action.do_once}
                  input={action.input}
                />
              </li>
            ))}
          </ul>
        </div>
      )}
    </div>
  );
};
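One subtlety in the list rendering above: Array.prototype.reverse mutates in place, so calling it on siteMap.actions during render flips the stored order on every re-render. A non-mutating sketch of the same display order, offered as an alternative rather than the repo's approach:

// Non-mutating reversal: copy first, so repeated renders don't flip state.
function displayOrder<T>(actions: readonly T[] | undefined): T[] {
  return [...(actions ?? [])].reverse();
}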
@@ -1,6 +1,5 @@
import React, { createContext, useContext, useState, useEffect } from "react";
import axios from "axios";
import { Constants } from "../lib";
import Cookies from "js-cookie";

interface AuthContextProps {
@@ -55,6 +54,7 @@ export const AuthProvider: React.FC<AuthProps> = ({ children }) => {
    const userResponse = await axios.get(`/api/me`, {
      headers: { Authorization: `Bearer ${response.data.access_token}` },
    });

    setUser(userResponse.data);
    setIsAuthenticated(true);
  };
@@ -1,12 +1,13 @@
import { Dispatch, SetStateAction } from "react";

import { RawJobOptions } from "@/types";
import { RawJobOptions, SiteMap } from "@/types";

export const parseJobOptions = (
  job_options: string,
  setCustomJSONSelected: Dispatch<SetStateAction<boolean>>,
  setProxiesSelected: Dispatch<SetStateAction<boolean>>,
  setJobOptions: Dispatch<SetStateAction<RawJobOptions>>
  setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
  setSiteMap: Dispatch<SetStateAction<any>>
) => {
  if (job_options) {
    const jsonOptions = JSON.parse(job_options as string);
@@ -14,6 +15,7 @@ export const parseJobOptions = (
      multi_page_scrape: false,
      custom_headers: null,
      proxies: null,
      collect_media: false,
    };

    if (
@@ -31,6 +33,10 @@ export const parseJobOptions = (
      newJobOptions.proxies = jsonOptions.proxies.join(",");
    }

    if (jsonOptions.site_map) {
      setSiteMap(jsonOptions.site_map);
    }

    setJobOptions(newJobOptions);
  }
};
39
src/pages/api/delete-cron-job.ts
Normal file
@@ -0,0 +1,39 @@
import { NextApiRequest, NextApiResponse } from "next";

export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  if (req.method === "POST") {
    const { data } = req.body;
    console.log("Data", data);

    const headers = new Headers();
    headers.set("content-type", "application/json");

    try {
      const response = await fetch(
        `${global.process.env.NEXT_PUBLIC_API_URL}/api/delete-cron-job`,
        {
          method: "POST",
          headers,
          body: JSON.stringify(data),
        }
      );

      if (!response.ok) {
        console.error(response);
        throw new Error(`Error: ${response.statusText}`);
      }

      const result = await response.json();
      res.status(200).json(result);
    } catch (error) {
      console.error("Error deleting cron job:", error);
      res.status(500).json({ error: "Internal Server Error" });
    }
  } else {
    res.setHeader("Allow", ["POST"]);
    res.status(405).end(`Method ${req.method} Not Allowed`);
  }
}
39
src/pages/api/schedule-cron-job.ts
Normal file
@@ -0,0 +1,39 @@
import { NextApiRequest, NextApiResponse } from "next";

export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  if (req.method === "POST") {
    const { data } = req.body;
    console.log("Data", data);

    const headers = new Headers();
    headers.set("content-type", "application/json");

    try {
      const response = await fetch(
        `${global.process.env.NEXT_PUBLIC_API_URL}/api/schedule-cron-job`,
        {
          method: "POST",
          headers,
          body: JSON.stringify(data),
        }
      );

      if (!response.ok) {
        console.error(response);
        throw new Error(`Error: ${response.statusText}`);
      }

      const result = await response.json();
      res.status(200).json(result);
    } catch (error) {
      console.error("Error scheduling cron job:", error);
      res.status(500).json({ error: "Internal Server Error" });
    }
  } else {
    res.setHeader("Allow", ["POST"]);
    res.status(405).end(`Method ${req.method} Not Allowed`);
  }
}
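Note that this proxy forwards only a content-type header to the backend, even though the client sends a Bearer token with the original request. If the backend route checks that token, the handler would also need to pass the incoming Authorization header along; a hedged sketch of that variant, assuming the same request shape:

// Sketch: forward the caller's Authorization header to the backend.
import { NextApiRequest, NextApiResponse } from "next";

export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  const headers = new Headers({ "content-type": "application/json" });
  if (req.headers.authorization) {
    headers.set("authorization", req.headers.authorization);
  }

  const response = await fetch(
    `${process.env.NEXT_PUBLIC_API_URL}/api/schedule-cron-job`,
    { method: "POST", headers, body: JSON.stringify(req.body.data) }
  );
  res.status(response.status).json(await response.json());
}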
4
src/pages/cron-jobs.tsx
Normal file
@@ -0,0 +1,4 @@
import { CronJobs } from "../components/pages/cron-jobs";
import { getServerSideProps } from "../components/pages/cron-jobs/get-server-side-props";
export { getServerSideProps };
export default CronJobs;
@@ -1,117 +1,10 @@
import { Provider as JobSubmitterProvider } from "@/components/submit/job-submitter/provider";
import { Home } from "@/components/pages/home/home";

export default function Main() {
  return (
    <JobSubmitterProvider>
      <Home />
    </JobSubmitterProvider>
  );
}
@@ -1,9 +1,12 @@
import { SiteMap } from "@/types/job";

export const submitJob = async (
  submittedURL: string,
  rows: any[],
  user: any,
  jobOptions: any,
  customHeaders: any,
  siteMap: SiteMap | null
) => {
  return await fetch(`/api/submit-scrape-job`, {
    method: "POST",
@@ -16,8 +19,10 @@ export const submitJob = async (
      time_created: new Date().toISOString(),
      job_options: {
        ...jobOptions,
        collect_media: jobOptions.collect_media || false,
        custom_headers: customHeaders || {},
        proxies: jobOptions.proxies ? jobOptions.proxies.split(",") : [],
        site_map: siteMap,
      },
    },
  }),
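For reference, a hedged sketch of what a call to the updated submitJob might look like from the submitter. The values are illustrative, and the row shape is a guess for illustration rather than the repo's exact Element type:

// Illustrative invocation: one element, no proxies, a one-action site map.
await submitJob(
  "https://example.com",
  [{ name: "example", xpath: "//body", url: "https://example.com" }], // assumed row shape
  { email: "test@test.com" },
  { multi_page_scrape: false, custom_headers: null, proxies: null, collect_media: false },
  null, // customHeaders
  { actions: [{ type: "click", xpath: "//button", name: "", do_once: true }] }
);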
@@ -2,6 +2,11 @@
@tailwind components;
@tailwind utilities;

:root {
  --delete-red: #ef4444;
  --delete-red-hover: #ff6969;
}

#__next {
  height: 100%;
}
@@ -34,6 +34,12 @@ const commonThemeOptions = {
  h4: {
    fontWeight: 500,
  },
  h5: {
    fontWeight: 500,
  },
  h6: {
    fontWeight: 500,
  },
  body1: {
    fontFamily: '"Schibsted Grotesk", sans-serif',
  },
@@ -175,6 +181,9 @@ const darkTheme = createTheme({
  h5: {
    color: "#ffffff",
  },
  h6: {
    color: "#ffffff",
  },
  body1: {
    ...commonThemeOptions.typography.body1,
    color: "#ffffff",
@@ -16,10 +16,35 @@ export type JobOptions = {
  multi_page_scrape: boolean;
  custom_headers: null | string;
  proxies: string[];
  site_map?: SiteMap;
};

export type RawJobOptions = {
  multi_page_scrape: boolean;
  custom_headers: string | null;
  proxies: string | null;
  collect_media: boolean;
};

export type ActionOption = "click" | "input";

export type Action = {
  type: ActionOption;
  xpath: string;
  name: string;
  do_once?: boolean;
  input?: string;
};

export type SiteMap = {
  actions: Action[];
};

export type CronJob = {
  id: string;
  user_email: string;
  job_id: string;
  cron_expression: string;
  time_created: Date;
  time_updated: Date;
};
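These types make the site-map feature concrete: a SiteMap is just an ordered list of Actions that the scraper replays before extracting elements. A small sketch of building one programmatically; the XPaths and inputs are illustrative:

import { Action, SiteMap } from "@/types/job";

// Type a search term once, click submit once: actions replay in order.
const actions: Action[] = [
  { type: "input", xpath: "//input[@name='q']", name: "", input: "scraperr", do_once: true },
  { type: "click", xpath: "//button[@type='submit']", name: "", do_once: true },
];

const siteMap: SiteMap = { actions };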
@@ -1,30 +0,0 @@
# LOCAL
from freeaskinternet.models.Models import (
    ModelCard,
    ModelList,
    SearchItem,
    SearchResp,
    ChatMessage,
    DeltaMessage,
    QueryRequest,
    SearchItemList,
    ChatCompletionRequest,
    ChatCompletionResponse,
    ChatCompletionResponseChoice,
    ChatCompletionResponseStreamChoice,
)

__all__ = [
    "ModelCard",
    "ModelList",
    "ChatMessage",
    "DeltaMessage",
    "QueryRequest",
    "ChatCompletionRequest",
    "ChatCompletionResponseChoice",
    "ChatCompletionResponseStreamChoice",
    "ChatCompletionResponse",
    "SearchItem",
    "SearchItemList",
    "SearchResp",
]
@@ -109,5 +109,5 @@
    "isolatedModules": true
  },
  "include": ["src", "src/declaration.d.ts", "src/next-auth.d.ts"],
  "exclude": ["node_modules"]
  "exclude": ["node_modules", "src-tauri"]
}