Mirror of https://github.com/jaypyles/Scraperr.git, synced 2025-10-30 05:57:12 +00:00.

Compare commits: master...d2bc9c53ff (29 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | d2bc9c53ff |  |
|  | 734974df83 |  |
|  | b6dbd0dc82 |  |
|  | d57dd0af1a |  |
|  | 23fccd7afb |  |
|  | f89a460206 |  |
|  | 1169d48992 |  |
|  | ff809d7833 |  |
|  | 41c7f6795c |  |
|  | 21c2155786 |  |
|  | 6d45bd129c |  |
|  | 3ab31bd186 |  |
|  | e00c187e68 |  |
|  | 39c0d17e1e |  |
|  | b66963ed33 |  |
|  | 813550e07c |  |
|  | dcb4afe01b |  |
|  | 344d9036c3 |  |
|  | 96552cd1d1 |  |
|  | d4ac85d206 |  |
|  | a805b98ce3 |  |
|  | eaf047ecd8 |  |
|  | d43040fe08 |  |
|  | f0813323f0 |  |
|  | fb7986bccf |  |
|  | a1664856a6 |  |
|  | 467244b7f8 |  |
|  | b91a133b4d |  |
|  | aeed81a6df |  |
@@ -2,6 +2,13 @@ name: Run Cypress Tests
 description: Run Cypress tests
+
+inputs:
+  openai_key:
+    description: "OpenAI API key"
+    required: true
+    default: ""
+
 runs:
   using: "composite"
   steps:
@@ -15,6 +22,8 @@ runs:
     - name: Setup Docker project
      shell: bash
+      env:
+        OPENAI_KEY: ${{ inputs.openai_key }}
       run: make build-ci up-ci

     - name: Install dependencies
.github/workflows/unit-tests.yml (2 changes, vendored)
@@ -37,6 +37,8 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/run-cypress-tests
+        with:
+          openai_key: ${{ secrets.OPENAI_KEY }}

   success-message:
     runs-on: ubuntu-latest
Makefile (5 changes)
@@ -60,4 +60,7 @@ up-ci:
 	docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --force-recreate

 cypress-start:
-	DISPLAY=:0 npx cypress open
+	DISPLAY=:0 npx cypress open
+
+cypress-run:
+	npx cypress run
@@ -1,29 +1,26 @@
 # STL
 import random
 from typing import Any

 # PDM
 from camoufox import AsyncCamoufox
 from playwright.async_api import Page

 # LOCAL
+from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
+from api.backend.job.models import CapturedElement
+from api.backend.worker.logger import LOG
 from api.backend.ai.agent.utils import (
-    parse_response,
     capture_elements,
     convert_to_markdown,
+    parse_response,
 )
-
-from api.backend.ai.clients import ask_open_ai, ask_ollama, open_ai_key
-
 from api.backend.ai.agent.prompts import (
-    ELEMENT_EXTRACTION_PROMPT,
     EXTRACT_ELEMENTS_PROMPT,
+    ELEMENT_EXTRACTION_PROMPT,
 )
-
 from api.backend.job.scraping.collect_media import collect_media
-from api.backend.worker.logger import LOG
-
 from api.backend.job.scraping.add_custom import add_custom_items
-
-from api.backend.models import CapturedElement
-
-from api.backend.job.scraping.collect_media import collect_media

 ask_ai = ask_open_ai if open_ai_key else ask_ollama
@@ -1,10 +1,13 @@
-from lxml import html, etree
+# STL
+import re
+
+# PDM
+from lxml import html, etree
 from playwright.async_api import Page

-from api.backend.models import CapturedElement
-
-from api.backend.job.scraping.scraping_utils import clean_format_characters
+# LOCAL
+from api.backend.job.models import CapturedElement
+from api.backend.job.utils.text_utils import clean_text


 def convert_to_markdown(html_str: str):
@@ -231,7 +234,7 @@ async def capture_elements(
         if link:
             element_text += f" ({link})"

-        cleaned = clean_format_characters(element_text)
+        cleaned = clean_text(element_text)

         if cleaned in seen_texts:
             continue
@@ -3,24 +3,23 @@ import logging
 from collections.abc import Iterable, AsyncGenerator

 # PDM
+from ollama import Message
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai.types.chat import ChatCompletionMessageParam

 # LOCAL
-from ollama import Message
-from api.backend.models import AI
-
 from api.backend.ai.clients import (
+    llama_client,
     llama_model,
+    openai_client,
+    open_ai_model,
     open_ai_key,
-    llama_client,
-    open_ai_model,
-    openai_client,
 )
+from api.backend.ai.schemas import AI
+from api.backend.routers.handle_exceptions import handle_exceptions

-LOG = logging.getLogger(__name__)
+LOG = logging.getLogger("AI")

 ai_router = APIRouter()
@@ -64,6 +63,7 @@ chat_function = llama_chat if llama_client else openai_chat


 @ai_router.post("/ai")
+@handle_exceptions(logger=LOG)
 async def ai(c: AI):
     return StreamingResponse(
         chat_function(chat_messages=c.messages), media_type="text/plain"
@@ -71,5 +71,6 @@ async def ai(c: AI):


 @ai_router.get("/ai/check")
+@handle_exceptions(logger=LOG)
 async def check():
     return JSONResponse(content={"ai_enabled": bool(open_ai_key or llama_model)})
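A minimal client-side sketch of consuming the streaming /ai route above, assuming the API is served at http://localhost:8000 (hypothetical host/port); the endpoint streams plain text, so the client just concatenates the chunks.

import httpx

def stream_chat(messages: list[dict[str, str]]) -> str:
    # Collect the plain-text chunks streamed back by the /ai route.
    reply = ""
    with httpx.Client(base_url="http://localhost:8000") as client:
        with client.stream("POST", "/ai", json={"messages": messages}) as response:
            response.raise_for_status()
            for chunk in response.iter_text():
                reply += chunk
    return reply

print(stream_chat([{"role": "user", "content": "Hello"}]))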
@@ -1,8 +1,9 @@
 # STL
 import os

-from openai import OpenAI
+# PDM
 from ollama import AsyncClient
+from openai import OpenAI

 # Load environment variables
 open_ai_key = os.getenv("OPENAI_KEY")
api/backend/ai/schemas/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
# LOCAL
from .ai import AI

__all__ = ["AI"]
api/backend/ai/schemas/ai.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# STL
from typing import Any

# PDM
import pydantic


class AI(pydantic.BaseModel):
    messages: list[Any]
@@ -1,34 +1,30 @@
 # STL
 import os
 import logging
-import apscheduler  # type: ignore
 from contextlib import asynccontextmanager

 # PDM
-import apscheduler.schedulers
 import apscheduler.schedulers.background
 from fastapi import FastAPI, Request, status
+from fastapi.responses import JSONResponse
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware

 # LOCAL
-from api.backend.ai.ai_router import ai_router
-from api.backend.auth.auth_router import auth_router
 from api.backend.utils import get_log_level
-from api.backend.routers.job_router import job_router
-from api.backend.routers.stats_router import stats_router
-from api.backend.database.startup import init_database
-from fastapi.responses import JSONResponse
-
-from api.backend.job.cron_scheduling.cron_scheduling import start_cron_scheduler
 from api.backend.scheduler import scheduler
+from api.backend.ai.ai_router import ai_router
+from api.backend.job.job_router import job_router
+from api.backend.auth.auth_router import auth_router
+from api.backend.database.startup import init_database
+from api.backend.stats.stats_router import stats_router
+from api.backend.job.cron_scheduling.cron_scheduling import start_cron_scheduler

 log_level = os.getenv("LOG_LEVEL")
 LOG_LEVEL = get_log_level(log_level)

 logging.basicConfig(
     level=LOG_LEVEL,
-    format="%(levelname)s: %(asctime)s - %(name)s - %(message)s",
+    format="%(levelname)s: %(asctime)s - [%(name)s] - %(message)s",
     handlers=[logging.StreamHandler()],
 )
@@ -36,7 +32,7 @@ LOG = logging.getLogger(__name__)


 @asynccontextmanager
-async def lifespan(app: FastAPI):
+async def lifespan(_: FastAPI):
     # Startup
     LOG.info("Starting application...")

@@ -45,6 +41,7 @@ async def lifespan(app: FastAPI):
     LOG.info("Starting cron scheduler...")
     start_cron_scheduler(scheduler)
     scheduler.start()
+
     LOG.info("Cron scheduler started successfully")

     yield
@@ -1,13 +1,14 @@
 # STL
-from datetime import timedelta
 import os
+import logging
+from datetime import timedelta

 # PDM
 from fastapi import Depends, APIRouter, HTTPException, status
 from fastapi.security import OAuth2PasswordRequestForm

 # LOCAL
-from api.backend.schemas import User, Token, UserCreate
+from api.backend.auth.schemas import User, Token, UserCreate
 from api.backend.auth.auth_utils import (
     ACCESS_TOKEN_EXPIRE_MINUTES,
     get_current_user,
@@ -15,18 +16,19 @@ from api.backend.auth.auth_utils import (
     get_password_hash,
     create_access_token,
 )
-import logging
-
 from api.backend.database.common import update
+from api.backend.routers.handle_exceptions import handle_exceptions

 auth_router = APIRouter()

-LOG = logging.getLogger("auth_router")
+LOG = logging.getLogger("Auth")


 @auth_router.post("/auth/token", response_model=Token)
+@handle_exceptions(logger=LOG)
 async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
     user = await authenticate_user(form_data.username, form_data.password)

     if not user:
         raise HTTPException(
             status_code=status.HTTP_401_UNAUTHORIZED,
@@ -47,6 +49,7 @@ async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends(


 @auth_router.post("/auth/signup", response_model=User)
+@handle_exceptions(logger=LOG)
 async def create_user(user: UserCreate):
     hashed_password = get_password_hash(user.password)
     user_dict = user.model_dump()
@@ -60,11 +63,13 @@ async def create_user(user: UserCreate):


 @auth_router.get("/auth/users/me", response_model=User)
+@handle_exceptions(logger=LOG)
 async def read_users_me(current_user: User = Depends(get_current_user)):
     return current_user


 @auth_router.get("/auth/check")
+@handle_exceptions(logger=LOG)
 async def check_auth():
     return {
         "registration": os.environ.get("REGISTRATION_ENABLED", "True") == "True",
@@ -1,8 +1,8 @@
 # STL
 import os
+import logging
 from typing import Any, Optional
 from datetime import datetime, timedelta
-import logging

 # PDM
 from jose import JWTError, jwt
@@ -12,11 +12,10 @@ from passlib.context import CryptContext
 from fastapi.security import OAuth2PasswordBearer

 # LOCAL
-from api.backend.schemas import User, UserInDB, TokenData
-
+from api.backend.auth.schemas import User, UserInDB, TokenData
 from api.backend.database.common import query

-LOG = logging.getLogger(__name__)
+LOG = logging.getLogger("Auth")

 _ = load_dotenv()
@@ -118,7 +117,8 @@ async def get_current_user(token: str = Depends(oauth2_scheme)):
         LOG.error(f"Exception occurred: {e}")
         return EMPTY_USER

-    user = await get_user(email=token_data.email)
+    user = await get_user(email=token_data.email or "")

     if user is None:
         return EMPTY_USER
@@ -136,6 +136,7 @@ async def require_user(token: str = Depends(oauth2_scheme)):
         payload: Optional[dict[str, Any]] = jwt.decode(
             token, SECRET_KEY, algorithms=[ALGORITHM]
         )
+
         if not payload:
             raise credentials_exception

@@ -149,7 +150,7 @@ async def require_user(token: str = Depends(oauth2_scheme)):
     except JWTError:
         raise credentials_exception

-    user = await get_user(email=token_data.email)
+    user = await get_user(email=token_data.email or "")

     if user is None:
         raise credentials_exception
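For reference, a minimal sketch of the encode/decode round trip these helpers build on (python-jose); SECRET_KEY and ALGORITHM here stand in for the module's real configuration, which is loaded from the environment.

from jose import jwt

SECRET_KEY = "change-me"  # placeholder; the real key comes from the environment
ALGORITHM = "HS256"

token = jwt.encode({"sub": "user@example.com"}, SECRET_KEY, algorithm=ALGORITHM)
payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
print(payload["sub"])  # user@example.com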
api/backend/auth/schemas/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
# LOCAL
from .auth import User, Token, UserInDB, TokenData, UserCreate

__all__ = ["User", "Token", "UserInDB", "TokenData", "UserCreate"]
@@ -1,5 +1,6 @@
-from pathlib import Path
+# STL
 import os
+from pathlib import Path

 DATABASE_PATH = "data/database.db"
 RECORDINGS_DIR = Path("media/recordings")
@@ -14,3 +15,10 @@ MEDIA_TYPES = [
     "spreadsheets",
     "videos",
 ]
+
+REGISTRATION_ENABLED = os.getenv("REGISTRATION_ENABLED", "true").lower() == "true"
+DEFAULT_USER_EMAIL = os.getenv("DEFAULT_USER_EMAIL")
+DEFAULT_USER_PASSWORD = os.getenv("DEFAULT_USER_PASSWORD")
+DEFAULT_USER_FULL_NAME = os.getenv("DEFAULT_USER_FULL_NAME")
+
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
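The REGISTRATION_ENABLED line is the usual boolean-env idiom; a hedged generalization of it (env_bool is a hypothetical helper, not part of this diff):

import os

def env_bool(name: str, default: bool = False) -> bool:
    # Treat common truthy spellings as True; everything else as False.
    return os.getenv(name, str(default)).strip().lower() in {"1", "true", "yes", "on"}

print(env_bool("REGISTRATION_ENABLED", default=True))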
@@ -1,3 +1,5 @@
-from .common import insert, QUERIES, update
+# LOCAL
+from .common import insert, update, connect
+from .schema import INIT_QUERY

-__all__ = ["insert", "QUERIES", "update"]
+__all__ = ["insert", "update", "INIT_QUERY", "connect"]
@@ -1,12 +1,13 @@
 # STL
+import logging
 import sqlite3
 from typing import Any, Optional
-from api.backend.constants import DATABASE_PATH
-from api.backend.utils import format_json, format_sql_row_to_python
-from api.backend.database.schema import INIT_QUERY
-from api.backend.database.queries import JOB_INSERT_QUERY, DELETE_JOB_QUERY
-import logging

-LOG = logging.getLogger(__name__)
+# LOCAL
+from api.backend.constants import DATABASE_PATH
+from api.backend.database.utils import format_json, format_sql_row_to_python
+
+LOG = logging.getLogger("Database")


 def connect():
@@ -25,8 +26,10 @@ def insert(query: str, values: tuple[Any, ...]):
     try:
         _ = cursor.execute(query, copy)
         connection.commit()
+
     except sqlite3.Error as e:
         LOG.error(f"An error occurred: {e}")
+
     finally:
         cursor.close()
         connection.close()
@@ -78,15 +81,9 @@ def update(query: str, values: Optional[tuple[Any, ...]] = None):
         return res.rowcount
     except sqlite3.Error as e:
         LOG.error(f"An error occurred: {e}")
+
     finally:
         cursor.close()
         connection.close()

     return 0
-
-
-QUERIES = {
-    "init": INIT_QUERY,
-    "insert_job": JOB_INSERT_QUERY,
-    "delete_job": DELETE_JOB_QUERY,
-}
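A short usage sketch for the insert/update helpers above, assuming the sqlite schema from this diff; the ids and values are illustrative only.

from api.backend.database.common import insert, update

# Parametrized insert; values bind to the ? placeholders.
insert(
    "INSERT INTO cron_jobs (id, user_email, job_id, cron_expression, time_created, time_updated) VALUES (?, ?, ?, ?, ?, ?)",
    ("abc123", "user@example.com", "job-1", "0 * * * *", "2025-01-01", "2025-01-01"),
)

# update() returns the affected row count (0 on error).
rows = update("UPDATE jobs SET status = ? WHERE id = ?", ("Completed", "job-1"))
print(rows)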
@@ -1,3 +1,4 @@
-from .queries import JOB_INSERT_QUERY, DELETE_JOB_QUERY
+# LOCAL
+from .job.job_queries import DELETE_JOB_QUERY, JOB_INSERT_QUERY

 __all__ = ["JOB_INSERT_QUERY", "DELETE_JOB_QUERY"]
api/backend/database/queries/job/job_queries.py (new file, 68 lines)
@@ -0,0 +1,68 @@
# STL
import logging
from typing import Any

# LOCAL
from api.backend.database.utils import format_list_for_query
from api.backend.database.common import query, insert, update

JOB_INSERT_QUERY = """
INSERT INTO jobs
(id, url, elements, user, time_created, result, status, chat, job_options, agent_mode, prompt)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

DELETE_JOB_QUERY = """
DELETE FROM jobs WHERE id IN ()
"""

LOG = logging.getLogger("Database")


def insert_job(item: dict[str, Any]) -> None:
    insert(
        JOB_INSERT_QUERY,
        (
            item["id"],
            item["url"],
            item["elements"],
            item["user"],
            item["time_created"],
            item["result"],
            item["status"],
            item["chat"],
            item["job_options"],
            item["agent_mode"],
            item["prompt"],
        ),
    )
    LOG.info(f"Inserted item: {item}")


async def get_queued_job():
    queued_job_query = (
        "SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
    )

    res = query(queued_job_query)
    LOG.info(f"Got queued job: {res}")
    return res[0] if res else None


async def update_job(ids: list[str], field: str, value: Any):
    query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
    res = update(query, tuple([value] + ids))
    LOG.info(f"Updated job: {res}")


async def delete_jobs(jobs: list[str]):
    if not jobs:
        LOG.info("No jobs to delete.")
        return False

    query = f"DELETE FROM jobs WHERE id IN {format_list_for_query(jobs)}"
    res = update(query, tuple(jobs))

    LOG.info(f"Deleted jobs: {res}")

    return res
@@ -1,9 +0,0 @@
JOB_INSERT_QUERY = """
INSERT INTO jobs
(id, url, elements, user, time_created, result, status, chat, job_options, agent_mode, prompt)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

DELETE_JOB_QUERY = """
DELETE FROM jobs WHERE id IN ()
"""
api/backend/database/queries/statistics/statistic_queries.py (new file, 41 lines)
@@ -0,0 +1,41 @@
# LOCAL
from api.backend.database.common import query


async def average_elements_per_link(user: str):
    job_query = """
        SELECT
            DATE(time_created) AS date,
            AVG(json_array_length(elements)) AS average_elements,
            COUNT(*) AS count
        FROM
            jobs
        WHERE
            status = 'Completed' AND user = ?
        GROUP BY
            DATE(time_created)
        ORDER BY
            date ASC;
    """
    results = query(job_query, (user,))

    return results


async def get_jobs_per_day(user: str):
    job_query = """
        SELECT
            DATE(time_created) AS date,
            COUNT(*) AS job_count
        FROM
            jobs
        WHERE
            status = 'Completed' AND user = ?
        GROUP BY
            DATE(time_created)
        ORDER BY
            date ASC;
    """
    results = query(job_query, (user,))

    return results
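A minimal usage sketch for the statistics queries above, assuming an event loop and a populated jobs table; the row keys follow the SELECT's column aliases.

import asyncio

from api.backend.database.queries.statistics.statistic_queries import (
    average_elements_per_link,
)

async def main() -> None:
    rows = await average_elements_per_link("user@example.com")
    for row in rows:
        # e.g. {"date": "2025-01-01", "average_elements": 3.5, "count": 4}
        print(row["date"], row["average_elements"], row["count"])

asyncio.run(main())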
@@ -30,4 +30,5 @@ CREATE TABLE IF NOT EXISTS cron_jobs (

 ALTER TABLE jobs ADD COLUMN agent_mode BOOLEAN NOT NULL DEFAULT FALSE;
 ALTER TABLE jobs ADD COLUMN prompt STRING;
+ALTER TABLE jobs ADD COLUMN favorite BOOLEAN NOT NULL DEFAULT FALSE;
 """
@@ -1,24 +1,34 @@
-import os
-from api.backend.database.common import connect, QUERIES, insert
+# STL
 import logging
+import sqlite3
+
+# LOCAL
+from api.backend.constants import (
+    DEFAULT_USER_EMAIL,
+    REGISTRATION_ENABLED,
+    DEFAULT_USER_PASSWORD,
+    DEFAULT_USER_FULL_NAME,
+)
 from api.backend.auth.auth_utils import get_password_hash
+from api.backend.database.common import insert, connect
+from api.backend.database.schema import INIT_QUERY

-LOG = logging.getLogger(__name__)
+LOG = logging.getLogger("Database")


-def init_database():
+def execute_startup_query():
     cursor = connect()

-    for query in QUERIES["init"].strip().split(";"):
+    for query in INIT_QUERY.strip().split(";"):
         query = query.strip()

         if not query:
             continue

         try:
             LOG.info(f"Executing query: {query}")
             _ = cursor.execute(query)

         except sqlite3.OperationalError as e:
             if "duplicate column name" in str(e).lower():
                 LOG.warning(f"Skipping duplicate column error: {e}")
@@ -27,10 +37,16 @@ def init_database():
                 LOG.error(f"Error executing query: {query}")
                 raise

-    if os.environ.get("REGISTRATION_ENABLED", "true").lower() == "false":
-        default_user_email = os.environ.get("DEFAULT_USER_EMAIL")
-        default_user_password = os.environ.get("DEFAULT_USER_PASSWORD")
-        default_user_full_name = os.environ.get("DEFAULT_USER_FULL_NAME")
+    cursor.close()
+
+
+def init_database():
+    execute_startup_query()
+
+    if not REGISTRATION_ENABLED:
+        default_user_email = DEFAULT_USER_EMAIL
+        default_user_password = DEFAULT_USER_PASSWORD
+        default_user_full_name = DEFAULT_USER_FULL_NAME

     if (
         not default_user_email
@@ -51,5 +67,3 @@ def init_database():
             default_user_full_name,
         ),
     )
-
-    cursor.close()
api/backend/database/utils.py (new file, 30 lines)
@@ -0,0 +1,30 @@
# STL
import json
from typing import Any


def format_list_for_query(ids: list[str]):
    return (
        f"({','.join(['?' for _ in ids])})"  # Returns placeholders, e.g., "(?,?,?)"
    )


def format_sql_row_to_python(row: dict[str, Any]):
    new_row: dict[str, Any] = {}
    for key, value in row.items():
        if isinstance(value, str):
            try:
                new_row[key] = json.loads(value)
            except json.JSONDecodeError:
                new_row[key] = value
        else:
            new_row[key] = value

    return new_row


def format_json(items: list[Any]):
    for idx, item in enumerate(items):
        if isinstance(item, (dict, list)):
            formatted_item = json.dumps(item)
            items[idx] = formatted_item
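Quick illustration of the three helpers above; expected output is shown in comments.

from api.backend.database.utils import (
    format_json,
    format_list_for_query,
    format_sql_row_to_python,
)

print(format_list_for_query(["a1", "b2", "c3"]))  # (?,?,?)

row = {"result": '[{"k": 1}]', "user": "me"}
print(format_sql_row_to_python(row))  # {'result': [{'k': 1}], 'user': 'me'}

items = [{"k": 1}, "plain"]
format_json(items)  # serializes dict/list entries in place
print(items)        # ['{"k": 1}', 'plain']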
@@ -1,17 +1,9 @@
-from .job import (
-    insert,
-    update_job,
-    delete_jobs,
-    get_jobs_per_day,
-    get_queued_job,
-    average_elements_per_link,
-)
+# LOCAL
+from .job import insert, update_job, delete_jobs, get_queued_job

 __all__ = [
     "insert",
     "update_job",
     "delete_jobs",
-    "get_jobs_per_day",
     "get_queued_job",
-    "average_elements_per_link",
 ]
@@ -1,15 +1,19 @@
 # STL
+import uuid
+import logging
 import datetime
 from typing import Any
-import uuid
-from api.backend.database.common import insert, query
-from api.backend.models import CronJob
-from apscheduler.schedulers.background import BackgroundScheduler  # type: ignore
-from apscheduler.triggers.cron import CronTrigger  # type: ignore

+# PDM
+from apscheduler.triggers.cron import CronTrigger
+from apscheduler.schedulers.background import BackgroundScheduler
+
+# LOCAL
 from api.backend.job import insert as insert_job
-import logging
+from api.backend.schemas.cron import CronJob
+from api.backend.database.common import query, insert

-LOG = logging.getLogger("Cron Scheduler")
+LOG = logging.getLogger("Cron")


 def insert_cron_job(cron_job: CronJob):
@@ -17,6 +21,7 @@ def insert_cron_job(cron_job: CronJob):
     INSERT INTO cron_jobs (id, user_email, job_id, cron_expression, time_created, time_updated)
     VALUES (?, ?, ?, ?, ?, ?)
     """
+
     values = (
         cron_job.id,
         cron_job.user_email,
@@ -36,6 +41,7 @@ def delete_cron_job(id: str, user_email: str):
     DELETE FROM cron_jobs
     WHERE id = ? AND user_email = ?
     """
+
    values = (id, user_email)
     insert(query, values)
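A minimal sketch of the scheduling pattern this module feeds, using APScheduler's cron trigger; run_job is a stand-in callback, not part of the diff.

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

def run_job() -> None:
    print("cron job fired")

scheduler = BackgroundScheduler()
# Parse a standard five-field cron expression into a trigger.
scheduler.add_job(run_job, CronTrigger.from_crontab("*/5 * * * *"), id="demo")
scheduler.start()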
@@ -3,20 +3,18 @@ import logging
 from typing import Any

 # LOCAL
-from api.backend.utils import format_list_for_query
-from api.backend.database.common import (
-    insert as common_insert,
-    query as common_query,
-    QUERIES,
-    update as common_update,
-)
+from api.backend.database.utils import format_list_for_query
+from api.backend.database.common import query as common_query
+from api.backend.database.common import insert as common_insert
+from api.backend.database.common import update as common_update
+from api.backend.database.queries.job.job_queries import JOB_INSERT_QUERY

-LOG = logging.getLogger(__name__)
+LOG = logging.getLogger("Job")


 def insert(item: dict[str, Any]) -> None:
     common_insert(
-        QUERIES["insert_job"],
+        JOB_INSERT_QUERY,
         (
             item["id"],
             item["url"],
@@ -31,7 +29,8 @@ def insert(item: dict[str, Any]) -> None:
             item["prompt"],
         ),
     )
-    LOG.info(f"Inserted item: {item}")
+
+    LOG.debug(f"Inserted item: {item}")


 async def get_queued_job():
@@ -39,61 +38,22 @@ async def get_queued_job():
         "SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
     )
     res = common_query(query)
-    LOG.info(f"Got queued job: {res}")
+    LOG.debug(f"Got queued job: {res}")
     return res[0] if res else None


 async def update_job(ids: list[str], field: str, value: Any):
     query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
     res = common_update(query, tuple([value] + ids))
-    LOG.info(f"Updated job: {res}")
+    LOG.debug(f"Updated job: {res}")


 async def delete_jobs(jobs: list[str]):
     if not jobs:
-        LOG.info("No jobs to delete.")
+        LOG.debug("No jobs to delete.")
         return False

     query = f"DELETE FROM jobs WHERE id IN {format_list_for_query(jobs)}"
     res = common_update(query, tuple(jobs))

     return res > 0
-
-
-async def average_elements_per_link(user: str):
-    job_query = """
-        SELECT
-            DATE(time_created) AS date,
-            AVG(json_array_length(elements)) AS average_elements,
-            COUNT(*) AS count
-        FROM
-            jobs
-        WHERE
-            status = 'Completed' AND user = ?
-        GROUP BY
-            DATE(time_created)
-        ORDER BY
-            date ASC;
-    """
-    results = common_query(job_query, (user,))
-
-    return results
-
-
-async def get_jobs_per_day(user: str):
-    job_query = """
-        SELECT
-            DATE(time_created) AS date,
-            COUNT(*) AS job_count
-        FROM
-            jobs
-        WHERE
-            status = 'Completed' AND user = ?
-        GROUP BY
-            DATE(time_created)
-        ORDER BY
-            date ASC;
-    """
-    results = common_query(job_query, (user,))
-
-    return results
api/backend/job/job_router.py (new file, 248 lines)
@@ -0,0 +1,248 @@
# STL
import csv
import uuid
import random
import logging
import datetime
from io import StringIO

# PDM
from fastapi import Depends, APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from apscheduler.triggers.cron import CronTrigger  # type: ignore

# LOCAL
from api.backend.job import insert, update_job, delete_jobs
from api.backend.constants import MEDIA_DIR, MEDIA_TYPES, RECORDINGS_DIR
from api.backend.scheduler import scheduler
from api.backend.schemas.job import Job, UpdateJobs, DownloadJob, DeleteScrapeJobs
from api.backend.auth.schemas import User
from api.backend.schemas.cron import CronJob, DeleteCronJob
from api.backend.database.utils import format_list_for_query
from api.backend.auth.auth_utils import get_current_user
from api.backend.database.common import query
from api.backend.job.utils.text_utils import clean_text
from api.backend.job.models.job_options import FetchOptions
from api.backend.routers.handle_exceptions import handle_exceptions
from api.backend.job.utils.clean_job_format import clean_job_format
from api.backend.job.cron_scheduling.cron_scheduling import (
    get_cron_jobs,
    delete_cron_job,
    insert_cron_job,
    get_cron_job_trigger,
    insert_job_from_cron_job,
)
from api.backend.job.utils.stream_md_from_job_results import stream_md_from_job_results

LOG = logging.getLogger("Job")

job_router = APIRouter()


@job_router.post("/update")
@handle_exceptions(logger=LOG)
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
    """Used to update jobs"""
    await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)

    return JSONResponse(content={"message": "Jobs updated successfully."})


@job_router.post("/submit-scrape-job")
@handle_exceptions(logger=LOG)
async def submit_scrape_job(job: Job):
    LOG.info(f"Recieved job: {job}")

    job.id = uuid.uuid4().hex
    job_dict = job.model_dump()
    insert(job_dict)

    return JSONResponse(
        content={"id": job.id, "message": "Job submitted successfully."}
    )


@job_router.post("/retrieve-scrape-jobs")
@handle_exceptions(logger=LOG)
async def retrieve_scrape_jobs(
    fetch_options: FetchOptions, user: User = Depends(get_current_user)
):
    LOG.info(f"Retrieving jobs for account: {user.email}")
    ATTRIBUTES = "chat" if fetch_options.chat else "*"
    job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
    results = query(job_query, (user.email,))
    return JSONResponse(content=jsonable_encoder(results[::-1]))


@job_router.get("/job/{id}")
@handle_exceptions(logger=LOG)
async def job(id: str, user: User = Depends(get_current_user)):
    LOG.info(f"Retrieving jobs for account: {user.email}")
    job_query = "SELECT * FROM jobs WHERE user = ? AND id = ?"
    results = query(job_query, (user.email, id))
    return JSONResponse(content=jsonable_encoder(results))


@job_router.post("/download")
@handle_exceptions(logger=LOG)
async def download(download_job: DownloadJob):
    LOG.info(f"Downloading job with ids: {download_job.ids}")
    job_query = (
        f"SELECT * FROM jobs WHERE id IN {format_list_for_query(download_job.ids)}"
    )
    results = query(job_query, tuple(download_job.ids))

    if download_job.job_format == "csv":
        csv_buffer = StringIO()
        csv_writer = csv.writer(csv_buffer, quotechar='"', quoting=csv.QUOTE_ALL)

        headers = [
            "id",
            "url",
            "element_name",
            "xpath",
            "text",
            "user",
            "time_created",
        ]
        csv_writer.writerow(headers)

        for result in results:
            for res in result["result"]:
                for url, elements in res.items():
                    for element_name, values in elements.items():
                        for value in values:
                            text = clean_text(value.get("text", "")).strip()
                            if text:
                                csv_writer.writerow(
                                    [
                                        result.get("id", "")
                                        + "-"
                                        + str(random.randint(0, 1000000)),
                                        url,
                                        element_name,
                                        value.get("xpath", ""),
                                        text,
                                        result.get("user", ""),
                                        result.get("time_created", ""),
                                    ]
                                )

        _ = csv_buffer.seek(0)
        response = StreamingResponse(
            csv_buffer,
            media_type="text/csv",
        )
        response.headers["Content-Disposition"] = "attachment; filename=export.csv"
        return response

    elif download_job.job_format == "md":
        response = StreamingResponse(
            stream_md_from_job_results(results),
            media_type="text/markdown",
        )

        response.headers["Content-Disposition"] = "attachment; filename=export.md"
        return response


@job_router.get("/job/{id}/convert-to-csv")
@handle_exceptions(logger=LOG)
async def convert_to_csv(id: str):
    job_query = f"SELECT * FROM jobs WHERE id = ?"
    results = query(job_query, (id,))
    return JSONResponse(content=clean_job_format(results))


@job_router.post("/delete-scrape-jobs")
@handle_exceptions(logger=LOG)
async def delete(delete_scrape_jobs: DeleteScrapeJobs):
    result = await delete_jobs(delete_scrape_jobs.ids)
    return (
        JSONResponse(content={"message": "Jobs successfully deleted."})
        if result
        else JSONResponse(content={"error": "Jobs not deleted."})
    )


@job_router.post("/schedule-cron-job")
@handle_exceptions(logger=LOG)
async def schedule_cron_job(cron_job: CronJob):
    if not cron_job.id:
        cron_job.id = uuid.uuid4().hex

    if not cron_job.time_created:
        cron_job.time_created = datetime.datetime.now()

    if not cron_job.time_updated:
        cron_job.time_updated = datetime.datetime.now()

    insert_cron_job(cron_job)

    queried_job = query("SELECT * FROM jobs WHERE id = ?", (cron_job.job_id,))

    scheduler.add_job(
        insert_job_from_cron_job,
        get_cron_job_trigger(cron_job.cron_expression),
        id=cron_job.id,
        args=[queried_job[0]],
    )

    return JSONResponse(content={"message": "Cron job scheduled successfully."})


@job_router.post("/delete-cron-job")
@handle_exceptions(logger=LOG)
async def delete_cron_job_request(request: DeleteCronJob):
    if not request.id:
        return JSONResponse(
            content={"error": "Cron job id is required."}, status_code=400
        )

    delete_cron_job(request.id, request.user_email)
    scheduler.remove_job(request.id)

    return JSONResponse(content={"message": "Cron job deleted successfully."})


@job_router.get("/cron-jobs")
@handle_exceptions(logger=LOG)
async def get_cron_jobs_request(user: User = Depends(get_current_user)):
    cron_jobs = get_cron_jobs(user.email)
    return JSONResponse(content=jsonable_encoder(cron_jobs))


@job_router.get("/recordings/{id}")
@handle_exceptions(logger=LOG)
async def get_recording(id: str):
    path = RECORDINGS_DIR / f"{id}.mp4"
    if not path.exists():
        return JSONResponse(content={"error": "Recording not found."}, status_code=404)

    return FileResponse(
        path, headers={"Content-Type": "video/mp4", "Accept-Ranges": "bytes"}
    )


@job_router.get("/get-media")
@handle_exceptions(logger=LOG)
async def get_media(id: str):
    files: dict[str, list[str]] = {}

    for media_type in MEDIA_TYPES:
        path = MEDIA_DIR / media_type / f"{id}"
        files[media_type] = [file.name for file in path.glob("*")]

    return JSONResponse(content={"files": files})


@job_router.get("/media")
@handle_exceptions(logger=LOG)
async def get_media_file(id: str, type: str, file: str):
    path = MEDIA_DIR / type / f"{id}" / file

    if not path.exists():
        return JSONResponse(content={"error": "Media file not found."}, status_code=404)

    return FileResponse(path)
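A hedged client-side sketch for the /submit-scrape-job route above, assuming the API is reachable at http://localhost:8000 (hypothetical host/port) and using the minimal fields the Job schema in this diff requires.

import httpx

payload = {
    "url": "https://example.com",
    "elements": [{"name": "title", "xpath": "//h1"}],
    "job_options": {},  # JobOptions fields all have defaults
}
response = httpx.post("http://localhost:8000/submit-scrape-job", json=payload)
print(response.json())  # {"id": "...", "message": "Job submitted successfully."}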
@@ -1,3 +1,5 @@
-from .job_options import JobOptions
+# LOCAL
+from .job import Element, CapturedElement
+from .job_options import Proxy, JobOptions

-__all__ = ["JobOptions"]
+__all__ = ["JobOptions", "CapturedElement", "Element", "Proxy"]
api/backend/job/models/job.py (new file, 15 lines)
@@ -0,0 +1,15 @@
from typing import Optional

import pydantic


class Element(pydantic.BaseModel):
    name: str
    xpath: str
    url: Optional[str] = None


class CapturedElement(pydantic.BaseModel):
    xpath: str
    text: str
    name: str
@@ -1,8 +1,19 @@
-from pydantic import BaseModel
+# STL
 from typing import Any, Optional

+# PDM
+from pydantic import BaseModel
+
+# LOCAL
 from api.backend.job.models.site_map import SiteMap


+class Proxy(BaseModel):
+    server: str
+    username: Optional[str] = None
+    password: Optional[str] = None
+
+
 class FetchOptions(BaseModel):
     chat: Optional[bool] = None

@@ -10,7 +21,7 @@ class FetchOptions(BaseModel):
 class JobOptions(BaseModel):
     multi_page_scrape: bool = False
     custom_headers: dict[str, Any] = {}
-    proxies: list[str] = []
+    proxies: list[Proxy] = []
     site_map: Optional[SiteMap] = None
     collect_media: bool = False
     custom_cookies: list[dict[str, Any]] = []
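The proxies field now takes structured Proxy models instead of bare strings; a construction sketch (the server address is illustrative):

from api.backend.job.models.job_options import JobOptions, Proxy

options = JobOptions(
    multi_page_scrape=True,
    proxies=[Proxy(server="http://proxy.example.com:8080", username="u", password="p")],
)
print(options.model_dump()["proxies"])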
@@ -1,11 +1,12 @@
 # STL
+import logging
 from typing import Any, Optional
 from urllib.parse import urlparse

 # PDM
 from playwright.async_api import Page

-import logging
-
-LOG = logging.getLogger(__name__)
+LOG = logging.getLogger("Job")


 async def add_custom_cookies(
@@ -18,8 +19,8 @@ async def add_custom_cookies(

     for cookie in custom_cookies:
         cookie_dict = {
-            "name": cookie.get("name", "default_name"),
-            "value": cookie.get("value", "default_value"),
+            "name": cookie.get("name", ""),
+            "value": cookie.get("value", ""),
             "domain": domain,
             "path": "/",
         }
@@ -1,13 +1,16 @@
 # STL
 import os
-from pathlib import Path
 import re
-from urllib.parse import urljoin, urlparse
+import logging
 from typing import Dict, List
+from pathlib import Path
+from urllib.parse import urljoin, urlparse

 # PDM
 import aiohttp
 from playwright.async_api import Page

-from api.backend.utils import LOG
+LOG = logging.getLogger("Job")


 async def collect_media(id: str, page: Page) -> dict[str, list[dict[str, str]]]:
@@ -1,59 +1,45 @@
-import logging
+# STL
 import random
-from typing import Any, Optional, cast
+import logging
+from typing import Any, cast
 from urllib.parse import urljoin, urlparse

-from bs4 import BeautifulSoup, Tag
+# PDM
+from bs4 import Tag, BeautifulSoup
 from lxml import etree
 from camoufox import AsyncCamoufox
 from playwright.async_api import Page
-from urllib.parse import urlparse, urljoin

-from api.backend.models import Element, CapturedElement
+# LOCAL
+from api.backend.constants import RECORDINGS_ENABLED
+from api.backend.job.models import Element, CapturedElement
+from api.backend.job.utils.text_utils import clean_text
+from api.backend.job.scraping.add_custom import add_custom_items
 from api.backend.job.scraping.scraping_utils import (
-    clean_format_characters,
+    sxpath,
+    is_same_domain,
     scrape_content,
 )
 from api.backend.job.site_mapping.site_mapping import handle_site_mapping

-from api.backend.job.scraping.add_custom import add_custom_items
-
-from api.backend.constants import RECORDINGS_ENABLED
-
-LOG = logging.getLogger(__name__)
-
-
-def is_same_domain(url: str, original_url: str) -> bool:
-    parsed_url = urlparse(url)
-    parsed_original_url = urlparse(original_url)
-    return parsed_url.netloc == parsed_original_url.netloc or parsed_url.netloc == ""
-
-
-def clean_xpath(xpath: str) -> str:
-    parts = xpath.split("/")
-    clean_parts = ["/" if part == "" else part for part in parts]
-    clean_xpath = "//".join(clean_parts).replace("////", "//").replace("'", "\\'")
-    LOG.info(f"Cleaned xpath: {clean_xpath}")
-
-    return clean_xpath
-
-
-def sxpath(context: etree._Element, xpath: str):
-    return context.xpath(xpath)
+LOG = logging.getLogger("Job")


 async def make_site_request(
     id: str,
     url: str,
-    headers: Optional[dict[str, Any]],
-    multi_page_scrape: bool = False,
+    job_options: dict[str, Any],
     visited_urls: set[str] = set(),
     pages: set[tuple[str, str]] = set(),
     original_url: str = "",
-    proxies: Optional[list[str]] = None,
-    site_map: Optional[dict[str, Any]] = None,
-    collect_media: bool = False,
-    custom_cookies: Optional[list[dict[str, Any]]] = None,
 ):
+    headers = job_options["custom_headers"]
+    multi_page_scrape = job_options["multi_page_scrape"]
+    proxies = job_options["proxies"]
+    site_map = job_options["site_map"]
+    collect_media = job_options["collect_media"]
+    custom_cookies = job_options["custom_cookies"]

     if url in visited_urls:
         return
@@ -117,15 +103,10 @@ async def make_site_request(
                 await make_site_request(
                     id,
                     link,
-                    headers=headers,
-                    multi_page_scrape=multi_page_scrape,
+                    job_options=job_options,
                     visited_urls=visited_urls,
                     pages=pages,
                     original_url=original_url,
-                    proxies=proxies,
-                    site_map=site_map,
-                    collect_media=collect_media,
-                    custom_cookies=custom_cookies,
                 )
@@ -145,7 +126,7 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])
             else str(e)  # type: ignore
         )

-        text = clean_format_characters(text)
+        text = clean_text(text)

         captured_element = CapturedElement(
             xpath=elem.xpath, text=text, name=elem.name
@@ -163,12 +144,7 @@ async def scrape(
     id: str,
     url: str,
     xpaths: list[Element],
-    headers: Optional[dict[str, Any]] = None,
-    multi_page_scrape: bool = False,
-    proxies: Optional[list[str]] = None,
-    site_map: Optional[dict[str, Any]] = None,
-    collect_media: bool = False,
-    custom_cookies: Optional[list[dict[str, Any]]] = None,
+    job_options: dict[str, Any],
 ):
     visited_urls: set[str] = set()
     pages: set[tuple[str, str]] = set()
@@ -176,15 +152,10 @@ async def scrape(
     await make_site_request(
         id,
         url,
-        headers=headers,
-        multi_page_scrape=multi_page_scrape,
+        job_options=job_options,
         visited_urls=visited_urls,
         pages=pages,
         original_url=url,
-        proxies=proxies,
-        site_map=site_map,
-        collect_media=collect_media,
-        custom_cookies=custom_cookies,
     )

     elements: list[dict[str, dict[str, list[CapturedElement]]]] = []
@@ -1,11 +1,18 @@
 # STL
 import asyncio
+import logging
 from typing import Set, Tuple
+from urllib.parse import urlparse

 # PDM
+from lxml import etree
 from playwright.async_api import Page

-from api.backend.utils import LOG
+# LOCAL
 from api.backend.job.scraping.collect_media import collect_media as collect_media_utils

+LOG = logging.getLogger("Job")


 async def scrape_content(
     id: str, page: Page, pages: Set[Tuple[str, str]], collect_media: bool
@@ -32,14 +39,20 @@ async def scrape_content(
     return html


-def clean_format_characters(text: str) -> str:
-    text = text.strip()
-    text = text.replace("\n", " ")
-    text = text.replace("\t", " ")
-    text = text.replace("\r", " ")
-    text = text.replace("\f", " ")
-    text = text.replace("\v", " ")
-    text = text.replace("\b", " ")
-    text = text.replace("\a", " ")
+def is_same_domain(url: str, original_url: str) -> bool:
+    parsed_url = urlparse(url)
+    parsed_original_url = urlparse(original_url)
+    return parsed_url.netloc == parsed_original_url.netloc or parsed_url.netloc == ""

-    return text
+
+def clean_xpath(xpath: str) -> str:
+    parts = xpath.split("/")
+    clean_parts = ["/" if part == "" else part for part in parts]
+    clean_xpath = "//".join(clean_parts).replace("////", "//").replace("'", "\\'")
+    LOG.info(f"Cleaned xpath: {clean_xpath}")
+
+    return clean_xpath
+
+
+def sxpath(context: etree._Element, xpath: str):
+    return context.xpath(xpath)
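Behavior sketch for the relocated is_same_domain helper; note that relative URLs (empty netloc) count as same-domain, which is what lets multi-page scrapes follow relative links.

from api.backend.job.scraping.scraping_utils import is_same_domain

print(is_same_domain("https://example.com/a", "https://example.com/b"))  # True
print(is_same_domain("/relative/path", "https://example.com"))           # True
print(is_same_domain("https://other.org", "https://example.com"))        # False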
@@ -1,14 +1,17 @@
-import logging
+# STL
 import asyncio
+import logging
 from copy import deepcopy
 from typing import Any

 # PDM
 from playwright.async_api import Page

 # LOCAL
 from api.backend.job.models.site_map import Action, SiteMap
 from api.backend.job.scraping.scraping_utils import scrape_content

-LOG = logging.getLogger(__name__)
+LOG = logging.getLogger("Job")


 def clear_done_actions(site_map: dict[str, Any]) -> dict[str, Any]:
@@ -1,6 +1,8 @@
+# STL
 from typing import Any

-from api.backend.utils import clean_text
+# LOCAL
+from api.backend.job.utils.text_utils import clean_text


 def clean_job_format(jobs: list[dict[str, Any]]) -> dict[str, Any]:
@@ -1,6 +1,8 @@
+# STL
 from typing import Any

-from api.backend.utils import clean_text
+# LOCAL
+from api.backend.job.utils.text_utils import clean_text


 def stream_md_from_job_results(jobs: list[dict[str, Any]]):
api/backend/job/utils/text_utils.py (new file, 10 lines)
@@ -0,0 +1,10 @@
def clean_text(text: str):
    text = text.strip()
    text = text.replace("\n", " ")
    text = text.replace("\t", " ")
    text = text.replace("\r", " ")
    text = text.replace("\f", " ")
    text = text.replace("\v", " ")
    text = text.replace("\b", " ")
    text = text.replace("\a", " ")
    return text
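An equivalent single-pass variant of clean_text using a character class; this is an editorial sketch, not part of the diff.

import re

_FORMAT_CHARS = re.compile(r"[\n\t\r\f\v\b\a]")

def clean_text_re(text: str) -> str:
    # Same effect as the chained replaces above: strip, then map each
    # formatting character to a single space.
    return _FORMAT_CHARS.sub(" ", text.strip())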
api/backend/routers/handle_exceptions.py (new file, 31 lines)
@@ -0,0 +1,31 @@
# STL
import logging
import traceback
from typing import Any, Union, Callable, Awaitable
from functools import wraps

# PDM
from fastapi.responses import JSONResponse


def handle_exceptions(
    logger: logging.Logger,
) -> Callable[
    [Callable[..., Awaitable[Any]]], Callable[..., Awaitable[Union[Any, JSONResponse]]]
]:
    def decorator(
        func: Callable[..., Awaitable[Any]],
    ) -> Callable[..., Awaitable[Union[Any, JSONResponse]]]:
        @wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Union[Any, JSONResponse]:
            try:
                return await func(*args, **kwargs)

            except Exception as e:
                logger.error(f"Exception occurred: {e}")
                traceback.print_exc()
                return JSONResponse(content={"error": str(e)}, status_code=500)

        return wrapper

    return decorator
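Usage sketch for the decorator above, matching how the routers in this diff stack it under the FastAPI route decorator.

import logging

from fastapi import APIRouter

from api.backend.routers.handle_exceptions import handle_exceptions

LOG = logging.getLogger("Demo")
router = APIRouter()

@router.get("/demo")
@handle_exceptions(logger=LOG)
async def demo():
    # Any exception raised here is logged and returned as a 500 JSONResponse.
    return {"ok": True}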
@@ -1,273 +0,0 @@
# STL
import datetime
import uuid
import traceback
from io import StringIO
import csv
import logging
import random

# PDM
from fastapi import Depends, APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from api.backend.scheduler import scheduler
from apscheduler.triggers.cron import CronTrigger  # type: ignore

# LOCAL
from api.backend.job import insert, update_job, delete_jobs
from api.backend.models import (
    DeleteCronJob,
    UpdateJobs,
    DownloadJob,
    DeleteScrapeJobs,
    Job,
    CronJob,
)
from api.backend.schemas import User
from api.backend.auth.auth_utils import get_current_user
from api.backend.utils import clean_text, format_list_for_query
from api.backend.job.models.job_options import FetchOptions

from api.backend.database.common import query

from api.backend.job.cron_scheduling.cron_scheduling import (
    delete_cron_job,
    get_cron_job_trigger,
    insert_cron_job,
    get_cron_jobs,
    insert_job_from_cron_job,
)

from api.backend.job.utils.clean_job_format import clean_job_format
from api.backend.job.utils.stream_md_from_job_results import stream_md_from_job_results

from api.backend.constants import MEDIA_DIR, MEDIA_TYPES, RECORDINGS_DIR

LOG = logging.getLogger(__name__)

job_router = APIRouter()


@job_router.post("/update")
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
    """Used to update jobs"""
    await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)


@job_router.post("/submit-scrape-job")
async def submit_scrape_job(job: Job):
    LOG.info(f"Recieved job: {job}")
    try:
        job.id = uuid.uuid4().hex

        job_dict = job.model_dump()
        insert(job_dict)

        return JSONResponse(content={"id": job.id})
    except Exception as e:
        LOG.error(f"Exception occurred: {traceback.format_exc()}")
        return JSONResponse(content={"error": str(e)}, status_code=500)


@job_router.post("/retrieve-scrape-jobs")
async def retrieve_scrape_jobs(
    fetch_options: FetchOptions, user: User = Depends(get_current_user)
):
    LOG.info(f"Retrieving jobs for account: {user.email}")
    ATTRIBUTES = "chat" if fetch_options.chat else "*"

    try:
        job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
        results = query(job_query, (user.email,))
        return JSONResponse(content=jsonable_encoder(results[::-1]))
    except Exception as e:
        LOG.error(f"Exception occurred: {e}")
        return JSONResponse(content=[], status_code=500)


@job_router.get("/job/{id}")
async def job(id: str, user: User = Depends(get_current_user)):
    LOG.info(f"Retrieving jobs for account: {user.email}")

    try:
        job_query = "SELECT * FROM jobs WHERE user = ? AND id = ?"
        results = query(job_query, (user.email, id))
        return JSONResponse(content=jsonable_encoder(results))
    except Exception as e:
        LOG.error(f"Exception occurred: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)


@job_router.post("/download")
async def download(download_job: DownloadJob):
    LOG.info(f"Downloading job with ids: {download_job.ids}")

    try:
        job_query = (
            f"SELECT * FROM jobs WHERE id IN {format_list_for_query(download_job.ids)}"
        )
        results = query(job_query, tuple(download_job.ids))

        if download_job.job_format == "csv":
            csv_buffer = StringIO()
            csv_writer = csv.writer(csv_buffer, quotechar='"', quoting=csv.QUOTE_ALL)

            headers = [
                "id",
                "url",
                "element_name",
                "xpath",
                "text",
                "user",
                "time_created",
            ]
            csv_writer.writerow(headers)

            for result in results:
                for res in result["result"]:
                    for url, elements in res.items():
                        for element_name, values in elements.items():
                            for value in values:
                                text = clean_text(value.get("text", "")).strip()
                                if text:
                                    csv_writer.writerow(
                                        [
                                            result.get("id", "")
                                            + "-"
                                            + str(random.randint(0, 1000000)),
                                            url,
                                            element_name,
                                            value.get("xpath", ""),
                                            text,
                                            result.get("user", ""),
                                            result.get("time_created", ""),
                                        ]
                                    )

            _ = csv_buffer.seek(0)
            response = StreamingResponse(
                csv_buffer,
                media_type="text/csv",
            )
            response.headers["Content-Disposition"] = "attachment; filename=export.csv"
            return response

        elif download_job.job_format == "md":
            response = StreamingResponse(
                stream_md_from_job_results(results),
                media_type="text/markdown",
            )

            response.headers["Content-Disposition"] = "attachment; filename=export.md"
            return response

    except Exception as e:
        LOG.error(f"Exception occurred: {e}")
        traceback.print_exc()
        return {"error": str(e)}


@job_router.get("/job/{id}/convert-to-csv")
async def convert_to_csv(id: str):
    try:
        job_query = f"SELECT * FROM jobs WHERE id = ?"
        results = query(job_query, (id,))

        return JSONResponse(content=clean_job_format(results))
    except Exception as e:
        LOG.error(f"Exception occurred: {e}")
        traceback.print_exc()
        return {"error": str(e)}


@job_router.post("/delete-scrape-jobs")
async def delete(delete_scrape_jobs: DeleteScrapeJobs):
    result = await delete_jobs(delete_scrape_jobs.ids)
    return (
        JSONResponse(content={"message": "Jobs successfully deleted."})
        if result
        else JSONResponse({"error": "Jobs not deleted."})
    )


@job_router.post("/schedule-cron-job")
async def schedule_cron_job(cron_job: CronJob):
    if not cron_job.id:
        cron_job.id = uuid.uuid4().hex

    if not cron_job.time_created:
        cron_job.time_created = datetime.datetime.now()

    if not cron_job.time_updated:
        cron_job.time_updated = datetime.datetime.now()

    insert_cron_job(cron_job)

    queried_job = query("SELECT * FROM jobs WHERE id = ?", (cron_job.job_id,))

    scheduler.add_job(
        insert_job_from_cron_job,
        get_cron_job_trigger(cron_job.cron_expression),
        id=cron_job.id,
        args=[queried_job[0]],
    )

    return JSONResponse(content={"message": "Cron job scheduled successfully."})


@job_router.post("/delete-cron-job")
async def delete_cron_job_request(request: DeleteCronJob):
    if not request.id:
        return JSONResponse(
            content={"error": "Cron job id is required."}, status_code=400
        )

    delete_cron_job(request.id, request.user_email)
    scheduler.remove_job(request.id)

    return JSONResponse(content={"message": "Cron job deleted successfully."})


@job_router.get("/cron-jobs")
async def get_cron_jobs_request(user: User = Depends(get_current_user)):
    cron_jobs = get_cron_jobs(user.email)
    return JSONResponse(content=jsonable_encoder(cron_jobs))


@job_router.get("/recordings/{id}")
async def get_recording(id: str):
    path = RECORDINGS_DIR / f"{id}.mp4"
    if not path.exists():
        return JSONResponse(content={"error": "Recording not found."}, status_code=404)

    return FileResponse(
        path, headers={"Content-Type": "video/mp4", "Accept-Ranges": "bytes"}
    )


@job_router.get("/get-media")
async def get_media(id: str):
    try:
        files: dict[str, list[str]] = {}

        for media_type in MEDIA_TYPES:
            path = MEDIA_DIR / media_type / f"{id}"

            files[media_type] = [file.name for file in path.glob("*")]

        return JSONResponse(content={"files": files})
    except Exception as e:
        LOG.error(f"Exception occurred: {e}")
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)


@job_router.get("/media")
async def get_media_file(id: str, type: str, file: str):
    path = MEDIA_DIR / type / f"{id}" / file

    if not path.exists():
        return JSONResponse(content={"error": "Media file not found."}, status_code=404)

    return FileResponse(path)
@@ -1,3 +1,4 @@
-from apscheduler.schedulers.background import BackgroundScheduler  # type: ignore
+# PDM
+from apscheduler.schedulers.background import BackgroundScheduler

 scheduler = BackgroundScheduler()
17
api/backend/schemas/cron.py
Normal file
@@ -0,0 +1,17 @@
from typing import Optional, Union
from datetime import datetime
import pydantic


class CronJob(pydantic.BaseModel):
    id: Optional[str] = None
    user_email: str
    job_id: str
    cron_expression: str
    time_created: Optional[Union[datetime, str]] = None
    time_updated: Optional[Union[datetime, str]] = None


class DeleteCronJob(pydantic.BaseModel):
    id: str
    user_email: str
@@ -1,24 +1,24 @@
# STL
from typing import Any, Literal, Optional, Union
from datetime import datetime

# LOCAL
from api.backend.job.models.job_options import JobOptions

# PDM
import pydantic


class Element(pydantic.BaseModel):
    name: str
    xpath: str
    url: Optional[str] = None
from api.backend.job.models import Element, CapturedElement


class CapturedElement(pydantic.BaseModel):
    xpath: str
    text: str
    name: str
class Job(pydantic.BaseModel):
    id: Optional[str] = None
    url: str
    elements: list[Element]
    user: str = ""
    time_created: Optional[Union[datetime, str]] = None
    result: list[dict[str, dict[str, list[CapturedElement]]]] = []
    job_options: JobOptions
    status: str = "Queued"
    chat: Optional[str] = None
    agent_mode: bool = False
    prompt: Optional[str] = None
    favorite: bool = False


class RetrieveScrapeJobs(pydantic.BaseModel):
@@ -34,43 +34,7 @@ class DeleteScrapeJobs(pydantic.BaseModel):
    ids: list[str]


class GetStatistics(pydantic.BaseModel):
    user: str


class UpdateJobs(pydantic.BaseModel):
    ids: list[str]
    field: str
    value: Any


class AI(pydantic.BaseModel):
    messages: list[Any]


class Job(pydantic.BaseModel):
    id: Optional[str] = None
    url: str
    elements: list[Element]
    user: str = ""
    time_created: Optional[Union[datetime, str]] = None
    result: list[dict[str, dict[str, list[CapturedElement]]]] = []
    job_options: JobOptions
    status: str = "Queued"
    chat: Optional[str] = None
    agent_mode: bool = False
    prompt: Optional[str] = None


class CronJob(pydantic.BaseModel):
    id: Optional[str] = None
    user_email: str
    job_id: str
    cron_expression: str
    time_created: Optional[Union[datetime, str]] = None
    time_updated: Optional[Union[datetime, str]] = None


class DeleteCronJob(pydantic.BaseModel):
    id: str
    user_email: str
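The Job schema above is what gets serialized into the jobs table and posted by the tests. A small round-trip sketch (JobOptions and Element are trimmed stand-ins for illustration, not the project's full models):

from typing import Optional

import pydantic


class JobOptions(pydantic.BaseModel):  # trimmed stand-in
    multi_page_scrape: bool = False
    collect_media: bool = False


class Element(pydantic.BaseModel):  # trimmed stand-in
    name: str
    xpath: str
    url: Optional[str] = None


class Job(pydantic.BaseModel):
    id: Optional[str] = None
    url: str
    elements: list[Element]
    job_options: JobOptions
    status: str = "Queued"


job = Job(
    url="https://books.toscrape.com",
    elements=[Element(name="titles", xpath="//h3")],
    job_options=JobOptions(),
)
# model_dump() -> plain dict, as used by the tests' client.post(...) calls
assert Job(**job.model_dump()) == job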
@@ -2,28 +2,30 @@
import logging

# PDM
from fastapi import APIRouter, Depends
from fastapi import Depends, APIRouter

# LOCAL
from api.backend.job import (
from api.backend.auth.schemas import User
from api.backend.auth.auth_utils import get_current_user
from api.backend.routers.handle_exceptions import handle_exceptions
from api.backend.database.queries.statistics.statistic_queries import (
    get_jobs_per_day,
    average_elements_per_link,
)
from api.backend.auth.auth_utils import get_current_user
from api.backend.schemas import User

LOG = logging.getLogger(__name__)
LOG = logging.getLogger("Statistics")

stats_router = APIRouter()


@stats_router.get("/statistics/get-average-element-per-link")
@handle_exceptions(logger=LOG)
async def get_average_element_per_link(user: User = Depends(get_current_user)):
    return await average_elements_per_link(user.email)


@stats_router.get("/statistics/get-average-jobs-per-day")
@handle_exceptions(logger=LOG)
async def average_jobs_per_day(user: User = Depends(get_current_user)):
    data = await get_jobs_per_day(user.email)
    return data
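Both endpoints now rely on the handle_exceptions decorator instead of per-route try/except blocks. A minimal sketch of what such a decorator can look like; the project's real implementation lives in api.backend.routers.handle_exceptions and may differ:

import functools
import logging
from typing import Any, Awaitable, Callable

from fastapi.responses import JSONResponse


def handle_exceptions(logger: logging.Logger):
    def decorator(func: Callable[..., Awaitable[Any]]):
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                # Log the traceback, then return a uniform 500 payload
                logger.exception("Exception occurred: %s", e)
                return JSONResponse(content={"error": str(e)}, status_code=500)

        return wrapper

    return decorator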
63
api/backend/tests/conftest.py
Normal file
@@ -0,0 +1,63 @@
# STL
import os
import sqlite3
from typing import Generator
from unittest.mock import patch

# PDM
import pytest
from proxy import Proxy

# LOCAL
from api.backend.database.schema import INIT_QUERY
from api.backend.tests.constants import TEST_DB_PATH


@pytest.fixture(scope="session", autouse=True)
def running_proxy():
    proxy = Proxy(["--hostname", "127.0.0.1", "--port", "8080"])
    proxy.setup()
    yield proxy
    proxy.shutdown()


@pytest.fixture(scope="session", autouse=True)
def patch_database_path():
    with patch("api.backend.database.common.DATABASE_PATH", TEST_DB_PATH):
        yield


@pytest.fixture(scope="session", autouse=True)
def patch_recordings_enabled():
    with patch("api.backend.job.scraping.scraping.RECORDINGS_ENABLED", False):
        yield


@pytest.fixture(scope="session")
def test_db_path() -> str:
    return TEST_DB_PATH


@pytest.fixture(scope="session", autouse=True)
def test_db(test_db_path: str) -> Generator[str, None, None]:
    """Create a fresh test database for the test session."""
    os.makedirs(os.path.dirname(test_db_path), exist_ok=True)

    if os.path.exists(test_db_path):
        os.remove(test_db_path)

    conn = sqlite3.connect(test_db_path)
    cursor = conn.cursor()

    for query in INIT_QUERY.strip().split(";"):
        query = query.strip()
        if query:
            cursor.execute(query)

    conn.commit()
    conn.close()

    yield test_db_path

    if os.path.exists(test_db_path):
        os.remove(test_db_path)
1
api/backend/tests/constants.py
Normal file
@@ -0,0 +1 @@
TEST_DB_PATH = "tests/test_db.sqlite"
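A hypothetical test that leans on the session-scoped fixtures above; the "jobs" table name matches the schema used elsewhere in the repo, but the test itself is illustrative, not part of the commit:

import sqlite3


def test_database_is_initialized(test_db: str) -> None:
    conn = sqlite3.connect(test_db)
    tables = conn.execute(
        "SELECT name FROM sqlite_master WHERE type = 'table'"
    ).fetchall()
    conn.close()

    assert ("jobs",) in tables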
@@ -1,7 +1,13 @@
from api.backend.models import Element, Job, JobOptions, CapturedElement
# STL
import uuid

# PDM
from faker import Faker

# LOCAL
from api.backend.job.models import Element, JobOptions, CapturedElement
from api.backend.schemas.job import Job

fake = Faker()
@@ -1,8 +1,13 @@
# STL
from unittest.mock import AsyncMock, patch

# PDM
import pytest
from fastapi.testclient import TestClient
from unittest.mock import AsyncMock, patch

# LOCAL
from api.backend.app import app
from api.backend.models import DownloadJob
from api.backend.schemas.job import DownloadJob
from api.backend.tests.factories.job_factory import create_completed_job

client = TestClient(app)
@@ -13,8 +18,8 @@ mocked_random_int = 123456


@pytest.mark.asyncio
@patch("api.backend.routers.job_router.query")
@patch("api.backend.routers.job_router.random.randint")
@patch("api.backend.job.job_router.query")
@patch("api.backend.job.job_router.random.randint")
async def test_download(mock_randint: AsyncMock, mock_query: AsyncMock):
    # Ensure the mock returns immediately
    mock_query.return_value = mock_results
@@ -1,12 +1,26 @@
import pytest
# STL
import logging
from typing import Dict
from playwright.async_api import async_playwright, Cookie, Route
from datetime import datetime

# PDM
import pytest
from fastapi.testclient import TestClient
from playwright.async_api import Route, Cookie, async_playwright

# LOCAL
from api.backend.app import app
from api.backend.job.models import Proxy, Element, JobOptions
from api.backend.schemas.job import Job
from api.backend.database.common import query
from api.backend.job.scraping.scraping import scrape
from api.backend.job.scraping.add_custom import add_custom_items

logging.basicConfig(level=logging.DEBUG)
LOG = logging.getLogger(__name__)

client = TestClient(app)


@pytest.mark.asyncio
async def test_add_custom_items():
@@ -51,3 +65,46 @@ async def test_add_custom_items():
    assert captured_headers.get("user-agent") == "test-agent"

    await browser.close()


@pytest.mark.asyncio
async def test_proxies():
    job = Job(
        url="https://example.com",
        elements=[Element(xpath="//div", name="test")],
        job_options=JobOptions(
            proxies=[
                Proxy(
                    server="127.0.0.1:8080",
                    username="user",
                    password="pass",
                )
            ],
        ),
        time_created=datetime.now().isoformat(),
    )

    response = client.post("/submit-scrape-job", json=job.model_dump())
    assert response.status_code == 200

    jobs = query("SELECT * FROM jobs")
    job = jobs[0]

    assert job is not None
    assert job["job_options"]["proxies"] == [
        {
            "server": "127.0.0.1:8080",
            "username": "user",
            "password": "pass",
        }
    ]

    response = await scrape(
        id=job["id"],
        url=job["url"],
        xpaths=[Element(**e) for e in job["elements"]],
        job_options=job["job_options"],
    )

    example_response = response[0]["https://example.com/"]
    assert example_response != {}
17
api/backend/tests/utilities/database.py
Normal file
@@ -0,0 +1,17 @@
# STL
import sqlite3

# LOCAL
from api.backend.database.schema import INIT_QUERY
from api.backend.tests.constants import TEST_DB_PATH


def connect_to_db():
    conn = sqlite3.connect(TEST_DB_PATH)
    cur = conn.cursor()

    for query in INIT_QUERY.split(";"):
        cur.execute(query)

    conn.commit()
    return conn, cur
@@ -1,17 +1,10 @@
from typing import Any, Optional
# STL
import logging
import json
from typing import Optional

LOG = logging.getLogger(__name__)


def clean_text(text: str):
    text = text.replace("\r\n", "\n")  # Normalize newlines
    text = text.replace("\n", "\\n")  # Escape newlines
    text = text.replace('"', '\\"')  # Escape double quotes
    return text


def get_log_level(level_name: Optional[str]) -> int:
    level = logging.INFO

@@ -20,30 +13,3 @@ def get_log_level(level_name: Optional[str]) -> int:
        level = getattr(logging, level_name, logging.INFO)

    return level


def format_list_for_query(ids: list[str]):
    return (
        f"({','.join(['?' for _ in ids])})"  # Returns placeholders, e.g., "(?, ?, ?)"
    )


def format_sql_row_to_python(row: dict[str, Any]):
    new_row: dict[str, Any] = {}
    for key, value in row.items():
        if isinstance(value, str):
            try:
                new_row[key] = json.loads(value)
            except json.JSONDecodeError:
                new_row[key] = value
        else:
            new_row[key] = value

    return new_row


def format_json(items: list[Any]):
    for idx, item in enumerate(items):
        if isinstance(item, (dict, list)):
            formatted_item = json.dumps(item)
            items[idx] = formatted_item
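For reference, the removed format_list_for_query helper built placeholder lists for parameterized IN clauses, so values are bound by sqlite3 rather than interpolated into the SQL string. A runnable sketch of the same pattern:

import sqlite3

ids = ["a1", "b2", "c3"]
placeholders = f"({','.join('?' for _ in ids)})"  # "(?,?,?)"

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE jobs (id TEXT)")
conn.executemany("INSERT INTO jobs VALUES (?)", [(i,) for i in ids])

# The values travel separately from the SQL text, so no quoting issues.
rows = conn.execute(f"SELECT id FROM jobs WHERE id IN {placeholders}", ids)
print(rows.fetchall())  # [('a1',), ('b2',), ('c3',)]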
17
api/backend/worker/constants.py
Normal file
@@ -0,0 +1,17 @@
# STL
import os
from pathlib import Path

NOTIFICATION_CHANNEL = os.getenv("NOTIFICATION_CHANNEL", "")
NOTIFICATION_WEBHOOK_URL = os.getenv("NOTIFICATION_WEBHOOK_URL", "")
SCRAPERR_FRONTEND_URL = os.getenv("SCRAPERR_FRONTEND_URL", "")
EMAIL = os.getenv("EMAIL", "")
TO = os.getenv("TO", "")
SMTP_HOST = os.getenv("SMTP_HOST", "")
SMTP_PORT = int(os.getenv("SMTP_PORT", 587))
SMTP_USER = os.getenv("SMTP_USER", "")
SMTP_PASSWORD = os.getenv("SMTP_PASSWORD", "")
USE_TLS = os.getenv("USE_TLS", "false").lower() == "true"

RECORDINGS_ENABLED = os.getenv("RECORDINGS_ENABLED", "true").lower() == "true"
RECORDINGS_DIR = Path("/project/app/media/recordings")
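The boolean settings above all follow the lower() == "true" convention; an explicit helper makes that contract visible (illustrative only, not part of the commit):

import os


def env_flag(name: str, default: str = "false") -> bool:
    # Any casing of "true" enables the flag; everything else disables it.
    return os.getenv(name, default).strip().lower() == "true"


USE_TLS = env_flag("USE_TLS")
RECORDINGS_ENABLED = env_flag("RECORDINGS_ENABLED", "true")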
@@ -1,37 +1,34 @@
import os
# STL
import json
from pathlib import Path

from api.backend.job import get_queued_job, update_job
from api.backend.scraping import scrape
from api.backend.models import Element
from fastapi.encoders import jsonable_encoder
import subprocess

import asyncio
import traceback
import subprocess

from api.backend.database.startup import init_database
# PDM
from fastapi.encoders import jsonable_encoder

from api.backend.worker.post_job_complete.post_job_complete import post_job_complete
# LOCAL
from api.backend.job import update_job, get_queued_job
from api.backend.job.models import Element
from api.backend.worker.logger import LOG

from api.backend.ai.agent.agent import scrape_with_agent


NOTIFICATION_CHANNEL = os.getenv("NOTIFICATION_CHANNEL", "")
NOTIFICATION_WEBHOOK_URL = os.getenv("NOTIFICATION_WEBHOOK_URL", "")
SCRAPERR_FRONTEND_URL = os.getenv("SCRAPERR_FRONTEND_URL", "")
EMAIL = os.getenv("EMAIL", "")
TO = os.getenv("TO", "")
SMTP_HOST = os.getenv("SMTP_HOST", "")
SMTP_PORT = int(os.getenv("SMTP_PORT", 587))
SMTP_USER = os.getenv("SMTP_USER", "")
SMTP_PASSWORD = os.getenv("SMTP_PASSWORD", "")
USE_TLS = os.getenv("USE_TLS", "false").lower() == "true"

RECORDINGS_ENABLED = os.getenv("RECORDINGS_ENABLED", "true").lower() == "true"
RECORDINGS_DIR = Path("/project/app/media/recordings")
from api.backend.database.startup import init_database
from api.backend.worker.constants import (
    TO,
    EMAIL,
    USE_TLS,
    SMTP_HOST,
    SMTP_PORT,
    SMTP_USER,
    SMTP_PASSWORD,
    RECORDINGS_DIR,
    RECORDINGS_ENABLED,
    NOTIFICATION_CHANNEL,
    SCRAPERR_FRONTEND_URL,
    NOTIFICATION_WEBHOOK_URL,
)
from api.backend.job.scraping.scraping import scrape
from api.backend.worker.post_job_complete.post_job_complete import post_job_complete


async def process_job():
@@ -84,12 +81,7 @@ async def process_job():
        job["id"],
        job["url"],
        [Element(**j) for j in job["elements"]],
        job["job_options"]["custom_headers"],
        job["job_options"]["multi_page_scrape"],
        proxies,
        job["job_options"]["site_map"],
        job["job_options"]["collect_media"],
        job["job_options"]["custom_cookies"],
        {**job["job_options"], "proxies": proxies},
    )

    LOG.info(
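The hunk above collapses the worker's long positional scrape(...) call into a single options mapping. The key behavior is plain dict merging, where the later "proxies" entry overrides any stored value:

job_options = {  # shape follows the keys used in the old call site
    "custom_headers": {"User-Agent": "test"},
    "multi_page_scrape": False,
    "site_map": None,
    "collect_media": False,
    "custom_cookies": [],
    "proxies": [],
}
proxies = ["127.0.0.1:8080"]

merged = {**job_options, "proxies": proxies}
print(merged["proxies"])  # ['127.0.0.1:8080'] -- the resolved value wins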
@@ -1,12 +1,13 @@
# STL
import logging
import os

from api.backend.utils import get_log_level
# LOCAL
from api.backend.app import LOG_LEVEL

logging.basicConfig(
    level=get_log_level(os.getenv("LOG_LEVEL")),
    format="%(levelname)s: %(asctime)s - %(name)s - %(message)s",
    level=LOG_LEVEL,
    format="%(levelname)s: %(asctime)s - [%(name)s] - %(message)s",
    handlers=[logging.StreamHandler()],
)

LOG = logging.getLogger(__name__)
LOG = logging.getLogger("Job Worker")
@@ -1,5 +1,7 @@
# STL
from typing import Any

# LOCAL
from api.backend.worker.post_job_complete.models import PostJobCompleteOptions
from api.backend.worker.post_job_complete.email_notifcation import (
    send_job_complete_email,
@@ -16,9 +18,10 @@ async def post_job_complete(job: dict[str, Any], options: PostJobCompleteOptions
    if not options.values():
        return

    if options["channel"] == "discord":
        discord_notification(job, options)
    elif options["channel"] == "email":
        send_job_complete_email(job, options)
    else:
        raise ValueError(f"Invalid channel: {options['channel']}")
    match options["channel"]:
        case "discord":
            discord_notification(job, options)
        case "email":
            send_job_complete_email(job, options)
        case _:
            raise ValueError(f"Invalid channel: {options['channel']}")
23
cypress/e2e/00-setup.cy.ts
Normal file
@@ -0,0 +1,23 @@
describe("Global setup", () => {
  it("signs up user once", () => {
    cy.request({
      method: "POST",
      url: "/api/signup",
      body: JSON.stringify({
        data: {
          email: "test@test.com",
          password: "password",
          full_name: "John Doe",
        },
      }),
      headers: {
        "Content-Type": "application/json",
      },
      failOnStatusCode: false,
    }).then((response) => {
      if (response.status !== 200 && response.status !== 201) {
        console.warn("Signup failed:", response.status, response.body);
      }
    });
  });
});
101
cypress/e2e/advanced-job-options.cy.ts
Normal file
@@ -0,0 +1,101 @@
import { login } from "../utilities/authentication.utils";
import {
  addCustomHeaders,
  addElement,
  addMedia,
  addSiteMapAction,
  checkForMedia,
  cleanUpJobs,
  enterJobUrl,
  openAdvancedJobOptions,
  submitBasicJob,
  submitJob,
  waitForJobCompletion,
} from "../utilities/job.utilities";
import { mockSubmitJob } from "../utilities/mocks";

describe.only("Advanced Job Options", () => {
  beforeEach(() => {
    mockSubmitJob();
    login();
    cy.visit("/");
  });

  afterEach(() => {
    cleanUpJobs();
  });

  it.only("should handle custom headers", () => {
    const customHeaders = {
      "User-Agent": "Test Agent",
      "Accept-Language": "en-US",
    };

    addCustomHeaders(customHeaders);
    submitBasicJob("https://httpbin.org/headers", "headers", "//pre");

    cy.wait("@submitScrapeJob").then((interception) => {
      expect(interception.response?.statusCode).to.eq(200);
      expect(
        interception.request?.body.data.job_options.custom_headers
      ).to.deep.equal(customHeaders);
    });

    waitForJobCompletion("https://httpbin.org/headers");
  });

  it("should handle site map actions", () => {
    addSiteMapAction("click", "//button[contains(text(), 'Load More')]");
    addSiteMapAction("input", "//input[@type='search']", "test search");

    submitBasicJob("https://example.com", "content", "//div[@class='content']");

    cy.wait("@submitScrapeJob").then((interception) => {
      expect(interception.response?.statusCode).to.eq(200);
      const siteMap = interception.request?.body.data.job_options.site_map;
      expect(siteMap.actions).to.have.length(2);
      expect(siteMap.actions[0].type).to.equal("click");
      expect(siteMap.actions[1].type).to.equal("input");
    });

    waitForJobCompletion("https://example.com");
  });

  it("should handle multiple elements", () => {
    enterJobUrl("https://books.toscrape.com");

    addElement("titles", "//h3");
    addElement("prices", "//p[@class='price_color']");

    submitJob();

    cy.wait("@submitScrapeJob").then((interception) => {
      expect(interception.response?.statusCode).to.eq(200);
      expect(interception.request?.body.data.elements).to.have.length(2);
    });

    waitForJobCompletion("https://books.toscrape.com");
  });

  it.only("should handle collecting media", () => {
    enterJobUrl("https://books.toscrape.com");

    openAdvancedJobOptions();
    addMedia();

    cy.get("body").type("{esc}");

    addElement("images", "//img");

    submitJob();

    cy.wait("@submitScrapeJob").then((interception) => {
      expect(interception.response?.statusCode).to.eq(200);
      expect(interception.request?.body.data.job_options.collect_media).to.be
        .true;
    });

    waitForJobCompletion("https://books.toscrape.com");
    checkForMedia();
  });
});
35
cypress/e2e/agent.cy.ts
Normal file
@@ -0,0 +1,35 @@
import { login } from "../utilities/authentication.utils";
import {
  buildAgentJob,
  cleanUpJobs,
  submitJob,
  waitForJobCompletion,
} from "../utilities/job.utilities";
import { mockSubmitJob } from "../utilities/mocks";

describe.only("Agent", () => {
  beforeEach(() => {
    mockSubmitJob();
    login();
    cy.visit("/agent");
  });

  afterEach(() => {
    cleanUpJobs();
  });

  it("should be able to scrape some data", () => {
    const url = "https://books.toscrape.com";
    const prompt = "Collect all the links on the page";
    buildAgentJob(url, prompt);

    submitJob();
    cy.wait("@submitScrapeJob").then((interception) => {
      expect(interception.response?.statusCode).to.eq(200);
      expect(interception.request?.body.data.url).to.eq(url);
      expect(interception.request?.body.data.prompt).to.eq(prompt);
    });

    waitForJobCompletion("https://books.toscrape.com");
  });
});
@@ -1,60 +1,61 @@
describe("Authentication", () => {
  it("should register", () => {
    cy.intercept("POST", "/api/signup").as("signup");
import { faker } from "@faker-js/faker";
import { mockLogin, mockSignup } from "../utilities/mocks";

    cy.visit("/").then(() => {
      cy.get("button").contains("Login").click();
      cy.url().should("include", "/login");
const mockEmail = faker.internet.email();
const mockPassword = faker.internet.password();

      cy.get("form").should("be.visible");
      cy.get("button")
        .contains("No Account? Sign up")
        .should("be.visible")
        .click();

      cy.get("input[name='email']").type("test@test.com");
      cy.get("input[name='password']").type("password");
      cy.get("input[name='fullName']").type("John Doe");
      cy.get("button[type='submit']").contains("Signup").click();

      cy.wait("@signup").then((interception) => {
        if (!interception.response) {
          cy.log("No response received!");
          throw new Error("signup request did not return a response");
        }

        cy.log("Response status: " + interception.response.statusCode);
        cy.log("Response body: " + JSON.stringify(interception.response.body));

        expect(interception.response.statusCode).to.eq(200);
      });
    });
describe.only("Authentication", () => {
  beforeEach(() => {
    cy.visit("/");
    mockSignup();
    mockLogin();
  });

  it("should login", () => {
    cy.intercept("POST", "/api/token").as("token");
  it("should register", () => {
    cy.get("button").contains("Login").click();
    cy.url().should("include", "/login");

    cy.visit("/").then(() => {
      cy.get("button")
        .contains("Login")
        .click()
        .then(() => {
          cy.get("input[name='email']").type("test@test.com");
          cy.get("input[name='password']").type("password");
          cy.get("button[type='submit']").contains("Login").click();
    cy.get("form").should("be.visible");

          cy.wait("@token").then((interception) => {
            if (!interception.response) {
              cy.log("No response received!");
              throw new Error("token request did not return a response");
            }
    cy.get("button")
      .contains("No Account? Sign up")
      .should("be.visible")
      .click();

            cy.log("Response status: " + interception.response.statusCode);
            cy.log("Response body: " + JSON.stringify(interception.response.body));
    cy.get("input[name='email']").type(mockEmail);
    cy.get("input[name='password']").type(mockPassword);
    cy.get("input[name='fullName']").type(faker.person.fullName());
    cy.get("button[type='submit']").contains("Signup").click();

            expect(interception.response.statusCode).to.eq(200);
          });
        });
    cy.wait("@signup").then((interception) => {
      if (!interception.response) {
        throw new Error("signup request did not return a response");
      }

      expect(interception.response.statusCode).to.eq(200);
    });
  });
});

  it("should login", () => {
    cy.intercept("POST", "/api/token").as("token");

    cy.visit("/").then(() => {
      cy.get("button")
        .contains("Login")
        .click()
        .then(() => {
          cy.get("input[name='email']").type(mockEmail);
          cy.get("input[name='password']").type(mockPassword);
          cy.get("button[type='submit']").contains("Login").click();

          cy.wait("@token").then((interception) => {
            if (!interception.response) {
              throw new Error("token request did not return a response");
            }

            expect(interception.response.statusCode).to.eq(200);
          });
        });
    });
  });
});
34
cypress/e2e/chat.cy.ts
Normal file
@@ -0,0 +1,34 @@
import { login } from "../utilities/authentication.utils";
import {
  cleanUpJobs,
  selectJobFromSelector,
  submitBasicJob,
  waitForJobCompletion,
} from "../utilities/job.utilities";
import { mockLogin } from "../utilities/mocks";

describe.only("Chat", () => {
  beforeEach(() => {
    mockLogin();
    login();
    cy.visit("/");
  });

  afterEach(() => {
    cleanUpJobs();
  });

  it.only("should be able to chat", () => {
    const url = "https://books.toscrape.com";
    submitBasicJob(url, "test", "//body");
    waitForJobCompletion(url);

    cy.visit("/chat");
    selectJobFromSelector();

    cy.get("[data-cy='message-input']").type("Hello");
    cy.get("[data-cy='send-message']").click();

    cy.get("[data-cy='ai-message']").should("exist");
  });
});
@@ -1,88 +1,37 @@
import { login } from "../utilities/authentication.utils";
import {
  addElement,
  cleanUpJobs,
  enterJobUrl,
  submitJob,
  waitForJobCompletion,
} from "../utilities/job.utilities";
import { mockSubmitJob } from "../utilities/mocks";

describe.only("Job", () => {
  it("should create a job", () => {
    cy.intercept("POST", "/api/submit-scrape-job").as("submitScrapeJob");

  beforeEach(() => {
    mockSubmitJob();
    login();
    cy.visit("/");
  });

    cy.get('[data-cy="url-input"]').type("https://example.com");
    cy.get('[data-cy="name-field"]').type("example");
    cy.get('[data-cy="xpath-field"]').type("//body");
    cy.get('[data-cy="add-button"]').click();
  afterEach(() => {
    cleanUpJobs();
  });

    cy.contains("Submit").click();
  it("should create a job", () => {
    enterJobUrl("https://books.toscrape.com");
    addElement("body", "//body");
    submitJob();

    cy.wait("@submitScrapeJob").then((interception) => {
      if (!interception.response) {
        cy.log("No response received!");
        cy.log("Request body: " + JSON.stringify(interception.request?.body));
        throw new Error("submitScrapeJob request did not return a response");
      }

      cy.log("Response status: " + interception.response.statusCode);
      cy.log("Response body: " + JSON.stringify(interception.response.body));

      expect(interception.response.statusCode).to.eq(200);
    });

    cy.get("li").contains("Jobs").click();

    cy.contains("div", "https://example.com", { timeout: 10000 }).should(
      "exist"
    );
    cy.contains("div", "Completed", { timeout: 20000 }).should("exist");

    cy.get("tbody tr")
      .first()
      .within(() => {
        cy.get('input[type="checkbox"]').click();
      });

    cy.get("[data-testid='DeleteIcon']").click();

    cy.contains("div", "https://example.com", { timeout: 10000 }).should(
      "not.exist"
    );
  });

  it("should create a job with advanced options (media)", () => {
    cy.intercept("POST", "/api/submit-scrape-job").as("submitScrapeJob");

    cy.visit("/");

    cy.get("button").contains("Advanced Job Options").click();

    cy.get('[data-cy="collect-media-checkbox"]').click();
    cy.get("body").type("{esc}");

    cy.get('[data-cy="url-input"]').type("https://books.toscrape.com");
    cy.get('[data-cy="name-field"]').type("example");
    cy.get('[data-cy="xpath-field"]').type("//body");
    cy.get('[data-cy="add-button"]').click();

    cy.get("button").contains("Submit").click();

    cy.get("li").contains("Jobs").click();

    cy.contains("div", "https://books.toscrape.com", { timeout: 10000 }).should(
      "exist"
    );

    cy.contains("div", "Completed", { timeout: 20000 }).should("exist");
    cy.get("li").contains("Media").click();

    cy.get("div[id='select-job']").click();
    cy.get("li[role='option']").click();

    cy.get("[data-testid='media-grid']", { timeout: 10000 }).should("exist");

    cy.get("li").contains("Jobs").click();

    cy.get("tbody tr")
      .first()
      .within(() => {
        cy.get('input[type="checkbox"]').click();
      });

    cy.get("[data-testid='DeleteIcon']").click();
    waitForJobCompletion("https://books.toscrape.com");
  });
});
@@ -14,7 +14,7 @@
// ***********************************************************

// Import commands.js using ES2015 syntax:
import './commands'
import "./commands";

// Alternatively you can use CommonJS syntax:
// require('./commands')
// require('./commands')
68
cypress/utilities/authentication.utils.ts
Normal file
@@ -0,0 +1,68 @@
export const signup = () => {
  cy.intercept("POST", "/api/token").as("token");

  cy.visit("/").then(() => {
    cy.get("button").contains("Login").click();
    cy.url().should("include", "/login");

    cy.get("form").should("be.visible");
    cy.get("button")
      .contains("No Account? Sign up")
      .should("be.visible")
      .click();

    cy.get("input[name='email']").type("test@test.com");
    cy.get("input[name='password']").type("password");
    cy.get("input[name='fullName']").type("John Doe");
    cy.get("button[type='submit']").contains("Signup").click();

    cy.wait("@token").then((interception) => {
      if (!interception.response) {
        cy.log("No response received!");
        throw new Error("token request did not return a response");
      }
    });
  });
};

export const login = () => {
  cy.intercept("POST", "/api/token").as("token");
  cy.intercept("GET", "/api/me").as("me");
  cy.intercept("GET", "/api/check").as("check");

  cy.visit("/").then(() => {
    cy.get("body").then(() => {
      cy.get("button")
        .contains("Login")
        .click()
        .then(() => {
          cy.get("input[name='email']").type("test@test.com");
          cy.get("input[name='password']").type("password");
          cy.get("button[type='submit']").contains("Login").click();

          cy.wait("@token").then((interception) => {
            if (!interception.response) {
              cy.log("No response received!");
              throw new Error("token request did not return a response");
            }
          });

          cy.wait("@me").then((interception) => {
            if (!interception.response) {
              cy.log("No response received!");
              throw new Error("me request did not return a response");
            }
          });

          cy.wait("@check").then((interception) => {
            if (!interception.response) {
              cy.log("No response received!");
              throw new Error("check request did not return a response");
            }
          });

          cy.url().should("not.include", "/login");
        });
    });
  });
};
151
cypress/utilities/job.utilities.ts
Normal file
@@ -0,0 +1,151 @@
export const cleanUpJobs = () => {
  cy.intercept("POST", "/api/retrieve").as("retrieve");
  cy.visit("/jobs");

  cy.wait("@retrieve", { timeout: 15000 });

  cy.get("tbody tr", { timeout: 10000 }).should("have.length.at.least", 1);

  const tryClickSelectAll = (attempt = 1, maxAttempts = 5) => {
    cy.log(`Attempt ${attempt} to click Select All`);

    cy.get('[data-testid="select-all"]')
      .closest("button")
      .then(($btn) => {
        // Retry if button is disabled
        if ($btn.is(":disabled") || $btn.css("pointer-events") === "none") {
          if (attempt < maxAttempts) {
            cy.wait(1000).then(() =>
              tryClickSelectAll(attempt + 1, maxAttempts)
            );
          } else {
            throw new Error(
              "Select All button is still disabled after max retries"
            );
          }
        } else {
          // Click and then verify if checkbox is checked
          cy.wrap($btn)
            .click({ force: true })
            .then(() => {
              cy.get("tbody tr")
                .first()
                .find("td")
                .first()
                .find("input[type='checkbox']")
                .should("be.checked")
                .then(() => {
                  cy.log("Select All successful");
                });
            });

          // Handle failure case
          cy.on("fail", () => {
            cy.log("Error clicking Select All");
            if (attempt < maxAttempts) {
              cy.wait(1000).then(() =>
                tryClickSelectAll(attempt + 1, maxAttempts)
              );
            } else {
              throw new Error(
                "Checkbox was never checked after clicking Select All"
              );
            }
            return false; // Prevent Cypress from failing the test
          });
        }
      });
  };

  tryClickSelectAll();

  cy.get('[data-testid="DeleteIcon"]', { timeout: 10000 })
    .closest("button")
    .should("not.be.disabled")
    .click();
};

export const submitBasicJob = (url: string, name: string, xpath: string) => {
  cy.get('[data-cy="url-input"]').type(url);
  cy.get('[data-cy="name-field"]').type(name);
  cy.get('[data-cy="xpath-field"]').type(xpath);
  cy.get('[data-cy="add-button"]').click();
  cy.contains("Submit").click();
};

export const waitForJobCompletion = (url: string) => {
  cy.visit("/jobs");
  cy.contains("div", url, { timeout: 10000 }).should("exist");
  cy.contains("div", "Completed", { timeout: 20000 }).should("exist");
};

export const enableMultiPageScraping = () => {
  cy.get("button").contains("Advanced Job Options").click();
  cy.get('[data-cy="multi-page-toggle"]').click();
  cy.get("body").type("{esc}");
};

export const addCustomHeaders = (headers: Record<string, string>) => {
  cy.get("button").contains("Advanced Job Options").click();
  cy.get('[name="custom_headers"]').type(JSON.stringify(headers), {
    parseSpecialCharSequences: false,
  });
  cy.get("body").type("{esc}");
};

export const addCustomCookies = (cookies: Record<string, string>) => {
  cy.get("button").contains("Advanced Job Options").click();
  cy.get('[name="custom_cookies"]').type(JSON.stringify(cookies));
  cy.get("body").type("{esc}");
};

export const openAdvancedJobOptions = () => {
  cy.get("button").contains("Advanced Job Options").click();
};

export const selectJobFromSelector = () => {
  cy.get("div[id='select-job']").click();
  cy.get("li[role='option']").first().click();
};

export const addMedia = () => {
  cy.get('[data-cy="collect-media-checkbox"]').click();
};

export const checkForMedia = () => {
  cy.visit("/media");
  selectJobFromSelector();
  cy.get("[data-testid='media-grid']", { timeout: 10000 }).should("exist");
};

export const addSiteMapAction = (
  type: "click" | "input",
  xpath: string,
  input?: string
) => {
  cy.get("button").contains("Create Site Map").click();
  cy.get('[data-cy="site-map-select"]').select(type);
  cy.get('[data-cy="site-map-xpath"]').type(xpath);
  if (type === "input" && input) {
    cy.get('[data-cy="site-map-input"]').type(input);
  }
  cy.get('[data-cy="add-site-map-action"]').click();
};

export const addElement = (name: string, xpath: string) => {
  cy.get('[data-cy="name-field"]').type(name);
  cy.get('[data-cy="xpath-field"]').type(xpath);
  cy.get('[data-cy="add-button"]').click();
};

export const buildAgentJob = (url: string, prompt: string) => {
  enterJobUrl(url);
  cy.get("[data-cy='prompt-input']").type(prompt);
};

export const submitJob = () => {
  cy.get("button").contains("Submit").click();
};

export const enterJobUrl = (url: string) => {
  cy.get('[data-cy="url-input"]').type(url);
};
15
cypress/utilities/mocks.ts
Normal file
@@ -0,0 +1,15 @@
export const mockSubmitJob = () => {
  cy.intercept("POST", "/api/submit-scrape-job").as("submitScrapeJob");
};

export const mockToken = () => {
  cy.intercept("POST", "/api/token").as("token");
};

export const mockSignup = () => {
  cy.intercept("POST", "/api/signup").as("signup");
};

export const mockLogin = () => {
  cy.intercept("POST", "/api/token").as("token");
};
1
cypress/utilities/utilities.ts
Normal file
@@ -0,0 +1 @@
export * from "./authentication.utils";
@@ -15,6 +15,7 @@ services:
    image: jpyles0524/scraperr_api:latest
    environment:
      - LOG_LEVEL=INFO
      - OPENAI_KEY=${OPENAI_KEY}
    container_name: scraperr_api
    ports:
      - 8000:8000
@@ -36,6 +36,7 @@
    "react-router": "^6.14.1",
    "react-router-dom": "^6.14.1",
    "react-spinners": "^0.14.1",
    "react-toastify": "^11.0.5",
    "redux-persist": "^6.0.0",
    "typescript": "^4.9.5",
    "web-vitals": "^2.1.4"
@@ -67,6 +68,7 @@
    ]
  },
  "devDependencies": {
    "@faker-js/faker": "^9.8.0",
    "@types/cypress": "^1.1.6",
    "@types/js-cookie": "^3.0.6",
    "autoprefixer": "^10.4.21",
13
pdm.lock
generated
@@ -5,7 +5,7 @@
groups = ["default", "dev"]
strategy = ["inherit_metadata"]
lock_version = "4.5.0"
content_hash = "sha256:5f4c90b42c3b35194a7c2af8b46b7c28127e25e836a779e85aae0df2bd0e69eb"
content_hash = "sha256:1a65c1e288d2c6827fc6866d3bfe6a9b8707b2ca895d488f4a9b11cd579c4359"

[[metadata.targets]]
requires_python = ">=3.10"
@@ -2314,6 +2314,17 @@ files = [
    {file = "propcache-0.3.1.tar.gz", hash = "sha256:40d980c33765359098837527e18eddefc9a24cea5b45e078a7f3bb5b032c6ecf"},
]

[[package]]
name = "proxy-py"
version = "2.4.10"
requires_python = ">=3.6"
summary = "\\u26a1 Fast \\u2022 \\U0001fab6 Lightweight \\u2022 \\U0001f51f Dependency \\u2022 \\U0001f50c Pluggable \\u2022 \\U0001f608 TLS interception \\u2022 \\U0001f512 DNS-over-HTTPS \\u2022 \\U0001f525 Poor Mans VPN \\u2022 \\u23ea Reverse & \\u23e9 Forward \\u2022 \\U0001f46e\\U0001f3ff Proxy Server framework \\u2022 \\U0001f310 Web Server framework \\u2022 \\u27b5 \\u27b6 \\u27b7 \\u27a0 PubSub framework \\u2022 \\U0001f477 Work acceptor & executor framework."
groups = ["default"]
files = [
    {file = "proxy.py-2.4.10-py3-none-any.whl", hash = "sha256:ef3a31f6ef3be6ff78559c0e68198523bfe2fb1e820bb16686750c1bb5baf9e8"},
    {file = "proxy_py-2.4.10.tar.gz", hash = "sha256:41b9e9d3aae6f80e2304d3726e8e9c583a510d8de224eada53d115f48a63a9ce"},
]

[[package]]
name = "ptyprocess"
version = "0.7.0"
@@ -42,6 +42,7 @@ dependencies = [
    "playwright>=1.52.0",
    "camoufox>=0.4.11",
    "html2text>=2025.4.15",
    "proxy-py>=2.4.10",
]
requires-python = ">=3.10"
readme = "README.md"
@@ -98,9 +99,9 @@ strictSetInference = true


[tool.isort]
length_sort = "1"
length_sort = true
profile = "black"
sections = "STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER"
sections = ["STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
import_heading_stdlib = "STL"
import_heading_thirdparty = "PDM"
import_heading_firstparty = "LOCAL"
@@ -1,5 +0,0 @@
import React from "react";

export const Chat = () => {
  return <h1>Chat</h1>;
};
@@ -1,2 +0,0 @@
export * from "./Chat";
export * from "./JobSelector";
@@ -15,6 +15,7 @@ export const AdvancedJobOptions = ({
  multiPageScrapeEnabled = true,
}: AdvancedJobOptionsProps) => {
  const [open, setOpen] = useState(false);

  return (
    <Box sx={{ mb: 2 }}>
      <Link
@@ -36,6 +37,7 @@ export const AdvancedJobOptions = ({
      >
        <Typography variant="body2">Advanced Job Options</Typography>
      </Link>

      <AdvancedJobOptionsDialog
        open={open}
        onClose={() => setOpen(false)}
@@ -1,3 +1,11 @@
import { ExpandedTableInput } from "@/components/common/expanded-table-input";
import { RawJobOptions } from "@/types";
import {
  Code as CodeIcon,
  ExpandMore as ExpandMoreIcon,
  InfoOutlined,
  Settings,
} from "@mui/icons-material";
import {
  Accordion,
  AccordionDetails,
@@ -17,15 +25,7 @@ import {
  Typography,
  useTheme,
} from "@mui/material";
import {
  ExpandMore as ExpandMoreIcon,
  InfoOutlined,
  Code as CodeIcon,
  Settings,
} from "@mui/icons-material";
import { Dispatch, SetStateAction } from "react";
import { RawJobOptions } from "@/types";
import { ExpandedTableInput } from "../../expanded-table-input";
import { Dispatch, SetStateAction, useEffect, useState } from "react";

export type AdvancedJobOptionsDialogProps = {
  open: boolean;
@@ -43,31 +43,45 @@ export const AdvancedJobOptionsDialog = ({
  multiPageScrapeEnabled = true,
}: AdvancedJobOptionsDialogProps) => {
  const theme = useTheme();
  const [localJobOptions, setLocalJobOptions] =
    useState<RawJobOptions>(jobOptions);

  // Update local state when prop changes
  useEffect(() => {
    setLocalJobOptions(jobOptions);
  }, [jobOptions]);

  const handleMultiPageScrapeChange = () => {
    setJobOptions((prevJobOptions) => ({
    setLocalJobOptions((prevJobOptions) => ({
      ...prevJobOptions,
      multi_page_scrape: !prevJobOptions.multi_page_scrape,
    }));
  };

  const handleProxiesChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    setJobOptions((prevJobOptions) => ({
    setLocalJobOptions((prevJobOptions) => ({
      ...prevJobOptions,
      proxies: e.target.value,
    }));
  };

  const handleCollectMediaChange = () => {
    setJobOptions((prevJobOptions) => ({
    setLocalJobOptions((prevJobOptions) => ({
      ...prevJobOptions,
      collect_media: !prevJobOptions.collect_media,
    }));
  };

  const handleClose = () => {
    // Save the local state back to the parent before closing
    setJobOptions(localJobOptions);
    onClose();
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      onClose={handleClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
@@ -122,7 +136,7 @@ export const AdvancedJobOptionsDialog = ({
            <FormControlLabel
              control={
                <Checkbox
                  checked={jobOptions.multi_page_scrape}
                  checked={localJobOptions.multi_page_scrape}
                  onChange={handleMultiPageScrapeChange}
                  disabled={!multiPageScrapeEnabled}
                />
@@ -147,7 +161,7 @@ export const AdvancedJobOptionsDialog = ({
            <FormControlLabel
              control={
                <Checkbox
                  checked={jobOptions.collect_media}
                  checked={localJobOptions.collect_media}
                  onChange={handleCollectMediaChange}
                  data-cy="collect-media-checkbox"
                />
@@ -233,7 +247,7 @@ export const AdvancedJobOptionsDialog = ({
                fullWidth
                variant="outlined"
                size="small"
                value={jobOptions.proxies}
                value={localJobOptions.proxies}
                onChange={handleProxiesChange}
                InputProps={{
                  startAdornment: (
@@ -251,8 +265,9 @@ export const AdvancedJobOptionsDialog = ({
                label="Custom Headers"
                placeholder='{"User-Agent": "CustomAgent", "Accept": "*/*"}'
                urlParam="custom_headers"
                name="custom_headers"
                onChange={(value) => {
                  setJobOptions((prevJobOptions) => ({
                  setLocalJobOptions((prevJobOptions) => ({
                    ...prevJobOptions,
                    custom_headers: value,
                  }));
@@ -264,8 +279,9 @@ export const AdvancedJobOptionsDialog = ({
                label="Custom Cookies"
                placeholder='[{"name": "value", "name2": "value2"}]'
                urlParam="custom_cookies"
                name="custom_cookies"
                onChange={(value) => {
                  setJobOptions((prevJobOptions) => ({
                  setLocalJobOptions((prevJobOptions) => ({
                    ...prevJobOptions,
                    custom_cookies: value,
                  }));
@@ -1,17 +1,17 @@
import React, { useState } from "react";
import {
  alpha,
  Box,
  Paper,
  Table,
  TableBody,
  TableCell,
  TableContainer,
  TableHead,
  TableRow,
  Paper,
  Box,
  Typography,
  useTheme,
  alpha,
} from "@mui/material";
import React, { useState } from "react";

export type CsvRow = {
  [key: string]: string;
@@ -1,28 +1,29 @@
import { parseJsonToEntries } from "@/lib/helpers/parse-json-to-entries";
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
import {
  Accordion,
  AccordionSummary,
  TableCell,
  TableRow,
  Paper,
  TableBody,
  useTheme,
  TextField,
  Box,
  Typography,
  AccordionDetails,
  TableHead,
  TableContainer,
  AccordionSummary,
  Box,
  Paper,
  Table,
  TableBody,
  TableCell,
  TableContainer,
  TableHead,
  TableRow,
  TextField,
  Typography,
  useTheme,
} from "@mui/material";
import { useEffect, useState } from "react";
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
import { parseJsonToEntries } from "@/lib/helpers/parse-json-to-entries";

export type ExpandedTableInputProps = {
  label: string;
  onChange: (value: any) => void;
  placeholder: string;
  urlParam: string;
  name: string;
};

export const ExpandedTableInput = ({
@@ -30,6 +31,7 @@ export const ExpandedTableInput = ({
  onChange,
  placeholder,
  urlParam,
  name,
}: ExpandedTableInputProps) => {
  const theme = useTheme();
  const [value, setValue] = useState("");
@@ -150,6 +152,7 @@ export const ExpandedTableInput = ({
        size="small"
        error={jsonError !== null}
        helperText={jsonError ?? ""}
        name={name}
      />

      {parsedHeaders && parsedHeaders.length > 0 && (
@@ -1,16 +1,16 @@
import { useDownloadJob } from "@/hooks/use-download-job";
import {
  Dialog,
  DialogTitle,
  DialogContent,
  DialogActions,
  Button,
  FormControl,
  RadioGroup,
  FormControlLabel,
  Radio,
  FormLabel,
  Typography,
  Box,
  Button,
  Dialog,
  DialogContent,
  DialogTitle,
  FormControl,
  FormControlLabel,
  FormLabel,
  Radio,
  RadioGroup,
  Typography,
} from "@mui/material";
import { useState } from "react";

@@ -26,27 +26,10 @@ export const JobDownloadDialog = ({
  ids,
}: JobDownloadDialogProps) => {
  const [jobFormat, setJobFormat] = useState<string>("csv");
  const handleDownload = async () => {
    const response = await fetch("/api/download", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: { ids: ids, job_format: jobFormat } }),
    });
  const { downloadJob } = useDownloadJob();

    if (response.ok) {
      const blob = await response.blob();
      const url = window.URL.createObjectURL(blob);
      const a = document.createElement("a");
      a.style.display = "none";
      a.href = url;
      a.download = `job_${ids[0]}.${jobFormat}`;
      document.body.appendChild(a);
      a.click();
      window.URL.revokeObjectURL(url);
      document.body.removeChild(a);
    } else {
      console.error("Failed to download the file.");
    }
  const handleDownload = () => {
    downloadJob(ids, jobFormat);
  };

  return (
1
src/components/common/job-selector/index.ts
Normal file
@@ -0,0 +1 @@
export * from "./job-selector";
@@ -1,17 +1,17 @@
import React, { useState, Dispatch, useEffect } from "react";
import { Job } from "../../types";
import Box from "@mui/material/Box";
import InputLabel from "@mui/material/InputLabel";
import FormControl from "@mui/material/FormControl";
import Select from "@mui/material/Select";
import Popover from "@mui/material/Popover";
import { Job } from "@/types";
import {
  Typography,
  MenuItem,
  useTheme,
  ClickAwayListener,
  MenuItem,
  SxProps,
  Typography,
  useTheme,
} from "@mui/material";
import { SxProps } from "@mui/material";
import Box from "@mui/material/Box";
import FormControl from "@mui/material/FormControl";
import InputLabel from "@mui/material/InputLabel";
import Popover from "@mui/material/Popover";
import Select from "@mui/material/Select";
import React, { Dispatch, useEffect, useState } from "react";

interface Props {
  sxProps?: SxProps;
@@ -19,7 +18,6 @@ interface Props {
    | Dispatch<React.SetStateAction<Job | null>>
    | ((job: Job) => void);
  selectedJob: Job | null;
  setJobs: Dispatch<React.SetStateAction<Job[]>>;
  jobs: Job[];
}

@@ -27,7 +26,6 @@ export const JobSelector = ({
  sxProps,
  selectedJob,
  setSelectedJob,
  setJobs,
  jobs,
}: Props) => {
  const [anchorEl, setAnchorEl] = useState<HTMLElement | null>(null);
@@ -1,7 +1,6 @@
"use client";

import React from "react";
import { useAuth } from "../../../contexts/AuthContext";
import { Box, Drawer } from "@mui/material";

import { QuickSettings } from "../../nav/quick-settings";
@@ -21,8 +20,6 @@ export const NavDrawer: React.FC<NavDrawerProps> = ({
  toggleTheme,
  isDarkMode,
}) => {
  const { logout, user, isAuthenticated } = useAuth();

  return (
    <Drawer
      variant="permanent"
@@ -48,12 +45,7 @@ export const NavDrawer: React.FC<NavDrawerProps> = ({
        <NavItems />
      </div>
      <div>
        <UserControl
          isAuthenticated={isAuthenticated}
          user={user}
          logout={logout}
          className={classes.userControl}
        />
        <UserControl className={classes.userControl} />
        <QuickSettings toggleTheme={toggleTheme} isDarkMode={isDarkMode} />
      </div>
    </Box>
@@ -1,7 +1,7 @@
.welcome {
  margin: 0.25rem;
  margin: 0.5rem !important;
}

.userControlButton {
  width: 100%;
}
}
@@ -1,18 +1,23 @@
import React from "react";
import { Typography, Button } from "@mui/material";
import classes from "./logged-in-control.module.css";
import { useUser } from "@/store/hooks";
import router from "next/router";
import { useAuth } from "@/hooks/use-auth";

type LoggedInControlProps = {
  user: any;
  logout: () => void;
  children?: React.ReactNode;
};

export const LoggedInControl = ({
  user,
  logout,
  children,
}: LoggedInControlProps) => {
export const LoggedInControl = ({ children }: LoggedInControlProps) => {
  const { user } = useUser();
  const { logout } = useAuth();

  const handleLogout = () => {
    logout();
    router.push("/");
  };

  if (children) {
    return <>{children}</>;
  }
@@ -24,7 +29,7 @@ export const LoggedInControl = ({
      </Typography>
      <Button
        variant="contained"
        onClick={logout}
        onClick={handleLogout}
        className={classes.userControlButton}
      >
        Logout
@@ -5,30 +5,25 @@ import clsx from "clsx";
|
||||
import classes from "./user-control.module.css";
|
||||
import { LoggedInControl } from "./logged-in-control";
|
||||
import { LoggedOutControl } from "./logged-out-control";
|
||||
import { useUser } from "@/store/hooks";
|
||||
|
||||
export type UserControlProps = {
|
||||
isAuthenticated: boolean;
|
||||
user: any;
|
||||
logout: () => void;
|
||||
loggedInChildren?: React.ReactNode;
|
||||
loggedOutChildren?: React.ReactNode;
|
||||
className?: string;
|
||||
};
|
||||
|
||||
export const UserControl = ({
|
||||
isAuthenticated,
|
||||
user,
|
||||
logout,
|
||||
loggedInChildren,
|
||||
loggedOutChildren,
|
||||
className,
|
||||
}: UserControlProps) => {
|
||||
const { user } = useUser();
|
||||
|
||||
return (
|
||||
<Box className={clsx(classes.userControl, className)}>
|
||||
{isAuthenticated ? (
|
||||
<LoggedInControl user={user} logout={logout}>
|
||||
{loggedInChildren}
|
||||
</LoggedInControl>
|
||||
{user.isAuthenticated ? (
|
||||
<LoggedInControl>{loggedInChildren}</LoggedInControl>
|
||||
) : (
|
||||
<LoggedOutControl>{loggedOutChildren}</LoggedOutControl>
|
||||
)}
|
||||
|
||||
src/components/common/snackbars/error/error.tsx (new file)
@@ -0,0 +1,21 @@
+import { Snackbar, Alert } from "@mui/material";
+
+type ErrorSnackbarProps = {
+  open: boolean;
+  onClose: () => void;
+  message: string;
+};
+
+export const ErrorSnackbar = ({
+  open,
+  onClose,
+  message,
+}: ErrorSnackbarProps) => {
+  if (!open) return null;
+
+  return (
+    <Snackbar open={open} autoHideDuration={6000} onClose={onClose}>
+      <Alert severity="error">{message}</Alert>
+    </Snackbar>
+  );
+};
src/components/common/snackbars/error/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./error";
src/components/common/snackbars/index.ts (new file)
@@ -0,0 +1,2 @@
+export * from "./job-notify";
+export * from "./error";
src/components/common/snackbars/job-notify/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./job-notify";
src/components/common/snackbars/job-notify/job-notify.tsx (new file)
@@ -0,0 +1,34 @@
+import { Snackbar, Alert, Button } from "@mui/material";
+import router from "next/router";
+
+type JobNotifySnackbarProps = {
+  open: boolean;
+  onClose: () => void;
+  message: string;
+};
+
+export const JobNotifySnackbar = ({
+  open,
+  onClose,
+  message,
+}: JobNotifySnackbarProps) => {
+  if (!open) return null;
+
+  const goTo = () => {
+    router.push("/jobs");
+  };
+
+  const action = (
+    <Button color="inherit" size="small" onClick={goTo}>
+      Go To Job
+    </Button>
+  );
+
+  return (
+    <Snackbar open={open} autoHideDuration={6000} onClose={onClose}>
+      <Alert severity="info" action={action}>
+        {message}
+      </Alert>
+    </Snackbar>
+  );
+};
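Both new snackbars are controlled components: the parent owns open and message and supplies onClose, and the component renders nothing while open is false. A usage sketch under that contract (the host component and its state names are illustrative):

import { useState } from "react";
import { JobNotifySnackbar } from "@/components/common/snackbars";

export const Example = () => {
  // Parent owns the snackbar state; the snackbar only renders it.
  const [open, setOpen] = useState(false);

  return (
    <>
      <button onClick={() => setOpen(true)}>Submit</button>
      <JobNotifySnackbar
        open={open}
        onClose={() => setOpen(false)}
        message="Job submitted successfully."
      />
    </>
  );
};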
src/components/jobs/index.ts (new file)
@@ -0,0 +1,3 @@
+export * from "./job-queue";
+export * from "./favorites";
+export * from "./job-table";
@@ -1,3 +0,0 @@
-export * from "./JobQueue";
-export * from "./Favorites";
-export * from "./JobTable";
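The barrel swap reflects the PascalCase-to-kebab-case file renames (JobQueue → job-queue, and so on), so consumers keep importing from the folder root, e.g.:

import { Favorites, JobQueue, JobTable } from "@/components/jobs"; // alias path assumed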
@@ -1,43 +1,32 @@
-import React, { SetStateAction, useState } from "react";
-import {
-  IconButton,
-  Box,
-  Typography,
-  Tooltip,
-  TextField,
-  FormControl,
-  InputLabel,
-  Select,
-  MenuItem,
-  SelectChangeEvent,
-} from "@mui/material";
+import { JobDownloadDialog } from "@/components/common/job-download-dialog";
+import { ApiService } from "@/services";
+import { COLOR_MAP, Job } from "@/types";
 import DeleteIcon from "@mui/icons-material/Delete";
-import SelectAllIcon from "@mui/icons-material/SelectAll";
 import DownloadIcon from "@mui/icons-material/Download";
+import SelectAllIcon from "@mui/icons-material/SelectAll";
 import StarIcon from "@mui/icons-material/Star";
-import { useRouter } from "next/router";
-import { Favorites, JobQueue } from ".";
-import { Job } from "../../types";
-import Cookies from "js-cookie";
+import {
+  Box,
+  FormControl,
+  IconButton,
+  InputLabel,
+  MenuItem,
+  Select,
+  SelectChangeEvent,
+  TextField,
+  Tooltip,
+  Typography,
+} from "@mui/material";
 import { useSearchParams } from "next/navigation";
-import { JobDownloadDialog } from "../common/job-download-dialog";
+import { useRouter } from "next/router";
+import React, { SetStateAction, useState } from "react";
+import { Favorites, JobQueue } from ".";
 
 interface JobTableProps {
   jobs: Job[];
   setJobs: React.Dispatch<SetStateAction<Job[]>>;
 }
 
-interface ColorMap {
-  [key: string]: string;
-}
-
-const COLOR_MAP: ColorMap = {
-  Queued: "rgba(255,201,5,0.25)",
-  Scraping: "rgba(3,104,255,0.25)",
-  Completed: "rgba(5,255,51,0.25)",
-  Failed: "rgba(214,0,25,0.25)",
-};
-
 export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
   const searchParams = useSearchParams();
   const search = searchParams.get("search");
@@ -51,7 +40,6 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
   const [jobDownloadDialogOpen, setJobDownloadDialogOpen] =
     useState<boolean>(false);
 
-  const token = Cookies.get("token");
   const router = useRouter();
 
   const handleDownload = (ids: string[]) => {
@@ -78,6 +66,7 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
       } else {
         newSelected.add(id);
       }
+
       return newSelected;
     });
   };
@@ -89,14 +78,13 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
       const allJobIds = new Set(jobs.map((job) => job.id));
       setSelectedJobs(allJobIds);
     }
 
     setAllSelected(!allSelected);
   };
 
   const handleDeleteSelected = async () => {
-    const response = await fetch("/api/delete", {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({ data: { ids: Array.from(selectedJobs) } }),
+    const response = await ApiService.deleteJob({
+      ids: Array.from(selectedJobs),
     });
 
     if (response.ok) {
@@ -115,6 +103,7 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
     } else if (searchMode === "status") {
       return job.status.toLowerCase().includes(searchQuery.toLowerCase());
     }
+
     return true;
   });
 
@@ -125,19 +114,10 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
       )
     );
 
-    const postBody = {
+    await ApiService.updateJob({
      ids: ids,
      field: field,
      value: value,
-    };
-
-    await fetch("/api/update", {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${token}`,
-      },
-      body: JSON.stringify({ data: postBody }),
    });
  };
 
@@ -170,7 +150,12 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => {
         </Typography>
         <Tooltip title="Select All">
           <span>
-            <IconButton color="primary" onClick={handleSelectAll}>
+            <IconButton
+              color="primary"
+              onClick={handleSelectAll}
+              data-testid="select-all"
+              aria-label="Select All"
+            >
               <SelectAllIcon />
             </IconButton>
           </span>
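The inline fetch calls move behind ApiService. That service is outside this diff; a plausible minimal shape, inferred from the call sites above and the old requests they replace (endpoints and payload envelope come from the removed code; auth handling is an assumption):

// Sketch only: the real ApiService lives in src/services and is not shown here.
export const ApiService = {
  deleteJob: (payload: { ids: string[] }) =>
    fetch("/api/delete", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: payload }),
    }),

  updateJob: (payload: { ids: string[]; field: string; value: unknown }) =>
    fetch("/api/update", {
      method: "POST",
      // The removed inline call also sent Authorization: Bearer <token>;
      // presumably the service now attaches that centrally.
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: payload }),
    }),
};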
@@ -1,35 +1,41 @@
-import { validateURL } from "@/lib/helpers/validate-url";
-import { ApiService } from "@/services";
+import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
+import { Disabled } from "@/components/common/disabled/disabled";
+import {
+  ErrorSnackbar,
+  JobNotifySnackbar,
+} from "@/components/common/snackbars";
+import {
+  Provider as JobSubmitterProvider,
+  useJobSubmitterProvider,
+} from "@/components/submit/job-submitter/provider";
+import { useAdvancedJobOptions } from "@/hooks/use-advanced-job-options";
+import { useSubmitJob } from "@/hooks/use-submit-job";
+import { checkAI } from "@/lib";
+import { useUser } from "@/store/hooks";
 import {
   Box,
   Button,
   Divider,
-  Snackbar,
-  Alert,
   TextField,
   Typography,
   useTheme,
 } from "@mui/material";
-import { useEffect, useState } from "react";
 import { useRouter } from "next/router";
-import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
-import { useAdvancedJobOptions } from "@/lib/hooks/use-advanced-job-options/use-advanced-job-options";
-import { checkAI } from "@/lib";
-import { Disabled } from "@/components/common/disabled/disabled";
+import { useEffect, useState } from "react";
 
 export const Agent = () => {
+  const router = useRouter();
+  const theme = useTheme();
+
+  const { user } = useUser();
+  const { jobOptions, setJobOptions } = useAdvancedJobOptions();
+  const { submitJob, error } = useSubmitJob();
+  const { snackbarOpen, snackbarMessage, snackbarSeverity, closeSnackbar } =
+    useJobSubmitterProvider();
+
   const [url, setUrl] = useState("");
   const [prompt, setPrompt] = useState("");
   const [urlError, setUrlError] = useState<string | null>(null);
   const [aiEnabled, setAiEnabled] = useState(false);
-  const [snackbarMessage, setSnackbarMessage] = useState("");
-  const [snackbarSeverity, setSnackbarSeverity] = useState<
-    "success" | "error" | "info" | "warning"
-  >("info");
-  const [snackbarOpen, setSnackbarOpen] = useState(false);
-  const router = useRouter();
-  const { jobOptions, setJobOptions } = useAdvancedJobOptions();
-  const theme = useTheme();
 
   useEffect(() => {
     if (router.query.url) {
@@ -45,91 +51,8 @@
     checkAI(setAiEnabled);
   }, []);
 
-  const handleCloseSnackbar = () => {
-    setSnackbarOpen(false);
-  };
-
-  const ErrorSnackbar = () => {
-    return (
-      <Snackbar
-        open={snackbarOpen}
-        autoHideDuration={6000}
-        onClose={handleCloseSnackbar}
-      >
-        <Alert onClose={handleCloseSnackbar} severity="error">
-          {snackbarMessage}
-        </Alert>
-      </Snackbar>
-    );
-  };
-
-  const NotifySnackbar = () => {
-    const goTo = () => {
-      router.push("/jobs");
-    };
-
-    const action = (
-      <Button color="inherit" size="small" onClick={goTo}>
-        Go To Job
-      </Button>
-    );
-
-    return (
-      <Snackbar
-        open={snackbarOpen}
-        autoHideDuration={6000}
-        onClose={handleCloseSnackbar}
-      >
-        <Alert onClose={handleCloseSnackbar} severity="info" action={action}>
-          {snackbarMessage}
-        </Alert>
-      </Snackbar>
-    );
-  };
-
   const handleSubmit = async () => {
-    if (!validateURL(url)) {
-      setUrlError("Please enter a valid URL.");
-      return;
-    }
-
-    setUrlError(null);
-
-    await ApiService.submitJob(
-      url,
-      [],
-      "",
-      {
-        collect_media: jobOptions.collect_media,
-        multi_page_scrape: jobOptions.multi_page_scrape,
-      },
-      jobOptions.custom_headers,
-      jobOptions.custom_cookies,
-      null,
-      true,
-      prompt
-    )
-      .then(async (response) => {
-        if (!response.ok) {
-          return response.json().then((error) => {
-            throw new Error(error.error);
-          });
-        }
-        return response.json();
-      })
-      .then((data) => {
-        setSnackbarMessage(
-          `Agent job: ${data.id} submitted successfully.` ||
-            "Agent job submitted successfully."
-        );
-        setSnackbarSeverity("info");
-        setSnackbarOpen(true);
-      })
-      .catch((error) => {
-        setSnackbarMessage(error || "An error occurred.");
-        setSnackbarSeverity("error");
-        setSnackbarOpen(true);
-      });
+    await submitJob(url, [], user, jobOptions, null, true, prompt);
   };
 
   if (!aiEnabled) {
@@ -178,13 +101,14 @@
         <TextField
           value={url}
           onChange={(e) => setUrl(e.target.value)}
-          error={!!urlError}
-          helperText={urlError}
+          error={!!error}
+          helperText={error}
           autoComplete="agent-url"
           fullWidth
           placeholder="https://www.example.com"
           variant="outlined"
           size="small"
+          data-cy="url-input"
         />
         <Typography variant="body1" sx={{ fontWeight: 500, marginBottom: 0 }}>
           Prompt
@@ -197,6 +121,7 @@
           placeholder="Collect all the links on the page"
           variant="outlined"
           size="small"
+          data-cy="prompt-input"
         />
         <Box
           sx={{
@@ -221,8 +146,28 @@
             Submit
           </Button>
         </Box>
-        {snackbarSeverity === "info" ? <NotifySnackbar /> : <ErrorSnackbar />}
+        {snackbarSeverity === "info" ? (
+          <JobNotifySnackbar
+            open={snackbarOpen}
+            onClose={closeSnackbar}
+            message={snackbarMessage}
+          />
+        ) : (
+          <ErrorSnackbar
+            open={snackbarOpen}
+            onClose={closeSnackbar}
+            message={snackbarMessage}
+          />
+        )}
       </Box>
     </Box>
   );
 };
+
+export const AgentPage = () => {
+  return (
+    <JobSubmitterProvider>
+      <Agent />
+    </JobSubmitterProvider>
+  );
+};
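The inline submit chain collapses into useSubmitJob, which also owns the error value now fed into the URL TextField. The hook is not part of this diff; from the call submitJob(url, [], user, jobOptions, null, true, prompt) its rough contract looks like this (parameter names are guesses from the call site):

// Inferred shape only; see @/hooks/use-submit-job for the real implementation.
export declare function useSubmitJob(): {
  submitJob: (
    url: string,
    elements: unknown[],
    user: unknown,
    jobOptions: unknown,
    siteMap: unknown | null, // name assumed
    agentMode: boolean,      // name assumed
    prompt: string
  ) => Promise<void>;
  error: string | null;
};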
@@ -1,31 +1,32 @@
-import React, { useEffect, useRef, useState } from "react";
+import { JobSelector } from "@/components/common/job-selector";
+import { useGetCurrentJobs } from "@/hooks/use-get-current-jobs";
+import { checkAI, fetchJob, updateJob } from "@/lib";
+import { Job, Message } from "@/types";
+import EditNoteIcon from "@mui/icons-material/EditNote";
+import SendIcon from "@mui/icons-material/Send";
 import {
   Box,
-  TextField,
-  Typography,
-  Paper,
-  useTheme,
   IconButton,
+  Paper,
+  TextField,
   Tooltip,
+  Typography,
+  useTheme,
 } from "@mui/material";
-import { JobSelector } from "../../ai";
-import { Job, Message } from "../../../types";
 import { useSearchParams } from "next/navigation";
-import { fetchJob, fetchJobs, updateJob, checkAI } from "../../../lib";
-import SendIcon from "@mui/icons-material/Send";
-import EditNoteIcon from "@mui/icons-material/EditNote";
+import React, { useEffect, useState } from "react";
 
 export const AI: React.FC = () => {
   const theme = useTheme();
+  const searchParams = useSearchParams();
+  const { jobs, setJobs } = useGetCurrentJobs();
 
   const [currentMessage, setCurrentMessage] = useState<string>("");
   const [selectedJob, setSelectedJob] = useState<Job | null>(null);
   const [messages, setMessages] = useState<Message[]>([]);
   const [aiEnabled, setAiEnabled] = useState<boolean>(false);
-  const [jobs, setJobs] = useState<Job[]>([]);
   const [thinking, setThinking] = useState<boolean>(false);
 
-  const searchParams = useSearchParams();
-
   const getJobFromParam = async () => {
     const jobId = searchParams.get("job");
@@ -134,10 +135,6 @@ export const AI: React.FC = () => {
     setMessages([]);
   };
 
-  useEffect(() => {
-    fetchJobs(setJobs);
-  }, []);
-
   return (
     <Box
       sx={{
@@ -187,7 +184,6 @@ export const AI: React.FC = () => {
         <JobSelector
           selectedJob={selectedJob}
           setSelectedJob={setSelectedJob}
-          setJobs={setJobs}
           jobs={jobs}
           sxProps={{
             position: "absolute",
@@ -242,6 +238,9 @@ export const AI: React.FC = () => {
                   marginLeft: message.role === "user" ? "auto" : "",
                   maxWidth: "40%",
                 }}
+                data-cy={
+                  message.role === "user" ? "user-message" : "ai-message"
+                }
               >
                 <Typography variant="body1" sx={{ color: "white" }}>
                   {message.content}
@@ -309,6 +308,7 @@ export const AI: React.FC = () => {
             }
           }}
           sx={{ borderRadius: "8px" }}
+          data-cy="message-input"
         />
 
         <Tooltip title="Send" placement="top">
@@ -319,6 +319,7 @@ export const AI: React.FC = () => {
             onClick={() => {
               handleMessageSend(currentMessage);
             }}
+            data-cy="send-message"
           >
             <SendIcon />
           </IconButton>
src/components/pages/chat/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./chat";
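The chat page above and the media page further below both drop their local fetchJobs effect and jobs state in favor of useGetCurrentJobs. The hook itself is not in this diff; judging by the destructuring at both call sites, it wraps the same fetch, roughly:

// Sketch under assumptions: the hook presumably wraps the same
// fetchJobs(setJobs) call the pages previously made inline.
import { useEffect, useState } from "react";
import { fetchJobs } from "@/lib";
import { Job } from "@/types";

export const useGetCurrentJobs = () => {
  const [jobs, setJobs] = useState<Job[]>([]);

  useEffect(() => {
    fetchJobs(setJobs);
  }, []);

  return { jobs, setJobs };
};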
@@ -1,19 +1,18 @@
-import { Job, CronJob } from "@/types/job";
-import { useState, useEffect } from "react";
-import { CreateCronJobs } from "./create-cron-jobs";
+import { CronJob, Job } from "@/types/job";
 import {
+  Box,
+  Button,
   Table,
+  TableBody,
+  TableCell,
   TableHead,
   TableRow,
-  TableCell,
-  TableBody,
-  Button,
-  Box,
   Typography,
   useTheme,
 } from "@mui/material";
-import Cookies from "js-cookie";
+import { useEffect, useState } from "react";
+import { CreateCronJobs } from "./create-cron-jobs";
 
+import { ApiService } from "@/services/api-service";
 export type CronJobsProps = {
   initialJobs: Job[];
   initialCronJobs: CronJob[];
@@ -37,14 +36,9 @@ export const CronJobs = ({
   }, [initialJobs, initialCronJobs, initialUser]);
 
   const handleDeleteCronJob = async (id: string) => {
-    const token = Cookies.get("token");
-    const response = await fetch("/api/delete-cron-job", {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${token}`,
-      },
-      body: JSON.stringify({ data: { id, user_email: user.email } }),
+    const response = await ApiService.deleteCronJobs({
+      id,
+      user_email: user.email,
    });
 
    if (response.ok) {
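Cron deletion gets the same treatment as the job table: the removed fetch posted { data: { id, user_email } } to /api/delete-cron-job with a bearer token, and ApiService.deleteCronJobs presumably preserves that wire format. A hedged sketch of the new call site (bindings are illustrative):

import { ApiService } from "@/services/api-service";

async function removeCronJob(cronJobId: string, userEmail: string) {
  // Endpoint and payload shape come from the removed inline fetch,
  // not from the service itself, which is outside this diff.
  const response = await ApiService.deleteCronJobs({
    id: cronJobId,
    user_email: userEmail,
  });

  if (response.ok) {
    // refresh the cron job table, as the surrounding component does
  }
}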
@@ -1,11 +1,14 @@
 "use client";
 
-import React, { useState, useEffect, useRef } from "react";
-import { Button, Container, Box, Snackbar, Alert } from "@mui/material";
+import React, { useEffect, useRef } from "react";
+import { Container, Box } from "@mui/material";
 import { useRouter } from "next/router";
 import { Element, Result } from "@/types";
 import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
 import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
+import {
+  ErrorSnackbar,
+  JobNotifySnackbar,
+} from "@/components/common/snackbars";
 
 export const Home = () => {
   const {
@@ -15,9 +18,9 @@ export const Home = () => {
     setRows,
     results,
     snackbarOpen,
-    setSnackbarOpen,
     snackbarMessage,
     snackbarSeverity,
+    closeSnackbar,
   } = useJobSubmitterProvider();
   const router = useRouter();
   const { elements, url } = router.query;
@@ -28,6 +31,7 @@ export const Home = () => {
     if (elements) {
      setRows(JSON.parse(elements as string));
    }
+
    if (url) {
      setSubmittedURL(url as string);
    }
@@ -39,48 +43,6 @@ export const Home = () => {
    }
  }, [results]);
 
-  const handleCloseSnackbar = () => {
-    setSnackbarOpen(false);
-  };
-
-  const ErrorSnackbar = () => {
-    return (
-      <Snackbar
-        open={snackbarOpen}
-        autoHideDuration={6000}
-        onClose={handleCloseSnackbar}
-      >
-        <Alert onClose={handleCloseSnackbar} severity="error">
-          {snackbarMessage}
-        </Alert>
-      </Snackbar>
-    );
-  };
-
-  const NotifySnackbar = () => {
-    const goTo = () => {
-      router.push("/jobs");
-    };
-
-    const action = (
-      <Button color="inherit" size="small" onClick={goTo}>
-        Go To Job
-      </Button>
-    );
-
-    return (
-      <Snackbar
-        open={snackbarOpen}
-        autoHideDuration={6000}
-        onClose={handleCloseSnackbar}
-      >
-        <Alert onClose={handleCloseSnackbar} severity="info" action={action}>
-          {snackbarMessage}
-        </Alert>
-      </Snackbar>
-    );
-  };
-
   return (
     <Box
       bgcolor="background.default"
@@ -93,7 +55,8 @@ export const Home = () => {
     >
       <Container maxWidth="lg" className="overflow-y-auto max-h-full">
         <JobSubmitter />
-        {submittedURL.length ? (
+
+        {submittedURL.length > 0 ? (
           <ElementTable
             rows={rows}
             setRows={setRows}
@@ -101,7 +64,20 @@ export const Home = () => {
           />
         ) : null}
       </Container>
-      {snackbarSeverity === "info" ? <NotifySnackbar /> : <ErrorSnackbar />}
+
+      {snackbarSeverity === "info" ? (
+        <JobNotifySnackbar
+          open={snackbarOpen}
+          onClose={closeSnackbar}
+          message={snackbarMessage}
+        />
+      ) : (
+        <ErrorSnackbar
+          open={snackbarOpen}
+          onClose={closeSnackbar}
+          message={snackbarMessage}
+        />
+      )}
     </Box>
   );
 };
@@ -1,20 +1,20 @@
-import { JobSelector } from "@/components/ai";
-import { fetchJobs } from "@/lib";
+import { JobSelector } from "@/components/common/job-selector";
+import { MediaViewer } from "@/components/common/media-viewer";
+import { TileGridView } from "@/components/common/media-viewer/tile-grid-view";
+import { useGetCurrentJobs } from "@/hooks/use-get-current-jobs";
 import { Job } from "@/types";
 import {
-  Box,
-  useTheme,
-  Typography,
-  CircularProgress,
   Alert,
+  Box,
+  CircularProgress,
   Paper,
-  Tabs,
   Tab,
+  Tabs,
+  Typography,
+  useTheme,
 } from "@mui/material";
 import { useRouter, useSearchParams } from "next/navigation";
-import { useState, useEffect } from "react";
-import { TileGridView } from "@/components/common/media-viewer/tile-grid-view";
-import { MediaViewer } from "@/components/common/media-viewer";
+import { useEffect, useState } from "react";
 
 export interface MediaFiles {
   audio: string[];
@@ -31,10 +31,10 @@ export const MediaId = () => {
   const searchParams = useSearchParams();
   const theme = useTheme();
   const router = useRouter();
+  const { jobs } = useGetCurrentJobs();
 
   const [error, setError] = useState<string | null>(null);
   const [loading, setLoading] = useState(true);
-  const [jobs, setJobs] = useState<Job[]>([]);
   const [selectedJob, setSelectedJob] = useState<Job | null>(null);
   const [mediaFiles, setMediaFiles] = useState<MediaFiles | null>(null);
   const [activeTab, setActiveTab] = useState<string>("images");
@@ -60,11 +60,6 @@ export const MediaId = () => {
     router.push(`/media?id=${currentId}&type=${activeTab}&file=${fileName}`);
   };
 
-  // Fetch jobs on mount
-  useEffect(() => {
-    fetchJobs(setJobs);
-  }, []);
-
   // Set selected job when currentId changes
   useEffect(() => {
     if (!currentId) {
@@ -201,7 +196,6 @@ export const MediaId = () => {
           <JobSelector
             setSelectedJob={handleSelectJob}
             selectedJob={selectedJob}
-            setJobs={setJobs}
             jobs={jobs}
           />
         </Box>
Some files were not shown because too many files have changed in this diff.