Mirror of https://github.com/jaypyles/Scraperr.git, synced 2025-10-31 14:37:04 +00:00

Compare commits (40 commits)
| SHA1 |
|---|
| 8cd30599fa |
| a58212b214 |
| a6ab6ec71d |
| c5c9427af4 |
| e8d80c1a77 |
| ee8047ac78 |
| e74c4f392c |
| 6b484952a3 |
| 2283808605 |
| ee5ada70f7 |
| 56cc457e6e |
| 21a38181de |
| 3063bc0d53 |
| f42e7ed531 |
| c197f2becd |
| a534129702 |
| 455ed049c9 |
| de4ccfbf3a |
| 3475d66995 |
| 186b4a0231 |
| 0af0ebf5b5 |
| ef35db00d7 |
| d65e600ec3 |
| 6fe145f649 |
| 563ca2245e |
| d54fdbd405 |
| 7169755cd2 |
| 15b56b5704 |
| bf6b740005 |
| c339e75e06 |
| b6ed40e6cf |
| 3085f9d31a |
| 7d80ff5c7f |
| 3a0762f1e3 |
| dc4d219205 |
| b3bf780eda |
| 1dfd3ca92a |
| fe51140a0e |
| dd6cec6679 |
| 2339ba1b77 |

.github/ISSUE_TEMPLATE/bug_report.md (new file, 32 lines)

							| @@ -0,0 +1,32 @@ | ||||
| --- | ||||
| name: Bug report | ||||
| about: 'Bug reporting ' | ||||
| title: '' | ||||
| labels: '' | ||||
| assignees: '' | ||||
|  | ||||
| --- | ||||
|  | ||||
| **Describe the bug** | ||||
| A clear and concise description of what the bug is. | ||||
|  | ||||
| **To Reproduce** | ||||
| Steps to reproduce the behavior: | ||||
| 1. Go to '...' | ||||
| 2. Click on '....' | ||||
| 3. Scroll down to '....' | ||||
| 4. See error | ||||
|  | ||||
| **Expected behavior** | ||||
| A clear and concise description of what you expected to happen. | ||||
|  | ||||
| **Screenshots** | ||||
| If applicable, add screenshots to help explain your problem. | ||||
|  | ||||
| **Desktop (please complete the following information):** | ||||
|  - OS: [e.g. iOS] | ||||
|  - Browser [e.g. chrome, safari] | ||||
|  - Version [e.g. 22] | ||||
|  | ||||
| **Additional context** | ||||
| Add any other context about the problem here. | ||||

.github/actions/run-cypress-tests/action.yaml (new file, 58 lines)

							| @@ -0,0 +1,58 @@ | ||||
| name: Run Cypress Tests | ||||
|  | ||||
| description: Run Cypress tests | ||||
|  | ||||
| runs: | ||||
|   using: "composite" | ||||
|   steps: | ||||
|     - name: Checkout code | ||||
|       uses: actions/checkout@v4 | ||||
|  | ||||
|     - name: Setup Node | ||||
|       uses: actions/setup-node@v4 | ||||
|       with: | ||||
|         node-version: 22 | ||||
|  | ||||
|     - name: Setup Docker project | ||||
|       shell: bash | ||||
|       run: make build up-dev | ||||
|  | ||||
|     - name: Install dependencies | ||||
|       shell: bash | ||||
|       run: npm install | ||||
|  | ||||
|     - name: Wait for frontend to be ready | ||||
|       shell: bash | ||||
|       run: | | ||||
|         for i in {1..10}; do | ||||
|           curl -s http://127.0.0.1:80 && echo "Frontend is ready" && exit 0 | ||||
|           echo "Waiting for frontend to be ready... attempt $i" | ||||
|           sleep 1 | ||||
|         done | ||||
|         echo "Frontend failed to be ready after 10 retries" | ||||
|         exit 1 | ||||
|  | ||||
|     - name: Wait for backend to be ready | ||||
|       shell: bash | ||||
|       run: | | ||||
|         for i in {1..10}; do | ||||
|           curl -s http://127.0.0.1:8000 && echo "Backend is ready" && exit 0 | ||||
|           echo "Waiting for backend to be ready... attempt $i" | ||||
|           sleep 1 | ||||
|         done | ||||
|         echo "Backend failed to be ready after 10 retries" | ||||
|         exit 1 | ||||
|  | ||||
|     - name: Show backend logs on failure | ||||
|       if: failure() | ||||
|       shell: bash | ||||
|       run: | | ||||
|         echo "== Docker Containers ==" | ||||
|         docker ps -a | ||||
|         echo "== Backend Logs ==" | ||||
|         docker logs $(docker ps -a --filter "name=scraperr_api" --format "{{.Names}}") || echo "Could not get backend logs" | ||||
|  | ||||
|     - name: Run Cypress tests | ||||
|       shell: bash | ||||
|       run: npm run cy:run | ||||
|  | ||||

.github/workflows/docker-image.yml (29 lines changed)

							| @@ -1,12 +1,14 @@ | ||||
| name: ci | ||||
| requires: | ||||
|   - unit-tests | ||||
| name: Docker Image | ||||
| on: | ||||
|   push: | ||||
|     branches: ["master"] | ||||
|   workflow_run: | ||||
|     workflows: ["Unit Tests"] | ||||
|     types: | ||||
|       - completed | ||||
|   workflow_dispatch: | ||||
|  | ||||
| jobs: | ||||
|   build: | ||||
|     if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'master' }} | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - name: Checkout | ||||
| @@ -36,3 +38,20 @@ jobs: | ||||
|           file: ./docker/api/Dockerfile | ||||
|           push: true | ||||
|           tags: ${{ secrets.DOCKERHUB_USERNAME }}/scraperr_api:latest | ||||
|  | ||||
|   success-message: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: | ||||
|       - build | ||||
|     steps: | ||||
|       - name: Send Discord Message | ||||
|         uses: jaypyles/discord-webhook-action@v1.0.0 | ||||
|         with: | ||||
|           webhook-url: ${{ secrets.DISCORD_WEBHOOK_URL }} | ||||
|           content: "Scraperr Successfully Built Docker Images" | ||||
|           username: "Scraperr CI" | ||||
|           embed-title: "✅ Deployment Status" | ||||
|           embed-description: "Scraperr successfully built docker images." | ||||
|           embed-color: 3066993 # Green | ||||
|           embed-footer-text: "Scraperr CI" | ||||
|           embed-timestamp: ${{ github.event.head_commit.timestamp }} | ||||
|   | ||||

.github/workflows/unit-tests.yml (33 lines changed)

							| @@ -4,9 +4,11 @@ on: | ||||
|   push: | ||||
|     branches: | ||||
|       - master | ||||
|  | ||||
|   pull_request: | ||||
|     branches: | ||||
|       - master | ||||
|     types: [opened, synchronize, reopened] | ||||
|  | ||||
|   workflow_dispatch: | ||||
|  | ||||
| jobs: | ||||
|   unit-tests: | ||||
| @@ -15,6 +17,9 @@ jobs: | ||||
|       - name: Checkout | ||||
|         uses: actions/checkout@v4 | ||||
|  | ||||
|       - name: Set env | ||||
|         run: echo "ENV=test" >> $GITHUB_ENV | ||||
|  | ||||
|       - name: Install pdm | ||||
|         run: pip install pdm | ||||
|  | ||||
| @@ -23,3 +28,27 @@ jobs: | ||||
|  | ||||
|       - name: Run tests | ||||
|         run: PYTHONPATH=. pdm run pytest api/backend/tests | ||||
|  | ||||
|   cypress-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - uses: ./.github/actions/run-cypress-tests | ||||
|  | ||||
|   success-message: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: | ||||
|       - unit-tests | ||||
|       - cypress-tests | ||||
|     steps: | ||||
|       - name: Send Discord Message | ||||
|         uses: jaypyles/discord-webhook-action@v1.0.0 | ||||
|         with: | ||||
|           webhook-url: ${{ secrets.DISCORD_WEBHOOK_URL }} | ||||
|           content: "Scraperr Successfully Passed Tests" | ||||
|           username: "Scraperr CI" | ||||
|           embed-title: "✅ Deployment Status" | ||||
|           embed-description: "Scraperr successfully passed all tests." | ||||
|           embed-color: 3066993 # Green | ||||
|           embed-footer-text: "Scraperr CI" | ||||
|           embed-timestamp: ${{ github.event.head_commit.timestamp }} | ||||
|   | ||||

.gitignore (2 lines changed)

							| @@ -187,3 +187,5 @@ cython_debug/ | ||||
| postgres_data | ||||
| .vscode | ||||
| ollama | ||||
| data | ||||
| media | ||||

.python-version (new file, 1 line)

							| @@ -0,0 +1 @@ | ||||
| 3.10.12 | ||||

README.md (77 lines changed)

							| @@ -13,7 +13,7 @@ Scraperr is a self-hosted web application that allows users to scrape data from | ||||
|  | ||||
| From the table, users can download an excel sheet of the job's results, along with an option to rerun the job. | ||||
|  | ||||
| View the [docs](https://scraperr-docs.pages.dev). | ||||
| View the [docs](https://scraperr-docs.pages.dev) for a quickstart guide and more information. | ||||
|  | ||||
| ## Features | ||||
|  | ||||
| @@ -64,87 +64,12 @@ View the [docs](https://scraperr-docs.pages.dev). | ||||
|  | ||||
|  | ||||
|  | ||||
| ## Installation | ||||
|  | ||||
| 1. Clone the repository: | ||||
|  | ||||
|    ```sh | ||||
|    git clone https://github.com/jaypyles/scraperr.git | ||||
|  | ||||
|    ``` | ||||
|  | ||||
| 2. Set environmental variables and labels in `docker-compose.yml`. | ||||
|  | ||||
| ```yaml | ||||
| scraperr: | ||||
|     labels: | ||||
|       - "traefik.enable=true" | ||||
|       - "traefik.http.routers.scraperr.rule=Host(`localhost`)" # change this to your domain, if not running on localhost | ||||
|       - "traefik.http.routers.scraperr.entrypoints=web" # websecure if using https | ||||
|       - "traefik.http.services.scraperr.loadbalancer.server.port=3000" | ||||
|  | ||||
| scraperr_api: | ||||
|  environment: | ||||
|       - LOG_LEVEL=INFO | ||||
|       - MONGODB_URI=mongodb://root:example@webscrape-mongo:27017 # used to access MongoDB | ||||
|       - SECRET_KEY=your_secret_key # used to encode authentication tokens (can be a random string) | ||||
|       - ALGORITHM=HS256 # authentication encoding algorithm | ||||
|       - ACCESS_TOKEN_EXPIRE_MINUTES=600 # access token expire minutes | ||||
|   labels: | ||||
|         - "traefik.enable=true" | ||||
|         - "traefik.http.routers.scraperr_api.rule=Host(`localhost`) && PathPrefix(`/api`)" # change this to your domain, if not running on localhost | ||||
|         - "traefik.http.routers.scraperr_api.entrypoints=web" # websecure if using https | ||||
|         - "traefik.http.middlewares.api-stripprefix.stripprefix.prefixes=/api" | ||||
|         - "traefik.http.routers.scraperr_api.middlewares=api-stripprefix" | ||||
|         - "traefik.http.services.scraperr_api.loadbalancer.server.port=8000" | ||||
|  | ||||
| mongo: | ||||
|     environment: | ||||
|       MONGO_INITDB_ROOT_USERNAME: root | ||||
|       MONGO_INITDB_ROOT_PASSWORD: example | ||||
| ``` | ||||
|  | ||||
| Don't want to use `traefik`? This configuration can be used in other reverse proxies, as long as the API is proxied to `/api` of the frontend container. This is currently | ||||
| not able to be used without a reverse proxy, due to limitations of runtime client-side environmental variables in `next.js`. | ||||
|  | ||||
| 3. Deploy | ||||
|  | ||||
| ```sh | ||||
| make up | ||||
| ``` | ||||
|  | ||||
| The app provides its own `traefik` configuration to use independently, but can easily be reverse-proxied by any other app, or your own reverse-proxy. | ||||
|  | ||||
| ## Usage | ||||
|  | ||||
| 1. Open the application in your browser at `http://localhost`. | ||||
| 2. Enter the URL you want to scrape in the URL field. | ||||
| 3. Add elements to scrape by specifying a name and the corresponding XPath. | ||||
| 4. Click the "Submit" button to queue URL to be scraped. | ||||
| 5. View queue in the "Previous Jobs" section. | ||||
|  | ||||
| ## API Endpoints | ||||
|  | ||||
| Use this service as an API for your own projects. Due to this using FastAPI, a docs page is available at `/docs` for the API. | ||||
|  | ||||
|  | ||||
|  | ||||
| ## AI | ||||
|  | ||||
| Currently supports either an Ollama instance or OpenAI's ChatGPT, using your own API key. Setting up is easy as either setting the Ollama url or the OpenAI API key in the API's environmental variables in the `docker-compose.yml` file: | ||||
|  | ||||
| ```yaml | ||||
| scraperr_api: | ||||
|   environment: | ||||
|     - OLLAMA_URL=http://ollama:11434 | ||||
|     - OLLAMA_MODEL=llama3.1 | ||||
|     # or | ||||
|     - OPENAI_KEY=<your_key> | ||||
|     - OPENAI_MODEL=gpt3.5-turbo | ||||
| ``` | ||||
|  | ||||
| The model's names are taken from the documentation of their respective technologies. | ||||
|  | ||||
| ## Troubleshooting | ||||
|  | ||||
| Q: When running Scraperr, I'm met with "404 Page not found".   | ||||
|   | ||||
| @@ -1,3 +0,0 @@ | ||||
| github_repo: https://github.com/jaypyles/webapp-template.git | ||||
| deploy_path: /home/admin/site-test6 | ||||
| deploy_command: make pull up-prd | ||||
| @@ -1,10 +0,0 @@ | ||||
| - name: Deploy site | ||||
|   hosts: all | ||||
|   become: true | ||||
|   vars_files: | ||||
|     - ./config.yaml | ||||
|   tasks: | ||||
|     - name: Deploy | ||||
|       command: "{{deploy_command}}" | ||||
|       args: | ||||
|         chdir: "{{deploy_path}}" | ||||
| @@ -1,6 +0,0 @@ | ||||
| all: | ||||
|   hosts: | ||||
|     host1: | ||||
|       ansible_host: 192.168.0.1 | ||||
|       ansible_user: admin | ||||
|       ansible_ssh_private_key_file: private_key.pem | ||||
| @@ -1,54 +0,0 @@ | ||||
| - name: Install Docker and run make pull up | ||||
|   hosts: all | ||||
|   become: true | ||||
|   vars_files: | ||||
|     - ./config.yaml | ||||
|   tasks: | ||||
|     - name: Update apt cache | ||||
|       apt: | ||||
|         update_cache: yes | ||||
|     - name: Install required packages | ||||
|       apt: | ||||
|         name: | ||||
|           - apt-transport-https | ||||
|           - ca-certificates | ||||
|           - curl | ||||
|           - gnupg-agent | ||||
|           - software-properties-common | ||||
|           - rsync | ||||
|           - make | ||||
|         state: present | ||||
|     - name: Add Docker’s official GPG key | ||||
|       apt_key: | ||||
|         url: https://download.docker.com/linux/ubuntu/gpg | ||||
|         state: present | ||||
|     - name: Add Docker APT repository | ||||
|       apt_repository: | ||||
|         repo: deb [arch=amd64] https://download.docker.com/linux/ubuntu focal stable | ||||
|         state: present | ||||
|     - name: Update apt cache again after adding Docker repo | ||||
|       apt: | ||||
|         update_cache: yes | ||||
|     - name: Install Docker | ||||
|       apt: | ||||
|         name: docker-ce | ||||
|         state: present | ||||
|     - name: Start and enable Docker service | ||||
|       systemd: | ||||
|         name: docker | ||||
|         enabled: yes | ||||
|         state: started | ||||
|     - name: Install Docker Compose | ||||
|       apt: | ||||
|         name: docker-compose-plugin | ||||
|         state: present | ||||
|     - name: Verify Docker is installed | ||||
|       command: docker --version | ||||
|       register: docker_version | ||||
|     - name: Display Docker version | ||||
|       debug: | ||||
|         msg: "Docker version: {{ docker_version.stdout }}" | ||||
|     - name: Clone repo | ||||
|       ansible.builtin.git: | ||||
|         repo: "{{github_repo}}" | ||||
|         dest: "{{deploy_path}}" | ||||
| @@ -1,9 +1,13 @@ | ||||
| # STL | ||||
| import os | ||||
| import logging | ||||
| import apscheduler  # type: ignore | ||||
|  | ||||
| # PDM | ||||
| from fastapi import FastAPI | ||||
| import apscheduler.schedulers | ||||
| import apscheduler.schedulers.background | ||||
| from fastapi import FastAPI, Request, status | ||||
| from fastapi.exceptions import RequestValidationError | ||||
| from fastapi.middleware.cors import CORSMiddleware | ||||
|  | ||||
| # LOCAL | ||||
| @@ -13,6 +17,11 @@ from api.backend.utils import get_log_level | ||||
| from api.backend.routers.job_router import job_router | ||||
| from api.backend.routers.log_router import log_router | ||||
| from api.backend.routers.stats_router import stats_router | ||||
| from api.backend.database.startup import init_database | ||||
| from fastapi.responses import JSONResponse | ||||
|  | ||||
| from api.backend.job.cron_scheduling.cron_scheduling import start_cron_scheduler | ||||
| from api.backend.scheduler import scheduler | ||||
|  | ||||
| log_level = os.getenv("LOG_LEVEL") | ||||
| LOG_LEVEL = get_log_level(log_level) | ||||
| @@ -25,7 +34,7 @@ logging.basicConfig( | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
| app = FastAPI(title="api") | ||||
| app = FastAPI(title="api", root_path="/api") | ||||
|  | ||||
| app.add_middleware( | ||||
|     CORSMiddleware, | ||||
| @@ -41,3 +50,28 @@ app.include_router(ai_router) | ||||
| app.include_router(job_router) | ||||
| app.include_router(log_router) | ||||
| app.include_router(stats_router) | ||||
|  | ||||
|  | ||||
| @app.on_event("startup") | ||||
| async def startup_event(): | ||||
|     start_cron_scheduler(scheduler) | ||||
|     scheduler.start() | ||||
|  | ||||
|     if os.getenv("ENV") != "test": | ||||
|         init_database() | ||||
|         LOG.info("Starting up...") | ||||
|  | ||||
|  | ||||
| @app.on_event("shutdown") | ||||
| def shutdown_scheduler(): | ||||
|     scheduler.shutdown(wait=False)  # Set wait=False to not block shutdown | ||||
|  | ||||
|  | ||||
| @app.exception_handler(RequestValidationError) | ||||
| async def validation_exception_handler(request: Request, exc: RequestValidationError): | ||||
|     exc_str = f"{exc}".replace("\n", " ").replace("   ", " ") | ||||
|     logging.error(f"{request}: {exc_str}") | ||||
|     content = {"status_code": 10422, "message": exc_str, "data": None} | ||||
|     return JSONResponse( | ||||
|         content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY | ||||
|     ) | ||||
|   | ||||
| @@ -7,7 +7,6 @@ from fastapi.security import OAuth2PasswordRequestForm | ||||
|  | ||||
| # LOCAL | ||||
| from api.backend.schemas import User, Token, UserCreate | ||||
| from api.backend.database import get_user_collection | ||||
| from api.backend.auth.auth_utils import ( | ||||
|     ACCESS_TOKEN_EXPIRE_MINUTES, | ||||
|     get_current_user, | ||||
| @@ -15,9 +14,14 @@ from api.backend.auth.auth_utils import ( | ||||
|     get_password_hash, | ||||
|     create_access_token, | ||||
| ) | ||||
| import logging | ||||
|  | ||||
| from api.backend.database.common import update | ||||
|  | ||||
| auth_router = APIRouter() | ||||
|  | ||||
| LOG = logging.getLogger("auth_router") | ||||
|  | ||||
|  | ||||
| @auth_router.post("/auth/token", response_model=Token) | ||||
| async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()): | ||||
| @@ -43,12 +47,14 @@ async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends( | ||||
|  | ||||
| @auth_router.post("/auth/signup", response_model=User) | ||||
| async def create_user(user: UserCreate): | ||||
|     users_collection = get_user_collection() | ||||
|     hashed_password = get_password_hash(user.password) | ||||
|     user_dict = user.model_dump() | ||||
|     user_dict["hashed_password"] = hashed_password | ||||
|     del user_dict["password"] | ||||
|     _ = await users_collection.insert_one(user_dict) | ||||
|  | ||||
|     query = "INSERT INTO users (email, hashed_password, full_name) VALUES (?, ?, ?)" | ||||
|     _ = update(query, (user_dict["email"], hashed_password, user_dict["full_name"])) | ||||
|  | ||||
|     return user_dict | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| # STL | ||||
| import os | ||||
| from gc import disable | ||||
| from queue import Empty | ||||
| from typing import Any, Optional | ||||
| from datetime import datetime, timedelta | ||||
| import logging | ||||
| @@ -15,7 +13,8 @@ from fastapi.security import OAuth2PasswordBearer | ||||
|  | ||||
| # LOCAL | ||||
| from api.backend.schemas import User, UserInDB, TokenData | ||||
| from api.backend.database import get_user_collection | ||||
|  | ||||
| from api.backend.database.common import query | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
| @@ -40,8 +39,8 @@ def get_password_hash(password: str): | ||||
|  | ||||
|  | ||||
| async def get_user(email: str): | ||||
|     user_collection = get_user_collection() | ||||
|     user = await user_collection.find_one({"email": email}) | ||||
|     user_query = "SELECT * FROM users WHERE email = ?" | ||||
|     user = query(user_query, (email,))[0] | ||||
|  | ||||
|     if not user: | ||||
|         return | ||||
| @@ -77,27 +76,42 @@ def create_access_token( | ||||
|  | ||||
|  | ||||
| async def get_current_user(token: str = Depends(oauth2_scheme)): | ||||
|     LOG.info(f"Getting current user with token: {token}") | ||||
|     LOG.debug(f"Getting current user with token: {token}") | ||||
|  | ||||
|     if not token: | ||||
|         LOG.debug("No token provided") | ||||
|         return EMPTY_USER | ||||
|  | ||||
|     if len(token.split(".")) != 3: | ||||
|         LOG.error(f"Malformed token: {token}") | ||||
|         return EMPTY_USER | ||||
|  | ||||
|     try: | ||||
|         LOG.debug( | ||||
|             f"Decoding token: {token} with secret key: {SECRET_KEY} and algorithm: {ALGORITHM}" | ||||
|         ) | ||||
|  | ||||
|         if token.startswith("Bearer "): | ||||
|             token = token.split(" ")[1] | ||||
|  | ||||
|         payload: Optional[dict[str, Any]] = jwt.decode( | ||||
|             token, SECRET_KEY, algorithms=[ALGORITHM] | ||||
|         ) | ||||
|  | ||||
|         if not payload: | ||||
|             LOG.error("No payload found in token") | ||||
|             return EMPTY_USER | ||||
|  | ||||
|         email = payload.get("sub") | ||||
|  | ||||
|         if email is None: | ||||
|             LOG.error("No email found in payload") | ||||
|             return EMPTY_USER | ||||
|  | ||||
|         token_data = TokenData(email=email) | ||||
|  | ||||
|     except JWTError: | ||||
|     except JWTError as e: | ||||
|         LOG.error(f"JWTError occurred: {e}") | ||||
|         return EMPTY_USER | ||||
|  | ||||
|     except Exception as e: | ||||
| @@ -105,7 +119,6 @@ async def get_current_user(token: str = Depends(oauth2_scheme)): | ||||
|         return EMPTY_USER | ||||
|  | ||||
|     user = await get_user(email=token_data.email) | ||||
|  | ||||
|     if user is None: | ||||
|         return EMPTY_USER | ||||
|  | ||||
|   | ||||

api/backend/constants.py (new file, 1 line)

							| @@ -0,0 +1 @@ | ||||
| DATABASE_PATH = "data/database.db" | ||||
| @@ -1,23 +0,0 @@ | ||||
| # STL | ||||
| import os | ||||
| from typing import Any | ||||
|  | ||||
| # PDM | ||||
| from dotenv import load_dotenv | ||||
| from motor.motor_asyncio import AsyncIOMotorClient | ||||
|  | ||||
| _ = load_dotenv() | ||||
|  | ||||
| MONGODB_URI = os.getenv("MONGODB_URI") | ||||
|  | ||||
|  | ||||
| def get_user_collection(): | ||||
|     client: AsyncIOMotorClient[dict[str, Any]] = AsyncIOMotorClient(MONGODB_URI) | ||||
|     db = client["scrape"] | ||||
|     return db["users"] | ||||
|  | ||||
|  | ||||
| def get_job_collection(): | ||||
|     client: AsyncIOMotorClient[dict[str, Any]] = AsyncIOMotorClient(MONGODB_URI) | ||||
|     db = client["scrape"] | ||||
|     return db["jobs"] | ||||

api/backend/database/__init__.py (new file, 3 lines)

							| @@ -0,0 +1,3 @@ | ||||
| from .common import insert, QUERIES, update | ||||
|  | ||||
| __all__ = ["insert", "QUERIES", "update"] | ||||

api/backend/database/common.py (new file, 92 lines)

							| @@ -0,0 +1,92 @@ | ||||
| import sqlite3 | ||||
| from typing import Any, Optional | ||||
| from api.backend.constants import DATABASE_PATH | ||||
| from api.backend.utils import format_json, format_sql_row_to_python | ||||
| from api.backend.database.schema import INIT_QUERY | ||||
| from api.backend.database.queries import JOB_INSERT_QUERY, DELETE_JOB_QUERY | ||||
| import logging | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| def connect(): | ||||
|     connection = sqlite3.connect(DATABASE_PATH) | ||||
|     connection.set_trace_callback(print) | ||||
|     cursor = connection.cursor() | ||||
|     return cursor | ||||
|  | ||||
|  | ||||
| def insert(query: str, values: tuple[Any, ...]): | ||||
|     connection = sqlite3.connect(DATABASE_PATH) | ||||
|     cursor = connection.cursor() | ||||
|     copy = list(values) | ||||
|     format_json(copy) | ||||
|  | ||||
|     try: | ||||
|         _ = cursor.execute(query, copy) | ||||
|         connection.commit() | ||||
|     except sqlite3.Error as e: | ||||
|         LOG.error(f"An error occurred: {e}") | ||||
|     finally: | ||||
|         cursor.close() | ||||
|         connection.close() | ||||
|  | ||||
|  | ||||
| def query(query: str, values: Optional[tuple[Any, ...]] = None): | ||||
|     connection = sqlite3.connect(DATABASE_PATH) | ||||
|     connection.row_factory = sqlite3.Row | ||||
|     cursor = connection.cursor() | ||||
|     rows = [] | ||||
|     try: | ||||
|         if values: | ||||
|             _ = cursor.execute(query, values) | ||||
|         else: | ||||
|             _ = cursor.execute(query) | ||||
|  | ||||
|         rows = cursor.fetchall() | ||||
|  | ||||
|     finally: | ||||
|         cursor.close() | ||||
|         connection.close() | ||||
|  | ||||
|     formatted_rows: list[dict[str, Any]] = [] | ||||
|  | ||||
|     for row in rows: | ||||
|         row = dict(row) | ||||
|         formatted_row = format_sql_row_to_python(row) | ||||
|         formatted_rows.append(formatted_row) | ||||
|  | ||||
|     return formatted_rows | ||||
|  | ||||
|  | ||||
| def update(query: str, values: Optional[tuple[Any, ...]] = None): | ||||
|     connection = sqlite3.connect(DATABASE_PATH) | ||||
|     cursor = connection.cursor() | ||||
|  | ||||
|     copy = None | ||||
|  | ||||
|     if values: | ||||
|         copy = list(values) | ||||
|         format_json(copy) | ||||
|  | ||||
|     try: | ||||
|         if copy: | ||||
|             res = cursor.execute(query, copy) | ||||
|         else: | ||||
|             res = cursor.execute(query) | ||||
|         connection.commit() | ||||
|         return res.rowcount | ||||
|     except sqlite3.Error as e: | ||||
|         LOG.error(f"An error occurred: {e}") | ||||
|     finally: | ||||
|         cursor.close() | ||||
|         connection.close() | ||||
|  | ||||
|     return 0 | ||||
|  | ||||
|  | ||||
| QUERIES = { | ||||
|     "init": INIT_QUERY, | ||||
|     "insert_job": JOB_INSERT_QUERY, | ||||
|     "delete_job": DELETE_JOB_QUERY, | ||||
| } | ||||
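
For orientation, here is a minimal usage sketch of the `insert`, `query`, and `update` helpers above. It assumes the schema has already been initialized (so the `users` table exists) and that the `data/` directory is present; the email and names are placeholder values.

```python
from api.backend.database.common import insert, query, update

# Hypothetical usage of the helpers defined above: insert a user row,
# read it back, then modify it.
insert(
    "INSERT INTO users (email, hashed_password, full_name) VALUES (?, ?, ?)",
    ("user@example.com", "not-a-real-hash", "Example User"),
)

rows = query("SELECT * FROM users WHERE email = ?", ("user@example.com",))
print(rows)  # list of dicts; JSON columns are decoded by format_sql_row_to_python

update(
    "UPDATE users SET full_name = ? WHERE email = ?",
    ("Renamed User", "user@example.com"),
)
```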

api/backend/database/queries/__init__.py (new file, 3 lines)

							| @@ -0,0 +1,3 @@ | ||||
| from .queries import JOB_INSERT_QUERY, DELETE_JOB_QUERY | ||||
|  | ||||
| __all__ = ["JOB_INSERT_QUERY", "DELETE_JOB_QUERY"] | ||||

api/backend/database/queries/queries.py (new file, 9 lines)

							| @@ -0,0 +1,9 @@ | ||||
| JOB_INSERT_QUERY = """ | ||||
| INSERT INTO jobs  | ||||
| (id, url, elements, user, time_created, result, status, chat, job_options) | ||||
| VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) | ||||
| """ | ||||
|  | ||||
| DELETE_JOB_QUERY = """ | ||||
| DELETE FROM jobs WHERE id IN () | ||||
| """ | ||||

api/backend/database/schema/__init__.py (new file, 3 lines)

							| @@ -0,0 +1,3 @@ | ||||
| from .schema import INIT_QUERY | ||||
|  | ||||
| __all__ = ["INIT_QUERY"] | ||||

api/backend/database/schema/schema.py (new file, 30 lines)

							| @@ -0,0 +1,30 @@ | ||||
| INIT_QUERY = """ | ||||
| CREATE TABLE IF NOT EXISTS jobs ( | ||||
|     id STRING PRIMARY KEY NOT NULL, | ||||
|     url STRING NOT NULL, | ||||
|     elements JSON NOT NULL, | ||||
|     user STRING, | ||||
|     time_created DATETIME NOT NULL, | ||||
|     result JSON NOT NULL, | ||||
|     status STRING NOT NULL, | ||||
|     chat JSON, | ||||
|     job_options JSON | ||||
| ); | ||||
|  | ||||
| CREATE TABLE IF NOT EXISTS users ( | ||||
|     email STRING PRIMARY KEY NOT NULL, | ||||
|     hashed_password STRING NOT NULL, | ||||
|     full_name STRING, | ||||
|     disabled BOOLEAN | ||||
| ); | ||||
|  | ||||
| CREATE TABLE IF NOT EXISTS cron_jobs ( | ||||
|     id STRING PRIMARY KEY NOT NULL, | ||||
|     user_email STRING NOT NULL, | ||||
|     job_id STRING NOT NULL, | ||||
|     cron_expression STRING NOT NULL, | ||||
|     time_created DATETIME NOT NULL, | ||||
|     time_updated DATETIME NOT NULL, | ||||
|     FOREIGN KEY (job_id) REFERENCES jobs(id) | ||||
| ); | ||||
| """ | ||||

api/backend/database/startup.py (new file, 15 lines)

							| @@ -0,0 +1,15 @@ | ||||
| from api.backend.database.common import connect, QUERIES | ||||
| import logging | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| def init_database(): | ||||
|     cursor = connect() | ||||
|  | ||||
|     for query in QUERIES["init"].strip().split(";"): | ||||
|         if query.strip(): | ||||
|             LOG.info(f"Executing query: {query}") | ||||
|             _ = cursor.execute(query) | ||||
|  | ||||
|     cursor.close() | ||||
| @@ -1,119 +0,0 @@ | ||||
| # STL | ||||
| import logging | ||||
| from typing import Any, Optional | ||||
|  | ||||
| # PDM | ||||
| from pymongo import DESCENDING | ||||
|  | ||||
| # LOCAL | ||||
| from api.backend.models import FetchOptions | ||||
| from api.backend.database import get_job_collection | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| async def insert(item: dict[str, Any]) -> None: | ||||
|     collection = get_job_collection() | ||||
|     i = await collection.insert_one(item) | ||||
|     LOG.info(f"Inserted item: {i}") | ||||
|  | ||||
|  | ||||
| async def get_queued_job(): | ||||
|     collection = get_job_collection() | ||||
|     return await collection.find_one( | ||||
|         {"status": "Queued"}, sort=[("created_at", DESCENDING)] | ||||
|     ) | ||||
|  | ||||
|  | ||||
| async def query( | ||||
|     filter: dict[str, Any], fetch_options: Optional[FetchOptions] = None | ||||
| ) -> list[dict[str, Any]]: | ||||
|     collection = get_job_collection() | ||||
|     cursor = collection.find(filter) | ||||
|     results: list[dict[str, Any]] = [] | ||||
|  | ||||
|     async for document in cursor: | ||||
|         del document["_id"] | ||||
|  | ||||
|         if fetch_options and not fetch_options.chat and document.get("chat"): | ||||
|             del document["chat"] | ||||
|  | ||||
|         results.append(document) | ||||
|  | ||||
|     return results | ||||
|  | ||||
|  | ||||
| async def update_job(ids: list[str], field: str, value: Any): | ||||
|     collection = get_job_collection() | ||||
|     for id in ids: | ||||
|         _ = await collection.update_one( | ||||
|             {"id": id}, | ||||
|             {"$set": {field: value}}, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| async def delete_jobs(jobs: list[str]): | ||||
|     collection = get_job_collection() | ||||
|     result = await collection.delete_many({"id": {"$in": jobs}}) | ||||
|     LOG.info(f"{result.deleted_count} documents deleted") | ||||
|  | ||||
|     return True if result.deleted_count > 0 else False | ||||
|  | ||||
|  | ||||
| async def average_elements_per_link(user: str): | ||||
|     collection = get_job_collection() | ||||
|     pipeline = [ | ||||
|         {"$match": {"status": "Completed", "user": user}}, | ||||
|         { | ||||
|             "$project": { | ||||
|                 "date": { | ||||
|                     "$dateToString": {"format": "%Y-%m-%d", "date": "$time_created"} | ||||
|                 }, | ||||
|                 "num_elements": {"$size": "$elements"}, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             "$group": { | ||||
|                 "_id": "$date", | ||||
|                 "average_elements": {"$avg": "$num_elements"}, | ||||
|                 "count": {"$sum": 1}, | ||||
|             } | ||||
|         }, | ||||
|         {"$sort": {"_id": 1}}, | ||||
|     ] | ||||
|     cursor = collection.aggregate(pipeline) | ||||
|     results: list[dict[str, Any]] = [] | ||||
|  | ||||
|     async for document in cursor: | ||||
|         results.append( | ||||
|             { | ||||
|                 "date": document["_id"], | ||||
|                 "average_elements": document["average_elements"], | ||||
|                 "count": document["count"], | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|     return results | ||||
|  | ||||
|  | ||||
| async def get_jobs_per_day(user: str): | ||||
|     collection = get_job_collection() | ||||
|     pipeline = [ | ||||
|         {"$match": {"status": "Completed", "user": user}}, | ||||
|         { | ||||
|             "$project": { | ||||
|                 "date": { | ||||
|                     "$dateToString": {"format": "%Y-%m-%d", "date": "$time_created"} | ||||
|                 } | ||||
|             } | ||||
|         }, | ||||
|         {"$group": {"_id": "$date", "job_count": {"$sum": 1}}}, | ||||
|         {"$sort": {"_id": 1}}, | ||||
|     ] | ||||
|     cursor = collection.aggregate(pipeline) | ||||
|  | ||||
|     results: list[dict[str, Any]] = [] | ||||
|     async for document in cursor: | ||||
|         results.append({"date": document["_id"], "job_count": document["job_count"]}) | ||||
|  | ||||
|     return results | ||||

api/backend/job/__init__.py (new file, 17 lines)

							| @@ -0,0 +1,17 @@ | ||||
| from .job import ( | ||||
|     insert, | ||||
|     update_job, | ||||
|     delete_jobs, | ||||
|     get_jobs_per_day, | ||||
|     get_queued_job, | ||||
|     average_elements_per_link, | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
|     "insert", | ||||
|     "update_job", | ||||
|     "delete_jobs", | ||||
|     "get_jobs_per_day", | ||||
|     "get_queued_job", | ||||
|     "average_elements_per_link", | ||||
| ] | ||||

api/backend/job/cron_scheduling/cron_scheduling.py (new file, 100 lines)

							| @@ -0,0 +1,100 @@ | ||||
| import datetime | ||||
| from typing import Any | ||||
| import uuid | ||||
| from api.backend.database.common import insert, query | ||||
| from api.backend.models import CronJob | ||||
| from apscheduler.schedulers.background import BackgroundScheduler  # type: ignore | ||||
| from apscheduler.triggers.cron import CronTrigger  # type: ignore | ||||
|  | ||||
| from api.backend.job import insert as insert_job | ||||
| import logging | ||||
|  | ||||
| LOG = logging.getLogger("Cron Scheduler") | ||||
|  | ||||
|  | ||||
| def insert_cron_job(cron_job: CronJob): | ||||
|     query = """ | ||||
|     INSERT INTO cron_jobs (id, user_email, job_id, cron_expression, time_created, time_updated) | ||||
|     VALUES (?, ?, ?, ?, ?, ?) | ||||
|     """ | ||||
|     values = ( | ||||
|         cron_job.id, | ||||
|         cron_job.user_email, | ||||
|         cron_job.job_id, | ||||
|         cron_job.cron_expression, | ||||
|         cron_job.time_created, | ||||
|         cron_job.time_updated, | ||||
|     ) | ||||
|  | ||||
|     insert(query, values) | ||||
|  | ||||
|     return True | ||||
|  | ||||
|  | ||||
| def delete_cron_job(id: str, user_email: str): | ||||
|     query = """ | ||||
|     DELETE FROM cron_jobs | ||||
|     WHERE id = ? AND user_email = ? | ||||
|     """ | ||||
|     values = (id, user_email) | ||||
|     insert(query, values) | ||||
|  | ||||
|     return True | ||||
|  | ||||
|  | ||||
| def get_cron_jobs(user_email: str): | ||||
|     cron_jobs = query("SELECT * FROM cron_jobs WHERE user_email = ?", (user_email,)) | ||||
|  | ||||
|     return cron_jobs | ||||
|  | ||||
|  | ||||
| def get_all_cron_jobs(): | ||||
|     cron_jobs = query("SELECT * FROM cron_jobs") | ||||
|  | ||||
|     return cron_jobs | ||||
|  | ||||
|  | ||||
| def insert_job_from_cron_job(job: dict[str, Any]): | ||||
|     insert_job( | ||||
|         { | ||||
|             **job, | ||||
|             "id": uuid.uuid4().hex, | ||||
|             "status": "Queued", | ||||
|             "result": "", | ||||
|             "chat": None, | ||||
|             "time_created": datetime.datetime.now(), | ||||
|             "time_updated": datetime.datetime.now(), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def get_cron_job_trigger(cron_expression: str): | ||||
|     expression_parts = cron_expression.split() | ||||
|  | ||||
|     if len(expression_parts) != 5: | ||||
|         print(f"Invalid cron expression: {cron_expression}") | ||||
|         return None | ||||
|  | ||||
|     minute, hour, day, month, day_of_week = expression_parts | ||||
|  | ||||
|     return CronTrigger( | ||||
|         minute=minute, hour=hour, day=day, month=month, day_of_week=day_of_week | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def start_cron_scheduler(scheduler: BackgroundScheduler): | ||||
|     cron_jobs = get_all_cron_jobs() | ||||
|  | ||||
|     LOG.info(f"Cron jobs: {cron_jobs}") | ||||
|  | ||||
|     for job in cron_jobs: | ||||
|         queried_job = query("SELECT * FROM jobs WHERE id = ?", (job["job_id"],)) | ||||
|  | ||||
|         LOG.info(f"Adding job: {queried_job}") | ||||
|  | ||||
|         scheduler.add_job( | ||||
|             insert_job_from_cron_job, | ||||
|             get_cron_job_trigger(job["cron_expression"]), | ||||
|             id=job["id"], | ||||
|             args=[queried_job[0]], | ||||
|         ) | ||||
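
As a usage sketch, a scheduled job could be registered through `insert_cron_job`. The `CronJob` model itself is not shown in this diff, so its fields are assumed here to mirror the `cron_jobs` table columns from `schema.py`; all values are placeholders.

```python
import uuid
from datetime import datetime

from api.backend.models import CronJob
from api.backend.job.cron_scheduling.cron_scheduling import insert_cron_job

# Assumed CronJob fields, mirroring the cron_jobs table in schema.py.
cron_job = CronJob(
    id=uuid.uuid4().hex,
    user_email="user@example.com",      # placeholder user
    job_id="id-of-an-existing-job",     # placeholder; should reference a row in jobs
    cron_expression="0 * * * *",        # five-field expression: top of every hour
    time_created=datetime.now(),
    time_updated=datetime.now(),
)
insert_cron_job(cron_job)
```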

api/backend/job/job.py (new file, 97 lines)

							| @@ -0,0 +1,97 @@ | ||||
| # STL | ||||
| import logging | ||||
| from typing import Any | ||||
|  | ||||
| # LOCAL | ||||
| from api.backend.utils import format_list_for_query | ||||
| from api.backend.database.common import ( | ||||
|     insert as common_insert, | ||||
|     query as common_query, | ||||
|     QUERIES, | ||||
|     update as common_update, | ||||
| ) | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| def insert(item: dict[str, Any]) -> None: | ||||
|     common_insert( | ||||
|         QUERIES["insert_job"], | ||||
|         ( | ||||
|             item["id"], | ||||
|             item["url"], | ||||
|             item["elements"], | ||||
|             item["user"], | ||||
|             item["time_created"], | ||||
|             item["result"], | ||||
|             item["status"], | ||||
|             item["chat"], | ||||
|             item["job_options"], | ||||
|         ), | ||||
|     ) | ||||
|     LOG.info(f"Inserted item: {item}") | ||||
|  | ||||
|  | ||||
| async def get_queued_job(): | ||||
|     query = ( | ||||
|         "SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1" | ||||
|     ) | ||||
|     res = common_query(query) | ||||
|     LOG.info(f"Got queued job: {res}") | ||||
|     return res[0] if res else None | ||||
|  | ||||
|  | ||||
| async def update_job(ids: list[str], field: str, value: Any): | ||||
|     query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}" | ||||
|     res = common_update(query, tuple([value] + ids)) | ||||
|     LOG.info(f"Updated job: {res}") | ||||
|  | ||||
|  | ||||
| async def delete_jobs(jobs: list[str]): | ||||
|     if not jobs: | ||||
|         LOG.info("No jobs to delete.") | ||||
|         return False | ||||
|  | ||||
|     query = f"DELETE FROM jobs WHERE id IN {format_list_for_query(jobs)}" | ||||
|     res = common_update(query, tuple(jobs)) | ||||
|  | ||||
|     return res > 0 | ||||
|  | ||||
|  | ||||
| async def average_elements_per_link(user: str): | ||||
|     job_query = """ | ||||
|     SELECT  | ||||
|         DATE(time_created) AS date, | ||||
|         AVG(json_array_length(elements)) AS average_elements, | ||||
|         COUNT(*) AS count | ||||
|     FROM  | ||||
|         jobs | ||||
|     WHERE  | ||||
|         status = 'Completed' AND user = ? | ||||
|     GROUP BY  | ||||
|         DATE(time_created) | ||||
|     ORDER BY  | ||||
|         date ASC; | ||||
|     """ | ||||
|     results = common_query(job_query, (user,)) | ||||
|  | ||||
|     return results | ||||
|  | ||||
|  | ||||
| async def get_jobs_per_day(user: str): | ||||
|     job_query = """ | ||||
|     SELECT  | ||||
|         DATE(time_created) AS date, | ||||
|         COUNT(*) AS job_count | ||||
|     FROM  | ||||
|         jobs | ||||
|     WHERE  | ||||
|         status = 'Completed' AND user = ? | ||||
|     GROUP BY  | ||||
|         DATE(time_created) | ||||
|     ORDER BY  | ||||
|         date ASC; | ||||
|     """ | ||||
|     results = common_query(job_query, (user,)) | ||||
|  | ||||
|     return results | ||||
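
`format_list_for_query` is imported from `api.backend.utils` but is not included in this diff. A plausible sketch of it, inferred from how `update_job` and `delete_jobs` use it above, is:

```python
def format_list_for_query(ids: list[str]) -> str:
    # Render a placeholder group such as "(?, ?, ?)" sized to the id list,
    # so the ids themselves can be passed as bound parameters.
    return f"({', '.join('?' for _ in ids)})"
```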

api/backend/job/models/__init__.py (new file, 3 lines)

							| @@ -0,0 +1,3 @@ | ||||
| from .job_options import JobOptions | ||||
|  | ||||
| __all__ = ["JobOptions"] | ||||

api/backend/job/models/job_options.py (new file, 15 lines)

							| @@ -0,0 +1,15 @@ | ||||
| from pydantic import BaseModel | ||||
| from typing import Any, Optional | ||||
| from api.backend.job.models.site_map import SiteMap | ||||
|  | ||||
|  | ||||
| class FetchOptions(BaseModel): | ||||
|     chat: Optional[bool] = None | ||||
|  | ||||
|  | ||||
| class JobOptions(BaseModel): | ||||
|     multi_page_scrape: bool = False | ||||
|     custom_headers: dict[str, Any] = {} | ||||
|     proxies: list[str] = [] | ||||
|     site_map: Optional[SiteMap] = None | ||||
|     collect_media: bool = False | ||||

api/backend/job/models/site_map.py (new file, 14 lines)

							| @@ -0,0 +1,14 @@ | ||||
| from pydantic import BaseModel | ||||
| from typing import Literal | ||||
|  | ||||
|  | ||||
| class Action(BaseModel): | ||||
|     type: Literal["click", "input"] | ||||
|     xpath: str | ||||
|     name: str | ||||
|     input: str = "" | ||||
|     do_once: bool = True | ||||
|  | ||||
|  | ||||
| class SiteMap(BaseModel): | ||||
|     actions: list[Action] | ||||
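
As an illustration of these models, a small site map with one input action and one click action might look like the following; the XPaths and names are placeholder values.

```python
from api.backend.job.models.site_map import Action, SiteMap

# Illustrative payload: type a query into a search box, then click submit.
site_map = SiteMap(
    actions=[
        Action(type="input", xpath="//input[@name='q']", name="search box", input="scraperr"),
        Action(type="click", xpath="//button[@type='submit']", name="search button"),
    ]
)
print(site_map.model_dump())
```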

api/backend/job/scraping/collect_media.py (new file, 91 lines)

							| @@ -0,0 +1,91 @@ | ||||
| import os | ||||
| import requests | ||||
| from pathlib import Path | ||||
| from selenium.webdriver.common.by import By | ||||
| from seleniumwire import webdriver | ||||
| from urllib.parse import urlparse | ||||
|  | ||||
| from api.backend.utils import LOG | ||||
|  | ||||
|  | ||||
| def collect_media(driver: webdriver.Chrome): | ||||
|     media_types = { | ||||
|         "images": "img", | ||||
|         "videos": "video", | ||||
|         "audio": "audio", | ||||
|         "pdfs": 'a[href$=".pdf"]', | ||||
|         "documents": 'a[href$=".doc"], a[href$=".docx"], a[href$=".txt"], a[href$=".rtf"]', | ||||
|         "presentations": 'a[href$=".ppt"], a[href$=".pptx"]', | ||||
|         "spreadsheets": 'a[href$=".xls"], a[href$=".xlsx"], a[href$=".csv"]', | ||||
|     } | ||||
|  | ||||
|     base_dir = Path("media") | ||||
|     base_dir.mkdir(exist_ok=True) | ||||
|  | ||||
|     media_urls = {} | ||||
|  | ||||
|     for media_type, selector in media_types.items(): | ||||
|         elements = driver.find_elements(By.CSS_SELECTOR, selector) | ||||
|         urls: list[dict[str, str]] = [] | ||||
|  | ||||
|         media_dir = base_dir / media_type | ||||
|         media_dir.mkdir(exist_ok=True) | ||||
|  | ||||
|         for element in elements: | ||||
|             if media_type == "images": | ||||
|                 url = element.get_attribute("src") | ||||
|             elif media_type == "videos": | ||||
|                 url = element.get_attribute("src") or element.get_attribute("data-src") | ||||
|             else: | ||||
|                 url = element.get_attribute("href") | ||||
|  | ||||
|             if url and url.startswith(("http://", "https://")): | ||||
|                 try: | ||||
|                     filename = os.path.basename(urlparse(url).path) | ||||
|  | ||||
|                     if not filename: | ||||
|                         filename = f"{media_type}_{len(urls)}" | ||||
|  | ||||
|                         if media_type == "images": | ||||
|                             filename += ".jpg" | ||||
|                         elif media_type == "videos": | ||||
|                             filename += ".mp4" | ||||
|                         elif media_type == "audio": | ||||
|                             filename += ".mp3" | ||||
|                         elif media_type == "pdfs": | ||||
|                             filename += ".pdf" | ||||
|                         elif media_type == "documents": | ||||
|                             filename += ".doc" | ||||
|                         elif media_type == "presentations": | ||||
|                             filename += ".ppt" | ||||
|                         elif media_type == "spreadsheets": | ||||
|                             filename += ".xls" | ||||
|  | ||||
|                     response = requests.get(url, stream=True) | ||||
|                     response.raise_for_status() | ||||
|  | ||||
|                     # Save the file | ||||
|                     file_path = media_dir / filename | ||||
|                     with open(file_path, "wb") as f: | ||||
|                         for chunk in response.iter_content(chunk_size=8192): | ||||
|                             if chunk: | ||||
|                                 f.write(chunk) | ||||
|  | ||||
|                     urls.append({"url": url, "local_path": str(file_path)}) | ||||
|                     LOG.info(f"Downloaded {filename} to {file_path}") | ||||
|  | ||||
|                 except Exception as e: | ||||
|                     LOG.error(f"Error downloading {url}: {str(e)}") | ||||
|                     continue | ||||
|  | ||||
|         media_urls[media_type] = urls | ||||
|  | ||||
|     with open(base_dir / "download_summary.txt", "w") as f: | ||||
|         for media_type, downloads in media_urls.items(): | ||||
|             if downloads: | ||||
|                 f.write(f"\n=== {media_type.upper()} ===\n") | ||||
|                 for download in downloads: | ||||
|                     f.write(f"URL: {download['url']}\n") | ||||
|                     f.write(f"Saved to: {download['local_path']}\n\n") | ||||
|  | ||||
|     return media_urls | ||||

api/backend/job/scraping/scraping_utils.py (new file, 41 lines)

							| @@ -0,0 +1,41 @@ | ||||
| import time | ||||
| from typing import cast | ||||
|  | ||||
| from seleniumwire import webdriver | ||||
| from selenium.webdriver.common.by import By | ||||
| from selenium.webdriver.support import expected_conditions as EC | ||||
| from selenium.webdriver.support.ui import WebDriverWait | ||||
|  | ||||
| from api.backend.utils import LOG | ||||
|  | ||||
| from api.backend.job.scraping.collect_media import collect_media as collect_media_utils | ||||
|  | ||||
|  | ||||
| def scrape_content( | ||||
|     driver: webdriver.Chrome, pages: set[tuple[str, str]], collect_media: bool | ||||
| ): | ||||
|     _ = WebDriverWait(driver, 10).until( | ||||
|         EC.presence_of_element_located((By.TAG_NAME, "body")) | ||||
|     ) | ||||
|  | ||||
|     last_height = cast(str, driver.execute_script("return document.body.scrollHeight")) | ||||
|     while True: | ||||
|         driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") | ||||
|  | ||||
|         time.sleep(3)  # Wait for the page to load | ||||
|         new_height = cast( | ||||
|             str, driver.execute_script("return document.body.scrollHeight") | ||||
|         ) | ||||
|  | ||||
|         if new_height == last_height: | ||||
|             break | ||||
|  | ||||
|         last_height = new_height | ||||
|  | ||||
|     pages.add((driver.page_source, driver.current_url)) | ||||
|  | ||||
|     if collect_media: | ||||
|         LOG.info("Collecting media") | ||||
|         collect_media_utils(driver) | ||||
|  | ||||
|     return driver.page_source | ||||
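
A hedged sketch of driving `scrape_content` directly with a headless seleniumwire Chrome instance; the target URL and browser options are illustrative and not taken from the repository.

```python
from selenium.webdriver.chrome.options import Options
from seleniumwire import webdriver

from api.backend.job.scraping.scraping_utils import scrape_content

options = Options()
options.add_argument("--headless=new")

driver = webdriver.Chrome(options=options)
try:
    driver.get("https://example.com")
    pages: set[tuple[str, str]] = set()
    html = scrape_content(driver, pages, collect_media=False)
    print(f"Captured {len(pages)} page(s)")
finally:
    driver.quit()
```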

api/backend/job/site_mapping/__init__.py (new file, empty)

api/backend/job/site_mapping/site_mapping.py (new file, 93 lines)

							| @@ -0,0 +1,93 @@ | ||||
| from api.backend.job.models.site_map import Action, SiteMap | ||||
| from selenium import webdriver | ||||
| from selenium.common.exceptions import NoSuchElementException | ||||
| from selenium.webdriver.common.by import By | ||||
| from typing import Any | ||||
| import logging | ||||
| import time | ||||
| from copy import deepcopy | ||||
|  | ||||
| from api.backend.job.scraping.scraping_utils import scrape_content | ||||
| from selenium.webdriver.support.ui import WebDriverWait | ||||
| from seleniumwire.inspect import TimeoutException | ||||
| from seleniumwire.webdriver import Chrome | ||||
| from selenium.webdriver.support import expected_conditions as EC | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| def clear_done_actions(site_map: dict[str, Any]): | ||||
|     """Clear all actions that have been clicked.""" | ||||
|     cleared_site_map = deepcopy(site_map) | ||||
|  | ||||
|     cleared_site_map["actions"] = [ | ||||
|         action for action in cleared_site_map["actions"] if not action["do_once"] | ||||
|     ] | ||||
|  | ||||
|     return cleared_site_map | ||||
|  | ||||
|  | ||||
| def handle_input(action: Action, driver: webdriver.Chrome): | ||||
|     try: | ||||
|         element = WebDriverWait(driver, 10).until( | ||||
|             EC.element_to_be_clickable((By.XPATH, action.xpath)) | ||||
|         ) | ||||
|         LOG.info(f"Sending keys: {action.input} to element: {element}") | ||||
|  | ||||
|         element.send_keys(action.input) | ||||
|  | ||||
|     except NoSuchElementException: | ||||
|         LOG.info(f"Element not found: {action.xpath}") | ||||
|         return False | ||||
|  | ||||
|     except TimeoutException: | ||||
|         LOG.info(f"Timeout waiting for element: {action.xpath}") | ||||
|         return False | ||||
|  | ||||
|     except Exception as e: | ||||
|         LOG.info(f"Error handling input: {e}") | ||||
|         return False | ||||
|  | ||||
|     return True | ||||
|  | ||||
|  | ||||
| def handle_click(action: Action, driver: webdriver.Chrome): | ||||
|     try: | ||||
|         element = driver.find_element(By.XPATH, action.xpath) | ||||
|         LOG.info(f"Clicking element: {element}") | ||||
|  | ||||
|         element.click() | ||||
|  | ||||
|     except NoSuchElementException: | ||||
|         LOG.info(f"Element not found: {action.xpath}") | ||||
|         return False | ||||
|  | ||||
|     return True | ||||
|  | ||||
|  | ||||
| ACTION_MAP = { | ||||
|     "click": handle_click, | ||||
|     "input": handle_input, | ||||
| } | ||||
|  | ||||
|  | ||||
| async def handle_site_mapping( | ||||
|     site_map_dict: dict[str, Any], | ||||
|     driver: Chrome, | ||||
|     pages: set[tuple[str, str]], | ||||
| ): | ||||
|     site_map = SiteMap(**site_map_dict) | ||||
|  | ||||
|     for action in site_map.actions: | ||||
|         action_handler = ACTION_MAP[action.type] | ||||
|         if not action_handler(action, driver): | ||||
|             return | ||||
|  | ||||
|         time.sleep(2) | ||||
|  | ||||
|     _ = scrape_content(driver, pages, collect_media=False) | ||||
|  | ||||
|     cleared_site_map_dict = clear_done_actions(site_map_dict) | ||||
|  | ||||
|     if cleared_site_map_dict["actions"]: | ||||
|         await handle_site_mapping(cleared_site_map_dict, driver, pages) | ||||
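
For illustration only, a site-map dict of the shape `handle_site_mapping` consumes might look like the snippet below; the authoritative field names live in `api/backend/job/models/site_map.py` (not shown in this diff), so the keys here are inferred from how the actions are used above:

    # Hypothetical payload: type one search query once, then keep clicking
    # "Load more" on every recursive pass until the click fails.
    example_site_map = {
        "actions": [
            {"type": "input", "xpath": "//input[@name='q']", "input": "laptops", "do_once": True},
            {"type": "click", "xpath": "//button[text()='Load more']", "input": "", "do_once": False},
        ]
    }
    # await handle_site_mapping(example_site_map, driver, pages)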
| @@ -2,14 +2,13 @@ | ||||
| from typing import Any, Optional, Union | ||||
| from datetime import datetime | ||||
|  | ||||
| # LOCAL | ||||
| from api.backend.job.models.job_options import JobOptions | ||||
|  | ||||
| # PDM | ||||
| import pydantic | ||||
|  | ||||
|  | ||||
| class FetchOptions(pydantic.BaseModel): | ||||
|     chat: Optional[bool] = None | ||||
|  | ||||
|  | ||||
| class Element(pydantic.BaseModel): | ||||
|     name: str | ||||
|     xpath: str | ||||
| @@ -22,12 +21,6 @@ class CapturedElement(pydantic.BaseModel): | ||||
|     name: str | ||||
|  | ||||
|  | ||||
| class JobOptions(pydantic.BaseModel): | ||||
|     multi_page_scrape: bool = False | ||||
|     custom_headers: Optional[dict[str, Any]] = {} | ||||
|     proxies: Optional[list[str]] = [] | ||||
|  | ||||
|  | ||||
| class RetrieveScrapeJobs(pydantic.BaseModel): | ||||
|     user: str | ||||
|  | ||||
| @@ -64,3 +57,17 @@ class Job(pydantic.BaseModel): | ||||
|     job_options: JobOptions | ||||
|     status: str = "Queued" | ||||
|     chat: Optional[str] = None | ||||
|  | ||||
|  | ||||
| class CronJob(pydantic.BaseModel): | ||||
|     id: Optional[str] = None | ||||
|     user_email: str | ||||
|     job_id: str | ||||
|     cron_expression: str | ||||
|     time_created: Optional[Union[datetime, str]] = None | ||||
|     time_updated: Optional[Union[datetime, str]] = None | ||||
|  | ||||
|  | ||||
| class DeleteCronJob(pydantic.BaseModel): | ||||
|     id: str | ||||
|     user_email: str | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| # STL | ||||
| import datetime | ||||
| import uuid | ||||
| import traceback | ||||
| from io import StringIO | ||||
| @@ -10,24 +11,33 @@ import random | ||||
| from fastapi import Depends, APIRouter | ||||
| from fastapi.encoders import jsonable_encoder | ||||
| from fastapi.responses import JSONResponse, StreamingResponse | ||||
| from api.backend.scheduler import scheduler | ||||
| from apscheduler.triggers.cron import CronTrigger  # type: ignore | ||||
|  | ||||
| # LOCAL | ||||
| from api.backend.job import ( | ||||
|     query, | ||||
|     insert, | ||||
|     update_job, | ||||
|     delete_jobs, | ||||
| ) | ||||
| from api.backend.job import insert, update_job, delete_jobs | ||||
| from api.backend.models import ( | ||||
|     DeleteCronJob, | ||||
|     UpdateJobs, | ||||
|     DownloadJob, | ||||
|     FetchOptions, | ||||
|     DeleteScrapeJobs, | ||||
|     Job, | ||||
|     CronJob, | ||||
| ) | ||||
| from api.backend.schemas import User | ||||
| from api.backend.auth.auth_utils import get_current_user | ||||
| from api.backend.utils import clean_text | ||||
| from api.backend.utils import clean_text, format_list_for_query | ||||
| from api.backend.job.models.job_options import FetchOptions | ||||
|  | ||||
| from api.backend.database.common import query | ||||
|  | ||||
| from api.backend.job.cron_scheduling.cron_scheduling import ( | ||||
|     delete_cron_job, | ||||
|     get_cron_job_trigger, | ||||
|     insert_cron_job, | ||||
|     get_cron_jobs, | ||||
|     insert_job_from_cron_job, | ||||
| ) | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
| @@ -47,10 +57,11 @@ async def submit_scrape_job(job: Job): | ||||
|         job.id = uuid.uuid4().hex | ||||
|  | ||||
|         job_dict = job.model_dump() | ||||
|         await insert(job_dict) | ||||
|         insert(job_dict) | ||||
|  | ||||
|         return JSONResponse(content={"id": job.id}) | ||||
|     except Exception as e: | ||||
|         LOG.error(f"Exception occurred: {traceback.format_exc()}") | ||||
|         return JSONResponse(content={"error": str(e)}, status_code=500) | ||||
|  | ||||
|  | ||||
| @@ -59,8 +70,11 @@ async def retrieve_scrape_jobs( | ||||
|     fetch_options: FetchOptions, user: User = Depends(get_current_user) | ||||
| ): | ||||
|     LOG.info(f"Retrieving jobs for account: {user.email}") | ||||
|     ATTRIBUTES = "chat" if fetch_options.chat else "*" | ||||
|  | ||||
|     try: | ||||
|         results = await query({"user": user.email}, fetch_options=fetch_options) | ||||
|         job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?" | ||||
|         results = query(job_query, (user.email,)) | ||||
|         return JSONResponse(content=jsonable_encoder(results[::-1])) | ||||
|     except Exception as e: | ||||
|         LOG.error(f"Exception occurred: {e}") | ||||
| @@ -72,8 +86,8 @@ async def job(id: str, user: User = Depends(get_current_user)): | ||||
|     LOG.info(f"Retrieving jobs for account: {user.email}") | ||||
|  | ||||
|     try: | ||||
|         filter = {"user": user.email, "id": id} | ||||
|         results = await query(filter) | ||||
|         job_query = "SELECT * FROM jobs WHERE user = ? AND id = ?" | ||||
|         results = query(job_query, (user.email, id)) | ||||
|         return JSONResponse(content=jsonable_encoder(results)) | ||||
|     except Exception as e: | ||||
|         LOG.error(f"Exception occurred: {e}") | ||||
| @@ -85,7 +99,10 @@ async def download(download_job: DownloadJob): | ||||
|     LOG.info(f"Downloading job with ids: {download_job.ids}") | ||||
|  | ||||
|     try: | ||||
|         results = await query({"id": {"$in": download_job.ids}}) | ||||
|         job_query = ( | ||||
|             f"SELECT * FROM jobs WHERE id IN {format_list_for_query(download_job.ids)}" | ||||
|         ) | ||||
|         results = query(job_query, tuple(download_job.ids)) | ||||
|  | ||||
|         csv_buffer = StringIO() | ||||
|         csv_writer = csv.writer(csv_buffer, quotechar='"', quoting=csv.QUOTE_ALL) | ||||
| @@ -136,3 +153,47 @@ async def delete(delete_scrape_jobs: DeleteScrapeJobs): | ||||
|         if result | ||||
|         else JSONResponse({"error": "Jobs not deleted."}) | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @job_router.post("/schedule-cron-job") | ||||
| async def schedule_cron_job(cron_job: CronJob): | ||||
|     if not cron_job.id: | ||||
|         cron_job.id = uuid.uuid4().hex | ||||
|  | ||||
|     if not cron_job.time_created: | ||||
|         cron_job.time_created = datetime.datetime.now() | ||||
|  | ||||
|     if not cron_job.time_updated: | ||||
|         cron_job.time_updated = datetime.datetime.now() | ||||
|  | ||||
|     insert_cron_job(cron_job) | ||||
|  | ||||
|     queried_job = query("SELECT * FROM jobs WHERE id = ?", (cron_job.job_id,)) | ||||
|  | ||||
|     scheduler.add_job( | ||||
|         insert_job_from_cron_job, | ||||
|         get_cron_job_trigger(cron_job.cron_expression), | ||||
|         id=cron_job.id, | ||||
|         args=[queried_job[0]], | ||||
|     ) | ||||
|  | ||||
|     return JSONResponse(content={"message": "Cron job scheduled successfully."}) | ||||
|  | ||||
|  | ||||
| @job_router.post("/delete-cron-job") | ||||
| async def delete_cron_job_request(request: DeleteCronJob): | ||||
|     if not request.id: | ||||
|         return JSONResponse( | ||||
|             content={"error": "Cron job id is required."}, status_code=400 | ||||
|         ) | ||||
|  | ||||
|     delete_cron_job(request.id, request.user_email) | ||||
|     scheduler.remove_job(request.id) | ||||
|  | ||||
|     return JSONResponse(content={"message": "Cron job deleted successfully."}) | ||||
|  | ||||
|  | ||||
| @job_router.get("/cron-jobs") | ||||
| async def get_cron_jobs_request(user: User = Depends(get_current_user)): | ||||
|     cron_jobs = get_cron_jobs(user.email) | ||||
|     return JSONResponse(content=jsonable_encoder(cron_jobs)) | ||||
|   | ||||

api/backend/scheduler.py (new file, 3 lines)
							| @@ -0,0 +1,3 @@ | ||||
| from apscheduler.schedulers.background import BackgroundScheduler  # type: ignore | ||||
|  | ||||
| scheduler = BackgroundScheduler() | ||||
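
Note that a `BackgroundScheduler` does not fire anything until `start()` is called; the diff does not show where that happens, but presumably the API does something along the lines of:

    from api.backend.scheduler import scheduler

    # Assumed: called once at application startup (e.g. a FastAPI startup hook)
    scheduler.start()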
| @@ -1,19 +1,21 @@ | ||||
| import logging | ||||
| from typing import Any, Optional | ||||
| import time | ||||
| import random | ||||
|  | ||||
| from bs4 import BeautifulSoup | ||||
| from bs4 import BeautifulSoup, Tag | ||||
| from lxml import etree | ||||
| from seleniumwire import webdriver | ||||
| from lxml.etree import _Element  # type: ignore [reportPrivateImport] | ||||
| from lxml.etree import _Element | ||||
| from fake_useragent import UserAgent | ||||
| from selenium.webdriver.support import expected_conditions as EC | ||||
| from selenium.webdriver.common.by import By | ||||
| from selenium.webdriver.support.ui import WebDriverWait | ||||
| from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
| from urllib.parse import urlparse, urljoin | ||||
| from api.backend.models import Element, CapturedElement | ||||
| from api.backend.job.site_mapping.site_mapping import ( | ||||
|     handle_site_mapping, | ||||
| ) | ||||
| from selenium.webdriver.chrome.service import Service | ||||
| from webdriver_manager.chrome import ChromeDriverManager | ||||
| from api.backend.job.scraping.scraping_utils import scrape_content | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
| @@ -69,21 +71,27 @@ def create_driver(proxies: Optional[list[str]] = []): | ||||
|     chrome_options.add_argument(f"user-agent={ua.random}") | ||||
|  | ||||
|     sw_options = {} | ||||
|  | ||||
|     if proxies: | ||||
|         selected_proxy = proxies[random.randint(0, len(proxies) - 1)] | ||||
|         selected_proxy = random.choice(proxies) | ||||
|         LOG.info(f"Using proxy: {selected_proxy}") | ||||
|  | ||||
|         sw_options = { | ||||
|             "proxy": { | ||||
|                 "https": f"https://{selected_proxy}", | ||||
|                 "http": f"http://{selected_proxy}", | ||||
|                 "no_proxy": "localhost,127.0.0.1", | ||||
|             } | ||||
|         } | ||||
|  | ||||
|     service = Service(ChromeDriverManager().install()) | ||||
|  | ||||
|     driver = webdriver.Chrome( | ||||
|         service=service, | ||||
|         options=chrome_options, | ||||
|         seleniumwire_options=sw_options, | ||||
|     ) | ||||
|  | ||||
|     return driver | ||||
|  | ||||
|  | ||||
| @@ -95,6 +103,8 @@ async def make_site_request( | ||||
|     pages: set[tuple[str, str]] = set(), | ||||
|     original_url: str = "", | ||||
|     proxies: Optional[list[str]] = [], | ||||
|     site_map: Optional[dict[str, Any]] = None, | ||||
|     collect_media: bool = False, | ||||
| ) -> None: | ||||
|     """Make basic `GET` request to site using Selenium.""" | ||||
|     # Check if URL has already been visited | ||||
| @@ -114,27 +124,16 @@ async def make_site_request( | ||||
|         final_url = driver.current_url | ||||
|         visited_urls.add(url) | ||||
|         visited_urls.add(final_url) | ||||
|         _ = WebDriverWait(driver, 10).until( | ||||
|             EC.presence_of_element_located((By.TAG_NAME, "body")) | ||||
|         ) | ||||
|  | ||||
|         last_height = driver.execute_script("return document.body.scrollHeight") | ||||
|         while True: | ||||
|             driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") | ||||
|         page_source = scrape_content(driver, pages, collect_media) | ||||
|  | ||||
|             time.sleep(3)  # Wait for the page to load | ||||
|             new_height = driver.execute_script("return document.body.scrollHeight") | ||||
|  | ||||
|             if new_height == last_height: | ||||
|                 break | ||||
|  | ||||
|             last_height = new_height | ||||
|  | ||||
|         final_height = driver.execute_script("return document.body.scrollHeight") | ||||
|  | ||||
|         page_source = driver.page_source | ||||
|         LOG.debug(f"Page source for url: {url}\n{page_source}") | ||||
|         pages.add((page_source, final_url)) | ||||
|         if site_map: | ||||
|             LOG.info("Site map: %s", site_map) | ||||
|             _ = await handle_site_mapping( | ||||
|                 site_map, | ||||
|                 driver, | ||||
|                 pages, | ||||
|             ) | ||||
|     finally: | ||||
|         driver.quit() | ||||
|  | ||||
| @@ -144,7 +143,10 @@ async def make_site_request( | ||||
|     soup = BeautifulSoup(page_source, "html.parser") | ||||
|  | ||||
|     for a_tag in soup.find_all("a"): | ||||
|         link = a_tag.get("href") | ||||
|         if not isinstance(a_tag, Tag): | ||||
|             continue | ||||
|  | ||||
|         link = str(a_tag.get("href", "")) | ||||
|  | ||||
|         if link: | ||||
|             if not urlparse(link).netloc: | ||||
| @@ -172,7 +174,10 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element]) | ||||
|         el = sxpath(root, elem.xpath) | ||||
|  | ||||
|         for e in el: | ||||
|             text = "\t".join(str(t) for t in e.itertext()) | ||||
|             if isinstance(e, etree._Element):  # type: ignore | ||||
|                 text = "\t".join(str(t) for t in e.itertext()) | ||||
|             else: | ||||
|                 text = str(e) | ||||
|             captured_element = CapturedElement( | ||||
|                 xpath=elem.xpath, text=text, name=elem.name | ||||
|             ) | ||||
| @@ -192,6 +197,8 @@ async def scrape( | ||||
|     headers: Optional[dict[str, Any]], | ||||
|     multi_page_scrape: bool = False, | ||||
|     proxies: Optional[list[str]] = [], | ||||
|     site_map: Optional[dict[str, Any]] = None, | ||||
|     collect_media: bool = False, | ||||
| ): | ||||
|     visited_urls: set[str] = set() | ||||
|     pages: set[tuple[str, str]] = set() | ||||
| @@ -204,6 +211,8 @@ async def scrape( | ||||
|         pages=pages, | ||||
|         original_url=url, | ||||
|         proxies=proxies, | ||||
|         site_map=site_map, | ||||
|         collect_media=collect_media, | ||||
|     ) | ||||
|  | ||||
|     elements: list[dict[str, dict[str, list[CapturedElement]]]] = list() | ||||
|   | ||||
| @@ -1,15 +1,10 @@ | ||||
| import pytest | ||||
| import logging | ||||
| from unittest.mock import AsyncMock, patch, MagicMock | ||||
| from api.backend.tests.factories.job_factory import create_job | ||||
| from api.backend.models import JobOptions | ||||
| from api.backend.scraping import create_driver | ||||
|  | ||||
|  | ||||
| mocked_job = create_job( | ||||
|     job_options=JobOptions( | ||||
|         multi_page_scrape=False, custom_headers={}, proxies=["127.0.0.1:8080"] | ||||
|     ) | ||||
| ).model_dump() | ||||
| logging.basicConfig(level=logging.DEBUG) | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| @pytest.mark.asyncio | ||||
| @@ -26,8 +21,7 @@ async def test_proxy(mock_get: AsyncMock): | ||||
|     driver.get("http://example.com") | ||||
|     response = driver.last_request | ||||
|  | ||||
|     # Check if the proxy header is set correctly | ||||
|     if response: | ||||
|         assert response.headers["Proxy"] == "127.0.0.1:8080" | ||||
|         assert response.headers["Proxy-Connection"] == "keep-alive" | ||||
|  | ||||
|     driver.quit() | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| from typing import Optional | ||||
| from typing import Any, Optional | ||||
| import logging | ||||
| import json | ||||
|  | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| def clean_text(text: str): | ||||
| @@ -17,3 +20,30 @@ def get_log_level(level_name: Optional[str]) -> int: | ||||
|         level = getattr(logging, level_name, logging.INFO) | ||||
|  | ||||
|     return level | ||||
|  | ||||
|  | ||||
| def format_list_for_query(ids: list[str]): | ||||
|     return ( | ||||
|         f"({','.join(['?' for _ in ids])})"  # Returns placeholders, e.g. "(?,?,?)" for three ids | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def format_sql_row_to_python(row: dict[str, Any]): | ||||
|     new_row: dict[str, Any] = {} | ||||
|     for key, value in row.items(): | ||||
|         if isinstance(value, str): | ||||
|             try: | ||||
|                 new_row[key] = json.loads(value) | ||||
|             except json.JSONDecodeError: | ||||
|                 new_row[key] = value | ||||
|         else: | ||||
|             new_row[key] = value | ||||
|  | ||||
|     return new_row | ||||
|  | ||||
|  | ||||
| def format_json(items: list[Any]): | ||||
|     for idx, item in enumerate(items): | ||||
|         if isinstance(item, (dict, list)): | ||||
|             formatted_item = json.dumps(item) | ||||
|             items[idx] = formatted_item | ||||
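
These helpers back the parameterized queries used in the route changes above; a quick illustrative use, mirroring the `/api/download` handler:

    from api.backend.utils import format_list_for_query
    from api.backend.database.common import query

    ids = ["abc123", "def456"]
    # format_list_for_query(ids) -> "(?,?)", so the values stay bound parameters
    job_query = f"SELECT * FROM jobs WHERE id IN {format_list_for_query(ids)}"
    results = query(job_query, tuple(ids))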
|   | ||||
| @@ -8,6 +8,8 @@ import logging | ||||
| import sys | ||||
| import traceback | ||||
|  | ||||
| from api.backend.database.startup import init_database | ||||
|  | ||||
| logging.basicConfig(stream=sys.stdout, level=logging.INFO) | ||||
| LOG = logging.getLogger(__name__) | ||||
|  | ||||
| @@ -24,6 +26,8 @@ async def process_job(): | ||||
|                 job["job_options"]["custom_headers"], | ||||
|                 job["job_options"]["multi_page_scrape"], | ||||
|                 job["job_options"]["proxies"], | ||||
|                 job["job_options"]["site_map"], | ||||
|                 job["job_options"]["collect_media"], | ||||
|             ) | ||||
|             LOG.info( | ||||
|                 f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}" | ||||
| @@ -40,6 +44,9 @@ async def process_job(): | ||||
|  | ||||
| async def main(): | ||||
|     LOG.info("Starting job worker...") | ||||
|  | ||||
|     init_database() | ||||
|  | ||||
|     while True: | ||||
|         await process_job() | ||||
|         await asyncio.sleep(5) | ||||
|   | ||||

cypress/e2e/authentication.cy.ts (new file, 60 lines)
							| @@ -0,0 +1,60 @@ | ||||
| describe("Authentication", () => { | ||||
|   it("should register", () => { | ||||
|     cy.intercept("POST", "/api/signup").as("signup"); | ||||
|  | ||||
|     cy.visit("/").then(() => { | ||||
|       cy.get("button").contains("Login").click(); | ||||
|       cy.url().should("include", "/login"); | ||||
|  | ||||
|       cy.get("form").should("be.visible"); | ||||
|       cy.get("button") | ||||
|         .contains("No Account? Sign up") | ||||
|         .should("be.visible") | ||||
|         .click(); | ||||
|  | ||||
|       cy.get("input[name='email']").type("test@test.com"); | ||||
|       cy.get("input[name='password']").type("password"); | ||||
|       cy.get("input[name='fullName']").type("John Doe"); | ||||
|       cy.get("button[type='submit']").contains("Signup").click(); | ||||
|  | ||||
|       cy.wait("@signup").then((interception) => { | ||||
|         if (!interception.response) { | ||||
|           cy.log("No response received!"); | ||||
|           throw new Error("signup request did not return a response"); | ||||
|         } | ||||
|  | ||||
|         cy.log("Response status: " + interception.response.statusCode); | ||||
|         cy.log("Response body: " + JSON.stringify(interception.response.body)); | ||||
|  | ||||
|         expect(interception.response.statusCode).to.eq(200); | ||||
|       }); | ||||
|     }); | ||||
|   }); | ||||
|  | ||||
|   it("should login", () => { | ||||
|     cy.intercept("POST", "/api/token").as("token"); | ||||
|  | ||||
|     cy.visit("/").then(() => { | ||||
|       cy.get("button") | ||||
|         .contains("Login") | ||||
|         .click() | ||||
|         .then(() => { | ||||
|           cy.get("input[name='email']").type("test@test.com"); | ||||
|           cy.get("input[name='password']").type("password"); | ||||
|           cy.get("button[type='submit']").contains("Login").click(); | ||||
|  | ||||
|           cy.wait("@token").then((interception) => { | ||||
|             if (!interception.response) { | ||||
|               cy.log("No response received!"); | ||||
|               throw new Error("token request did not return a response"); | ||||
|             } | ||||
|  | ||||
|             cy.log("Response status: " + interception.response.statusCode); | ||||
|             cy.log("Response body: " + JSON.stringify(interception.response.body)); | ||||
|  | ||||
|             expect(interception.response.statusCode).to.eq(200); | ||||
|           }); | ||||
|         }); | ||||
|     }); | ||||
|   }); | ||||
| }); | ||||
| @@ -1,19 +1,34 @@ | ||||
| describe("Job", () => { | ||||
| describe.only("Job", () => { | ||||
|   it("should create a job", () => { | ||||
|     cy.intercept("POST", "/api/submit-scrape-job").as("submitScrapeJob"); | ||||
|  | ||||
|     cy.visit("/"); | ||||
|  | ||||
|     const input = cy.get('[data-cy="url-input"]'); | ||||
|     input.type("https://example.com"); | ||||
|     cy.get('[data-cy="url-input"]').type("https://example.com"); | ||||
|     cy.get('[data-cy="name-field"]').type("example"); | ||||
|     cy.get('[data-cy="xpath-field"]').type("//body"); | ||||
|     cy.get('[data-cy="add-button"]').click(); | ||||
|  | ||||
|     const nameField = cy.get('[data-cy="name-field"]'); | ||||
|     const xPathField = cy.get('[data-cy="xpath-field"]'); | ||||
|     const addButton = cy.get('[data-cy="add-button"]'); | ||||
|     cy.contains("Submit").click(); | ||||
|  | ||||
|     nameField.type("example"); | ||||
|     xPathField.type("//body"); | ||||
|     addButton.click(); | ||||
|     cy.wait("@submitScrapeJob").then((interception) => { | ||||
|       if (!interception.response) { | ||||
|         cy.log("No response received!"); | ||||
|         cy.log("Request body: " + JSON.stringify(interception.request?.body)); | ||||
|         throw new Error("submitScrapeJob request did not return a response"); | ||||
|       } | ||||
|  | ||||
|     const submit = cy.contains("Submit"); | ||||
|     submit.click(); | ||||
|       cy.log("Response status: " + interception.response.statusCode); | ||||
|       cy.log("Response body: " + JSON.stringify(interception.response.body)); | ||||
|  | ||||
|       expect(interception.response.statusCode).to.eq(200); | ||||
|     }); | ||||
|  | ||||
|     cy.get("li").contains("Previous Jobs").click(); | ||||
|  | ||||
|     cy.contains("div", "https://example.com", { timeout: 10000 }).should( | ||||
|       "exist" | ||||
|     ); | ||||
|     cy.contains("div", "Completed", { timeout: 20000 }).should("exist"); | ||||
|   }); | ||||
| }); | ||||
|   | ||||
| @@ -34,4 +34,4 @@ | ||||
| //       visit(originalFn: CommandOriginalFn, url: string, options: Partial<VisitOptions>): Chainable<Element> | ||||
| //     } | ||||
| //   } | ||||
| // } | ||||
| // } | ||||
|   | ||||
| @@ -2,12 +2,6 @@ version: "3" | ||||
| services: | ||||
|   scraperr: | ||||
|     command: ["npm", "run", "dev"] | ||||
|     labels: | ||||
|       - "traefik.enable=true" | ||||
|       - "traefik.http.routers.scraperr.rule=Host(`localhost`)" | ||||
|       - "traefik.http.routers.scraperr.entrypoints=web" | ||||
|       - "traefik.http.services.scraperr.loadbalancer.server.port=3000" | ||||
|       - "traefik.http.routers.scraperr.tls=false" | ||||
|     volumes: | ||||
|       - "$PWD/src:/app/src" | ||||
|       - "$PWD/public:/app/public" | ||||
| @@ -16,7 +10,8 @@ services: | ||||
|       - "$PWD/package-lock.json:/app/package-lock.json" | ||||
|       - "$PWD/tsconfig.json:/app/tsconfig.json" | ||||
|   scraperr_api: | ||||
|     ports: | ||||
|       - "8000:8000" | ||||
|     environment: | ||||
|       - LOG_LEVEL=INFO | ||||
|     volumes: | ||||
|       - "$PWD/api:/project/api" | ||||
|       - "$PWD/scraping:/project/scraping" | ||||
|   | ||||
| @@ -1,16 +1,18 @@ | ||||
| services: | ||||
|   scraperr: | ||||
|     depends_on: | ||||
|       - scraperr_api | ||||
|     image: jpyles0524/scraperr:latest | ||||
|     build: | ||||
|       context: . | ||||
|       dockerfile: docker/frontend/Dockerfile | ||||
|     container_name: scraperr | ||||
|     command: ["npm", "run", "start"] | ||||
|     labels: | ||||
|       - "traefik.enable=true" | ||||
|       - "traefik.http.routers.scraperr.rule=Host(`localhost`)" # change this to your domain, if not running on localhost | ||||
|       - "traefik.http.routers.scraperr.entrypoints=web" # websecure if using https | ||||
|       - "traefik.http.services.scraperr.loadbalancer.server.port=3000" | ||||
|     environment: | ||||
|       - NEXT_PUBLIC_API_URL=http://scraperr_api:8000 # your API URL | ||||
|       - SERVER_URL=http://scraperr_api:8000 # your docker container API URL | ||||
|     ports: | ||||
|       - 80:3000 | ||||
|     networks: | ||||
|       - web | ||||
|   scraperr_api: | ||||
| @@ -21,45 +23,16 @@ services: | ||||
|       dockerfile: docker/api/Dockerfile | ||||
|     environment: | ||||
|       - LOG_LEVEL=INFO | ||||
|       - OLLAMA_URL=http://ollama:11434 | ||||
|       - OLLAMA_MODEL=phi3 | ||||
|       - MONGODB_URI=mongodb://root:example@webscrape-mongo:27017 # used to access MongoDB | ||||
|       - SECRET_KEY=your_secret_key # used to encode authentication tokens (can be a random string) | ||||
|       - SECRET_KEY=MRo9PfasPibnqFeK4Oswb6Z+PhFmjzdvxZzwdAkbf/Y= # used to encode authentication tokens (can be a random string) | ||||
|       - ALGORITHM=HS256 # authentication encoding algorithm | ||||
|       - ACCESS_TOKEN_EXPIRE_MINUTES=600 # access token expire minutes | ||||
|     container_name: scraperr_api | ||||
|     volumes: | ||||
|       - /var/run/docker.sock:/var/run/docker.sock | ||||
|     labels: | ||||
|       - "traefik.enable=true" | ||||
|       - "traefik.http.routers.scraperr_api.rule=Host(`localhost`) && PathPrefix(`/api`)" # change this to your domain, if not running on localhost | ||||
|       - "traefik.http.routers.scraperr_api.entrypoints=web" # websecure if using https | ||||
|       - "traefik.http.middlewares.api-stripprefix.stripprefix.prefixes=/api" | ||||
|       - "traefik.http.routers.scraperr_api.middlewares=api-stripprefix" | ||||
|       - "traefik.http.services.scraperr_api.loadbalancer.server.port=8000" | ||||
|     networks: | ||||
|       - web | ||||
|   traefik: | ||||
|     image: traefik:latest | ||||
|     container_name: traefik | ||||
|     command: | ||||
|       - "--providers.docker=true" | ||||
|       - "--entrypoints.web.address=:80" | ||||
|       - "--entrypoints.websecure.address=:443" | ||||
|     ports: | ||||
|       - 80:80 | ||||
|       - 443:443 | ||||
|       - 8000:8000 | ||||
|     volumes: | ||||
|       - /var/run/docker.sock:/var/run/docker.sock:ro" | ||||
|     networks: | ||||
|       - web | ||||
|   mongo: | ||||
|     container_name: webscrape-mongo | ||||
|     image: mongo | ||||
|     restart: always | ||||
|     environment: | ||||
|       MONGO_INITDB_ROOT_USERNAME: root | ||||
|       MONGO_INITDB_ROOT_PASSWORD: example | ||||
|       - "$PWD/data:/project/data" | ||||
|       - "$PWD/media:/project/media" | ||||
|       - /var/run/docker.sock:/var/run/docker.sock | ||||
|     networks: | ||||
|       - web | ||||
| networks: | ||||
|   | ||||
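
The committed SECRET_KEY above is only an example value; anyone deploying this should substitute their own random string, which can be generated with, for instance:

    # One-off helper (not part of the repo) for producing a SECRET_KEY value
    import secrets

    print(secrets.token_urlsafe(32))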
| @@ -1,5 +1,5 @@ | ||||
| # Build next dependencies | ||||
| FROM node:latest | ||||
| FROM node:23.1 | ||||
| WORKDIR /app | ||||
|  | ||||
| COPY package*.json ./ | ||||
| @@ -15,6 +15,4 @@ COPY src /app/src | ||||
|  | ||||
| RUN npm run build | ||||
|  | ||||
| EXPOSE 3000 | ||||
|  | ||||
| # CMD [ "npm", "run" ] | ||||
| EXPOSE 3000 | ||||
| @@ -1,4 +0,0 @@ | ||||
| tls: | ||||
|   certificates: | ||||
|     - certFile: /etc/certs/ssl-cert.pem | ||||
|       keyFile: /etc/certs/ssl-cert.key | ||||

ipython.py (deleted, 37 lines)
							| @@ -1,37 +0,0 @@ | ||||
| # STL | ||||
| import os | ||||
|  | ||||
| # PDM | ||||
| import boto3 | ||||
| from dotenv import load_dotenv | ||||
|  | ||||
| # Load environment variables from .env file | ||||
| load_dotenv() | ||||
|  | ||||
|  | ||||
| def test_insert_and_delete(): | ||||
|     # Get environment variables | ||||
|     region_name = os.getenv("AWS_REGION") | ||||
|     # Initialize DynamoDB resource | ||||
|     dynamodb = boto3.resource("dynamodb", region_name=region_name) | ||||
|     table = dynamodb.Table("scrape") | ||||
|  | ||||
|     # Item to insert | ||||
|     item = { | ||||
|         "id": "123",  # Replace with the appropriate id value | ||||
|         "attribute1": "value1", | ||||
|         "attribute2": "value2", | ||||
|         # Add more attributes as needed | ||||
|     } | ||||
|  | ||||
|     # Insert the item | ||||
|     table.put_item(Item=item) | ||||
|     print(f"Inserted item: {item}") | ||||
|  | ||||
|     # Delete the item | ||||
|     table.delete_item(Key={"id": "123"})  # Replace with the appropriate id value | ||||
|     print(f"Deleted item with id: {item['id']}") | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     test_insert_and_delete() | ||||

package-lock.json (generated, 15660 lines; diff suppressed because it is too large)
package.json (12 lines changed)
							| @@ -19,6 +19,7 @@ | ||||
|     "bootstrap": "^5.3.0", | ||||
|     "chart.js": "^4.4.3", | ||||
|     "cookie": "^0.6.0", | ||||
|     "dotenv": "^16.5.0", | ||||
|     "framer-motion": "^4.1.17", | ||||
|     "js-cookie": "^3.0.5", | ||||
|     "next": "^14.2.4", | ||||
| @@ -31,7 +32,6 @@ | ||||
|     "react-modal-image": "^2.6.0", | ||||
|     "react-router": "^6.14.1", | ||||
|     "react-router-dom": "^6.14.1", | ||||
|     "react-scripts": "^5.0.1", | ||||
|     "react-spinners": "^0.14.1", | ||||
|     "typescript": "^4.9.5", | ||||
|     "web-vitals": "^2.1.4" | ||||
| @@ -63,12 +63,18 @@ | ||||
|     ] | ||||
|   }, | ||||
|   "devDependencies": { | ||||
|     "@types/cypress": "^0.1.6", | ||||
|     "@types/cypress": "^1.1.6", | ||||
|     "@types/js-cookie": "^3.0.6", | ||||
|     "cypress": "^13.15.0", | ||||
|     "autoprefixer": "^10.4.21", | ||||
|     "cypress": "^13.17.0", | ||||
|     "eslint": "^9.26.0", | ||||
|     "postcss": "^8.5.3", | ||||
|     "tailwindcss": "^3.3.5" | ||||
|   }, | ||||
|   "overrides": { | ||||
|     "react-refresh": "0.11.0" | ||||
|   }, | ||||
|   "resolutions": { | ||||
|     "postcss": "^8.4.31" | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -2,9 +2,7 @@ | ||||
| name = "web-scrape" | ||||
| version = "0.1.0" | ||||
| description = "" | ||||
| authors = [ | ||||
|     {name = "Jayden Pyles", email = "jpylesbuisness@gmail.com"}, | ||||
| ] | ||||
| authors = [{ name = "Jayden Pyles", email = "jpylesbuisness@gmail.com" }] | ||||
| dependencies = [ | ||||
|     "uvicorn>=0.30.1", | ||||
|     "fastapi>=0.111.0", | ||||
| @@ -39,20 +37,19 @@ dependencies = [ | ||||
|     "exceptiongroup>=1.2.2", | ||||
|     "Faker>=30.6.0", | ||||
|     "pytest-asyncio>=0.24.0", | ||||
|     "python-multipart>=0.0.12", | ||||
|     "python-multipart>=0.0.1", | ||||
|     "bcrypt==4.0.1", | ||||
|     "apscheduler>=3.11.0", | ||||
| ] | ||||
| requires-python = ">=3.10" | ||||
| readme = "README.md" | ||||
| license = {text = "MIT"} | ||||
| license = { text = "MIT" } | ||||
|  | ||||
| [tool.pdm] | ||||
| distribution = true | ||||
|  | ||||
| [tool.pdm.dev-dependencies] | ||||
| dev = [ | ||||
|     "ipython>=8.26.0", | ||||
|     "pytest>=8.3.3", | ||||
| ] | ||||
| dev = ["ipython>=8.26.0", "pytest>=8.3.3"] | ||||
| [tool.pyright] | ||||
| include = ["./api/backend/"] | ||||
| exclude = ["**/node_modules", "**/__pycache__"] | ||||
| @@ -60,14 +57,42 @@ ignore = [] | ||||
| defineConstant = { DEBUG = true } | ||||
| stubPath = "" | ||||
|  | ||||
| reportUnknownMemberType= false | ||||
| reportMissingImports = true | ||||
| reportMissingTypeStubs = false | ||||
| reportAny = false | ||||
| reportCallInDefaultInitializer = false | ||||
| # Type checking strictness | ||||
| typeCheckingMode = "strict"                        # Enables strict type checking mode | ||||
| reportPrivateUsage = "none" | ||||
| reportMissingTypeStubs = "none" | ||||
| reportUntypedFunctionDecorator = "error" | ||||
| reportUntypedClassDecorator = "error" | ||||
| reportUntypedBaseClass = "error" | ||||
| reportInvalidTypeVarUse = "error" | ||||
| reportUnnecessaryTypeIgnoreComment = "information" | ||||
| reportUnknownVariableType = "none" | ||||
| reportUnknownMemberType = "none" | ||||
| reportUnknownParameterType = "none" | ||||
|  | ||||
| pythonVersion = "3.9" | ||||
| pythonPlatform = "Linux" | ||||
| # Additional checks | ||||
| reportImplicitStringConcatenation = "error" | ||||
| reportInvalidStringEscapeSequence = "error" | ||||
| reportMissingImports = "error" | ||||
| reportMissingModuleSource = "error" | ||||
| reportOptionalCall = "error" | ||||
| reportOptionalIterable = "error" | ||||
| reportOptionalMemberAccess = "error" | ||||
| reportOptionalOperand = "error" | ||||
| reportOptionalSubscript = "error" | ||||
| reportTypedDictNotRequiredAccess = "error" | ||||
|  | ||||
| # Function return type checking | ||||
| reportIncompleteStub = "error" | ||||
| reportIncompatibleMethodOverride = "error" | ||||
| reportInvalidStubStatement = "error" | ||||
| reportInconsistentOverload = "error" | ||||
|  | ||||
| # Misc settings | ||||
| pythonVersion = "3.10"           # Matches your Python version from pyproject.toml | ||||
| strictListInference = true | ||||
| strictDictionaryInference = true | ||||
| strictSetInference = true | ||||
|  | ||||
|  | ||||
| [tool.isort] | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import React from "react"; | ||||
| import { useAuth } from "../../../contexts/AuthContext"; | ||||
| import { Box, Drawer, Divider } from "@mui/material"; | ||||
| import { Box, Drawer } from "@mui/material"; | ||||
|  | ||||
| import { QuickSettings } from "../../nav/quick-settings"; | ||||
| import { NavItems } from "./nav-items/nav-items"; | ||||
|   | ||||
| @@ -7,6 +7,7 @@ import TerminalIcon from "@mui/icons-material/Terminal"; | ||||
| import BarChart from "@mui/icons-material/BarChart"; | ||||
| import AutoAwesomeIcon from "@mui/icons-material/AutoAwesome"; | ||||
| import { List } from "@mui/material"; | ||||
| import { Schedule } from "@mui/icons-material"; | ||||
|  | ||||
| const items = [ | ||||
|   { | ||||
| @@ -34,6 +35,11 @@ const items = [ | ||||
|     text: "View App Logs", | ||||
|     href: "/logs", | ||||
|   }, | ||||
|   { | ||||
|     icon: <Schedule />, | ||||
|     text: "Cron Jobs", | ||||
|     href: "/cron-jobs", | ||||
|   }, | ||||
| ]; | ||||
|  | ||||
| export const NavItems = () => { | ||||
|   | ||||
| @@ -15,6 +15,7 @@ import { | ||||
|   Button, | ||||
|   Tooltip, | ||||
|   IconButton, | ||||
|   TableContainer, | ||||
| } from "@mui/material"; | ||||
| import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; | ||||
| import StarIcon from "@mui/icons-material/Star"; | ||||
| @@ -52,145 +53,155 @@ export const JobQueue = ({ | ||||
|   const router = useRouter(); | ||||
|  | ||||
|   return ( | ||||
|     <Table sx={{ tableLayout: "fixed", width: "100%" }}> | ||||
|       <TableHead> | ||||
|         <TableRow> | ||||
|           <TableCell>Select</TableCell> | ||||
|           <TableCell>Id</TableCell> | ||||
|           <TableCell>Url</TableCell> | ||||
|           <TableCell>Elements</TableCell> | ||||
|           <TableCell>Result</TableCell> | ||||
|           <TableCell>Time Created</TableCell> | ||||
|           <TableCell>Status</TableCell> | ||||
|           <TableCell>Actions</TableCell> | ||||
|         </TableRow> | ||||
|       </TableHead> | ||||
|       <TableBody> | ||||
|         {filteredJobs.map((row, index) => ( | ||||
|           <TableRow key={index}> | ||||
|             <TableCell padding="checkbox"> | ||||
|               <Checkbox | ||||
|                 checked={selectedJobs.has(row.id)} | ||||
|                 onChange={() => onSelectJob(row.id)} | ||||
|               /> | ||||
|               <Tooltip title="Chat with AI"> | ||||
|                 <span> | ||||
|                   <IconButton | ||||
|                     onClick={() => { | ||||
|                       router.push({ | ||||
|                         pathname: "/chat", | ||||
|                         query: { | ||||
|                           job: row.id, | ||||
|                         }, | ||||
|                       }); | ||||
|                     }} | ||||
|                   > | ||||
|                     <AutoAwesome /> | ||||
|                   </IconButton> | ||||
|                 </span> | ||||
|               </Tooltip> | ||||
|               <Tooltip title="Favorite Job"> | ||||
|                 <span> | ||||
|                   <IconButton | ||||
|                     color={row.favorite ? "warning" : "default"} | ||||
|                     onClick={() => { | ||||
|                       onFavorite([row.id], "favorite", !row.favorite); | ||||
|                       row.favorite = !row.favorite; | ||||
|                     }} | ||||
|                   > | ||||
|                     <StarIcon /> | ||||
|                   </IconButton> | ||||
|                 </span> | ||||
|               </Tooltip> | ||||
|             </TableCell> | ||||
|             <TableCell sx={{ maxWidth: 100, overflow: "auto" }}> | ||||
|               <Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.id}</Box> | ||||
|             </TableCell> | ||||
|             <TableCell sx={{ maxWidth: 200, overflow: "auto" }}> | ||||
|               <Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.url}</Box> | ||||
|             </TableCell> | ||||
|             <TableCell sx={{ maxWidth: 150, overflow: "auto" }}> | ||||
|               <Box sx={{ maxHeight: 100, overflow: "auto" }}> | ||||
|                 {JSON.stringify(row.elements)} | ||||
|               </Box> | ||||
|             </TableCell> | ||||
|             <TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}> | ||||
|               <Accordion sx={{ margin: 0, padding: 0.5 }}> | ||||
|                 <AccordionSummary | ||||
|                   expandIcon={<ExpandMoreIcon />} | ||||
|                   aria-controls="panel1a-content" | ||||
|                   id="panel1a-header" | ||||
|                   sx={{ | ||||
|                     minHeight: 0, | ||||
|                     "&.Mui-expanded": { minHeight: 0 }, | ||||
|                   }} | ||||
|                 > | ||||
|                   <Box | ||||
|                     sx={{ | ||||
|                       maxHeight: 150, | ||||
|                       overflow: "auto", | ||||
|                       width: "100%", | ||||
|                     }} | ||||
|                   > | ||||
|                     <Typography sx={{ fontSize: "0.875rem" }}> | ||||
|                       Show Result | ||||
|                     </Typography> | ||||
|                   </Box> | ||||
|                 </AccordionSummary> | ||||
|                 <AccordionDetails sx={{ padding: 1 }}> | ||||
|                   <Box sx={{ maxHeight: 200, overflow: "auto" }}> | ||||
|                     <Typography | ||||
|                       sx={{ | ||||
|                         fontSize: "0.875rem", | ||||
|                         whiteSpace: "pre-wrap", | ||||
|     <TableContainer component={Box} sx={{ maxHeight: "90dvh" }}> | ||||
|       <Table sx={{ tableLayout: "fixed", width: "100%" }}> | ||||
|         <TableHead> | ||||
|           <TableRow> | ||||
|             <TableCell>Select</TableCell> | ||||
|             <TableCell>Id</TableCell> | ||||
|             <TableCell>Url</TableCell> | ||||
|             <TableCell>Elements</TableCell> | ||||
|             <TableCell>Result</TableCell> | ||||
|             <TableCell>Time Created</TableCell> | ||||
|             <TableCell>Status</TableCell> | ||||
|             <TableCell>Actions</TableCell> | ||||
|           </TableRow> | ||||
|         </TableHead> | ||||
|         <TableBody sx={{ overflow: "auto" }}> | ||||
|           {filteredJobs.map((row, index) => ( | ||||
|             <TableRow key={index}> | ||||
|               <TableCell padding="checkbox"> | ||||
|                 <Checkbox | ||||
|                   checked={selectedJobs.has(row.id)} | ||||
|                   onChange={() => onSelectJob(row.id)} | ||||
|                 /> | ||||
|                 <Tooltip title="Chat with AI"> | ||||
|                   <span> | ||||
|                     <IconButton | ||||
|                       onClick={() => { | ||||
|                         router.push({ | ||||
|                           pathname: "/chat", | ||||
|                           query: { | ||||
|                             job: row.id, | ||||
|                           }, | ||||
|                         }); | ||||
|                       }} | ||||
|                     > | ||||
|                       {JSON.stringify(row.result, null, 2)} | ||||
|                     </Typography> | ||||
|                   </Box> | ||||
|                 </AccordionDetails> | ||||
|               </Accordion> | ||||
|             </TableCell> | ||||
|             <TableCell sx={{ maxWidth: 150, overflow: "auto" }}> | ||||
|               <Box sx={{ maxHeight: 100, overflow: "auto" }}> | ||||
|                 {new Date(row.time_created).toLocaleString()} | ||||
|               </Box> | ||||
|             </TableCell> | ||||
|             <TableCell sx={{ maxWidth: 50, overflow: "auto" }}> | ||||
|               <Box sx={{ maxHeight: 100, overflow: "auto" }}> | ||||
|                 <Box | ||||
|                   className="rounded-md p-2 text-center" | ||||
|                   sx={{ bgcolor: colors[row.status] }} | ||||
|                 > | ||||
|                   {row.status} | ||||
|                       <AutoAwesome /> | ||||
|                     </IconButton> | ||||
|                   </span> | ||||
|                 </Tooltip> | ||||
|                 <Tooltip title="Favorite Job"> | ||||
|                   <span> | ||||
|                     <IconButton | ||||
|                       color={row.favorite ? "warning" : "default"} | ||||
|                       onClick={() => { | ||||
|                         onFavorite([row.id], "favorite", !row.favorite); | ||||
|                         row.favorite = !row.favorite; | ||||
|                       }} | ||||
|                     > | ||||
|                       <StarIcon /> | ||||
|                     </IconButton> | ||||
|                   </span> | ||||
|                 </Tooltip> | ||||
|               </TableCell> | ||||
|               <TableCell sx={{ maxWidth: 100, overflow: "auto" }}> | ||||
|                 <Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.id}</Box> | ||||
|               </TableCell> | ||||
|               <TableCell sx={{ maxWidth: 200, overflow: "auto" }}> | ||||
|                 <Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.url}</Box> | ||||
|               </TableCell> | ||||
|               <TableCell sx={{ maxWidth: 150, overflow: "auto" }}> | ||||
|                 <Box sx={{ maxHeight: 100, overflow: "auto" }}> | ||||
|                   {JSON.stringify(row.elements)} | ||||
|                 </Box> | ||||
|               </Box> | ||||
|             </TableCell> | ||||
|             <TableCell sx={{ maxWidth: 150, overflow: "auto" }}> | ||||
|               <Box sx={{ display: "flex", gap: 1 }}> | ||||
|                 <Button | ||||
|                   onClick={() => { | ||||
|                     onDownload([row.id]); | ||||
|                   }} | ||||
|                   size="small" | ||||
|                   sx={{ minWidth: 0, padding: "4px 8px" }} | ||||
|                 > | ||||
|                   Download | ||||
|                 </Button> | ||||
|                 <Button | ||||
|                   onClick={() => | ||||
|                     onNavigate(row.elements, row.url, row.job_options) | ||||
|                   } | ||||
|                   size="small" | ||||
|                   sx={{ minWidth: 0, padding: "4px 8px" }} | ||||
|                 > | ||||
|                   Rerun | ||||
|                 </Button> | ||||
|               </Box> | ||||
|             </TableCell> | ||||
|           </TableRow> | ||||
|         ))} | ||||
|       </TableBody> | ||||
|     </Table> | ||||
|               </TableCell> | ||||
|               <TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}> | ||||
|                 <Accordion sx={{ margin: 0, padding: 0.5 }}> | ||||
|                   <AccordionSummary | ||||
|                     expandIcon={<ExpandMoreIcon />} | ||||
|                     aria-controls="panel1a-content" | ||||
|                     id="panel1a-header" | ||||
|                     sx={{ | ||||
|                       minHeight: 0, | ||||
|                       "&.Mui-expanded": { minHeight: 0 }, | ||||
|                     }} | ||||
|                   > | ||||
|                     <Box | ||||
|                       sx={{ | ||||
|                         maxHeight: 150, | ||||
|                         overflow: "auto", | ||||
|                         width: "100%", | ||||
|                       }} | ||||
|                     > | ||||
|                       <Typography sx={{ fontSize: "0.875rem" }}> | ||||
|                         Show Result | ||||
|                       </Typography> | ||||
|                     </Box> | ||||
|                   </AccordionSummary> | ||||
|                   <AccordionDetails sx={{ padding: 1 }}> | ||||
|                     <Box sx={{ maxHeight: 200, overflow: "auto" }}> | ||||
|                       <Typography | ||||
|                         sx={{ | ||||
|                           fontSize: "0.875rem", | ||||
|                           whiteSpace: "pre-wrap", | ||||
|                         }} | ||||
|                       > | ||||
|                         {JSON.stringify(row.result, null, 2)} | ||||
|                       </Typography> | ||||
|                     </Box> | ||||
|                   </AccordionDetails> | ||||
|                 </Accordion> | ||||
|               </TableCell> | ||||
|               <TableCell sx={{ maxWidth: 150, overflow: "auto" }}> | ||||
|                 <Box sx={{ maxHeight: 100, overflow: "auto" }}> | ||||
|                   {new Date(row.time_created).toLocaleString()} | ||||
|                 </Box> | ||||
|               </TableCell> | ||||
|               <TableCell sx={{ maxWidth: 50, overflow: "auto" }}> | ||||
|                 <Box sx={{ maxHeight: 100, overflow: "auto" }}> | ||||
|                   <Box | ||||
|                     className="rounded-md p-2 text-center" | ||||
|                     sx={{ bgcolor: colors[row.status] }} | ||||
|                   > | ||||
|                     {row.status} | ||||
|                   </Box> | ||||
|                 </Box> | ||||
|               </TableCell> | ||||
|               <TableCell sx={{ maxWidth: 150, overflow: "auto" }}> | ||||
|                 <Box sx={{ display: "flex", gap: 1 }}> | ||||
|                   <Button | ||||
|                     onClick={() => { | ||||
|                       onDownload([row.id]); | ||||
|                     }} | ||||
|                     size="small" | ||||
|                     sx={{ | ||||
|                       minWidth: 0, | ||||
|                       padding: "4px 8px", | ||||
|                       fontSize: "0.625rem", | ||||
|                     }} | ||||
|                   > | ||||
|                     Download | ||||
|                   </Button> | ||||
|                   <Button | ||||
|                     onClick={() => | ||||
|                       onNavigate(row.elements, row.url, row.job_options) | ||||
|                     } | ||||
|                     size="small" | ||||
|                     sx={{ | ||||
|                       minWidth: 0, | ||||
|                       padding: "4px 8px", | ||||
|                       fontSize: "0.625rem", | ||||
|                     }} | ||||
|                   > | ||||
|                     Rerun | ||||
|                   </Button> | ||||
|                 </Box> | ||||
|               </TableCell> | ||||
|             </TableRow> | ||||
|           ))} | ||||
|         </TableBody> | ||||
|       </Table> | ||||
|     </TableContainer> | ||||
|   ); | ||||
| }; | ||||
|   | ||||
| @@ -48,10 +48,10 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => { | ||||
|   const router = useRouter(); | ||||
|  | ||||
|   const handleDownload = async (ids: string[]) => { | ||||
|     const response = await fetch(`${Constants.DOMAIN}/api/download`, { | ||||
|     const response = await fetch("/api/download", { | ||||
|       method: "POST", | ||||
|       headers: { "Content-Type": "application/json" }, | ||||
|       body: JSON.stringify({ ids: ids }), | ||||
|       body: JSON.stringify({ data: { ids: ids } }), | ||||
|     }); | ||||
|  | ||||
|     if (response.ok) { | ||||
| @@ -104,10 +104,10 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => { | ||||
|   }; | ||||
|  | ||||
|   const handleDeleteSelected = async () => { | ||||
|     const response = await fetch(`${Constants.DOMAIN}/api/delete-scrape-jobs`, { | ||||
|     const response = await fetch("/api/delete", { | ||||
|       method: "POST", | ||||
|       headers: { "Content-Type": "application/json" }, | ||||
|       body: JSON.stringify({ ids: Array.from(selectedJobs) }), | ||||
|       body: JSON.stringify({ data: { ids: Array.from(selectedJobs) } }), | ||||
|     }); | ||||
|  | ||||
|     if (response.ok) { | ||||
| @@ -142,13 +142,13 @@ export const JobTable: React.FC<JobTableProps> = ({ jobs, setJobs }) => { | ||||
|       value: value, | ||||
|     }; | ||||
|  | ||||
|     await fetch(`${Constants.DOMAIN}/api/update`, { | ||||
|     await fetch("/api/update", { | ||||
|       method: "POST", | ||||
|       headers: { | ||||
|         "Content-Type": "application/json", | ||||
|         Authorization: `Bearer ${token}`, | ||||
|       }, | ||||
|       body: JSON.stringify(postBody), | ||||
|       body: JSON.stringify({ data: postBody }), | ||||
|     }); | ||||
|   }; | ||||
|  | ||||
|   | ||||
| @@ -14,19 +14,24 @@ export const LogContainer: React.FC<LogContainerProps> = ({ initialLogs }) => { | ||||
|   const logsContainerRef = useRef<HTMLDivElement | null>(null); | ||||
|  | ||||
|   useEffect(() => { | ||||
|     const eventSource = new EventSource(`${Constants.DOMAIN}/api/logs`); | ||||
|     const eventSource = new EventSource(`/api/logs`); | ||||
|  | ||||
|     setLogs(""); | ||||
|  | ||||
|     eventSource.onmessage = (event) => { | ||||
|       setLogs((prevLogs) => prevLogs + event.data + "\n"); | ||||
|  | ||||
|       if (logsContainerRef.current) { | ||||
|         logsContainerRef.current.scrollTop = | ||||
|           logsContainerRef.current.scrollHeight; | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     eventSource.onerror = () => { | ||||
|     eventSource.onopen = (e) => { | ||||
|     }; | ||||
|  | ||||
|     eventSource.onerror = (error) => { | ||||
|       console.error("EventSource failed:", error); | ||||
|       eventSource.close(); | ||||
|     }; | ||||
|  | ||||
|   | ||||
| @@ -0,0 +1,182 @@ | ||||
| import { Job } from "@/types"; | ||||
| import { | ||||
|   Button, | ||||
|   Dialog, | ||||
|   DialogTitle, | ||||
|   DialogContent, | ||||
|   TextField, | ||||
|   Snackbar, | ||||
|   Alert, | ||||
| } from "@mui/material"; | ||||
| import Cookies from "js-cookie"; | ||||
| import { useState } from "react"; | ||||
|  | ||||
| export type CreateCronJobsProps = { | ||||
|   availableJobs: Job[]; | ||||
|   user: any; | ||||
| }; | ||||
|  | ||||
| export const CreateCronJobs = ({ | ||||
|   availableJobs, | ||||
|   user, | ||||
| }: CreateCronJobsProps) => { | ||||
|   const [open, setOpen] = useState(false); | ||||
|  | ||||
|   return ( | ||||
|     <> | ||||
|       <Button | ||||
|         variant="contained" | ||||
|         color="primary" | ||||
|         onClick={() => setOpen(true)} | ||||
|         sx={{ borderRadius: 2 }} | ||||
|       > | ||||
|         Create Cron Job | ||||
|       </Button> | ||||
|       <CreateCronJobDialog | ||||
|         open={open} | ||||
|         onClose={() => setOpen(false)} | ||||
|         availableJobs={availableJobs} | ||||
|         user={user} | ||||
|       /> | ||||
|     </> | ||||
|   ); | ||||
| }; | ||||
|  | ||||
| const CreateCronJobDialog = ({ | ||||
|   open, | ||||
|   onClose, | ||||
|   availableJobs, | ||||
|   user, | ||||
| }: { | ||||
|   open: boolean; | ||||
|   onClose: () => void; | ||||
|   availableJobs: Job[]; | ||||
|   user: any; | ||||
| }) => { | ||||
|   const [cronExpression, setCronExpression] = useState(""); | ||||
|   const [jobId, setJobId] = useState(""); | ||||
|   const [successOpen, setSuccessOpen] = useState(false); | ||||
|   const [isSubmitting, setIsSubmitting] = useState(false); | ||||
|   const [error, setError] = useState(""); | ||||
|  | ||||
|   const handleSubmit = async () => { | ||||
|     if (!cronExpression || !jobId) { | ||||
|       setError("Please fill in all fields"); | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     setIsSubmitting(true); | ||||
|     const token = Cookies.get("token"); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch("/api/schedule-cron-job", { | ||||
|         method: "POST", | ||||
|         headers: { | ||||
|           "Content-Type": "application/json", | ||||
|           Authorization: `Bearer ${token}`, | ||||
|         }, | ||||
|         body: JSON.stringify({ | ||||
|           data: { | ||||
|             cron_expression: cronExpression, | ||||
|             job_id: jobId, | ||||
|             user_email: user.email, | ||||
|           }, | ||||
|         }), | ||||
|       }); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         throw new Error("Failed to schedule job"); | ||||
|       } | ||||
|  | ||||
|       setSuccessOpen(true); | ||||
|       setCronExpression(""); | ||||
|       setJobId(""); | ||||
|       setTimeout(() => { | ||||
|         onClose(); | ||||
|       }, 1500); | ||||
|       window.location.reload(); | ||||
|     } catch (error) { | ||||
|       console.error(error); | ||||
|       setError("Failed to create cron job"); | ||||
|     } finally { | ||||
|       setIsSubmitting(false); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   const handleClose = () => { | ||||
|     setSuccessOpen(false); | ||||
|   }; | ||||
|  | ||||
|   return ( | ||||
|     <> | ||||
|       <Dialog | ||||
|         open={open} | ||||
|         onClose={onClose} | ||||
|         PaperProps={{ | ||||
|           sx: { borderRadius: 2, minWidth: "400px" }, | ||||
|         }} | ||||
|       > | ||||
|         <DialogTitle sx={{ fontWeight: 500 }}>Create Cron Job</DialogTitle> | ||||
|         <DialogContent> | ||||
|             <div className="flex flex-col gap-1 mt-0"> | ||||
|             <TextField | ||||
|               label="Cron Expression" | ||||
|               fullWidth | ||||
|               value={cronExpression} | ||||
|               onChange={(e) => setCronExpression(e.target.value)} | ||||
|               variant="outlined" | ||||
|               placeholder="* * * * *" | ||||
|               margin="normal" | ||||
|               helperText="Format: minute hour day month day-of-week" | ||||
|             /> | ||||
|  | ||||
|             <TextField | ||||
|               label="Job ID" | ||||
|               fullWidth | ||||
|               value={jobId} | ||||
|               onChange={(e) => setJobId(e.target.value)} | ||||
|               variant="outlined" | ||||
|               margin="normal" | ||||
|             /> | ||||
|  | ||||
|             {error && ( | ||||
|               <Alert severity="error" sx={{ mt: 2 }}> | ||||
|                 {error} | ||||
|               </Alert> | ||||
|             )} | ||||
|  | ||||
|             <div className="flex justify-end gap-2 mt-4"> | ||||
|               <Button | ||||
|                 variant="outlined" | ||||
|                 onClick={onClose} | ||||
|                 sx={{ borderRadius: 2 }} | ||||
|               > | ||||
|                 Cancel | ||||
|               </Button> | ||||
|               <Button | ||||
|                 variant="contained" | ||||
|                 color="primary" | ||||
|                 onClick={handleSubmit} | ||||
|                 disabled={isSubmitting} | ||||
|                 sx={{ borderRadius: 2 }} | ||||
|               > | ||||
|                 {isSubmitting ? "Submitting..." : "Create Job"} | ||||
|               </Button> | ||||
|             </div> | ||||
|           </div> | ||||
|         </DialogContent> | ||||
|       </Dialog> | ||||
|  | ||||
|       <Snackbar | ||||
|         open={successOpen} | ||||
|         autoHideDuration={4000} | ||||
|         onClose={handleClose} | ||||
|         anchorOrigin={{ vertical: "bottom", horizontal: "right" }} | ||||
|       > | ||||
|         <Alert onClose={handleClose} severity="success" sx={{ width: "100%" }}> | ||||
|           Cron job created successfully! | ||||
|         </Alert> | ||||
|       </Snackbar> | ||||
|     </> | ||||
|   ); | ||||
| }; | ||||
src/components/pages/cron-jobs/create-cron-jobs/index.ts (new file, 1 line)
							| @@ -0,0 +1 @@ | ||||
| export * from "./create-cron-jobs"; | ||||
src/components/pages/cron-jobs/cron-jobs.module.css (new file, 0 lines)
src/components/pages/cron-jobs/cron-jobs.tsx (new file, 104 lines)
							| @@ -0,0 +1,104 @@ | ||||
| import { Job, CronJob } from "@/types/job"; | ||||
| import { useState, useEffect } from "react"; | ||||
| import { CreateCronJobs } from "./create-cron-jobs"; | ||||
| import { | ||||
|   Table, | ||||
|   TableHead, | ||||
|   TableRow, | ||||
|   TableCell, | ||||
|   TableBody, | ||||
|   Button, | ||||
|   Box, | ||||
|   Typography, | ||||
| } from "@mui/material"; | ||||
| import Cookies from "js-cookie"; | ||||
|  | ||||
| export type CronJobsProps = { | ||||
|   initialJobs: Job[]; | ||||
|   initialCronJobs: CronJob[]; | ||||
|   initialUser: any; | ||||
| }; | ||||
|  | ||||
| export const CronJobs = ({ | ||||
|   initialJobs, | ||||
|   initialCronJobs, | ||||
|   initialUser, | ||||
| }: CronJobsProps) => { | ||||
|   const [jobs, setJobs] = useState<Job[]>(initialJobs); | ||||
|   const [cronJobs, setCronJobs] = useState<CronJob[]>(initialCronJobs); | ||||
|   const [user, setUser] = useState<any>(initialUser); | ||||
|  | ||||
|   useEffect(() => { | ||||
|     setJobs(initialJobs); | ||||
|     setCronJobs(initialCronJobs); | ||||
|     setUser(initialUser); | ||||
|   }, [initialJobs, initialCronJobs, initialUser]); | ||||
|  | ||||
|   const handleDeleteCronJob = async (id: string) => { | ||||
|     const token = Cookies.get("token"); | ||||
|     const response = await fetch("/api/delete-cron-job", { | ||||
|       method: "POST", | ||||
|       headers: { | ||||
|         "Content-Type": "application/json", | ||||
|         Authorization: `Bearer ${token}`, | ||||
|       }, | ||||
|       body: JSON.stringify({ data: { id, user_email: user.email } }), | ||||
|     }); | ||||
|  | ||||
|     if (response.ok) { | ||||
|       console.log("Cron job deleted successfully"); | ||||
|       setCronJobs(cronJobs.filter((cronJob) => cronJob.id !== id)); | ||||
|     } else { | ||||
|       console.error("Failed to delete cron job"); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   if (!user) { | ||||
|     return ( | ||||
|       <Box> | ||||
|         <Typography variant="h6"> | ||||
|           Please login to view your cron jobs | ||||
|         </Typography> | ||||
|       </Box> | ||||
|     ); | ||||
|   } | ||||
|  | ||||
|   return ( | ||||
|     <div> | ||||
|       <CreateCronJobs availableJobs={jobs} user={user} /> | ||||
|  | ||||
|       <Table> | ||||
|         <TableHead> | ||||
|           <TableRow> | ||||
|             <TableCell>Cron Expression</TableCell> | ||||
|             <TableCell>Job ID</TableCell> | ||||
|             <TableCell>User Email</TableCell> | ||||
|             <TableCell>Created At</TableCell> | ||||
|             <TableCell>Updated At</TableCell> | ||||
|             <TableCell>Actions</TableCell> | ||||
|           </TableRow> | ||||
|         </TableHead> | ||||
|         <TableBody> | ||||
|           {cronJobs.map((cronJob) => ( | ||||
|             <TableRow key={cronJob.id}> | ||||
|               <TableCell>{cronJob.cron_expression}</TableCell> | ||||
|               <TableCell>{cronJob.job_id}</TableCell> | ||||
|               <TableCell>{cronJob.user_email}</TableCell> | ||||
|               <TableCell> | ||||
|                 {new Date(cronJob.time_created).toLocaleString()} | ||||
|               </TableCell> | ||||
|               <TableCell> | ||||
|                 {new Date(cronJob.time_updated).toLocaleString()} | ||||
|               </TableCell> | ||||
|               <TableCell> | ||||
|                 <Button onClick={() => handleDeleteCronJob(cronJob.id)}> | ||||
|                   Delete | ||||
|                 </Button> | ||||
|               </TableCell> | ||||
|             </TableRow> | ||||
|           ))} | ||||
|         </TableBody> | ||||
|       </Table> | ||||
|     </div> | ||||
|   ); | ||||
| }; | ||||
src/components/pages/cron-jobs/get-server-side-props.ts (new file, 62 lines)
							| @@ -0,0 +1,62 @@ | ||||
| import axios from "axios"; | ||||
| import { GetServerSideProps } from "next"; | ||||
| import { parseCookies } from "nookies"; | ||||
| import { CronJob, Job } from "../../../types"; | ||||
|  | ||||
| export const getServerSideProps: GetServerSideProps = async (context) => { | ||||
|   const { req } = context; | ||||
|   const cookies = parseCookies({ req }); | ||||
|   const token = cookies.token; | ||||
|   let user = null; | ||||
|   let initialJobs: Job[] = []; | ||||
|   let initialCronJobs: CronJob[] = []; | ||||
|   if (token) { | ||||
|     try { | ||||
|       const userResponse = await axios.get( | ||||
|         `${process.env.NEXT_PUBLIC_API_URL}/api/auth/users/me`, | ||||
|         { | ||||
|           headers: { Authorization: `Bearer ${token}` }, | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       user = userResponse.data; | ||||
|  | ||||
|       const jobsResponse = await fetch( | ||||
|         `${process.env.NEXT_PUBLIC_API_URL}/api/retrieve-scrape-jobs`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           body: JSON.stringify({ user: user.email }), | ||||
|           headers: { | ||||
|             "content-type": "application/json", | ||||
|             Authorization: `Bearer ${token}`, | ||||
|           }, | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       initialJobs = await jobsResponse.json(); | ||||
|       console.log(initialJobs); | ||||
|  | ||||
|       const cronJobsResponse = await fetch( | ||||
|         `${process.env.NEXT_PUBLIC_API_URL}/api/cron-jobs`, | ||||
|         { | ||||
|           headers: { | ||||
|             "content-type": "application/json", | ||||
|             Authorization: `Bearer ${token}`, | ||||
|           }, | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       initialCronJobs = await cronJobsResponse.json(); | ||||
|     } catch (error) { | ||||
|       console.error("Error fetching user or jobs:", error); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   return { | ||||
|     props: { | ||||
|       initialJobs, | ||||
|       initialUser: user, | ||||
|       initialCronJobs, | ||||
|     }, | ||||
|   }; | ||||
| }; | ||||
src/components/pages/cron-jobs/index.ts (new file, 1 line)
							| @@ -0,0 +1 @@ | ||||
| export { CronJobs } from "./cron-jobs"; | ||||
src/components/pages/home/home.tsx (new file, 107 lines)
							| @@ -0,0 +1,107 @@ | ||||
| "use client"; | ||||
|  | ||||
| import React, { useState, useEffect, useRef } from "react"; | ||||
| import { Button, Container, Box, Snackbar, Alert } from "@mui/material"; | ||||
| import { useRouter } from "next/router"; | ||||
| import { Element, Result } from "@/types"; | ||||
| import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter"; | ||||
| import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider"; | ||||
|  | ||||
| export const Home = () => { | ||||
|   const { | ||||
|     submittedURL, | ||||
|     setSubmittedURL, | ||||
|     rows, | ||||
|     setRows, | ||||
|     results, | ||||
|     snackbarOpen, | ||||
|     setSnackbarOpen, | ||||
|     snackbarMessage, | ||||
|     snackbarSeverity, | ||||
|   } = useJobSubmitterProvider(); | ||||
|   const router = useRouter(); | ||||
|   const { elements, url } = router.query; | ||||
|  | ||||
|   const resultsRef = useRef<HTMLTableElement | null>(null); | ||||
|  | ||||
|   useEffect(() => { | ||||
|     if (elements) { | ||||
|       setRows(JSON.parse(elements as string)); | ||||
|     } | ||||
|     if (url) { | ||||
|       setSubmittedURL(url as string); | ||||
|     } | ||||
|   }, [elements, url]); | ||||
|  | ||||
|   useEffect(() => { | ||||
|     if (results && resultsRef.current) { | ||||
|       resultsRef.current.scrollIntoView({ behavior: "smooth" }); | ||||
|     } | ||||
|   }, [results]); | ||||
|  | ||||
|   const handleCloseSnackbar = () => { | ||||
|     setSnackbarOpen(false); | ||||
|   }; | ||||
|  | ||||
|   const ErrorSnackbar = () => { | ||||
|     return ( | ||||
|       <Snackbar | ||||
|         open={snackbarOpen} | ||||
|         autoHideDuration={6000} | ||||
|         onClose={handleCloseSnackbar} | ||||
|       > | ||||
|         <Alert onClose={handleCloseSnackbar} severity="error"> | ||||
|           {snackbarMessage} | ||||
|         </Alert> | ||||
|       </Snackbar> | ||||
|     ); | ||||
|   }; | ||||
|  | ||||
|   const NotifySnackbar = () => { | ||||
|     const goTo = () => { | ||||
|       router.push("/jobs"); | ||||
|     }; | ||||
|  | ||||
|     const action = ( | ||||
|       <Button color="inherit" size="small" onClick={goTo}> | ||||
|         Go To Job | ||||
|       </Button> | ||||
|     ); | ||||
|  | ||||
|     return ( | ||||
|       <Snackbar | ||||
|         open={snackbarOpen} | ||||
|         autoHideDuration={6000} | ||||
|         onClose={handleCloseSnackbar} | ||||
|       > | ||||
|         <Alert onClose={handleCloseSnackbar} severity="info" action={action}> | ||||
|           {snackbarMessage} | ||||
|         </Alert> | ||||
|       </Snackbar> | ||||
|     ); | ||||
|   }; | ||||
|  | ||||
|   return ( | ||||
|     <Box | ||||
|       bgcolor="background.default" | ||||
|       display="flex" | ||||
|       flexDirection="column" | ||||
|       justifyContent="center" | ||||
|       alignItems="center" | ||||
|       height="100%" | ||||
|       py={4} | ||||
|     > | ||||
|       <Container maxWidth="lg" className="overflow-y-auto max-h-full"> | ||||
|         <JobSubmitter /> | ||||
|         {submittedURL.length ? ( | ||||
|           <ElementTable | ||||
|             rows={rows} | ||||
|             setRows={setRows} | ||||
|             submittedURL={submittedURL} | ||||
|           /> | ||||
|         ) : null} | ||||
|       </Container> | ||||
|       {snackbarSeverity === "info" ? <NotifySnackbar /> : <ErrorSnackbar />} | ||||
|     </Box> | ||||
|   ); | ||||
| }; | ||||
src/components/pages/home/index.ts (new file, 1 line)
							| @@ -0,0 +1 @@ | ||||
| export * from "./home"; | ||||
| @@ -1,2 +1 @@ | ||||
| export * from "./ElementTable"; | ||||
| export * from "./job-submitter"; | ||||
|   | ||||
| @@ -15,9 +15,11 @@ import { | ||||
|   IconButton, | ||||
|   Tooltip, | ||||
|   useTheme, | ||||
|   Divider, | ||||
| } from "@mui/material"; | ||||
| import AddIcon from "@mui/icons-material/Add"; | ||||
| import { Element } from "../../types"; | ||||
| import { Element } from "@/types"; | ||||
| import { SiteMap } from "../site-map"; | ||||
|  | ||||
| interface Props { | ||||
|   rows: Element[]; | ||||
| @@ -169,6 +171,13 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => { | ||||
|           </div> | ||||
|         </TableContainer> | ||||
|       </Box> | ||||
|       <Divider | ||||
|         sx={{ | ||||
|           borderColor: theme.palette.mode === "dark" ? "#ffffff" : "#000000", | ||||
|           marginBottom: 2, | ||||
|         }} | ||||
|       /> | ||||
|       <SiteMap /> | ||||
|     </Box> | ||||
|   ); | ||||
| }; | ||||
| @@ -0,0 +1 @@ | ||||
| export { ElementTable } from "./element-table"; | ||||
| @@ -1 +1,2 @@ | ||||
| export { JobSubmitter } from "./job-submitter"; | ||||
| export { ElementTable } from "./element-table"; | ||||
|   | ||||
| @@ -1,26 +1,20 @@ | ||||
| import React, { Dispatch } from "react"; | ||||
| import React from "react"; | ||||
| import { TextField, Button, CircularProgress } from "@mui/material"; | ||||
| import { Element } from "@/types"; | ||||
| import { useJobSubmitterProvider } from "../provider"; | ||||
|  | ||||
| export type JobSubmitterInputProps = { | ||||
|   submittedURL: string; | ||||
|   setSubmittedURL: Dispatch<React.SetStateAction<string>>; | ||||
|   isValidURL: boolean; | ||||
|   urlError: string | null; | ||||
|   handleSubmit: () => void; | ||||
|   loading: boolean; | ||||
|   rows: Element[]; | ||||
| }; | ||||
|  | ||||
| export const JobSubmitterInput = ({ | ||||
|   submittedURL, | ||||
|   setSubmittedURL, | ||||
|   isValidURL, | ||||
|   urlError, | ||||
|   handleSubmit, | ||||
|   loading, | ||||
|   rows, | ||||
|   urlError, | ||||
| }: JobSubmitterInputProps) => { | ||||
|   const { submittedURL, setSubmittedURL, isValidURL, rows } = | ||||
|     useJobSubmitterProvider(); | ||||
|   return ( | ||||
|     <div className="flex flex-row space-x-4 items-center mb-2"> | ||||
|       <TextField | ||||
|   | ||||
| @@ -14,9 +14,9 @@ export type JobSubmitterOptionsProps = { | ||||
| export const JobSubmitterOptions = ({ | ||||
|   jobOptions, | ||||
|   setJobOptions, | ||||
|   handleSelectProxies, | ||||
|   customJSONSelected, | ||||
|   setCustomJSONSelected, | ||||
|   handleSelectProxies, | ||||
|   proxiesSelected, | ||||
| }: JobSubmitterOptionsProps) => { | ||||
|   const handleMultiPageScrapeChange = () => { | ||||
| @@ -42,6 +42,13 @@ export const JobSubmitterOptions = ({ | ||||
|     })); | ||||
|   }; | ||||
|  | ||||
|   const handleCollectMediaChange = () => { | ||||
|     setJobOptions((prevJobOptions) => ({ | ||||
|       ...prevJobOptions, | ||||
|       collect_media: !prevJobOptions.collect_media, | ||||
|     })); | ||||
|   }; | ||||
|  | ||||
|   return ( | ||||
|     <Box bgcolor="background.paper" className="flex flex-col mb-2 rounded-md"> | ||||
|       <div id="options" className="p-2 flex flex-row space-x-2"> | ||||
| @@ -94,6 +101,15 @@ export const JobSubmitterOptions = ({ | ||||
|             /> | ||||
|           } | ||||
|         ></FormControlLabel> | ||||
|         <FormControlLabel | ||||
|           label="Collect Media" | ||||
|           control={ | ||||
|             <Checkbox | ||||
|               checked={jobOptions.collect_media} | ||||
|               onChange={handleCollectMediaChange} | ||||
|             /> | ||||
|           } | ||||
|         /> | ||||
|       </div> | ||||
|       {customJSONSelected ? ( | ||||
|         <div id="custom-json" className="pl-2 pr-2 pb-2"> | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| "use client"; | ||||
|  | ||||
| import React, { useEffect, useState, Dispatch } from "react"; | ||||
| import { Element } from "@/types"; | ||||
| import React, { useEffect, useState } from "react"; | ||||
| import { useAuth } from "@/contexts/AuthContext"; | ||||
| import { useRouter } from "next/router"; | ||||
| import { RawJobOptions } from "@/types/job"; | ||||
| @@ -10,29 +9,16 @@ import { JobSubmitterHeader } from "./job-submitter-header"; | ||||
| import { JobSubmitterInput } from "./job-submitter-input"; | ||||
| import { JobSubmitterOptions } from "./job-submitter-options"; | ||||
| import { ApiService } from "@/services"; | ||||
|  | ||||
| interface StateProps { | ||||
|   submittedURL: string; | ||||
|   setSubmittedURL: Dispatch<React.SetStateAction<string>>; | ||||
|   rows: Element[]; | ||||
|   isValidURL: boolean; | ||||
|   setIsValidUrl: Dispatch<React.SetStateAction<boolean>>; | ||||
|   setSnackbarMessage: Dispatch<React.SetStateAction<string>>; | ||||
|   setSnackbarOpen: Dispatch<React.SetStateAction<boolean>>; | ||||
|   setSnackbarSeverity: Dispatch<React.SetStateAction<string>>; | ||||
| } | ||||
|  | ||||
| interface Props { | ||||
|   stateProps: StateProps; | ||||
| } | ||||
| import { useJobSubmitterProvider } from "./provider"; | ||||
|  | ||||
| const initialJobOptions: RawJobOptions = { | ||||
|   multi_page_scrape: false, | ||||
|   custom_headers: null, | ||||
|   proxies: null, | ||||
|   collect_media: false, | ||||
| }; | ||||
|  | ||||
| export const JobSubmitter = ({ stateProps }: Props) => { | ||||
| export const JobSubmitter = () => { | ||||
|   const { user } = useAuth(); | ||||
|   const router = useRouter(); | ||||
|   const { job_options } = router.query; | ||||
| @@ -40,11 +26,13 @@ export const JobSubmitter = ({ stateProps }: Props) => { | ||||
|   const { | ||||
|     submittedURL, | ||||
|     rows, | ||||
|     siteMap, | ||||
|     setIsValidUrl, | ||||
|     setSnackbarMessage, | ||||
|     setSnackbarOpen, | ||||
|     setSnackbarSeverity, | ||||
|   } = stateProps; | ||||
|     setSiteMap, | ||||
|   } = useJobSubmitterProvider(); | ||||
|  | ||||
|   const [urlError, setUrlError] = useState<string | null>(null); | ||||
|   const [loading, setLoading] = useState<boolean>(false); | ||||
| @@ -87,7 +75,8 @@ export const JobSubmitter = ({ stateProps }: Props) => { | ||||
|       rows, | ||||
|       user, | ||||
|       jobOptions, | ||||
|       customHeaders | ||||
|       customHeaders, | ||||
|       siteMap | ||||
|     ) | ||||
|       .then(async (response) => { | ||||
|         if (!response.ok) { | ||||
| @@ -120,31 +109,28 @@ export const JobSubmitter = ({ stateProps }: Props) => { | ||||
|         job_options as string, | ||||
|         setCustomJSONSelected, | ||||
|         setProxiesSelected, | ||||
|         setJobOptions | ||||
|         setJobOptions, | ||||
|         setSiteMap | ||||
|       ); | ||||
|     } | ||||
|   }, [job_options]); | ||||
|  | ||||
|   return ( | ||||
|     <> | ||||
|       <div> | ||||
|         <JobSubmitterHeader /> | ||||
|         <JobSubmitterInput | ||||
|           {...stateProps} | ||||
|           urlError={urlError} | ||||
|           handleSubmit={handleSubmit} | ||||
|           loading={loading} | ||||
|         /> | ||||
|         <JobSubmitterOptions | ||||
|           {...stateProps} | ||||
|           jobOptions={jobOptions} | ||||
|           setJobOptions={setJobOptions} | ||||
|           customJSONSelected={customJSONSelected} | ||||
|           setCustomJSONSelected={setCustomJSONSelected} | ||||
|           handleSelectProxies={handleSelectProxies} | ||||
|           proxiesSelected={proxiesSelected} | ||||
|         /> | ||||
|       </div> | ||||
|     </> | ||||
|     <div> | ||||
|       <JobSubmitterHeader /> | ||||
|       <JobSubmitterInput | ||||
|         urlError={urlError} | ||||
|         handleSubmit={handleSubmit} | ||||
|         loading={loading} | ||||
|       /> | ||||
|       <JobSubmitterOptions | ||||
|         jobOptions={jobOptions} | ||||
|         setJobOptions={setJobOptions} | ||||
|         customJSONSelected={customJSONSelected} | ||||
|         setCustomJSONSelected={setCustomJSONSelected} | ||||
|         handleSelectProxies={handleSelectProxies} | ||||
|         proxiesSelected={proxiesSelected} | ||||
|       /> | ||||
|     </div> | ||||
|   ); | ||||
| }; | ||||
|   | ||||
src/components/submit/job-submitter/provider.tsx (new file, 84 lines)
							| @@ -0,0 +1,84 @@ | ||||
| import React, { | ||||
|   createContext, | ||||
|   PropsWithChildren, | ||||
|   useContext, | ||||
|   useState, | ||||
|   Dispatch, | ||||
|   useMemo, | ||||
| } from "react"; | ||||
| import { Element, Result, SiteMap } from "@/types"; | ||||
|  | ||||
| type JobSubmitterProviderType = { | ||||
|   submittedURL: string; | ||||
|   setSubmittedURL: Dispatch<React.SetStateAction<string>>; | ||||
|   rows: Element[]; | ||||
|   setRows: Dispatch<React.SetStateAction<Element[]>>; | ||||
|   results: Result; | ||||
|   setResults: Dispatch<React.SetStateAction<Result>>; | ||||
|   snackbarOpen: boolean; | ||||
|   setSnackbarOpen: Dispatch<React.SetStateAction<boolean>>; | ||||
|   snackbarMessage: string; | ||||
|   setSnackbarMessage: Dispatch<React.SetStateAction<string>>; | ||||
|   snackbarSeverity: string; | ||||
|   setSnackbarSeverity: Dispatch<React.SetStateAction<string>>; | ||||
|   isValidURL: boolean; | ||||
|   setIsValidUrl: Dispatch<React.SetStateAction<boolean>>; | ||||
|   siteMap: SiteMap | null; | ||||
|   setSiteMap: Dispatch<React.SetStateAction<SiteMap | null>>; | ||||
| }; | ||||
|  | ||||
| const JobSubmitterProvider = createContext<JobSubmitterProviderType>( | ||||
|   {} as JobSubmitterProviderType | ||||
| ); | ||||
|  | ||||
| export const Provider = ({ children }: PropsWithChildren) => { | ||||
|   const [submittedURL, setSubmittedURL] = useState<string>(""); | ||||
|   const [rows, setRows] = useState<Element[]>([]); | ||||
|   const [results, setResults] = useState<Result>({}); | ||||
|   const [snackbarOpen, setSnackbarOpen] = useState<boolean>(false); | ||||
|   const [snackbarMessage, setSnackbarMessage] = useState<string>(""); | ||||
|   const [snackbarSeverity, setSnackbarSeverity] = useState<string>("error"); | ||||
|   const [isValidURL, setIsValidUrl] = useState<boolean>(true); | ||||
|   const [siteMap, setSiteMap] = useState<SiteMap | null>(null); | ||||
|  | ||||
|   const value: JobSubmitterProviderType = useMemo( | ||||
|     () => ({ | ||||
|       submittedURL, | ||||
|       setSubmittedURL, | ||||
|       rows, | ||||
|       setRows, | ||||
|       results, | ||||
|       setResults, | ||||
|       snackbarOpen, | ||||
|       setSnackbarOpen, | ||||
|       snackbarMessage, | ||||
|       setSnackbarMessage, | ||||
|       snackbarSeverity, | ||||
|       setSnackbarSeverity, | ||||
|       isValidURL, | ||||
|       setIsValidUrl, | ||||
|       siteMap, | ||||
|       setSiteMap, | ||||
|     }), | ||||
|     [ | ||||
|       submittedURL, | ||||
|       rows, | ||||
|       results, | ||||
|       snackbarOpen, | ||||
|       snackbarMessage, | ||||
|       snackbarSeverity, | ||||
|       isValidURL, | ||||
|       siteMap, | ||||
|     ] | ||||
|   ); | ||||
|  | ||||
|   return ( | ||||
|     <JobSubmitterProvider.Provider value={value}> | ||||
|       {children} | ||||
|     </JobSubmitterProvider.Provider> | ||||
|   ); | ||||
| }; | ||||
|  | ||||
| export const useJobSubmitterProvider = () => { | ||||
|   return useContext(JobSubmitterProvider); | ||||
| }; | ||||
src/components/submit/job-submitter/site-map/index.ts (new file, 1 line)
							| @@ -0,0 +1 @@ | ||||
| export * from "./site-map"; | ||||
| @@ -0,0 +1 @@ | ||||
| export * from "./site-map-input"; | ||||
| @@ -0,0 +1,22 @@ | ||||
| .button { | ||||
|   height: 3rem; | ||||
|   width: 2rem; | ||||
|  | ||||
|   color: #ffffff; | ||||
|   font-weight: 600; | ||||
|   border-radius: 0.375rem; | ||||
|   transition: transform 0.2s ease-in-out; | ||||
|   transform: scale(1); | ||||
| } | ||||
|  | ||||
| .button:hover { | ||||
|   transform: scale(1.05); | ||||
| } | ||||
|  | ||||
| .remove { | ||||
|   background-color: var(--delete-red) !important; | ||||
| } | ||||
|  | ||||
| .remove:hover { | ||||
|   background-color: var(--delete-red-hover) !important; | ||||
| } | ||||
| @@ -0,0 +1,135 @@ | ||||
| import { useState } from "react"; | ||||
| import { useJobSubmitterProvider } from "../../provider"; | ||||
| import { | ||||
|   MenuItem, | ||||
|   Select, | ||||
|   TextField, | ||||
|   FormControl, | ||||
|   Button, | ||||
|   Checkbox, | ||||
|   FormControlLabel, | ||||
| } from "@mui/material"; | ||||
| import { ActionOption } from "@/types/job"; | ||||
| import classes from "./site-map-input.module.css"; | ||||
| import { clsx } from "clsx"; | ||||
|  | ||||
| export type SiteMapInputProps = { | ||||
|   disabled?: boolean; | ||||
|   xpath?: string; | ||||
|   option?: ActionOption; | ||||
|   clickOnce?: boolean; | ||||
|   input?: string; | ||||
| }; | ||||
|  | ||||
| export const SiteMapInput = ({ | ||||
|   disabled, | ||||
|   xpath, | ||||
|   option, | ||||
|   clickOnce, | ||||
|   input, | ||||
| }: SiteMapInputProps) => { | ||||
|   console.log(clickOnce); | ||||
|   const [optionState, setOptionState] = useState<ActionOption>( | ||||
|     option || "click" | ||||
|   ); | ||||
|   const [xpathState, setXpathState] = useState<string>(xpath || ""); | ||||
|   const [clickOnceState, setClickOnceState] = useState<boolean>( | ||||
|     clickOnce || false | ||||
|   ); | ||||
|   const [inputState, setInputState] = useState<string>(input || ""); | ||||
|  | ||||
|   const { siteMap, setSiteMap } = useJobSubmitterProvider(); | ||||
|  | ||||
|   const handleAdd = () => { | ||||
|     if (!siteMap) return; | ||||
|  | ||||
|     console.log(optionState, xpathState, clickOnceState, inputState); | ||||
|  | ||||
|     setSiteMap((prevSiteMap) => ({ | ||||
|       ...prevSiteMap, | ||||
|       actions: [ | ||||
|         { | ||||
|           type: optionState, | ||||
|           xpath: xpathState, | ||||
|           name: "", | ||||
|           do_once: clickOnceState, | ||||
|           input: inputState, | ||||
|         }, | ||||
|         ...(prevSiteMap?.actions || []), | ||||
|       ], | ||||
|     })); | ||||
|  | ||||
|     setXpathState(""); | ||||
|   }; | ||||
|  | ||||
|   const handleRemove = () => { | ||||
|     if (!siteMap) return; | ||||
|  | ||||
|     setSiteMap((prevSiteMap) => ({ | ||||
|       ...prevSiteMap, | ||||
|       actions: (prevSiteMap?.actions || []).slice(0, -1), | ||||
|     })); | ||||
|   }; | ||||
|  | ||||
|   return ( | ||||
|     <div className="flex flex-col gap-2 w-full"> | ||||
|       <div className="flex gap-2 items-center"> | ||||
|         <FormControl className="w-1/4"> | ||||
|           <Select | ||||
|             disabled={disabled} | ||||
|             displayEmpty | ||||
|             value={optionState} | ||||
|             onChange={(e) => setOptionState(e.target.value as ActionOption)} | ||||
|           > | ||||
|             <MenuItem value="click">Click</MenuItem> | ||||
|             <MenuItem value="input">Input</MenuItem> | ||||
|           </Select> | ||||
|         </FormControl> | ||||
|         {optionState === "input" && ( | ||||
|           <TextField | ||||
|             label="Input Text" | ||||
|             fullWidth | ||||
|             value={inputState} | ||||
|             onChange={(e) => setInputState(e.target.value)} | ||||
|             disabled={disabled} | ||||
|           /> | ||||
|         )} | ||||
|         <TextField | ||||
|           label="XPath Selector" | ||||
|           fullWidth | ||||
|           value={xpathState} | ||||
|           onChange={(e) => setXpathState(e.target.value)} | ||||
|           disabled={disabled} | ||||
|         /> | ||||
|         {disabled ? ( | ||||
|           <Button | ||||
|             onClick={handleRemove} | ||||
|             className={clsx(classes.button, classes.remove)} | ||||
|           > | ||||
|             Delete | ||||
|           </Button> | ||||
|         ) : ( | ||||
|           <Button | ||||
|             onClick={handleAdd} | ||||
|             disabled={!xpathState} | ||||
|             className={clsx(classes.button, classes.add)} | ||||
|           > | ||||
|             Add | ||||
|           </Button> | ||||
|         )} | ||||
|       </div> | ||||
|       {!disabled && ( | ||||
|         <FormControlLabel | ||||
|           label="Do Once" | ||||
|           control={ | ||||
|             <Checkbox | ||||
|               checked={clickOnceState} | ||||
|               disabled={disabled} | ||||
|               onChange={() => setClickOnceState(!clickOnceState)} | ||||
|             /> | ||||
|           } | ||||
|         /> | ||||
|       )} | ||||
|     </div> | ||||
|   ); | ||||
| }; | ||||
src/components/submit/job-submitter/site-map/site-map.tsx (new file, 70 lines)
							| @@ -0,0 +1,70 @@ | ||||
| import { useEffect, useState } from "react"; | ||||
| import { useJobSubmitterProvider } from "../provider"; | ||||
| import { Button, Divider, Typography, useTheme } from "@mui/material"; | ||||
| import { SiteMapInput } from "./site-map-input"; | ||||
|  | ||||
| export const SiteMap = () => { | ||||
|   const { siteMap, setSiteMap } = useJobSubmitterProvider(); | ||||
|   const [showSiteMap, setShowSiteMap] = useState<boolean>(false); | ||||
|   const theme = useTheme(); | ||||
|  | ||||
|   const handleCreateSiteMap = () => { | ||||
|     setSiteMap({ actions: [] }); | ||||
|     setShowSiteMap(true); | ||||
|   }; | ||||
|  | ||||
|   const handleClearSiteMap = () => { | ||||
|     setSiteMap(null); | ||||
|     setShowSiteMap(false); | ||||
|   }; | ||||
|  | ||||
|   useEffect(() => { | ||||
|     if (siteMap) { | ||||
|       setShowSiteMap(true); | ||||
|     } | ||||
|   }, [siteMap]); | ||||
|  | ||||
|   return ( | ||||
|     <div className="flex flex-col gap-4"> | ||||
|       {siteMap ? ( | ||||
|         <Button onClick={handleClearSiteMap}>Clear Site Map</Button> | ||||
|       ) : ( | ||||
|         <Button onClick={handleCreateSiteMap}>Create Site Map</Button> | ||||
|       )} | ||||
|       {showSiteMap && ( | ||||
|         <div className="flex flex-col gap-4"> | ||||
|           <SiteMapInput /> | ||||
|           {siteMap?.actions && siteMap?.actions.length > 0 && ( | ||||
|             <> | ||||
|               <Divider | ||||
|                 sx={{ | ||||
|                   borderColor: | ||||
|                     theme.palette.mode === "dark" ? "#ffffff" : "#000000", | ||||
|                 }} | ||||
|               /> | ||||
|               <Typography className="w-full text-center" variant="h5"> | ||||
|                 Site Map Actions | ||||
|               </Typography> | ||||
|             </> | ||||
|           )} | ||||
|           <ul className="flex flex-col gap-4"> | ||||
|             {siteMap?.actions.reverse().map((action, index) => ( | ||||
|               <li key={action.xpath} className="flex w-full items-center"> | ||||
|                 <Typography variant="h6" className="w-[10%] mr-2"> | ||||
|                   Action {index + 1}: | ||||
|                 </Typography> | ||||
|                 <SiteMapInput | ||||
|                   disabled={Boolean(siteMap)} | ||||
|                   xpath={action.xpath} | ||||
|                   option={action.type} | ||||
|                   clickOnce={action.do_once} | ||||
|                   input={action.input} | ||||
|                 /> | ||||
|               </li> | ||||
|             ))} | ||||
|           </ul> | ||||
|         </div> | ||||
|       )} | ||||
|     </div> | ||||
|   ); | ||||
| }; | ||||
| @@ -1,6 +1,5 @@ | ||||
| import React, { createContext, useContext, useState, useEffect } from "react"; | ||||
| import axios from "axios"; | ||||
| import { Constants } from "../lib"; | ||||
| import Cookies from "js-cookie"; | ||||
|  | ||||
| interface AuthContextProps { | ||||
| @@ -25,7 +24,7 @@ export const AuthProvider: React.FC<AuthProps> = ({ children }) => { | ||||
|     const token = Cookies.get("token"); | ||||
|     if (token) { | ||||
|       axios | ||||
|         .get(`${Constants.DOMAIN}/api/auth/users/me`, { | ||||
|         .get(`/api/me`, { | ||||
|           headers: { Authorization: `Bearer ${token}` }, | ||||
|         }) | ||||
|         .then((response) => { | ||||
| @@ -42,10 +41,8 @@ export const AuthProvider: React.FC<AuthProps> = ({ children }) => { | ||||
|     const params = new URLSearchParams(); | ||||
|     params.append("username", email); | ||||
|     params.append("password", password); | ||||
|     const response = await axios.post( | ||||
|       `${Constants.DOMAIN}/api/auth/token`, | ||||
|       params | ||||
|     ); | ||||
|     const response = await axios.post(`/api/token`, params); | ||||
|  | ||||
|     Cookies.set("token", response.data.access_token, { | ||||
|       expires: 7, | ||||
|       path: "/", | ||||
| @@ -53,12 +50,11 @@ export const AuthProvider: React.FC<AuthProps> = ({ children }) => { | ||||
|       secure: false, | ||||
|       sameSite: "Lax", | ||||
|     }); | ||||
|     const userResponse = await axios.get( | ||||
|       `${Constants.DOMAIN}/api/auth/users/me`, | ||||
|       { | ||||
|         headers: { Authorization: `Bearer ${response.data.access_token}` }, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     const userResponse = await axios.get(`/api/me`, { | ||||
|       headers: { Authorization: `Bearer ${response.data.access_token}` }, | ||||
|     }); | ||||
|  | ||||
|     setUser(userResponse.data); | ||||
|     setIsAuthenticated(true); | ||||
|   }; | ||||
|   | ||||
| @@ -1,12 +1,13 @@ | ||||
| import { Dispatch, SetStateAction } from "react"; | ||||
|  | ||||
| import { RawJobOptions } from "@/types"; | ||||
| import { RawJobOptions, SiteMap } from "@/types"; | ||||
|  | ||||
| export const parseJobOptions = ( | ||||
|   job_options: string, | ||||
|   setCustomJSONSelected: Dispatch<SetStateAction<boolean>>, | ||||
|   setProxiesSelected: Dispatch<SetStateAction<boolean>>, | ||||
|   setJobOptions: Dispatch<SetStateAction<RawJobOptions>> | ||||
|   setJobOptions: Dispatch<SetStateAction<RawJobOptions>>, | ||||
|   setSiteMap: Dispatch<SetStateAction<any>> | ||||
| ) => { | ||||
|   if (job_options) { | ||||
|     const jsonOptions = JSON.parse(job_options as string); | ||||
| @@ -14,6 +15,7 @@ export const parseJobOptions = ( | ||||
|       multi_page_scrape: false, | ||||
|       custom_headers: null, | ||||
|       proxies: null, | ||||
|       collect_media: false, | ||||
|     }; | ||||
|  | ||||
|     if ( | ||||
| @@ -31,6 +33,10 @@ export const parseJobOptions = ( | ||||
|       newJobOptions.proxies = jsonOptions.proxies.join(","); | ||||
|     } | ||||
|  | ||||
|     if (jsonOptions.site_map) { | ||||
|       setSiteMap(jsonOptions.site_map); | ||||
|     } | ||||
|  | ||||
|     setJobOptions(newJobOptions); | ||||
|   } | ||||
| }; | ||||
|   | ||||
| @@ -11,13 +11,13 @@ export const fetchJobs = async ( | ||||
|   fetchOptions: fetchOptions = {} | ||||
| ) => { | ||||
|   const token = Cookies.get("token"); | ||||
|   await fetch(`/api/retrieve-scrape-jobs`, { | ||||
|   await fetch("/api/retrieve", { | ||||
|     method: "POST", | ||||
|     headers: { | ||||
|       "content-type": "application/json", | ||||
|       Authorization: `Bearer ${token}`, | ||||
|     }, | ||||
|     body: JSON.stringify(fetchOptions), | ||||
|     body: JSON.stringify({ data: fetchOptions }), | ||||
|   }) | ||||
|     .then((response) => response.json()) | ||||
|     .then((data) => setJobs(data)) | ||||
| @@ -48,7 +48,7 @@ export const checkAI = async ( | ||||
| ) => { | ||||
|   const token = Cookies.get("token"); | ||||
|   try { | ||||
|     const response = await fetch(`/api/ai/check`, { | ||||
|     const response = await fetch("/api/ai/check", { | ||||
|       headers: { | ||||
|         "content-type": "application/json", | ||||
|         Authorization: `Bearer ${token}`, | ||||
| @@ -69,13 +69,13 @@ export const updateJob = async (ids: string[], field: string, value: any) => { | ||||
|     field: field, | ||||
|     value: value, | ||||
|   }; | ||||
|   await fetch(`/api/update`, { | ||||
|   await fetch("/api/update", { | ||||
|     method: "POST", | ||||
|     headers: { | ||||
|       "content-type": "application/json", | ||||
|       Authorization: `Bearer ${token}`, | ||||
|     }, | ||||
|     body: JSON.stringify(postBody), | ||||
|     body: JSON.stringify({ data: postBody }), | ||||
|   }).catch((error) => { | ||||
|     console.error("Error fetching jobs:", error); | ||||
|   }); | ||||
|   | ||||
src/pages/api/ai/check.ts (new file, 30 lines)
							| @@ -0,0 +1,30 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   try { | ||||
|     const headers = new Headers(req.headers as Record<string, string>); | ||||
|     headers.set("content-type", "application/json"); | ||||
|     headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|  | ||||
|     const response = await fetch( | ||||
|       `${global.process.env.NEXT_PUBLIC_API_URL}/api/ai/check`, | ||||
|       { | ||||
|         method: "GET", | ||||
|         headers, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     if (!response.ok) { | ||||
|       throw new Error(`Error: ${response.statusText}`); | ||||
|     } | ||||
|  | ||||
|     const result = await response.json(); | ||||
|     res.status(200).json(result); | ||||
|   } catch (error) { | ||||
|     console.error("Error submitting scrape job:", error); | ||||
|     res.status(500).json({ error: "Internal Server Error" }); | ||||
|   } | ||||
| } | ||||
src/pages/api/ai/index.ts (new file, 56 lines)
							| @@ -0,0 +1,56 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   const { data } = req.body; | ||||
|  | ||||
|   try { | ||||
|     const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/api/ai`, { | ||||
|       method: "POST", | ||||
|       headers: { | ||||
|         Accept: "text/event-stream", | ||||
|         "Content-Type": "application/json", | ||||
|       }, | ||||
|       body: JSON.stringify(data), | ||||
|     }); | ||||
|  | ||||
|     if (!response.ok) { | ||||
|       const errorDetails = await response.text(); | ||||
|       if (response.status === 422) { | ||||
|         console.error(`422 Error: ${errorDetails}`); | ||||
|       } | ||||
|       throw new Error( | ||||
|         `Error fetching logs: ${response.statusText} - ${errorDetails}` | ||||
|       ); | ||||
|     } | ||||
|  | ||||
|     if (!response.body) { | ||||
|       throw new Error(`No response body from API`); | ||||
|     } | ||||
|  | ||||
|     res.writeHead(200, { | ||||
|       "Content-Type": "text/event-stream", | ||||
|       "Cache-Control": "no-cache, no-transform", | ||||
|       Connection: "keep-alive", | ||||
|       "Transfer-Encoding": "chunked", | ||||
|     }); | ||||
|  | ||||
|     let responseStream = response.body; | ||||
|     const reader = responseStream.getReader(); | ||||
|     const decoder = new TextDecoder(); | ||||
|  | ||||
|     while (true) { | ||||
|       const { done, value } = await reader.read(); | ||||
|       if (done) break; | ||||
|       const chunk = decoder.decode(value, { stream: true }); | ||||
|       res.write(`${chunk}`); | ||||
|     } | ||||
|  | ||||
|     res.end(); | ||||
|   } catch (error) { | ||||
|     console.error("Error streaming logs:", error); | ||||
|     res.status(500).json({ error: "Internal Server Error" }); | ||||
|   } | ||||
| } | ||||
src/pages/api/delete-cron-job.ts (new file, 39 lines)
							| @@ -0,0 +1,39 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|     console.log("Data", data); | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/json"); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/delete-cron-job`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         console.error(response); | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error deleting cron job:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
src/pages/api/delete.ts (new file, 38 lines)
							| @@ -0,0 +1,38 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/json"); | ||||
|     headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/delete-scrape-jobs`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error submitting scrape job:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
src/pages/api/download.ts (new file, 37 lines)
							| @@ -0,0 +1,37 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/json"); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/download`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const csvText = await response.text(); | ||||
|       res.status(200).send(csvText); | ||||
|     } catch (error) { | ||||
|       console.error("Error submitting scrape job:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
src/pages/api/get-average-element-per-link.ts (new file, 30 lines)
							| @@ -0,0 +1,30 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   const headers = new Headers(); | ||||
|   headers.set("content-type", "application/json"); | ||||
|   headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|  | ||||
|   try { | ||||
|     const response = await fetch( | ||||
|       `${process.env.NEXT_PUBLIC_API_URL}/api/statistics/get-average-element-per-link`, | ||||
|       { | ||||
|         method: "GET", | ||||
|         headers, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     if (!response.ok) { | ||||
|       throw new Error(`Error: ${response.statusText}`); | ||||
|     } | ||||
|  | ||||
|     const csvText = await response.text(); | ||||
|     res.status(200).send(csvText); | ||||
|   } catch (error) { | ||||
|     console.error("Error submitting scrape job:", error); | ||||
|     res.status(500).json({ error: "Internal Server Error" }); | ||||
|   } | ||||
| } | ||||
src/pages/api/get-average-jobs-per-day.ts (new file, 30 lines)
							| @@ -0,0 +1,30 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   const headers = new Headers(); | ||||
|   headers.set("content-type", "application/json"); | ||||
|   headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|  | ||||
|   try { | ||||
|     const response = await fetch( | ||||
|       `${global.process.env.NEXT_PUBLIC_API_URL}/api/statistics/get-average-jobs-per-day`, | ||||
|       { | ||||
|         method: "GET", | ||||
|         headers, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     if (!response.ok) { | ||||
|       throw new Error(`Error: ${response.statusText}`); | ||||
|     } | ||||
|  | ||||
|     const csvText = await response.text(); | ||||
|     res.status(200).send(csvText); | ||||
|   } catch (error) { | ||||
|     console.error("Error submitting scrape job:", error); | ||||
|     res.status(500).json({ error: "Internal Server Error" }); | ||||
|   } | ||||
| } | ||||
src/pages/api/job/[id].ts (new file, 31 lines)
							| @@ -0,0 +1,31 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   const { id } = req.query; | ||||
|  | ||||
|   const headers = new Headers(); | ||||
|   headers.set("content-type", "application/json"); | ||||
|   headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|  | ||||
|   try { | ||||
|     const response = await fetch( | ||||
|       `${global.process.env.NEXT_PUBLIC_API_URL}/api/job/${id}`, | ||||
|       { | ||||
|         headers, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     if (!response.ok) { | ||||
|       throw new Error(`Error: ${response.statusText}`); | ||||
|     } | ||||
|  | ||||
|     const result = await response.json(); | ||||
|     res.status(200).json(result); | ||||
|   } catch (error) { | ||||
|     console.error("Error fetching job:", error); | ||||
|     res.status(500).json({ error: "Internal Server Error" }); | ||||
|   } | ||||
| } | ||||
							
								
								
									
45  src/pages/api/logs.ts  Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   try { | ||||
|     const response = await fetch( | ||||
|       `${process.env.NEXT_PUBLIC_API_URL}/api/logs`, | ||||
|       { | ||||
|         method: "GET", | ||||
|         headers: { | ||||
|           Accept: "text/event-stream", | ||||
|         }, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     if (!response.ok || !response.body) { | ||||
|       throw new Error(`Error fetching logs: ${response.statusText}`); | ||||
|     } | ||||
|  | ||||
|     res.writeHead(200, { | ||||
|       "Content-Type": "text/event-stream", | ||||
|       "Cache-Control": "no-cache, no-transform", | ||||
|       Connection: "keep-alive", | ||||
|       "Transfer-Encoding": "chunked", | ||||
|     }); | ||||
|  | ||||
|     let responseStream = response.body; | ||||
|     const reader = responseStream.getReader(); | ||||
|     const decoder = new TextDecoder(); | ||||
|  | ||||
|     while (true) { | ||||
|       const { done, value } = await reader.read(); | ||||
|       if (done) break; | ||||
|       const chunk = decoder.decode(value, { stream: true }); | ||||
|       res.write(`data: ${chunk}\n\n`); | ||||
|     } | ||||
|  | ||||
|     res.end(); | ||||
|   } catch (error) { | ||||
|     console.error("Error streaming logs:", error); | ||||
|     res.status(500).json({ error: "Internal Server Error" }); | ||||
|   } | ||||
| } | ||||
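Because this route re-emits the backend stream as server-sent events, a browser client would typically consume it with EventSource. A minimal sketch; subscribeToLogs is an illustrative name and nothing here is taken from the changeset itself:

    // Hypothetical client-side consumer for the /api/logs SSE proxy above.
    export function subscribeToLogs(onChunk: (chunk: string) => void): () => void {
      const source = new EventSource("/api/logs");

      // Each chunk the proxy writes as "data: ..." arrives as a message event.
      source.onmessage = (event: MessageEvent<string>) => {
        onChunk(event.data);
      };

      source.onerror = () => {
        // EventSource retries automatically; close if the stream is done.
        source.close();
      };

      // Cleanup function, e.g. for a React useEffect teardown.
      return () => source.close();
    }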
							
								
								
									
30  src/pages/api/me.ts  Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   try { | ||||
|     const headers = new Headers(); | ||||
|     headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|     headers.set("content-type", "application/json"); | ||||
|  | ||||
|     const response = await fetch( | ||||
|       `${global.process.env.NEXT_PUBLIC_API_URL}/api/auth/users/me`, | ||||
|       { | ||||
|         method: "GET", | ||||
|         headers, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     if (!response.ok) { | ||||
|       throw new Error(`Error: ${response.statusText}`); | ||||
|     } | ||||
|  | ||||
|     const result = await response.json(); | ||||
|     res.status(200).json(result); | ||||
|   } catch (error) { | ||||
|     console.error("Error fetching current user:", error); | ||||
|     res.status(500).json({ error: "Internal Server Error" }); | ||||
|   } | ||||
| } | ||||
							
								
								
									
38  src/pages/api/retrieve.ts  Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/json"); | ||||
|     headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/retrieve-scrape-jobs`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error retrieving scrape jobs:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
							
								
								
									
39  src/pages/api/schedule-cron-job.ts  Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|     console.log("Data", data); | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/json"); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/schedule-cron-job`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         console.error(response); | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error scheduling cron job:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
							
								
								
									
37  src/pages/api/signup.ts  Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/json"); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/auth/signup`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error signing up:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
							
								
								
									
38  src/pages/api/submit-scrape-job.ts  Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|     headers.set("content-type", "application/json"); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/submit-scrape-job`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error submitting scrape job:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
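Callers POST a { data } envelope to this route and forward their token in the Authorization header. A sketch of a client helper, with the job fields left opaque because their shape is not part of this diff:

    // Hypothetical client call to the /api/submit-scrape-job proxy above.
    // The job payload type is intentionally unknown here; only the { data }
    // envelope and the raw-token Authorization header come from the route.
    export async function submitScrapeJob(token: string, job: unknown): Promise<unknown> {
      const response = await fetch("/api/submit-scrape-job", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          // The proxy wraps this value in "Bearer ..." before forwarding it.
          Authorization: token,
        },
        body: JSON.stringify({ data: job }),
      });

      if (!response.ok) {
        throw new Error(`Submit failed: ${response.statusText}`);
      }

      return response.json();
    }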
							
								
								
									
39  src/pages/api/token.ts  Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const body = new URLSearchParams(req.body as string); | ||||
|     const username = body.get("username") || ""; | ||||
|     const password = body.get("password") || ""; | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/x-www-form-urlencoded"); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/auth/token`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: new URLSearchParams({ username, password }).toString(), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         throw new Error(`Error: ${response.statusText}`); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error requesting auth token:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
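The token route forwards a urlencoded username/password pair to the backend's /api/auth/token. A login helper might look like the sketch below; the access_token field in the response is an assumption based on the OAuth2-style flow, not something this diff shows:

    // Hypothetical login helper for the /api/token proxy above.
    // The { access_token } response shape is assumed, not confirmed by this diff.
    export async function login(username: string, password: string): Promise<string> {
      const response = await fetch("/api/token", {
        method: "POST",
        headers: { "Content-Type": "application/x-www-form-urlencoded" },
        body: new URLSearchParams({ username, password }).toString(),
      });

      if (!response.ok) {
        throw new Error(`Login failed: ${response.statusText}`);
      }

      const result = await response.json();
      return result.access_token;
    }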
							
								
								
									
48  src/pages/api/update.ts  Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| import { NextApiRequest, NextApiResponse } from "next"; | ||||
|  | ||||
| export default async function handler( | ||||
|   req: NextApiRequest, | ||||
|   res: NextApiResponse | ||||
| ) { | ||||
|   if (req.method === "POST") { | ||||
|     const { data } = req.body; | ||||
|  | ||||
|     const headers = new Headers(); | ||||
|     headers.set("content-type", "application/json"); | ||||
|     headers.set("Authorization", `Bearer ${req.headers.authorization}`); | ||||
|  | ||||
|     try { | ||||
|       const response = await fetch( | ||||
|         `${global.process.env.NEXT_PUBLIC_API_URL}/api/update`, | ||||
|         { | ||||
|           method: "POST", | ||||
|           headers, | ||||
|           body: JSON.stringify(data), | ||||
|         } | ||||
|       ); | ||||
|  | ||||
|       if (!response.ok) { | ||||
|         const errorDetails = await response.text(); | ||||
|         if (response.status === 422) { | ||||
|           console.error(`422 Error: ${errorDetails}`); | ||||
|         } | ||||
|         throw new Error( | ||||
|           `Error updating job: ${response.statusText} - ${errorDetails}` | ||||
|         ); | ||||
|       } | ||||
|  | ||||
|       const result = await response.json(); | ||||
|       res.status(200).json(result); | ||||
|     } catch (error) { | ||||
|       console.error("Error updating job:", error); | ||||
|       res.status(500).json({ error: "Internal Server Error" }); | ||||
|     } | ||||
|   } else { | ||||
|     res.setHeader("Allow", ["POST"]); | ||||
|     res.status(405).end(`Method ${req.method} Not Allowed`); | ||||
|   } | ||||
| } | ||||
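Every API route above repeats the same forward-and-relay pattern: set JSON and Authorization headers, fetch the backend, relay JSON or return a 500. If that repetition ever became a burden, it could be collapsed into a shared factory along these lines; createProxyHandler and its options are invented for illustration and are not part of this changeset:

    // Hypothetical shared helper capturing the repeated proxy pattern above.
    import { NextApiRequest, NextApiResponse } from "next";

    export function createProxyHandler(path: string, method: "GET" | "POST") {
      return async (req: NextApiRequest, res: NextApiResponse) => {
        const headers = new Headers();
        headers.set("content-type", "application/json");
        headers.set("Authorization", `Bearer ${req.headers.authorization}`);

        try {
          const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}${path}`, {
            method,
            headers,
            // POST routes in this changeset send a { data } envelope from the client.
            body: method === "POST" ? JSON.stringify(req.body.data) : undefined,
          });

          if (!response.ok) {
            throw new Error(`Error: ${response.statusText}`);
          }

          res.status(200).json(await response.json());
        } catch (error) {
          console.error(`Error proxying ${path}:`, error);
          res.status(500).json({ error: "Internal Server Error" });
        }
      };
    }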
| @@ -86,7 +86,9 @@ const AI: React.FC = () => { | ||||
|       headers: { | ||||
|         "Content-Type": "application/json", | ||||
|       }, | ||||
|       body: JSON.stringify({ messages: [jobMessage, ...messages, newMessage] }), | ||||
|       body: JSON.stringify({ | ||||
|         data: { messages: [jobMessage, ...messages, newMessage] }, | ||||
|       }), | ||||
|     }); | ||||
|  | ||||
|     const updatedMessages = [...messages, newMessage]; | ||||
|   | ||||
							
								
								
									
4  src/pages/cron-jobs.tsx  Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| import { CronJobs } from "../components/pages/cron-jobs"; | ||||
| import { getServerSideProps } from "../components/pages/cron-jobs/get-server-side-props"; | ||||
| export { getServerSideProps }; | ||||
| export default CronJobs; | ||||
Some files were not shown because too many files have changed in this diff.