Mirror of https://github.com/jaypyles/Scraperr.git (synced 2025-11-05 08:54:27 +00:00)

Compare commits (8 commits)
Commits: d4edb9d93e, 5ebd96b62b, d602d3330a, 6639e8b48f, 263e46ba4d, f815a58efc, 50ec5df657, 28de0f362c
.dockerignore (new file, +4):

node_modules
npm-debug.log
Dockerfile
.dockerignore
CI composite action:

@@ -15,11 +15,11 @@ runs:
   - name: Setup Docker project
     shell: bash
-    run: make build up-dev
+    run: make build-ci up-ci

   - name: Install dependencies
     shell: bash
-    run: npm install
+    run: yarn install

   - name: Wait for frontend to be ready
     shell: bash
.github/workflows/docker-image.yml (vendored, 5 changes):

@@ -1,14 +1,9 @@
 name: Docker Image
 on:
-  workflow_run:
-    workflows: ["Unit Tests"]
-    types:
-      - completed
   workflow_dispatch:

 jobs:
   build:
-    if: ${{ github.event.workflow_run.conclusion == 'success' && github.ref == 'refs/heads/master' }}
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
.github/workflows/unit-tests.yml (vendored, 2 changes):

@@ -30,7 +30,7 @@ jobs:
       run: pdm run playwright install

     - name: Run tests
-      run: PYTHONPATH=. pdm run pytest api/backend/tests
+      run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests

   cypress-tests:
     runs-on: ubuntu-latest
.gitignore (vendored, 16 changes):

@@ -188,4 +188,18 @@ postgres_data
 .vscode
 ollama
 data
-media
+media/images
+media/videos
+media/audio
+media/pdfs
+media/spreadsheets
+media/presentations
+media/documents
+media/recordings
+media/download_summary.txt
+
+cypress/screenshots
+cypress/videos
+
+docker-compose.dev.local.yml
Makefile (12 changes):

@@ -1,6 +1,6 @@
 .DEFAULT_GOAL := help

-COMPOSE_DEV = docker compose -f docker-compose.yml -f docker-compose.dev.yml
+COMPOSE_DEV = docker compose -f docker-compose.yml -f docker-compose.dev.local.yml
 COMPOSE_PROD = docker compose -f docker-compose.yml

 .PHONY: help deps build pull up up-dev down setup deploy
@@ -17,6 +17,7 @@ help:
 	@echo " make down - Stop and remove containers, networks, images, and volumes"
 	@echo " make setup - Setup server with dependencies and clone repo"
 	@echo " make deploy - Deploy site onto server"
+	@echo " make cypress-start - Start Cypress"
 	@echo ""

 logs:
@@ -51,3 +52,12 @@ setup:

 deploy:
 	ansible-playbook -i ./ansible/inventory.yaml ./ansible/deploy_site.yaml -v
+
+build-ci:
+	docker compose -f docker-compose.yml -f docker-compose.dev.yml build
+
+up-ci:
+	docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --force-recreate
+
+cypress-start:
+	DISPLAY=:0 npx cypress open
README.md (2 changes):

@@ -13,7 +13,7 @@

 ## 📋 Overview

-Scraperr enables you to extract data from websites with precision using XPath selectors. This self-hosted application provides a clean interface to manage scraping jobs, view results, and export data.
+Scrape websites without writing a single line of code.

 > 📚 **[Check out the docs](https://scraperr-docs.pages.dev)** for a comprehensive quickstart guide and detailed information.

@@ -29,7 +29,7 @@
 - **Custom Headers**: Add JSON headers to your scraping requests
 - **Media Downloads**: Automatically download images, videos, and other media
 - **Results Visualization**: View scraped data in a structured table format
-- **Data Export**: Export your results in various formats
+- **Data Export**: Export your results in markdown and csv formats
 - **Notifcation Channels**: Send completion notifcations, through various channels

 ## 🚀 Getting Started
api/backend/ai/agent/actions.py (new file, +6):

from typing_extensions import TypedDict


class Action(TypedDict):
    type: str
    url: str
api/backend/ai/agent/agent.py (new file, +94):

import random
from typing import Any

from camoufox import AsyncCamoufox
from playwright.async_api import Page

from api.backend.ai.agent.utils import (
    capture_elements,
    convert_to_markdown,
    parse_response,
)

from api.backend.ai.clients import ask_open_ai, ask_ollama, open_ai_key

from api.backend.ai.agent.prompts import (
    ELEMENT_EXTRACTION_PROMPT,
    EXTRACT_ELEMENTS_PROMPT,
)

from api.backend.job.scraping.collect_media import collect_media
from api.backend.worker.logger import LOG

from api.backend.job.scraping.add_custom import add_custom_items

from api.backend.models import CapturedElement


ask_ai = ask_open_ai if open_ai_key else ask_ollama


async def scrape_with_agent(agent_job: dict[str, Any]):
    LOG.info(f"Starting work for agent job: {agent_job}")
    pages = set()

    if agent_job["job_options"]["proxies"]:
        proxy = random.choice(agent_job["job_options"]["proxies"])
        LOG.info(f"Using proxy: {proxy}")

    async with AsyncCamoufox(headless=True) as browser:
        page: Page = await browser.new_page()

        await add_custom_items(
            agent_job["url"],
            page,
            agent_job["job_options"]["custom_cookies"],
            agent_job["job_options"]["custom_headers"],
        )

        try:
            await page.set_viewport_size({"width": 1920, "height": 1080})
            await page.goto(agent_job["url"], timeout=60000)

            if agent_job["job_options"]["collect_media"]:
                await collect_media(agent_job["id"], page)

            html_content = await page.content()
            markdown_content = convert_to_markdown(html_content)

            response = await ask_ai(
                ELEMENT_EXTRACTION_PROMPT.format(
                    extraction_prompt=EXTRACT_ELEMENTS_PROMPT,
                    webpage=markdown_content,
                    prompt=agent_job["prompt"],
                )
            )

            xpaths = parse_response(response)

            captured_elements = await capture_elements(page, xpaths)

            final_url = page.url

            pages.add((html_content, final_url))
        finally:
            await page.close()
            await browser.close()

    name_to_elements = {}

    for page in pages:
        for element in captured_elements:
            if element.name not in name_to_elements:
                name_to_elements[element.name] = []

            name_to_elements[element.name].append(element)

    scraped_elements: list[dict[str, dict[str, list[CapturedElement]]]] = [
        {
            page[1]: name_to_elements,
        }
        for page in pages
    ]

    return scraped_elements
api/backend/ai/agent/prompts.py (new file, +58):

EXTRACT_ELEMENTS_PROMPT = """
You are an assistant that extracts XPath expressions from webpages.

You will receive HTML content in markdown format.

Each element in the markdown has their xpath shown above them in a path like:
<!-- //div -->

Respond only with a list of general XPath expressions inside `<xpaths>...</xpaths>` tags.

You will also decide the decision of what to do next. If there is no decision available, return nothing for that section.
"""

ELEMENT_EXTRACTION_PROMPT = """
{extraction_prompt}

**Guidelines:**
- Prefer shorter, more general XPaths like `//div[...]` or `//span[...]`.
- Avoid overly specific or deep paths like `//div[3]/ul/li[2]/a`.
- Do **not** chain multiple elements deeply (e.g., `//div/span/a`).
- Use XPaths further down the tree when possible.
- Do not include any extra explanation or text.
- One XPath is acceptable if that's all that's needed.
- Try and limit it down to 1 - 3 xpaths.
- Include a name for each xpath.

<important>
- USE THE MOST SIMPLE XPATHS POSSIBLE.
- USE THE MOST GENERAL XPATHS POSSIBLE.
- USE THE MOST SPECIFIC XPATHS POSSIBLE.
- USE THE MOST GENERAL XPATHS POSSIBLE.
</important>

**Example Format:**
```xml
<xpaths>
- <name: insert_name_here>: <xpath: //div>
- <name: insert_name_here>: <xpath: //span>
- <name: insert_name_here>: <xpath: //span[contains(@text, 'example')]>
- <name: insert_name_here>: <xpath: //div[contains(@text, 'example')]>
- <name: insert_name_here>: <xpath: //a[@href]>
- etc
</xpaths>

<decision>
<next_page>
- //a[@href='next_page_url']
</next_page>
</decision>
```

**Input webpage:**
{webpage}

**Target content:**
{prompt}

"""
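The `**Example Format:**` block doubles as a parsing contract: `parse_response` in api/backend/ai/agent/utils.py (the next file) recovers the names and XPaths from it with regular expressions. A minimal round-trip sketch, assuming a model reply that follows the format exactly:

```python
import re

# Hypothetical model output in the format the prompt requests.
reply = """<xpaths>
- <name: book_title>: <xpath: //h3/a>
- <name: price>: <xpath: //p[contains(@class, 'price')]>
</xpaths>"""

# Same extraction logic as parse_response: pull the <xpaths> block,
# then read the <name: ...> and <xpath: ...> fields from each "-" line.
block = re.findall(r"<xpaths>(.*?)</xpaths>", reply, re.DOTALL)[0]
for line in block.strip().splitlines():
    if line.strip().startswith("-"):
        name = re.findall(r"<name: (.*?)>", line)[0]
        xpath = re.findall(r"<xpath: (.*?)>", line)[0]
        print({"name": name, "xpath": xpath})
# {'name': 'book_title', 'xpath': '//h3/a'}
# {'name': 'price', 'xpath': "//p[contains(@class, 'price')]"}
```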
api/backend/ai/agent/utils.py (new file, +252):

from lxml import html, etree
import re
from playwright.async_api import Page

from api.backend.models import CapturedElement

from api.backend.job.scraping.scraping_utils import clean_format_characters


def convert_to_markdown(html_str: str):
    parser = html.HTMLParser()
    tree = html.fromstring(html_str, parser=parser)
    root = tree.getroottree()

    def format_attributes(el: etree._Element) -> str:
        """Convert element attributes into a string."""
        return " ".join(f'{k}="{v}"' for k, v in el.attrib.items())

    def is_visible(el: etree._Element) -> bool:
        style = el.attrib.get("style", "").lower()
        class_ = el.attrib.get("class", "").lower()

        # Check for visibility styles
        if "display: none" in style or "visibility: hidden" in style:
            return False
        if "opacity: 0" in style or "opacity:0" in style:
            return False
        if "height: 0" in style or "width: 0" in style:
            return False

        # Check for common hidden classes
        if any(
            hidden in class_
            for hidden in ["hidden", "invisible", "truncate", "collapse"]
        ):
            return False

        # Check for hidden attributes
        if el.attrib.get("hidden") is not None:
            return False
        if el.attrib.get("aria-hidden") == "true":
            return False

        # Check for empty or whitespace-only content
        if not el.text and len(el) == 0:
            return False

        return True

    def is_layout_or_decorative(el: etree._Element) -> bool:
        tag = el.tag.lower()

        # Layout elements
        if tag in {"nav", "footer", "header", "aside", "main", "section"}:
            return True

        # Decorative elements
        if tag in {"svg", "path", "circle", "rect", "line", "polygon", "polyline"}:
            return True

        # Check id and class for layout/decorative keywords
        id_class = " ".join(
            [el.attrib.get("id", ""), el.attrib.get("class", "")]
        ).lower()

        layout_keywords = {
            "sidebar", "nav", "header", "footer", "menu", "advert", "ads",
            "breadcrumb", "container", "wrapper", "layout", "grid", "flex",
            "row", "column", "section", "banner", "hero", "card", "modal",
            "popup", "tooltip", "dropdown", "overlay",
        }

        return any(keyword in id_class for keyword in layout_keywords)

    # Tags to include in the final markdown output
    included_tags = {
        "div", "span", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6",
        "img", "button", "input", "textarea", "ul", "ol", "li",
        "table", "tr", "td", "th", "select", "option", "optgroup",
        "fieldset", "legend",
    }

    special_elements = []
    normal_elements = []

    for el in tree.iter():
        if el.tag is etree.Comment:
            continue

        tag = el.tag.lower()

        if tag not in included_tags:
            continue

        if not is_visible(el):
            continue

        if is_layout_or_decorative(el):
            continue

        path = root.getpath(el)
        attrs = format_attributes(el)
        attrs_str = f" {attrs}" if attrs else ""
        text = el.text.strip() if el.text else ""

        if not text and not attrs:
            continue

        # Special-cased interactive elements
        if tag == "button":
            prefix = "🔘 **<button>**"
            special_elements.append(f"<!-- {path} -->\n{prefix} {text}")
        elif tag == "a":
            href = el.attrib.get("href", "")
            prefix = f"🔗 **<a href='{href}'>**"
            special_elements.append(f"<!-- {path} -->\n{prefix} {text}")
        elif tag == "input":
            input_type = el.attrib.get("type", "text")
            prefix = f"📝 **<input type='{input_type}'>**"
            special_elements.append(f"<!-- {path} -->\n{prefix}")
        else:
            prefix = f"**<{tag}{attrs_str}>**"

            if text:
                normal_elements.append(f"<!-- {path} -->\n{prefix} {text}")

    return "\n\n".join(normal_elements + special_elements)  # type: ignore


def parse_response(text: str) -> list[dict[str, str]]:
    xpaths = re.findall(r"<xpaths>(.*?)</xpaths>", text, re.DOTALL)
    results = []

    if xpaths:
        lines = xpaths[0].strip().splitlines()
        for line in lines:
            if line.strip().startswith("-"):
                name = re.findall(r"<name: (.*?)>", line)[0]
                xpath = re.findall(r"<xpath: (.*?)>", line)[0]
                results.append({"name": name, "xpath": xpath})
            else:
                results.append({"name": "", "xpath": line.strip()})

    return results


def parse_next_page(text: str) -> str | None:
    next_page = re.findall(r"<next_page>(.*?)</next_page>", text, re.DOTALL)

    if next_page:
        lines = next_page[0].strip().splitlines()
        next_page = [
            line.strip().lstrip("-").strip()
            for line in lines
            if line.strip().startswith("-")
        ]

    return next_page[0] if next_page else None


async def capture_elements(
    page: Page, xpaths: list[dict[str, str]]
) -> list[CapturedElement]:
    captured_elements = []
    seen_texts = set()

    for xpath in xpaths:
        try:
            locator = page.locator(f"xpath={xpath['xpath']}")
            count = await locator.count()

            for i in range(count):
                element_text = ""

                element_handle = await locator.nth(i).element_handle()

                if not element_handle:
                    continue

                link = await element_handle.get_attribute("href") or ""

                text = await element_handle.text_content()

                if text:
                    element_text += text

                if link:
                    element_text += f" ({link})"

                cleaned = clean_format_characters(element_text)

                if cleaned in seen_texts:
                    continue

                seen_texts.add(cleaned)

                captured_elements.append(
                    CapturedElement(
                        name=xpath["name"],
                        text=cleaned,
                        xpath=xpath["xpath"],
                    )
                )

        except Exception as e:
            print(f"Error processing xpath {xpath}: {e}")

    return captured_elements
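`convert_to_markdown` is what produces the `<!-- /xpath -->` comment lines that `EXTRACT_ELEMENTS_PROMPT` tells the model to expect above each element. A small runnable illustration of that labelling scheme (the visibility filtering and emoji prefixes of the real function are omitted here):

```python
from lxml import html

snippet = "<div><a href='/next'>Next page</a><p>Hello</p></div>"
tree = html.fromstring(snippet)
root = tree.getroottree()

# Same labelling scheme convert_to_markdown emits: an XPath comment line
# above each element the model is allowed to target.
for el in tree.iter():
    if el.tag in {"a", "p"}:
        print(f"<!-- {root.getpath(el)} -->")
        print(el.text)
# <!-- /div/a -->
# Next page
# <!-- /div/p -->
# Hello
```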
AI router module:

@@ -1,32 +1,29 @@
 # STL
-import os
 import logging
 from collections.abc import Iterable, AsyncGenerator

 # PDM
-from openai import OpenAI
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai.types.chat import ChatCompletionMessageParam

 # LOCAL
-from ollama import Message, AsyncClient
+from ollama import Message
 from api.backend.models import AI
+
+from api.backend.ai.clients import (
+    llama_client,
+    llama_model,
+    openai_client,
+    open_ai_model,
+    open_ai_key,
+)

 LOG = logging.getLogger(__name__)

 ai_router = APIRouter()

-# Load environment variables
-open_ai_key = os.getenv("OPENAI_KEY")
-open_ai_model = os.getenv("OPENAI_MODEL")
-llama_url = os.getenv("OLLAMA_URL")
-llama_model = os.getenv("OLLAMA_MODEL")
-
-# Initialize clients
-openai_client = OpenAI(api_key=open_ai_key) if open_ai_key else None
-llama_client = AsyncClient(host=llama_url) if llama_url else None
-
 async def llama_chat(chat_messages: list[Message]) -> AsyncGenerator[str, None]:
     if llama_client and llama_model:
api/backend/ai/clients.py (new file, +38):

import os

from openai import OpenAI
from ollama import AsyncClient


# Load environment variables
open_ai_key = os.getenv("OPENAI_KEY")
open_ai_model = os.getenv("OPENAI_MODEL")
llama_url = os.getenv("OLLAMA_URL")
llama_model = os.getenv("OLLAMA_MODEL")

# Initialize clients
openai_client = OpenAI(api_key=open_ai_key) if open_ai_key else None
llama_client = AsyncClient(host=llama_url) if llama_url else None


async def ask_open_ai(prompt: str) -> str:
    if not openai_client:
        raise ValueError("OpenAI client not initialized")

    response = openai_client.chat.completions.create(
        model=open_ai_model or "gpt-4.1-mini",
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content or ""


async def ask_ollama(prompt: str) -> str:
    if not llama_client:
        raise ValueError("Ollama client not initialized")

    response = await llama_client.chat(
        model=llama_model or "", messages=[{"role": "user", "content": prompt}]
    )

    return response.message.content or ""
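Moving the clients into their own module lets any caller choose a backend with a single expression, exactly as agent.py does above. A minimal usage sketch, assuming `OPENAI_KEY` or `OLLAMA_URL`/`OLLAMA_MODEL` is set and the `api` package is importable:

```python
import asyncio

from api.backend.ai.clients import ask_open_ai, ask_ollama, open_ai_key

# Prefer OpenAI when a key is configured, otherwise fall back to Ollama.
ask_ai = ask_open_ai if open_ai_key else ask_ollama


async def main():
    answer = await ask_ai("Summarize what an XPath selector is in one sentence.")
    print(answer)


asyncio.run(main())
```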
Application entrypoint (FastAPI app):

@@ -2,6 +2,7 @@
 import os
 import logging
 import apscheduler  # type: ignore
+from contextlib import asynccontextmanager

 # PDM
 import apscheduler.schedulers
@@ -33,7 +34,30 @@ logging.basicConfig(

 LOG = logging.getLogger(__name__)

-app = FastAPI(title="api", root_path="/api")
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup
+    LOG.info("Starting application...")
+
+    init_database()
+
+    LOG.info("Starting cron scheduler...")
+    start_cron_scheduler(scheduler)
+    scheduler.start()
+    LOG.info("Cron scheduler started successfully")
+
+    yield
+
+    # Shutdown
+    LOG.info("Shutting down application...")
+    LOG.info("Stopping cron scheduler...")
+    scheduler.shutdown(wait=False)  # Set wait=False to not block shutdown
+    LOG.info("Cron scheduler stopped")
+    LOG.info("Application shutdown complete")
+
+
+app = FastAPI(title="api", root_path="/api", lifespan=lifespan)

 app.add_middleware(
     CORSMiddleware,
@@ -43,28 +67,12 @@ app.add_middleware(
     allow_headers=["*"],
 )

 app.include_router(auth_router)
 app.include_router(ai_router)
 app.include_router(job_router)
 app.include_router(stats_router)

-
-@app.on_event("startup")
-async def startup_event():
-    start_cron_scheduler(scheduler)
-    scheduler.start()
-
-    if os.getenv("ENV") != "test":
-        init_database()
-    LOG.info("Starting up...")
-
-
-@app.on_event("shutdown")
-def shutdown_scheduler():
-    scheduler.shutdown(wait=False)  # Set wait=False to not block shutdown
-
-
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(request: Request, exc: RequestValidationError):
     exc_str = f"{exc}".replace("\n", " ").replace("   ", " ")
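This hunk replaces the deprecated `@app.on_event` hooks with FastAPI's lifespan protocol: everything before `yield` runs once at startup, everything after it runs once at shutdown, and both halves share one scope. Note one behavioral change: the old startup hook skipped `init_database()` when `ENV` was `test`, while the new lifespan always runs it. A stripped-down sketch of the pattern:

```python
from contextlib import asynccontextmanager

from fastapi import FastAPI


@asynccontextmanager
async def lifespan(app: FastAPI):
    print("startup: open pools, start schedulers")  # before the app serves requests
    yield
    print("shutdown: stop schedulers, close pools")  # after the server stops


app = FastAPI(lifespan=lifespan)
```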
Auth router (/auth/check):

@@ -66,4 +66,8 @@ async def read_users_me(current_user: User = Depends(get_current_user)):

 @auth_router.get("/auth/check")
 async def check_auth():
-    return {"registration": os.environ.get("REGISTRATION_ENABLED", "True") == "True"}
+    return {
+        "registration": os.environ.get("REGISTRATION_ENABLED", "True") == "True",
+        "recordings_enabled": os.environ.get("RECORDINGS_ENABLED", "true").lower()
+        == "true",
+    }
api/backend/constants.py:

@@ -1 +1,16 @@
+from pathlib import Path
+import os
+
 DATABASE_PATH = "data/database.db"
+RECORDINGS_DIR = Path("media/recordings")
+RECORDINGS_ENABLED = os.getenv("RECORDINGS_ENABLED", "true").lower() == "true"
+MEDIA_DIR = Path("media")
+MEDIA_TYPES = [
+    "audio",
+    "documents",
+    "images",
+    "pdfs",
+    "presentations",
+    "spreadsheets",
+    "videos",
+]
Job SQL queries:

@@ -1,7 +1,7 @@
 JOB_INSERT_QUERY = """
 INSERT INTO jobs
-(id, url, elements, user, time_created, result, status, chat, job_options)
-VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+(id, url, elements, user, time_created, result, status, chat, job_options, agent_mode, prompt)
+VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 """

 DELETE_JOB_QUERY = """
Database init schema:

@@ -27,4 +27,7 @@ CREATE TABLE IF NOT EXISTS cron_jobs (
     time_updated DATETIME NOT NULL,
     FOREIGN KEY (job_id) REFERENCES jobs(id)
 );
+
+ALTER TABLE jobs ADD COLUMN agent_mode BOOLEAN NOT NULL DEFAULT FALSE;
+ALTER TABLE jobs ADD COLUMN prompt STRING;
 """
api/backend/database/startup.py:

@@ -1,6 +1,7 @@
 import os
 from api.backend.database.common import connect, QUERIES, insert
 import logging
+import sqlite3

 from api.backend.auth.auth_utils import get_password_hash

@@ -11,11 +12,22 @@ def init_database():
     cursor = connect()

     for query in QUERIES["init"].strip().split(";"):
-        if query.strip():
+        query = query.strip()
+        if not query:
+            continue
+
+        try:
             LOG.info(f"Executing query: {query}")
             _ = cursor.execute(query)
+        except sqlite3.OperationalError as e:
+            if "duplicate column name" in str(e).lower():
+                LOG.warning(f"Skipping duplicate column error: {e}")
+                continue
+            else:
+                LOG.error(f"Error executing query: {query}")
+                raise

-    if os.environ.get("REGISTRATION_ENABLED", "True") == "False":
+    if os.environ.get("REGISTRATION_ENABLED", "true").lower() == "false":
         default_user_email = os.environ.get("DEFAULT_USER_EMAIL")
         default_user_password = os.environ.get("DEFAULT_USER_PASSWORD")
         default_user_full_name = os.environ.get("DEFAULT_USER_FULL_NAME")
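Because the init schema now ends with two `ALTER TABLE` statements that re-run on every boot, the second boot would normally fail with `sqlite3.OperationalError: duplicate column name`. The new try/except turns exactly that error into a warning, which is what makes this ad-hoc migration idempotent. A self-contained demonstration against an in-memory database:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE jobs (id TEXT)")

for attempt in (1, 2):
    try:
        conn.execute(
            "ALTER TABLE jobs ADD COLUMN agent_mode BOOLEAN NOT NULL DEFAULT FALSE"
        )
        print(f"attempt {attempt}: column added")
    except sqlite3.OperationalError as e:
        if "duplicate column name" in str(e).lower():
            print(f"attempt {attempt}: skipped ({e})")  # second run lands here
        else:
            raise
# attempt 1: column added
# attempt 2: skipped (duplicate column name: agent_mode)
```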
Database insert helper:

@@ -27,6 +27,8 @@ def insert(item: dict[str, Any]) -> None:
             item["status"],
             item["chat"],
             item["job_options"],
+            item["agent_mode"],
+            item["prompt"],
         ),
     )
     LOG.info(f"Inserted item: {item}")
api/backend/job/scraping/collect_media.py:

@@ -1,6 +1,7 @@
 import os
 from pathlib import Path
-from urllib.parse import urlparse
+import re
+from urllib.parse import urljoin, urlparse
 from typing import Dict, List

 import aiohttp
@@ -9,12 +10,12 @@ from playwright.async_api import Page
 from api.backend.utils import LOG


-async def collect_media(page: Page) -> dict[str, list[dict[str, str]]]:
+async def collect_media(id: str, page: Page) -> dict[str, list[dict[str, str]]]:
     media_types = {
         "images": "img",
         "videos": "video",
         "audio": "audio",
-        "pdfs": 'a[href$=".pdf"]',
+        "pdfs": 'a[href$=".pdf"], a[href*=".pdf#page="]',
         "documents": 'a[href$=".doc"], a[href$=".docx"], a[href$=".txt"], a[href$=".rtf"]',
         "presentations": 'a[href$=".ppt"], a[href$=".pptx"]',
         "spreadsheets": 'a[href$=".xls"], a[href$=".xlsx"], a[href$=".csv"]',
@@ -48,6 +49,11 @@ async def collect_media(id: str, page: Page) -> dict[str, list[dict[str, str]]]:
                 root_domain = f"{root_url.scheme}://{root_url.netloc}"
                 url = f"{root_domain}{url}"

+            if url and re.match(r"^[\w\-]+/", url):
+                root_url = urlparse(page.url)
+                root_domain = f"{root_url.scheme}://{root_url.netloc}"
+                url = urljoin(root_domain + "/", url)
+
             if url and url.startswith(("http://", "https://")):
                 try:
                     parsed = urlparse(url)
@@ -67,15 +73,20 @@ async def collect_media(id: str, page: Page) -> dict[str, list[dict[str, str]]]:
                     }.get(media_type, "")
                     filename += ext

-                    file_path = media_dir / filename
+                    if not os.path.exists(media_dir / id):
+                        os.makedirs(media_dir / id, exist_ok=True)
+
+                    file_path = media_dir / id / f"{filename}"

                     async with session.get(url) as response:
                         response.raise_for_status()

                         with open(file_path, "wb") as f:
                             while True:
                                 chunk = await response.content.read(8192)
                                 if not chunk:
                                     break
                                 f.write(chunk)

                     urls.append({"url": url, "local_path": str(file_path)})
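The new `re.match(r"^[\w\-]+/", url)` branch catches bare relative links such as `media/cache/photo.jpg`, which the absolute-URL and root-relative branches above it miss, and resolves them against the page's origin. Note that it joins against the site root rather than the page's own directory. A quick illustration with a hypothetical page URL:

```python
import re
from urllib.parse import urljoin, urlparse

page_url = "https://example.com/catalog/page-2.html"  # hypothetical page
url = "media/cache/photo.jpg"                          # bare relative link as scraped

if re.match(r"^[\w\-]+/", url):
    root = urlparse(page_url)
    root_domain = f"{root.scheme}://{root.netloc}"
    print(urljoin(root_domain + "/", url))
# https://example.com/media/cache/photo.jpg
```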
api/backend/job/scraping/scraping_utils.py:

@@ -8,7 +8,7 @@ from api.backend.job.scraping.collect_media import collect_media as collect_medi

 async def scrape_content(
-    page: Page, pages: Set[Tuple[str, str]], collect_media: bool
+    id: str, page: Page, pages: Set[Tuple[str, str]], collect_media: bool
 ) -> str:
     last_height = await page.evaluate("document.body.scrollHeight")

@@ -27,6 +27,19 @@ async def scrape_content(

     if collect_media:
         LOG.info("Collecting media")
-        await collect_media_utils(page)
+        await collect_media_utils(id, page)

     return html
+
+
+def clean_format_characters(text: str) -> str:
+    text = text.strip()
+    text = text.replace("\n", " ")
+    text = text.replace("\t", " ")
+    text = text.replace("\r", " ")
+    text = text.replace("\f", " ")
+    text = text.replace("\v", " ")
+    text = text.replace("\b", " ")
+    text = text.replace("\a", " ")
+
+    return text
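`clean_format_characters` maps each control or format character to a plain space rather than deleting it, so a `\r\n` pair leaves two spaces behind: it normalizes, it does not collapse. An equivalent loop-based form with a worked example:

```python
def clean_format_characters(text: str) -> str:
    # Same behavior as the function above, written as a loop.
    text = text.strip()
    for ch in ("\n", "\t", "\r", "\f", "\v", "\b", "\a"):
        text = text.replace(ch, " ")
    return text


print(repr(clean_format_characters("  Price:\n\t£51.77\r\n")))
# 'Price:  £51.77'  (the \n and \t each became one space)
```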
api/backend/job/site_mapping/site_mapping.py:

@@ -24,7 +24,6 @@ def clear_done_actions(site_map: dict[str, Any]) -> dict[str, Any]:
 async def handle_input(action: Action, page: Page) -> bool:
     try:
         element = page.locator(f"xpath={action.xpath}")
-        await element.wait_for(state="visible", timeout=10000)
         LOG.info(f"Sending keys: {action.input} to element: {action.xpath}")
         await element.fill(action.input)
         return True
@@ -36,7 +35,6 @@ async def handle_input(action: Action, page: Page) -> bool:
 async def handle_click(action: Action, page: Page) -> bool:
     try:
         element = page.locator(f"xpath={action.xpath}")
-        await element.wait_for(state="visible", timeout=10000)
         LOG.info(f"Clicking element: {action.xpath}")
         await element.click()
         return True
@@ -52,6 +50,7 @@ ACTION_MAP = {

 async def handle_site_mapping(
+    id: str,
     site_map_dict: dict[str, Any],
     page: Page,
     pages: set[tuple[str, str]],
@@ -68,11 +67,11 @@ async def handle_site_mapping(

     await asyncio.sleep(2)

-    await scrape_content(page, pages, collect_media=collect_media)
+    await scrape_content(id, page, pages, collect_media=collect_media)

     cleared_site_map_dict = clear_done_actions(site_map_dict)

     if cleared_site_map_dict["actions"]:
         await handle_site_mapping(
-            cleared_site_map_dict, page, pages, collect_media=collect_media
+            id, cleared_site_map_dict, page, pages, collect_media=collect_media
         )
api/backend/models.py:

@@ -58,6 +58,8 @@ class Job(pydantic.BaseModel):
     job_options: JobOptions
     status: str = "Queued"
     chat: Optional[str] = None
+    agent_mode: bool = False
+    prompt: Optional[str] = None


 class CronJob(pydantic.BaseModel):
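With `agent_mode` and `prompt` on the model (and the matching column migrations earlier in this diff), an agent job can omit the hand-written element list entirely. A hypothetical payload, with field names as the worker and agent read them later in this diff and all values invented for illustration:

```python
job = {
    "id": "0b1c2d3e",                      # hypothetical values throughout
    "url": "https://books.toscrape.com",
    "elements": [],                        # unused when agent_mode is set
    "agent_mode": True,
    "prompt": "Get the title and price of every book on the page.",
    "job_options": {
        "proxies": [],
        "custom_cookies": [],
        "custom_headers": {},
        "collect_media": False,
        "multi_page_scrape": False,
        "site_map": None,
    },
}
```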
Job router:

@@ -10,7 +10,7 @@ import random
 # PDM
 from fastapi import Depends, APIRouter
 from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
 from api.backend.scheduler import scheduler
 from apscheduler.triggers.cron import CronTrigger  # type: ignore

@@ -42,6 +42,8 @@ from api.backend.job.cron_scheduling.cron_scheduling import (
 from api.backend.job.utils.clean_job_format import clean_job_format
 from api.backend.job.utils.stream_md_from_job_results import stream_md_from_job_results

+from api.backend.constants import MEDIA_DIR, MEDIA_TYPES, RECORDINGS_DIR
+
 LOG = logging.getLogger(__name__)

 job_router = APIRouter()
@@ -231,3 +233,41 @@ async def delete_cron_job_request(request: DeleteCronJob):
 async def get_cron_jobs_request(user: User = Depends(get_current_user)):
     cron_jobs = get_cron_jobs(user.email)
     return JSONResponse(content=jsonable_encoder(cron_jobs))
+
+
+@job_router.get("/recordings/{id}")
+async def get_recording(id: str):
+    path = RECORDINGS_DIR / f"{id}.mp4"
+    if not path.exists():
+        return JSONResponse(content={"error": "Recording not found."}, status_code=404)
+
+    return FileResponse(
+        path, headers={"Content-Type": "video/mp4", "Accept-Ranges": "bytes"}
+    )
+
+
+@job_router.get("/get-media")
+async def get_media(id: str):
+    try:
+        files: dict[str, list[str]] = {}
+
+        for media_type in MEDIA_TYPES:
+            path = MEDIA_DIR / media_type / f"{id}"
+
+            files[media_type] = [file.name for file in path.glob("*")]
+
+        return JSONResponse(content={"files": files})
+    except Exception as e:
+        LOG.error(f"Exception occurred: {e}")
+        traceback.print_exc()
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+
+
+@job_router.get("/media")
+async def get_media_file(id: str, type: str, file: str):
+    path = MEDIA_DIR / type / f"{id}" / file
+
+    if not path.exists():
+        return JSONResponse(content={"error": "Media file not found."}, status_code=404)
+
+    return FileResponse(path)
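A sketch of how a client could exercise the three new endpoints, assuming a local deployment where the API is mounted under `/api` (the app's `root_path`) on port 8000; the base URL and job id here are assumptions, not values from the diff:

```python
import requests

BASE = "http://localhost:8000/api"  # assumed local deployment
job_id = "0b1c2d3e"                 # hypothetical job id

# List downloaded media for a job, grouped by media type.
files = requests.get(f"{BASE}/get-media", params={"id": job_id}).json()["files"]

# Fetch one media file by type and name.
if files.get("images"):
    name = files["images"][0]
    img = requests.get(
        f"{BASE}/media", params={"id": job_id, "type": "images", "file": name}
    )
    with open(name, "wb") as f:
        f.write(img.content)

# Stream the session recording, if recordings are enabled.
video = requests.get(f"{BASE}/recordings/{job_id}")
```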
Scraper core (api.backend.scraping):

@@ -9,11 +9,16 @@ from playwright.async_api import Page
 from urllib.parse import urlparse, urljoin

 from api.backend.models import Element, CapturedElement
-from api.backend.job.scraping.scraping_utils import scrape_content
+from api.backend.job.scraping.scraping_utils import (
+    clean_format_characters,
+    scrape_content,
+)
 from api.backend.job.site_mapping.site_mapping import handle_site_mapping

 from api.backend.job.scraping.add_custom import add_custom_items

+from api.backend.constants import RECORDINGS_ENABLED
+
 LOG = logging.getLogger(__name__)

@@ -37,6 +42,7 @@ def sxpath(context: etree._Element, xpath: str):

 async def make_site_request(
+    id: str,
     url: str,
     headers: Optional[dict[str, Any]],
     multi_page_scrape: bool = False,
@@ -57,8 +63,9 @@ async def make_site_request(
         proxy = random.choice(proxies)
         LOG.info(f"Using proxy: {proxy}")

-    async with AsyncCamoufox(headless=True, proxy=proxy) as browser:
+    async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
         page: Page = await browser.new_page()
+        await page.set_viewport_size({"width": 1920, "height": 1080})

         # Add cookies and headers
         await add_custom_items(url, page, custom_cookies, headers)
@@ -67,21 +74,21 @@ async def make_site_request(

     try:
         await page.goto(url, timeout=60000)
-        await page.wait_for_load_state("networkidle", timeout=10000)
+        await page.wait_for_load_state("networkidle")

         final_url = page.url

         visited_urls.add(url)
         visited_urls.add(final_url)

-        html_content = await scrape_content(page, pages, collect_media)
+        html_content = await scrape_content(id, page, pages, collect_media)

         html_content = await page.content()
         pages.add((html_content, final_url))

         if site_map:
             await handle_site_mapping(
-                site_map, page, pages, collect_media=collect_media
+                id, site_map, page, pages, collect_media=collect_media
             )

     finally:
@@ -108,6 +115,7 @@ async def make_site_request(

         if link not in visited_urls and is_same_domain(link, original_url):
             await make_site_request(
+                id,
                 link,
                 headers=headers,
                 multi_page_scrape=multi_page_scrape,
@@ -132,11 +140,13 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])

     for e in el:  # type: ignore
         text = (
-            "\t".join(str(t) for t in e.itertext())
+            " ".join(str(t) for t in e.itertext())
             if isinstance(e, etree._Element)
             else str(e)  # type: ignore
         )

+        text = clean_format_characters(text)
+
         captured_element = CapturedElement(
             xpath=elem.xpath, text=text, name=elem.name
         )
@@ -150,6 +160,7 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])

 async def scrape(
+    id: str,
     url: str,
     xpaths: list[Element],
     headers: Optional[dict[str, Any]] = None,
@@ -163,6 +174,7 @@ async def scrape(
     pages: set[tuple[str, str]] = set()

     await make_site_request(
+        id,
         url,
         headers=headers,
         multi_page_scrape=multi_page_scrape,
Worker (process_job):

@@ -1,10 +1,12 @@
 import os
 import json
+from pathlib import Path

 from api.backend.job import get_queued_job, update_job
 from api.backend.scraping import scrape
 from api.backend.models import Element
 from fastapi.encoders import jsonable_encoder
+import subprocess

 import asyncio
 import traceback
@@ -14,6 +16,8 @@ from api.backend.database.startup import init_database
 from api.backend.worker.post_job_complete.post_job_complete import post_job_complete
 from api.backend.worker.logger import LOG

+from api.backend.ai.agent.agent import scrape_with_agent
+

 NOTIFICATION_CHANNEL = os.getenv("NOTIFICATION_CHANNEL", "")
 NOTIFICATION_WEBHOOK_URL = os.getenv("NOTIFICATION_WEBHOOK_URL", "")
@@ -26,14 +30,42 @@ SMTP_USER = os.getenv("SMTP_USER", "")
 SMTP_PASSWORD = os.getenv("SMTP_PASSWORD", "")
 USE_TLS = os.getenv("USE_TLS", "false").lower() == "true"

+RECORDINGS_ENABLED = os.getenv("RECORDINGS_ENABLED", "true").lower() == "true"
+RECORDINGS_DIR = Path("/project/app/media/recordings")
+

 async def process_job():
     job = await get_queued_job()
+    ffmpeg_proc = None
     status = "Queued"

     if job:
         LOG.info(f"Beginning processing job: {job}.")

         try:
+            output_path = RECORDINGS_DIR / f"{job['id']}.mp4"
+
+            if RECORDINGS_ENABLED:
+                ffmpeg_proc = subprocess.Popen(
+                    [
+                        "ffmpeg",
+                        "-y",
+                        "-video_size",
+                        "1280x1024",
+                        "-framerate",
+                        "15",
+                        "-f",
+                        "x11grab",
+                        "-i",
+                        ":99",
+                        "-codec:v",
+                        "libx264",
+                        "-preset",
+                        "ultrafast",
+                        output_path,
+                    ]
+                )
+
             _ = await update_job([job["id"]], field="status", value="Scraping")

             proxies = job["job_options"]["proxies"]
@@ -45,16 +77,21 @@ async def process_job():
                 LOG.error(f"Failed to parse proxy JSON: {proxies}")
                 proxies = []

-            scraped = await scrape(
-                job["url"],
-                [Element(**j) for j in job["elements"]],
-                job["job_options"]["custom_headers"],
-                job["job_options"]["multi_page_scrape"],
-                proxies,
-                job["job_options"]["site_map"],
-                job["job_options"]["collect_media"],
-                job["job_options"]["custom_cookies"],
-            )
+            if job["agent_mode"]:
+                scraped = await scrape_with_agent(job)
+            else:
+                scraped = await scrape(
+                    job["id"],
+                    job["url"],
+                    [Element(**j) for j in job["elements"]],
+                    job["job_options"]["custom_headers"],
+                    job["job_options"]["multi_page_scrape"],
+                    proxies,
+                    job["job_options"]["site_map"],
+                    job["job_options"]["collect_media"],
+                    job["job_options"]["custom_cookies"],
+                )

             LOG.info(
                 f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
             )
@@ -87,12 +124,18 @@ async def process_job():
             },
         )

+        if ffmpeg_proc:
+            ffmpeg_proc.terminate()
+            ffmpeg_proc.wait()
+

 async def main():
     LOG.info("Starting job worker...")

     init_database()

+    RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
+
     while True:
         await process_job()
         await asyncio.sleep(5)
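The recording pipeline only works because the pieces elsewhere in this diff line up: the browser runs non-headless when `RECORDINGS_ENABLED` is set, the API image installs `xvfb` and `ffmpeg`, and ffmpeg screen-grabs the X display the browser draws on. `-f x11grab -i :99` reads display `:99`, presumably the Xvfb screen started by the container's supervisor config. The same Popen call, annotated:

```python
import subprocess

# Equivalent of the worker's recording command. Assumes Xvfb is serving
# display :99 and ffmpeg is on PATH, as arranged by docker/api/Dockerfile.
ffmpeg_proc = subprocess.Popen([
    "ffmpeg",
    "-y",                        # overwrite any existing recording for this job id
    "-video_size", "1280x1024",  # capture region matching the virtual screen
    "-framerate", "15",          # low framerate keeps files and CPU small
    "-f", "x11grab",             # read frames from an X11 display...
    "-i", ":99",                 # ...specifically display :99 (the Xvfb screen)
    "-codec:v", "libx264",
    "-preset", "ultrafast",      # cheapest CPU preset; quality is secondary here
    "recording.mp4",
])
# ... run the scrape ...
ffmpeg_proc.terminate()          # SIGTERM lets ffmpeg finalize the mp4 cleanly
ffmpeg_proc.wait()
```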
Cypress spec (Job):

@@ -30,5 +30,59 @@ describe.only("Job", () => {
       "exist"
     );
     cy.contains("div", "Completed", { timeout: 20000 }).should("exist");
+
+    cy.get("tbody tr")
+      .first()
+      .within(() => {
+        cy.get('input[type="checkbox"]').click();
+      });
+
+    cy.get("[data-testid='DeleteIcon']").click();
+
+    cy.contains("div", "https://example.com", { timeout: 10000 }).should(
+      "not.exist"
+    );
+  });
+
+  it("should create a job with advanced options (media)", () => {
+    cy.intercept("POST", "/api/submit-scrape-job").as("submitScrapeJob");
+
+    cy.visit("/");
+
+    cy.get("button").contains("Advanced Job Options").click();
+
+    cy.get('[data-cy="collect-media-checkbox"]').click();
+    cy.get("body").type("{esc}");
+
+    cy.get('[data-cy="url-input"]').type("https://books.toscrape.com");
+    cy.get('[data-cy="name-field"]').type("example");
+    cy.get('[data-cy="xpath-field"]').type("//body");
+    cy.get('[data-cy="add-button"]').click();
+
+    cy.get("button").contains("Submit").click();
+
+    cy.get("li").contains("Jobs").click();
+
+    cy.contains("div", "https://books.toscrape.com", { timeout: 10000 }).should(
+      "exist"
+    );
+
+    cy.contains("div", "Completed", { timeout: 20000 }).should("exist");
+    cy.get("li").contains("Media").click();
+
+    cy.get("div[id='select-job']").click();
+    cy.get("li[role='option']").click();
+
+    cy.get("[data-testid='media-grid']", { timeout: 10000 }).should("exist");
+
+    cy.get("li").contains("Jobs").click();
+
+    cy.get("tbody tr")
+      .first()
+      .within(() => {
+        cy.get('input[type="checkbox"]').click();
+      });
+
+    cy.get("[data-testid='DeleteIcon']").click();
   });
 });
docker-compose.dev.yml:

@@ -1,6 +1,9 @@
 version: "3"
 services:
   scraperr:
+    build:
+      context: .
+      dockerfile: docker/frontend/Dockerfile
     command: ["npm", "run", "dev"]
     volumes:
       - "$PWD/src:/app/src"
@@ -10,7 +13,12 @@ services:
       - "$PWD/package-lock.json:/app/package-lock.json"
       - "$PWD/tsconfig.json:/app/tsconfig.json"
   scraperr_api:
+    build:
+      context: .
+      dockerfile: docker/api/Dockerfile
     environment:
      - LOG_LEVEL=INFO
     volumes:
       - "$PWD/api:/project/app/api"
+    ports:
+      - "5900:5900"
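Port 5900 is the default VNC port. With x11vnc and Xvfb added to the API image (see docker/api/Dockerfile below) and the browser running non-headless when `RECORDINGS_ENABLED` is set, publishing 5900 presumably lets a developer attach a VNC viewer to the container and watch the scrape live.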
docker-compose.yml:

@@ -1,11 +1,6 @@
 services:
   scraperr:
-    depends_on:
-      - scraperr_api
-    image: jpyles0524/scraperr:1.0.13
-    build:
-      context: .
-      dockerfile: docker/frontend/Dockerfile
+    image: jpyles0524/scraperr:latest
     container_name: scraperr
     command: ["npm", "run", "start"]
     environment:
@@ -18,9 +13,6 @@ services:
   scraperr_api:
     init: True
     image: jpyles0524/scraperr_api:latest
-    build:
-      context: .
-      dockerfile: docker/api/Dockerfile
     environment:
       - LOG_LEVEL=INFO
     container_name: scraperr_api
docker/api/Dockerfile:

@@ -3,7 +3,7 @@ FROM python:3.10.12-slim as pybuilder

 RUN apt-get update && \
     apt-get install -y curl && \
-    apt-get install -y uvicorn wget gnupg supervisor libgl1 libglx-mesa0 libglx0 vainfo libva-dev libva-glx2 libva-drm2 && \
+    apt-get install -y x11vnc xvfb uvicorn wget gnupg supervisor libgl1 libglx-mesa0 libglx0 vainfo libva-dev libva-glx2 libva-drm2 ffmpeg && \
     curl -LsSf https://astral.sh/uv/install.sh | sh && \
     apt-get remove -y curl && \
     apt-get autoremove -y && \
@@ -14,7 +14,8 @@ RUN pdm config python.use_venv false

 WORKDIR /project/app
 COPY pyproject.toml pdm.lock /project/app/
-RUN pdm install
+
+RUN pdm install -v --frozen-lockfile

 RUN pdm run playwright install --with-deps

@@ -30,7 +31,12 @@ EXPOSE 8000

 WORKDIR /project/app

+RUN mkdir -p /project/app/media
 RUN mkdir -p /project/app/data
 RUN touch /project/app/data/database.db

+EXPOSE 5900
+
+COPY start.sh /project/app/start.sh
+
 CMD [ "supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf" ]
docker/frontend/Dockerfile:

@@ -1,10 +1,14 @@
 # Build next dependencies
-FROM node:23.1
+FROM node:23.1-slim
 WORKDIR /app

-COPY package*.json ./
-RUN npm install
+# Copy package files first to leverage Docker cache
+COPY package.json yarn.lock ./
+
+# Install dependencies in a separate layer
+RUN yarn install --frozen-lockfile
+
+# Copy the rest of the application
 COPY tsconfig.json /app/tsconfig.json
 COPY tailwind.config.js /app/tailwind.config.js
 COPY next.config.mjs /app/next.config.mjs
@@ -13,6 +17,7 @@ COPY postcss.config.js /app/postcss.config.js
 COPY public /app/public
 COPY src /app/src

-RUN npm run build
+# Build the application
+RUN yarn build

 EXPOSE 3000
(Binary image file changed, not shown: 47 KiB before, 48 KiB after.)
Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.0.14
+version: 1.1.0

 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
package-lock.json (generated, 11371 lines): diff suppressed because it is too large.
package.json
@@ -12,9 +12,11 @@
     "@minchat/react-chat-ui": "^0.16.2",
     "@mui/icons-material": "^5.15.3",
     "@mui/material": "^5.16.0",
+    "@reduxjs/toolkit": "^2.8.2",
     "@testing-library/jest-dom": "^5.16.5",
     "@testing-library/react": "^13.4.0",
     "@testing-library/user-event": "^13.5.0",
+    "@types/react": "^18.3.21",
     "axios": "^1.7.2",
     "bootstrap": "^5.3.0",
     "chart.js": "^4.4.3",
@@ -30,16 +32,18 @@
     "react-dom": "^18.3.1",
     "react-markdown": "^9.0.0",
     "react-modal-image": "^2.6.0",
+    "react-redux": "^9.2.0",
     "react-router": "^6.14.1",
     "react-router-dom": "^6.14.1",
     "react-spinners": "^0.14.1",
+    "redux-persist": "^6.0.0",
     "typescript": "^4.9.5",
     "web-vitals": "^2.1.4"
   },
   "scripts": {
-    "dev": "next dev",
-    "build": "next build",
-    "start": "next start",
+    "dev": "yarn next dev",
+    "build": "yarn next build",
+    "start": "yarn next start",
     "serve": "serve -s ./dist",
     "cy:open": "cypress open",
     "cy:run": "cypress run"
pdm.lock (generated)
@@ -5,7 +5,7 @@
 groups = ["default", "dev"]
 strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
-content_hash = "sha256:cb37fedd6d022515dde14e475588a8da2144ba22e41dfdfacfe3f7a7d14486ca"
+content_hash = "sha256:5f4c90b42c3b35194a7c2af8b46b7c28127e25e836a779e85aae0df2bd0e69eb"

 [[metadata.targets]]
 requires_python = ">=3.10"
@@ -1174,6 +1174,17 @@ files = [
     {file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"},
 ]

+[[package]]
+name = "html2text"
+version = "2025.4.15"
+requires_python = ">=3.9"
+summary = "Turn HTML into equivalent Markdown-structured text."
+groups = ["default"]
+files = [
+    {file = "html2text-2025.4.15-py3-none-any.whl", hash = "sha256:00569167ffdab3d7767a4cdf589b7f57e777a5ed28d12907d8c58769ec734acc"},
+    {file = "html2text-2025.4.15.tar.gz", hash = "sha256:948a645f8f0bc3abe7fd587019a2197a12436cd73d0d4908af95bfc8da337588"},
+]
+
 [[package]]
 name = "httpcore"
 version = "1.0.9"
pyproject.toml
@@ -41,6 +41,7 @@ dependencies = [
     "apscheduler>=3.11.0",
     "playwright>=1.52.0",
     "camoufox>=0.4.11",
+    "html2text>=2025.4.15",
 ]
 requires-python = ">=3.10"
 readme = "README.md"
@@ -1,17 +1,23 @@
-import React, { useState, useEffect, Dispatch, useRef } from "react";
+import React, { useState, Dispatch, useEffect } from "react";
 import { Job } from "../../types";
-import { fetchJobs } from "../../lib";
 import Box from "@mui/material/Box";
 import InputLabel from "@mui/material/InputLabel";
 import FormControl from "@mui/material/FormControl";
 import Select from "@mui/material/Select";
 import Popover from "@mui/material/Popover";
-import { Typography, MenuItem, useTheme } from "@mui/material";
+import {
+  Typography,
+  MenuItem,
+  useTheme,
+  ClickAwayListener,
+} from "@mui/material";
 import { SxProps } from "@mui/material";

 interface Props {
-  sxProps: SxProps;
-  setSelectedJob: Dispatch<React.SetStateAction<Job | null>>;
+  sxProps?: SxProps;
+  setSelectedJob:
+    | Dispatch<React.SetStateAction<Job | null>>
+    | ((job: Job) => void);
   selectedJob: Job | null;
   setJobs: Dispatch<React.SetStateAction<Job[]>>;
   jobs: Job[];
@@ -43,6 +49,12 @@ export const JobSelector = ({

   const open = Boolean(anchorEl);

+  useEffect(() => {
+    if (!open) {
+      setAnchorEl(null);
+    }
+  }, [open]);
+
   return (
     <Box sx={sxProps}>
       <FormControl fullWidth>
@@ -55,9 +67,11 @@ export const JobSelector = ({
           value={selectedJob?.id || ""}
           label="Job"
           onChange={(e) => {
-            setSelectedJob(
-              jobs.find((job) => job.id === e.target.value) || null
-            );
+            const job = jobs.find((job) => job.id === e.target.value);
+
+            if (job) {
+              setSelectedJob(job);
+            }
           }}
         >
           {jobs.map((job) => (
@@ -77,57 +91,63 @@ export const JobSelector = ({
           </>
         ) : null}
       </FormControl>
-      <Popover
-        id="mouse-over-popover"
-        sx={{
-          pointerEvents: "none",
-          padding: 0,
-        }}
-        open={open}
-        anchorEl={anchorEl}
-        anchorOrigin={{
-          vertical: "bottom",
-          horizontal: "left",
-        }}
-        transformOrigin={{
-          vertical: "top",
-          horizontal: "left",
-        }}
-        onClose={handlePopoverClose}
-      >
-        {popoverJob && (
-          <Box
-            sx={{
-              border:
-                theme.palette.mode === "light"
-                  ? "2px solid black"
-                  : "2px solid white",
-            }}
-          >
-            <Typography
-              variant="body1"
-              sx={{ paddingLeft: 1, paddingRight: 1 }}
-            >
-              {popoverJob.url}
-            </Typography>
-            <div className="flex flex-row w-full justify-end mb-1">
-              <Typography
-                variant="body2"
-                sx={{
-                  paddingLeft: 1,
-                  paddingRight: 1,
-                  color: theme.palette.mode === "dark" ? "#d3d7e6" : "#5b5d63",
-                  fontStyle: "italic",
-                }}
-              >
-                {popoverJob.time_created
-                  ? new Date(popoverJob.time_created).toLocaleString()
-                  : "Unknown"}
-              </Typography>
-            </div>
-          </Box>
-        )}
-      </Popover>
+      {open && (
+        <ClickAwayListener onClickAway={handlePopoverClose}>
+          <Popover
+            id="mouse-over-popover"
+            sx={{
+              pointerEvents: "none",
+              padding: 0,
+            }}
+            open={open}
+            anchorEl={anchorEl}
+            anchorOrigin={{
+              vertical: "bottom",
+              horizontal: "left",
+            }}
+            transformOrigin={{
+              vertical: "top",
+              horizontal: "left",
+            }}
+            onClose={handlePopoverClose}
+          >
+            {popoverJob && (
+              <Box
+                sx={{
+                  border:
+                    theme.palette.mode === "light"
+                      ? "2px solid black"
+                      : "2px solid white",
+                }}
+              >
+                <Typography
+                  variant="body1"
+                  sx={{ paddingLeft: 1, paddingRight: 1 }}
+                >
+                  {popoverJob.url}
+                </Typography>
+                <div className="flex flex-row w-full justify-end mb-1">
+                  <Typography
+                    variant="body2"
+                    sx={{
+                      paddingLeft: 1,
+                      paddingRight: 1,
+                      color:
+                        theme.palette.mode === "dark" ? "#d3d7e6" : "#5b5d63",
+                      fontStyle: "italic",
+                    }}
+                  >
+                    {popoverJob.time_created
+                      ? new Date(popoverJob.time_created).toLocaleString()
+                      : "Unknown"}
+                  </Typography>
+                </div>
+              </Box>
+            )}
+          </Popover>
+        </ClickAwayListener>
+      )}
     </Box>
   );
 };
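The prop change above widens setSelectedJob so callers can pass either a useState setter or a plain callback. A minimal TypeScript sketch of what the union now accepts (hypothetical caller code, not part of the diff; Job is reduced to an id field):

import type { Dispatch, SetStateAction } from "react";

interface Job {
  id: string;
}

// The widened prop type from the diff above.
type SetSelectedJob =
  | Dispatch<SetStateAction<Job | null>>
  | ((job: Job) => void);

// A useState setter satisfies the first branch of the union...
declare const setJob: Dispatch<SetStateAction<Job | null>>;
const fromState: SetSelectedJob = setJob;

// ...and a plain callback satisfies the second, which is how pages that
// only need to react to a selection (e.g. by navigating) can use it.
const fromCallback: SetSelectedJob = (job: Job) => {
  console.log(`would navigate to /media?id=${job.id}`);
};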
@@ -6,11 +6,13 @@ import { RawJobOptions } from "@/types";
|
|||||||
export type AdvancedJobOptionsProps = {
|
export type AdvancedJobOptionsProps = {
|
||||||
jobOptions: RawJobOptions;
|
jobOptions: RawJobOptions;
|
||||||
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
|
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
|
||||||
|
multiPageScrapeEnabled?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const AdvancedJobOptions = ({
|
export const AdvancedJobOptions = ({
|
||||||
jobOptions,
|
jobOptions,
|
||||||
setJobOptions,
|
setJobOptions,
|
||||||
|
multiPageScrapeEnabled = true,
|
||||||
}: AdvancedJobOptionsProps) => {
|
}: AdvancedJobOptionsProps) => {
|
||||||
const [open, setOpen] = useState(false);
|
const [open, setOpen] = useState(false);
|
||||||
return (
|
return (
|
||||||
@@ -39,6 +41,7 @@ export const AdvancedJobOptions = ({
|
|||||||
onClose={() => setOpen(false)}
|
onClose={() => setOpen(false)}
|
||||||
jobOptions={jobOptions}
|
jobOptions={jobOptions}
|
||||||
setJobOptions={setJobOptions}
|
setJobOptions={setJobOptions}
|
||||||
|
multiPageScrapeEnabled={multiPageScrapeEnabled}
|
||||||
/>
|
/>
|
||||||
</Box>
|
</Box>
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ export type AdvancedJobOptionsDialogProps = {
   onClose: () => void;
   jobOptions: RawJobOptions;
   setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
+  multiPageScrapeEnabled?: boolean;
 };

 export const AdvancedJobOptionsDialog = ({
@@ -39,6 +40,7 @@ export const AdvancedJobOptionsDialog = ({
   onClose,
   jobOptions,
   setJobOptions,
+  multiPageScrapeEnabled = true,
 }: AdvancedJobOptionsDialogProps) => {
   const theme = useTheme();
   const handleMultiPageScrapeChange = () => {
@@ -122,12 +124,19 @@ export const AdvancedJobOptionsDialog = ({
               <Checkbox
                 checked={jobOptions.multi_page_scrape}
                 onChange={handleMultiPageScrapeChange}
+                disabled={!multiPageScrapeEnabled}
               />
             }
             label={
               <Box sx={{ display: "flex", alignItems: "center" }}>
                 <Typography>Multi Page Scrape</Typography>
-                <Tooltip title="Enable crawling through multiple pages">
+                <Tooltip
+                  title={
+                    multiPageScrapeEnabled
+                      ? "Enable crawling through multiple pages"
+                      : "Multi page scrape is disabled"
+                  }
+                >
                   <IconButton size="small">
                     <InfoOutlined fontSize="small" />
                   </IconButton>
@@ -140,6 +149,7 @@ export const AdvancedJobOptionsDialog = ({
               <Checkbox
                 checked={jobOptions.collect_media}
                 onChange={handleCollectMediaChange}
+                data-cy="collect-media-checkbox"
               />
             }
             label={
@@ -131,8 +131,9 @@ export const CsvTable: React.FC<CsvTableProps> = ({ csv, className }) => {
               <Typography variant="body2" color="text.secondary">
                 {row.text
                   ? row.text
-                      .replace(/(\r\n|\n|\r)/g, " ")
-                      .replace(/\t/g, " ")
+                      .replace(/[\n\t\r]+/g, " ")
+                      .replace(/\s+/g, " ")
+                      .trim()
                   : "No text available"}
               </Typography>
             </Paper>
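The replacement chain above now collapses every run of whitespace (not only newlines and tabs) and trims the ends, so cell text renders as a single clean line. A small TypeScript sketch of the behavior (the sample string is made up):

// Hypothetical input resembling scraped cell text.
const raw = "line one\r\n\tline two   with\tgaps \n";

const cleaned = raw
  .replace(/[\n\t\r]+/g, " ") // newline/tab/carriage-return runs -> one space
  .replace(/\s+/g, " ") // any remaining whitespace runs -> one space
  .trim(); // drop leading/trailing spaces

console.log(cleaned); // "line one line two with gaps"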
src/components/common/disabled/disabled.tsx (new file)
@@ -0,0 +1,29 @@
+import { Box } from "@mui/material";
+
+export type DisabledProps = {
+  message: string;
+};
+
+export const Disabled = ({ message }: DisabledProps) => {
+  return (
+    <Box
+      bgcolor="background.default"
+      minHeight="100vh"
+      display="flex"
+      justifyContent="center"
+      alignItems="center"
+    >
+      <h4
+        style={{
+          color: "#fff",
+          padding: "20px",
+          borderRadius: "8px",
+          background: "rgba(0, 0, 0, 0.6)",
+          boxShadow: "0 4px 8px rgba(0, 0, 0, 0.2)",
+        }}
+      >
+        {message}
+      </h4>
+    </Box>
+  );
+};

src/components/common/disabled/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./disabled";
src/components/common/media-viewer/audio/audio-viewer.tsx (new file)
@@ -0,0 +1,40 @@
+
+import { Box, Typography } from "@mui/material";
+
+interface AudioViewerProps {
+  mediaUrl: string;
+  selectedMedia: string;
+  onError: () => void;
+}
+
+export const AudioViewer = ({
+  mediaUrl,
+  selectedMedia,
+  onError,
+}: AudioViewerProps) => {
+  return (
+    <Box
+      sx={{
+        display: "flex",
+        justifyContent: "center",
+        alignItems: "center",
+        flexDirection: "column",
+        height: "100%",
+        gap: 2,
+      }}
+    >
+      <Typography variant="h6">{selectedMedia}</Typography>
+      <audio
+        controls
+        onError={onError}
+        style={{
+          width: "80%",
+          maxWidth: "500px",
+        }}
+      >
+        <source src={mediaUrl} type="audio/mpeg" />
+        Your browser does not support the audio element.
+      </audio>
+    </Box>
+  );
+};

src/components/common/media-viewer/audio/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./audio-viewer";
src/components/common/media-viewer/image/image-viewer.tsx (new file)
@@ -0,0 +1,36 @@
+import { Box, useTheme } from "@mui/material";
+
+export const ImageViewer = ({
+  mediaUrl,
+  selectedMedia,
+}: {
+  mediaUrl: string;
+  selectedMedia: string;
+}) => {
+  const theme = useTheme();
+  return (
+    <Box
+      sx={{
+        display: "flex",
+        justifyContent: "center",
+        alignItems: "center",
+        height: "100%",
+        width: "100%",
+        overflow: "hidden",
+        position: "relative",
+      }}
+    >
+      <img
+        src={mediaUrl}
+        alt={selectedMedia}
+        style={{
+          maxHeight: "100%",
+          maxWidth: "100%",
+          objectFit: "contain",
+          borderRadius: "4px",
+          boxShadow: theme.shadows[4],
+        }}
+      />
+    </Box>
+  );
+};

src/components/common/media-viewer/image/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./image-viewer";

src/components/common/media-viewer/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./media-viewer";
src/components/common/media-viewer/media-viewer.tsx (new file)
@@ -0,0 +1,75 @@
+import { Box, Typography } from "@mui/material";
+import { ImageViewer } from "./image";
+import { VideoViewer } from "./video";
+import { AudioViewer } from "./audio";
+import { PDFViewer } from "./pdf-viewer";
+
+interface MediaViewerProps {
+  selectedMedia: string;
+  activeTab: string;
+  getMediaUrl: (fileName: string) => string;
+  onError: (error: string) => void;
+}
+
+export const MediaViewer = ({
+  selectedMedia,
+  activeTab,
+  getMediaUrl,
+  onError,
+}: MediaViewerProps) => {
+  if (!selectedMedia) {
+    return (
+      <Box
+        sx={{
+          display: "flex",
+          justifyContent: "center",
+          alignItems: "center",
+          height: "100%",
+        }}
+      >
+        <Typography variant="body1" color="textSecondary">
+          Select a file to view
+        </Typography>
+      </Box>
+    );
+  }
+
+  const mediaUrl = getMediaUrl(selectedMedia);
+
+  switch (activeTab) {
+    case "images":
+      return <ImageViewer mediaUrl={mediaUrl} selectedMedia={selectedMedia} />;
+    case "videos":
+      return (
+        <VideoViewer
+          mediaUrl={mediaUrl}
+          onError={() => onError("Error loading video")}
+        />
+      );
+    case "audio":
+      return (
+        <AudioViewer
+          mediaUrl={mediaUrl}
+          selectedMedia={selectedMedia}
+          onError={() => onError("Error loading audio")}
+        />
+      );
+    case "pdfs":
+      return <PDFViewer mediaUrl={mediaUrl} selectedMedia={selectedMedia} />;
+    default:
+      return (
+        <Box
+          sx={{
+            display: "flex",
+            justifyContent: "center",
+            alignItems: "center",
+            height: "100%",
+          }}
+        >
+          <Typography variant="body1">
+            {selectedMedia} - Download this file to view it
+          </Typography>
+        </Box>
+      );
+  }
+};

src/components/common/media-viewer/pdf-viewer/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./pdf-viewer";
src/components/common/media-viewer/pdf-viewer/pdf-viewer.tsx (new file)
@@ -0,0 +1,33 @@
+import { Box, useTheme } from "@mui/material";
+
+interface PDFViewerProps {
+  mediaUrl: string;
+  selectedMedia: string;
+}
+
+export const PDFViewer = ({ mediaUrl, selectedMedia }: PDFViewerProps) => {
+  const theme = useTheme();
+
+  return (
+    <Box
+      sx={{
+        width: "100%",
+        height: "100%",
+        overflow: "hidden",
+        borderRadius: 1,
+      }}
+    >
+      <iframe
+        src={`${mediaUrl}#view=fitH`}
+        style={{
+          width: "100%",
+          height: "100%",
+          border: "none",
+          borderRadius: "4px",
+          boxShadow: theme.shadows[4],
+        }}
+        title={selectedMedia}
+      />
+    </Box>
+  );
+};

src/components/common/media-viewer/tile-grid-view/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./tile-grid-view";
src/components/common/media-viewer/tile-grid-view/tile-grid-view.tsx (new file)
@@ -0,0 +1,114 @@
+import { MediaFiles } from "@/components/pages/media/id/id";
+import {
+  Card,
+  CardActionArea,
+  CardMedia,
+  CardContent,
+  Typography,
+  Box,
+  Grid,
+  useTheme,
+} from "@mui/material";
+
+interface TileGridViewProps {
+  mediaFiles: MediaFiles;
+  activeTab: string;
+  selectedMedia: string;
+  handleMediaSelect: (fileName: string) => void;
+  getMediaUrl: (fileName: string) => string;
+}
+
+export const TileGridView = ({
+  mediaFiles,
+  activeTab,
+  selectedMedia,
+  handleMediaSelect,
+  getMediaUrl,
+}: TileGridViewProps) => {
+  const theme = useTheme();
+
+  return (
+    <Grid container spacing={2} sx={{ p: 2 }} data-testid="media-grid">
+      {mediaFiles[activeTab].map((fileName: string) => (
+        <Grid item xs={6} sm={4} md={3} lg={2} key={fileName}>
+          <Card
+            sx={{
+              height: "100%",
+              display: "flex",
+              flexDirection: "column",
+              borderColor:
+                selectedMedia === fileName
+                  ? theme.palette.primary.main
+                  : "transparent",
+              borderWidth: 2,
+              borderStyle: "solid",
+              transition: "all 0.2s",
+              "&:hover": {
+                transform: "translateY(-4px)",
+                boxShadow: theme.shadows[6],
+              },
+            }}
+          >
+            <CardActionArea onClick={() => handleMediaSelect(fileName)}>
+              <CardMedia
+                component="div"
+                sx={{
+                  pt: "75%",
+                  position: "relative",
+                  backgroundColor:
+                    theme.palette.mode === "light"
+                      ? theme.palette.grey[100]
+                      : theme.palette.grey[800],
+                  display: "flex",
+                  justifyContent: "center",
+                  alignItems: "center",
+                }}
+              >
+                {activeTab === "images" ? (
+                  <Box
+                    component="img"
+                    src={getMediaUrl(fileName)}
+                    alt={fileName}
+                    sx={{
+                      position: "absolute",
+                      top: 0,
+                      left: 0,
+                      width: "100%",
+                      height: "100%",
+                      objectFit: "contain",
+                      p: 1,
+                    }}
+                    onError={(e) => {
+                      const target = e.target as HTMLImageElement;
+                      if (target.src !== "/placeholder-image.png") {
+                        target.src = "";
+                      }
+                    }}
+                  />
+                ) : (
+                  <Typography
+                    variant="body2"
+                    color="textSecondary"
+                    sx={{
+                      position: "absolute",
+                      top: "50%",
+                      left: "50%",
+                      transform: "translate(-50%, -50%)",
+                    }}
+                  >
+                    {fileName.split(".").pop()?.toUpperCase() || "FILE"}
+                  </Typography>
+                )}
+              </CardMedia>
+              <CardContent sx={{ flexGrow: 1, p: 1 }}>
+                <Typography variant="body2" noWrap title={fileName}>
+                  {fileName}
+                </Typography>
+              </CardContent>
+            </CardActionArea>
+          </Card>
+        </Grid>
+      ))}
+    </Grid>
+  );
+};

src/components/common/media-viewer/video/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./video-viewer";
src/components/common/media-viewer/video/video-viewer.tsx (new file)
@@ -0,0 +1,39 @@
+import { Box, useTheme } from "@mui/material";
+
+export const VideoViewer = ({
+  mediaUrl,
+  onError,
+}: {
+  mediaUrl: string;
+  onError: () => void;
+}) => {
+  const theme = useTheme();
+  return (
+    <Box
+      sx={{
+        width: "100%",
+        height: "100%",
+        display: "flex",
+        justifyContent: "center",
+        alignItems: "center",
+        overflow: "hidden",
+        borderRadius: 1,
+      }}
+    >
+      <video
+        className="h-full w-full object-contain"
+        controls
+        onError={onError}
+        style={{
+          maxHeight: "100%",
+          maxWidth: "100%",
+          borderRadius: "4px",
+          boxShadow: theme.shadows[4],
+        }}
+      >
+        <source src={mediaUrl} type="video/mp4" />
+        Your browser does not support the video tag.
+      </video>
+    </Box>
+  );
+};
@@ -3,11 +3,10 @@ import { NavItem } from "../nav-item";
|
|||||||
|
|
||||||
import HomeIcon from "@mui/icons-material/Home";
|
import HomeIcon from "@mui/icons-material/Home";
|
||||||
import HttpIcon from "@mui/icons-material/Http";
|
import HttpIcon from "@mui/icons-material/Http";
|
||||||
import TerminalIcon from "@mui/icons-material/Terminal";
|
|
||||||
import BarChart from "@mui/icons-material/BarChart";
|
import BarChart from "@mui/icons-material/BarChart";
|
||||||
import AutoAwesomeIcon from "@mui/icons-material/AutoAwesome";
|
import AutoAwesomeIcon from "@mui/icons-material/AutoAwesome";
|
||||||
import { List } from "@mui/material";
|
import { List } from "@mui/material";
|
||||||
import { Schedule } from "@mui/icons-material";
|
import { Folder, Person, Schedule, VideoFile } from "@mui/icons-material";
|
||||||
|
|
||||||
const items = [
|
const items = [
|
||||||
{
|
{
|
||||||
@@ -20,6 +19,11 @@ const items = [
|
|||||||
text: "Jobs",
|
text: "Jobs",
|
||||||
href: "/jobs",
|
href: "/jobs",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
icon: <Person />,
|
||||||
|
text: "Agent",
|
||||||
|
href: "/agent",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
icon: <AutoAwesomeIcon />,
|
icon: <AutoAwesomeIcon />,
|
||||||
text: "Chat",
|
text: "Chat",
|
||||||
@@ -35,6 +39,16 @@ const items = [
|
|||||||
text: "Cron Jobs",
|
text: "Cron Jobs",
|
||||||
href: "/cron-jobs",
|
href: "/cron-jobs",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
icon: <VideoFile />,
|
||||||
|
text: "Recordings",
|
||||||
|
href: "/recordings",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
icon: <Folder />,
|
||||||
|
text: "Media",
|
||||||
|
href: "/media",
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
export const NavItems = () => {
|
export const NavItems = () => {
|
||||||
|
|||||||
@@ -7,20 +7,15 @@ import {
   TableHead,
   TableRow,
   Box,
-  Typography,
-  Accordion,
-  AccordionSummary,
-  AccordionDetails,
   Checkbox,
   Button,
   Tooltip,
   IconButton,
   TableContainer,
 } from "@mui/material";
-import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
 import StarIcon from "@mui/icons-material/Star";
 import { Job } from "../../types";
-import { AutoAwesome } from "@mui/icons-material";
+import { AutoAwesome, Image, VideoCameraBack } from "@mui/icons-material";
 import { useRouter } from "next/router";

 interface stringMap {
@@ -59,7 +54,7 @@ export const JobQueue = ({
       <Table sx={{ tableLayout: "fixed", width: "100%" }}>
         <TableHead>
           <TableRow>
-            <TableCell>Select</TableCell>
+            <TableCell sx={{ width: "280px" }}>Select</TableCell>
             <TableCell>Id</TableCell>
             <TableCell>Url</TableCell>
             <TableCell>Elements</TableCell>
@@ -72,7 +67,7 @@ export const JobQueue = ({
         <TableBody sx={{ overflow: "auto" }}>
           {filteredJobs.map((row, index) => (
             <TableRow key={index}>
-              <TableCell padding="checkbox">
+              <TableCell padding="checkbox" sx={{ width: "280px" }}>
                 <Checkbox
                   checked={selectedJobs.has(row.id)}
                   onChange={() => onSelectJob(row.id)}
@@ -106,12 +101,47 @@ export const JobQueue = ({
                   </IconButton>
                 </span>
               </Tooltip>
+              <Tooltip title="View Recording">
+                <span>
+                  <IconButton
+                    onClick={() => {
+                      router.push({
+                        pathname: "/recordings",
+                        query: {
+                          id: row.id,
+                        },
+                      });
+                    }}
+                  >
+                    <VideoCameraBack />
+                  </IconButton>
+                </span>
+              </Tooltip>
+              {row.job_options.collect_media && (
+                <Tooltip title="View Media">
+                  <span>
+                    <IconButton
+                      onClick={() => {
+                        router.replace(`/media?id=${row.id}`);
+                      }}
+                    >
+                      <Image />
+                    </IconButton>
+                  </span>
+                </Tooltip>
+              )}
              </TableCell>
-              <TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
+              <TableCell
+                sx={{
+                  maxWidth: 100,
+                  overflow: "auto",
+                }}
+              >
                 <Box
                   sx={{
                     maxHeight: 100,
                     overflow: "auto",
+                    paddingTop: 1,
                   }}
                 >
                   {row.id}
@@ -122,7 +152,7 @@ export const JobQueue = ({
               </TableCell>
               <TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
                 <Box sx={{ maxHeight: 100, overflow: "auto" }}>
-                  {JSON.stringify(row.elements)}
+                  {row.agent_mode ? "Agent Mode" : JSON.stringify(row.elements)}
                 </Box>
               </TableCell>
               <TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}>
@@ -151,7 +181,7 @@ export const JobQueue = ({
                 </Box>
               </TableCell>
               <TableCell sx={{ maxWidth: 50, overflow: "auto" }}>
-                <Box sx={{ maxHeight: 100, overflow: "auto" }}>
+                <Box sx={{ maxWidth: 100, maxHeight: 100, overflow: "auto" }}>
                   <Box
                     className="rounded-md p-2 text-center"
                     sx={{ bgcolor: colors[row.status] }}
@@ -176,9 +206,19 @@ export const JobQueue = ({
                   Download
                 </Button>
                 <Button
-                  onClick={() =>
-                    onNavigate(row.elements, row.url, row.job_options)
-                  }
+                  onClick={() => {
+                    if (row.agent_mode) {
+                      router.push({
+                        pathname: "/agent",
+                        query: {
+                          url: row.url,
+                          prompt: row.prompt,
+                        },
+                      });
+                    } else {
+                      onNavigate(row.elements, row.url, row.job_options);
+                    }
+                  }}
                   size="small"
                   sx={{
                     minWidth: 0,
src/components/pages/agent/agent.tsx (new file)
@@ -0,0 +1,228 @@
+import { validateURL } from "@/lib/helpers/validate-url";
+import { ApiService } from "@/services";
+import {
+  Box,
+  Button,
+  Divider,
+  Snackbar,
+  Alert,
+  TextField,
+  Typography,
+  useTheme,
+} from "@mui/material";
+import { useEffect, useState } from "react";
+import { useRouter } from "next/router";
+import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
+import { useAdvancedJobOptions } from "@/lib/hooks/use-advanced-job-options/use-advanced-job-options";
+import { checkAI } from "@/lib";
+import { Disabled } from "@/components/common/disabled/disabled";
+
+export const Agent = () => {
+  const [url, setUrl] = useState("");
+  const [prompt, setPrompt] = useState("");
+  const [urlError, setUrlError] = useState<string | null>(null);
+  const [aiEnabled, setAiEnabled] = useState(false);
+  const [snackbarMessage, setSnackbarMessage] = useState("");
+  const [snackbarSeverity, setSnackbarSeverity] = useState<
+    "success" | "error" | "info" | "warning"
+  >("info");
+  const [snackbarOpen, setSnackbarOpen] = useState(false);
+  const router = useRouter();
+  const { jobOptions, setJobOptions } = useAdvancedJobOptions();
+  const theme = useTheme();
+
+  useEffect(() => {
+    if (router.query.url) {
+      setUrl(router.query.url as string);
+    }
+
+    if (router.query.prompt) {
+      setPrompt(router.query.prompt as string);
+    }
+  }, [router.query.url, router.query.prompt]);
+
+  useEffect(() => {
+    checkAI(setAiEnabled);
+  }, []);
+
+  const handleCloseSnackbar = () => {
+    setSnackbarOpen(false);
+  };
+
+  const ErrorSnackbar = () => {
+    return (
+      <Snackbar
+        open={snackbarOpen}
+        autoHideDuration={6000}
+        onClose={handleCloseSnackbar}
+      >
+        <Alert onClose={handleCloseSnackbar} severity="error">
+          {snackbarMessage}
+        </Alert>
+      </Snackbar>
+    );
+  };
+
+  const NotifySnackbar = () => {
+    const goTo = () => {
+      router.push("/jobs");
+    };
+
+    const action = (
+      <Button color="inherit" size="small" onClick={goTo}>
+        Go To Job
+      </Button>
+    );
+
+    return (
+      <Snackbar
+        open={snackbarOpen}
+        autoHideDuration={6000}
+        onClose={handleCloseSnackbar}
+      >
+        <Alert onClose={handleCloseSnackbar} severity="info" action={action}>
+          {snackbarMessage}
+        </Alert>
+      </Snackbar>
+    );
+  };
+
+  const handleSubmit = async () => {
+    if (!validateURL(url)) {
+      setUrlError("Please enter a valid URL.");
+      return;
+    }
+
+    setUrlError(null);
+
+    await ApiService.submitJob(
+      url,
+      [],
+      "",
+      {
+        collect_media: jobOptions.collect_media,
+        multi_page_scrape: jobOptions.multi_page_scrape,
+      },
+      jobOptions.custom_headers,
+      jobOptions.custom_cookies,
+      null,
+      true,
+      prompt
+    )
+      .then(async (response) => {
+        if (!response.ok) {
+          return response.json().then((error) => {
+            throw new Error(error.error);
+          });
+        }
+        return response.json();
+      })
+      .then((data) => {
+        setSnackbarMessage(
+          `Agent job: ${data.id} submitted successfully.` ||
+            "Agent job submitted successfully."
+        );
+        setSnackbarSeverity("info");
+        setSnackbarOpen(true);
+      })
+      .catch((error) => {
+        setSnackbarMessage(error || "An error occurred.");
+        setSnackbarSeverity("error");
+        setSnackbarOpen(true);
+      });
+  };
+
+  if (!aiEnabled) {
+    return (
+      <Disabled message="Must set either OPENAI_KEY or OLLAMA_MODEL to use AI features." />
+    );
+  }
+
+  return (
+    <Box
+      sx={{
+        minHeight: "100vh",
+        display: "flex",
+        alignItems: "center",
+        justifyContent: "center",
+        background: theme.palette.background.default,
+        p: 4,
+      }}
+    >
+      <Box
+        sx={{
+          backgroundColor: theme.palette.background.paper,
+          borderRadius: 4,
+          boxShadow: 6,
+          p: 4,
+          width: "100%",
+          maxWidth: 800,
+          display: "flex",
+          flexDirection: "column",
+          gap: "1rem",
+        }}
+      >
+        <Typography variant="h3" sx={{ textAlign: "center", fontWeight: 600 }}>
+          Agent Mode
+        </Typography>
+        <Typography
+          variant="body1"
+          sx={{ textAlign: "center", color: "text.secondary" }}
+        >
+          Use AI to scrape a website
+        </Typography>
+        <Divider />
+        <Typography variant="body1" sx={{ fontWeight: 500 }}>
+          Website URL
+        </Typography>
+        <TextField
+          value={url}
+          onChange={(e) => setUrl(e.target.value)}
+          error={!!urlError}
+          helperText={urlError}
+          autoComplete="agent-url"
+          fullWidth
+          placeholder="https://www.example.com"
+          variant="outlined"
+          size="small"
+        />
+        <Typography variant="body1" sx={{ fontWeight: 500, marginBottom: 0 }}>
+          Prompt
+        </Typography>
+        <TextField
+          value={prompt}
+          onChange={(e) => setPrompt(e.target.value)}
+          autoComplete="agent-prompt"
+          fullWidth
+          placeholder="Collect all the links on the page"
+          variant="outlined"
+          size="small"
+        />
+        <Box
+          sx={{
+            display: "flex",
+            gap: 2,
+            alignItems: "center",
+            justifyContent: "space-between",
+            flexWrap: "wrap",
+          }}
+        >
+          <AdvancedJobOptions
+            jobOptions={jobOptions}
+            setJobOptions={setJobOptions}
+            multiPageScrapeEnabled={false}
+          />
+          <Button
+            variant="contained"
+            color="primary"
+            onClick={handleSubmit}
+            sx={{ minWidth: 120 }}
+          >
+            Submit
+          </Button>
+        </Box>
+        {snackbarSeverity === "info" ? <NotifySnackbar /> : <ErrorSnackbar />}
+      </Box>
+    </Box>
+  );
+};

src/components/pages/agent/index.ts (new file)
@@ -0,0 +1 @@
+export * from "./agent";
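One detail in handleSubmit above: a non-2xx response is converted into a thrown Error carrying the server's message, so the single trailing .catch handles transport failures and API rejections alike. A reduced TypeScript sketch of that pattern (the endpoint path and error shape below are placeholders, not Scraperr's actual API):

// Assumes the server returns { error: string } on failure.
async function submitExampleJob(
  url: string,
  prompt: string
): Promise<{ id: string }> {
  const response = await fetch("/api/example-submit", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ url, prompt }),
  });

  if (!response.ok) {
    // Re-throw the API-level error so one catch sees every failure mode.
    const body = await response.json();
    throw new Error(body.error);
  }

  return response.json();
}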
@@ -11,7 +11,7 @@ import {
 import { JobSelector } from "../../ai";
 import { Job, Message } from "../../../types";
 import { useSearchParams } from "next/navigation";
-import { checkAI, fetchJob, fetchJobs, updateJob } from "../../../lib";
+import { fetchJob, fetchJobs, updateJob, checkAI } from "../../../lib";
 import SendIcon from "@mui/icons-material/Send";
 import EditNoteIcon from "@mui/icons-material/EditNote";
src/components/pages/media/id/id.tsx (new file)
@@ -0,0 +1,392 @@
+import { JobSelector } from "@/components/ai";
+import { fetchJobs } from "@/lib";
+import { Job } from "@/types";
+import {
+  Box,
+  useTheme,
+  Typography,
+  CircularProgress,
+  Alert,
+  Paper,
+  Tabs,
+  Tab,
+} from "@mui/material";
+import { useRouter, useSearchParams } from "next/navigation";
+import { useState, useEffect } from "react";
+import { TileGridView } from "@/components/common/media-viewer/tile-grid-view";
+import { MediaViewer } from "@/components/common/media-viewer";
+
+export interface MediaFiles {
+  audio: string[];
+  documents: string[];
+  images: string[];
+  pdfs: string[];
+  presentations: string[];
+  spreadsheets: string[];
+  videos: string[];
+  [key: string]: string[];
+}
+
+export const MediaId = () => {
+  const searchParams = useSearchParams();
+  const theme = useTheme();
+  const router = useRouter();
+
+  const [error, setError] = useState<string | null>(null);
+  const [loading, setLoading] = useState(true);
+  const [jobs, setJobs] = useState<Job[]>([]);
+  const [selectedJob, setSelectedJob] = useState<Job | null>(null);
+  const [mediaFiles, setMediaFiles] = useState<MediaFiles | null>(null);
+  const [activeTab, setActiveTab] = useState<string>("images");
+  const [selectedMedia, setSelectedMedia] = useState<string | null>(null);
+
+  const currentId = searchParams.get("id");
+  const mediaType = searchParams.get("type") || "images";
+  const mediaName = searchParams.get("file");
+
+  const handleSelectJob = (job: Job | null) => {
+    if (job) {
+      router.push(`/media?id=${job.id}`);
+    }
+  };
+
+  const handleTabChange = (_event: React.SyntheticEvent, newValue: string) => {
+    setActiveTab(newValue);
+    router.push(`/media?id=${currentId}&type=${newValue}`);
+  };
+
+  const handleMediaSelect = (fileName: string) => {
+    setSelectedMedia(fileName);
+    router.push(`/media?id=${currentId}&type=${activeTab}&file=${fileName}`);
+  };
+
+  // Fetch jobs on mount
+  useEffect(() => {
+    fetchJobs(setJobs);
+  }, []);
+
+  // Set selected job when currentId changes
+  useEffect(() => {
+    if (!currentId) {
+      setSelectedJob(null);
+      return;
+    }
+
+    const job = jobs.find((j) => j.id === currentId);
+    setSelectedJob(job || null);
+  }, [currentId, jobs]);
+
+  // Fetch media files when selected job changes
+  useEffect(() => {
+    if (!selectedJob?.id) {
+      setError("No job ID provided");
+      setLoading(false);
+      return;
+    }
+
+    const fetchMediaFiles = async () => {
+      setLoading(true);
+      setError(null);
+
+      try {
+        const res = await fetch(`/api/media/get-media?id=${selectedJob.id}`);
+
+        if (!res.ok) {
+          throw new Error(`Media not found (status: ${res.status})`);
+        }
+
+        const data = await res.json();
+        setMediaFiles(data.files);
+
+        const hasMediaType = mediaType && data.files[mediaType]?.length > 0;
+
+        if (hasMediaType && activeTab !== mediaType) {
+          setActiveTab(mediaType);
+        } else if (!hasMediaType && !activeTab) {
+          // Only set a default tab if activeTab is not set
+          const firstNonEmpty = Object.entries(data.files).find(
+            ([_, files]) => Array.isArray(files) && files.length > 0
+          );
+          if (firstNonEmpty) {
+            setActiveTab(firstNonEmpty[0]);
+          }
+        }
+      } catch (err) {
+        setError(
+          err instanceof Error ? err.message : "Failed to fetch media files"
+        );
+      } finally {
+        setLoading(false);
+      }
+    };
+
+    fetchMediaFiles();
+  }, [selectedJob?.id]);
+
+  // Set selected media when mediaName changes
+  useEffect(() => {
+    if (mediaName && mediaName !== selectedMedia) {
+      setSelectedMedia(mediaName);
+    }
+  }, [mediaName, selectedMedia]);
+
+  // Get media file URL
+  const getMediaUrl = (fileName: string) => {
+    if (!currentId || !activeTab) return "";
+    return `/api/media?id=${currentId}&type=${activeTab}&file=${fileName}`;
+  };
+
+  const renderMediaThumbnails = () => {
+    if (
+      !mediaFiles ||
+      !mediaFiles[activeTab] ||
+      mediaFiles[activeTab].length === 0
+    ) {
+      return (
+        <Box
+          sx={{
+            display: "flex",
+            justifyContent: "center",
+            alignItems: "center",
+            height: "100%",
+            p: 3,
+          }}
+        >
+          <Typography variant="body2" color="textSecondary">
+            No {activeTab} files available
+          </Typography>
+        </Box>
+      );
+    }
+
+    return (
+      <TileGridView
+        mediaFiles={mediaFiles}
+        activeTab={activeTab}
+        selectedMedia={selectedMedia || ""}
+        handleMediaSelect={handleMediaSelect}
+        getMediaUrl={getMediaUrl}
+      />
+    );
+  };
+
+  return (
+    <Box
+      sx={{
+        height: "100%",
+        width: "100%",
+        display: "flex",
+        flexDirection: "column",
+        position: "relative",
+        borderRadius: 2,
+        overflow: "hidden",
+        border: `1px solid ${theme.palette.divider}`,
+        backgroundColor: theme.palette.background.paper,
+      }}
+    >
+      <Box
+        sx={{
+          display: "flex",
+          justifyContent: "flex-end",
+          p: 1,
+          borderBottom: `1px solid ${theme.palette.divider}`,
+          backgroundColor:
+            theme.palette.mode === "light"
+              ? theme.palette.grey[50]
+              : theme.palette.grey[900],
+          zIndex: 10,
+        }}
+      >
+        <Box sx={{ width: "300px" }}>
+          <JobSelector
+            setSelectedJob={handleSelectJob}
+            selectedJob={selectedJob}
+            setJobs={setJobs}
+            jobs={jobs}
+          />
+        </Box>
+      </Box>
+
+      {loading ? (
+        <Box
+          display="flex"
+          flexDirection="column"
+          alignItems="center"
+          justifyContent="center"
+          sx={{ flex: 1 }}
+          gap={2}
+        >
+          <CircularProgress />
+          <Typography variant="body2" color="textSecondary">
+            Loading media...
+          </Typography>
+        </Box>
+      ) : error ? (
+        <Box
+          sx={{
+            flex: 1,
+            display: "flex",
+            justifyContent: "center",
+            alignItems: "center",
+            backgroundColor:
+              theme.palette.mode === "light"
+                ? theme.palette.grey[100]
+                : theme.palette.grey[900],
+            p: 2,
+          }}
+        >
+          <Paper
+            elevation={3}
+            sx={{
+              p: 3,
+              maxWidth: "500px",
+              width: "100%",
+              backgroundColor: theme.palette.background.paper,
+              borderRadius: 2,
+            }}
+          >
+            <Alert
+              severity="error"
+              variant="filled"
+              sx={{
+                mb: 2,
+                backgroundColor: theme.palette.error.main,
+              }}
+            >
+              {error}
+            </Alert>
+            <Typography variant="body2" color="textSecondary" sx={{ mt: 2 }}>
+              Please select a different job from the dropdown menu above or
+              check if media browsing is enabled.
+            </Typography>
+          </Paper>
+        </Box>
+      ) : (
+        <>
+          <Box
+            sx={{
+              borderBottom: 1,
+              borderColor: "divider",
+              backgroundColor:
+                theme.palette.mode === "light"
+                  ? theme.palette.grey[50]
+                  : theme.palette.grey[900],
+            }}
+          >
+            <Tabs
+              value={activeTab}
+              onChange={handleTabChange}
+              variant="scrollable"
+              scrollButtons="auto"
+              aria-label="media type tabs"
+            >
+              {mediaFiles &&
+                Object.entries(mediaFiles).map(([type, files]) => (
+                  <Tab
+                    key={type}
+                    value={type}
+                    label={`${type.charAt(0).toUpperCase() + type.slice(1)} (${
+                      files.length
+                    })`}
+                    disabled={!files.length}
+                  />
+                ))}
+            </Tabs>
+          </Box>
+
+          <Box
+            sx={{
+              display: "flex",
+              flexDirection: "column",
+              flex: 1,
+              height: "calc(100% - 48px)",
+              overflow: "hidden",
+            }}
+          >
+            {selectedMedia && mediaType && mediaName ? (
+              <Box
+                sx={{
+                  display: "flex",
+                  flexDirection: "column",
+                  height: "100%",
+                }}
+              >
+                <Box
+                  sx={{
+                    display: "flex",
+                    justifyContent: "space-between",
+                    alignItems: "center",
+                    p: 1,
+                    borderBottom: `1px solid ${theme.palette.divider}`,
+                    backgroundColor:
+                      theme.palette.mode === "light"
+                        ? theme.palette.grey[50]
+                        : theme.palette.grey[900],
+                  }}
+                >
+                  <Typography variant="subtitle1" noWrap>
+                    {selectedMedia}
+                  </Typography>
+                  <Box>
+                    <Typography
+                      variant="body2"
+                      sx={{
+                        cursor: "pointer",
+                        color: theme.palette.primary.main,
+                        "&:hover": {
+                          textDecoration: "underline",
+                        },
+                      }}
+                      onClick={async () => {
+                        setSelectedMedia(null);
+                        await router.push(
+                          `/media?id=${currentId}&type=${mediaType}`
+                        );
+                      }}
+                    >
+                      Back to Gallery
+                    </Typography>
+                  </Box>
+                </Box>
+                <Box
+                  sx={{
+                    flex: 1,
+                    backgroundColor:
+                      theme.palette.mode === "light"
+                        ? theme.palette.grey[100]
+                        : theme.palette.grey[900],
+                    overflow: "hidden",
+                    display: "flex",
+                    justifyContent: "center",
+                    alignItems: "center",
+                    p: 2,
+                  }}
+                >
+                  <MediaViewer
+                    selectedMedia={selectedMedia}
+                    activeTab={activeTab}
+                    getMediaUrl={getMediaUrl}
+                    onError={() => setError("Error loading media")}
+                  />
+                </Box>
+              </Box>
+            ) : (
+              <Box
+                sx={{
+                  flex: 1,
+                  overflow: "auto",
+                  backgroundColor:
+                    theme.palette.mode === "light"
+                      ? theme.palette.grey[100]
+                      : theme.palette.grey[900],
+                }}
+              >
+                {renderMediaThumbnails()}
+              </Box>
+            )}
+          </Box>
+        </>
+      )}
+    </Box>
+  );
+};

src/components/pages/media/id/index.ts (new file)
@@ -0,0 +1 @@
+export { MediaId } from "./id";
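The media page above keeps its UI state (id, type, file) entirely in the query string, so a selection survives reloads and can be deep-linked, e.g. from the job queue's "View Media" button. A TypeScript sketch of the URL shape (the helper is illustrative only, not part of the diff):

// Builds the same /media URLs the page pushes via the router.
function mediaPageUrl(id: string, type?: string, file?: string): string {
  const params = new URLSearchParams({ id });
  if (type) params.set("type", type);
  if (file) params.set("file", file);
  return `/media?${params.toString()}`;
}

console.log(mediaPageUrl("job-123")); // "/media?id=job-123"
console.log(mediaPageUrl("job-123", "images")); // "/media?id=job-123&type=images"
console.log(mediaPageUrl("job-123", "images", "logo.png")); // "...&file=logo.png"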
204
src/components/pages/recordings/id/id.tsx
Normal file
@@ -0,0 +1,204 @@
+import { JobSelector } from "@/components/ai";
+import { fetchJobs } from "@/lib";
+import { useUserSettings } from "@/store/hooks";
+import { Job } from "@/types";
+import {
+  Box,
+  useTheme,
+  Typography,
+  CircularProgress,
+  Alert,
+  Paper,
+} from "@mui/material";
+import { useRouter, useSearchParams } from "next/navigation";
+import { useState, useEffect } from "react";
+
+export const RecordingId = () => {
+  const searchParams = useSearchParams();
+  const theme = useTheme();
+  const { userSettings } = useUserSettings();
+  const router = useRouter();
+
+  const [error, setError] = useState<string | null>(null);
+  const [videoUrl, setVideoUrl] = useState<string | null>(null);
+  const [loading, setLoading] = useState(true);
+  const [jobs, setJobs] = useState<Job[]>([]);
+  const [selectedJob, setSelectedJob] = useState<Job | null>(null);
+
+  const currentId = searchParams.get("id");
+
+  const handleSelectJob = (job: Job | null) => {
+    if (job) {
+      router.push(`/recordings?id=${job.id}`);
+    }
+  };
+
+  useEffect(() => {
+    fetchJobs(setJobs);
+  }, []);
+
+  useEffect(() => {
+    if (!userSettings.recordingsEnabled) {
+      setError("Recordings are disabled");
+      setLoading(false);
+      return;
+    }
+
+    if (!currentId) {
+      setError("No recording ID provided");
+      setLoading(false);
+      return;
+    }
+
+    setLoading(true);
+    setError(null);
+
+    const url = `/api/recordings/${currentId}`;
+    fetch(url, { method: "HEAD" })
+      .then((res) => {
+        if (!res.ok) {
+          throw new Error(`Video not found (status: ${res.status})`);
+        }
+        setVideoUrl(url);
+      })
+      .catch(() => {
+        setError("404 recording not found");
+      })
+      .finally(() => {
+        setLoading(false);
+      });
+  }, [currentId, userSettings.recordingsEnabled]);
+
+  useEffect(() => {
+    if (!currentId) {
+      setSelectedJob(null);
+      return;
+    }
+
+    const job = jobs.find((j) => j.id === currentId);
+    setSelectedJob(job || null);
+  }, [currentId, jobs]);
+
+  return (
+    <Box
+      sx={{
+        height: "100%",
+        width: "100%",
+        display: "flex",
+        flexDirection: "column",
+        position: "relative",
+        borderRadius: 2,
+        overflow: "hidden",
+        border: `1px solid ${theme.palette.divider}`,
+        backgroundColor: theme.palette.background.paper,
+      }}
+    >
+      <Box
+        sx={{
+          display: "flex",
+          justifyContent: "flex-end",
+          p: 1,
+          borderBottom: `1px solid ${theme.palette.divider}`,
+          backgroundColor:
+            theme.palette.mode === "light"
+              ? theme.palette.grey[50]
+              : theme.palette.grey[900],
+          zIndex: 10,
+        }}
+      >
+        <Box sx={{ width: "300px" }}>
+          <JobSelector
+            setSelectedJob={handleSelectJob}
+            selectedJob={selectedJob}
+            setJobs={setJobs}
+            jobs={jobs}
+            sxProps={{}}
+          />
+        </Box>
+      </Box>
+
+      <Box
+        sx={{
+          flex: 1,
+          display: "flex",
+          justifyContent: "center",
+          alignItems: "center",
+          position: "relative",
+          backgroundColor:
+            theme.palette.mode === "light"
+              ? theme.palette.grey[100]
+              : theme.palette.grey[900],
+          p: 2,
+          overflow: "hidden",
+        }}
+      >
+        {loading ? (
+          <Box
+            display="flex"
+            flexDirection="column"
+            alignItems="center"
+            gap={2}
+          >
+            <CircularProgress />
+            <Typography variant="body2" color="textSecondary">
+              Loading recording...
+            </Typography>
+          </Box>
+        ) : error ? (
+          <Paper
+            elevation={3}
+            sx={{
+              p: 3,
+              maxWidth: "500px",
+              width: "100%",
+              backgroundColor: theme.palette.background.paper,
+              borderRadius: 2,
+            }}
+          >
+            <Alert
+              severity="error"
+              variant="filled"
+              sx={{
+                mb: 2,
+                backgroundColor: theme.palette.error.main,
+              }}
+            >
+              {error}
+            </Alert>
+            <Typography variant="body2" color="textSecondary" sx={{ mt: 2 }}>
+              Please select a different recording from the dropdown menu above
+              or check if recordings are enabled.
+            </Typography>
+          </Paper>
+        ) : (
+          <Box
+            sx={{
+              width: "100%",
+              height: "100%",
+              display: "flex",
+              justifyContent: "center",
+              alignItems: "center",
+              overflow: "hidden",
+              borderRadius: 1,
+            }}
+          >
+            <video
+              className="h-full w-full object-contain"
+              controls
+              onError={() => setError("Error loading video")}
+              style={{
+                maxHeight: "100%",
+                maxWidth: "100%",
+                borderRadius: "4px",
+                boxShadow: theme.shadows[4],
+              }}
+            >
+              <source src={videoUrl ?? undefined} type="video/mp4" />
+              Your browser does not support the video tag.
+            </video>
+          </Box>
+        )}
+      </Box>
+    </Box>
+  );
+};
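The component above verifies that a recording actually exists with a lightweight HEAD request before mounting the <video> element, so a missing file surfaces as an error state rather than a broken player. A minimal sketch of that probe-then-play pattern in isolation (helper name hypothetical, not part of the change):

// Hypothetical standalone helper mirroring the HEAD-probe used above.
const probeRecording = async (url: string): Promise<string | null> => {
  // HEAD transfers headers only, so a missing file is detected cheaply.
  const res = await fetch(url, { method: "HEAD" });
  return res.ok ? url : null;
};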
1
src/components/pages/recordings/id/index.ts
Normal file
@@ -0,0 +1 @@
+export { RecordingId } from "./id";
@@ -41,8 +41,6 @@ export const JobSubmitter = () => {
   const [jobOptions, setJobOptions] =
     useState<RawJobOptions>(initialJobOptions);
 
-  console.log(jobOptions);
-
   const handleSubmit = async () => {
     if (!validateURL(submittedURL)) {
       setIsValidUrl(false);
@@ -5,7 +5,7 @@ import { RawJobOptions, SiteMap } from "@/types";
 export const parseJobOptions = (
   job_options: string,
   setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
-  setSiteMap: Dispatch<SetStateAction<SiteMap | null>>
+  setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>
 ) => {
   if (job_options) {
     const jsonOptions = JSON.parse(job_options as string);
@@ -17,6 +17,10 @@ export const parseJobOptions = (
       custom_cookies: null,
     };
 
+    if (jsonOptions.collect_media) {
+      newJobOptions.collect_media = true;
+    }
+
     if (
       jsonOptions.custom_headers &&
       Object.keys(jsonOptions.custom_headers).length
@@ -34,7 +38,7 @@ export const parseJobOptions = (
       newJobOptions.proxies = jsonOptions.proxies.join(",");
     }
 
-    if (jsonOptions.site_map) {
+    if (jsonOptions.site_map && setSiteMap) {
       setSiteMap(jsonOptions.site_map);
     }
 
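Making setSiteMap optional lets callers that only need the parsed options (such as the new useAdvancedJobOptions hook below) skip the site-map setter entirely; the `&& setSiteMap` guard keeps the old behavior for existing callers. A sketch of both call styles, assuming rawOptions is a JSON-encoded job_options string and the setters are React state dispatchers already in scope:

// Original call style, with a site-map consumer:
parseJobOptions(rawOptions, setJobOptions, setSiteMap);

// New call style — the site_map branch is skipped when no setter is given:
parseJobOptions(rawOptions, setJobOptions);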
1
src/lib/hooks/use-advanced-job-options/index.ts
Normal file
@@ -0,0 +1 @@
+export * from "./use-advanced-job-options";
29
src/lib/hooks/use-advanced-job-options/use-advanced-job-options.ts
Normal file
@@ -0,0 +1,29 @@
+import { useEffect, useState } from "react";
+
+import { RawJobOptions } from "@/types";
+import { parseJobOptions } from "@/lib/helpers/parse-job-options";
+import { useRouter } from "next/router";
+
+export const useAdvancedJobOptions = () => {
+  const initialJobOptions: RawJobOptions = {
+    multi_page_scrape: false,
+    custom_headers: null,
+    proxies: null,
+    collect_media: false,
+    custom_cookies: null,
+  };
+
+  const router = useRouter();
+  const { job_options } = router.query;
+
+  const [jobOptions, setJobOptions] =
+    useState<RawJobOptions>(initialJobOptions);
+
+  useEffect(() => {
+    if (job_options) {
+      parseJobOptions(job_options as string, setJobOptions);
+    }
+  }, [job_options]);
+
+  return { jobOptions, setJobOptions };
+};
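A minimal sketch of consuming the hook from a component; the component and markup here are illustrative only, not part of the change. The hook seeds its state from the ?job_options= query parameter via parseJobOptions, so the panel arrives pre-populated when a job is re-run:

import { useAdvancedJobOptions } from "@/lib/hooks/use-advanced-job-options";

// Hypothetical consumer: toggles the collect_media option.
const AdvancedOptionsPanel = () => {
  const { jobOptions, setJobOptions } = useAdvancedJobOptions();

  return (
    <label>
      <input
        type="checkbox"
        checked={jobOptions.collect_media}
        onChange={(e) =>
          setJobOptions({ ...jobOptions, collect_media: e.target.checked })
        }
      />
      Collect media
    </label>
  );
};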
@@ -80,3 +80,22 @@ export const updateJob = async (ids: string[], field: string, value: any) => {
       console.error("Error fetching jobs:", error);
     });
 };
+
+export const getUserSettings = async () => {
+  const token = Cookies.get("token");
+
+  try {
+    const response = await fetch("/api/check", {
+      headers: {
+        "content-type": "application/json",
+        Authorization: `Bearer ${token}`,
+      },
+    });
+
+    const data = await response.json();
+    return data;
+  } catch (error) {
+    console.error("Error fetching jobs:", error);
+    throw error;
+  }
+};
@@ -8,6 +8,9 @@ import { ThemeProvider, CssBaseline, Box } from "@mui/material";
 import { NavDrawer } from "../components/common";
 import { darkTheme, lightTheme } from "../styles/themes";
 import { AuthProvider } from "../contexts/AuthContext";
+import { Provider } from "react-redux";
+import { PersistGate } from "redux-persist/integration/react";
+import { store, persistor } from "@/store/store";
 
 const App: React.FC<AppProps> = ({ Component, pageProps }) => {
   const [isDarkMode, setIsDarkMode] = useState(false);
@@ -35,26 +38,30 @@ const App: React.FC<AppProps> = ({ Component, pageProps }) => {
       <Head>
         <title>Scraperr</title>
       </Head>
-      <AuthProvider>
-        <ThemeProvider theme={isDarkMode ? darkTheme : lightTheme}>
-          <CssBaseline />
-          <Box sx={{ height: "100%", display: "flex" }}>
-            <NavDrawer isDarkMode={isDarkMode} toggleTheme={toggleTheme} />
-            <Box
-              component="main"
-              sx={{
-                p: 3,
-                bgcolor: "background.default",
-                overflow: "hidden",
-                height: "100%",
-                width: "100%",
-              }}
-            >
-              <Component {...pageProps} />
-            </Box>
-          </Box>
-        </ThemeProvider>
-      </AuthProvider>
+      <Provider store={store}>
+        <PersistGate loading={null} persistor={persistor}>
+          <AuthProvider>
+            <ThemeProvider theme={isDarkMode ? darkTheme : lightTheme}>
+              <CssBaseline />
+              <Box sx={{ height: "100%", display: "flex" }}>
+                <NavDrawer isDarkMode={isDarkMode} toggleTheme={toggleTheme} />
+                <Box
+                  component="main"
+                  sx={{
+                    p: 3,
+                    bgcolor: "background.default",
+                    overflow: "hidden",
+                    height: "100%",
+                    width: "100%",
+                  }}
+                >
+                  <Component {...pageProps} />
+                </Box>
+              </Box>
+            </ThemeProvider>
+          </AuthProvider>
+        </PersistGate>
+      </Provider>
     </>
   );
 };
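The nesting order matters here: Provider must wrap PersistGate, and PersistGate defers rendering its children until redux-persist has rehydrated the whitelisted settings slice from storage, so components never see the pre-rehydration defaults. Reduced to its skeleton:

<Provider store={store}>
  <PersistGate loading={null} persistor={persistor}>
    {/* children render only after persisted state is restored */}
  </PersistGate>
</Provider>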
1
src/pages/agent.tsx
Normal file
@@ -0,0 +1 @@
+export { Agent as default } from "@/components/pages/agent";
24
src/pages/api/media/get-media.ts
Normal file
@@ -0,0 +1,24 @@
+import { NextApiRequest, NextApiResponse } from "next";
+
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse
+) {
+  const { id } = req.query;
+
+  try {
+    const response = await fetch(
+      `${process.env.NEXT_PUBLIC_API_URL}/get-media?id=${id}`
+    );
+
+    if (!response.ok) {
+      throw new Error(`Error: ${response.statusText}`);
+    }
+
+    const data = await response.json();
+    res.status(200).json(data);
+  } catch (error) {
+    console.error("Error streaming video:", error);
+    res.status(404).json({ error: "Error streaming video" });
+  }
+}
33
src/pages/api/media/index.ts
Normal file
@@ -0,0 +1,33 @@
+import { NextApiRequest, NextApiResponse } from "next";
+
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse
+) {
+  const { id, type, file } = req.query;
+
+  if (!id || !type || !file) {
+    return res.status(400).json({ error: "Missing required parameters" });
+  }
+
+  try {
+    const response = await fetch(
+      `${process.env.NEXT_PUBLIC_API_URL}/media?id=${id}&type=${type}&file=${file}`
+    );
+
+    if (!response.ok) {
+      throw new Error(`Error: ${response.statusText}`);
+    }
+
+    const contentType =
+      response.headers.get("content-type") || "application/octet-stream";
+
+    res.setHeader("Content-Type", contentType);
+
+    const arrayBuffer = await response.arrayBuffer();
+    res.status(200).send(Buffer.from(arrayBuffer));
+  } catch (error) {
+    console.error("Error streaming media:", error);
+    res.status(404).json({ error: "Error retrieving media file" });
+  }
+}
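This handler buffers the entire upstream file with arrayBuffer() before responding, which is simple but holds the whole file in memory per request. For large media, a streaming variant along the lines of the recordings route below would avoid that; a hedged sketch of the alternative (not what this commit does):

// Alternative sketch: forward the upstream body chunk by chunk instead of buffering.
const reader = response.body?.getReader();
if (reader) {
  res.setHeader("Content-Type", contentType);
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    res.write(value);
  }
  res.end();
}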
39
src/pages/api/recordings/[id].ts
Normal file
@@ -0,0 +1,39 @@
+import { NextApiRequest, NextApiResponse } from "next";
+
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse
+) {
+  const { id } = req.query;
+
+  try {
+    const response = await fetch(
+      `${process.env.NEXT_PUBLIC_API_URL}/recordings/${id}`
+    );
+
+    if (!response.ok) {
+      throw new Error(`Error: ${response.statusText}`);
+    }
+
+    res.setHeader("Content-Type", "video/mp4");
+    res.setHeader("Accept-Ranges", "bytes");
+
+    const reader = response.body?.getReader();
+
+    if (!reader) {
+      res.status(404).json({ error: "Recording not found" });
+      return;
+    }
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      res.write(value);
+    }
+
+    res.end();
+  } catch (error) {
+    console.error("Error streaming video:", error);
+    res.status(404).json({ error: "Error streaming video" });
+  }
+}
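Note that this route advertises Accept-Ranges: bytes but always proxies the full stream; the client's Range header is not forwarded upstream. If partial requests mattered (e.g. for seeking in long recordings), a variant might pass it through, assuming the backend honors range requests:

// Hedged sketch: forward the client's Range header to the backend.
const response = await fetch(
  `${process.env.NEXT_PUBLIC_API_URL}/recordings/${id}`,
  { headers: req.headers.range ? { Range: req.headers.range } : {} }
);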
@@ -6,7 +6,8 @@ import { Button, TextField, Typography, Box } from "@mui/material";
 import { useTheme } from "@mui/material/styles";
 import { useRouter } from "next/router";
 import { useAuth } from "../contexts/AuthContext";
-import { Constants } from "../lib";
+import { Constants, getUserSettings } from "../lib";
+import { useUserSettings } from "@/store/hooks";
 
 type Mode = "login" | "signup";
 
@@ -19,6 +20,7 @@ const AuthForm: React.FC = () => {
   const router = useRouter();
   const { login } = useAuth();
   const [registrationEnabled, setRegistrationEnabled] = useState<boolean>(true);
+  const { setUserSettings } = useUserSettings();
 
   const checkRegistrationEnabled = async () => {
     const response = await axios.get(`/api/check`);
@@ -28,12 +30,17 @@ const AuthForm: React.FC = () => {
   useEffect(() => {
     checkRegistrationEnabled();
   }, []);
 
   const handleSubmit = async (event: React.FormEvent) => {
     event.preventDefault();
     try {
       if (mode === "login") {
         await login(email, password);
         alert("Login successful");
+
+        const userSettings = await getUserSettings();
+        setUserSettings(userSettings);
+
         router.push("/");
       } else {
         await axios.post(`/api/signup`, {
1
src/pages/media/index.tsx
Normal file
@@ -0,0 +1 @@
+export { MediaId as default } from "@/components/pages/media/id";
1
src/pages/recordings/index.tsx
Normal file
@@ -0,0 +1 @@
+export { RecordingId as default } from "@/components/pages/recordings/id";
@@ -7,7 +7,9 @@ export const submitJob = async (
   jobOptions: any,
   customHeaders: any,
   customCookies: any,
-  siteMap: SiteMap | null
+  siteMap: SiteMap | null,
+  agentMode: boolean = false,
+  prompt?: string
 ) => {
   return await fetch(`/api/submit-scrape-job`, {
     method: "POST",
@@ -26,6 +28,8 @@ export const submitJob = async (
         site_map: siteMap,
         custom_cookies: customCookies || [],
       },
+      agent_mode: agentMode,
+      prompt: prompt || "",
     },
   }),
 });
23
src/store/hooks.ts
Normal file
@@ -0,0 +1,23 @@
+import { TypedUseSelectorHook, useDispatch, useSelector } from "react-redux";
+import type { RootState, AppDispatch } from "./store";
+import {
+  SettingsState,
+  setAiEnabled,
+  setRecordingsEnabled,
+} from "./slices/settingsSlice";
+
+export const useAppDispatch = () => useDispatch<AppDispatch>();
+export const useAppSelector: TypedUseSelectorHook<RootState> = useSelector;
+
+export const useUserSettings = () => {
+  const userSettings = useAppSelector((state) => state.settings);
+  const dispatch = useAppDispatch();
+
+  const setUserSettings = (userSettings: any) => {
+    dispatch(setAiEnabled(userSettings.ai_enabled));
+    dispatch(setRecordingsEnabled(userSettings.recordings_enabled));
+    return userSettings;
+  };
+
+  return { userSettings, setUserSettings };
+};
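The hook maps the snake_case API payload (ai_enabled, recordings_enabled) onto the camelCase slice fields, so consumers read typed state without touching the wire format. A minimal consumption sketch (component hypothetical):

// Hypothetical consumer: hide the recordings UI when the feature is off.
const RecordingsLink = () => {
  const { userSettings } = useUserSettings();

  if (!userSettings.recordingsEnabled) return null;
  return <a href="/recordings">Recordings</a>;
};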
28
src/store/slices/settingsSlice.ts
Normal file
@@ -0,0 +1,28 @@
+import { createSlice, PayloadAction } from "@reduxjs/toolkit";
+
+export interface SettingsState {
+  aiEnabled: boolean;
+  recordingsEnabled: boolean;
+}
+
+const initialState: SettingsState = {
+  aiEnabled: false,
+  recordingsEnabled: false,
+};
+
+const settingsSlice = createSlice({
+  name: "settings",
+  initialState,
+  reducers: {
+    setAiEnabled: (state, action: PayloadAction<boolean>) => {
+      state.aiEnabled = action.payload;
+    },
+    setRecordingsEnabled: (state, action: PayloadAction<boolean>) => {
+      state.recordingsEnabled = action.payload;
+    },
+  },
+});
+
+export const { setAiEnabled, setRecordingsEnabled } = settingsSlice.actions;
+
+export default settingsSlice.reducer;
32
src/store/store.ts
Normal file
@@ -0,0 +1,32 @@
+import { configureStore } from "@reduxjs/toolkit";
+import { persistStore, persistReducer } from "redux-persist";
+import storage from "redux-persist/lib/storage";
+import { combineReducers } from "@reduxjs/toolkit";
+import settingsReducer from "./slices/settingsSlice";
+
+const persistConfig = {
+  key: "root",
+  storage,
+  whitelist: ["settings"], // only settings will be persisted
+};
+
+const rootReducer = combineReducers({
+  settings: settingsReducer,
+});
+
+const persistedReducer = persistReducer(persistConfig, rootReducer);
+
+export const store = configureStore({
+  reducer: persistedReducer,
+  middleware: (getDefaultMiddleware) =>
+    getDefaultMiddleware({
+      serializableCheck: {
+        ignoredActions: ["persist/PERSIST", "persist/REHYDRATE"],
+      },
+    }),
+});
+
+export const persistor = persistStore(store);
+
+export type RootState = ReturnType<typeof store.getState>;
+export type AppDispatch = typeof store.dispatch;
@@ -7,9 +7,11 @@ export interface Job {
   result: Object;
   time_created: Date;
   status: string;
-  job_options: Object;
+  job_options: RawJobOptions;
   favorite: boolean;
   chat?: Message[];
+  agent_mode?: boolean;
+  prompt?: string;
 }
 
 export type JobOptions = {
14
start.sh
Executable file
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+RECORDINGS_ENABLED=${RECORDINGS_ENABLED:-true}
+
+if [ "$RECORDINGS_ENABLED" == "false" ]; then
+  pdm run python -m api.backend.worker.job_worker
+else
+  Xvfb :99 -screen 0 1280x1024x24 &
+  XVFB_PID=$!
+  sleep 2
+  x11vnc -display :99 -rfbport 5900 -forever -nopw &
+  VNC_PID=$!
+  DISPLAY=:99 pdm run python -m api.backend.worker.job_worker
+fi
@@ -12,7 +12,7 @@ stdout_logfile_maxbytes=0
 stderr_logfile_maxbytes=0
 
 [program:worker]
-command=pdm run python -m api.backend.worker.job_worker
+command=/project/app/start.sh
 directory=/project/app
 autostart=true
 autorestart=true