feat: add agent mode (#81)

* chore: wip agent mode

* wip: add agent mode frontend

* wip: add agent mode frontend

* chore: cleanup code

* chore: cleanup code

* chore: cleanup code
Jayden Pyles
2025-05-19 20:44:41 -05:00
committed by GitHub
parent d602d3330a
commit 5ebd96b62b
33 changed files with 869 additions and 54 deletions

View File

@@ -30,7 +30,7 @@ jobs:
         run: pdm run playwright install
       - name: Run tests
-        run: PYTHONPATH=. pdm run pytest api/backend/tests
+        run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests
   cypress-tests:
     runs-on: ubuntu-latest

View File

@@ -0,0 +1,6 @@
from typing_extensions import TypedDict
class Action(TypedDict):
type: str
url: str

View File

@@ -0,0 +1,94 @@
import random
from typing import Any
from camoufox import AsyncCamoufox
from playwright.async_api import Page
from api.backend.ai.agent.utils import (
capture_elements,
convert_to_markdown,
parse_response,
)
from api.backend.ai.clients import ask_open_ai, ask_ollama, open_ai_key
from api.backend.ai.agent.prompts import (
ELEMENT_EXTRACTION_PROMPT,
EXTRACT_ELEMENTS_PROMPT,
)
from api.backend.job.scraping.collect_media import collect_media
from api.backend.worker.logger import LOG
from api.backend.job.scraping.add_custom import add_custom_items
from api.backend.models import CapturedElement
ask_ai = ask_open_ai if open_ai_key else ask_ollama
async def scrape_with_agent(agent_job: dict[str, Any]):
LOG.info(f"Starting work for agent job: {agent_job}")
pages = set()
if agent_job["job_options"]["proxies"]:
proxy = random.choice(agent_job["job_options"]["proxies"])
LOG.info(f"Using proxy: {proxy}")
async with AsyncCamoufox(headless=True) as browser:
page: Page = await browser.new_page()
await add_custom_items(
agent_job["url"],
page,
agent_job["job_options"]["custom_cookies"],
agent_job["job_options"]["custom_headers"],
)
try:
await page.set_viewport_size({"width": 1920, "height": 1080})
await page.goto(agent_job["url"], timeout=60000)
if agent_job["job_options"]["collect_media"]:
await collect_media(agent_job["id"], page)
html_content = await page.content()
markdown_content = convert_to_markdown(html_content)
response = await ask_ai(
ELEMENT_EXTRACTION_PROMPT.format(
extraction_prompt=EXTRACT_ELEMENTS_PROMPT,
webpage=markdown_content,
prompt=agent_job["prompt"],
)
)
xpaths = parse_response(response)
captured_elements = await capture_elements(page, xpaths)
final_url = page.url
pages.add((html_content, final_url))
finally:
await page.close()
await browser.close()
name_to_elements = {}
for page in pages:
for element in captured_elements:
if element.name not in name_to_elements:
name_to_elements[element.name] = []
name_to_elements[element.name].append(element)
scraped_elements: list[dict[str, dict[str, list[CapturedElement]]]] = [
{
page[1]: name_to_elements,
}
for page in pages
]
return scraped_elements
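For orientation, a minimal sketch of invoking this entry point directly, assuming the same job dict shape the function reads above (`url`, `prompt`, `id`, `job_options`); the concrete values and the `asyncio.run` wrapper are illustrative, not part of the commit:

```python
import asyncio

from api.backend.ai.agent.agent import scrape_with_agent

# Hypothetical job payload; keys mirror the lookups inside scrape_with_agent.
agent_job = {
    "id": "example-job-id",        # placeholder id
    "url": "https://example.com",  # placeholder target
    "prompt": "Collect all the links on the page",
    "job_options": {
        "proxies": [],             # empty -> the random.choice branch is skipped
        "custom_cookies": [],
        "custom_headers": {},
        "collect_media": False,
    },
}

if __name__ == "__main__":
    # Returns a list of {final_url: {name: [CapturedElement, ...]}} mappings.
    print(asyncio.run(scrape_with_agent(agent_job)))
```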

View File

@@ -0,0 +1,58 @@
EXTRACT_ELEMENTS_PROMPT = """
You are an assistant that extracts XPath expressions from webpages.
You will receive HTML content in markdown format.
Each element in the markdown has its XPath shown above it in a comment like:
<!-- //div -->
Respond only with a list of general XPath expressions inside `<xpaths>...</xpaths>` tags.
You will also decide what to do next. If there is no decision to make, return nothing for that section.
"""
ELEMENT_EXTRACTION_PROMPT = """
{extraction_prompt}
**Guidelines:**
- Prefer shorter, more general XPaths like `//div[...]` or `//span[...]`.
- Avoid overly specific or deep paths like `//div[3]/ul/li[2]/a`.
- Do **not** chain multiple elements deeply (e.g., `//div/span/a`).
- Use XPaths further down the tree when possible.
- Do not include any extra explanation or text.
- One XPath is acceptable if that's all that's needed.
- Try to limit it to 1-3 XPaths.
- Include a name for each xpath.
<important>
- USE THE SIMPLEST XPATHS POSSIBLE.
- USE THE MOST GENERAL XPATHS POSSIBLE.
</important>
**Example Format:**
```xml
<xpaths>
- <name: insert_name_here>: <xpath: //div>
- <name: insert_name_here>: <xpath: //span>
- <name: insert_name_here>: <xpath: //span[contains(@text, 'example')]>
- <name: insert_name_here>: <xpath: //div[contains(@text, 'example')]>
- <name: insert_name_here>: <xpath: //a[@href]>
- etc
</xpaths>
<decision>
<next_page>
- //a[@href='next_page_url']
</next_page>
</decision>
```
**Input webpage:**
{webpage}
**Target content:**
{prompt}
"""

View File

@@ -0,0 +1,252 @@
from lxml import html, etree
import re
from playwright.async_api import Page
from api.backend.models import CapturedElement
from api.backend.job.scraping.scraping_utils import clean_format_characters
def convert_to_markdown(html_str: str):
parser = html.HTMLParser()
tree = html.fromstring(html_str, parser=parser)
root = tree.getroottree()
def format_attributes(el: etree._Element) -> str:
"""Convert element attributes into a string."""
return " ".join(f'{k}="{v}"' for k, v in el.attrib.items())
def is_visible(el: etree._Element) -> bool:
style = el.attrib.get("style", "").lower()
class_ = el.attrib.get("class", "").lower()
# Check for visibility styles
if "display: none" in style or "visibility: hidden" in style:
return False
if "opacity: 0" in style or "opacity:0" in style:
return False
if "height: 0" in style or "width: 0" in style:
return False
# Check for common hidden classes
if any(
hidden in class_
for hidden in ["hidden", "invisible", "truncate", "collapse"]
):
return False
# Check for hidden attributes
if el.attrib.get("hidden") is not None:
return False
if el.attrib.get("aria-hidden") == "true":
return False
# Check for empty or whitespace-only content
if not el.text and len(el) == 0:
return False
return True
def is_layout_or_decorative(el: etree._Element) -> bool:
tag = el.tag.lower()
# Layout elements
if tag in {"nav", "footer", "header", "aside", "main", "section"}:
return True
# Decorative elements
if tag in {"svg", "path", "circle", "rect", "line", "polygon", "polyline"}:
return True
# Check id and class for layout/decorative keywords
id_class = " ".join(
[el.attrib.get("id", ""), el.attrib.get("class", "")]
).lower()
layout_keywords = {
"sidebar",
"nav",
"header",
"footer",
"menu",
"advert",
"ads",
"breadcrumb",
"container",
"wrapper",
"layout",
"grid",
"flex",
"row",
"column",
"section",
"banner",
"hero",
"card",
"modal",
"popup",
"tooltip",
"dropdown",
"overlay",
}
return any(keyword in id_class for keyword in layout_keywords)
# Tags to include in the final markdown output
included_tags = {
"div",
"span",
"a",
"p",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"img",
"button",
"input",
"textarea",
"ul",
"ol",
"li",
"table",
"tr",
"td",
"th",
"input",
"textarea",
"select",
"option",
"optgroup",
"fieldset",
"legend",
}
special_elements = []
normal_elements = []
for el in tree.iter():
if el.tag is etree.Comment:
continue
tag = el.tag.lower()
if tag not in included_tags:
continue
if not is_visible(el):
continue
if is_layout_or_decorative(el):
continue
path = root.getpath(el)
attrs = format_attributes(el)
attrs_str = f" {attrs}" if attrs else ""
text = el.text.strip() if el.text else ""
if not text and not attrs:
continue
# interactive elements get dedicated prefixes
if tag == "button":
prefix = "🔘 **<button>**"
special_elements.append(f"<!-- {path} -->\n{prefix} {text}")
elif tag == "a":
href = el.attrib.get("href", "")
prefix = f"🔗 **<a href='{href}'>**"
special_elements.append(f"<!-- {path} -->\n{prefix} {text}")
elif tag == "input":
input_type = el.attrib.get("type", "text")
prefix = f"📝 **<input type='{input_type}'>**"
special_elements.append(f"<!-- {path} -->\n{prefix}")
else:
prefix = f"**<{tag}{attrs_str}>**"
if text:
normal_elements.append(f"<!-- {path} -->\n{prefix} {text}")
return "\n\n".join(normal_elements + special_elements) # type: ignore
def parse_response(text: str) -> list[dict[str, str]]:
xpaths = re.findall(r"<xpaths>(.*?)</xpaths>", text, re.DOTALL)
results = []
if xpaths:
lines = xpaths[0].strip().splitlines()
for line in lines:
if line.strip().startswith("-"):
name = re.findall(r"<name: (.*?)>", line)[0]
xpath = re.findall(r"<xpath: (.*?)>", line)[0]
results.append({"name": name, "xpath": xpath})
else:
results.append({"name": "", "xpath": line.strip()})
return results
def parse_next_page(text: str) -> str | None:
next_page = re.findall(r"<next_page>(.*?)</next_page>", text, re.DOTALL)
if next_page:
lines = next_page[0].strip().splitlines()
next_page = [
line.strip().lstrip("-").strip()
for line in lines
if line.strip().startswith("-")
]
return next_page[0] if next_page else None
async def capture_elements(
page: Page, xpaths: list[dict[str, str]]
) -> list[CapturedElement]:
captured_elements = []
seen_texts = set()
for xpath in xpaths:
try:
locator = page.locator(f"xpath={xpath['xpath']}")
count = await locator.count()
for i in range(count):
element_text = ""
element_handle = await locator.nth(i).element_handle()
if not element_handle:
continue
link = await element_handle.get_attribute("href") or ""
text = await element_handle.text_content()
if text:
element_text += text
if link:
element_text += f" ({link})"
cleaned = clean_format_characters(element_text)
if cleaned in seen_texts:
continue
seen_texts.add(cleaned)
captured_elements.append(
CapturedElement(
name=xpath["name"],
text=cleaned,
xpath=xpath["xpath"],
)
)
except Exception as e:
print(f"Error processing xpath {xpath}: {e}")
return captured_elements
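To make the intermediate markdown format concrete, here is a small self-contained example of running `convert_to_markdown` on a toy snippet (the HTML and the commented output shape are illustrative):

```python
from api.backend.ai.agent.utils import convert_to_markdown

html_snippet = """
<html><body>
  <div class="product">
    <h2>Espresso Machine</h2>
    <span>$199</span>
    <a href="/cart">Add to cart</a>
  </div>
</body></html>
"""

# Each kept element is preceded by its absolute XPath in an HTML comment, e.g.
#   <!-- /html/body/div/h2 -->
#   **<h2>** Espresso Machine
# Links and buttons are emitted last with their own prefixes.
print(convert_to_markdown(html_snippet))
```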

View File

@@ -1,32 +1,29 @@
 # STL
-import os
 import logging
 from collections.abc import Iterable, AsyncGenerator

 # PDM
-from openai import OpenAI
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai.types.chat import ChatCompletionMessageParam

 # LOCAL
-from ollama import Message, AsyncClient
+from ollama import Message
 from api.backend.models import AI
+from api.backend.ai.clients import (
+    llama_client,
+    llama_model,
+    openai_client,
+    open_ai_model,
+    open_ai_key,
+)

 LOG = logging.getLogger(__name__)

 ai_router = APIRouter()

-# Load environment variables
-open_ai_key = os.getenv("OPENAI_KEY")
-open_ai_model = os.getenv("OPENAI_MODEL")
-llama_url = os.getenv("OLLAMA_URL")
-llama_model = os.getenv("OLLAMA_MODEL")
-
-# Initialize clients
-openai_client = OpenAI(api_key=open_ai_key) if open_ai_key else None
-llama_client = AsyncClient(host=llama_url) if llama_url else None

 async def llama_chat(chat_messages: list[Message]) -> AsyncGenerator[str, None]:
     if llama_client and llama_model:

api/backend/ai/clients.py (new file, 38 lines)
View File

@@ -0,0 +1,38 @@
import os
from openai import OpenAI
from ollama import AsyncClient
# Load environment variables
open_ai_key = os.getenv("OPENAI_KEY")
open_ai_model = os.getenv("OPENAI_MODEL")
llama_url = os.getenv("OLLAMA_URL")
llama_model = os.getenv("OLLAMA_MODEL")
# Initialize clients
openai_client = OpenAI(api_key=open_ai_key) if open_ai_key else None
llama_client = AsyncClient(host=llama_url) if llama_url else None
async def ask_open_ai(prompt: str) -> str:
if not openai_client:
raise ValueError("OpenAI client not initialized")
response = openai_client.chat.completions.create(
model=open_ai_model or "gpt-4.1-mini",
messages=[{"role": "user", "content": prompt}],
)
return response.choices[0].message.content or ""
async def ask_ollama(prompt: str) -> str:
if not llama_client:
raise ValueError("Ollama client not initialized")
response = await llama_client.chat(
model=llama_model or "", messages=[{"role": "user", "content": prompt}]
)
return response.message.content or ""
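Both clients are configured from environment variables at import time, so a usage sketch has to set them before importing; the key and model values below are placeholders, and the selection line mirrors what the agent module does:

```python
import asyncio
import os

# Placeholder configuration (use OLLAMA_URL / OLLAMA_MODEL instead for a local model).
os.environ["OPENAI_KEY"] = "sk-placeholder"
os.environ["OPENAI_MODEL"] = "gpt-4.1-mini"

# Import after the environment is set, since the clients are created at import time.
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key

ask_ai = ask_open_ai if open_ai_key else ask_ollama  # same fallback as the agent module

if __name__ == "__main__":
    print(asyncio.run(ask_ai("Summarize this page in one sentence.")))
```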

View File

@@ -1,7 +1,7 @@
 JOB_INSERT_QUERY = """
 INSERT INTO jobs
-(id, url, elements, user, time_created, result, status, chat, job_options)
-VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+(id, url, elements, user, time_created, result, status, chat, job_options, agent_mode, prompt)
+VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 """

 DELETE_JOB_QUERY = """

View File

@@ -27,4 +27,7 @@ CREATE TABLE IF NOT EXISTS cron_jobs (
     time_updated DATETIME NOT NULL,
     FOREIGN KEY (job_id) REFERENCES jobs(id)
 );
+
+ALTER TABLE jobs ADD COLUMN agent_mode BOOLEAN NOT NULL DEFAULT FALSE;
+ALTER TABLE jobs ADD COLUMN prompt STRING;
 """

View File

@@ -1,6 +1,7 @@
 import os
 from api.backend.database.common import connect, QUERIES, insert
 import logging
+import sqlite3

 from api.backend.auth.auth_utils import get_password_hash
@@ -11,11 +12,22 @@ def init_database():
     cursor = connect()

     for query in QUERIES["init"].strip().split(";"):
-        if query.strip():
+        query = query.strip()
+        if not query:
+            continue
+
+        try:
             LOG.info(f"Executing query: {query}")
             _ = cursor.execute(query)
+        except sqlite3.OperationalError as e:
+            if "duplicate column name" in str(e).lower():
+                LOG.warning(f"Skipping duplicate column error: {e}")
+                continue
+            else:
+                LOG.error(f"Error executing query: {query}")
+                raise

-    if os.environ.get("REGISTRATION_ENABLED", "True") == "False":
+    if os.environ.get("REGISTRATION_ENABLED", "true").lower() == "false":
         default_user_email = os.environ.get("DEFAULT_USER_EMAIL")
         default_user_password = os.environ.get("DEFAULT_USER_PASSWORD")
         default_user_full_name = os.environ.get("DEFAULT_USER_FULL_NAME")
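The duplicate-column handling is what lets the ALTER TABLE statements in schema.sql rerun safely on an existing database. A stdlib-only sketch of the error being skipped (table and column names are illustrative):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE jobs (id TEXT)")
conn.execute("ALTER TABLE jobs ADD COLUMN agent_mode BOOLEAN NOT NULL DEFAULT FALSE")

try:
    # Running the same migration again: SQLite rejects the duplicate column.
    conn.execute("ALTER TABLE jobs ADD COLUMN agent_mode BOOLEAN NOT NULL DEFAULT FALSE")
except sqlite3.OperationalError as e:
    assert "duplicate column name" in str(e).lower()
    print(f"Skipping duplicate column error: {e}")
```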

View File

@@ -27,6 +27,8 @@ def insert(item: dict[str, Any]) -> None:
item["status"], item["status"],
item["chat"], item["chat"],
item["job_options"], item["job_options"],
item["agent_mode"],
item["prompt"],
), ),
) )
LOG.info(f"Inserted item: {item}") LOG.info(f"Inserted item: {item}")

View File

@@ -30,3 +30,16 @@ async def scrape_content(
         await collect_media_utils(id, page)

     return html
+
+
+def clean_format_characters(text: str) -> str:
+    text = text.strip()
+    text = text.replace("\n", " ")
+    text = text.replace("\t", " ")
+    text = text.replace("\r", " ")
+    text = text.replace("\f", " ")
+    text = text.replace("\v", " ")
+    text = text.replace("\b", " ")
+    text = text.replace("\a", " ")
+    return text
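A quick usage note for the helper above (the sample string is illustrative): each control character becomes a single space, so doubled spaces can remain in the result.

```python
from api.backend.job.scraping.scraping_utils import clean_format_characters

print(repr(clean_format_characters("  Price:\n\t$19.99\r ")))
# 'Price:  $19.99'  (two spaces where "\n\t" used to be)
```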

View File

@@ -24,7 +24,6 @@ def clear_done_actions(site_map: dict[str, Any]) -> dict[str, Any]:
 async def handle_input(action: Action, page: Page) -> bool:
     try:
         element = page.locator(f"xpath={action.xpath}")
-        await element.wait_for(state="visible", timeout=10000)

         LOG.info(f"Sending keys: {action.input} to element: {action.xpath}")
         await element.fill(action.input)
         return True
@@ -36,7 +35,6 @@ async def handle_input(action: Action, page: Page) -> bool:
 async def handle_click(action: Action, page: Page) -> bool:
     try:
         element = page.locator(f"xpath={action.xpath}")
-        # await element.wait_for(state="visible", timeout=10000)

         LOG.info(f"Clicking element: {action.xpath}")
         await element.click()
         return True

View File

@@ -58,6 +58,8 @@ class Job(pydantic.BaseModel):
     job_options: JobOptions
     status: str = "Queued"
     chat: Optional[str] = None
+    agent_mode: bool = False
+    prompt: Optional[str] = None


 class CronJob(pydantic.BaseModel):

View File

@@ -9,7 +9,10 @@ from playwright.async_api import Page
 from urllib.parse import urlparse, urljoin

 from api.backend.models import Element, CapturedElement
-from api.backend.job.scraping.scraping_utils import scrape_content
+from api.backend.job.scraping.scraping_utils import (
+    clean_format_characters,
+    scrape_content,
+)
 from api.backend.job.site_mapping.site_mapping import handle_site_mapping
 from api.backend.job.scraping.add_custom import add_custom_items
@@ -142,14 +145,7 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element])
                 else str(e)  # type: ignore
             )

-            text = text.strip()
-            text = text.replace("\n", " ")
-            text = text.replace("\t", " ")
-            text = text.replace("\r", " ")
-            text = text.replace("\f", " ")
-            text = text.replace("\v", " ")
-            text = text.replace("\b", " ")
-            text = text.replace("\a", " ")
+            text = clean_format_characters(text)

             captured_element = CapturedElement(
                 xpath=elem.xpath, text=text, name=elem.name

View File

@@ -16,6 +16,8 @@ from api.backend.database.startup import init_database
 from api.backend.worker.post_job_complete.post_job_complete import post_job_complete
 from api.backend.worker.logger import LOG
+
+from api.backend.ai.agent.agent import scrape_with_agent

 NOTIFICATION_CHANNEL = os.getenv("NOTIFICATION_CHANNEL", "")
 NOTIFICATION_WEBHOOK_URL = os.getenv("NOTIFICATION_WEBHOOK_URL", "")
@@ -75,17 +77,21 @@ async def process_job():
LOG.error(f"Failed to parse proxy JSON: {proxies}") LOG.error(f"Failed to parse proxy JSON: {proxies}")
proxies = [] proxies = []
scraped = await scrape( if job["agent_mode"]:
job["id"], scraped = await scrape_with_agent(job)
job["url"], else:
[Element(**j) for j in job["elements"]], scraped = await scrape(
job["job_options"]["custom_headers"], job["id"],
job["job_options"]["multi_page_scrape"], job["url"],
proxies, [Element(**j) for j in job["elements"]],
job["job_options"]["site_map"], job["job_options"]["custom_headers"],
job["job_options"]["collect_media"], job["job_options"]["multi_page_scrape"],
job["job_options"]["custom_cookies"], proxies,
) job["job_options"]["site_map"],
job["job_options"]["collect_media"],
job["job_options"]["custom_cookies"],
)
LOG.info( LOG.info(
f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}" f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
) )

pdm.lock (generated, 13 lines changed)
View File

@@ -5,7 +5,7 @@
groups = ["default", "dev"] groups = ["default", "dev"]
strategy = ["inherit_metadata"] strategy = ["inherit_metadata"]
lock_version = "4.5.0" lock_version = "4.5.0"
content_hash = "sha256:cb37fedd6d022515dde14e475588a8da2144ba22e41dfdfacfe3f7a7d14486ca" content_hash = "sha256:5f4c90b42c3b35194a7c2af8b46b7c28127e25e836a779e85aae0df2bd0e69eb"
[[metadata.targets]] [[metadata.targets]]
requires_python = ">=3.10" requires_python = ">=3.10"
@@ -1174,6 +1174,17 @@ files = [
{file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"}, {file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"},
] ]
[[package]]
name = "html2text"
version = "2025.4.15"
requires_python = ">=3.9"
summary = "Turn HTML into equivalent Markdown-structured text."
groups = ["default"]
files = [
{file = "html2text-2025.4.15-py3-none-any.whl", hash = "sha256:00569167ffdab3d7767a4cdf589b7f57e777a5ed28d12907d8c58769ec734acc"},
{file = "html2text-2025.4.15.tar.gz", hash = "sha256:948a645f8f0bc3abe7fd587019a2197a12436cd73d0d4908af95bfc8da337588"},
]
[[package]] [[package]]
name = "httpcore" name = "httpcore"
version = "1.0.9" version = "1.0.9"

View File

@@ -41,6 +41,7 @@ dependencies = [
"apscheduler>=3.11.0", "apscheduler>=3.11.0",
"playwright>=1.52.0", "playwright>=1.52.0",
"camoufox>=0.4.11", "camoufox>=0.4.11",
"html2text>=2025.4.15",
] ]
requires-python = ">=3.10" requires-python = ">=3.10"
readme = "README.md" readme = "README.md"

View File

@@ -6,11 +6,13 @@ import { RawJobOptions } from "@/types";
 export type AdvancedJobOptionsProps = {
   jobOptions: RawJobOptions;
   setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
+  multiPageScrapeEnabled?: boolean;
 };

 export const AdvancedJobOptions = ({
   jobOptions,
   setJobOptions,
+  multiPageScrapeEnabled = true,
 }: AdvancedJobOptionsProps) => {
   const [open, setOpen] = useState(false);

   return (
@@ -39,6 +41,7 @@ export const AdvancedJobOptions = ({
         onClose={() => setOpen(false)}
         jobOptions={jobOptions}
         setJobOptions={setJobOptions}
+        multiPageScrapeEnabled={multiPageScrapeEnabled}
       />
     </Box>
   );

View File

@@ -32,6 +32,7 @@ export type AdvancedJobOptionsDialogProps = {
   onClose: () => void;
   jobOptions: RawJobOptions;
   setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
+  multiPageScrapeEnabled?: boolean;
 };

 export const AdvancedJobOptionsDialog = ({
@@ -39,6 +40,7 @@ export const AdvancedJobOptionsDialog = ({
   onClose,
   jobOptions,
   setJobOptions,
+  multiPageScrapeEnabled = true,
 }: AdvancedJobOptionsDialogProps) => {
   const theme = useTheme();

   const handleMultiPageScrapeChange = () => {
@@ -122,12 +124,19 @@ export const AdvancedJobOptionsDialog = ({
                 <Checkbox
                   checked={jobOptions.multi_page_scrape}
                   onChange={handleMultiPageScrapeChange}
+                  disabled={!multiPageScrapeEnabled}
                 />
               }
               label={
                 <Box sx={{ display: "flex", alignItems: "center" }}>
                   <Typography>Multi Page Scrape</Typography>
-                  <Tooltip title="Enable crawling through multiple pages">
+                  <Tooltip
+                    title={
+                      multiPageScrapeEnabled
+                        ? "Enable crawling through multiple pages"
+                        : "Multi page scrape is disabled"
+                    }
+                  >
                     <IconButton size="small">
                       <InfoOutlined fontSize="small" />
                     </IconButton>

View File

@@ -0,0 +1,29 @@
import { Box } from "@mui/material";
export type DisabledProps = {
message: string;
};
export const Disabled = ({ message }: DisabledProps) => {
return (
<Box
bgcolor="background.default"
minHeight="100vh"
display="flex"
justifyContent="center"
alignItems="center"
>
<h4
style={{
color: "#fff",
padding: "20px",
borderRadius: "8px",
background: "rgba(0, 0, 0, 0.6)",
boxShadow: "0 4px 8px rgba(0, 0, 0, 0.2)",
}}
>
{message}
</h4>
</Box>
);
};

View File

@@ -0,0 +1 @@
export * from "./disabled";

View File

@@ -3,11 +3,10 @@ import { NavItem } from "../nav-item";
 import HomeIcon from "@mui/icons-material/Home";
 import HttpIcon from "@mui/icons-material/Http";
-import TerminalIcon from "@mui/icons-material/Terminal";
 import BarChart from "@mui/icons-material/BarChart";
 import AutoAwesomeIcon from "@mui/icons-material/AutoAwesome";
 import { List } from "@mui/material";
-import { Folder, Schedule, VideoFile } from "@mui/icons-material";
+import { Folder, Person, Schedule, VideoFile } from "@mui/icons-material";

 const items = [
   {
@@ -20,6 +19,11 @@ const items = [
text: "Jobs", text: "Jobs",
href: "/jobs", href: "/jobs",
}, },
{
icon: <Person />,
text: "Agent",
href: "/agent",
},
{ {
icon: <AutoAwesomeIcon />, icon: <AutoAwesomeIcon />,
text: "Chat", text: "Chat",

View File

@@ -131,11 +131,17 @@ export const JobQueue = ({
                 </Tooltip>
               )}
             </TableCell>
-            <TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
+            <TableCell
+              sx={{
+                maxWidth: 100,
+                overflow: "auto",
+              }}
+            >
               <Box
                 sx={{
                   maxHeight: 100,
                   overflow: "auto",
+                  paddingTop: 1,
                 }}
               >
                 {row.id}
@@ -146,7 +152,7 @@ export const JobQueue = ({
             </TableCell>
             <TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
               <Box sx={{ maxHeight: 100, overflow: "auto" }}>
-                {JSON.stringify(row.elements)}
+                {row.agent_mode ? "Agent Mode" : JSON.stringify(row.elements)}
               </Box>
             </TableCell>
             <TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}>
@@ -200,9 +206,19 @@ export const JobQueue = ({
                   Download
                 </Button>
                 <Button
-                  onClick={() =>
-                    onNavigate(row.elements, row.url, row.job_options)
-                  }
+                  onClick={() => {
+                    if (row.agent_mode) {
+                      router.push({
+                        pathname: "/agent",
+                        query: {
+                          url: row.url,
+                          prompt: row.prompt,
+                        },
+                      });
+                    } else {
+                      onNavigate(row.elements, row.url, row.job_options);
+                    }
+                  }}
                   size="small"
                   sx={{
                     minWidth: 0,

View File

@@ -0,0 +1,228 @@
import { validateURL } from "@/lib/helpers/validate-url";
import { ApiService } from "@/services";
import {
Box,
Button,
Divider,
Snackbar,
Alert,
TextField,
Typography,
useTheme,
} from "@mui/material";
import { useEffect, useState } from "react";
import { useRouter } from "next/router";
import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
import { useAdvancedJobOptions } from "@/lib/hooks/use-advanced-job-options/use-advanced-job-options";
import { checkAI } from "@/lib";
import { Disabled } from "@/components/common/disabled/disabled";
export const Agent = () => {
const [url, setUrl] = useState("");
const [prompt, setPrompt] = useState("");
const [urlError, setUrlError] = useState<string | null>(null);
const [aiEnabled, setAiEnabled] = useState(false);
const [snackbarMessage, setSnackbarMessage] = useState("");
const [snackbarSeverity, setSnackbarSeverity] = useState<
"success" | "error" | "info" | "warning"
>("info");
const [snackbarOpen, setSnackbarOpen] = useState(false);
const router = useRouter();
const { jobOptions, setJobOptions } = useAdvancedJobOptions();
const theme = useTheme();
useEffect(() => {
if (router.query.url) {
setUrl(router.query.url as string);
}
if (router.query.prompt) {
setPrompt(router.query.prompt as string);
}
}, [router.query.url, router.query.prompt]);
useEffect(() => {
checkAI(setAiEnabled);
}, []);
const handleCloseSnackbar = () => {
setSnackbarOpen(false);
};
const ErrorSnackbar = () => {
return (
<Snackbar
open={snackbarOpen}
autoHideDuration={6000}
onClose={handleCloseSnackbar}
>
<Alert onClose={handleCloseSnackbar} severity="error">
{snackbarMessage}
</Alert>
</Snackbar>
);
};
const NotifySnackbar = () => {
const goTo = () => {
router.push("/jobs");
};
const action = (
<Button color="inherit" size="small" onClick={goTo}>
Go To Job
</Button>
);
return (
<Snackbar
open={snackbarOpen}
autoHideDuration={6000}
onClose={handleCloseSnackbar}
>
<Alert onClose={handleCloseSnackbar} severity="info" action={action}>
{snackbarMessage}
</Alert>
</Snackbar>
);
};
const handleSubmit = async () => {
if (!validateURL(url)) {
setUrlError("Please enter a valid URL.");
return;
}
setUrlError(null);
await ApiService.submitJob(
url,
[],
"",
{
collect_media: jobOptions.collect_media,
multi_page_scrape: jobOptions.multi_page_scrape,
},
jobOptions.custom_headers,
jobOptions.custom_cookies,
null,
true,
prompt
)
.then(async (response) => {
if (!response.ok) {
return response.json().then((error) => {
throw new Error(error.error);
});
}
return response.json();
})
.then((data) => {
setSnackbarMessage(`Agent job: ${data.id} submitted successfully.`);
setSnackbarSeverity("info");
setSnackbarOpen(true);
})
.catch((error) => {
setSnackbarMessage(error?.message || "An error occurred.");
setSnackbarSeverity("error");
setSnackbarOpen(true);
});
};
if (!aiEnabled) {
return (
<Disabled message="Must set either OPENAI_KEY or OLLAMA_MODEL to use AI features." />
);
}
return (
<Box
sx={{
minHeight: "100vh",
display: "flex",
alignItems: "center",
justifyContent: "center",
background: theme.palette.background.default,
p: 4,
}}
>
<Box
sx={{
backgroundColor: theme.palette.background.paper,
borderRadius: 4,
boxShadow: 6,
p: 4,
width: "100%",
maxWidth: 800,
display: "flex",
flexDirection: "column",
gap: "1rem",
}}
>
<Typography variant="h3" sx={{ textAlign: "center", fontWeight: 600 }}>
Agent Mode
</Typography>
<Typography
variant="body1"
sx={{ textAlign: "center", color: "text.secondary" }}
>
Use AI to scrape a website
</Typography>
<Divider />
<Typography variant="body1" sx={{ fontWeight: 500 }}>
Website URL
</Typography>
<TextField
value={url}
onChange={(e) => setUrl(e.target.value)}
error={!!urlError}
helperText={urlError}
autoComplete="agent-url"
fullWidth
placeholder="https://www.example.com"
variant="outlined"
size="small"
/>
<Typography variant="body1" sx={{ fontWeight: 500, marginBottom: 0 }}>
Prompt
</Typography>
<TextField
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
autoComplete="agent-prompt"
fullWidth
placeholder="Collect all the links on the page"
variant="outlined"
size="small"
/>
<Box
sx={{
display: "flex",
gap: 2,
alignItems: "center",
justifyContent: "space-between",
flexWrap: "wrap",
}}
>
<AdvancedJobOptions
jobOptions={jobOptions}
setJobOptions={setJobOptions}
multiPageScrapeEnabled={false}
/>
<Button
variant="contained"
color="primary"
onClick={handleSubmit}
sx={{ minWidth: 120 }}
>
Submit
</Button>
</Box>
{snackbarSeverity === "info" ? <NotifySnackbar /> : <ErrorSnackbar />}
</Box>
</Box>
);
};

View File

@@ -0,0 +1 @@
export * from "./agent";

View File

@@ -41,8 +41,6 @@ export const JobSubmitter = () => {
   const [jobOptions, setJobOptions] =
     useState<RawJobOptions>(initialJobOptions);

-  console.log(jobOptions);
-
   const handleSubmit = async () => {
     if (!validateURL(submittedURL)) {
       setIsValidUrl(false);

View File

@@ -5,7 +5,7 @@ import { RawJobOptions, SiteMap } from "@/types";
 export const parseJobOptions = (
   job_options: string,
   setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
-  setSiteMap: Dispatch<SetStateAction<SiteMap | null>>
+  setSiteMap?: Dispatch<SetStateAction<SiteMap | null>>
 ) => {
   if (job_options) {
     const jsonOptions = JSON.parse(job_options as string);
@@ -38,7 +38,7 @@ export const parseJobOptions = (
       newJobOptions.proxies = jsonOptions.proxies.join(",");
     }

-    if (jsonOptions.site_map) {
+    if (jsonOptions.site_map && setSiteMap) {
       setSiteMap(jsonOptions.site_map);
     }

View File

@@ -0,0 +1 @@
export * from "./use-advanced-job-options";

View File

@@ -0,0 +1,29 @@
import { useEffect, useState } from "react";
import { RawJobOptions } from "@/types";
import { parseJobOptions } from "@/lib/helpers/parse-job-options";
import { useRouter } from "next/router";
export const useAdvancedJobOptions = () => {
const initialJobOptions: RawJobOptions = {
multi_page_scrape: false,
custom_headers: null,
proxies: null,
collect_media: false,
custom_cookies: null,
};
const router = useRouter();
const { job_options } = router.query;
const [jobOptions, setJobOptions] =
useState<RawJobOptions>(initialJobOptions);
useEffect(() => {
if (job_options) {
parseJobOptions(job_options as string, setJobOptions);
}
}, [job_options]);
return { jobOptions, setJobOptions };
};

src/pages/agent.tsx (new file, 1 line)
View File

@@ -0,0 +1 @@
export { Agent as default } from "@/components/pages/agent";

View File

@@ -7,7 +7,9 @@ export const submitJob = async (
   jobOptions: any,
   customHeaders: any,
   customCookies: any,
-  siteMap: SiteMap | null
+  siteMap: SiteMap | null,
+  agentMode: boolean = false,
+  prompt?: string
 ) => {
   return await fetch(`/api/submit-scrape-job`, {
     method: "POST",
@@ -26,6 +28,8 @@ export const submitJob = async (
         site_map: siteMap,
         custom_cookies: customCookies || [],
       },
+      agent_mode: agentMode,
+      prompt: prompt || "",
     },
   }),
 });

View File

@@ -10,6 +10,8 @@ export interface Job {
   job_options: RawJobOptions;
   favorite: boolean;
   chat?: Message[];
+  agent_mode?: boolean;
+  prompt?: string;
 }

 export type JobOptions = {