feat: add import/export for job configurations (#91)

* chore: wip add upload/import

* chore: wip add upload/import

* feat: update job rerunning

* fix: update workflow

* fix: update workflow

* chore: temp disable workflow
This commit is contained in:
Jayden Pyles
2025-06-12 18:00:39 -05:00
committed by GitHub
parent 93b0c83381
commit 91dc13348d
25 changed files with 371 additions and 64 deletions

View File

@@ -7,6 +7,7 @@ from camoufox import AsyncCamoufox
from playwright.async_api import Page
# LOCAL
from api.backend.constants import RECORDINGS_ENABLED
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
from api.backend.job.models import CapturedElement
from api.backend.worker.logger import LOG
@@ -29,11 +30,13 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
LOG.info(f"Starting work for agent job: {agent_job}")
pages = set()
proxy = None
if agent_job["job_options"]["proxies"]:
proxy = random.choice(agent_job["job_options"]["proxies"])
LOG.info(f"Using proxy: {proxy}")
async with AsyncCamoufox(headless=True) as browser:
async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
page: Page = await browser.new_page()
await add_custom_items(
@@ -64,7 +67,7 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
xpaths = parse_response(response)
captured_elements = await capture_elements(
page, xpaths, agent_job["job_options"]["return_html"]
page, xpaths, agent_job["job_options"].get("return_html", False)
)
final_url = page.url