mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-12-12 10:45:58 +00:00
feat: add import/export for job configurations (#91)
* chore: wip add upload/import * chore: wip add upload/import * feat: update job rerunning * fix: update workflow * fix: update workflow * chore: temp disable workflow
This commit is contained in:
@@ -7,6 +7,7 @@ from camoufox import AsyncCamoufox
|
||||
from playwright.async_api import Page
|
||||
|
||||
# LOCAL
|
||||
from api.backend.constants import RECORDINGS_ENABLED
|
||||
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
|
||||
from api.backend.job.models import CapturedElement
|
||||
from api.backend.worker.logger import LOG
|
||||
@@ -29,11 +30,13 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
|
||||
LOG.info(f"Starting work for agent job: {agent_job}")
|
||||
pages = set()
|
||||
|
||||
proxy = None
|
||||
|
||||
if agent_job["job_options"]["proxies"]:
|
||||
proxy = random.choice(agent_job["job_options"]["proxies"])
|
||||
LOG.info(f"Using proxy: {proxy}")
|
||||
|
||||
async with AsyncCamoufox(headless=True) as browser:
|
||||
async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
|
||||
page: Page = await browser.new_page()
|
||||
|
||||
await add_custom_items(
|
||||
@@ -64,7 +67,7 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
|
||||
xpaths = parse_response(response)
|
||||
|
||||
captured_elements = await capture_elements(
|
||||
page, xpaths, agent_job["job_options"]["return_html"]
|
||||
page, xpaths, agent_job["job_options"].get("return_html", False)
|
||||
)
|
||||
|
||||
final_url = page.url
|
||||
|
||||
Reference in New Issue
Block a user