Files
Scraperr/api/backend/job/site_mapping/site_mapping.py
Jayden Pyles 6c56f2f161
Some checks failed
Unit Tests / unit-tests (push) Has been cancelled
Unit Tests / cypress-tests (push) Has been cancelled
Unit Tests / success-message (push) Has been cancelled
Chore: app refactor (#88)
* chore: refactor wip

* chore: refactor wip

* chore: refactor wip

* chore: refactor wip

* chore: refactor wip

* chore: refactor wip

* chore: work in progress

* chore: work in progress

* chore: work in progress

* chore: work in progress

* chore: work in progress

* chore: work in progress

* chore: work in progress

* chore: work in progress

* chore: work in progress

* chore: refactor wip

* chore: work in progress

* chore: refactor wip

* chore: refactor wip

* chore: refactor wip

* fix: build

* fix: cypress test

* fix: cypress test

* fix: cypress test

* fix: cypress test

* fix: cypress test

* fix: cypress test

* fix: cypress test

* fix: cypress test

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests

* fix: cypress tests
2025-06-01 15:56:15 -05:00

81 lines
2.1 KiB
Python

# STL
import asyncio
import logging
from copy import deepcopy
from typing import Any
# PDM
from playwright.async_api import Page
# LOCAL
from api.backend.job.models.site_map import Action, SiteMap
from api.backend.job.scraping.scraping_utils import scrape_content
# Module-level logger used by the action handlers below. It is looked up under
# the fixed name "Job" rather than this module's ``__name__``.
LOG = logging.getLogger("Job")
def clear_done_actions(site_map: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of ``site_map`` without its one-shot actions.

    Args:
        site_map: Raw site map. It must contain an ``"actions"`` list whose
            entries each carry a ``"do_once"`` key; a missing key raises
            ``KeyError``.

    Returns:
        An independent copy of ``site_map`` whose ``"actions"`` list keeps,
        in their original order, only the entries with a falsy ``"do_once"``.
        The argument itself is left untouched.
    """
    pruned = deepcopy(site_map)

    repeatable = []
    for entry in pruned["actions"]:
        if entry["do_once"]:
            # One-shot action: it is not carried over to the next pass.
            continue
        repeatable.append(entry)

    pruned["actions"] = repeatable
    return pruned
async def handle_input(action: Action, page: Page) -> bool:
    """Fill the element located by ``action.xpath`` with ``action.input``.

    Args:
        action: Action supplying the ``xpath`` of the target element and the
            ``input`` value to enter.
        page: Page object the XPath locator is created on.

    Returns:
        ``True`` if the fill completed; ``False`` if any exception was raised
        along the way. Failures are logged as warnings and never propagated.
    """
    filled = False
    try:
        target = page.locator(f"xpath={action.xpath}")
        # NOTE(review): the value being typed is written to the log; confirm
        # that is acceptable when a site map fills sensitive fields.
        LOG.info(f"Sending keys: {action.input} to element: {action.xpath}")
        await target.fill(action.input)
        filled = True
    except Exception as exc:
        # Best-effort by design: the caller reacts to the boolean result.
        LOG.warning(f"Error handling input for xpath '{action.xpath}': {exc}")
    return filled
async def handle_click(action: Action, page: Page) -> bool:
    """Click the element located by ``action.xpath``.

    Args:
        action: Action whose ``xpath`` identifies the element to click.
        page: Page object the XPath locator is created on.

    Returns:
        ``True`` if the click completed; ``False`` if any exception was raised
        along the way. Failures are logged as warnings and never propagated.
    """
    clicked = False
    try:
        target = page.locator(f"xpath={action.xpath}")
        LOG.info(f"Clicking element: {action.xpath}")
        await target.click()
        clicked = True
    except Exception as exc:
        # Best-effort by design: the caller reacts to the boolean result.
        LOG.warning(f"Error clicking element at xpath '{action.xpath}': {exc}")
    return clicked
# Dispatch table from an action's ``type`` value to the coroutine function that
# performs it. Every handler is awaited as ``handler(action, page)`` and
# returns ``True`` on success or ``False`` on failure.
ACTION_MAP = {
"click": handle_click,
"input": handle_input,
}
async def handle_site_mapping(
    id: str,
    site_map_dict: dict[str, Any],
    page: Page,
    pages: set[tuple[str, str]],
    collect_media: bool = False,
):
    """Run a site map's actions on ``page``, scraping after every full pass.

    One pass consists of:

    1. building a ``SiteMap`` from the current raw map,
    2. running each action in order through its ``ACTION_MAP`` handler,
       sleeping two seconds after every successful action,
    3. calling ``scrape_content`` once all actions of the pass succeeded,
    4. dropping the ``do_once`` actions via ``clear_done_actions``.

    Passes repeat for as long as the pruned map still has actions, so a map
    with repeatable actions keeps going until one of those actions fails.

    The passes are driven by a loop instead of by the function awaiting
    itself. The per-pass sequence of calls is the same, but the coroutine
    stack no longer grows with every pass, so a long run (for example many
    "next page" clicks) cannot end in ``RecursionError``.

    Args:
        id: Forwarded unchanged to ``scrape_content``.
        site_map_dict: Raw site map. It must be accepted by
            ``SiteMap(**site_map_dict)`` and contain an ``"actions"`` list.
            Pruning happens on copies returned by ``clear_done_actions``.
        page: Page the actions are performed on and content is scraped from.
        pages: Forwarded unchanged to ``scrape_content``.
        collect_media: Forwarded to ``scrape_content`` as a keyword argument.

    Returns:
        ``None``. The function returns early, without scraping the current
        pass, as soon as any action handler reports failure.

    Raises:
        KeyError: If an action's ``type`` has no entry in ``ACTION_MAP``.
    """
    current_map = site_map_dict

    while True:
        site_map = SiteMap(**current_map)

        for action in site_map.actions:
            handler = ACTION_MAP[action.type]
            if not await handler(action, page):
                # A failed action ends the whole run; nothing is scraped for
                # the pass that was in progress.
                return

            # Fixed pause after each successful action before continuing.
            await asyncio.sleep(2)

        await scrape_content(id, page, pages, collect_media=collect_media)

        # Only repeatable actions survive into the next pass.
        current_map = clear_done_actions(current_map)
        if not current_map["actions"]:
            return