feat: add in media downloading (#62)

* feat: add in media downloading

* fix: build issue
This commit is contained in:
Jayden Pyles
2025-05-10 15:14:54 -05:00
committed by GitHub
parent a58212b214
commit 8cd30599fa
13 changed files with 136 additions and 8 deletions

View File

@@ -1,13 +1,19 @@
import time
from typing import cast
from selenium import webdriver
from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from api.backend.utils import LOG
def scrape_content(driver: webdriver.Chrome, pages: set[tuple[str, str]]):
from api.backend.job.scraping.collect_media import collect_media as collect_media_utils
def scrape_content(
driver: webdriver.Chrome, pages: set[tuple[str, str]], collect_media: bool
):
_ = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
@@ -27,4 +33,9 @@ def scrape_content(driver: webdriver.Chrome, pages: set[tuple[str, str]]):
last_height = new_height
pages.add((driver.page_source, driver.current_url))
if collect_media:
LOG.info("Collecting media")
collect_media_utils(driver)
return driver.page_source