diff --git a/.github/actions/run-cypress-tests/action.yaml b/.github/actions/run-cypress-tests/action.yaml new file mode 100644 index 0000000..984c27b --- /dev/null +++ b/.github/actions/run-cypress-tests/action.yaml @@ -0,0 +1,38 @@ +name: Run Cypress Tests + +description: Run Cypress tests + +runs: + using: "composite" + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Setup Docker project + shell: bash + run: docker compose up -d + + - name: Wait for frontend to be ready + shell: bash + run: | + for i in {1..10}; do + curl -sf -o /dev/null http://127.0.0.1:3000 && echo "Frontend is ready" && exit 0 + echo "Waiting for frontend to be ready... attempt $i" + sleep 1 + done + echo "Frontend failed to be ready after 10 retries" + exit 1 + + - name: Install dependencies + shell: bash + run: npm install + + - name: Run Cypress tests + shell: bash + run: npm run cy:run + diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9996ca1..b3bbe07 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -27,3 +27,12 @@ jobs: - name: Run tests run: PYTHONPATH=. 
pdm run pytest api/backend/tests + + cypress-tests: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Run Cypress tests + uses: ./.github/actions/run-cypress-tests diff --git a/api/backend/scraping.py b/api/backend/scraping.py index 8d80629..d88b066 100644 --- a/api/backend/scraping.py +++ b/api/backend/scraping.py @@ -2,10 +2,10 @@ import logging from typing import Any, Optional import random -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, Tag from lxml import etree from seleniumwire import webdriver # type: ignore -from lxml.etree import _Element # pyright: ignore [reportPrivateUsage] +from lxml.etree import _Element from fake_useragent import UserAgent from selenium.webdriver.chrome.options import Options as ChromeOptions from urllib.parse import urlparse, urljoin @@ -16,7 +16,6 @@ from api.backend.job.site_mapping.site_mapping import ( from selenium.webdriver.chrome.service import Service from webdriver_manager.chrome import ChromeDriverManager from api.backend.job.scraping.scraping_utils import scrape_content -from api.backend.job.models.site_map import SiteMap LOG = logging.getLogger(__name__) @@ -143,7 +142,10 @@ async def make_site_request( soup = BeautifulSoup(page_source, "html.parser") for a_tag in soup.find_all("a"): - link = a_tag.get("href") + if not isinstance(a_tag, Tag): + continue + + link = str(a_tag.get("href", "")) if link: if not urlparse(link).netloc: @@ -171,7 +173,7 @@ async def collect_scraped_elements(page: tuple[str, str], xpaths: list[Element]) el = sxpath(root, elem.xpath) for e in el: - if isinstance(e, etree._Element): + if isinstance(e, etree._Element): # type: ignore text = "\t".join(str(t) for t in e.itertext()) else: text = str(e) @@ -194,7 +196,7 @@ async def scrape( headers: Optional[dict[str, Any]], multi_page_scrape: bool = False, proxies: Optional[list[str]] = [], - site_map: Optional[SiteMap] = None, + site_map: Optional[dict[str, Any]] = None, ): 
visited_urls: set[str] = set() pages: set[tuple[str, str]] = set() diff --git a/cypress/e2e/submit-job.cy.ts b/cypress/e2e/submit-job.cy.ts index 0d35968..0db127b 100644 --- a/cypress/e2e/submit-job.cy.ts +++ b/cypress/e2e/submit-job.cy.ts @@ -15,5 +15,16 @@ describe("Job", () => { const submit = cy.contains("Submit"); submit.click(); + + const previousJobs = cy.get("li").contains("Previous Jobs"); + previousJobs.click(); + + const jobUrl = cy.get("div").contains("https://example.com"); + jobUrl.should("exist"); + + // Retry the lookup for up to 15s (Cypress re-queries until it matches) + // instead of always sleeping for a fixed 10s. + const completedJobStatus = cy.get("div").contains("Completed", { timeout: 15000 }); + completedJobStatus.should("exist"); }); }); diff --git a/pyproject.toml b/pyproject.toml index 8396187..7681209 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,9 +58,9 @@ defineConstant = { DEBUG = true } stubPath = "" # Type checking strictness -typeCheckingMode = "strict" # Enables strict type checking mode -reportPrivateUsage = "error" -reportMissingTypeStubs = "error" +typeCheckingMode = "strict" # Enables strict type checking mode +reportPrivateUsage = "none" +reportMissingTypeStubs = "none" reportUntypedFunctionDecorator = "error" reportUntypedClassDecorator = "error" reportUntypedBaseClass = "error" @@ -89,7 +89,7 @@ reportInvalidStubStatement = "error" reportInconsistentOverload = "error" # Misc settings -pythonVersion = "3.10" # Matches your Python version from pyproject.toml +pythonVersion = "3.10" # Matches your Python version from pyproject.toml strictListInference = true strictDictionaryInference = true strictSetInference = true