import sys
from changedetectionio.strtobool import strtobool
from loguru import logger
from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
import os

# Element types the Visual Selector scraper looks at. 'button' is included
# because some sites place content of interest inside <button> elements
# (presumably e.g. stock status text - the original note was truncated).
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'

SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
SCREENSHOT_DEFAULT_QUALITY = 40

# Upper bound on the total height of the final image (used in stitch mode).
# The cap exists because tall screenshots consumed a huge amount of RAM, e.g.
# 16000 x 1400 x 3 = 67,200,000 bytes ~= 64.1 MB (not including buffers in PIL etc).
# Overridable through the SCREENSHOT_MAX_HEIGHT environment variable.
# NOTE(review): an earlier version of this comment cited a 16000px limit, but
# the default actually applied is SCREENSHOT_MAX_HEIGHT_DEFAULT (20000).
SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))

# Page height at which the stitching method takes over; below this (and below
# SCREENSHOT_MAX_TOTAL_HEIGHT, which a user can set) the default screenshot
# method is used.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000

# available_fetchers() scans this module for any class whose name starts with
# html_; the result is used in the form selections.
# NOTE(review): this import is deliberately kept below the constants above -
# presumably the fetcher modules import them from this package; confirm before moving.
from changedetectionio.content_fetchers.requests import fetcher as html_requests

import importlib.resources


def _read_bundled_js(filename):
    """Return the UTF-8 text of a file shipped in the content_fetchers 'res' package."""
    package_files = importlib.resources.files("changedetectionio.content_fetchers.res")
    return package_files.joinpath(filename).read_text(encoding='utf-8')


XPATH_ELEMENT_JS = _read_bundled_js('xpath_element_scraper.js')
INSTOCK_DATA_JS = _read_bundled_js('stock-not-in-stock.js')
FAVICON_FETCHER_JS = _read_bundled_js('favicon-fetcher.js')


def available_fetchers():
    """List the fetcher classes exposed by this module.

    Returns:
        A list of ``(name, fetcher_description)`` tuples, one for every class
        bound in this module under a name starting with ``html_``.
    """
    # See the if statement at the bottom of this file for how we switch
    # between playwright and webdriver.
    import inspect

    # @todo html_ is maybe better as fetcher_ or something
    # In this case, make sure to edit the default one in store.py and fetch_site_status.py
    this_module = sys.modules[__name__]
    return [
        (member_name, member.fetcher_description)
        for member_name, member in inspect.getmembers(this_module, inspect.isclass)
        if member_name.startswith('html_')
    ]


# Decide which is the 'real' HTML webdriver; this is more a system wide config
# rather than site-specific.
# @note - For now, browser steps always uses playwright
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
if not use_playwright_as_chrome_fetcher:
    # No PLAYWRIGHT_DRIVER_URL configured.
    logger.debug("Falling back to selenium as fetcher")
    from .webdriver_selenium import fetcher as html_webdriver
elif strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
    logger.debug('Using direct Python Puppeteer library as fetcher')
    from .puppeteer import fetcher as html_webdriver
else:
    logger.debug('Using Playwright library as fetcher')
    from .playwright import fetcher as html_webdriver