Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-03 16:17:51 +00:00)
Compare commits

4 Commits

0.49.7 ... raw-browse
| Author | SHA1 | Date |
|---|---|---|
|  | ec77b45e84 |  |
|  | 138f7fc59c |  |
|  | 56b768d24f |  |
|  | a61d7b4284 |  |
@@ -679,7 +679,7 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/settings", methods=['GET', "POST"])
    @login_optionally_required
    def settings_page():
        from changedetectionio import content_fetcher, forms
        from . import forms

        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:

@@ -237,7 +237,7 @@ class browsersteps_live_ui(steppable_browser_interface):
    def get_current_state(self):
        """Return the screenshot and interactive elements mapping, generally always called after action_()"""
        from pkg_resources import resource_string
        xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
        xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
        now = time.time()
        self.page.wait_for_timeout(1 * 1000)

@@ -272,8 +272,8 @@ class browsersteps_live_ui(steppable_browser_interface):
        self.page.evaluate("var include_filters=''")
        from pkg_resources import resource_string
        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
        xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
        from changedetectionio.content_fetcher import visualselector_xpath_selectors
        xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
        from changedetectionio.fetchers import visualselector_xpath_selectors
        xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
        xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))

@@ -13,7 +13,7 @@ import signal
import socket
import sys

from . import store, changedetection_app, content_fetcher
from . import store, changedetection_app
from . import __version__

# Only global so we can access it in the signal handler

@@ -1,595 +0,0 @@
import hashlib
from abc import abstractmethod
import chardet
import json
import logging
import os
import requests
import sys
import time

visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'

class Non200ErrorCodeReceived(Exception):
    def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.xpath_data = xpath_data
        self.page_text = None

        if page_html:
            from changedetectionio import html_tools
            self.page_text = html_tools.html_to_text(page_html)
        return

class checksumFromPreviousCheckWasTheSame(Exception):
    def __init__(self):
        return

class JSActionExceptions(Exception):
    def __init__(self, status_code, url, screenshot, message=''):
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.message = message
        return

class BrowserStepsStepTimout(Exception):
    def __init__(self, step_n):
        self.step_n = step_n
        return


class PageUnloadable(Exception):
    def __init__(self, status_code, url, message, screenshot=False):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.message = message
        return

class EmptyReply(Exception):
    def __init__(self, status_code, url, screenshot=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        return

class ScreenshotUnavailable(Exception):
    def __init__(self, status_code, url, page_html=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        if page_html:
            from html_tools import html_to_text
            self.page_text = html_to_text(page_html)
        return

class ReplyWithContentButNoText(Exception):
    def __init__(self, status_code, url, screenshot=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        return

class Fetcher():
    browser_steps = None
    browser_steps_screenshot_path = None
    content = None
    error = None
    fetcher_description = "No description"
    headers = None
    status_code = None
    webdriver_js_execute_code = None
    xpath_data = None
    xpath_element_js = ""
    instock_data = None
    instock_data_js = ""

    # Will be needed in the future by the VisualSelector, always get this where possible.
    screenshot = False
    system_http_proxy = os.getenv('HTTP_PROXY')
    system_https_proxy = os.getenv('HTTPS_PROXY')

    # Time ONTOP of the system defined env minimum time
    render_extract_delay = 0

    def __init__(self):
        from pkg_resources import resource_string
        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
        self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
        self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')


    @abstractmethod
    def get_error(self):
        return self.error

    @abstractmethod
    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):
        # Should set self.error, self.status_code and self.content
        pass

    @abstractmethod
    def quit(self):
        return

    @abstractmethod
    def get_last_status_code(self):
        return self.status_code

    @abstractmethod
    def screenshot_step(self, step_n):
        return None

    @abstractmethod
    # Return true/false if this checker is ready to run, in the case it needs todo some special config check etc
    def is_ready(self):
        return True

    def iterate_browser_steps(self):
        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
        from playwright._impl._api_types import TimeoutError
        from jinja2 import Environment
        jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])

        step_n = 0

        if self.browser_steps is not None and len(self.browser_steps):
            interface = steppable_browser_interface()
            interface.page = self.page

            valid_steps = filter(lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), self.browser_steps)

            for step in valid_steps:
                step_n += 1
                print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
                self.screenshot_step("before-"+str(step_n))
                self.save_step_html("before-"+str(step_n))
                try:
                    optional_value = step['optional_value']
                    selector = step['selector']
                    # Support for jinja2 template in step values, with date module added
                    if '{%' in step['optional_value'] or '{{' in step['optional_value']:
                        optional_value = str(jinja2_env.from_string(step['optional_value']).render())
                    if '{%' in step['selector'] or '{{' in step['selector']:
                        selector = str(jinja2_env.from_string(step['selector']).render())

                    getattr(interface, "call_action")(action_name=step['operation'],
                                                      selector=selector,
                                                      optional_value=optional_value)
                    self.screenshot_step(step_n)
                    self.save_step_html(step_n)
                except TimeoutError:
                    # Stop processing here
                    raise BrowserStepsStepTimout(step_n=step_n)



    # It's always good to reset these
    def delete_browser_steps_screenshots(self):
        import glob
        if self.browser_steps_screenshot_path is not None:
            dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg')
            files = glob.glob(dest)
            for f in files:
                os.unlink(f)

#   Maybe for the future, each fetcher provides its own diff output, could be used for text, image
#   the current one would return javascript output (as we use JS to generate the diff)
#
def available_fetchers():
    # See the if statement at the bottom of this file for how we switch between playwright and webdriver
    import inspect
    p = []
    for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
        if inspect.isclass(obj):
            # @todo html_ is maybe better as fetcher_ or something
            # In this case, make sure to edit the default one in store.py and fetch_site_status.py
            if name.startswith('html_'):
                t = tuple([name, obj.fetcher_description])
                p.append(t)

    return p

class base_html_playwright(Fetcher):
    fetcher_description = "Playwright {}/Javascript".format(
        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
    )
    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))

    browser_type = ''
    command_executor = ''

    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']

    proxy = None

    def __init__(self, proxy_override=None):
        super().__init__()
        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
        self.command_executor = os.getenv(
            "PLAYWRIGHT_DRIVER_URL",
            'ws://playwright-chrome:3000'
        ).strip('"')

        # If any proxy settings are enabled, then we should setup the proxy object
        proxy_args = {}
        for k in self.playwright_proxy_settings_mappings:
            v = os.getenv('playwright_proxy_' + k, False)
            if v:
                proxy_args[k] = v.strip('"')

        if proxy_args:
            self.proxy = proxy_args

        # allow per-watch proxy selection override
        if proxy_override:
            self.proxy = {'server': proxy_override}

        if self.proxy:
            # Playwright needs separate username and password values
            from urllib.parse import urlparse
            parsed = urlparse(self.proxy.get('server'))
            if parsed.username:
                self.proxy['username'] = parsed.username
                self.proxy['password'] = parsed.password

    def screenshot_step(self, step_n=''):
        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)

        if self.browser_steps_screenshot_path is not None:
            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
            logging.debug("Saving step screenshot to {}".format(destination))
            with open(destination, 'wb') as f:
                f.write(screenshot)

    def save_step_html(self, step_n):
        content = self.page.content()
        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
        logging.debug("Saving step HTML to {}".format(destination))
        with open(destination, 'w') as f:
            f.write(content)

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types

        self.delete_browser_steps_screenshots()
        response = None
        with sync_playwright() as p:
            browser_type = getattr(p, self.browser_type)

            # Seemed to cause a connection Exception even tho I can see it connect
            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
            # 60,000 connection timeout only
            browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)

            # Set user agent to prevent Cloudflare from blocking the browser
            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
            context = browser.new_context(
                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
                proxy=self.proxy,
                # This is needed to enable JavaScript execution on GitHub and others
                bypass_csp=True,
                # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
                service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
                # Should never be needed
                accept_downloads=False
            )

            self.page = context.new_page()
            if len(request_headers):
                context.set_extra_http_headers(request_headers)

                self.page.set_default_navigation_timeout(90000)
                self.page.set_default_timeout(90000)

                # Listen for all console events and handle errors
                self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))

            # Goto page
            try:
                # Wait_until = commit
                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
                # This seemed to solve nearly all 'TimeoutErrors'
                response = self.page.goto(url, wait_until='commit')
            except playwright._impl._api_types.Error as e:
                # Retry once - https://github.com/browserless/chrome/issues/2485
                # Sometimes errors related to invalid cert's and other can be random
                print ("Content Fetcher > retrying request got error - ", str(e))
                time.sleep(1)
                response = self.page.goto(url, wait_until='commit')

            except Exception as e:
                print ("Content Fetcher > Other exception when page.goto", str(e))
                context.close()
                browser.close()
                raise PageUnloadable(url=url, status_code=None, message=str(e))

            # Execute any browser steps
            try:
                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
                self.page.wait_for_timeout(extra_wait * 1000)

                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                    self.page.evaluate(self.webdriver_js_execute_code)

            except playwright._impl._api_types.TimeoutError as e:
                context.close()
                browser.close()
                # This can be ok, we will try to grab what we could retrieve
                pass
            except Exception as e:
                print ("Content Fetcher > Other exception when executing custom JS code", str(e))
                context.close()
                browser.close()
                raise PageUnloadable(url=url, status_code=None, message=str(e))

            if response is None:
                context.close()
                browser.close()
                print ("Content Fetcher > Response object was none")
                raise EmptyReply(url=url, status_code=None)

            # Run Browser Steps here
            self.iterate_browser_steps()

            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
            time.sleep(extra_wait)

            self.content = self.page.content()
            self.status_code = response.status
            if len(self.page.content().strip()) == 0:
                context.close()
                browser.close()
                print ("Content Fetcher > Content was empty")
                raise EmptyReply(url=url, status_code=response.status)

            self.status_code = response.status
            self.headers = response.all_headers()

            # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
            if current_include_filters is not None:
                self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
            else:
                self.page.evaluate("var include_filters=''")

            self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
            self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")

            # Bug 3 in Playwright screenshot handling
            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
            # JPEG is better here because the screenshots can be very very large

            # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
            # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
            # acceptable screenshot quality here
            try:
                # The actual screenshot
                self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
            except Exception as e:
                context.close()
                browser.close()
                raise ScreenshotUnavailable(url=url, status_code=None)

            context.close()
            browser.close()

class base_html_webdriver(Fetcher):
    if os.getenv("WEBDRIVER_URL"):
        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
    else:
        fetcher_description = "WebDriver Chrome/Javascript"

    command_executor = ''

    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
    proxy = None

    def __init__(self, proxy_override=None):
        super().__init__()
        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy

        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
        self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')

        # If any proxy settings are enabled, then we should setup the proxy object
        proxy_args = {}
        for k in self.selenium_proxy_settings_mappings:
            v = os.getenv('webdriver_' + k, False)
            if v:
                proxy_args[k] = v.strip('"')

        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
            proxy_args['httpProxy'] = self.system_http_proxy
        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
            proxy_args['httpsProxy'] = self.system_https_proxy

        # Allows override the proxy on a per-request basis
        if proxy_override is not None:
            proxy_args['httpProxy'] = proxy_override

        if proxy_args:
            self.proxy = SeleniumProxy(raw=proxy_args)

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):

        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        from selenium.common.exceptions import WebDriverException
        # request_body, request_method unused for now, until some magic in the future happens.

        # check env for WEBDRIVER_URL
        self.driver = webdriver.Remote(
            command_executor=self.command_executor,
            desired_capabilities=DesiredCapabilities.CHROME,
            proxy=self.proxy)

        try:
            self.driver.get(url)
        except WebDriverException as e:
            # Be sure we close the session window
            self.quit()
            raise

        self.driver.set_window_size(1280, 1024)
        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

        if self.webdriver_js_execute_code is not None:
            self.driver.execute_script(self.webdriver_js_execute_code)
            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

        # @todo - how to check this? is it possible?
        self.status_code = 200
        # @todo somehow we should try to get this working for WebDriver
        # raise EmptyReply(url=url, status_code=r.status_code)

        # @todo - dom wait loaded?
        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
        self.content = self.driver.page_source
        self.headers = {}

        self.screenshot = self.driver.get_screenshot_as_png()

    # Does the connection to the webdriver work? run a test connection.
    def is_ready(self):
        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

        self.driver = webdriver.Remote(
            command_executor=self.command_executor,
            desired_capabilities=DesiredCapabilities.CHROME)

        # driver.quit() seems to cause better exceptions
        self.quit()
        return True

    def quit(self):
        if self.driver:
            try:
                self.driver.quit()
            except Exception as e:
                print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))


# "html_requests" is listed as the default fetcher in store.py!
class html_requests(Fetcher):
    fetcher_description = "Basic fast Plaintext/HTTP Client"

    def __init__(self, proxy_override=None):
        self.proxy_override = proxy_override

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):

        # Make requests use a more modern looking user-agent
        if not 'User-Agent' in request_headers:
            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')

        proxies = {}

        # Allows override the proxy on a per-request basis
        if self.proxy_override:
            proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
        else:
            if self.system_http_proxy:
                proxies['http'] = self.system_http_proxy
            if self.system_https_proxy:
                proxies['https'] = self.system_https_proxy

        r = requests.request(method=request_method,
                             data=request_body,
                             url=url,
                             headers=request_headers,
                             timeout=timeout,
                             proxies=proxies,
                             verify=False)

        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
        # For example - some sites don't tell us it's utf-8, but return utf-8 content
        # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
        # https://github.com/psf/requests/issues/1604 good info about requests encoding detection
        if not is_binary:
            # Don't run this for PDF (and requests identified as binary) takes a _long_ time
            if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
                encoding = chardet.detect(r.content)['encoding']
                if encoding:
                    r.encoding = encoding

        if not r.content or not len(r.content):
            raise EmptyReply(url=url, status_code=r.status_code)

        # @todo test this
        # @todo maybe you really want to test zero-byte return pages?
        if r.status_code != 200 and not ignore_status_codes:
            # maybe check with content works?
            raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)

        self.status_code = r.status_code
        if is_binary:
            # Binary files just return their checksum until we add something smarter
            self.content = hashlib.md5(r.content).hexdigest()
        else:
            self.content = r.text

        self.headers = r.headers
        self.raw_content = r.content


# Decide which is the 'real' HTML webdriver, this is more a system wide config
# rather than site-specific.
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
if use_playwright_as_chrome_fetcher:
    html_webdriver = base_html_playwright
else:
    html_webdriver = base_html_webdriver
							
								
								
									
changedetectionio/fetchers/__init__.py (Normal file, +150 lines)
@@ -0,0 +1,150 @@
from abc import abstractmethod
import os
from . import exceptions

visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'


class Fetcher():
    browser_steps = None
    browser_steps_screenshot_path = None
    content = None
    error = None
    fetcher_description = "No description"
    headers = None
    status_code = None
    webdriver_js_execute_code = None
    xpath_data = None
    xpath_element_js = ""
    instock_data = None
    instock_data_js = ""

    # Will be needed in the future by the VisualSelector, always get this where possible.
    screenshot = False
    system_http_proxy = os.getenv('HTTP_PROXY')
    system_https_proxy = os.getenv('HTTPS_PROXY')

    # Time ONTOP of the system defined env minimum time
    render_extract_delay = 0

    def __init__(self):
        from pkg_resources import resource_string
        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
        self.xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
        self.instock_data_js = resource_string(__name__, "../res/stock-not-in-stock.js").decode('utf-8')


    @abstractmethod
    def get_error(self):
        return self.error

    @abstractmethod
    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):
        # Should set self.error, self.status_code and self.content
        pass

    @abstractmethod
    def quit(self):
        return

    @abstractmethod
    def get_last_status_code(self):
        return self.status_code

    @abstractmethod
    def screenshot_step(self, step_n):
        return None

    @abstractmethod
    # Return true/false if this checker is ready to run, in the case it needs todo some special config check etc
    def is_ready(self):
        return True

    def iterate_browser_steps(self):
        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
        from playwright._impl._api_types import TimeoutError
        from jinja2 import Environment
        jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])

        step_n = 0

        if self.browser_steps is not None and len(self.browser_steps):
            interface = steppable_browser_interface()
            interface.page = self.page

            valid_steps = filter(lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), self.browser_steps)

            for step in valid_steps:
                step_n += 1
                print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
                self.screenshot_step("before-"+str(step_n))
                self.save_step_html("before-"+str(step_n))
                try:
                    optional_value = step['optional_value']
                    selector = step['selector']
                    # Support for jinja2 template in step values, with date module added
                    if '{%' in step['optional_value'] or '{{' in step['optional_value']:
                        optional_value = str(jinja2_env.from_string(step['optional_value']).render())
                    if '{%' in step['selector'] or '{{' in step['selector']:
                        selector = str(jinja2_env.from_string(step['selector']).render())

                    getattr(interface, "call_action")(action_name=step['operation'],
                                                      selector=selector,
                                                      optional_value=optional_value)
                    self.screenshot_step(step_n)
                    self.save_step_html(step_n)
                except TimeoutError:
                    # Stop processing here
                    raise exceptions.BrowserStepsStepTimout(step_n=step_n)



    # It's always good to reset these
    def delete_browser_steps_screenshots(self):
        import glob
        if self.browser_steps_screenshot_path is not None:
            dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg')
            files = glob.glob(dest)
            for f in files:
                os.unlink(f)

#   Maybe for the future, each fetcher provides its own diff output, could be used for text, image
#   the current one would return javascript output (as we use JS to generate the diff)
#


def available_fetchers():
    from . import playwright, html_requests, webdriver

    p = []
    p.append(tuple(['html_requests', html_requests.fetcher.fetcher_description]))

    # Prefer playwright
    if os.getenv('PLAYWRIGHT_DRIVER_URL', False):
        p.append(tuple(['html_webdriver', playwright.fetcher.fetcher_description]))

    elif os.getenv('WEBDRIVER_URL'):
        p.append(tuple(['html_webdriver', webdriver.fetcher.fetcher_description]))


    return p

html_webdriver = None
# Decide which is the 'real' HTML webdriver, this is more a system wide config rather than site-specific.
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
if use_playwright_as_chrome_fetcher:
    from . import playwright
    html_webdriver = getattr(playwright, "fetcher")

else:
    from . import webdriver
    html_webdriver = getattr(webdriver, "fetcher")

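For orientation, a minimal sketch of how application code could consume this new module, using only the names visible in the diff above (available_fetchers() returning (name, description) tuples and the module-level html_webdriver class); the surrounding wiring is assumed here, not taken from this changeset:

```python
# Hypothetical usage sketch, not part of this changeset.
from changedetectionio import fetchers

# (internal name, human readable description) pairs, e.g. for a settings dropdown
for name, description in fetchers.available_fetchers():
    print(name, description)

# html_webdriver resolves at import time to the playwright or webdriver fetcher
# class, depending on whether PLAYWRIGHT_DRIVER_URL or WEBDRIVER_URL is set.
browser_fetcher = fetchers.html_webdriver(proxy_override=None)
print(browser_fetcher.fetcher_description)
```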
							
								
								
									
changedetectionio/fetchers/browserless.py (Normal file, +71 lines)
@@ -0,0 +1,71 @@
from . import Fetcher
import os
import requests


# Exploit the debugging API to get screenshot and HTML without needing playwright
# https://www.browserless.io/docs/scrape#debugging

class fetcher(Fetcher):
    fetcher_description = "Browserless Chrome/Javascript via '{}'".format(os.getenv("BROWSERLESS_DRIVER_URL"))

    command_executor = ''
    proxy = None

    def __init__(self, proxy_override=None, command_executor=None):
        super().__init__()
        self.proxy = proxy_override

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):

        proxy = ""
        if self.proxy:
            proxy = f"--proxy-server={self.proxy}"

        import json
        r = requests.request(method='POST',
                             data=json.dumps({
                                 "url": f"{url}?{proxy}",
                                 "elements": [],
                                 "debug": {
                                     "screenshot": True,
                                     "console": False,
                                     "network": True,
                                     "cookies": False,
                                     "html": True
                                 }
                             }),
                             url=os.getenv("BROWSERLESS_DRIVER_URL"),
                             headers={'Content-Type': 'application/json'},
                             timeout=timeout,
                             verify=False)

        # "waitFor": "() => document.querySelector('h1')"
        #        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
        #        self.page.wait_for_timeout(extra_wait * 1000)

        if r.status_code == 200:
            # the basic request to browserless was OK, but how was the internal request to the site?
            result = r.json()

            if result['debug']['network'].get('inbound') and len(result['debug']['network']['inbound']):
                self.status_code = result['debug']['network']['inbound'][000]['status']

            self.content = result['debug']['html']

            self.headers = {}
            if result['debug'].get('screenshot'):
                import base64
                self.screenshot = base64.b64decode(result['debug']['screenshot'])

    def is_ready(self):
        # Try ping?
        return os.getenv("BROWSERLESS_DRIVER_URL", False)
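A hedged usage sketch of the browserless fetcher above; it assumes BROWSERLESS_DRIVER_URL points at a browserless scrape endpoint, and the example URL and values are illustrative only:

```python
# Hypothetical example; assumes something like
#   export BROWSERLESS_DRIVER_URL="http://browserless:3000/scrape"
from changedetectionio.fetchers import browserless

f = browserless.fetcher()
if f.is_ready():
    f.run(url="https://example.com",
          timeout=30,
          request_headers={},
          request_body=None,
          request_method='GET')
    print(f.status_code)     # taken from the first inbound network entry
    print(len(f.content))    # rendered HTML returned in the debug payload
    # f.screenshot holds the decoded screenshot bytes when one was returned
```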
							
								
								
									
changedetectionio/fetchers/exceptions.py (Normal file, +66 lines)
@@ -0,0 +1,66 @@
class Non200ErrorCodeReceived(Exception):
    def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.xpath_data = xpath_data
        self.page_text = None

        if page_html:
            from changedetectionio import html_tools
            self.page_text = html_tools.html_to_text(page_html)
        return

class checksumFromPreviousCheckWasTheSame(Exception):
    def __init__(self):
        return

class JSActionExceptions(Exception):
    def __init__(self, status_code, url, screenshot, message=''):
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.message = message
        return

class BrowserStepsStepTimout(Exception):
    def __init__(self, step_n):
        self.step_n = step_n
        return


class PageUnloadable(Exception):
    def __init__(self, status_code, url, message, screenshot=False):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.message = message
        return

class EmptyReply(Exception):
    def __init__(self, status_code, url, screenshot=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        return

class ScreenshotUnavailable(Exception):
    def __init__(self, status_code, url, page_html=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        if page_html:
            from ..html_tools import html_to_text
            self.page_text = html_to_text(page_html)
        return

class ReplyWithContentButNoText(Exception):
    def __init__(self, status_code, url, screenshot=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        return
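These classes keep the names and constructor arguments of the exceptions removed from content_fetcher.py; only the import path changes. A small illustrative sketch, not from this changeset, of catching them under the new layout:

```python
# Illustrative only: catching the relocated exceptions around a fetch.
from changedetectionio.fetchers import exceptions, html_requests

f = html_requests.fetcher()
try:
    f.run(url="https://example.com", timeout=15, request_headers={},
          request_body=None, request_method='GET')
except exceptions.EmptyReply as e:
    print(f"No content returned from {e.url} (status {e.status_code})")
except exceptions.Non200ErrorCodeReceived as e:
    # e.page_text carries the error page converted to text when HTML was available
    print(f"Got HTTP {e.status_code} from {e.url}")
```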
							
								
								
									
changedetectionio/fetchers/html_requests.py (Normal file, +80 lines)
@@ -0,0 +1,80 @@
from . import Fetcher
 | 
			
		||||
from . import exceptions
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# "html_requests" is listed as the default fetcher in store.py!
 | 
			
class fetcher(Fetcher):
    fetcher_description = "Basic fast Plaintext/HTTP Client"

    def __init__(self, proxy_override=None):
        self.proxy_override = proxy_override

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):

        import chardet
        import hashlib
        import os
        import requests

        # Make requests use a more modern looking user-agent
        if not 'User-Agent' in request_headers:
            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')

        proxies = {}

        # Allows overriding the proxy on a per-request basis
        if self.proxy_override:
            proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
        else:
            if self.system_http_proxy:
                proxies['http'] = self.system_http_proxy
            if self.system_https_proxy:
                proxies['https'] = self.system_https_proxy

        r = requests.request(method=request_method,
                             data=request_body,
                             url=url,
                             headers=request_headers,
                             timeout=timeout,
                             proxies=proxies,
                             verify=False)

        # If the response did not tell us what encoding format to expect, then use chardet to override what `requests` thinks.
        # For example - some sites don't tell us it's utf-8, but return utf-8 content
        # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
        # https://github.com/psf/requests/issues/1604 good info about requests encoding detection
        if not is_binary:
            # Don't run this for PDF (and requests identified as binary), it takes a _long_ time
            if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
                encoding = chardet.detect(r.content)['encoding']
                if encoding:
                    r.encoding = encoding

        if not r.content or not len(r.content):
            raise exceptions.EmptyReply(url=url, status_code=r.status_code)

        # @todo test this
        # @todo maybe you really want to test zero-byte return pages?
        if r.status_code != 200 and not ignore_status_codes:
            # maybe check with content works?
            raise exceptions.Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)

        self.status_code = r.status_code
        if is_binary:
            # Binary files just return their checksum until we add something smarter
            self.content = hashlib.md5(r.content).hexdigest()
        else:
            self.content = r.text

        self.headers = r.headers
        self.raw_content = r.content
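For orientation, a minimal usage sketch of this plain-HTTP fetcher. It assumes the new package exposes the class as html_requests (the name the processors later in this diff import); the URL, headers and timeout values are purely illustrative.

# Minimal sketch, assuming changedetectionio/fetchers exposes this class as html_requests
from changedetectionio.fetchers import html_requests

f = html_requests(proxy_override=None)   # or e.g. 'http://proxy.internal:3128' (illustrative)
f.run(url='https://example.com',
      timeout=15,
      request_headers={},                # 'User-Agent' is filled from DEFAULT_SETTINGS_HEADERS_USERAGENT when missing
      request_body=None,
      request_method='GET',
      ignore_status_codes=False,
      current_include_filters=None,
      is_binary=False)

print(f.status_code)                     # e.g. 200
print(f.content[:80])                    # decoded text; with is_binary=True this holds an MD5 hex digest instead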
							
								
								
									
changedetectionio/fetchers/playwright.py (Normal file, 208 lines)
@@ -0,0 +1,208 @@
from . import Fetcher
from . import exceptions
from . import visualselector_xpath_selectors

import os
import logging
import time

class fetcher(Fetcher):
    fetcher_description = "Playwright {}/Javascript".format(
        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
    )
    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))

    browser_type = ''
    command_executor = ''

    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']

    proxy = None

    def __init__(self, proxy_override=None):
        super().__init__()
        import json

        # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
        self.command_executor = os.getenv(
            "PLAYWRIGHT_DRIVER_URL",
            'ws://playwright-chrome:3000'
        ).strip('"')

        # If any proxy settings are enabled, then we should setup the proxy object
        proxy_args = {}
        for k in self.playwright_proxy_settings_mappings:
            v = os.getenv('playwright_proxy_' + k, False)
            if v:
                proxy_args[k] = v.strip('"')

        if proxy_args:
            self.proxy = proxy_args

        # allow per-watch proxy selection override
        if proxy_override:
            self.proxy = {'server': proxy_override}

        if self.proxy:
            # Playwright needs separate username and password values
            from urllib.parse import urlparse
            parsed = urlparse(self.proxy.get('server'))
            if parsed.username:
                self.proxy['username'] = parsed.username
                self.proxy['password'] = parsed.password

    def screenshot_step(self, step_n=''):
        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)

        if self.browser_steps_screenshot_path is not None:
            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
            logging.debug("Saving step screenshot to {}".format(destination))
            with open(destination, 'wb') as f:
                f.write(screenshot)

    def save_step_html(self, step_n):
        content = self.page.content()
        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
        logging.debug("Saving step HTML to {}".format(destination))
        with open(destination, 'w') as f:
            f.write(content)

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types
        import json

        self.delete_browser_steps_screenshots()
        response = None
        with sync_playwright() as p:
            browser_type = getattr(p, self.browser_type)

            # Seemed to cause a connection Exception even though I can see it connect
            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
            # 60,000 connection timeout only
            browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)

            # Set user agent to prevent Cloudflare from blocking the browser
            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
            context = browser.new_context(
                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
                proxy=self.proxy,
                # This is needed to enable JavaScript execution on GitHub and others
                bypass_csp=True,
                # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
                service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
                # Should never be needed
                accept_downloads=False
            )

            self.page = context.new_page()
            if len(request_headers):
                context.set_extra_http_headers(request_headers)

                self.page.set_default_navigation_timeout(90000)
                self.page.set_default_timeout(90000)

                # Listen for all console events and handle errors
                self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))

            # Goto page
            try:
                # Wait_until = commit
                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
                # This seemed to solve nearly all 'TimeoutErrors'
                response = self.page.goto(url, wait_until='commit')
            except playwright._impl._api_types.Error as e:
                # Retry once - https://github.com/browserless/chrome/issues/2485
                # Sometimes errors related to invalid certs and others can be random
                print ("Content Fetcher > retrying request got error - ", str(e))
                time.sleep(1)
                response = self.page.goto(url, wait_until='commit')

            except Exception as e:
                print ("Content Fetcher > Other exception when page.goto", str(e))
                context.close()
                browser.close()
                raise exceptions.PageUnloadable(url=url, status_code=None, message=str(e))

            # Execute any browser steps
            try:
                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
                self.page.wait_for_timeout(extra_wait * 1000)

                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                    self.page.evaluate(self.webdriver_js_execute_code)

            except playwright._impl._api_types.TimeoutError as e:
                context.close()
                browser.close()
                # This can be ok, we will try to grab what we could retrieve
                pass
            except Exception as e:
                print ("Content Fetcher > Other exception when executing custom JS code", str(e))
                context.close()
                browser.close()
                raise exceptions.PageUnloadable(url=url, status_code=None, message=str(e))

            if response is None:
                context.close()
                browser.close()
                print ("Content Fetcher > Response object was none")
                raise exceptions.EmptyReply(url=url, status_code=None)

            # Run Browser Steps here
            self.iterate_browser_steps()

            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
            time.sleep(extra_wait)

            self.content = self.page.content()
            self.status_code = response.status
            if len(self.page.content().strip()) == 0:
                context.close()
                browser.close()
                print ("Content Fetcher > Content was empty")
                raise exceptions.EmptyReply(url=url, status_code=response.status)

            self.status_code = response.status
            self.headers = response.all_headers()

            # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
            if current_include_filters is not None:
                self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
            else:
                self.page.evaluate("var include_filters=''")

            self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
            self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")

            # Bug 3 in Playwright screenshot handling
            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
            # JPEG is better here because the screenshots can be very very large

            # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
            # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
            # acceptable screenshot quality here
            try:
                # The actual screenshot
                self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
            except Exception as e:
                context.close()
                browser.close()
                raise exceptions.ScreenshotUnavailable(url=url, status_code=None)

            context.close()
            browser.close()
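A short illustration of how the proxy settings above are sourced: each key in playwright_proxy_settings_mappings is read from a "playwright_proxy_"-prefixed environment variable. The host name and credentials here are invented.

# Hypothetical environment for the per-key proxy mapping shown above
import os

os.environ['playwright_proxy_server'] = 'http://squid.internal:3128'   # invented host
os.environ['playwright_proxy_username'] = 'scraper'                    # invented credentials
os.environ['playwright_proxy_password'] = 'secret'

# __init__() then assembles roughly:
#   self.proxy == {'server': 'http://squid.internal:3128', 'username': 'scraper', 'password': 'secret'}
# A per-watch proxy_override replaces this with {'server': proxy_override}, and the
# username/password are split back out of that URL if it contains them.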
							
								
								
									
changedetectionio/fetchers/webdriver.py (Normal file, 103 lines)
@@ -0,0 +1,103 @@
from . import Fetcher
import os
import time

class fetcher(Fetcher):
    if os.getenv("WEBDRIVER_URL"):
        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
    else:
        fetcher_description = "WebDriver Chrome/Javascript"

    command_executor = ''

    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
    proxy = None

    def __init__(self, proxy_override=None, command_executor=None):
        super().__init__()
        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy

        # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
        if command_executor:
            self.command_executor = command_executor
        else:
            self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')

        # If any proxy settings are enabled, then we should setup the proxy object
        proxy_args = {}
        for k in self.selenium_proxy_settings_mappings:
            v = os.getenv('webdriver_' + k, False)
            if v:
                proxy_args[k] = v.strip('"')

        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
            proxy_args['httpProxy'] = self.system_http_proxy
        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
            proxy_args['httpsProxy'] = self.system_https_proxy

        # Allows overriding the proxy on a per-request basis
        if proxy_override is not None:
            proxy_args['httpProxy'] = proxy_override

        if proxy_args:
            self.proxy = SeleniumProxy(raw=proxy_args)

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):

        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        from selenium.common.exceptions import WebDriverException
        # request_body, request_method unused for now, until some magic in the future happens.

        # check env for WEBDRIVER_URL
        self.driver = webdriver.Remote(
            command_executor=self.command_executor,
            desired_capabilities=DesiredCapabilities.CHROME,
            proxy=self.proxy
        )

        try:
            self.driver.get(url)
        except WebDriverException as e:
            # Be sure we close the session window
            self.quit()
            raise

        self.driver.set_window_size(1280, 1024)
        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

        if self.webdriver_js_execute_code is not None:
            self.driver.execute_script(self.webdriver_js_execute_code)
            # Selenium doesn't automatically wait for actions as well as Playwright, so wait again
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

        # @todo - how to check this? is it possible?
        self.status_code = 200
        # @todo somehow we should try to get this working for WebDriver
        # raise EmptyReply(url=url, status_code=r.status_code)

        # @todo - dom wait loaded?
        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
        self.content = self.driver.page_source
        self.headers = {}

        self.screenshot = self.driver.get_screenshot_as_png()

    # Try something with requests?
    def is_ready(self):
        return True
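Similarly, a sketch of pointing this fetcher at a remote Selenium runner. The WEBDRIVER_URL value repeats the code's own default; the webdriver_* variable follows the selenium_proxy_settings_mappings keys above, and html_webdriver is the assumed export name used by forms.py later in this diff.

# Hypothetical environment for the Selenium/WebDriver fetcher above
import os

os.environ['WEBDRIVER_URL'] = 'http://browser-chrome:4444/wd/hub'   # the code's default, shown explicitly
os.environ['webdriver_sslProxy'] = 'https://squid.internal:3128'    # invented proxy, one of the mapped keys

from changedetectionio.fetchers import html_webdriver   # assumed export name, as imported elsewhere in this diff
f = html_webdriver()          # builds a SeleniumProxy from any webdriver_* variables
f.run(url='https://example.com', timeout=15, request_headers={}, request_body=None, request_method='GET')
print(f.status_code)          # currently always 200, see the @todo above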
@@ -21,7 +21,6 @@ from wtforms.validators import ValidationError
# each select <option data-enabled="enabled-0-0"
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config

from changedetectionio import content_fetcher
from changedetectionio.notification import (
    valid_notification_formats,
)
@@ -135,30 +134,31 @@ class ValidateContentFetcherIsReady(object):

    def __call__(self, form, field):
        import urllib3.exceptions
        from changedetectionio import content_fetcher
        import importlib

        # Better would be a radiohandler that keeps a reference to each class
        if field.data is not None and field.data != 'system':
            klass = getattr(content_fetcher, field.data)
            some_object = klass()
            try:
                ready = some_object.is_ready()
            from . import fetchers
            if fetchers.html_webdriver is not None:
                try:
                    driver = fetchers.html_webdriver()
                    driver.is_ready()

            except urllib3.exceptions.MaxRetryError as e:
                driver_url = some_object.command_executor
                message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
                message += '<br>' + field.gettext(
                    'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
                message += '<br>' + field.gettext('Did you follow the instructions in the wiki?')
                message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url))
                message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
                message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))
                except urllib3.exceptions.MaxRetryError as e:
                    driver_url = fetchers.html_webdriver.command_executor
                    message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
                    message += '<br>' + field.gettext(
                        'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
                    message += '<br>' + field.gettext('Did you follow the instructions in the wiki?')
                    message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url))
                    message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
                    message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))

                raise ValidationError(message)
                    raise ValidationError(message)

            except Exception as e:
                message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s')
                raise ValidationError(message % (field.data, e))
                except Exception as e:
                    message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s')
                    raise ValidationError(message % (field.data, e))


class ValidateNotificationBodyAndTitleWhenURLisSet(object):
@@ -355,11 +355,12 @@ class quickWatchForm(Form):

# Common to a single watch and the global settings
class commonSettingsForm(Form):
    from .fetchers import available_fetchers
    notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers()])
    notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
    notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
    notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
    fetch_backend = RadioField(u'Fetch Method', choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    fetch_backend = RadioField(u'Fetch Method', choices=available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
    webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1,
                                                                                                                                    message="Should contain one or more seconds")])
@@ -472,11 +473,11 @@ class globalSettingsRequestForm(Form):

# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):

    from .fetchers import available_fetchers
    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
    base_url = StringField('Base URL', validators=[validators.Optional()])
    empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False)
    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
    ignore_whitespace = BooleanField('Ignore whitespace')
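available_fetchers() itself is not part of this excerpt (it would live in changedetectionio/fetchers/__init__.py). For context, WTForms RadioField choices are (value, label) pairs, so a plausible shape built from the fetcher_description attributes shown above would be:

# Plausible shape only - the real available_fetchers() is not shown in this diff excerpt
def available_fetchers():
    from changedetectionio import fetchers
    choices = [('html_requests', fetchers.html_requests.fetcher_description)]
    if fetchers.html_webdriver is not None:
        # html_webdriver may be backed by either the Playwright or the Selenium fetcher (assumption)
        choices.append(('html_webdriver', fetchers.html_webdriver.fetcher_description))
    return choices

# e.g. [('html_requests', 'Basic fast Plaintext/HTTP Client'),
#       ('html_webdriver', "Playwright Chromium/Javascript via 'ws://playwright-chrome:3000'")]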
@@ -4,8 +4,8 @@ import os
import re
import urllib3
from . import difference_detection_processor
from changedetectionio import content_fetcher
from copy import deepcopy
from .. import fetchers

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -61,11 +61,12 @@ class perform_site_check(difference_detection_processor):
        if not prefer_backend or prefer_backend == 'system':
            prefer_backend = self.datastore.data['settings']['application']['fetch_backend']

        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        if prefer_backend == 'html_webdriver':
            preferred_fetcher = fetchers.html_webdriver
        else:
            # If the klass doesnt exist, just use a default
            klass = getattr(content_fetcher, "html_requests")
            from ..fetchers import html_requests
            preferred_fetcher = html_requests

        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
        proxy_url = None
@@ -73,7 +74,7 @@ class perform_site_check(difference_detection_processor):
            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
            print("UUID {} Using proxy {}".format(uuid, proxy_url))

        fetcher = klass(proxy_override=proxy_url)
        fetcher = preferred_fetcher(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
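The selection logic in this hunk works as follows: a watch-level fetch_backend of 'system' (or none) falls back to the global application setting, and anything other than 'html_webdriver' now resolves to the plain requests fetcher. A condensed restatement, with illustrative watch and proxy values:

# Condensed restatement of the hunk above; watch/settings/proxy values are illustrative
prefer_backend = watch.get('fetch_backend')                       # e.g. 'system'
if not prefer_backend or prefer_backend == 'system':
    prefer_backend = datastore.data['settings']['application']['fetch_backend']

if prefer_backend == 'html_webdriver':
    preferred_fetcher = fetchers.html_webdriver                   # browser-based backend
else:
    from changedetectionio.fetchers import html_requests
    preferred_fetcher = html_requests                              # everything else uses plain requests

fetcher = preferred_fetcher(proxy_override=proxy_url)             # proxy_url may be None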
@@ -7,10 +7,11 @@ import os
import re
import urllib3

from changedetectionio import content_fetcher, html_tools
from changedetectionio import html_tools
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from copy import deepcopy
from . import difference_detection_processor
from .. import fetchers

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -101,11 +102,12 @@ class perform_site_check(difference_detection_processor):
        if not prefer_backend or prefer_backend == 'system':
            prefer_backend = self.datastore.data['settings']['application']['fetch_backend']

        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        if prefer_backend == 'html_webdriver':
            preferred_fetcher = fetchers.html_webdriver
        else:
            # If the klass doesnt exist, just use a default
            klass = getattr(content_fetcher, "html_requests")
            from ..fetchers import html_requests
            preferred_fetcher = html_requests

        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
        proxy_url = None
@@ -113,7 +115,7 @@ class perform_site_check(difference_detection_processor):
            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
            print("UUID {} Using proxy {}".format(uuid, proxy_url))

        fetcher = klass(proxy_override=proxy_url)
        fetcher = preferred_fetcher(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
@@ -147,7 +149,7 @@ class perform_site_check(difference_detection_processor):
        update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
        if skip_when_checksum_same:
            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
                raise content_fetcher.checksumFromPreviousCheckWasTheSame()
                raise fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

        # Fetching complete, now filters
@@ -310,7 +312,7 @@ class perform_site_check(difference_detection_processor):
        # Treat pages with no renderable text content as a change? No by default
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
            raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
            raise fetchers.exceptions.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)

        # We rely on the actual text in the html output.. many sites have random script vars etc,
        # in the future we'll implement other mechanisms.
@@ -3,9 +3,8 @@ import threading
import queue
import time

from changedetectionio import content_fetcher
from .processors.text_json_diff import FilterNotFoundInResponse

from .fetchers import exceptions

# A single update worker
#
@@ -190,6 +189,7 @@ class update_worker(threading.Thread):
                        processor = self.datastore.data['watching'][uuid].get('processor','text_json_diff')

                        # @todo some way to switch by name
                        update_handler = None
                        if processor == 'restock_diff':
                            update_handler = restock_diff.perform_site_check(datastore=self.datastore)
                        else:
@@ -205,7 +205,7 @@ class update_worker(threading.Thread):
                    except PermissionError as e:
                        self.app.logger.error("File permission error updating", uuid, str(e))
                        process_changedetection_results = False
                    except content_fetcher.ReplyWithContentButNoText as e:
                    except exceptions.ReplyWithContentButNoText as e:
                        # Totally fine, it's by choice - just continue on, nothing more to care about
                        # Page had elements/content but no renderable text
                        # Backend (not filters) gave zero output
@@ -214,7 +214,7 @@ class update_worker(threading.Thread):
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
                        process_changedetection_results = False

                    except content_fetcher.Non200ErrorCodeReceived as e:
                    except exceptions.Non200ErrorCodeReceived as e:
                        if e.status_code == 403:
                            err_text = "Error - 403 (Access denied) received"
                        elif e.status_code == 404:
@@ -258,12 +258,12 @@ class update_worker(threading.Thread):

                        process_changedetection_results = False

                    except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
                    except exceptions.checksumFromPreviousCheckWasTheSame as e:
                        # Yes fine, so nothing todo, don't continue to process.
                        process_changedetection_results = False
                        changed_detected = False

                    except content_fetcher.BrowserStepsStepTimout as e:
                    except exceptions.BrowserStepsStepTimout as e:

                        if not self.datastore.data['watching'].get(uuid):
                            continue
@@ -288,25 +288,25 @@ class update_worker(threading.Thread):

                        process_changedetection_results = False

                    except content_fetcher.EmptyReply as e:
                    except exceptions.EmptyReply as e:
                        # Some kind of custom to-str handler in the exception handler that does this?
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetcher.ScreenshotUnavailable as e:
                    except exceptions.ScreenshotUnavailable as e:
                        err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetcher.JSActionExceptions as e:
                    except exceptions.JSActionExceptions as e:
                        err_text = "Error running JS Actions - Page request - "+e.message
                        if e.screenshot:
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetcher.PageUnloadable as e:
                    except exceptions.PageUnloadable as e:
                        err_text = "Page request from server didnt respond correctly"
                        if e.message:
                            err_text = "{} - {}".format(err_text, e.message)
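Taken together, the update_worker changes are a mechanical rename: every exception that previously lived on the content_fetcher module is now caught from the fetchers.exceptions module. In sketch form (the call site is illustrative):

# Before this change:
#   from changedetectionio import content_fetcher
#   ...
#   except content_fetcher.EmptyReply as e: ...
#
# After this change:
from changedetectionio.fetchers import exceptions

try:
    changed_detected, update_obj, contents = update_handler.run(uuid)   # illustrative call site
except exceptions.EmptyReply as e:
    pass   # handled exactly as before, only the namespace has moved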