mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 08:34:57 +00:00 
			
		
		
		
	Compare commits
	
		
			18 Commits
		
	
	
		
			notificati
			...
			endpoint-t
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					8c5fc60216 | ||
| 
						 | 
					e80936b29c | ||
| 
						 | 
					eaccd6026c | ||
| 
						 | 
					5b70625eaa | ||
| 
						 | 
					60d292107d | ||
| 
						 | 
					1cb38347da | ||
| 
						 | 
					55fe2abf42 | ||
| 
						 | 
					4225900ec3 | ||
| 
						 | 
					1fb4342488 | ||
| 
						 | 
					7071df061a | ||
| 
						 | 
					6dd1fa2b88 | ||
| 
						 | 
					371f85d544 | ||
| 
						 | 
					932cf15e1e | ||
| 
						 | 
					bf0d410d32 | ||
| 
						 | 
					730f37c7ba | ||
| 
						 | 
					8a35d62e02 | ||
| 
						 | 
					f527744024 | ||
| 
						 | 
					71c9b1273c | 
@@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
 | 
			
		||||
 | 
			
		||||
__version__ = '0.46.01'
 | 
			
		||||
__version__ = '0.46.04'
 | 
			
		||||
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
from json.decoder import JSONDecodeError
 | 
			
		||||
 
 | 
			
		||||
@@ -85,7 +85,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
        browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
 | 
			
		||||
            playwright_browser=browsersteps_start_session['browser'],
 | 
			
		||||
            proxy=proxy,
 | 
			
		||||
            start_url=datastore.data['watching'][watch_uuid].get('url')
 | 
			
		||||
            start_url=datastore.data['watching'][watch_uuid].get('url'),
 | 
			
		||||
            headers=datastore.data['watching'][watch_uuid].get('headers')
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # For test
 | 
			
		||||
 
 | 
			
		||||
@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
 | 
			
		||||
                        {% if '/text()' in  field %}
 | 
			
		||||
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
 | 
			
		||||
 | 
			
		||||
                    <ul>
 | 
			
		||||
                        <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
 | 
			
		||||
                    <div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
 | 
			
		||||
                    <ul id="advanced-help-selectors">
 | 
			
		||||
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
 | 
			
		||||
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
 | 
			
		||||
                            <ul>
 | 
			
		||||
 
 | 
			
		||||
@@ -65,8 +65,8 @@ class Fetcher():
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        import importlib.resources
 | 
			
		||||
        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
 | 
			
		||||
        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
 | 
			
		||||
        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
 | 
			
		||||
        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
    def get_error(self):
 | 
			
		||||
@@ -81,7 +81,8 @@ class Fetcher():
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False):
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
        # Should set self.error, self.status_code and self.content
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -83,7 +83,8 @@ class fetcher(Fetcher):
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False):
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
 | 
			
		||||
        from playwright.sync_api import sync_playwright
 | 
			
		||||
        import playwright._impl._errors
 | 
			
		||||
@@ -130,7 +131,7 @@ class fetcher(Fetcher):
 | 
			
		||||
            if response is None:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                logger.debug("Content Fetcher > Response object was none")
 | 
			
		||||
                logger.debug("Content Fetcher > Response object from the browser communication was none")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
@@ -166,10 +167,10 @@ class fetcher(Fetcher):
 | 
			
		||||
 | 
			
		||||
                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
 | 
			
		||||
 | 
			
		||||
            if len(self.page.content().strip()) == 0:
 | 
			
		||||
            if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
 | 
			
		||||
                logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                logger.debug("Content Fetcher > Content was empty")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=response.status)
 | 
			
		||||
 | 
			
		||||
            # Run Browser Steps here
 | 
			
		||||
 
 | 
			
		||||
@@ -75,7 +75,8 @@ class fetcher(Fetcher):
 | 
			
		||||
                         request_method,
 | 
			
		||||
                         ignore_status_codes,
 | 
			
		||||
                         current_include_filters,
 | 
			
		||||
                         is_binary
 | 
			
		||||
                         is_binary,
 | 
			
		||||
                         empty_pages_are_a_change
 | 
			
		||||
                         ):
 | 
			
		||||
 | 
			
		||||
        from changedetectionio.content_fetchers import visualselector_xpath_selectors
 | 
			
		||||
@@ -153,7 +154,7 @@ class fetcher(Fetcher):
 | 
			
		||||
        if response is None:
 | 
			
		||||
            await self.page.close()
 | 
			
		||||
            await browser.close()
 | 
			
		||||
            logger.warning("Content Fetcher > Response object was none")
 | 
			
		||||
            logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
 | 
			
		||||
            raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
        self.headers = response.headers
 | 
			
		||||
@@ -186,10 +187,11 @@ class fetcher(Fetcher):
 | 
			
		||||
 | 
			
		||||
            raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
 | 
			
		||||
        content = await self.page.content
 | 
			
		||||
        if len(content.strip()) == 0:
 | 
			
		||||
 | 
			
		||||
        if not empty_pages_are_a_change and len(content.strip()) == 0:
 | 
			
		||||
            logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
 | 
			
		||||
            await self.page.close()
 | 
			
		||||
            await browser.close()
 | 
			
		||||
            logger.error("Content Fetcher > Content was empty")
 | 
			
		||||
            raise EmptyReply(url=url, status_code=response.status)
 | 
			
		||||
 | 
			
		||||
        # Run Browser Steps here
 | 
			
		||||
@@ -247,7 +249,7 @@ class fetcher(Fetcher):
 | 
			
		||||
        await self.fetch_page(**kwargs)
 | 
			
		||||
 | 
			
		||||
    def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None, is_binary=False):
 | 
			
		||||
            current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
 | 
			
		||||
 | 
			
		||||
        #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
 | 
			
		||||
        max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
 | 
			
		||||
@@ -262,7 +264,8 @@ class fetcher(Fetcher):
 | 
			
		||||
                request_method=request_method,
 | 
			
		||||
                ignore_status_codes=ignore_status_codes,
 | 
			
		||||
                current_include_filters=current_include_filters,
 | 
			
		||||
                is_binary=is_binary
 | 
			
		||||
                is_binary=is_binary,
 | 
			
		||||
                empty_pages_are_a_change=empty_pages_are_a_change
 | 
			
		||||
            ), timeout=max_time))
 | 
			
		||||
        except asyncio.TimeoutError:
 | 
			
		||||
            raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,8 @@
 | 
			
		||||
from loguru import logger
 | 
			
		||||
import chardet
 | 
			
		||||
import hashlib
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
import chardet
 | 
			
		||||
import requests
 | 
			
		||||
 | 
			
		||||
from changedetectionio import strtobool
 | 
			
		||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
 | 
			
		||||
from changedetectionio.content_fetchers.base import Fetcher
 | 
			
		||||
@@ -26,7 +25,8 @@ class fetcher(Fetcher):
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False):
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
 | 
			
		||||
        if self.browser_steps_get_valid_steps():
 | 
			
		||||
            raise BrowserStepsInUnsupportedFetcher(url=url)
 | 
			
		||||
@@ -74,7 +74,10 @@ class fetcher(Fetcher):
 | 
			
		||||
        self.headers = r.headers
 | 
			
		||||
 | 
			
		||||
        if not r.content or not len(r.content):
 | 
			
		||||
            raise EmptyReply(url=url, status_code=r.status_code)
 | 
			
		||||
            if not empty_pages_are_a_change:
 | 
			
		||||
                raise EmptyReply(url=url, status_code=r.status_code)
 | 
			
		||||
            else:
 | 
			
		||||
                logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True")
 | 
			
		||||
 | 
			
		||||
        # @todo test this
 | 
			
		||||
        # @todo maybe you really want to test zero-byte return pages?
 | 
			
		||||
 
 | 
			
		||||
@@ -75,6 +75,7 @@ function isItemInStock() {
 | 
			
		||||
        'vergriffen',
 | 
			
		||||
        'vorbestellen',
 | 
			
		||||
        'vorbestellung ist bald möglich',
 | 
			
		||||
        'we don\'t currently have any',
 | 
			
		||||
        'we couldn\'t find any products that match',
 | 
			
		||||
        'we do not currently have an estimate of when this product will be back in stock.',
 | 
			
		||||
        'we don\'t know when or if this item will be back in stock.',
 | 
			
		||||
@@ -173,7 +174,8 @@ function isItemInStock() {
 | 
			
		||||
        const element = elementsToScan[i];
 | 
			
		||||
        // outside the 'fold' or some weird text in the heading area
 | 
			
		||||
        // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
 | 
			
		||||
        if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
 | 
			
		||||
        // Note: theres also an automated test that places the 'out of stock' text fairly low down
 | 
			
		||||
        if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
 | 
			
		||||
            continue
 | 
			
		||||
        }
 | 
			
		||||
        elementText = "";
 | 
			
		||||
@@ -187,7 +189,7 @@ function isItemInStock() {
 | 
			
		||||
            // and these mean its out of stock
 | 
			
		||||
            for (const outOfStockText of outOfStockTexts) {
 | 
			
		||||
                if (elementText.includes(outOfStockText)) {
 | 
			
		||||
                    console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
 | 
			
		||||
                    console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
 | 
			
		||||
                    return outOfStockText; // item is out of stock
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 
 | 
			
		||||
@@ -164,6 +164,15 @@ visibleElementsArray.forEach(function (element) {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
 | 
			
		||||
 | 
			
		||||
    let text = element.textContent.trim().slice(0, 30).trim();
 | 
			
		||||
    while (/\n{2,}|\t{2,}/.test(text)) {
 | 
			
		||||
        text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
 | 
			
		||||
    const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) &&  /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
 | 
			
		||||
 | 
			
		||||
    size_pos.push({
 | 
			
		||||
        xpath: xpath_result,
 | 
			
		||||
@@ -171,9 +180,16 @@ visibleElementsArray.forEach(function (element) {
 | 
			
		||||
        height: Math.round(bbox['height']),
 | 
			
		||||
        left: Math.floor(bbox['left']),
 | 
			
		||||
        top: Math.floor(bbox['top']) + scroll_y,
 | 
			
		||||
        // tagName used by Browser Steps
 | 
			
		||||
        tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
 | 
			
		||||
        // tagtype used by Browser Steps
 | 
			
		||||
        tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
 | 
			
		||||
        isClickable: window.getComputedStyle(element).cursor == "pointer"
 | 
			
		||||
        isClickable: window.getComputedStyle(element).cursor === "pointer",
 | 
			
		||||
        // Used by the keras trainer
 | 
			
		||||
        fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
 | 
			
		||||
        fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
 | 
			
		||||
        hasDigitCurrency: hasDigitCurrency,
 | 
			
		||||
        label: label,
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
});
 | 
			
		||||
 
 | 
			
		||||
@@ -56,7 +56,8 @@ class fetcher(Fetcher):
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False):
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
 | 
			
		||||
        from selenium import webdriver
 | 
			
		||||
        from selenium.webdriver.chrome.options import Options as ChromeOptions
 | 
			
		||||
 
 | 
			
		||||
@@ -1377,17 +1377,19 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
        import brotli
 | 
			
		||||
 | 
			
		||||
        watch = datastore.data['watching'].get(uuid)
 | 
			
		||||
        if watch and os.path.isdir(watch.watch_data_dir):
 | 
			
		||||
            latest_filename = list(watch.history.keys())[0]
 | 
			
		||||
        if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
 | 
			
		||||
            latest_filename = list(watch.history.keys())[-1]
 | 
			
		||||
            html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
 | 
			
		||||
            if html_fname.endswith('.br'):
 | 
			
		||||
                # Read and decompress the Brotli file
 | 
			
		||||
                with open(html_fname, 'rb') as f:
 | 
			
		||||
            with open(html_fname, 'rb') as f:
 | 
			
		||||
                if html_fname.endswith('.br'):
 | 
			
		||||
                    # Read and decompress the Brotli file
 | 
			
		||||
                    decompressed_data = brotli.decompress(f.read())
 | 
			
		||||
                else:
 | 
			
		||||
                    decompressed_data = f.read()
 | 
			
		||||
 | 
			
		||||
                buffer = BytesIO(decompressed_data)
 | 
			
		||||
            buffer = BytesIO(decompressed_data)
 | 
			
		||||
 | 
			
		||||
                return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
 | 
			
		||||
            return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        # Return a 500 error
 | 
			
		||||
 
 | 
			
		||||
@@ -26,6 +26,8 @@ class difference_detection_processor():
 | 
			
		||||
 | 
			
		||||
    def call_browser(self):
 | 
			
		||||
        from requests.structures import CaseInsensitiveDict
 | 
			
		||||
        from changedetectionio.content_fetchers.exceptions import EmptyReply
 | 
			
		||||
 | 
			
		||||
        # Protect against file:// access
 | 
			
		||||
        if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
 | 
			
		||||
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
 | 
			
		||||
@@ -133,8 +135,18 @@ class difference_detection_processor():
 | 
			
		||||
        is_binary = self.watch.is_pdf
 | 
			
		||||
 | 
			
		||||
        # And here we go! call the right browser with browser-specific settings
 | 
			
		||||
        self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
 | 
			
		||||
                    is_binary=is_binary)
 | 
			
		||||
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
 | 
			
		||||
 | 
			
		||||
        self.fetcher.run(url=url,
 | 
			
		||||
                         timeout=timeout,
 | 
			
		||||
                         request_headers=request_headers,
 | 
			
		||||
                         request_body=request_body,
 | 
			
		||||
                         request_method=request_method,
 | 
			
		||||
                         ignore_status_codes=ignore_status_codes,
 | 
			
		||||
                         current_include_filters=self.watch.get('include_filters'),
 | 
			
		||||
                         is_binary=is_binary,
 | 
			
		||||
                         empty_pages_are_a_change=empty_pages_are_a_change
 | 
			
		||||
                         )
 | 
			
		||||
 | 
			
		||||
        #@todo .quit here could go on close object, so we can run JS if change-detected
 | 
			
		||||
        self.fetcher.quit()
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +1,12 @@
 | 
			
		||||
 | 
			
		||||
from changedetectionio.model.Watch import model as BaseWatch
 | 
			
		||||
import re
 | 
			
		||||
from babel.numbers import parse_decimal
 | 
			
		||||
from changedetectionio.model.Watch import model as BaseWatch
 | 
			
		||||
from typing import Union
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
class Restock(dict):
 | 
			
		||||
 | 
			
		||||
    def parse_currency(self, raw_value: str) -> float:
 | 
			
		||||
    def parse_currency(self, raw_value: str) -> Union[float, None]:
 | 
			
		||||
        # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
 | 
			
		||||
        standardized_value = raw_value
 | 
			
		||||
 | 
			
		||||
@@ -21,8 +22,11 @@ class Restock(dict):
 | 
			
		||||
        # Remove any non-numeric characters except for the decimal point
 | 
			
		||||
        standardized_value = re.sub(r'[^\d.-]', '', standardized_value)
 | 
			
		||||
 | 
			
		||||
        # Convert to float
 | 
			
		||||
        return float(parse_decimal(standardized_value, locale='en'))
 | 
			
		||||
        if standardized_value:
 | 
			
		||||
            # Convert to float
 | 
			
		||||
            return float(parse_decimal(standardized_value, locale='en'))
 | 
			
		||||
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
        # Define default values
 | 
			
		||||
 
 | 
			
		||||
@@ -40,13 +40,16 @@ def get_itemprop_availability(html_content) -> Restock:
 | 
			
		||||
    import extruct
 | 
			
		||||
    logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
 | 
			
		||||
 | 
			
		||||
    value = {}
 | 
			
		||||
    now = time.time()
 | 
			
		||||
 | 
			
		||||
    # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
 | 
			
		||||
 | 
			
		||||
    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
 | 
			
		||||
    try:
 | 
			
		||||
        data = extruct.extract(html_content, syntaxes=syntaxes)
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
 | 
			
		||||
        return Restock()
 | 
			
		||||
 | 
			
		||||
    data = extruct.extract(html_content, syntaxes=syntaxes)
 | 
			
		||||
    logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")
 | 
			
		||||
 | 
			
		||||
    # First phase, dead simple scanning of anything that looks useful
 | 
			
		||||
 
 | 
			
		||||
@@ -18,9 +18,11 @@ $(document).ready(function () {
 | 
			
		||||
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    $("#notification-token-toggle").click(function (e) {
 | 
			
		||||
    $(".toggle-show").click(function (e) {
 | 
			
		||||
        e.preventDefault();
 | 
			
		||||
        $('#notification-tokens-info').toggle();
 | 
			
		||||
        let target = $(this).data('target');
 | 
			
		||||
        $(target).toggle();
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -11,8 +11,11 @@
 | 
			
		||||
    class="notification-urls" )
 | 
			
		||||
                            }}
 | 
			
		||||
                            <div class="pure-form-message-inline">
 | 
			
		||||
                              <ul>
 | 
			
		||||
                                <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
 | 
			
		||||
                                <p>
 | 
			
		||||
                                <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
 | 
			
		||||
</p>
 | 
			
		||||
                                <div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
 | 
			
		||||
                              <ul style="display: none" id="advanced-help-notifications">
 | 
			
		||||
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
 | 
			
		||||
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
 | 
			
		||||
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
 | 
			
		||||
@@ -40,7 +43,7 @@
 | 
			
		||||
 | 
			
		||||
                            </div>
 | 
			
		||||
                            <div class="pure-controls">
 | 
			
		||||
                                <div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
 | 
			
		||||
                                <div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
 | 
			
		||||
                            </div>
 | 
			
		||||
                            <div class="pure-controls" style="display: none;" id="notification-tokens-info">
 | 
			
		||||
                                <table class="pure-table" id="token-table">
 | 
			
		||||
 
 | 
			
		||||
@@ -4,6 +4,7 @@
 | 
			
		||||
{% from '_common_fields.html' import render_common_settings_form %}
 | 
			
		||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 | 
			
		||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
 | 
			
		||||
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
 | 
			
		||||
<script>
 | 
			
		||||
    const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
 | 
			
		||||
    const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
 | 
			
		||||
@@ -275,9 +276,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
 | 
			
		||||
                        {% if '/text()' in  field %}
 | 
			
		||||
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
 | 
			
		||||
 | 
			
		||||
                    <ul>
 | 
			
		||||
                        <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
 | 
			
		||||
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
 | 
			
		||||
                    <ul id="advanced-help-selectors" style="display: none;">
 | 
			
		||||
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
 | 
			
		||||
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
 | 
			
		||||
                            <ul>
 | 
			
		||||
@@ -297,9 +298,12 @@ xpath://body/div/span[contains(@class, 'example-class')]",
 | 
			
		||||
                                <li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
 | 
			
		||||
                            </ul>
 | 
			
		||||
                            </li>
 | 
			
		||||
                    </ul>
 | 
			
		||||
                    Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
 | 
			
		||||
                    <li>
 | 
			
		||||
                        Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
 | 
			
		||||
                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
 | 
			
		||||
                    </li>
 | 
			
		||||
                    </ul>
 | 
			
		||||
 | 
			
		||||
                </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                <fieldset class="pure-control-group">
 | 
			
		||||
 
 | 
			
		||||
@@ -76,7 +76,7 @@
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
 | 
			
		||||
                        <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                {% if form.requests.proxy %}
 | 
			
		||||
                    <div class="pure-control-group inline-radio">
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
 | 
			
		||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
 | 
			
		||||
from changedetectionio.notification import (
 | 
			
		||||
    default_notification_body,
 | 
			
		||||
    default_notification_format,
 | 
			
		||||
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
 | 
			
		||||
    assert b'not-in-stock' not in res.data
 | 
			
		||||
 | 
			
		||||
    # We should have a notification
 | 
			
		||||
    time.sleep(2)
 | 
			
		||||
    wait_for_notification_endpoint_output()
 | 
			
		||||
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
 | 
			
		||||
    os.unlink("test-datastore/notification.txt")
 | 
			
		||||
 | 
			
		||||
@@ -103,6 +103,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
 | 
			
		||||
    set_original_response()
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
    time.sleep(5)
 | 
			
		||||
    assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"
 | 
			
		||||
 | 
			
		||||
    # BUT we should see that it correctly shows "not in stock"
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
import os.path
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import live_server_setup, wait_for_all_checks
 | 
			
		||||
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
 | 
			
		||||
from changedetectionio import html_tools
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -112,7 +112,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
 | 
			
		||||
    res = client.post(
 | 
			
		||||
        url_for("settings_page"),
 | 
			
		||||
        data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
 | 
			
		||||
              "application-notification_body": 'triggered text was -{{triggered_text}}-',
 | 
			
		||||
              "application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了',
 | 
			
		||||
              # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
 | 
			
		||||
              "application-notification_urls": test_notification_url,
 | 
			
		||||
              "application-minutes_between_check": 180,
 | 
			
		||||
@@ -165,11 +165,12 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
 | 
			
		||||
    assert b'unviewed' in res.data
 | 
			
		||||
 | 
			
		||||
    # Takes a moment for apprise to fire
 | 
			
		||||
    time.sleep(3)
 | 
			
		||||
    wait_for_notification_endpoint_output()
 | 
			
		||||
    assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
 | 
			
		||||
    with open("test-datastore/notification.txt", 'r') as f:
 | 
			
		||||
        response= f.read()
 | 
			
		||||
        assert '-Oh yes please-' in response
 | 
			
		||||
    with open("test-datastore/notification.txt", 'rb') as f:
 | 
			
		||||
        response = f.read()
 | 
			
		||||
        assert b'-Oh yes please-' in response
 | 
			
		||||
        assert '网站监测 内容更新了'.encode('utf-8') in response
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
 | 
			
		||||
 
 | 
			
		||||
@@ -69,6 +69,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
 | 
			
		||||
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    uuid = extract_UUID_from_client(client)
 | 
			
		||||
 | 
			
		||||
    # Check the 'get latest snapshot works'
 | 
			
		||||
    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
 | 
			
		||||
    assert b'which has this one new line' in res.data
 | 
			
		||||
 | 
			
		||||
    # Now something should be ready, indicated by having a 'unviewed' class
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' in res.data
 | 
			
		||||
@@ -86,7 +92,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
 | 
			
		||||
    assert expected_url.encode('utf-8') in res.data
 | 
			
		||||
 | 
			
		||||
    # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
 | 
			
		||||
    res = client.get(url_for("diff_history_page", uuid="first"))
 | 
			
		||||
    res = client.get(url_for("diff_history_page", uuid=uuid))
 | 
			
		||||
    assert b'selected=""' in res.data, "Confirm diff history page loaded"
 | 
			
		||||
 | 
			
		||||
    # Check the [preview] pulls the right one
 | 
			
		||||
@@ -143,18 +149,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
 | 
			
		||||
    assert b'unviewed' not in res.data
 | 
			
		||||
 | 
			
		||||
    # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
 | 
			
		||||
    uuid = extract_UUID_from_client(client)
 | 
			
		||||
    client.get(url_for("clear_watch_history", uuid=uuid))
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'preview/' in res.data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Check the 'get latest snapshot works'
 | 
			
		||||
    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
 | 
			
		||||
    assert b'<head><title>head title</title></head>' in res.data
 | 
			
		||||
 | 
			
		||||
    #
 | 
			
		||||
    # Cleanup everything
 | 
			
		||||
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import set_original_response, live_server_setup
 | 
			
		||||
from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
 | 
			
		||||
from changedetectionio.model import App
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -102,14 +102,15 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
 | 
			
		||||
        follow_redirects=True
 | 
			
		||||
    )
 | 
			
		||||
    assert b"Updated watch." in res.data
 | 
			
		||||
    time.sleep(3)
 | 
			
		||||
    wait_for_notification_endpoint_output()
 | 
			
		||||
 | 
			
		||||
    # Shouldn't exist, shouldn't have fired
 | 
			
		||||
    assert not os.path.isfile("test-datastore/notification.txt")
 | 
			
		||||
    # Now the filter should exist
 | 
			
		||||
    set_response_with_filter()
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
    time.sleep(3)
 | 
			
		||||
 | 
			
		||||
    wait_for_notification_endpoint_output()
 | 
			
		||||
 | 
			
		||||
    assert os.path.isfile("test-datastore/notification.txt")
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,8 @@
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
 | 
			
		||||
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
 | 
			
		||||
    wait_for_notification_endpoint_output
 | 
			
		||||
from changedetectionio.model import App
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -107,7 +108,8 @@ def run_filter_test(client, live_server, content_filter):
 | 
			
		||||
    # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
    time.sleep(2)  # delay for apprise to fire
 | 
			
		||||
 | 
			
		||||
    wait_for_notification_endpoint_output()
 | 
			
		||||
    # Now it should exist and contain our "filter not found" alert
 | 
			
		||||
    assert os.path.isfile("test-datastore/notification.txt")
 | 
			
		||||
 | 
			
		||||
@@ -127,6 +129,7 @@ def run_filter_test(client, live_server, content_filter):
 | 
			
		||||
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
        wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    wait_for_notification_endpoint_output()
 | 
			
		||||
    # It should have sent a notification, but..
 | 
			
		||||
    assert os.path.isfile("test-datastore/notification.txt")
 | 
			
		||||
    # but it should not contain the info about a failed filter (because there was none in this case)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +1,8 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from urllib.request import urlopen
 | 
			
		||||
from .util import set_original_response, set_modified_response, live_server_setup
 | 
			
		||||
 | 
			
		||||
sleep_time_for_fetch_thread = 3
 | 
			
		||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_nonrenderable_response():
 | 
			
		||||
@@ -16,12 +13,18 @@ def set_nonrenderable_response():
 | 
			
		||||
     </body>
 | 
			
		||||
     </html>
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    with open("test-datastore/endpoint-content.txt", "w") as f:
 | 
			
		||||
        f.write(test_return_data)
 | 
			
		||||
    time.sleep(1)
 | 
			
		||||
 | 
			
		||||
    return None
 | 
			
		||||
 | 
			
		||||
def set_zero_byte_response():
    """Leave the endpoint content file completely empty (zero bytes).

    Writes an empty string to "test-datastore/endpoint-content.txt" and then
    pauses for one second before returning.
    """
    # Opening with mode "w" truncates any previous content
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write("")

    time.sleep(1)
    return None
 | 
			
		||||
 | 
			
		||||
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
 | 
			
		||||
    set_original_response()
 | 
			
		||||
    live_server_setup(live_server)
 | 
			
		||||
@@ -35,18 +38,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
 | 
			
		||||
 | 
			
		||||
    assert b"1 Imported" in res.data
 | 
			
		||||
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    # Do this a few times.. ensures we don't accidentally set the status
 | 
			
		||||
    for n in range(3):
 | 
			
		||||
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
        # Give the thread time to pick it up
 | 
			
		||||
        time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
 | 
			
		||||
        # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
        res = client.get(url_for("index"))
 | 
			
		||||
        assert b'unviewed' not in res.data
 | 
			
		||||
    # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' not in res.data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    #####################
 | 
			
		||||
@@ -64,7 +60,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # Give the thread time to pick it up
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
@@ -86,14 +82,20 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # Give the thread time to pick it up
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    # It should report a change (the 'unviewed' class is present)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' in res.data
 | 
			
		||||
    client.get(url_for("mark_all_viewed"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # A totally zero byte (#2528) response should also not trigger an error
 | 
			
		||||
    set_zero_byte_response()
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
 | 
			
		||||
    assert b'fetch-error' not in res.data
 | 
			
		||||
 | 
			
		||||
    #
 | 
			
		||||
    # Cleanup everything
 | 
			
		||||
 
 | 
			
		||||
@@ -291,11 +291,11 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
 | 
			
		||||
        data={
 | 
			
		||||
              "application-fetch_backend": "html_requests",
 | 
			
		||||
              "application-minutes_between_check": 180,
 | 
			
		||||
              "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444 }',
 | 
			
		||||
              "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
 | 
			
		||||
              "application-notification_format": default_notification_format,
 | 
			
		||||
              "application-notification_urls": test_notification_url,
 | 
			
		||||
              # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
 | 
			
		||||
              "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
 | 
			
		||||
              "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
 | 
			
		||||
              },
 | 
			
		||||
        follow_redirects=True
 | 
			
		||||
    )
 | 
			
		||||
@@ -324,6 +324,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
 | 
			
		||||
        j = json.loads(x)
 | 
			
		||||
        assert j['url'].startswith('http://localhost')
 | 
			
		||||
        assert j['secret'] == 444
 | 
			
		||||
        assert j['somebug'] == '网站监测 内容更新了'
 | 
			
		||||
 | 
			
		||||
    # URL check, this will always be converted to lowercase
 | 
			
		||||
    assert os.path.isfile("test-datastore/notification-url.txt")
 | 
			
		||||
@@ -354,9 +355,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
 | 
			
		||||
#2510
 | 
			
		||||
def test_global_send_test_notification(client, live_server, measure_memory_usage):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    #live_server_setup(live_server)
 | 
			
		||||
    set_original_response()
 | 
			
		||||
    if os.path.isfile("test-datastore/notification.txt"):
 | 
			
		||||
        os.unlink("test-datastore/notification.txt")
 | 
			
		||||
 | 
			
		||||
    # otherwise other settings would have already existed from previous tests in this file
 | 
			
		||||
    res = client.post(
 | 
			
		||||
@@ -364,7 +366,8 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
 | 
			
		||||
        data={
 | 
			
		||||
            "application-fetch_backend": "html_requests",
 | 
			
		||||
            "application-minutes_between_check": 180,
 | 
			
		||||
            "application-notification_body": 'change detection is cool',
 | 
			
		||||
            #1995 UTF-8 content should be encoded
 | 
			
		||||
            "application-notification_body": 'change detection is cool 网站监测 内容更新了',
 | 
			
		||||
            "application-notification_format": default_notification_format,
 | 
			
		||||
            "application-notification_urls": "",
 | 
			
		||||
            "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
 | 
			
		||||
@@ -399,8 +402,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
 | 
			
		||||
 | 
			
		||||
    with open("test-datastore/notification.txt", 'r') as f:
 | 
			
		||||
        x = f.read()
 | 
			
		||||
        assert 'change detection is coo' in x
 | 
			
		||||
 | 
			
		||||
        assert 'change detection is cool 网站监测 内容更新了' in x
 | 
			
		||||
 | 
			
		||||
    os.unlink("test-datastore/notification.txt")
 | 
			
		||||
 | 
			
		||||
@@ -420,7 +422,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
 | 
			
		||||
    with open("test-datastore/notification.txt", 'r') as f:
 | 
			
		||||
        x = f.read()
 | 
			
		||||
        # Should come from notification.py default handler when there is no notification body to pull from
 | 
			
		||||
        assert 'change detection is coo' in x
 | 
			
		||||
        assert 'change detection is cool 网站监测 内容更新了' in x
 | 
			
		||||
 | 
			
		||||
    client.get(
 | 
			
		||||
        url_for("form_delete", uuid="all"),
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@ import os
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
 | 
			
		||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
 | 
			
		||||
from ..notification import default_notification_format
 | 
			
		||||
 | 
			
		||||
instock_props = [
 | 
			
		||||
@@ -182,7 +182,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
 | 
			
		||||
    # price changed to something LESS than min (900), SHOULD be a change
 | 
			
		||||
    set_original_response(props_markup=instock_props[0], price='890.45')
 | 
			
		||||
    # let previous runs wait
 | 
			
		||||
    time.sleep(1)
 | 
			
		||||
    time.sleep(2)
 | 
			
		||||
    
 | 
			
		||||
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
    assert b'1 watches queued for rechecking.' in res.data
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
@@ -362,7 +363,7 @@ def test_change_with_notification_values(client, live_server):
 | 
			
		||||
    set_original_response(props_markup=instock_props[0], price='1950.45')
 | 
			
		||||
    client.get(url_for("form_watch_checknow"))
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
    time.sleep(3)
 | 
			
		||||
    wait_for_notification_endpoint_output()
 | 
			
		||||
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
 | 
			
		||||
    with open("test-datastore/notification.txt", 'r') as f:
 | 
			
		||||
        notification = f.read()
 | 
			
		||||
 
 | 
			
		||||
@@ -76,6 +76,17 @@ def set_more_modified_response():
 | 
			
		||||
    return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def wait_for_notification_endpoint_output():
    """Wait for the notification output file to appear.

    Apprise can take a few seconds to fire, so this sleeps for one second and
    then checks for "test-datastore/notification.txt", repeating up to 19 times.

    Returns True as soon as the file exists, False if it never showed up.
    """
    from os.path import isfile

    remaining_checks = 19
    while remaining_checks:
        # Sleep first: the earliest check happens one second after the call
        time.sleep(1)
        if isfile("test-datastore/notification.txt"):
            return True
        remaining_checks -= 1

    return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# kinda funky, but works for now
 | 
			
		||||
def extract_api_key_from_UI(client):
 | 
			
		||||
    import re
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,5 @@
 | 
			
		||||
from .processors.exceptions import ProcessorException
 | 
			
		||||
from . import content_fetchers
 | 
			
		||||
 | 
			
		||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
 | 
			
		||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 | 
			
		||||
from changedetectionio import html_tools
 | 
			
		||||
 | 
			
		||||
@@ -301,7 +300,7 @@ class update_worker(threading.Thread):
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
 | 
			
		||||
                    except content_fetchers.exceptions.ReplyWithContentButNoText as e:
 | 
			
		||||
                    except content_fetchers_exceptions.ReplyWithContentButNoText as e:
 | 
			
		||||
                        # Totally fine, it's by choice - just continue on, nothing more to care about
 | 
			
		||||
                        # Page had elements/content but no renderable text
 | 
			
		||||
                        # Backend (not filters) gave zero output
 | 
			
		||||
@@ -327,7 +326,7 @@ class update_worker(threading.Thread):
 | 
			
		||||
                            
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
 | 
			
		||||
                    except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
 | 
			
		||||
                    except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
 | 
			
		||||
                        if e.status_code == 403:
 | 
			
		||||
                            err_text = "Error - 403 (Access denied) received"
 | 
			
		||||
                        elif e.status_code == 404:
 | 
			
		||||
@@ -380,23 +379,23 @@ class update_worker(threading.Thread):
 | 
			
		||||
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
 | 
			
		||||
                    except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e:
 | 
			
		||||
                    except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
 | 
			
		||||
                        # Yes fine, so nothing todo, don't continue to process.
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
                        changed_detected = False
 | 
			
		||||
                    except content_fetchers.exceptions.BrowserConnectError as e:
 | 
			
		||||
                    except content_fetchers_exceptions.BrowserConnectError as e:
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid,
 | 
			
		||||
                                                    update_obj={'last_error': e.msg
 | 
			
		||||
                                                                }
 | 
			
		||||
                                                    )
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
                    except content_fetchers.exceptions.BrowserFetchTimedOut as e:
 | 
			
		||||
                    except content_fetchers_exceptions.BrowserFetchTimedOut as e:
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid,
 | 
			
		||||
                                                    update_obj={'last_error': e.msg
 | 
			
		||||
                                                                }
 | 
			
		||||
                                                    )
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
                    except content_fetchers.exceptions.BrowserStepsStepException as e:
 | 
			
		||||
                    except content_fetchers_exceptions.BrowserStepsStepException as e:
 | 
			
		||||
 | 
			
		||||
                        if not self.datastore.data['watching'].get(uuid):
 | 
			
		||||
                            continue
 | 
			
		||||
@@ -438,25 +437,25 @@ class update_worker(threading.Thread):
 | 
			
		||||
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
 | 
			
		||||
                    except content_fetchers.exceptions.EmptyReply as e:
 | 
			
		||||
                    except content_fetchers_exceptions.EmptyReply as e:
 | 
			
		||||
                        # Some kind of custom to-str handler in the exception handler that does this?
 | 
			
		||||
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                                           'last_check_status': e.status_code})
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
                    except content_fetchers.exceptions.ScreenshotUnavailable as e:
 | 
			
		||||
                    except content_fetchers_exceptions.ScreenshotUnavailable as e:
 | 
			
		||||
                        err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                                           'last_check_status': e.status_code})
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
                    except content_fetchers.exceptions.JSActionExceptions as e:
 | 
			
		||||
                    except content_fetchers_exceptions.JSActionExceptions as e:
 | 
			
		||||
                        err_text = "Error running JS Actions - Page request - "+e.message
 | 
			
		||||
                        if e.screenshot:
 | 
			
		||||
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                                           'last_check_status': e.status_code})
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
                    except content_fetchers.exceptions.PageUnloadable as e:
 | 
			
		||||
                    except content_fetchers_exceptions.PageUnloadable as e:
 | 
			
		||||
                        err_text = "Page request from server didnt respond correctly"
 | 
			
		||||
                        if e.message:
 | 
			
		||||
                            err_text = "{} - {}".format(err_text, e.message)
 | 
			
		||||
@@ -468,7 +467,7 @@ class update_worker(threading.Thread):
 | 
			
		||||
                                                                           'last_check_status': e.status_code,
 | 
			
		||||
                                                                           'has_ldjson_price_data': None})
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
                    except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e:
 | 
			
		||||
                    except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
 | 
			
		||||
                        err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
 | 
			
		||||
                        process_changedetection_results = False
 | 
			
		||||
 
 | 
			
		||||
@@ -18,7 +18,7 @@ services:
 | 
			
		||||
  #
 | 
			
		||||
  #        Log levels are in descending order. (TRACE is the most detailed one)
 | 
			
		||||
  #        Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
 | 
			
		||||
  #      - LOGGER_LEVEL=DEBUG
 | 
			
		||||
  #      - LOGGER_LEVEL=TRACE
 | 
			
		||||
  #
 | 
			
		||||
  #       Alternative WebDriver/selenium URL, do not use "'s or 's!
 | 
			
		||||
  #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
 | 
			
		||||
@@ -29,8 +29,9 @@ services:
 | 
			
		||||
  #
 | 
			
		||||
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
 | 
			
		||||
  #
 | 
			
		||||
  #       Alternative Playwright URL, do not use "'s or 's!
 | 
			
		||||
  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
 | 
			
		||||
  #       Alternative target "Chrome" Playwright URL, do not use "'s or 's!
 | 
			
		||||
  #       "Playwright" is a driver/library that allows changedetection to talk to a Chrome or similar browser.
 | 
			
		||||
  #      - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
 | 
			
		||||
  #
 | 
			
		||||
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
 | 
			
		||||
  #
 | 
			
		||||
@@ -73,10 +74,10 @@ services:
 | 
			
		||||
#              condition: service_started
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
     # Used for fetching pages via Playwright+Chrome where you need Javascript support.
 | 
			
		||||
     # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
 | 
			
		||||
     # RECOMMENDED FOR FETCHING PAGES WITH CHROME
 | 
			
		||||
#    playwright-chrome:
 | 
			
		||||
#        hostname: playwright-chrome
 | 
			
		||||
#    sockpuppetbrowser:
 | 
			
		||||
#        hostname: sockpuppetbrowser
 | 
			
		||||
#        image: dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
#        cap_add:
 | 
			
		||||
#            - SYS_ADMIN
 | 
			
		||||
 
 | 
			
		||||
@@ -79,8 +79,9 @@ pyppeteerstealth>=0.0.4
 | 
			
		||||
pytest ~=7.2
 | 
			
		||||
pytest-flask ~=1.2
 | 
			
		||||
 | 
			
		||||
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
 | 
			
		||||
jsonschema==4.17.3
 | 
			
		||||
# Anything 4.0 and up but not 5.0
 | 
			
		||||
jsonschema ~= 4.0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
loguru
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user