mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			36 Commits
		
	
	
		
			post-reque
			...
			ui-mobile-
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 7c7f6ac182 | ||
|   | 5b34aece96 | ||
|   | 1b625dc18a | ||
|   | 367afc81e9 | ||
|   | ddfbef6db3 | ||
|   | e173954cdd | ||
|   | e830fb2320 | ||
|   | c6589ee1b4 | ||
|   | dc936a2e8a | ||
|   | 8c1527c1ad | ||
|   | a5ff1cd1d7 | ||
|   | 543cb205d2 | ||
|   | 273adfa0a4 | ||
|   | 8ecfd17973 | ||
|   | 19f3851c9d | ||
|   | 7f2fa20318 | ||
|   | e16814e40b | ||
|   | 337fcab3f1 | ||
|   | eaccd6026c | ||
|   | 5b70625eaa | ||
|   | 60d292107d | ||
|   | 1cb38347da | ||
|   | 55fe2abf42 | ||
|   | 4225900ec3 | ||
|   | 1fb4342488 | ||
|   | 7071df061a | ||
|   | 6dd1fa2b88 | ||
|   | 371f85d544 | ||
|   | 932cf15e1e | ||
|   | bf0d410d32 | ||
|   | 730f37c7ba | ||
|   | 8a35d62e02 | ||
|   | f527744024 | ||
|   | 71c9b1273c | ||
|   | ec68450df1 | ||
|   | 2fd762a783 | 
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.46.01' | ||||
| __version__ = '0.46.04' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
|   | ||||
							
								
								
									
										78
									
								
								changedetectionio/apprise_plugin/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								changedetectionio/apprise_plugin/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
# include the decorator
from apprise.decorators import notify

@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    """Send a raw HTTP request for the custom apprise pseudo-schemes.

    Registered via @notify for get/post/put/delete (and their TLS 's'
    variants). Translates a pseudo-scheme URL such as
    ``posts://example.com/hook?+x-token=abc`` into a plain https request
    using ``requests``, mapping ``+header=value`` query args to HTTP
    headers, remaining query args to request params, and any
    ``user:password@`` part to HTTP basic auth.
    """
    import requests
    import json
    from apprise.utils import parse_url as apprise_parse_url
    from apprise import URLBase

    url = kwargs['meta'].get('url')

    # Pick the requests method from the pseudo-scheme prefix.  The @notify
    # registrations above mean one of these should always match; raise
    # explicitly rather than leaving 'r' unbound if something unexpected
    # slips through.
    if url.startswith('post'):
        r = requests.post
    elif url.startswith('get'):
        r = requests.get
    elif url.startswith('put'):
        r = requests.put
    elif url.startswith('delete'):
        r = requests.delete
    else:
        raise ValueError(f"Unsupported scheme in custom notification URL: {url}")

    # Rewrite each pseudo-scheme onto real http/https.  'xxx://' is never a
    # substring of 'xxxs://' (the 's' precedes the '://'), so the order of
    # these replacements is safe.  (Duplicate put/puts lines removed.)
    url = url.replace('post://', 'http://')
    url = url.replace('posts://', 'https://')
    url = url.replace('get://', 'http://')
    url = url.replace('gets://', 'https://')
    url = url.replace('put://', 'http://')
    url = url.replace('puts://', 'https://')
    url = url.replace('delete://', 'http://')
    url = url.replace('deletes://', 'https://')

    headers = {}
    params = {}
    auth = None

    # Convert /foobar?+some-header=hello to a proper header dictionary
    results = apprise_parse_url(url)
    if results:
        # Add our headers that the user can potentially over-ride if they wish
        # to our returned result set, and tidy entries by unquoting them
        headers = {URLBase.unquote(x): URLBase.unquote(y)
                   for x, y in results['qsd+'].items()}

        # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
        # Apprise prefixes request args with "+"/"-" flags; since we make a
        # straight request here, forward only the args that were not already
        # consumed as '+' headers above.
        for k, v in results['qsd'].items():
            if not k.strip('+-') in results['qsd+'].keys():
                params[URLBase.unquote(k)] = URLBase.unquote(v)

        # Determine basic authentication from any user:password@ in the URL.
        if results.get('user') and results.get('password'):
            # BUGFIX: previously sent (user, user) — the password was never used
            auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('password')))
        elif results.get('user'):
            # requests expects a (user, password) tuple; a bare string is wrong
            auth = (URLBase.unquote(results.get('user')), '')

    # Try to auto-guess if the body is JSON so receivers get the right
    # Content-Type.  TypeError covers body=None, which json.loads rejects.
    try:
        json.loads(body)
        headers['Content-Type'] = 'application/json; charset=utf-8'
    except (ValueError, TypeError):
        pass

    r(results.get('url'),
      auth=auth,
      data=body.encode('utf-8') if type(body) is str else body,
      headers=headers,
      params=params
      )
| @@ -85,7 +85,8 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui( | ||||
|             playwright_browser=browsersteps_start_session['browser'], | ||||
|             proxy=proxy, | ||||
|             start_url=datastore.data['watching'][watch_uuid].get('url') | ||||
|             start_url=datastore.data['watching'][watch_uuid].get('url'), | ||||
|             headers=datastore.data['watching'][watch_uuid].get('headers') | ||||
|         ) | ||||
|  | ||||
|         # For test | ||||
|   | ||||
| @@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
|                           'Click element if exists': '1 0', | ||||
|                           'Click element': '1 0', | ||||
|                           'Click element containing text': '0 1', | ||||
|                           'Click element containing text if exists': '0 1', | ||||
|                           'Enter text in field': '1 1', | ||||
|                           'Execute JS': '0 1', | ||||
| #                          'Extract text and use as filter': '1 0', | ||||
| @@ -96,12 +97,24 @@ class steppable_browser_interface(): | ||||
|         return self.action_goto_url(value=self.start_url) | ||||
|  | ||||
|     def action_click_element_containing_text(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text") | ||||
|         if not len(value.strip()): | ||||
|             return | ||||
|         elem = self.page.get_by_text(value) | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=3000) | ||||
|  | ||||
|     def action_click_element_containing_text_if_exists(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text if exists") | ||||
|         if not len(value.strip()): | ||||
|             return | ||||
|         elem = self.page.get_by_text(value) | ||||
|         logger.debug(f"Clicking element containing text - {elem.count()} elements found") | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=3000) | ||||
|         else: | ||||
|             return | ||||
|  | ||||
|     def action_enter_text_in_field(self, selector, value): | ||||
|         if not len(selector.strip()): | ||||
|             return | ||||
|   | ||||
| @@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                         {% if '/text()' in  field %} | ||||
|                           <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br> | ||||
|                         {% endif %} | ||||
|                         <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br> | ||||
|  | ||||
|                     <ul> | ||||
|                         <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br> | ||||
|                     <div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div> | ||||
|                     <ul id="advanced-help-selectors"> | ||||
|                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). | ||||
|                             <ul> | ||||
| @@ -89,11 +89,13 @@ xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                     {{ render_field(form.subtractive_selectors, rows=5, placeholder="header | ||||
| footer | ||||
| nav | ||||
| .stockticker") }} | ||||
| .stockticker | ||||
| //*[contains(text(), 'Advertisement')]") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                           <li> Remove HTML element(s) by CSS selector before text conversion. </li> | ||||
|                           <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                           <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS and XPath selectors </li> | ||||
|                           <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                         </ul> | ||||
|                       </span> | ||||
|                 </fieldset> | ||||
|   | ||||
| @@ -65,8 +65,8 @@ class Fetcher(): | ||||
|  | ||||
|     def __init__(self): | ||||
|         import importlib.resources | ||||
|         self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() | ||||
|         self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text() | ||||
|         self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8') | ||||
|         self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8') | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_error(self): | ||||
| @@ -81,7 +81,8 @@ class Fetcher(): | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|         # Should set self.error, self.status_code and self.content | ||||
|         pass | ||||
|  | ||||
|   | ||||
| @@ -83,7 +83,8 @@ class fetcher(Fetcher): | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         import playwright._impl._errors | ||||
| @@ -130,7 +131,7 @@ class fetcher(Fetcher): | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 logger.debug("Content Fetcher > Response object was none") | ||||
|                 logger.debug("Content Fetcher > Response object from the browser communication was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             try: | ||||
| @@ -166,10 +167,10 @@ class fetcher(Fetcher): | ||||
|  | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|  | ||||
|             if len(self.page.content().strip()) == 0: | ||||
|             if not empty_pages_are_a_change and len(self.page.content().strip()) == 0: | ||||
|                 logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False") | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 logger.debug("Content Fetcher > Content was empty") | ||||
|                 raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|             # Run Browser Steps here | ||||
|   | ||||
| @@ -75,7 +75,8 @@ class fetcher(Fetcher): | ||||
|                          request_method, | ||||
|                          ignore_status_codes, | ||||
|                          current_include_filters, | ||||
|                          is_binary | ||||
|                          is_binary, | ||||
|                          empty_pages_are_a_change | ||||
|                          ): | ||||
|  | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
| @@ -153,7 +154,7 @@ class fetcher(Fetcher): | ||||
|         if response is None: | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             logger.warning("Content Fetcher > Response object was none") | ||||
|             logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)") | ||||
|             raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|         self.headers = response.headers | ||||
| @@ -186,10 +187,11 @@ class fetcher(Fetcher): | ||||
|  | ||||
|             raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|         content = await self.page.content | ||||
|         if len(content.strip()) == 0: | ||||
|  | ||||
|         if not empty_pages_are_a_change and len(content.strip()) == 0: | ||||
|             logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers") | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             logger.error("Content Fetcher > Content was empty") | ||||
|             raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|         # Run Browser Steps here | ||||
| @@ -247,7 +249,7 @@ class fetcher(Fetcher): | ||||
|         await self.fetch_page(**kwargs) | ||||
|  | ||||
|     def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False, | ||||
|             current_include_filters=None, is_binary=False): | ||||
|             current_include_filters=None, is_binary=False, empty_pages_are_a_change=False): | ||||
|  | ||||
|         #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints | ||||
|         max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180) | ||||
| @@ -262,7 +264,8 @@ class fetcher(Fetcher): | ||||
|                 request_method=request_method, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|                 current_include_filters=current_include_filters, | ||||
|                 is_binary=is_binary | ||||
|                 is_binary=is_binary, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change | ||||
|             ), timeout=max_time)) | ||||
|         except asyncio.TimeoutError: | ||||
|             raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) | ||||
|   | ||||
| @@ -1,9 +1,6 @@ | ||||
| from loguru import logger | ||||
| import hashlib | ||||
| import os | ||||
|  | ||||
| import chardet | ||||
| import requests | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
| from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
| @@ -26,7 +23,11 @@ class fetcher(Fetcher): | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|  | ||||
|         import chardet | ||||
|         import requests | ||||
|  | ||||
|         if self.browser_steps_get_valid_steps(): | ||||
|             raise BrowserStepsInUnsupportedFetcher(url=url) | ||||
| @@ -53,7 +54,7 @@ class fetcher(Fetcher): | ||||
|             session.mount('file://', FileAdapter()) | ||||
|  | ||||
|         r = session.request(method=request_method, | ||||
|                             data=request_body, | ||||
|                             data=request_body.encode('utf-8') if type(request_body) is str else request_body, | ||||
|                             url=url, | ||||
|                             headers=request_headers, | ||||
|                             timeout=timeout, | ||||
| @@ -74,7 +75,10 @@ class fetcher(Fetcher): | ||||
|         self.headers = r.headers | ||||
|  | ||||
|         if not r.content or not len(r.content): | ||||
|             raise EmptyReply(url=url, status_code=r.status_code) | ||||
|             if not empty_pages_are_a_change: | ||||
|                 raise EmptyReply(url=url, status_code=r.status_code) | ||||
|             else: | ||||
|                 logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True") | ||||
|  | ||||
|         # @todo test this | ||||
|         # @todo maybe you really want to test zero-byte return pages? | ||||
|   | ||||
| @@ -75,6 +75,7 @@ function isItemInStock() { | ||||
|         'vergriffen', | ||||
|         'vorbestellen', | ||||
|         'vorbestellung ist bald möglich', | ||||
|         'we don\'t currently have any', | ||||
|         'we couldn\'t find any products that match', | ||||
|         'we do not currently have an estimate of when this product will be back in stock.', | ||||
|         'we don\'t know when or if this item will be back in stock.', | ||||
| @@ -173,7 +174,8 @@ function isItemInStock() { | ||||
|         const element = elementsToScan[i]; | ||||
|         // outside the 'fold' or some weird text in the heading area | ||||
|         // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|         // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             continue | ||||
|         } | ||||
|         elementText = ""; | ||||
| @@ -187,7 +189,7 @@ function isItemInStock() { | ||||
|             // and these mean its out of stock | ||||
|             for (const outOfStockText of outOfStockTexts) { | ||||
|                 if (elementText.includes(outOfStockText)) { | ||||
|                     console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`) | ||||
|                     console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) | ||||
|                     return outOfStockText; // item is out of stock | ||||
|                 } | ||||
|             } | ||||
|   | ||||
| @@ -164,6 +164,15 @@ visibleElementsArray.forEach(function (element) { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now | ||||
|  | ||||
|     let text = element.textContent.trim().slice(0, 30).trim(); | ||||
|     while (/\n{2,}|\t{2,}/.test(text)) { | ||||
|         text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t') | ||||
|     } | ||||
|  | ||||
|     // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training. | ||||
|     const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) &&  /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ; | ||||
|  | ||||
|     size_pos.push({ | ||||
|         xpath: xpath_result, | ||||
| @@ -171,9 +180,16 @@ visibleElementsArray.forEach(function (element) { | ||||
|         height: Math.round(bbox['height']), | ||||
|         left: Math.floor(bbox['left']), | ||||
|         top: Math.floor(bbox['top']) + scroll_y, | ||||
|         // tagName used by Browser Steps | ||||
|         tagName: (element.tagName) ? element.tagName.toLowerCase() : '', | ||||
|         // tagtype used by Browser Steps | ||||
|         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', | ||||
|         isClickable: window.getComputedStyle(element).cursor == "pointer" | ||||
|         isClickable: window.getComputedStyle(element).cursor === "pointer", | ||||
|         // Used by the keras trainer | ||||
|         fontSize: window.getComputedStyle(element).getPropertyValue('font-size'), | ||||
|         fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'), | ||||
|         hasDigitCurrency: hasDigitCurrency, | ||||
|         label: label, | ||||
|     }); | ||||
|  | ||||
| }); | ||||
|   | ||||
| @@ -56,7 +56,8 @@ class fetcher(Fetcher): | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|  | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import datetime | ||||
| import importlib | ||||
|  | ||||
| import flask_login | ||||
| import locale | ||||
| import os | ||||
| @@ -10,7 +12,9 @@ import threading | ||||
| import time | ||||
| import timeago | ||||
|  | ||||
| from .content_fetchers.exceptions import ReplyWithContentButNoText | ||||
| from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor | ||||
| from .processors.text_json_diff.processor import FilterNotFoundInResponse | ||||
| from .safe_jinja import render as jinja_render | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from copy import deepcopy | ||||
| @@ -537,7 +541,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         import random | ||||
|         from .apprise_asset import asset | ||||
|         apobj = apprise.Apprise(asset=asset) | ||||
|  | ||||
|         # so that the custom endpoints are registered | ||||
|         from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper | ||||
|         is_global_settings_form = request.args.get('mode', '') == 'global-settings' | ||||
|         is_group_settings_form = request.args.get('mode', '') == 'group-settings' | ||||
|  | ||||
| @@ -1377,22 +1382,76 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         import brotli | ||||
|  | ||||
|         watch = datastore.data['watching'].get(uuid) | ||||
|         if watch and os.path.isdir(watch.watch_data_dir): | ||||
|             latest_filename = list(watch.history.keys())[0] | ||||
|         if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir): | ||||
|             latest_filename = list(watch.history.keys())[-1] | ||||
|             html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br") | ||||
|             if html_fname.endswith('.br'): | ||||
|                 # Read and decompress the Brotli file | ||||
|                 with open(html_fname, 'rb') as f: | ||||
|             with open(html_fname, 'rb') as f: | ||||
|                 if html_fname.endswith('.br'): | ||||
|                     # Read and decompress the Brotli file | ||||
|                     decompressed_data = brotli.decompress(f.read()) | ||||
|                 else: | ||||
|                     decompressed_data = f.read() | ||||
|  | ||||
|                 buffer = BytesIO(decompressed_data) | ||||
|             buffer = BytesIO(decompressed_data) | ||||
|  | ||||
|                 return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html') | ||||
|             return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html') | ||||
|  | ||||
|  | ||||
|         # Return a 500 error | ||||
|         abort(500) | ||||
|  | ||||
|     @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def watch_get_preview_rendered(uuid): | ||||
|         '''For when viewing the "preview" of the rendered text from inside of Edit''' | ||||
|         now = time.time() | ||||
|         import brotli | ||||
|         from . import forms | ||||
|  | ||||
|         text_after_filter = '' | ||||
|         tmp_watch = deepcopy(datastore.data['watching'].get(uuid)) | ||||
|  | ||||
|         if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir): | ||||
|             # Splice in the temporary stuff from the form | ||||
|             form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None, | ||||
|                                                        data=request.form | ||||
|                                                        ) | ||||
|             # Only update vars that came in via the AJAX post | ||||
|             p = {k: v for k, v in form.data.items() if k in request.form.keys()} | ||||
|             tmp_watch.update(p) | ||||
|  | ||||
|             latest_filename = next(reversed(tmp_watch.history)) | ||||
|             html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br") | ||||
|             with open(html_fname, 'rb') as f: | ||||
|                 decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8') | ||||
|  | ||||
|                 # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser() | ||||
|                 processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor") | ||||
|                 update_handler = processor_module.perform_site_check(datastore=datastore, | ||||
|                                                                      watch_uuid=uuid # probably not needed anymore anyway? | ||||
|                                                                      ) | ||||
|                 # Use the last loaded HTML as the input | ||||
|                 update_handler.fetcher.content = decompressed_data | ||||
|                 update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') | ||||
|                 try: | ||||
|                     changed_detected, update_obj, contents, text_after_filter = update_handler.run_changedetection( | ||||
|                         watch=tmp_watch, | ||||
|                         skip_when_checksum_same=False, | ||||
|                     ) | ||||
|                 except FilterNotFoundInResponse as e: | ||||
|                     text_after_filter = f"Filter not found in HTML: {str(e)}" | ||||
|                 except ReplyWithContentButNoText as e: | ||||
|                     text_after_filter = f"Filter found but no text (empty result)" | ||||
|                 except Exception as e: | ||||
|                     text_after_filter = f"Error: {str(e)}" | ||||
|  | ||||
|             if not text_after_filter.strip(): | ||||
|                 text_after_filter = 'Empty content' | ||||
|  | ||||
|         logger.trace(f"Parsed in {time.time()-now:.3f}s") | ||||
|         return text_after_filter.strip() | ||||
|  | ||||
|  | ||||
|     @app.route("/form/add/quickwatch", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def form_quick_watch_add(): | ||||
|   | ||||
| @@ -221,7 +221,8 @@ class ValidateAppRiseServers(object): | ||||
|     def __call__(self, form, field): | ||||
|         import apprise | ||||
|         apobj = apprise.Apprise() | ||||
|  | ||||
|         # so that the custom endpoints are registered | ||||
|         from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper | ||||
|         for server_url in field.data: | ||||
|             if not apobj.add(server_url): | ||||
|                 message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url)) | ||||
| @@ -468,7 +469,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|  | ||||
|     include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='') | ||||
|  | ||||
|     subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|     subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)]) | ||||
|  | ||||
|     extract_text = StringListField('Extract text', [ValidateListRegex()]) | ||||
|  | ||||
| @@ -479,8 +480,10 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     body = TextAreaField('Request body', [validators.Optional()]) | ||||
|     method = SelectField('Request method', choices=valid_method, default=default_method) | ||||
|     ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) | ||||
|     check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False) | ||||
|     check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False) | ||||
|     remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False) | ||||
|     sort_text_alphabetically =  BooleanField('Sort text alphabetically', default=False) | ||||
|     trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False) | ||||
|  | ||||
|     filter_text_added = BooleanField('Added lines', default=True) | ||||
|     filter_text_replaced = BooleanField('Replaced/changed lines', default=True) | ||||
| @@ -575,7 +578,7 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|     empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False) | ||||
|     fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) | ||||
|     global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|     global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)]) | ||||
|     ignore_whitespace = BooleanField('Ignore whitespace') | ||||
|     password = SaltyPasswordField() | ||||
|     pager_size = IntegerField('Pager size', | ||||
|   | ||||
| @@ -1,10 +1,5 @@ | ||||
|  | ||||
| from bs4 import BeautifulSoup | ||||
| from inscriptis import get_text | ||||
| from jsonpath_ng.ext import parse | ||||
| from typing import List | ||||
| from inscriptis.model.config import ParserConfig | ||||
| from xml.sax.saxutils import escape as xml_escape | ||||
| from lxml import etree | ||||
| import json | ||||
| import re | ||||
|  | ||||
| @@ -39,6 +34,7 @@ def perl_style_slash_enclosed_regex_to_options(regex): | ||||
|  | ||||
| # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches | ||||
| def include_filters(include_filters, html_content, append_pretty_line_formatting=False): | ||||
|     from bs4 import BeautifulSoup | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
|     html_block = "" | ||||
|     r = soup.select(include_filters, separator="") | ||||
| @@ -56,16 +52,32 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting | ||||
|     return html_block | ||||
|  | ||||
| def subtractive_css_selector(css_selector, html_content): | ||||
|     from bs4 import BeautifulSoup | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
|     for item in soup.select(css_selector): | ||||
|         item.decompose() | ||||
|     return str(soup) | ||||
|  | ||||
| def subtractive_xpath_selector(xpath_selector, html_content):  | ||||
|     html_tree = etree.HTML(html_content) | ||||
|     elements_to_remove = html_tree.xpath(xpath_selector) | ||||
|  | ||||
|     for element in elements_to_remove: | ||||
|         element.getparent().remove(element) | ||||
|  | ||||
|     modified_html = etree.tostring(html_tree, method="html").decode("utf-8") | ||||
|     return modified_html | ||||
|  | ||||
| def element_removal(selectors: List[str], html_content): | ||||
|     """Joins individual filters into one css filter.""" | ||||
|     selector = ",".join(selectors) | ||||
|     return subtractive_css_selector(selector, html_content) | ||||
|     """Removes elements that match a list of CSS or xPath selectors.""" | ||||
|     modified_html = html_content | ||||
|     for selector in selectors: | ||||
|         if selector.startswith(('xpath:', 'xpath1:', '//')): | ||||
|             xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:') | ||||
|             modified_html = subtractive_xpath_selector(xpath_selector, modified_html) | ||||
|         else: | ||||
|             modified_html = subtractive_css_selector(selector, modified_html) | ||||
|     return modified_html | ||||
|  | ||||
| def elementpath_tostring(obj): | ||||
|     """ | ||||
| @@ -181,6 +193,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals | ||||
|  | ||||
| # Extract/find element | ||||
| def extract_element(find='title', html_content=''): | ||||
|     from bs4 import BeautifulSoup | ||||
|  | ||||
|     #Re #106, be sure to handle when its not found | ||||
|     element_text = None | ||||
| @@ -194,6 +207,8 @@ def extract_element(find='title', html_content=''): | ||||
|  | ||||
| # | ||||
| def _parse_json(json_data, json_filter): | ||||
|     from jsonpath_ng.ext import parse | ||||
|  | ||||
|     if json_filter.startswith("json:"): | ||||
|         jsonpath_expression = parse(json_filter.replace('json:', '')) | ||||
|         match = jsonpath_expression.find(json_data) | ||||
| @@ -242,6 +257,8 @@ def _get_stripped_text_from_json_match(match): | ||||
| # json_filter - ie json:$..price | ||||
| # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) | ||||
| def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): | ||||
|     from bs4 import BeautifulSoup | ||||
|  | ||||
|     stripped_text_from_html = False | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w | ||||
|     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags | ||||
| @@ -352,6 +369,7 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     return "\n".encode('utf8').join(output) | ||||
|  | ||||
| def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|     from xml.sax.saxutils import escape as xml_escape | ||||
|     pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>' | ||||
|     def repl(m): | ||||
|         text = m.group(1) | ||||
| @@ -360,6 +378,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False | ||||
|     return re.sub(pattern, repl, html_content) | ||||
|  | ||||
| def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str: | ||||
|     from inscriptis import get_text | ||||
|     from inscriptis.model.config import ParserConfig | ||||
|  | ||||
|     """Converts html string to a string with just the text. If ignoring | ||||
|     rendering anchor tag content is enable, anchor tag content are also | ||||
|     included in the text | ||||
|   | ||||
| @@ -18,6 +18,7 @@ class watch_base(dict): | ||||
|             'check_count': 0, | ||||
|             'check_unique_lines': False,  # On change-detected, compare against all history if its something new | ||||
|             'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'content-type': None, | ||||
|             'date_created': None, | ||||
|             'extract_text': [],  # Extract text by regex after filters | ||||
|             'extract_title_as_title': False, | ||||
| @@ -60,6 +61,8 @@ class watch_base(dict): | ||||
|             'time_between_check_use_default': True, | ||||
|             'title': None, | ||||
|             'track_ldjson_price_data': None, | ||||
|             'trim_text_whitespace': False, | ||||
|             'remove_duplicate_lines': False, | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'url': '', | ||||
|             'uuid': str(uuid.uuid4()), | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| import apprise | ||||
|  | ||||
| import time | ||||
| from apprise import NotifyFormat | ||||
| import json | ||||
| import apprise | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
| @@ -34,86 +35,11 @@ valid_notification_formats = { | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
|  | ||||
| # include the decorator | ||||
| from apprise.decorators import notify | ||||
|  | ||||
| @notify(on="delete") | ||||
| @notify(on="deletes") | ||||
| @notify(on="get") | ||||
| @notify(on="gets") | ||||
| @notify(on="post") | ||||
| @notify(on="posts") | ||||
| @notify(on="put") | ||||
| @notify(on="puts") | ||||
| def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs): | ||||
|     import requests | ||||
|     from apprise.utils import parse_url as apprise_parse_url | ||||
|     from apprise import URLBase | ||||
|  | ||||
|     url = kwargs['meta'].get('url') | ||||
|  | ||||
|     if url.startswith('post'): | ||||
|         r = requests.post | ||||
|     elif url.startswith('get'): | ||||
|         r = requests.get | ||||
|     elif url.startswith('put'): | ||||
|         r = requests.put | ||||
|     elif url.startswith('delete'): | ||||
|         r = requests.delete | ||||
|  | ||||
|     url = url.replace('post://', 'http://') | ||||
|     url = url.replace('posts://', 'https://') | ||||
|     url = url.replace('put://', 'http://') | ||||
|     url = url.replace('puts://', 'https://') | ||||
|     url = url.replace('get://', 'http://') | ||||
|     url = url.replace('gets://', 'https://') | ||||
|     url = url.replace('put://', 'http://') | ||||
|     url = url.replace('puts://', 'https://') | ||||
|     url = url.replace('delete://', 'http://') | ||||
|     url = url.replace('deletes://', 'https://') | ||||
|  | ||||
|     headers = {} | ||||
|     params = {} | ||||
|     auth = None | ||||
|  | ||||
|     # Convert /foobar?+some-header=hello to proper header dictionary | ||||
|     results = apprise_parse_url(url) | ||||
|     if results: | ||||
|         # Add our headers that the user can potentially over-ride if they wish | ||||
|         # to to our returned result set and tidy entries by unquoting them | ||||
|         headers = {URLBase.unquote(x): URLBase.unquote(y) | ||||
|                    for x, y in results['qsd+'].items()} | ||||
|  | ||||
|         # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation | ||||
|         # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise | ||||
|         # but here we are making straight requests, so we need todo convert this against apprise's logic | ||||
|         for k, v in results['qsd'].items(): | ||||
|             if not k.strip('+-') in results['qsd+'].keys(): | ||||
|                 params[URLBase.unquote(k)] = URLBase.unquote(v) | ||||
|  | ||||
|         # Determine Authentication | ||||
|         auth = '' | ||||
|         if results.get('user') and results.get('password'): | ||||
|             auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user'))) | ||||
|         elif results.get('user'): | ||||
|             auth = (URLBase.unquote(results.get('user'))) | ||||
|  | ||||
|     # Try to auto-guess if it's JSON | ||||
|     try: | ||||
|         json.loads(body) | ||||
|         headers['Content-Type'] = 'application/json; charset=utf-8' | ||||
|     except ValueError as e: | ||||
|         pass | ||||
|  | ||||
|     r(results.get('url'), | ||||
|       auth=auth, | ||||
|       data=body, | ||||
|       headers=headers, | ||||
|       params=params | ||||
|       ) | ||||
|  | ||||
|  | ||||
| def process_notification(n_object, datastore): | ||||
|     # so that the custom endpoints are registered | ||||
|     from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper | ||||
|  | ||||
|     from .safe_jinja import render as jinja_render | ||||
|     now = time.time() | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| from abc import abstractmethod | ||||
|  | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from copy import deepcopy | ||||
| @@ -23,9 +25,12 @@ class difference_detection_processor(): | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self.datastore = datastore | ||||
|         self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid)) | ||||
|         # Generic fetcher that should be extended (requests, playwright etc) | ||||
|         self.fetcher = Fetcher() | ||||
|  | ||||
|     def call_browser(self): | ||||
|         from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
|         # Protect against file:// access | ||||
|         if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE): | ||||
|             if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')): | ||||
| @@ -133,8 +138,18 @@ class difference_detection_processor(): | ||||
|         is_binary = self.watch.is_pdf | ||||
|  | ||||
|         # And here we go! call the right browser with browser-specific settings | ||||
|         self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'), | ||||
|                     is_binary=is_binary) | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|  | ||||
|         self.fetcher.run(url=url, | ||||
|                          timeout=timeout, | ||||
|                          request_headers=request_headers, | ||||
|                          request_body=request_body, | ||||
|                          request_method=request_method, | ||||
|                          ignore_status_codes=ignore_status_codes, | ||||
|                          current_include_filters=self.watch.get('include_filters'), | ||||
|                          is_binary=is_binary, | ||||
|                          empty_pages_are_a_change=empty_pages_are_a_change | ||||
|                          ) | ||||
|  | ||||
|         #@todo .quit here could go on close object, so we can run JS if change-detected | ||||
|         self.fetcher.quit() | ||||
| @@ -147,7 +162,7 @@ class difference_detection_processor(): | ||||
|         some_data = 'xxxxx' | ||||
|         update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() | ||||
|         changed_detected = False | ||||
|         return changed_detected, update_obj, ''.encode('utf-8') | ||||
|         return changed_detected, update_obj, ''.encode('utf-8'), b'' | ||||
|  | ||||
|  | ||||
| def find_sub_packages(package_name): | ||||
|   | ||||
| @@ -1,11 +1,12 @@ | ||||
|  | ||||
| from changedetectionio.model.Watch import model as BaseWatch | ||||
| import re | ||||
| from babel.numbers import parse_decimal | ||||
| from changedetectionio.model.Watch import model as BaseWatch | ||||
| from typing import Union | ||||
| import re | ||||
|  | ||||
| class Restock(dict): | ||||
|  | ||||
|     def parse_currency(self, raw_value: str) -> float: | ||||
|     def parse_currency(self, raw_value: str) -> Union[float, None]: | ||||
|         # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer. | ||||
|         standardized_value = raw_value | ||||
|  | ||||
| @@ -21,8 +22,11 @@ class Restock(dict): | ||||
|         # Remove any non-numeric characters except for the decimal point | ||||
|         standardized_value = re.sub(r'[^\d.-]', '', standardized_value) | ||||
|  | ||||
|         # Convert to float | ||||
|         return float(parse_decimal(standardized_value, locale='en')) | ||||
|         if standardized_value: | ||||
|             # Convert to float | ||||
|             return float(parse_decimal(standardized_value, locale='en')) | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         # Define default values | ||||
|   | ||||
| @@ -2,8 +2,7 @@ from .. import difference_detection_processor | ||||
| from ..exceptions import ProcessorException | ||||
| from . import Restock | ||||
| from loguru import logger | ||||
| import hashlib | ||||
| import re | ||||
|  | ||||
| import urllib3 | ||||
| import time | ||||
|  | ||||
| @@ -27,6 +26,25 @@ def _search_prop_by_value(matches, value): | ||||
|             if value in prop[0]: | ||||
|                 return prop[1]  # Yield the desired value and exit the function | ||||
|  | ||||
| def _deduplicate_prices(data): | ||||
|     seen = set() | ||||
|     unique_data = [] | ||||
|  | ||||
|     for datum in data: | ||||
|         # Convert 'value' to float if it can be a numeric string, otherwise leave it as is | ||||
|         try: | ||||
|             normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value | ||||
|         except ValueError: | ||||
|             normalized_value = datum.value | ||||
|  | ||||
|         # If the normalized value hasn't been seen yet, add it to unique data | ||||
|         if normalized_value not in seen: | ||||
|             unique_data.append(datum) | ||||
|             seen.add(normalized_value) | ||||
|      | ||||
|     return unique_data | ||||
|  | ||||
|  | ||||
| # should return Restock() | ||||
| # add casting? | ||||
| def get_itemprop_availability(html_content) -> Restock: | ||||
| @@ -36,17 +54,21 @@ def get_itemprop_availability(html_content) -> Restock: | ||||
|     """ | ||||
|     from jsonpath_ng import parse | ||||
|  | ||||
|     import re | ||||
|     now = time.time() | ||||
|     import extruct | ||||
|     logger.trace(f"Imported extruct module in {time.time() - now:.3f}s") | ||||
|  | ||||
|     value = {} | ||||
|     now = time.time() | ||||
|  | ||||
|     # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest. | ||||
|  | ||||
|     syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph'] | ||||
|     try: | ||||
|         data = extruct.extract(html_content, syntaxes=syntaxes) | ||||
|     except Exception as e: | ||||
|         logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}") | ||||
|         return Restock() | ||||
|  | ||||
|     data = extruct.extract(html_content, syntaxes=syntaxes) | ||||
|     logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s") | ||||
|  | ||||
|     # First phase, dead simple scanning of anything that looks useful | ||||
| @@ -57,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock: | ||||
|         pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )') | ||||
|         availability_parse = parse('$..(availability|Availability)') | ||||
|  | ||||
|         price_result = price_parse.find(data) | ||||
|         price_result = _deduplicate_prices(price_parse.find(data)) | ||||
|         if price_result: | ||||
|             # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and | ||||
|             # parse that for the UI? | ||||
| @@ -119,6 +141,8 @@ class perform_site_check(difference_detection_processor): | ||||
|     xpath_data = None | ||||
|  | ||||
|     def run_changedetection(self, watch, skip_when_checksum_same=True): | ||||
|         import hashlib | ||||
|  | ||||
|         if not watch: | ||||
|             raise Exception("Watch no longer exists.") | ||||
|  | ||||
| @@ -132,6 +156,20 @@ class perform_site_check(difference_detection_processor): | ||||
|         update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '') | ||||
|         update_obj["last_check_status"] = self.fetcher.get_last_status_code() | ||||
|  | ||||
|         # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly. | ||||
|         # Otherwise it will assume "in stock" because nothing suggesting the opposite was found | ||||
|         from ...html_tools import html_to_text | ||||
|         text = html_to_text(self.fetcher.content) | ||||
|         logger.debug(f"Length of text after conversion: {len(text)}") | ||||
|         if not len(text): | ||||
|             from ...content_fetchers.exceptions import ReplyWithContentButNoText | ||||
|             raise ReplyWithContentButNoText(url=watch.link, | ||||
|                                             status_code=self.fetcher.get_last_status_code(), | ||||
|                                             screenshot=self.fetcher.screenshot, | ||||
|                                             html_content=self.fetcher.content, | ||||
|                                             xpath_data=self.fetcher.xpath_data | ||||
|                                             ) | ||||
|  | ||||
|         # Which restock settings to compare against? | ||||
|         restock_settings = watch.get('restock_settings', {}) | ||||
|  | ||||
| @@ -146,7 +184,7 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         itemprop_availability = {} | ||||
|         try: | ||||
|             itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content) | ||||
|             itemprop_availability = get_itemprop_availability(self.fetcher.content) | ||||
|         except MoreThanOnePriceFound as e: | ||||
|             # Add the real data | ||||
|             raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.", | ||||
| @@ -260,4 +298,4 @@ class perform_site_check(difference_detection_processor): | ||||
|         # Always record the new checksum | ||||
|         update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|         return changed_detected, update_obj, snapshot_content.encode('utf-8').strip() | ||||
|         return changed_detected, update_obj, snapshot_content.encode('utf-8').strip(), b'' | ||||
|   | ||||
| @@ -36,6 +36,7 @@ class PDFToHTMLToolNotFound(ValueError): | ||||
| class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|     def run_changedetection(self, watch, skip_when_checksum_same=True): | ||||
|  | ||||
|         changed_detected = False | ||||
|         html_content = "" | ||||
|         screenshot = False  # as bytes | ||||
| @@ -175,13 +176,13 @@ class perform_site_check(difference_detection_processor): | ||||
|                                                                     html_content=self.fetcher.content, | ||||
|                                                                     append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                     is_rss=is_rss) | ||||
|  | ||||
|                         elif filter_rule.startswith('xpath1:'): | ||||
|                             html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''), | ||||
|                                                                     html_content=self.fetcher.content, | ||||
|                                                                     append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                     is_rss=is_rss) | ||||
|                                                                      html_content=self.fetcher.content, | ||||
|                                                                      append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                      is_rss=is_rss) | ||||
|                         else: | ||||
|                             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|                             html_content += html_tools.include_filters(include_filters=filter_rule, | ||||
|                                                                        html_content=self.fetcher.content, | ||||
|                                                                        append_pretty_line_formatting=not watch.is_source_type_url) | ||||
| @@ -197,18 +198,23 @@ class perform_site_check(difference_detection_processor): | ||||
|                 else: | ||||
|                     # extract text | ||||
|                     do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False) | ||||
|                     stripped_text_from_html = \ | ||||
|                         html_tools.html_to_text( | ||||
|                             html_content=html_content, | ||||
|                             render_anchor_tag_content=do_anchor, | ||||
|                             is_rss=is_rss # #1874 activate the <title workaround hack | ||||
|                         ) | ||||
|                     stripped_text_from_html = html_tools.html_to_text(html_content=html_content, | ||||
|                                                                       render_anchor_tag_content=do_anchor, | ||||
|                                                                       is_rss=is_rss)  # 1874 activate the <title workaround hack | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically') and stripped_text_from_html: | ||||
|  | ||||
|         if watch.get('trim_text_whitespace'): | ||||
|             stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()) | ||||
|  | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n') | ||||
|             stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() )) | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|  | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') | ||||
| @@ -236,7 +242,7 @@ class perform_site_check(difference_detection_processor): | ||||
|                 # We had some content, but no differences were found | ||||
|                 # Store our new file as the MD5 so it will trigger in the future | ||||
|                 c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest() | ||||
|                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8') | ||||
|                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8'), stripped_text_from_html.encode('utf-8') | ||||
|             else: | ||||
|                 stripped_text_from_html = rendered_diff | ||||
|  | ||||
| @@ -290,7 +296,7 @@ class perform_site_check(difference_detection_processor): | ||||
|                         for match in res: | ||||
|                             regex_matched_output += [match] + [b'\n'] | ||||
|  | ||||
|             # Now we will only show what the regex matched | ||||
|             ########################################################## | ||||
|             stripped_text_from_html = b'' | ||||
|             text_content_before_ignored_filter = b'' | ||||
|             if regex_matched_output: | ||||
| @@ -298,6 +304,8 @@ class perform_site_check(difference_detection_processor): | ||||
|                 stripped_text_from_html = b''.join(regex_matched_output) | ||||
|                 text_content_before_ignored_filter = stripped_text_from_html | ||||
|  | ||||
|  | ||||
|  | ||||
|         # Re #133 - if we should strip whitespaces from triggering the change detected comparison | ||||
|         if self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest() | ||||
| @@ -357,4 +365,4 @@ class perform_site_check(difference_detection_processor): | ||||
|         if not watch.get('previous_md5'): | ||||
|             watch['previous_md5'] = fetched_md5 | ||||
|  | ||||
|         return changed_detected, update_obj, text_content_before_ignored_filter | ||||
|         return changed_detected, update_obj, text_content_before_ignored_filter, stripped_text_from_html | ||||
|   | ||||
| @@ -16,25 +16,31 @@ echo "---------------------------------- SOCKS5 -------------------" | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   --rm \ | ||||
|   -e "FLASK_SERVER_NAME=cdio" \ | ||||
|   --hostname cdio \ | ||||
|   -e "SOCKSTEST=proxiesjson" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|  | ||||
| # SOCKS5 related - by manually entering in UI | ||||
| docker run --network changedet-network \ | ||||
|   --rm \ | ||||
|   -e "FLASK_SERVER_NAME=cdio" \ | ||||
|   --hostname cdio \ | ||||
|   -e "SOCKSTEST=manual" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py' | ||||
|  | ||||
| # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY | ||||
| docker run --network changedet-network \ | ||||
|   -e "SOCKSTEST=manual-playwright" \ | ||||
|   --hostname cdio \ | ||||
|   -e "FLASK_SERVER_NAME=cdio" \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \ | ||||
|   --rm \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|  | ||||
| echo "socks5 server logs" | ||||
| docker logs socks5proxy | ||||
|   | ||||
| @@ -18,9 +18,11 @@ $(document).ready(function () { | ||||
|  | ||||
|     }); | ||||
|  | ||||
|     $("#notification-token-toggle").click(function (e) { | ||||
|     $(".toggle-show").click(function (e) { | ||||
|         e.preventDefault(); | ||||
|         $('#notification-tokens-info').toggle(); | ||||
|         let target = $(this).data('target'); | ||||
|         $(target).toggle(); | ||||
|     }); | ||||
|  | ||||
| }); | ||||
|  | ||||
|   | ||||
| @@ -12,6 +12,55 @@ function toggleOpacity(checkboxSelector, fieldSelector, inverted) { | ||||
|     checkbox.addEventListener('change', updateOpacity); | ||||
| } | ||||
|  | ||||
| (function($) { | ||||
|     // Object to store ongoing requests by namespace | ||||
|     const requests = {}; | ||||
|  | ||||
|     $.abortiveSingularAjax = function(options) { | ||||
|         const namespace = options.namespace || 'default'; | ||||
|  | ||||
|         // Abort the current request in this namespace if it's still ongoing | ||||
|         if (requests[namespace]) { | ||||
|             requests[namespace].abort(); | ||||
|         } | ||||
|  | ||||
|         // Start a new AJAX request and store its reference in the correct namespace | ||||
|         requests[namespace] = $.ajax(options); | ||||
|  | ||||
|         // Return the current request in case it's needed | ||||
|         return requests[namespace]; | ||||
|     }; | ||||
| })(jQuery); | ||||
|  | ||||
| function request_textpreview_update() { | ||||
|     if (!$('body').hasClass('preview-text-enabled')) { | ||||
|         console.error("Preview text was requested but body tag was not setup") | ||||
|         return | ||||
|     } | ||||
|  | ||||
|     const data = {}; | ||||
|     $('textarea:visible, input:visible').each(function () { | ||||
|         const $element = $(this); // Cache the jQuery object for the current element | ||||
|         const name = $element.attr('name'); // Get the name attribute of the element | ||||
|         data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : undefined) : $element.val(); | ||||
|     }); | ||||
|  | ||||
|     $.abortiveSingularAjax({ | ||||
|         type: "POST", | ||||
|         url: preview_text_edit_filters_url, | ||||
|         data: data, | ||||
|         namespace: 'watchEdit' | ||||
|     }).done(function (data) { | ||||
|         $('#filters-and-triggers #text-preview-inner').text(data); | ||||
|     }).fail(function (error) { | ||||
|         if (error.statusText === 'abort') { | ||||
|             console.log('Request was aborted due to a new request being fired.'); | ||||
|         } else { | ||||
|             $('#filters-and-triggers #text-preview-inner').text('There was an error communicating with the server.'); | ||||
|         } | ||||
|     }) | ||||
| } | ||||
|  | ||||
| $(document).ready(function () { | ||||
|     $('#notification-setting-reset-to-default').click(function (e) { | ||||
|         $('#notification_title').val(''); | ||||
| @@ -27,5 +76,22 @@ $(document).ready(function () { | ||||
|  | ||||
|     toggleOpacity('#time_between_check_use_default', '#time_between_check', false); | ||||
|  | ||||
|     const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); | ||||
|     $("#text-preview-inner").css('max-height', (vh-300)+"px"); | ||||
|  | ||||
|     // Realtime preview of 'Filters & Text' setup | ||||
|     var debounced_request_textpreview_update = request_textpreview_update.debounce(100); | ||||
|  | ||||
|     $("#activate-text-preview").click(function (e) { | ||||
|         $('body').toggleClass('preview-text-enabled') | ||||
|         request_textpreview_update(); | ||||
|  | ||||
|         const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off'; | ||||
|         $("#text-preview-refresh")[method]('click', debounced_request_textpreview_update); | ||||
|         $('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update); | ||||
|         $('input:visible')[method]('keyup blur change', debounced_request_textpreview_update); | ||||
|         $("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update); | ||||
|     }); | ||||
|  | ||||
| }); | ||||
|  | ||||
|   | ||||
| @@ -40,15 +40,39 @@ | ||||
|   } | ||||
| } | ||||
|  | ||||
| #browser-steps-fieldlist { | ||||
|   height: 100%; | ||||
|   overflow-y: scroll; | ||||
| } | ||||
|  | ||||
| #browser-steps .flex-wrapper { | ||||
|   display: flex; | ||||
|   flex-flow: row; | ||||
|   height: 70vh; | ||||
|   font-size: 80%; | ||||
|  | ||||
|   @media screen and (min-width: 800px) { | ||||
|     display: flex; | ||||
|     flex-flow: row; | ||||
|     height: 70vh; | ||||
|     #browser-steps-fieldlist { | ||||
|       flex-grow: 0; /* Don't allow it to grow */ | ||||
|       flex-shrink: 0; /* Don't allow it to shrink */ | ||||
|       flex-basis: auto; /* Base width is determined by the content */ | ||||
|       max-width: 400px; /* Set a max width to prevent overflow */ | ||||
|       padding-left: 1rem; | ||||
|       overflow-y: scroll; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     #browser-steps-ui { | ||||
|       flex-grow: 1; /* Allow it to grow and fill the available space */ | ||||
|       flex-shrink: 1; /* Allow it to shrink if needed */ | ||||
|       flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */ | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   #browser-steps-ui { | ||||
|     background-color: #eee; | ||||
|     border-radius: 5px; | ||||
|   } | ||||
|  | ||||
|   #browser-steps-field-list { | ||||
|     text-align: center; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /*  this is duplicate :( */ | ||||
|   | ||||
| @@ -0,0 +1,45 @@ | ||||
| body.preview-text-enabled { | ||||
|   #filters-and-triggers > div { | ||||
|     display: flex; /* Establishes Flexbox layout */ | ||||
|     gap: 20px; /* Adds space between the columns */ | ||||
|     position: relative; /* Ensures the sticky positioning is relative to this parent */ | ||||
|   } | ||||
|  | ||||
|   /* layout of the page */ | ||||
|   #edit-text-filter, #text-preview { | ||||
|     flex: 1; /* Each column takes an equal amount of available space */ | ||||
|     align-self: flex-start; /* Aligns the right column to the start, allowing it to maintain its content height */ | ||||
|   } | ||||
|  | ||||
|   #edit-text-filter { | ||||
|     #pro-tips { | ||||
|       display: none; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   #text-preview { | ||||
|     position: sticky; | ||||
|     top: 25px; | ||||
|     display: block !important; | ||||
|   } | ||||
|  | ||||
|   /* actual preview area */ | ||||
|   #text-preview-inner { | ||||
|     background: var(--color-grey-900); | ||||
|     border: 1px solid var(--color-grey-600); | ||||
|     padding: 1rem; | ||||
|     color: #333; | ||||
|     font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */ | ||||
|     font-size: 12px; | ||||
|     overflow-x: scroll; | ||||
|     white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */ | ||||
|     overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */ | ||||
|   } | ||||
| } | ||||
|  | ||||
| #activate-text-preview { | ||||
|   right: 0; | ||||
|   position: absolute; | ||||
|   z-index: 0; | ||||
|   box-shadow: 1px 1px 4px var(--color-shadow-jump); | ||||
| } | ||||
| @@ -12,6 +12,7 @@ | ||||
| @import "parts/_darkmode"; | ||||
| @import "parts/_menu"; | ||||
| @import "parts/_love"; | ||||
| @import "parts/preview_text_filter"; | ||||
|  | ||||
| body { | ||||
|   color: var(--color-text); | ||||
|   | ||||
| @@ -46,14 +46,36 @@ | ||||
|     #browser_steps li > label { | ||||
|       display: none; } | ||||
|  | ||||
| #browser-steps-fieldlist { | ||||
|   height: 100%; | ||||
|   overflow-y: scroll; } | ||||
|  | ||||
| #browser-steps .flex-wrapper { | ||||
|   display: flex; | ||||
|   flex-flow: row; | ||||
|   height: 70vh; } | ||||
|   font-size: 80%; } | ||||
|   @media screen and (min-width: 800px) { | ||||
|     #browser-steps .flex-wrapper { | ||||
|       display: flex; | ||||
|       flex-flow: row; | ||||
|       height: 70vh; } | ||||
|       #browser-steps .flex-wrapper #browser-steps-fieldlist { | ||||
|         flex-grow: 0; | ||||
|         /* Don't allow it to grow */ | ||||
|         flex-shrink: 0; | ||||
|         /* Don't allow it to shrink */ | ||||
|         flex-basis: auto; | ||||
|         /* Base width is determined by the content */ | ||||
|         max-width: 400px; | ||||
|         /* Set a max width to prevent overflow */ | ||||
|         padding-left: 1rem; | ||||
|         overflow-y: scroll; } | ||||
|       #browser-steps .flex-wrapper #browser-steps-ui { | ||||
|         flex-grow: 1; | ||||
|         /* Allow it to grow and fill the available space */ | ||||
|         flex-shrink: 1; | ||||
|         /* Allow it to shrink if needed */ | ||||
|         flex-basis: 0; | ||||
|         /* Start with 0 base width so it stretches as much as possible */ } } | ||||
|   #browser-steps .flex-wrapper #browser-steps-ui { | ||||
|     background-color: #eee; | ||||
|     border-radius: 5px; } | ||||
|   #browser-steps .flex-wrapper #browser-steps-field-list { | ||||
|     text-align: center; } | ||||
|  | ||||
| /*  this is duplicate :( */ | ||||
| #browsersteps-selector-wrapper { | ||||
| @@ -411,6 +433,47 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark { | ||||
|     fill: #ff0000 !important; | ||||
|     transition: all ease 0.3s !important; } | ||||
|  | ||||
| body.preview-text-enabled { | ||||
|   /* layout of the page */ | ||||
|   /* actual preview area */ } | ||||
|   body.preview-text-enabled #filters-and-triggers > div { | ||||
|     display: flex; | ||||
|     /* Establishes Flexbox layout */ | ||||
|     gap: 20px; | ||||
|     /* Adds space between the columns */ | ||||
|     position: relative; | ||||
|     /* Ensures the sticky positioning is relative to this parent */ } | ||||
|   body.preview-text-enabled #edit-text-filter, body.preview-text-enabled #text-preview { | ||||
|     flex: 1; | ||||
|     /* Each column takes an equal amount of available space */ | ||||
|     align-self: flex-start; | ||||
|     /* Aligns the right column to the start, allowing it to maintain its content height */ } | ||||
|   body.preview-text-enabled #edit-text-filter #pro-tips { | ||||
|     display: none; } | ||||
|   body.preview-text-enabled #text-preview { | ||||
|     position: sticky; | ||||
|     top: 25px; | ||||
|     display: block !important; } | ||||
|   body.preview-text-enabled #text-preview-inner { | ||||
|     background: var(--color-grey-900); | ||||
|     border: 1px solid var(--color-grey-600); | ||||
|     padding: 1rem; | ||||
|     color: #333; | ||||
|     font-family: "Courier New", Courier, monospace; | ||||
|     /* Sets the font to a monospace type */ | ||||
|     font-size: 12px; | ||||
|     overflow-x: scroll; | ||||
|     white-space: pre-wrap; | ||||
|     /* Preserves whitespace and line breaks like <pre> */ | ||||
|     overflow-wrap: break-word; | ||||
|     /* Allows long words to break and wrap to the next line */ } | ||||
|  | ||||
| #activate-text-preview { | ||||
|   right: 0; | ||||
|   position: absolute; | ||||
|   z-index: 0; | ||||
|   box-shadow: 1px 1px 4px var(--color-shadow-jump); } | ||||
|  | ||||
| body { | ||||
|   color: var(--color-text); | ||||
|   background: var(--color-background-page); | ||||
| @@ -1194,11 +1257,9 @@ ul { | ||||
|   color: #fff; | ||||
|   opacity: 0.7; } | ||||
|  | ||||
|  | ||||
| .restock-label svg { | ||||
|   vertical-align: middle; } | ||||
|  | ||||
|  | ||||
| #chrome-extension-link { | ||||
|   padding: 9px; | ||||
|   border: 1px solid var(--color-grey-800); | ||||
|   | ||||
| @@ -11,7 +11,6 @@ from threading import Lock | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import requests | ||||
| import secrets | ||||
| import threading | ||||
| import time | ||||
| @@ -270,6 +269,7 @@ class ChangeDetectionStore: | ||||
|         self.needs_write_urgent = True | ||||
|  | ||||
|     def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True): | ||||
|         import requests | ||||
|  | ||||
|         if extras is None: | ||||
|             extras = {} | ||||
|   | ||||
| @@ -11,8 +11,11 @@ | ||||
|     class="notification-urls" ) | ||||
|                             }} | ||||
|                             <div class="pure-form-message-inline"> | ||||
|                               <ul> | ||||
|                                 <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li> | ||||
|                                 <p> | ||||
|                                 <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br> | ||||
| </p> | ||||
|                                 <div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div> | ||||
|                                 <ul style="display: none" id="advanced-help-notifications"> | ||||
|                                 <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li> | ||||
|                                 <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li> | ||||
|                                 <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li> | ||||
| @@ -40,7 +43,7 @@ | ||||
|  | ||||
|                             </div> | ||||
|                             <div class="pure-controls"> | ||||
|                                 <div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div> | ||||
|                                 <div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div> | ||||
|                             </div> | ||||
|                             <div class="pure-controls" style="display: none;" id="notification-tokens-info"> | ||||
|                                 <table class="pure-table" id="token-table"> | ||||
|   | ||||
| @@ -33,7 +33,7 @@ | ||||
|     <script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script> | ||||
|   </head> | ||||
|  | ||||
|   <body> | ||||
|   <body class=""> | ||||
|     <div class="header"> | ||||
|       <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu"> | ||||
|         {% if has_password and not current_user.is_authenticated %} | ||||
|   | ||||
| @@ -4,6 +4,7 @@ | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script> | ||||
| <script> | ||||
|     const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}'); | ||||
|     const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}'); | ||||
| @@ -49,7 +50,7 @@ | ||||
|             {% endif %} | ||||
|             {% if watch['processor'] == 'text_json_diff' %} | ||||
|             <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li> | ||||
|             <li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li> | ||||
|             <li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters & Triggers</a></li> | ||||
|             {% endif %} | ||||
|             <li class="tab"><a href="#notifications">Notifications</a></li> | ||||
|             <li class="tab"><a href="#stats">Stats</a></li> | ||||
| @@ -199,7 +200,7 @@ User-Agent: wonderbra 1.0") }} | ||||
|                         <div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div> | ||||
|                         <div class="flex-wrapper" > | ||||
|  | ||||
|                             <div id="browser-steps-ui" class="noselect"  style="width: 100%; background-color: #eee; border-radius: 5px;"> | ||||
|                             <div id="browser-steps-ui" class="noselect"> | ||||
|  | ||||
|                                 <div class="noselect"  id="browsersteps-selector-wrapper" style="width: 100%"> | ||||
|                                     <span class="loader" > | ||||
| @@ -214,7 +215,7 @@ User-Agent: wonderbra 1.0") }} | ||||
|                                     <canvas  class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas> | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div id="browser-steps-fieldlist" style="padding-left: 1em;  width: 350px; font-size: 80%;" > | ||||
|                             <div id="browser-steps-fieldlist" > | ||||
|                                 <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> | ||||
|                                 {{ render_field(form.browser_steps) }} | ||||
|                             </div> | ||||
| @@ -253,7 +254,10 @@ User-Agent: wonderbra 1.0") }} | ||||
|  | ||||
|             {% if watch['processor'] == 'text_json_diff' %} | ||||
|             <div class="tab-pane-inner" id="filters-and-triggers"> | ||||
|                     <div class="pure-control-group"> | ||||
|                 <span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">Activate preview</span> | ||||
|               <div> | ||||
|               <div id="edit-text-filter"> | ||||
|                     <div class="pure-control-group" id="pro-tips"> | ||||
|                             <strong>Pro-tips:</strong><br> | ||||
|                             <ul> | ||||
|                                 <li> | ||||
| @@ -275,9 +279,9 @@ xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                         {% if '/text()' in  field %} | ||||
|                           <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br> | ||||
|                         {% endif %} | ||||
|                         <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br> | ||||
|  | ||||
|                     <ul> | ||||
|                         <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br> | ||||
| <p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p> | ||||
|                     <ul id="advanced-help-selectors" style="display: none;"> | ||||
|                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). | ||||
|                             <ul> | ||||
| @@ -297,21 +301,25 @@ xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                                 <li>To use XPath1.0: Prefix with <code>xpath1:</code></li> | ||||
|                             </ul> | ||||
|                             </li> | ||||
|                     </ul> | ||||
|                     Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a | ||||
|                     <li> | ||||
|                         Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a | ||||
|                                 href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br> | ||||
|                     </li> | ||||
|                     </ul> | ||||
|  | ||||
|                 </span> | ||||
|                     </div> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header | ||||
| footer | ||||
| nav | ||||
| .stockticker") }} | ||||
| .stockticker | ||||
| //*[contains(text(), 'Advertisement')]") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                           <li> Remove HTML element(s) by CSS selector before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS selectors </li> | ||||
|                           <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                           <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS and XPath selectors </li> | ||||
|                           <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                         </ul> | ||||
|                       </span> | ||||
|                 </fieldset> | ||||
| @@ -326,14 +334,21 @@ nav | ||||
|                     <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br> | ||||
|                     <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> | ||||
|                 </fieldset> | ||||
|  | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.check_unique_lines) }} | ||||
|                     <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span> | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.remove_duplicate_lines) }} | ||||
|                     <span class="pure-form-message-inline">Remove duplicate lines of text</span> | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.sort_text_alphabetically) }} | ||||
|                     <span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span> | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.check_unique_lines) }} | ||||
|                     <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span> | ||||
|                     {{ render_checkbox_field(form.trim_text_whitespace) }} | ||||
|                     <span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span> | ||||
|                 </fieldset> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
| @@ -403,7 +418,19 @@ Unavailable") }} | ||||
|                 </fieldset> | ||||
|                 </div> | ||||
|             </div> | ||||
|             {% endif %} | ||||
|               <div id="text-preview" style="display: none;" > | ||||
|                     <script> | ||||
|                         const preview_text_edit_filters_url="{{url_for('watch_get_preview_rendered', uuid=uuid)}}"; | ||||
|                     </script> | ||||
|                     <span><strong>Preview of the text that is used for changedetection after all filters run.</strong></span><br> | ||||
|                     {#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#} | ||||
|                 <p> | ||||
|                     <div id="text-preview-inner"></div> | ||||
|                 </p> | ||||
|             </div> | ||||
|           </div> | ||||
|         </div> | ||||
|         {% endif %} | ||||
|         {# rendered sub Template #} | ||||
|         {% if extra_form_content %} | ||||
|             <div class="tab-pane-inner" id="extras_tab"> | ||||
|   | ||||
| @@ -76,7 +76,7 @@ | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} | ||||
|                         <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span> | ||||
|                         <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span> | ||||
|                     </div> | ||||
|                 {% if form.requests.proxy %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
| @@ -155,11 +155,13 @@ | ||||
|                       {{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header | ||||
| footer | ||||
| nav | ||||
| .stockticker") }} | ||||
| .stockticker | ||||
| //*[contains(text(), 'Advertisement')]") }} | ||||
|                       <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                           <li> Remove HTML element(s) by CSS selector before text conversion. </li> | ||||
|                           <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                           <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS and XPath selectors </li> | ||||
|                           <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                         </ul> | ||||
|                       </span> | ||||
|                     </fieldset> | ||||
|   | ||||
| @@ -1,12 +1,27 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import os | ||||
| import time | ||||
| from flask import url_for | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def set_response(): | ||||
|     import time | ||||
|     data = f"""<html> | ||||
|        <body> | ||||
|      <h1>Awesome, you made it</h1> | ||||
|      yeah the socks request worked | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(data) | ||||
|     time.sleep(1) | ||||
|  | ||||
|  | ||||
| def test_socks5(client, live_server, measure_memory_usage): | ||||
|     live_server_setup(live_server) | ||||
|     set_response() | ||||
|  | ||||
|     # Setup a proxy | ||||
|     res = client.post( | ||||
| @@ -24,7 +39,10 @@ def test_socks5(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '') | ||||
|     # Because the socks server should connect back to us | ||||
|     test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}" | ||||
|     test_url = test_url.replace('localhost.localdomain', 'cdio') | ||||
|     test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
| @@ -60,4 +78,4 @@ def test_socks5(client, live_server, measure_memory_usage): | ||||
|     ) | ||||
|  | ||||
|     # Should see the proper string | ||||
|     assert "+0200:".encode('utf-8') in res.data | ||||
|     assert "Awesome, you made it".encode('utf-8') in res.data | ||||
|   | ||||
| @@ -1,16 +1,32 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import os | ||||
| import time | ||||
| from flask import url_for | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def set_response(): | ||||
|     import time | ||||
|     data = f"""<html> | ||||
|        <body> | ||||
|      <h1>Awesome, you made it</h1> | ||||
|      yeah the socks request worked | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(data) | ||||
|     time.sleep(1) | ||||
|  | ||||
| # should be proxies.json mounted from run_proxy_tests.sh already | ||||
| # -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json | ||||
| def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '') | ||||
|     set_response() | ||||
|     # Because the socks server should connect back to us | ||||
|     test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}" | ||||
|     test_url = test_url.replace('localhost.localdomain', 'cdio') | ||||
|     test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     res = client.get(url_for("settings_page")) | ||||
|     assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data | ||||
| @@ -49,4 +65,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage) | ||||
|     ) | ||||
|  | ||||
|     # Should see the proper string | ||||
|     assert "+0200:".encode('utf-8') in res.data | ||||
|     assert "Awesome, you made it".encode('utf-8') in res.data | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
| import os | ||||
| import time | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output | ||||
| from changedetectionio.notification import ( | ||||
|     default_notification_body, | ||||
|     default_notification_format, | ||||
| @@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|     assert b'not-in-stock' not in res.data | ||||
|  | ||||
|     # We should have a notification | ||||
|     time.sleep(2) | ||||
|     wait_for_notification_endpoint_output() | ||||
|     assert os.path.isfile("test-datastore/notification.txt"), "Notification received" | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
| @@ -103,6 +103,7 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|     set_original_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(5) | ||||
|     assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default" | ||||
|  | ||||
|     # BUT we should see that it correctly shows "not in stock" | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
| import os.path | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output | ||||
| from changedetectionio import html_tools | ||||
|  | ||||
|  | ||||
| @@ -112,7 +112,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|     res = client.post( | ||||
|         url_for("settings_page"), | ||||
|         data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}", | ||||
|               "application-notification_body": 'triggered text was -{{triggered_text}}-', | ||||
|               "application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了', | ||||
|               # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation | ||||
|               "application-notification_urls": test_notification_url, | ||||
|               "application-minutes_between_check": 180, | ||||
| @@ -165,11 +165,12 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     # Takes a moment for apprise to fire | ||||
|     time.sleep(3) | ||||
|     wait_for_notification_endpoint_output() | ||||
|     assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file" | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         response= f.read() | ||||
|         assert '-Oh yes please-' in response | ||||
|     with open("test-datastore/notification.txt", 'rb') as f: | ||||
|         response = f.read() | ||||
|         assert b'-Oh yes please-' in response | ||||
|         assert '网站监测 内容更新了'.encode('utf-8') in response | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|   | ||||
| @@ -69,6 +69,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|  | ||||
|     # Check the 'get latest snapshot works' | ||||
|     res = client.get(url_for("watch_get_latest_html", uuid=uuid)) | ||||
|     assert b'which has this one new line' in res.data | ||||
|  | ||||
|     # Now something should be ready, indicated by having a 'unviewed' class | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
| @@ -86,7 +92,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     assert expected_url.encode('utf-8') in res.data | ||||
|  | ||||
|     # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times | ||||
|     res = client.get(url_for("diff_history_page", uuid="first")) | ||||
|     res = client.get(url_for("diff_history_page", uuid=uuid)) | ||||
|     assert b'selected=""' in res.data, "Confirm diff history page loaded" | ||||
|  | ||||
|     # Check the [preview] pulls the right one | ||||
| @@ -143,18 +149,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|     # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     client.get(url_for("clear_watch_history", uuid=uuid)) | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'preview/' in res.data | ||||
|  | ||||
|  | ||||
|     # Check the 'get latest snapshot works' | ||||
|     res = client.get(url_for("watch_get_latest_html", uuid=uuid)) | ||||
|     assert b'<head><title>head title</title></head>' in res.data | ||||
|  | ||||
|     # | ||||
|     # Cleanup everything | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|   | ||||
| @@ -87,6 +87,9 @@ def test_element_removal_output(): | ||||
|      Some initial text<br> | ||||
|      <p>across multiple lines</p> | ||||
|      <div id="changetext">Some text that changes</div> | ||||
|      <div>Some text should be matched by xPath // selector</div> | ||||
|      <div>Some text should be matched by xPath selector</div> | ||||
|      <div>Some text should be matched by xPath1 selector</div> | ||||
|      </body> | ||||
|     <footer> | ||||
|     <p>Footer</p> | ||||
| @@ -94,7 +97,16 @@ def test_element_removal_output(): | ||||
|      </html> | ||||
|     """ | ||||
|     html_blob = element_removal( | ||||
|         ["header", "footer", "nav", "#changetext"], html_content=content | ||||
|       [ | ||||
|         "header", | ||||
|         "footer", | ||||
|         "nav", | ||||
|         "#changetext", | ||||
|         "//*[contains(text(), 'xPath // selector')]", | ||||
|         "xpath://*[contains(text(), 'xPath selector')]", | ||||
|         "xpath1://*[contains(text(), 'xPath1 selector')]" | ||||
|       ], | ||||
|       html_content=content | ||||
|     ) | ||||
|     text = get_text(html_blob) | ||||
|     assert ( | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
| import pytest | ||||
|  | ||||
|  | ||||
| @@ -38,6 +38,11 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage): | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|  | ||||
|     # Content type recording worked | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid]['content-type'] == "text/html" | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|   | ||||
| @@ -4,7 +4,7 @@ | ||||
| import os | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import set_original_response, live_server_setup | ||||
| from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output | ||||
| from changedetectionio.model import App | ||||
|  | ||||
|  | ||||
| @@ -102,14 +102,15 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     time.sleep(3) | ||||
|     wait_for_notification_endpoint_output() | ||||
|  | ||||
|     # Shouldn't exist, shouldn't have fired | ||||
|     assert not os.path.isfile("test-datastore/notification.txt") | ||||
|     # Now the filter should exist | ||||
|     set_response_with_filter() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(3) | ||||
|  | ||||
|     wait_for_notification_endpoint_output() | ||||
|  | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| import os | ||||
| import time | ||||
| from loguru import logger | ||||
| from flask import url_for | ||||
| from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks | ||||
| from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \ | ||||
|     wait_for_notification_endpoint_output | ||||
| from changedetectionio.model import App | ||||
|  | ||||
|  | ||||
| @@ -26,6 +28,12 @@ def run_filter_test(client, live_server, content_filter): | ||||
|     # Response WITHOUT the filter ID element | ||||
|     set_original_response() | ||||
|  | ||||
|     # Goto the edit page, add our ignore text | ||||
|     notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json') | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
| @@ -34,83 +42,92 @@ def run_filter_test(client, live_server, content_filter): | ||||
|     if os.path.isfile("test-datastore/notification.txt"): | ||||
|         os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": ''}, | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     # Give the thread time to pick up the first version | ||||
|     assert b"1 Imported" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Goto the edit page, add our ignore text | ||||
|     # Add our URL to the import page | ||||
|     url = url_for('test_notification_endpoint', _external=True) | ||||
|     notification_url = url.replace('http', 'json') | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|  | ||||
|     print(">>>> Notification URL: " + notification_url) | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure" | ||||
|  | ||||
|     # Just a regular notification setting, this will be used by the special 'filter not found' notification | ||||
|     notification_form_data = {"notification_urls": notification_url, | ||||
|                               "notification_title": "New ChangeDetection.io Notification - {{watch_url}}", | ||||
|                               "notification_body": "BASE URL: {{base_url}}\n" | ||||
|                                                    "Watch URL: {{watch_url}}\n" | ||||
|                                                    "Watch UUID: {{watch_uuid}}\n" | ||||
|                                                    "Watch title: {{watch_title}}\n" | ||||
|                                                    "Watch tag: {{watch_tag}}\n" | ||||
|                                                    "Preview: {{preview_url}}\n" | ||||
|                                                    "Diff URL: {{diff_url}}\n" | ||||
|                                                    "Snapshot: {{current_snapshot}}\n" | ||||
|                                                    "Diff: {{diff}}\n" | ||||
|                                                    "Diff Full: {{diff_full}}\n" | ||||
|                                                    "Diff as Patch: {{diff_patch}}\n" | ||||
|                                                    ":-)", | ||||
|                               "notification_format": "Text"} | ||||
|     watch_data = {"notification_urls": notification_url, | ||||
|                   "notification_title": "New ChangeDetection.io Notification - {{watch_url}}", | ||||
|                   "notification_body": "BASE URL: {{base_url}}\n" | ||||
|                                        "Watch URL: {{watch_url}}\n" | ||||
|                                        "Watch UUID: {{watch_uuid}}\n" | ||||
|                                        "Watch title: {{watch_title}}\n" | ||||
|                                        "Watch tag: {{watch_tag}}\n" | ||||
|                                        "Preview: {{preview_url}}\n" | ||||
|                                        "Diff URL: {{diff_url}}\n" | ||||
|                                        "Snapshot: {{current_snapshot}}\n" | ||||
|                                        "Diff: {{diff}}\n" | ||||
|                                        "Diff Full: {{diff_full}}\n" | ||||
|                                        "Diff as Patch: {{diff_patch}}\n" | ||||
|                                        ":-)", | ||||
|                   "notification_format": "Text", | ||||
|                   "fetch_backend": "html_requests", | ||||
|                   "filter_failure_notification_send": 'y', | ||||
|                   "headers": "", | ||||
|                   "tags": "my tag", | ||||
|                   "title": "my title 123", | ||||
|                   "time_between_check-hours": 5,  # So that the queue runner doesnt also put it in | ||||
|                   "url": test_url, | ||||
|                   } | ||||
|  | ||||
|     notification_form_data.update({ | ||||
|         "url": test_url, | ||||
|         "tags": "my tag", | ||||
|         "title": "my title 123", | ||||
|         "headers": "", | ||||
|         "filter_failure_notification_send": 'y', | ||||
|         "include_filters": content_filter, | ||||
|         "fetch_backend": "html_requests"}) | ||||
|  | ||||
|     # A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data=notification_form_data, | ||||
|         url_for("edit_page", uuid=uuid), | ||||
|         data=watch_data, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure" | ||||
|  | ||||
|     # Now the notification should not exist, because we didnt reach the threshold | ||||
|     # Now add a filter, because recheck hours == 5, ONLY pressing of the [edit] or [recheck all] should trigger | ||||
|     watch_data['include_filters'] = content_filter | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid=uuid), | ||||
|         data=watch_data, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|  | ||||
|     # It should have checked once so far and given this error (because we hit SAVE) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     assert not os.path.isfile("test-datastore/notification.txt") | ||||
|  | ||||
|     # Hitting [save] would have triggered a recheck, and we have a filter, so this would be ONE failure | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 1, "Should have been checked once" | ||||
|  | ||||
|     # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented) | ||||
|     for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2): | ||||
|     # Add 4 more checks | ||||
|     checked = 0 | ||||
|     ATTEMPT_THRESHOLD_SETTING = live_server.app.config['DATASTORE'].data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) | ||||
|     for i in range(0, ATTEMPT_THRESHOLD_SETTING - 2): | ||||
|         checked += 1 | ||||
|         client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|         wait_for_all_checks(client) | ||||
|         time.sleep(2) # delay for apprise to fire | ||||
|         assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}" | ||||
|  | ||||
|     # We should see something in the frontend | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'Warning, no filters were found' in res.data | ||||
|         res = client.get(url_for("index")) | ||||
|         assert b'Warning, no filters were found' in res.data | ||||
|         assert not os.path.isfile("test-datastore/notification.txt") | ||||
|         time.sleep(1) | ||||
|          | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5 | ||||
|  | ||||
|     time.sleep(2) | ||||
|     # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(2)  # delay for apprise to fire | ||||
|     wait_for_notification_endpoint_output() | ||||
|  | ||||
|     # Now it should exist and contain our "filter not found" alert | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|  | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         notification = f.read() | ||||
|  | ||||
| @@ -123,10 +140,11 @@ def run_filter_test(client, live_server, content_filter): | ||||
|     set_response_with_filter() | ||||
|  | ||||
|     # Try several times, it should NOT have 'filter not found' | ||||
|     for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT): | ||||
|     for i in range(0, ATTEMPT_THRESHOLD_SETTING + 2): | ||||
|         client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|     wait_for_notification_endpoint_output() | ||||
|     # It should have sent a notification, but.. | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|     # but it should not contain the info about a failed filter (because there was none in this case) | ||||
| @@ -135,9 +153,6 @@ def run_filter_test(client, live_server, content_filter): | ||||
|     assert not 'CSS/xPath filter was not present in the page' in notification | ||||
|  | ||||
|     # Re #1247 - All tokens got replaced correctly in the notification | ||||
|     res = client.get(url_for("index")) | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     # UUID is correct, but notification contains tag uuid as UUIID wtf | ||||
|     assert uuid in notification | ||||
|  | ||||
|     # cleanup for the next | ||||
| @@ -152,9 +167,11 @@ def test_setup(live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage): | ||||
| #    live_server_setup(live_server) | ||||
|     run_filter_test(client, live_server,'#nope-doesnt-exist') | ||||
|  | ||||
| def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage): | ||||
| #    live_server_setup(live_server) | ||||
|     run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]') | ||||
|  | ||||
| # Test that notification is never sent | ||||
|   | ||||
| @@ -1,11 +1,8 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from urllib.request import urlopen | ||||
| from .util import set_original_response, set_modified_response, live_server_setup | ||||
|  | ||||
| sleep_time_for_fetch_thread = 3 | ||||
| from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks | ||||
| import time | ||||
|  | ||||
|  | ||||
| def set_nonrenderable_response(): | ||||
| @@ -16,12 +13,18 @@ def set_nonrenderable_response(): | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     time.sleep(1) | ||||
|  | ||||
|     return None | ||||
|  | ||||
| def set_zero_byte_response(): | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("") | ||||
|     time.sleep(1) | ||||
|     return None | ||||
|  | ||||
| def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage): | ||||
|     set_original_response() | ||||
|     live_server_setup(live_server) | ||||
| @@ -35,18 +38,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Do this a few times.. ensures we dont accidently set the status | ||||
|     for n in range(3): | ||||
|         client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|         # Give the thread time to pick it up | ||||
|         time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         res = client.get(url_for("index")) | ||||
|         assert b'unviewed' not in res.data | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|  | ||||
|     ##################### | ||||
| @@ -64,7 +60,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
| @@ -86,14 +82,20 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     client.get(url_for("mark_all_viewed"), follow_redirects=True) | ||||
|  | ||||
|  | ||||
|  | ||||
|     # A totally zero byte (#2528) response should also not trigger an error | ||||
|     set_zero_byte_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON | ||||
|     assert b'fetch-error' not in res.data | ||||
|  | ||||
|     # | ||||
|     # Cleanup everything | ||||
|   | ||||
| @@ -291,11 +291,11 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me | ||||
|         data={ | ||||
|               "application-fetch_backend": "html_requests", | ||||
|               "application-minutes_between_check": 180, | ||||
|               "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444 }', | ||||
|               "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }', | ||||
|               "application-notification_format": default_notification_format, | ||||
|               "application-notification_urls": test_notification_url, | ||||
|               # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation | ||||
|               "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}", | ||||
|               "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ", | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -324,6 +324,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me | ||||
|         j = json.loads(x) | ||||
|         assert j['url'].startswith('http://localhost') | ||||
|         assert j['secret'] == 444 | ||||
|         assert j['somebug'] == '网站监测 内容更新了' | ||||
|  | ||||
|     # URL check, this will always be converted to lowercase | ||||
|     assert os.path.isfile("test-datastore/notification-url.txt") | ||||
| @@ -354,9 +355,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me | ||||
| #2510 | ||||
| def test_global_send_test_notification(client, live_server, measure_memory_usage): | ||||
|  | ||||
|  | ||||
|     #live_server_setup(live_server) | ||||
|     set_original_response() | ||||
|     if os.path.isfile("test-datastore/notification.txt"): | ||||
|         os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|     # otherwise other settings would have already existed from previous tests in this file | ||||
|     res = client.post( | ||||
| @@ -364,7 +366,8 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage | ||||
|         data={ | ||||
|             "application-fetch_backend": "html_requests", | ||||
|             "application-minutes_between_check": 180, | ||||
|             "application-notification_body": 'change detection is cool', | ||||
|             #1995 UTF-8 content should be encoded | ||||
|             "application-notification_body": 'change detection is cool 网站监测 内容更新了', | ||||
|             "application-notification_format": default_notification_format, | ||||
|             "application-notification_urls": "", | ||||
|             "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}", | ||||
| @@ -399,8 +402,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage | ||||
|  | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         x = f.read() | ||||
|         assert 'change detection is coo' in x | ||||
|  | ||||
|         assert 'change detection is cool 网站监测 内容更新了' in x | ||||
|  | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
| @@ -420,7 +422,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         x = f.read() | ||||
|         # Should come from notification.py default handler when there is no notification body to pull from | ||||
|         assert 'change detection is coo' in x | ||||
|         assert 'change detection is cool 网站监测 内容更新了' in x | ||||
|  | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
|   | ||||
| @@ -3,7 +3,7 @@ import os | ||||
| import time | ||||
|  | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
| from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output | ||||
| from ..notification import default_notification_format | ||||
|  | ||||
| instock_props = [ | ||||
| @@ -146,14 +146,13 @@ def _run_test_minmax_limit(client, extra_watch_edit_form): | ||||
|         data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # A change in price, should trigger a change by default | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     data = { | ||||
|         "tags": "", | ||||
|         "url": test_url, | ||||
|         "headers": "", | ||||
|         "time_between_check-hours": 5, | ||||
|         'fetch_backend': "html_requests" | ||||
|     } | ||||
|     data.update(extra_watch_edit_form) | ||||
| @@ -178,11 +177,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form): | ||||
|     assert b'1,000.45' or b'1000.45' in res.data #depending on locale | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|  | ||||
|     # price changed to something LESS than min (900), SHOULD be a change | ||||
|     set_original_response(props_markup=instock_props[0], price='890.45') | ||||
|     # let previous runs wait | ||||
|     time.sleep(1) | ||||
|  | ||||
|     res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'1 watches queued for rechecking.' in res.data | ||||
|     wait_for_all_checks(client) | ||||
| @@ -197,7 +194,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form): | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'1,890.45' or b'1890.45' in res.data | ||||
|     # Depending on the LOCALE it may be either of these (generally for US/default/etc) | ||||
|     assert b'1,890.45' in res.data or b'1890.45' in res.data | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
| @@ -362,7 +360,7 @@ def test_change_with_notification_values(client, live_server): | ||||
|     set_original_response(props_markup=instock_props[0], price='1950.45') | ||||
|     client.get(url_for("form_watch_checknow")) | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(3) | ||||
|     wait_for_notification_endpoint_output() | ||||
|     assert os.path.isfile("test-datastore/notification.txt"), "Notification received" | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         notification = f.read() | ||||
|   | ||||
| @@ -11,6 +11,8 @@ def set_original_ignore_response(): | ||||
|      <p>Some initial text</p> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <p>So let's see what happens.</p> | ||||
|      <p>   So let's see what happens.   <br> </p> | ||||
|      <p>A - sortable line</p>  | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
| @@ -164,5 +166,52 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage): | ||||
|     assert res.data.find(b'A uppercase') < res.data.find(b'Z last') | ||||
|     assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines') | ||||
|      | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
|  | ||||
| def test_extra_filters(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     set_original_ignore_response() | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"remove_duplicate_lines": "y", | ||||
|               "trim_text_whitespace": "y", | ||||
|               "sort_text_alphabetically": "",  # leave this OFF for testing | ||||
|               "url": test_url, | ||||
|               "fetch_backend": "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first") | ||||
|     ) | ||||
|  | ||||
|     assert res.data.count(b"see what happens.") == 1 | ||||
|  | ||||
|     # still should remain unsorted ('A - sortable line') stays at the end | ||||
|     assert res.data.find(b'A - sortable line') > res.data.find(b'Which is across multiple lines') | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| @@ -76,6 +76,18 @@ def set_more_modified_response(): | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def wait_for_notification_endpoint_output(): | ||||
|     '''Apprise can take a few seconds to fire''' | ||||
|     #@todo - could check the apprise object directly instead of looking for this file | ||||
|     from os.path import isfile | ||||
|     for i in range(1, 20): | ||||
|         time.sleep(1) | ||||
|         if isfile("test-datastore/notification.txt"): | ||||
|             return True | ||||
|  | ||||
|     return False | ||||
|  | ||||
|  | ||||
| # kinda funky, but works for now | ||||
| def extract_api_key_from_UI(client): | ||||
|     import re | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| from .processors.exceptions import ProcessorException | ||||
| from . import content_fetchers | ||||
|  | ||||
| import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions | ||||
| from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse | ||||
| from changedetectionio import html_tools | ||||
|  | ||||
| @@ -190,7 +189,9 @@ class update_worker(threading.Thread): | ||||
|                 'screenshot': None | ||||
|             }) | ||||
|             self.notification_q.put(n_object) | ||||
|             logger.error(f"Sent filter not found notification for {watch_uuid}") | ||||
|             logger.debug(f"Sent filter not found notification for {watch_uuid}") | ||||
|         else: | ||||
|             logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs") | ||||
|  | ||||
|     def send_step_failure_notification(self, watch_uuid, step_n): | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid, False) | ||||
| @@ -277,7 +278,7 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                         update_handler.call_browser() | ||||
|  | ||||
|                         changed_detected, update_obj, contents = update_handler.run_changedetection( | ||||
|                         changed_detected, update_obj, contents, content_after_filters = update_handler.run_changedetection( | ||||
|                             watch=watch, | ||||
|                             skip_when_checksum_same=skip_when_same_checksum, | ||||
|                         ) | ||||
| @@ -301,7 +302,7 @@ class update_worker(threading.Thread): | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message}) | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except content_fetchers.exceptions.ReplyWithContentButNoText as e: | ||||
|                     except content_fetchers_exceptions.ReplyWithContentButNoText as e: | ||||
|                         # Totally fine, it's by choice - just continue on, nothing more to care about | ||||
|                         # Page had elements/content but no renderable text | ||||
|                         # Backend (not filters) gave zero output | ||||
| @@ -327,7 +328,7 @@ class update_worker(threading.Thread): | ||||
|                              | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except content_fetchers.exceptions.Non200ErrorCodeReceived as e: | ||||
|                     except content_fetchers_exceptions.Non200ErrorCodeReceived as e: | ||||
|                         if e.status_code == 403: | ||||
|                             err_text = "Error - 403 (Access denied) received" | ||||
|                         elif e.status_code == 404: | ||||
| @@ -365,38 +366,42 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                         # Only when enabled, send the notification | ||||
|                         if watch.get('filter_failure_notification_send', False): | ||||
|                             c = watch.get('consecutive_filter_failures', 5) | ||||
|                             c = watch.get('consecutive_filter_failures', 0) | ||||
|                             c += 1 | ||||
|                             # Send notification if we reached the threshold? | ||||
|                             threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', | ||||
|                                                                                            0) | ||||
|                             logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}") | ||||
|                             if threshold > 0 and c >= threshold: | ||||
|                             threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) | ||||
|                             logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}") | ||||
|                             if c >= threshold: | ||||
|                                 if not watch.get('notification_muted'): | ||||
|                                     logger.debug(f"Sending filter failed notification for {uuid}") | ||||
|                                     self.send_filter_failure_notification(uuid) | ||||
|                                 c = 0 | ||||
|                                 logger.debug(f"Reset filter failure count back to zero") | ||||
|  | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) | ||||
|                         else: | ||||
|                             logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping") | ||||
|  | ||||
|  | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e: | ||||
|                     except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e: | ||||
|                         # Yes fine, so nothing todo, don't continue to process. | ||||
|                         process_changedetection_results = False | ||||
|                         changed_detected = False | ||||
|                     except content_fetchers.exceptions.BrowserConnectError as e: | ||||
|                     except content_fetchers_exceptions.BrowserConnectError as e: | ||||
|                         self.datastore.update_watch(uuid=uuid, | ||||
|                                                     update_obj={'last_error': e.msg | ||||
|                                                                 } | ||||
|                                                     ) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.BrowserFetchTimedOut as e: | ||||
|                     except content_fetchers_exceptions.BrowserFetchTimedOut as e: | ||||
|                         self.datastore.update_watch(uuid=uuid, | ||||
|                                                     update_obj={'last_error': e.msg | ||||
|                                                                 } | ||||
|                                                     ) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.BrowserStepsStepException as e: | ||||
|                     except content_fetchers_exceptions.BrowserStepsStepException as e: | ||||
|  | ||||
|                         if not self.datastore.data['watching'].get(uuid): | ||||
|                             continue | ||||
| @@ -423,7 +428,7 @@ class update_worker(threading.Thread): | ||||
|                                                     ) | ||||
|  | ||||
|                         if watch.get('filter_failure_notification_send', False): | ||||
|                             c = watch.get('consecutive_filter_failures', 5) | ||||
|                             c = watch.get('consecutive_filter_failures', 0) | ||||
|                             c += 1 | ||||
|                             # Send notification if we reached the threshold? | ||||
|                             threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', | ||||
| @@ -438,25 +443,25 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except content_fetchers.exceptions.EmptyReply as e: | ||||
|                     except content_fetchers_exceptions.EmptyReply as e: | ||||
|                         # Some kind of custom to-str handler in the exception handler that does this? | ||||
|                         err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.ScreenshotUnavailable as e: | ||||
|                     except content_fetchers_exceptions.ScreenshotUnavailable as e: | ||||
|                         err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'" | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.JSActionExceptions as e: | ||||
|                     except content_fetchers_exceptions.JSActionExceptions as e: | ||||
|                         err_text = "Error running JS Actions - Page request - "+e.message | ||||
|                         if e.screenshot: | ||||
|                             watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.PageUnloadable as e: | ||||
|                     except content_fetchers_exceptions.PageUnloadable as e: | ||||
|                         err_text = "Page request from server didnt respond correctly" | ||||
|                         if e.message: | ||||
|                             err_text = "{} - {}".format(err_text, e.message) | ||||
| @@ -468,7 +473,7 @@ class update_worker(threading.Thread): | ||||
|                                                                            'last_check_status': e.status_code, | ||||
|                                                                            'has_ldjson_price_data': None}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e: | ||||
|                     except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e: | ||||
|                         err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher." | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|                         process_changedetection_results = False | ||||
| @@ -486,6 +491,8 @@ class update_worker(threading.Thread): | ||||
|                         if not self.datastore.data['watching'].get(uuid): | ||||
|                             continue | ||||
|  | ||||
|                         update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|  | ||||
|                         # Mark that we never had any failures | ||||
|                         if not watch.get('ignore_status_codes'): | ||||
|                             update_obj['consecutive_filter_failures'] = 0 | ||||
|   | ||||
| @@ -18,7 +18,7 @@ services: | ||||
|   # | ||||
|   #        Log levels are in descending order. (TRACE is the most detailed one) | ||||
|   #        Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL | ||||
|   #      - LOGGER_LEVEL=DEBUG | ||||
|   #      - LOGGER_LEVEL=TRACE | ||||
|   # | ||||
|   #       Alternative WebDriver/selenium URL, do not use "'s or 's! | ||||
|   #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub | ||||
| @@ -29,8 +29,9 @@ services: | ||||
|   # | ||||
|   #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy | ||||
|   # | ||||
|   #       Alternative Playwright URL, do not use "'s or 's! | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000 | ||||
|   #       Alternative target "Chrome" Playwright URL, do not use "'s or 's! | ||||
|   #       "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser. | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 | ||||
|   # | ||||
|   #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password | ||||
|   # | ||||
| @@ -73,10 +74,10 @@ services: | ||||
| #              condition: service_started | ||||
|  | ||||
|  | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages. | ||||
|      # RECOMMENDED FOR FETCHING PAGES WITH CHROME | ||||
| #    playwright-chrome: | ||||
| #        hostname: playwright-chrome | ||||
| #    sockpuppetbrowser: | ||||
| #        hostname: sockpuppetbrowser | ||||
| #        image: dgtlmoon/sockpuppetbrowser:latest | ||||
| #        cap_add: | ||||
| #            - SYS_ADMIN | ||||
|   | ||||
| @@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes | ||||
| # jq not available on Windows so must be installed manually | ||||
|  | ||||
| # Notification library | ||||
| apprise~=1.8.0 | ||||
| apprise==1.9.0 | ||||
|  | ||||
| # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 | ||||
| # and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible | ||||
| @@ -79,8 +79,9 @@ pyppeteerstealth>=0.0.4 | ||||
| pytest ~=7.2 | ||||
| pytest-flask ~=1.2 | ||||
|  | ||||
| # Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708) | ||||
| jsonschema==4.17.3 | ||||
| # Anything 4.0 and up but not 5.0 | ||||
| jsonschema ~= 4.0 | ||||
|  | ||||
|  | ||||
| loguru | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user