Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-10-31 22:57:18 +00:00)
Compare commits: bugfix-han...playwright

1 commit

| Author | SHA1 | Date |
|---|---|---|
|  | 443dd56e8a |  |

**.github/test/Dockerfile-alpine** (vendored, 31 changed lines)
```diff
@@ -1,31 +0,0 @@
# Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
# Some packages wont install via pypi because they dont have a wheel available under this architecture.

FROM ghcr.io/linuxserver/baseimage-alpine:3.16
ENV PYTHONUNBUFFERED=1

COPY requirements.txt /requirements.txt

RUN \
  apk add --update --no-cache --virtual=build-dependencies \
    cargo \
    g++ \
    gcc \
    libc-dev \
    libffi-dev \
    libxslt-dev \
    make \
    openssl-dev \
    py3-wheel \
    python3-dev \
    zlib-dev && \
  apk add --update --no-cache \
    libxslt \
    python3 \
    py3-pip && \
  echo "**** pip3 install test of changedetection.io ****" && \
  pip3 install -U pip wheel setuptools && \
  pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.16/ -r /requirements.txt && \
  apk del --purge \
    build-dependencies
```

**.github/workflows/test-container-build.yml** (vendored, 11 changed lines)
```diff
@@ -43,16 +43,6 @@ jobs:
            version: latest
            driver-opts: image=moby/buildkit:master

        # https://github.com/dgtlmoon/changedetection.io/pull/1067
        # Check we can still build under alpine/musl
        - name: Test that the docker containers can build (musl via alpine check)
          id: docker_build_musl
          uses: docker/build-push-action@v2
          with:
            context: ./
            file: ./.github/test/Dockerfile-alpine
            platforms: linux/amd64,linux/arm64

        - name: Test that the docker containers can build
          id: docker_build
          uses: docker/build-push-action@v2
@@ -63,4 +53,3 @@ jobs:
            platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
            cache-from: type=local,src=/tmp/.buildx-cache
            cache-to: type=local,dest=/tmp/.buildx-cache
```
```diff
@@ -21,11 +21,9 @@ COPY requirements.txt /requirements.txt

RUN pip install --target=/dependencies -r /requirements.txt

# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
RUN pip install --target=/dependencies playwright~=1.26 \
    || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
RUN pip install --target=/dependencies jq~=1.3 \
    || echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."


# Final image stage
FROM python:3.8-slim
```
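The `|| echo "WARN: ..."` pattern above makes Playwright and jq best-effort installs. A minimal sketch of how an application can then detect such an optional dependency at startup (the exact check used by changedetection.io may differ, this is only the general pattern):

```python
import importlib.util

# Probe for modules installed via "pip install ... || echo WARN"; if the wheel
# was unavailable on this platform, the matching feature is simply disabled.
PLAYWRIGHT_AVAILABLE = importlib.util.find_spec("playwright") is not None
JQ_AVAILABLE = importlib.util.find_spec("jq") is not None

if not PLAYWRIGHT_AVAILABLE:
    print("WARN: Playwright not installed, the Playwright fetcher option is disabled.")
if not JQ_AVAILABLE:
    print("WARN: jq not installed, the 'jq:' filter option is disabled.")
```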
```diff
@@ -1,7 +1,6 @@
## Web Site Change Detection, Monitoring and Notification.

_Live your data-life pro-actively, Detect website changes and perform meaningful actions, trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._

Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more

[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring"  title="Self-hosted web page change monitoring"  />](https://lemonade.changedetection.io/start?src=github)

@@ -9,6 +8,8 @@ _Live your data-life pro-actively, Detect website changes and perform meaningful



Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more

[**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_

- Chrome browser included.
@@ -166,6 +167,9 @@ One big advantage of `jq` is that you can use logic in your JSON filter, such as

See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples

Note: `jq` library must be added separately (`pip3 install jq`)


### Parse JSON embedded in HTML!

When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
```
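The README hunk above notes that `jq:` filters need the separately installed `jq` package. A small sketch of what such a filter evaluates to via the `jq` Python bindings (the JSON document and program here are invented for illustration):

```python
import json
import jq  # optional dependency, installed via "pip3 install jq"

# Hypothetical product feed, the kind of JSON a watched page might embed
doc = json.loads('{"items": [{"name": "widget", "price": 4}, {"name": "gadget", "price": 9}]}')

# A jq program with logic in it, which is the advantage jq has over plain JSONPath
program = jq.compile('.items[] | select(.price > 5) | .name')
print(program.input(doc).all())  # ['gadget']
```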
```diff
@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1

__version__ = '0.39.21.1'
__version__ = '0.39.20.4'

datastore = None

@@ -199,6 +199,8 @@ def changedetection_app(config=None, datastore_o=None):





    # Setup cors headers to allow all domains
    # https://flask-cors.readthedocs.io/en/latest/
    #    CORS(app)
@@ -599,7 +601,7 @@ def changedetection_app(config=None, datastore_o=None):
                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

            # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
            if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
            if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
                if len(datastore.data['watching'][uuid].history):
                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

@@ -1307,8 +1309,8 @@ def changedetection_app(config=None, datastore_o=None):

    threading.Thread(target=notification_runner).start()

    # Check for new release version, but not when running in test/build or pytest
    if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True:
    # Check for new release version, but not when running in test/build
    if not os.getenv("GITHUB_REF", False):
        threading.Thread(target=check_for_new_version).start()

    return app
```
```diff
@@ -102,8 +102,8 @@ def main():
                    has_password=datastore.data['settings']['application']['password'] != False
                    )

    # Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
    # @Note: Incompatible with password login (and maybe other features) for now, submit a PR!
    # Monitored websites will not receive a Referer header
    # when a user clicks on an outgoing link.
    @app.after_request
    def hide_referrer(response):
        if os.getenv("HIDE_REFERER", False):
```
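The hunk cuts off inside `hide_referrer`. A plausible completion of this Flask `after_request` hook (the exact header value changedetection.io sets is an assumption here):

```python
from flask import Flask
import os

app = Flask(__name__)

@app.after_request
def hide_referrer(response):
    # When HIDE_REFERER is set, ask browsers not to leak this installation's
    # URL to monitored sites when a user clicks an outgoing link.
    if os.getenv("HIDE_REFERER", False):
        response.headers["Referrer-Policy"] = "no-referrer"  # assumed header/value
    return response
```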
```diff
@@ -164,16 +164,16 @@ class Fetcher():
                }


                // inject the current one set in the include_filters, which may be a CSS rule
                // inject the current one set in the css_filter, which may be a CSS rule
                // used for displaying the current one in VisualSelector, where its not one we generated.
                if (include_filters.length) {
                if (css_filter.length) {
                   q=false;
                   try {
                       // is it xpath?
                       if (include_filters.startsWith('/') || include_filters.startsWith('xpath:')) {
                         q=document.evaluate(include_filters.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                       if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) {
                         q=document.evaluate(css_filter.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                       } else {
                         q=document.querySelector(include_filters);
                         q=document.querySelector(css_filter);
                       }
                   } catch (e) {
                    // Maybe catch DOMException and alert?
@@ -186,7 +186,7 @@ class Fetcher():

                   if (bbox && bbox['width'] >0 && bbox['height']>0) {
                       size_pos.push({
                           xpath: include_filters,
                           xpath: css_filter,
                           width: bbox['width'],
                           height: bbox['height'],
                           left: bbox['left'],
@@ -220,7 +220,7 @@ class Fetcher():
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None):
            current_css_filter=None):
        # Should set self.error, self.status_code and self.content
        pass

@@ -310,7 +310,7 @@ class base_html_playwright(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None):
            current_css_filter=None):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types
@@ -413,10 +413,10 @@ class base_html_playwright(Fetcher):
            self.status_code = response.status
            self.headers = response.all_headers()

            if current_include_filters is not None:
                page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
            if current_css_filter is not None:
                page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
            else:
                page.evaluate("var include_filters=''")
                page.evaluate("var css_filter=''")

            self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")

@@ -497,7 +497,7 @@ class base_html_webdriver(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None):
            current_css_filter=None):

        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@@ -573,7 +573,7 @@ class html_requests(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None):
            current_css_filter=None):

        # Make requests use a more modern looking user-agent
        if not 'User-Agent' in request_headers:
```
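As the `run()` stub above notes, every fetcher backend is expected to set `self.error`, `self.status_code` and `self.content`. A minimal sketch of a conforming backend, using the signature from this diff (error handling is deliberately simplified, this is not the project's real `html_requests` class):

```python
import requests

class minimal_fetcher:
    """Sketch of a backend honouring the run() contract from content_fetcher."""

    def run(self, url, timeout, request_headers, request_body,
            request_method, ignore_status_codes=False, current_css_filter=None):
        r = requests.request(request_method, url,
                             headers=request_headers, data=request_body,
                             timeout=timeout)
        # The contract: populate these attributes for perform_site_check to read
        self.status_code = r.status_code
        self.content = r.text
        self.error = None
```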
```diff
@@ -10,11 +10,6 @@ from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)


# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check():
@@ -38,20 +33,18 @@ class perform_site_check():

        return regex


    def run(self, uuid):
        from copy import deepcopy
        changed_detected = False
        screenshot = False  # as bytes
        stripped_text_from_html = ""

        # DeepCopy so we can be sure we don't accidently change anything by reference
        watch = deepcopy(self.datastore.data['watching'].get(uuid))

        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            return

        # Protect against file:// access
        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
        if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
            raise Exception(
                "file:// type access is denied for security reasons."
            )
@@ -59,10 +52,10 @@ class perform_site_check():
        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

        extra_headers = watch.get('headers', [])
        extra_headers =self.datastore.data['watching'][uuid].get('headers')

        # Tweak the base config with the per-watch ones
        request_headers = deepcopy(self.datastore.data['settings']['headers'])
        request_headers = self.datastore.data['settings']['headers'].copy()
        request_headers.update(extra_headers)

        # https://github.com/psf/requests/issues/4525
@@ -86,7 +79,7 @@ class perform_site_check():
            is_source = True

        # Pluggable content fetcher
        prefer_backend = watch.get('fetch_backend')
        prefer_backend = watch['fetch_backend']
        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        else:
@@ -97,21 +90,21 @@ class perform_site_check():
        proxy_url = None
        if proxy_id:
            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
            print("UUID {} Using proxy {}".format(uuid, proxy_url))
            print ("UUID {} Using proxy {}".format(uuid, proxy_url))

        fetcher = klass(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
        if watch['webdriver_delay'] is not None:
            fetcher.render_extract_delay = watch.get('webdriver_delay')
            fetcher.render_extract_delay = watch['webdriver_delay']
        elif system_webdriver_delay is not None:
            fetcher.render_extract_delay = system_webdriver_delay

        if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
            fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')
        if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
            fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']

        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
        fetcher.quit()

        self.screenshot = fetcher.screenshot
@@ -135,30 +128,28 @@ class perform_site_check():
            is_html = False
            is_json = False

        include_filters_rule = watch.get('include_filters', [])
        # include_filters_rule = watch['include_filters']
        css_filter_rule = watch['css_filter']
        subtractive_selectors = watch.get(
            "subtractive_selectors", []
        ) + self.datastore.data["settings"]["application"].get(
            "global_subtractive_selectors", []
        )

        has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
        has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
        has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())

        if is_json and not has_filter_rule:
            include_filters_rule.append("json:$")
            css_filter_rule = "json:$"
            has_filter_rule = True

        if has_filter_rule:
            json_filter_prefixes = ['json:', 'jq:']
            for filter in include_filters_rule:
                if any(prefix in filter for prefix in json_filter_prefixes):
                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                    is_html = False
            if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
                is_html = False

        if is_html or is_source:


            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
            fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
            html_content = fetcher.content
@@ -170,36 +161,33 @@ class perform_site_check():
            else:
                # Then we assume HTML
                if has_filter_rule:
                    html_content = ""
                    for filter_rule in include_filters_rule:
                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
                                                                    html_content=fetcher.content,
                                                                    append_pretty_line_formatting=not is_source)
                        else:
                            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
                            html_content += html_tools.include_filters(include_filters=filter_rule,
                                                                       html_content=fetcher.content,
                                                                       append_pretty_line_formatting=not is_source)

                    if not html_content.strip():
                        raise FilterNotFoundInResponse(include_filters_rule)
                    # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                    if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
                        html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
                                                               html_content=fetcher.content)
                    else:
                        # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
                        html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)

                if has_subtractive_selectors:
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)

                if is_source:
                    stripped_text_from_html = html_content
                else:
                if not is_source:
                    # extract text
                    do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
                    stripped_text_from_html = \
                        html_tools.html_to_text(
                            html_content,
                            render_anchor_tag_content=do_anchor
                            render_anchor_tag_content=self.datastore.data["settings"][
                                "application"].get(
                                "render_anchor_tag_content", False)
                        )

                elif is_source:
                    stripped_text_from_html = html_content

            # Re #340 - return the content before the 'ignore text' was applied
            text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

@@ -232,7 +220,7 @@ class perform_site_check():

                for l in result:
                    if type(l) is tuple:
                        # @todo - some formatter option default (between groups)
                        #@todo - some formatter option default (between groups)
                        regex_matched_output += list(l) + [b'\n']
                    else:
                        # @todo - some formatter option default (between each ungrouped result)
@@ -246,6 +234,7 @@ class perform_site_check():
                stripped_text_from_html = b''.join(regex_matched_output)
                text_content_before_ignored_filter = stripped_text_from_html


        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
@@ -255,30 +244,29 @@ class perform_site_check():
        ############ Blocking rules, after checksum #################
        blocked = False

        trigger_text = watch.get('trigger_text', [])
        if len(trigger_text):
        if len(watch['trigger_text']):
            # Assume blocked
            blocked = True
            # Filter and trigger works the same, so reuse it
            # It should return the line numbers that match
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
                                                  wordlist=trigger_text,
                                                  wordlist=watch['trigger_text'],
                                                  mode="line numbers")
            # Unblock if the trigger was found
            if result:
                blocked = False

        text_should_not_be_present = watch.get('text_should_not_be_present', [])
        if len(text_should_not_be_present):

        if len(watch['text_should_not_be_present']):
            # If anything matched, then we should block a change from happening
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
                                                  wordlist=text_should_not_be_present,
                                                  wordlist=watch['text_should_not_be_present'],
                                                  mode="line numbers")
            if result:
                blocked = True

        # The main thing that all this at the moment comes down to :)
        if watch.get('previous_md5') != fetched_md5:
        if watch['previous_md5'] != fetched_md5:
            changed_detected = True

        # Looks like something changed, but did it match all the rules?
@@ -287,7 +275,7 @@ class perform_site_check():

        # Extract title as title
        if is_html:
            if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
            if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
                if not watch['title'] or not len(watch['title']):
                    update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
```
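The hunks above restore the single-string `css_filter` dispatch: `json:`/`jq:` prefixes route to JSON extraction, a leading `/` or an explicit `xpath:` prefix routes to XPath, and anything else is treated as a CSS selector. A condensed sketch of that routing, with the function names and call shapes taken from the diff (the standalone wrapper itself is for illustration only):

```python
def apply_filter(css_filter_rule, content, html_tools):
    # jq / JSONPath rules extract from a JSON document (or JSON embedded in HTML)
    if any(prefix in css_filter_rule for prefix in ('json:', 'jq:')):
        return html_tools.extract_json_as_string(content=content,
                                                 json_filter=css_filter_rule)
    # XPath rules start with '/' or an explicit 'xpath:' prefix
    if css_filter_rule.startswith('/') or css_filter_rule.startswith('xpath:'):
        return html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
                                       html_content=content)
    # Everything else is treated as a CSS selector
    return html_tools.css_filter(css_filter=css_filter_rule, html_content=content)
```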
```diff
@@ -349,7 +349,7 @@ class watchForm(commonSettingsForm):

    time_between_check = FormField(TimeBetweenCheckForm)

    include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
    css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')

    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
```
```diff
@@ -7,30 +7,26 @@ from typing import List
import json
import re

# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"
class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)

class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)



# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
def css_filter(css_filter, html_content):
    soup = BeautifulSoup(html_content, "html.parser")
    html_block = ""
    r = soup.select(include_filters, separator="")
    r = soup.select(css_filter, separator="")
    if len(html_content) > 0 and len(r) == 0:
        raise FilterNotFoundInResponse(css_filter)
    for item in r:
        html_block += str(item)

    for element in r:
        # When there's more than 1 match, then add the suffix to separate each line
        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
        # (This way each 'match' reliably has a new-line in the diff)
        # Divs are converted to 4 whitespaces by inscriptis
        if append_pretty_line_formatting and len(html_block) and not element.name in (['br', 'hr', 'div', 'p']):
            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

        html_block += str(element)

    return html_block
    return html_block + "\n"

def subtractive_css_selector(css_selector, html_content):
    soup = BeautifulSoup(html_content, "html.parser")
@@ -46,29 +42,25 @@ def element_removal(selectors: List[str], html_content):


# Return str Utf-8 of matched rules
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False):
def xpath_filter(xpath_filter, html_content):
    from lxml import etree, html

    tree = html.fromstring(bytes(html_content, encoding='utf-8'))
    html_block = ""

    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
    if len(html_content) > 0 and len(r) == 0:
        raise FilterNotFoundInResponse(xpath_filter)

    #@note: //title/text() wont work where <title>CDATA..

    for element in r:
        # When there's more than 1 match, then add the suffix to separate each line
        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
        # (This way each 'match' reliably has a new-line in the diff)
        # Divs are converted to 4 whitespaces by inscriptis
        if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

        if type(element) == etree._ElementStringResult:
            html_block += str(element)
            html_block += str(element) + "<br/>"
        elif type(element) == etree._ElementUnicodeResult:
            html_block += str(element)
            html_block += str(element) + "<br/>"
        else:
            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
            html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"

    return html_block
```
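A short usage sketch of the restored `css_filter()` above, including the `FilterNotFoundInResponse` behaviour when a selector matches nothing (trimmed-down reimplementation and HTML sample are invented for illustration):

```python
from bs4 import BeautifulSoup

# Trimmed-down version of the restored css_filter() for illustration
def css_filter(css_filter, html_content):
    soup = BeautifulSoup(html_content, "html.parser")
    matches = soup.select(css_filter)
    if len(html_content) > 0 and len(matches) == 0:
        raise ValueError("Filter '{}' not found in response".format(css_filter))
    return "".join(str(m) for m in matches) + "\n"

html = '<div id="price">4.99</div><div class="ad">buy now</div>'
print(css_filter("#price", html))  # <div id="price">4.99</div>

try:
    css_filter(".missing", html)
except ValueError as e:
    # In the real code this feeds the watch's consecutive_filter_failures counter
    print("filter failed:", e)
```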
```diff
@@ -103,12 +103,12 @@ class import_distill_io_json(Importer):
                    pass
                except IndexError:
                    pass
                extras['include_filters'] = []

                try:
                    extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
                    if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
                        extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                    else:
                        extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                        extras['css_filter'] = 'xpath:' + extras['css_filter']

                except KeyError:
                    pass
                except IndexError:
```
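For orientation, the nested keys the importer walks imply a Distill.io export shaped roughly like the following hypothetical fragment (only the fields the code actually reads are shown):

```python
# Hypothetical Distill.io watch config, reduced to the keys the importer reads
d_config = {
    "selections": [
        {
            "frames": [
                {
                    "includes": [
                        # type "xpath" gets an "xpath:" prefix,
                        # anything else is used as-is as a CSS filter
                        {"type": "xpath", "expr": "//div[@id='price']"}
                    ]
                }
            ]
        }
    ]
}

expr = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
print(expr)  # //div[@id='price']
```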
```diff
@@ -16,43 +16,42 @@ class model(dict):
    __newest_history_key = None
    __history_n=0
    __base_config = {
            #'history': {},  # Dict of timestamp and output stripped filename (removed)
            #'newest_history_key': 0, (removed, taken from history.txt index)
            'body': None,
            'check_unique_lines': False, # On change-detected, compare against all history if its something new
            'check_count': 0,
            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
            'extract_text': [],  # Extract text by regex after filters
            'extract_title_as_title': False,
            'fetch_backend': None,
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
            'headers': {},  # Extra headers to send
            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
            'include_filters': [],
            'url': None,
            'tag': None,
            'last_checked': 0,
            'last_error': False,
            'paused': False,
            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
            #'newest_history_key': 0,
            'title': None,
            'previous_md5': False,
            'uuid': str(uuid.uuid4()),
            'headers': {},  # Extra headers to send
            'body': None,
            'method': 'GET',
             # Custom notification content
            #'history': {},  # Dict of timestamp and output stripped filename
            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
            # Custom notification content
            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
            'notification_title': None,
            'notification_body': None,
            'notification_format': default_notification_format_for_watch,
            'notification_muted': False,
            'notification_title': None,
            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
            'paused': False,
            'previous_md5': False,
            'proxy': None, # Preferred proxy connection
            'css_filter': '',
            'last_error': False,
            'extract_text': [],  # Extract text by regex after filters
            'subtractive_selectors': [],
            'tag': None,
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'text_should_not_be_present': [], # Text that should not present
            'fetch_backend': None,
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
            'extract_title_as_title': False,
            'check_unique_lines': False, # On change-detected, compare against all history if its something new
            'proxy': None, # Preferred proxy connection
            # Re #110, so then if this is set to None, we know to use the default value instead
            # Requires setting to None on submit if it's the same as the default
            # Should be all None by default, so we use the system default in this case.
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
            'title': None,
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'url': None,
            'uuid': str(uuid.uuid4()),
            'webdriver_delay': None,
            'webdriver_js_execute_code': None, # Run before change-detection
        }
@@ -186,12 +185,6 @@ class model(dict):
    def save_history_text(self, contents, timestamp):

        self.ensure_data_dir_exists()

        # Small hack so that we sleep just enough to allow 1 second  between history snapshots
        # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
            time.sleep(timestamp - self.__newest_history_key)

        snapshot_fname = "{}.txt".format(str(uuid.uuid4()))

        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
```
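The removed sleep hack above exists because, per its own comment, `history.txt` keys snapshots by epoch seconds, so two snapshots saved within the same second would collide. A tiny sketch of that collision (the index structure is inferred from the comment, not taken from the file itself):

```python
import time

history = {}  # epoch-second -> snapshot filename, as the history.txt comment implies

for snapshot in ("a.txt", "b.txt"):
    key = int(time.time())
    if key in history:
        # Without the removed "sleep just enough" hack, the second snapshot
        # taken within the same second would silently overwrite the first.
        print("duplicate history key:", key)
    history[key] = snapshot
```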
```diff
@@ -24,6 +24,14 @@ echo "RUNNING WITH BASE_URL SET"
export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py


## JQ + JSON: filter test
# jq is not available on windows and we should just test it when the package is installed
# this will re-test with jq support
pip3 install jq~=1.3
pytest tests/test_jsonpath_jq_selector.py


# Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content

@@ -38,10 +46,6 @@ unset WEBDRIVER_URL
docker kill $$-test_selenium

echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
echo "using $PLAYWRIGHT_VERSION"
pip3 install "$PLAYWRIGHT_VERSION"
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable
# takes a while to spin up
sleep 5
```
```diff
@@ -50,7 +50,7 @@ $(document).ready(function() {
        state_clicked=false;
        ctx.clearRect(0, 0, c.width, c.height);
        xctx.clearRect(0, 0, c.width, c.height);
        $("#include_filters").val('');
        $("#css_filter").val('');
    });


@@ -68,7 +68,7 @@ $(document).ready(function() {
               xctx = c.getContext("2d");
                // redline highlight context
               ctx = c.getContext("2d");
               current_default_xpath =$("#include_filters").val();
               current_default_xpath =$("#css_filter").val();
               fetch_data();
               $('#selector-canvas').off("mousemove mousedown");
               // screenshot_url defined in the edit.html template
@@ -205,9 +205,9 @@ $(document).ready(function() {
        var sel = selector_data['size_pos'][current_selected_i];
        if (sel[0] == '/') {
        // @todo - not sure just checking / is right
            $("#include_filters").val('xpath:'+sel.xpath);
            $("#css_filter").val('xpath:'+sel.xpath);
        } else {
            $("#include_filters").val(sel.xpath);
            $("#css_filter").val(sel.xpath);
        }
        xctx.fillStyle = 'rgba(205,205,205,0.95)';
        xctx.strokeStyle = 'rgba(225,0,0,0.9)';
```
```diff
@@ -27,8 +27,6 @@ class ChangeDetectionStore:
    # For when we edit, we should write to disk
    needs_write_urgent = False

    __version_check = True

    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
@@ -39,6 +37,7 @@ class ChangeDetectionStore:
        self.proxy_list = None
        self.start_time = time.time()
        self.stop_thread = False

        # Base definition for all watchers
        # deepcopy part of #569 - not sure why its needed exactly
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
@@ -82,13 +81,8 @@ class ChangeDetectionStore:
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            if include_default_watches:
                print("Creating JSON store at", self.datastore_path)
                self.add_watch(url='https://news.ycombinator.com/',
                               tag='Tech news',
                               extras={'fetch_backend': 'html_requests'})

                self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
                               tag='changedetection.io',
                               extras={'fetch_backend': 'html_requests'})
                self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
                self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')

        self.__data['version_tag'] = version_tag

@@ -272,7 +266,7 @@ class ChangeDetectionStore:
            extras = {}
        # should always be str
        if tag is None or not tag:
            tag = ''
            tag=''

        # Incase these are copied across, assume it's a reference and deepcopy()
        apply_extras = deepcopy(extras)
@@ -287,31 +281,17 @@ class ChangeDetectionStore:
                res = r.json()

                # List of permissible attributes we accept from the wild internet
                for k in [
                    'body',
                    'css_filter',
                    'extract_text',
                    'extract_title_as_title',
                    'headers',
                    'ignore_text',
                    'include_filters',
                    'method',
                    'paused',
                    'previous_md5',
                    'subtractive_selectors',
                    'tag',
                    'text_should_not_be_present',
                    'title',
                    'trigger_text',
                    'webdriver_js_execute_code',
                    'url',
                ]:
                for k in ['url', 'tag',
                          'paused', 'title',
                          'previous_md5', 'headers',
                          'body', 'method',
                          'ignore_text', 'css_filter',
                          'subtractive_selectors', 'trigger_text',
                          'extract_title_as_title', 'extract_text',
                          'text_should_not_be_present',
                          'webdriver_js_execute_code']:
                    if res.get(k):
                        if k != 'css_filter':
                            apply_extras[k] = res[k]
                        else:
                            # We renamed the field and made it a list
                            apply_extras['include_filters'] = [res['css_filter']]
                        apply_extras[k] = res[k]

            except Exception as e:
                logging.error("Error fetching metadata for shared watch link", url, str(e))
@@ -334,13 +314,12 @@ class ChangeDetectionStore:
                    del apply_extras[k]

            new_watch.update(apply_extras)
            self.__data['watching'][new_uuid] = new_watch
            self.__data['watching'][new_uuid]=new_watch

        self.__data['watching'][new_uuid].ensure_data_dir_exists()

        if write_to_disk_now:
            self.sync_to_json()

        return new_uuid

    def visualselector_data_is_ready(self, watch_uuid):
@@ -604,14 +583,3 @@ class ChangeDetectionStore:
        for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
            if self.data['settings']['headers'].get(v):
                del self.data['settings']['headers'][v]

    # Convert filters to a list of filters css_filter -> include_filters
    def update_8(self):
        for uuid, watch in self.data['watching'].items():
            try:
                existing_filter = watch.get('css_filter', '')
                if existing_filter:
                    watch['include_filters'] = [existing_filter]
            except:
                continue
        return
```
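The removed `update_8` above is one of the datastore's numbered schema migrations. A generic sketch of that pattern follows; how the numbered methods are discovered and tracked is an assumption here, not something this diff shows:

```python
class Store:
    def __init__(self):
        self.schema_version = 7
        self.data = {'watching': {}}

    def run_updates(self):
        # Assumed discovery mechanism: run every update_N past the current
        # schema version, in ascending order, recording the new version.
        n = self.schema_version + 1
        while hasattr(self, "update_{}".format(n)):
            getattr(self, "update_{}".format(n))()
            self.schema_version = n
            n += 1

    def update_8(self):
        # The migration this commit reverts: css_filter str -> include_filters list
        for uuid, watch in self.data['watching'].items():
            existing_filter = watch.get('css_filter', '')
            if existing_filter:
                watch['include_filters'] = [existing_filter]
```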
```diff
@@ -174,17 +174,15 @@ User-Agent: wonderbra 1.0") }}
                        </div>
                    </fieldset>
                    <div class="pure-control-group">
                        {% set field = render_field(form.include_filters,
                            rows=5,
                            placeholder="#example
xpath://body/div/span[contains(@class, 'example-class')]",
                        {% set field = render_field(form.css_filter,
                            placeholder=".class-name or #some-id, or other CSS selector rule.",
                            class="m-d")
                        %}
                        {{ field }}
                        {% if '/text()' in  field %}
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/>
                        {% endif %}
                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
```
```diff
@@ -41,7 +41,7 @@ def app(request):

    cleanup(datastore_path)

    app_config = {'datastore_path': datastore_path, 'disable_checkver' : True}
    app_config = {'datastore_path': datastore_path}
    cleanup(app_config['datastore_path'])
    datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
    app = changedetection_app(app_config, datastore)
```
```diff
@@ -24,7 +24,7 @@ def test_preferred_proxy(client, live_server):
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={
                "include_filters": "",
                "css_filter": "",
                "fetch_backend": "html_requests",
                "headers": "",
                "proxy": "proxy-two",
```
```diff
@@ -23,7 +23,7 @@ def test_basic_auth(client, live_server):
    # Check form validation
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"include_filters": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
```
```diff
@@ -3,7 +3,7 @@
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
from .util import set_original_response, set_modified_response, live_server_setup

sleep_time_for_fetch_thread = 3

@@ -36,7 +36,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)

        # Give the thread time to pick it up
        wait_for_all_checks(client)
        time.sleep(sleep_time_for_fetch_thread)

        # It should report nothing found (no new 'unviewed' class)
        res = client.get(url_for("index"))
@@ -69,7 +69,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches are queued for rechecking.' in res.data

    wait_for_all_checks(client)
    time.sleep(sleep_time_for_fetch_thread)

    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
@@ -98,14 +98,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
    assert b'which has this one new line' in res.data
    assert b'Which is across multiple lines' not in res.data

    wait_for_all_checks(client)
    time.sleep(2)

    # Do this a few times.. ensures we dont accidently set the status
    for n in range(2):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)

        # Give the thread time to pick it up
        wait_for_all_checks(client)
        time.sleep(sleep_time_for_fetch_thread)

        # It should report nothing found (no new 'unviewed' class)
        res = client.get(url_for("index"))
@@ -125,7 +125,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
    )

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    time.sleep(sleep_time_for_fetch_thread)

    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
```
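This file swaps the removed `wait_for_all_checks()` helper back to fixed `time.sleep()` calls. A hypothetical version of such a polling helper, to show the idea; its real implementation in the project's test `util.py` is not visible in this diff:

```python
import time

def wait_for_all_checks(client, timeout=60):
    # Hypothetical: poll the index until no watch reports it is being checked,
    # rather than sleeping for a fixed number of seconds.
    deadline = time.time() + timeout
    while time.time() < deadline:
        res = client.get("/")
        if b"Checking now" not in res.data:
            return
        time.sleep(0.5)
    raise TimeoutError("watches were still being checked after {}s".format(timeout))
```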
| @@ -46,23 +46,22 @@ def set_modified_response(): | ||||
|  | ||||
|  | ||||
 # Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
-def test_include_filters_output():
+def test_css_filter_output():
     from changedetectionio import fetch_site_status
     from inscriptis import get_text

     # Check text with sub-parts renders correctly
     content = """<html> <body><div id="thingthing" >  Some really <b>bold</b> text  </div> </body> </html>"""
-    html_blob = include_filters(include_filters="#thingthing", html_content=content)
+    html_blob = css_filter(css_filter="#thingthing", html_content=content)
     text = get_text(html_blob)
     assert text == "  Some really bold text"

     content = """<html> <body>
     <p>foo bar blah</p>
-    <DIV class="parts">Block A</DiV> <div class="parts">Block B</DIV></body>
+    <div class="parts">Block A</div> <div class="parts">Block B</div></body>
     </html>
 """

     # in xPath this would be //*[@class='parts']
-    html_blob = include_filters(include_filters=".parts", html_content=content)
+    html_blob = css_filter(css_filter=".parts", html_content=content)
     text = get_text(html_blob)

     # Divs are converted to 4 whitespaces by inscriptis
@@ -70,10 +69,10 @@ def test_include_filters_output():


 # Tests the whole stack works with the CSS Filter
-def test_check_markup_include_filters_restriction(client, live_server):
+def test_check_markup_css_filter_restriction(client, live_server):
     sleep_time_for_fetch_thread = 3

-    include_filters = "#sametext"
+    css_filter = "#sametext"

     set_original_response()

@@ -99,7 +98,7 @@ def test_check_markup_include_filters_restriction(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -108,7 +107,7 @@ def test_check_markup_include_filters_restriction(client, live_server):
     res = client.get(
         url_for("edit_page", uuid="first"),
     )
-    assert bytes(include_filters.encode('utf-8')) in res.data
+    assert bytes(css_filter.encode('utf-8')) in res.data

     # Trigger a check
     client.get(url_for("form_watch_checknow"), follow_redirects=True)

@@ -127,58 +126,3 @@ def test_check_markup_include_filters_restriction(client, live_server):
     # Because it should be looking at only that 'sametext' id
     res = client.get(url_for("index"))
     assert b'unviewed' in res.data
-
-
-# Tests the whole stack works with the CSS Filter
-def test_check_multiple_filters(client, live_server):
-    sleep_time_for_fetch_thread = 3
-
-    include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write("""<html><body>
-     <div id="blob-a">Blob A</div>
-     <div id="blob-b">Blob B</div>
-     <div id="blob-c">Blob C</div>
-     </body>
-     </html>
-    """)
-
-    # Give the endpoint time to spin up
-    time.sleep(1)
-
-    # Add our URL to the import page
-    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
-    time.sleep(1)
-
-    # Goto the edit page, add our ignore text
-    # Add our URL to the import page
-    res = client.post(
-        url_for("edit_page", uuid="first"),
-        data={"include_filters": include_filters,
-              "url": test_url,
-              "tag": "",
-              "headers": "",
-              'fetch_backend': "html_requests"},
-        follow_redirects=True
-    )
-    assert b"Updated watch." in res.data
-
-    # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
-
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    # Only the two blobs should be here
-    assert b"Blob A" in res.data # CSS was ok
-    assert b"Blob B" in res.data # xPath was ok
-    assert b"Blob C" not in res.data # Should not be included

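The hunks above also lean on inscriptis's whitespace handling (the `assert text == "  Some really bold text"` line and the `Divs are converted to 4 whitespaces` comment). A minimal standalone sketch of that behaviour, assuming only that inscriptis is installed; the HTML string here is illustrative rather than copied from the test:

```python
from inscriptis import get_text

# inscriptis preserves block-level indentation when flattening HTML to text,
# which is why the assertions above check for exact leading whitespace.
html = '<html><body><div>  Some really <b>bold</b> text  </div></body></html>'
print(repr(get_text(html)))  # repr() makes the leading spaces visible
```
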
@@ -88,7 +88,7 @@ def test_check_filter_multiline(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": '',
+        data={"css_filter": '',
               'extract_text': '/something.+?6 billion.+?lines/si',
               "url": test_url,
               "tag": "",
@@ -116,7 +116,7 @@ def test_check_filter_multiline(client, live_server):

 def test_check_filter_and_regex_extract(client, live_server):
     sleep_time_for_fetch_thread = 3
-    include_filters = ".changetext"
+    css_filter = ".changetext"

     set_original_response()

@@ -143,7 +143,7 @@ def test_check_filter_and_regex_extract(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": include_filters,
+        data={"css_filter": css_filter,
               'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
               "url": test_url,
               "tag": "",

@@ -92,7 +92,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
         "tag": "my tag",
         "title": "my title",
         "headers": "",
-        "include_filters": '.ticket-available',
+        "css_filter": '.ticket-available',
         "fetch_backend": "html_requests"})

     res = client.post(

@@ -76,7 +76,7 @@ def run_filter_test(client, content_filter):
         "title": "my title",
         "headers": "",
         "filter_failure_notification_send": 'y',
-        "include_filters": content_filter,
+        "css_filter": content_filter,
         "fetch_backend": "html_requests"})

     res = client.post(
@@ -95,7 +95,7 @@ def run_filter_test(client, content_filter):
         time.sleep(3)

     # We should see something in the frontend
-    assert b'Warning, no filters were found' in res.data
+    assert b'Warning, filter' in res.data

     # Now it should exist and contain our "filter not found" alert
     assert os.path.isfile("test-datastore/notification.txt")
@@ -131,7 +131,7 @@ def run_filter_test(client, content_filter):
 def test_setup(live_server):
     live_server_setup(live_server)

-def test_check_include_filters_failure_notification(client, live_server):
+def test_check_css_filter_failure_notification(client, live_server):
     set_original_response()
     time.sleep(1)
     run_filter_test(client, '#nope-doesnt-exist')

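The changed assertion `assert b'Warning, filter' in res.data` only holds as long as it stays a substring of the error text written by update_worker, which moves in the same direction in the update_worker.py hunk near the end of this diff. A tiny sketch of that coupling; the concrete selector is the one the test passes to `run_filter_test`:

```python
# Mirrors the err_text format from the update_worker.py hunk further down;
# '#nope-doesnt-exist' comes from the test above.
err_text = "Warning, filter '{}' not found".format('#nope-doesnt-exist')
assert b'Warning, filter' in err_text.encode('utf-8')
```
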
@@ -132,7 +132,7 @@ def set_original_response():
     return None


-def set_json_response_with_html():
+def set_response_with_html():
     test_return_data = """
     {
       "test": [
@@ -176,7 +176,7 @@ def set_modified_response():
 def test_check_json_without_filter(client, live_server):
     # Request a JSON document from a application/json source containing HTML
     # and be sure it doesn't get chewed up by instriptis
-    set_json_response_with_html()
+    set_response_with_html()

     # Give the endpoint time to spin up
     time.sleep(1)
@@ -189,6 +189,9 @@ def test_check_json_without_filter(client, live_server):
         follow_redirects=True
     )

+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(3)

@@ -197,7 +200,6 @@ def test_check_json_without_filter(client, live_server):
         follow_redirects=True
     )

-    # Should still see '"html": "<b>"'
     assert b'"<b>' in res.data
     assert res.data.count(b'{\n') >= 2

@@ -219,6 +221,9 @@ def check_json_filter(json_filter, client, live_server):
     )
     assert b"1 Imported" in res.data

+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(3)

@@ -226,7 +231,7 @@ def check_json_filter(json_filter, client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": json_filter,
+        data={"css_filter": json_filter,
               "url": test_url,
               "tag": "",
               "headers": "",
@@ -242,6 +247,9 @@ def check_json_filter(json_filter, client, live_server):
     )
     assert bytes(escape(json_filter).encode('utf-8')) in res.data

+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(3)
     #  Make a change
@@ -293,7 +301,7 @@ def check_json_filter_bool_val(json_filter, client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": json_filter,
+        data={"css_filter": json_filter,
               "url": test_url,
               "tag": "",
               "headers": "",
@@ -303,6 +311,11 @@ def check_json_filter_bool_val(json_filter, client, live_server):
     )
     assert b"Updated watch." in res.data

+    time.sleep(3)
+
+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(3)
     #  Make a change
@@ -347,6 +360,9 @@ def check_json_ext_filter(json_filter, client, live_server):
     )
     assert b"1 Imported" in res.data

+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(3)

@@ -354,7 +370,7 @@ def check_json_ext_filter(json_filter, client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": json_filter,
+        data={"css_filter": json_filter,
               "url": test_url,
               "tag": "",
               "headers": "",
@@ -370,6 +386,9 @@ def check_json_ext_filter(json_filter, client, live_server):
     )
     assert bytes(escape(json_filter).encode('utf-8')) in res.data

+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(3)
     #  Make a change

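Several of the hunks above insert the same two steps: hit `form_watch_checknow`, then sleep while the background update thread runs. If that pattern keeps spreading, it could be collapsed into a helper; a hypothetical sketch (this function does not exist anywhere in the diff):

```python
import time

def trigger_check_and_wait(client, url_for, seconds=3):
    # Force a recheck via the same endpoint the tests call, then give the
    # background update thread time to pick the job up.
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(seconds)
```
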
@@ -14,7 +14,7 @@ def test_share_watch(client, live_server):
     live_server_setup(live_server)

     test_url = url_for('test_endpoint', _external=True)
-    include_filters = ".nice-filter"
+    css_filter = ".nice-filter"

     # Add our URL to the import page
     res = client.post(
@@ -29,7 +29,7 @@ def test_share_watch(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -37,7 +37,7 @@ def test_share_watch(client, live_server):
     res = client.get(
         url_for("edit_page", uuid="first"),
     )
-    assert bytes(include_filters.encode('utf-8')) in res.data
+    assert bytes(css_filter.encode('utf-8')) in res.data

     # click share the link
     res = client.get(
@@ -73,8 +73,4 @@ def test_share_watch(client, live_server):
     res = client.get(
         url_for("edit_page", uuid="first"),
     )
-    assert bytes(include_filters.encode('utf-8')) in res.data
-
-    # Check it saved the URL
-    res = client.get(url_for("index"))
-    assert bytes(test_url.encode('utf-8')) in res.data
+    assert bytes(css_filter.encode('utf-8')) in res.data

@@ -57,9 +57,10 @@ def test_check_basic_change_detection_functionality_source(client, live_server):



-# `subtractive_selectors` should still work in `source:` type requests
+
 def test_check_ignore_elements(client, live_server):
     set_original_response()
+
     time.sleep(2)
     test_url = 'source:'+url_for('test_endpoint', _external=True)
     # Add our URL to the import page
@@ -76,9 +77,9 @@ def test_check_ignore_elements(client, live_server):
     #####################
     # We want <span> and <p> ONLY, but ignore span with .foobar-detection

-    client.post(
+    res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
+        data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )

@@ -88,6 +89,7 @@ def test_check_ignore_elements(client, live_server):
         url_for("preview_page", uuid="first"),
         follow_redirects=True
     )
+
     assert b'foobar-detection' not in res.data
     assert b'<br' not in res.data
     assert b'<p' in res.data

@@ -49,7 +49,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server):
         url_for("edit_page", uuid="first"),
         data={"trigger_text": "/cool.stuff/",
               "url": test_url,
-              "include_filters": '#in-here',
+              "css_filter": '#in-here',
               "fetch_backend": "html_requests"},
         follow_redirects=True
     )

@@ -22,7 +22,7 @@ def test_check_watch_field_storage(client, live_server):
         url_for("edit_page", uuid="first"),
         data={ "notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n",
                "time_between_check-minutes": 126,
-               "include_filters" : ".fooclass",
+               "css_filter" : ".fooclass",
                "title" : "My title",
                "ignore_text" : "ignore this",
                "url": test_url,

@@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server):
     time.sleep(1)
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server):
     time.sleep(1)
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -182,6 +182,9 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
     )
     assert b"1 Imported" in res.data

+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)

@@ -189,7 +192,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -227,11 +230,10 @@ def test_xpath_validation(client, live_server):
         follow_redirects=True
     )
     assert b"1 Imported" in res.data
-    time.sleep(2)

     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"is not a valid XPath expression" in res.data
@@ -240,7 +242,7 @@ def test_xpath_validation(client, live_server):


 # actually only really used by the distll.io importer, but could be handy too
-def test_check_with_prefix_include_filters(client, live_server):
+def test_check_with_prefix_css_filter(client, live_server):
     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data

@@ -261,7 +263,7 @@ def test_check_with_prefix_include_filters(client, live_server):

     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"include_filters":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"css_filter":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )

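The renamed `test_check_with_prefix_css_filter` exercises a single form field that accepts either a CSS selector or an `xpath:`-prefixed expression. One plausible shape for such a dispatch, purely as an illustration; the real implementation lives in changedetectionio's html_tools and may differ (lxml and beautifulsoup4 are existing project dependencies):

```python
from bs4 import BeautifulSoup
from lxml import etree, html


def apply_filter(rule, html_content):
    # Illustrative only: route "xpath:"-prefixed rules to lxml, everything
    # else to a CSS selector lookup.
    if rule.startswith('xpath:'):
        root = html.fromstring(html_content)
        matches = root.xpath(rule[len('xpath:'):])
        return "\n".join(
            m if isinstance(m, str) else etree.tostring(m, encoding='unicode')
            for m in matches
        )
    soup = BeautifulSoup(html_content, 'html.parser')
    return "\n".join(str(el) for el in soup.select(rule))
```
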
@@ -86,7 +86,6 @@ def extract_UUID_from_client(client):
 def wait_for_all_checks(client):
     # Loop waiting until done..
     attempt=0
-    time.sleep(0.1)
     while attempt < 60:
         time.sleep(1)
         res = client.get(url_for("index"))

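The util.py hunk trims an initial `time.sleep(0.1)` from `wait_for_all_checks`; the loop body is only partially visible. For orientation, an outline of the polling shape, under the assumption that the helper exits once the index page stops reporting a running check (the exact completion marker is not shown in this hunk):

```python
import time

def wait_for_all_checks(client, url_for, max_attempts=60):
    # Poll the index page once per second until no check appears to be
    # running; b'Checking now' as the busy marker is an assumption here.
    for _ in range(max_attempts):
        time.sleep(1)
        res = client.get(url_for("index"))
        if b'Checking now' not in res.data:
            break
```
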
@@ -4,7 +4,7 @@ import queue
 import time

 from changedetectionio import content_fetcher
-from changedetectionio.fetch_site_status import FilterNotFoundInResponse
+from changedetectionio.html_tools import FilterNotFoundInResponse

 # A single update worker
 #
@@ -91,8 +91,8 @@ class update_worker(threading.Thread):
             return

         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
-                    'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
-                        ", ".join(watch['include_filters']),
+                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
+                        watch['css_filter'],
                         threshold),
                     'notification_format': 'text'}

@@ -189,7 +189,7 @@ class update_worker(threading.Thread):
                         if not self.datastore.data['watching'].get(uuid):
                             continue

-                        err_text = "Warning, no filters were found, no change detection ran."
+                        err_text = "Warning, filter '{}' not found".format(str(e))
                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                            # So that we get a trigger when the content is added again
                                                                            'previous_md5': ''})
@@ -282,19 +282,16 @@ class update_worker(threading.Thread):
                             self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                             self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

-                    if self.datastore.data['watching'].get(uuid):
-                        # Always record that we atleast tried
-                        count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
-                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
-                                                                           'last_checked': round(time.time()),
-                                                                           'check_count': count
-                                                                           })
-
-                        # Always save the screenshot if it's available
-                        if update_handler.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
-                        if update_handler.xpath_data:
-                            self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
+                    # Always record that we atleast tried
+                    self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
+                                                                       'last_checked': round(time.time())})
+
+                    # Always save the screenshot if it's available
+                    if update_handler.screenshot:
+                        self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
+                    if update_handler.xpath_data:
+                        self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
+

                 self.current_uuid = None  # Done

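The last update_worker.py hunk drops the `if self.datastore.data['watching'].get(uuid):` guard around the post-check bookkeeping. That guard matters because a watch can be deleted while its check is still in flight; a reduced sketch of the race it prevents (a stand-in dict, not the real datastore API):

```python
watching = {}  # stand-in for self.datastore.data['watching']

def record_result(uuid, fetch_time):
    # Without this existence check, a watch deleted mid-check would be
    # resurrected (or raise a KeyError) when its result lands.
    if watching.get(uuid):
        watching[uuid]['fetch_time'] = fetch_time
```
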
@@ -50,9 +50,6 @@ werkzeug~=2.0.0
 jinja2~=3.1
 jinja2-time

-# https://peps.python.org/pep-0508/#environment-markers
-# https://github.com/dgtlmoon/changedetection.io/pull/1009
-jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux"
+playwright~=1.26; python_version >= "3.8" and "arm" not in platform_machine and "aarch" not in platform_machine

-# playwright is installed at Dockerfile build time because it's not available on all platforms

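The removed requirements.txt comments pointed at PEP 508 environment markers, and both the removed jq pin and the added playwright pin rely on that mechanism. Markers can also be evaluated outside of pip; a short sketch using the `packaging` library (a common pip dependency, not part of this project):

```python
from packaging.markers import Marker

# The same marker expression the playwright line uses, minus the version pin.
m = Marker('python_version >= "3.8" and "arm" not in platform_machine'
           ' and "aarch" not in platform_machine')
print(m.evaluate())  # True only on a matching interpreter/platform
```
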