mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-30 22:27:52 +00:00 
			
		
		
		
	Compare commits
	
		
			8 Commits
		
	
	
		
			regex-filt
			...
			feature/fi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | f5af262a81 | ||
|   | 256e328e98 | ||
|   | befd093ea2 | ||
|   | 73cccff8bb | ||
|   | 7eaae0daee | ||
|   | 269a7f1c54 | ||
|   | c2deb18945 | ||
|   | 2584af19d2 | 
| @@ -349,6 +349,8 @@ class watchForm(commonSettingsForm): | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     proxy = RadioField('Proxy') | ||||
|     filter_failure_notification_send = BooleanField( | ||||
|         'Send a notification when the filter can no longer be found on the page', default=False) | ||||
|  | ||||
|     def validate(self, **kwargs): | ||||
|         if not super().validate(): | ||||
| @@ -387,6 +389,11 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|     api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()]) | ||||
|     password = SaltyPasswordField() | ||||
|  | ||||
|     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
|                                                                                                      message="Should contain zero or more attempts")]) | ||||
|  | ||||
|  | ||||
| class globalSettingsForm(Form): | ||||
|     # Define these as FormFields/"sub forms", this way it matches the JSON storage | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import json | ||||
| import re | ||||
| from typing import List | ||||
|  | ||||
| from bs4 import BeautifulSoup | ||||
| @@ -8,16 +7,23 @@ import re | ||||
| from inscriptis import get_text | ||||
| from inscriptis.model.config import ParserConfig | ||||
|  | ||||
class FilterNotFoundInResponse(ValueError):
    """Raised when a configured CSS/XPath filter matches nothing in the fetched content.

    The single argument is stored as the exception message.
    """

    def __init__(self, msg):
        super().__init__(msg)
|  | ||||
class JSONNotFound(ValueError):
    """ValueError subclass signalling that no JSON could be found.

    The single argument is stored as the exception message.
    """

    def __init__(self, msg):
        super().__init__(msg)
|  | ||||
|  | ||||
| # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches | ||||
def css_filter(css_filter, html_content):
    """Return the HTML of every element in *html_content* matching a CSS selector.

    Args:
        css_filter: CSS selector to apply.
        html_content: HTML document (string) to search.

    Returns:
        The serialized matching elements concatenated in document order,
        followed by a single trailing newline.

    Raises:
        FilterNotFoundInResponse: if the selector matches nothing; the
            exception message is the selector itself.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    matches = soup.select(css_filter, separator="")
    if not matches:
        # An empty result means the selector no longer applies to this page;
        # surface that to the caller instead of returning an empty block.
        raise FilterNotFoundInResponse(css_filter)

    return "".join(str(item) for item in matches) + "\n"
| @@ -42,8 +48,12 @@ def xpath_filter(xpath_filter, html_content): | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8')) | ||||
|     html_block = "" | ||||
|  | ||||
|     for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}): | ||||
|         html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"<br/>" | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}) | ||||
|     if len(r) == 0: | ||||
|         raise FilterNotFoundInResponse(css_filter) | ||||
|  | ||||
|     for item in r: | ||||
|         html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "<br/>" | ||||
|  | ||||
|     return html_block | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,8 @@ from changedetectionio.notification import ( | ||||
|     default_notification_title, | ||||
| ) | ||||
|  | ||||
| _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6 | ||||
|  | ||||
| class model(dict): | ||||
|     base_config = { | ||||
|             'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!", | ||||
| @@ -30,6 +32,7 @@ class model(dict): | ||||
|                     'extract_title_as_title': False, | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, | ||||
|                     'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum | ||||
|                     'global_subtractive_selectors': [], | ||||
|                     'ignore_whitespace': True, | ||||
|   | ||||
| @@ -41,6 +41,8 @@ class model(dict): | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'text_should_not_be_present': [], # Text that should not present | ||||
|             'fetch_backend': None, | ||||
|             'filter_failure_notification_send': True, | ||||
|             'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'extract_title_as_title': False, | ||||
|             'check_unique_lines': False, # On change-detected, compare against all history if its something new | ||||
|             'proxy': None, # Preferred proxy connection | ||||
|   | ||||
| @@ -34,7 +34,6 @@ def process_notification(n_object, datastore): | ||||
|         valid_notification_formats[default_notification_format], | ||||
|     ) | ||||
|  | ||||
|  | ||||
|     # Insert variables into the notification content | ||||
|     notification_parameters = create_notification_parameters(n_object, datastore) | ||||
|  | ||||
| @@ -91,7 +90,8 @@ def process_notification(n_object, datastore): | ||||
|                     # So that whats' generated in n_body is in line with what is going to be sent. | ||||
|                     # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321 | ||||
|                     if not 'format=' in url and (n_format == 'text' or n_format == 'markdown'): | ||||
|                         url = "{}?format={}".format(url, n_format) | ||||
|                         prefix = '?' if not '?' in url else '&' | ||||
|                         url = "{}{}format={}".format(url, prefix, n_format) | ||||
|  | ||||
|                 apobj.add(url) | ||||
|  | ||||
|   | ||||
| @@ -62,6 +62,12 @@ | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.extract_title_as_title) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.filter_failure_notification_send) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                          Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore. | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
|   | ||||
| @@ -36,7 +36,13 @@ | ||||
|                         {{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }} | ||||
|                         <span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span> | ||||
|                     </div> | ||||
|  | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }} | ||||
|                         <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification | ||||
|                             <br/> | ||||
|                         Set to <strong>0</strong> to disable | ||||
|                         </span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% if not hide_remove_pass %} | ||||
|                             {% if current_user.is_authenticated %} | ||||
|   | ||||
							
								
								
									
										123
									
								
								changedetectionio/tests/test_filter_failure_notification.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								changedetectionio/tests/test_filter_failure_notification.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,123 @@ | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| from flask import url_for | ||||
| from .util import set_original_response, live_server_setup | ||||
| from changedetectionio.model import App | ||||
|  | ||||
|  | ||||
def set_response_with_filter():
    """Write a test page containing the ``#nope-doesnt-exist`` element.

    The page is written to ``test-datastore/endpoint-content.txt``,
    overwriting whatever was there before.
    """
    page_with_filter_target = """<html>
       <body>
     Some initial text</br>
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
     <div id="nope-doesnt-exist">Some text thats the same</div>     
     </body>
     </html>
    """

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_with_filter_target)
|  | ||||
|  | ||||
| # Hard to just add more live server URLs when one test is already running (I think) | ||||
| # So we add our test here (was in a different file) | ||||
def test_check_notification(client, live_server):
    """Exercise the "filter not found" notification end to end.

    Steps:
      1. Add a watch and give it a CSS filter that is absent from the page.
      2. Re-check the default threshold number of times, then assert the
         filter-failure alert was written to the notification file.
      3. Serve a page that contains the filter target, re-check again, and
         assert the notification file exists but no longer carries the
         filter-failure text.
    """
    live_server_setup(live_server)
    set_original_response()

    # Let the live test endpoint come up before it is requested
    time.sleep(1)

    # Create a watch for the test endpoint
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("form_watch_add"),
        data={"url": test_url, "tag": ''},
        follow_redirects=True
    )
    assert b"Watch added" in res.data

    # Give the thread time to pick up the first version
    time.sleep(3)

    # Build the notification URL from the test notification endpoint (http -> json scheme)
    notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
    print(">>>> Notification URL: " + notification_url)

    # An ordinary notification setup; the special 'filter not found' notification reuses it
    notification_body = "\n".join([
        "BASE URL: {base_url}",
        "Watch URL: {watch_url}",
        "Watch UUID: {watch_uuid}",
        "Watch title: {watch_title}",
        "Watch tag: {watch_tag}",
        "Preview: {preview_url}",
        "Diff URL: {diff_url}",
        "Snapshot: {current_snapshot}",
        "Diff: {diff}",
        "Diff Full: {diff_full}",
        ":-)",
    ])
    edit_form_data = {
        "notification_urls": notification_url,
        "notification_title": "New ChangeDetection.io Notification - {watch_url}",
        "notification_body": notification_body,
        "notification_format": "Text",
        "url": test_url,
        "tag": "my tag",
        "title": "my title",
        "headers": "",
        # This selector is not present in the original response
        "css_filter": '#nope-doesnt-exist',
        "fetch_backend": "html_requests",
    }

    res = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form_data,
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    time.sleep(3)

    # Threshold not reached yet, so nothing may have been sent
    assert not os.path.isfile("test-datastore/notification.txt")

    # Trigger re-checks; `res` keeps the response of the last one for the assertion below
    for _ in range(App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(3)

    # The error should be visible in the frontend
    assert b'Did the page change its layout' in res.data

    # The notification file should now exist and contain the "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")
    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()
    assert 'CSS/xPath filter was not present in the page' in notification
    assert '#nope-doesnt-exist' in notification

    # Remove it, serve a page that has the filter target, and prove the alert is not re-sent
    os.unlink("test-datastore/notification.txt")
    set_response_with_filter()

    for _ in range(App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(3)

    # A notification should have been sent...
    assert os.path.isfile("test-datastore/notification.txt")
    # ...but without the failed-filter text
    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()
    assert 'CSS/xPath filter was not present in the page' not in notification

    # Clean up for the next test
    client.get(
        url_for("form_delete", uuid="all"),
        follow_redirects=True
    )
| @@ -3,6 +3,8 @@ import queue | ||||
| import time | ||||
|  | ||||
| from changedetectionio import content_fetcher | ||||
| from changedetectionio.html_tools import FilterNotFoundInResponse | ||||
|  | ||||
| # A single update worker | ||||
| # | ||||
| # Requests for checking on a single site(watch) from a queue of watches | ||||
| @@ -19,6 +21,32 @@ class update_worker(threading.Thread): | ||||
|         self.datastore = datastore | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|     def send_filter_failure_notification(self, uuid): | ||||
|  | ||||
|         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') | ||||
|         watch = self.datastore.data['watching'].get(uuid, False) | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         watch['css_filter'], | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|  | ||||
|         elif len(self.datastore.data['settings']['application']['notification_urls']): | ||||
|             n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] | ||||
|  | ||||
|         # Only prepare to notify if the rules above matched | ||||
|         if 'notification_urls' in n_object: | ||||
|             n_object.update({ | ||||
|                 'watch_url': watch['url'], | ||||
|                 'uuid': uuid | ||||
|             }) | ||||
|             self.notification_q.put(n_object) | ||||
|             print("Sent filter not found notification for {}".format(uuid)) | ||||
|  | ||||
|     def run(self): | ||||
|         from changedetectionio import fetch_site_status | ||||
|  | ||||
| @@ -55,11 +83,23 @@ class update_worker(threading.Thread): | ||||
|                     except content_fetcher.ReplyWithContentButNoText as e: | ||||
|                         # Totally fine, it's by choice - just continue on, nothing more to care about | ||||
|                         # Page had elements/content but no renderable text | ||||
|                         if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'): | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"}) | ||||
|                         else: | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) | ||||
|                         pass | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) | ||||
|                     except FilterNotFoundInResponse as e: | ||||
|                         err_text = "Filter '{}' not found - Did the page change its layout?".format(str(e)) | ||||
|                         c = 0 | ||||
|                         if self.datastore.data['watching'].get(uuid, False): | ||||
|                             c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) | ||||
|                         c += 1 | ||||
|  | ||||
|                         # Send notification if we reached the threshold? | ||||
|                         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) | ||||
|                         print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c)) | ||||
|                         if threshold >0 and c >= threshold: | ||||
|                             self.send_filter_failure_notification(uuid) | ||||
|                             c = 0 | ||||
|  | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'consecutive_filter_failures': c}) | ||||
|                     except content_fetcher.EmptyReply as e: | ||||
|                         # Some kind of custom to-str handler in the exception handler that does this? | ||||
|                         err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) | ||||
| @@ -89,6 +129,7 @@ class update_worker(threading.Thread): | ||||
|                                 fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) | ||||
|  | ||||
|                             # Generally update anything interesting returned | ||||
|                             update_obj['consecutive_filter_failures'] = 0 | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj=update_obj) | ||||
|  | ||||
|                             # A change was detected | ||||
|   | ||||
		Reference in New Issue
	
	Block a user