diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 98f1a954..24718357 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -552,6 +552,7 @@ def changedetection_app(config=None, datastore_o=None): if request.method == 'GET': form.minutes_between_check.data = int(datastore.data['settings']['requests']['minutes_between_check']) form.notification_urls.data = datastore.data['settings']['application']['notification_urls'] + form.global_ignore_text.data = datastore.data['settings']['application']['global_ignore_text'] form.extract_title_as_title.data = datastore.data['settings']['application']['extract_title_as_title'] form.fetch_backend.data = datastore.data['settings']['application']['fetch_backend'] form.notification_title.data = datastore.data['settings']['application']['notification_title'] @@ -578,7 +579,8 @@ def changedetection_app(config=None, datastore_o=None): datastore.data['settings']['application']['notification_format'] = form.notification_format.data datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data datastore.data['settings']['application']['base_url'] = form.base_url.data - + datastore.data['settings']['application']['global_ignore_text'] = form.global_ignore_text.data + if form.trigger_check.data: if len(form.notification_urls.data): n_object = {'watch_url': "Test from changedetection.io!", diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 69ff7de0..dec73987 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -129,11 +129,11 @@ class perform_site_check(): update_obj["last_check_status"] = fetcher.get_last_status_code() update_obj["last_error"] = False - # If there's text to skip # @todo we could abstract out the get_text() to handle this cleaner - if len(watch['ignore_text']): - stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, watch['ignore_text']) + text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) + if len(text_to_ignore): + stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore) else: stripped_text_from_html = stripped_text_from_html.encode('utf8') diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index dc06c67a..586a27bb 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -258,3 +258,4 @@ class globalSettingsForm(commonSettingsForm): [validators.NumberRange(min=1)]) extract_title_as_title = BooleanField('Extract from document and use as watch title') base_url = StringField('Base URL', validators=[validators.Optional()]) + global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) \ No newline at end of file diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 60f3d826..fb7cede5 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -45,6 +45,7 @@ class ChangeDetectionStore: 'base_url' : None, 'extract_title_as_title': False, 'fetch_backend': 'html_requests', + 'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum 'notification_urls': [], # Apprise URL list # Custom notification content 'notification_title': None, diff --git a/changedetectionio/templates/settings.html b/changedetectionio/templates/settings.html index 3a048cc4..5c031305 100644 --- a/changedetectionio/templates/settings.html +++ b/changedetectionio/templates/settings.html @@ -13,6 +13,7 @@ <li class="tab" id="default-tab"><a href="#general">General</a></li> <li class="tab"><a href="#notifications">Notifications</a></li> <li class="tab"><a href="#fetching">Fetching</a></li> + <li class="tab"><a href="#filters">Global Filters</a></li> </ul> </div> <div class="box-wrap inner"> @@ -65,6 +66,20 @@ </span> </div> </div> + + + <div class="tab-pane-inner" id="filters"> + <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span> + <fieldset class="pure-group"> + {{ render_field(form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line +/some.regex\d{2}/ for case-INsensitive regex + ") }} + <span class="pure-form-message-inline"> + Each line processed separately, any line matching will be ignored.<br/> + Regular Expression support, wrap the line in forward slash <b>/regex/</b>. + </span> + </div> + <div id="actions"> <div class="pure-control-group"> <button type="submit" class="pure-button pure-button-primary">Save</button> diff --git a/changedetectionio/tests/test_ignore_text.py b/changedetectionio/tests/test_ignore_text.py index 119f26eb..79aa761d 100644 --- a/changedetectionio/tests/test_ignore_text.py +++ b/changedetectionio/tests/test_ignore_text.py @@ -151,3 +151,88 @@ def test_check_ignore_text_functionality(client, live_server): res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data + +def test_check_global_ignore_text_functionality(client, live_server): + sleep_time_for_fetch_thread = 3 + + ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ" + set_original_ignore_response() + + # Give the endpoint time to spin up + time.sleep(1) + + # Add our URL to the import page + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(sleep_time_for_fetch_thread) + + # Goto the settings page, add our ignore text + res = client.post( + url_for("settings_page"), + data={ + "minutes_between_check": 180, + "global_ignore_text": ignore_text, + 'fetch_backend': "html_requests" + }, + follow_redirects=True + ) + assert b"Settings updated." in res.data + + # Goto the edit page of the item, add our ignore text + # Add our URL to the import page + res = client.post( + url_for("edit_page", uuid="first"), + data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"}, + follow_redirects=True + ) + assert b"Updated watch." in res.data + + # Check it saved + res = client.get( + url_for("settings_page"), + ) + assert bytes(ignore_text.encode('utf-8')) in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(sleep_time_for_fetch_thread) + + # It should report nothing found (no new 'unviewed' class) + res = client.get(url_for("index")) + assert b'unviewed' not in res.data + assert b'/test-endpoint' in res.data + + # Make a change + set_modified_ignore_response() + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + # Give the thread time to pick it up + time.sleep(sleep_time_for_fetch_thread) + + # It should report nothing found (no new 'unviewed' class) + res = client.get(url_for("index")) + assert b'unviewed' not in res.data + assert b'/test-endpoint' in res.data + + # Just to be sure.. set a regular modified change.. + set_modified_original_ignore_response() + client.get(url_for("api_watch_checknow"), follow_redirects=True) + time.sleep(sleep_time_for_fetch_thread) + res = client.get(url_for("index")) + assert b'unviewed' in res.data + + res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data \ No newline at end of file