mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 00:27:48 +00:00 
			
		
		
		
	Compare commits
	
		
			8 Commits
		
	
	
		
			0.46.04
			...
			feature/fi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					f5af262a81 | ||
| 
						 | 
					256e328e98 | ||
| 
						 | 
					befd093ea2 | ||
| 
						 | 
					73cccff8bb | ||
| 
						 | 
					7eaae0daee | ||
| 
						 | 
					269a7f1c54 | ||
| 
						 | 
					c2deb18945 | ||
| 
						 | 
					2584af19d2 | 
@@ -349,6 +349,8 @@ class watchForm(commonSettingsForm):
 | 
			
		||||
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
    save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
    proxy = RadioField('Proxy')
 | 
			
		||||
    filter_failure_notification_send = BooleanField(
 | 
			
		||||
        'Send a notification when the filter can no longer be found on the page', default=False)
 | 
			
		||||
 | 
			
		||||
    def validate(self, **kwargs):
 | 
			
		||||
        if not super().validate():
 | 
			
		||||
@@ -387,6 +389,11 @@ class globalSettingsApplicationForm(commonSettingsForm):
 | 
			
		||||
    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
 | 
			
		||||
    password = SaltyPasswordField()
 | 
			
		||||
 | 
			
		||||
    filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
 | 
			
		||||
                                                                  render_kw={"style": "width: 5em;"},
 | 
			
		||||
                                                                  validators=[validators.NumberRange(min=0,
 | 
			
		||||
                                                                                                     message="Should contain zero or more attempts")])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class globalSettingsForm(Form):
 | 
			
		||||
    # Define these as FormFields/"sub forms", this way it matches the JSON storage
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,4 @@
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
from typing import List
 | 
			
		||||
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
@@ -8,16 +7,23 @@ import re
 | 
			
		||||
from inscriptis import get_text
 | 
			
		||||
from inscriptis.model.config import ParserConfig
 | 
			
		||||
 | 
			
		||||
class FilterNotFoundInResponse(ValueError):
    """Signal that a configured content filter matched nothing in a response.

    The filter helpers in this module raise it with the filter expression as
    the message, so ``str(exc)`` yields the filter that could not be found.
    """

    def __init__(self, msg):
        # Delegate straight to ValueError so ``args`` and ``str()`` carry msg.
        super().__init__(msg)
 | 
			
		||||
 | 
			
		||||
class JSONNotFound(ValueError):
    """ValueError subclass carrying a caller-supplied message.

    NOTE(review): presumably raised when no JSON could be located in fetched
    content; the raising code is not visible here, so confirm against callers.
    """

    def __init__(self, msg):
        # Delegate straight to ValueError so ``args`` and ``str()`` carry msg.
        super().__init__(msg)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
 | 
			
		||||
def css_filter(css_filter, html_content):
    """Return the markup of every element in *html_content* matching a CSS rule.

    Args:
        css_filter: CSS selector handed to BeautifulSoup's ``select``.
        html_content: HTML source to search.

    Returns:
        The string form of each matching element, concatenated in the order
        ``select`` returned them, followed by a single newline.

    Raises:
        FilterNotFoundInResponse: if the selector matches no element; the
            selector itself is used as the exception message.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    # NOTE(review): ``separator`` is forwarded exactly as the original call
    # did; confirm the installed BeautifulSoup/soupsieve accepts it.
    matches = soup.select(css_filter, separator="")
    if not matches:
        # Let the caller distinguish "filter vanished" from "content changed".
        raise FilterNotFoundInResponse(css_filter)

    return "".join(str(item) for item in matches) + "\n"
 | 
			
		||||
@@ -42,8 +48,12 @@ def xpath_filter(xpath_filter, html_content):
 | 
			
		||||
    tree = html.fromstring(bytes(html_content, encoding='utf-8'))
 | 
			
		||||
    html_block = ""
 | 
			
		||||
 | 
			
		||||
    for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}):
 | 
			
		||||
        html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"<br/>"
 | 
			
		||||
    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
 | 
			
		||||
    if len(r) == 0:
 | 
			
		||||
        raise FilterNotFoundInResponse(css_filter)
 | 
			
		||||
 | 
			
		||||
    for item in r:
 | 
			
		||||
        html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "<br/>"
 | 
			
		||||
 | 
			
		||||
    return html_block
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,8 @@ from changedetectionio.notification import (
 | 
			
		||||
    default_notification_title,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
 | 
			
		||||
 | 
			
		||||
class model(dict):
 | 
			
		||||
    base_config = {
 | 
			
		||||
            'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
 | 
			
		||||
@@ -30,6 +32,7 @@ class model(dict):
 | 
			
		||||
                    'extract_title_as_title': False,
 | 
			
		||||
                    'empty_pages_are_a_change': False,
 | 
			
		||||
                    'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
 | 
			
		||||
                    'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
 | 
			
		||||
                    'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
 | 
			
		||||
                    'global_subtractive_selectors': [],
 | 
			
		||||
                    'ignore_whitespace': True,
 | 
			
		||||
 
 | 
			
		||||
@@ -41,6 +41,8 @@ class model(dict):
 | 
			
		||||
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
 | 
			
		||||
            'text_should_not_be_present': [], # Text that should not present
 | 
			
		||||
            'fetch_backend': None,
 | 
			
		||||
            'filter_failure_notification_send': True,
 | 
			
		||||
            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
 | 
			
		||||
            'extract_title_as_title': False,
 | 
			
		||||
            'check_unique_lines': False, # On change-detected, compare against all history if its something new
 | 
			
		||||
            'proxy': None, # Preferred proxy connection
 | 
			
		||||
 
 | 
			
		||||
@@ -34,7 +34,6 @@ def process_notification(n_object, datastore):
 | 
			
		||||
        valid_notification_formats[default_notification_format],
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Insert variables into the notification content
 | 
			
		||||
    notification_parameters = create_notification_parameters(n_object, datastore)
 | 
			
		||||
 | 
			
		||||
@@ -91,7 +90,8 @@ def process_notification(n_object, datastore):
 | 
			
		||||
                    # So that what's generated in n_body is in line with what is going to be sent.
 | 
			
		||||
                    # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
 | 
			
		||||
                    if not 'format=' in url and (n_format == 'text' or n_format == 'markdown'):
 | 
			
		||||
                        url = "{}?format={}".format(url, n_format)
 | 
			
		||||
                        prefix = '?' if not '?' in url else '&'
 | 
			
		||||
                        url = "{}{}format={}".format(url, prefix, n_format)
 | 
			
		||||
 | 
			
		||||
                apobj.add(url)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -62,6 +62,12 @@
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.extract_title_as_title) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.filter_failure_notification_send) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">
 | 
			
		||||
                         Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -36,7 +36,13 @@
 | 
			
		||||
                        {{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
 | 
			
		||||
                            <br/>
 | 
			
		||||
                        Set to <strong>0</strong> to disable
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {% if not hide_remove_pass %}
 | 
			
		||||
                            {% if current_user.is_authenticated %}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										123
									
								
								changedetectionio/tests/test_filter_failure_notification.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								changedetectionio/tests/test_filter_failure_notification.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,123 @@
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
import re
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import set_original_response, live_server_setup
 | 
			
		||||
from changedetectionio.model import App
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_response_with_filter():
    """Write the test endpoint fixture that contains the filtered element.

    The page written to ``test-datastore/endpoint-content.txt`` includes a
    ``<div id="nope-doesnt-exist">`` element, i.e. the element targeted by the
    ``#nope-doesnt-exist`` CSS filter used in this test module.
    """
    test_return_data = """<html>
       <body>
     Some initial text</br>
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
     <div id="nope-doesnt-exist">Some text thats the same</div>     
     </body>
     </html>
    """

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Hard to just add more live server URLs when one test is already running (I think)
 | 
			
		||||
# So we add our test here (was in a different file)
 | 
			
		||||
def test_check_notification(client, live_server):
    """Check the "filter not found" notification end to end.

    Steps:
      1. Add a watch, then configure it with a CSS filter that is absent from
         the page plus a notification URL.
      2. Assert that no notification is written before the failure threshold
         is reached.
      3. Re-check the watch the default-threshold number of times and assert
         the filter-failure alert shows up in the UI and in the notification
         file.
      4. Serve a page that contains the filtered element, re-check, and
         assert the notification that is written is not the filter alert.
    """
    live_server_setup(live_server)
    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Register the test endpoint as a new watch
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("form_watch_add"),
        data={"url": test_url, "tag": ''},
        follow_redirects=True
    )
    assert b"Watch added" in res.data

    # Give the thread time to pick up the first version
    time.sleep(3)

    # Build the notification URL from the test notification endpoint,
    # swapping 'http' for 'json' in the endpoint URL.
    url = url_for('test_notification_endpoint', _external=True)
    notification_url = url.replace('http', 'json')

    print(">>>> Notification URL: " + notification_url)

    # Just a regular notification setting, this will be used by the special 'filter not found' notification
    notification_form_data = {"notification_urls": notification_url,
                              "notification_title": "New ChangeDetection.io Notification - {watch_url}",
                              "notification_body": "BASE URL: {base_url}\n"
                                                   "Watch URL: {watch_url}\n"
                                                   "Watch UUID: {watch_uuid}\n"
                                                   "Watch title: {watch_title}\n"
                                                   "Watch tag: {watch_tag}\n"
                                                   "Preview: {preview_url}\n"
                                                   "Diff URL: {diff_url}\n"
                                                   "Snapshot: {current_snapshot}\n"
                                                   "Diff: {diff}\n"
                                                   "Diff Full: {diff_full}\n"
                                                   ":-)",
                              "notification_format": "Text"}

    # Watch settings, including a CSS filter that is not in the original response
    notification_form_data.update({
        "url": test_url,
        "tag": "my tag",
        "title": "my title",
        "headers": "",
        "css_filter": '#nope-doesnt-exist',
        "fetch_backend": "html_requests"})

    res = client.post(
        url_for("edit_page", uuid="first"),
        data=notification_form_data,
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    time.sleep(3)

    # Now the notification should not exist, because we didnt reach the threshold
    assert not os.path.isfile("test-datastore/notification.txt")

    # Re-check until the default failure threshold has been reached
    for _ in range(App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(3)

    # We should see something in the frontend
    assert b'Did the page change its layout' in res.data

    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")
    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()
    assert 'CSS/xPath filter was not present in the page' in notification
    assert '#nope-doesnt-exist' in notification

    # Remove it and prove that it doesnt trigger when not expected
    os.unlink("test-datastore/notification.txt")
    set_response_with_filter()

    for _ in range(App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(3)

    # It should have sent a notification, but..
    assert os.path.isfile("test-datastore/notification.txt")
    # but it should not contain the info about the failed filter
    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()
    assert 'CSS/xPath filter was not present in the page' not in notification

    # cleanup for the next
    client.get(
        url_for("form_delete", uuid="all"),
        follow_redirects=True
    )
 | 
			
		||||
@@ -3,6 +3,8 @@ import queue
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from changedetectionio import content_fetcher
 | 
			
		||||
from changedetectionio.html_tools import FilterNotFoundInResponse
 | 
			
		||||
 | 
			
		||||
# A single update worker
 | 
			
		||||
#
 | 
			
		||||
# Requests for checking on a single site(watch) from a queue of watches
 | 
			
		||||
@@ -19,6 +21,32 @@ class update_worker(threading.Thread):
 | 
			
		||||
        self.datastore = datastore
 | 
			
		||||
        super().__init__(*args, **kwargs)
 | 
			
		||||
 | 
			
		||||
    def send_filter_failure_notification(self, uuid):
        """Queue a "CSS/xPath filter not found" alert for one watch.

        The alert goes to the watch's own notification URLs when it has any,
        otherwise to the application-wide notification URLs. When neither is
        configured, or the watch is no longer in the datastore, nothing is
        queued.

        Args:
            uuid: Key of the watch in ``self.datastore.data['watching']``.
        """
        watch = self.datastore.data['watching'].get(uuid)
        if watch is None:
            # The watch is gone from the datastore; subscripting the lookup
            # default would raise, and there is nothing left to report on.
            return

        app_settings = self.datastore.data['settings']['application']
        threshold = app_settings.get('filter_failure_notification_threshold_attempts')

        # Doubled braces survive .format() as single-brace placeholders
        # ({watch_url}, {base_url}, {watch_uuid}) in the queued body.
        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
                        watch['css_filter'],
                        threshold),
                    'notification_format': 'text'}

        # Per-watch URLs take precedence over the application-wide ones.
        if watch['notification_urls']:
            n_object['notification_urls'] = watch['notification_urls']
        elif app_settings['notification_urls']:
            n_object['notification_urls'] = app_settings['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': uuid
            })
            self.notification_q.put(n_object)
            print("Sent filter not found notification for {}".format(uuid))
 | 
			
		||||
 | 
			
		||||
    def run(self):
 | 
			
		||||
        from changedetectionio import fetch_site_status
 | 
			
		||||
 | 
			
		||||
@@ -55,11 +83,23 @@ class update_worker(threading.Thread):
 | 
			
		||||
                    except content_fetcher.ReplyWithContentButNoText as e:
 | 
			
		||||
                        # Totally fine, it's by choice - just continue on, nothing more to care about
 | 
			
		||||
                        # Page had elements/content but no renderable text
 | 
			
		||||
                        if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'):
 | 
			
		||||
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"})
 | 
			
		||||
                        else:
 | 
			
		||||
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
 | 
			
		||||
                        pass
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
 | 
			
		||||
                    except FilterNotFoundInResponse as e:
 | 
			
		||||
                        err_text = "Filter '{}' not found - Did the page change its layout?".format(str(e))
 | 
			
		||||
                        c = 0
 | 
			
		||||
                        if self.datastore.data['watching'].get(uuid, False):
 | 
			
		||||
                            c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
 | 
			
		||||
                        c += 1
 | 
			
		||||
 | 
			
		||||
                        # Send notification if we reached the threshold?
 | 
			
		||||
                        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
 | 
			
		||||
                        print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
 | 
			
		||||
                        if threshold >0 and c >= threshold:
 | 
			
		||||
                            self.send_filter_failure_notification(uuid)
 | 
			
		||||
                            c = 0
 | 
			
		||||
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                                           'consecutive_filter_failures': c})
 | 
			
		||||
                    except content_fetcher.EmptyReply as e:
 | 
			
		||||
                        # Some kind of custom to-str handler in the exception handler that does this?
 | 
			
		||||
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
 | 
			
		||||
@@ -89,6 +129,7 @@ class update_worker(threading.Thread):
 | 
			
		||||
                                fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
 | 
			
		||||
 | 
			
		||||
                            # Generally update anything interesting returned
 | 
			
		||||
                            update_obj['consecutive_filter_failures'] = 0
 | 
			
		||||
                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
 | 
			
		||||
 | 
			
		||||
                            # A change was detected
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user