Compare commits

...

11 Commits

17 changed files with 283 additions and 116 deletions

View File

@@ -10,6 +10,7 @@ import threading
 import time
 import timeago
+from changedetectionio import queuedWatchMetaData
 from copy import deepcopy
 from distutils.util import strtobool
 from feedgen.feed import FeedGenerator
@@ -35,7 +36,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.40.0.2'
+__version__ = '0.40.0.3'

 datastore = None
@@ -404,7 +405,6 @@ def changedetection_app(config=None, datastore_o=None):
             sorted_watches.append(watch)

         existing_tags = datastore.get_all_tags()
-
         form = forms.quickWatchForm(request.form)
         output = render_template("watch-overview.html",
                                  form=form,
@@ -416,7 +416,7 @@ def changedetection_app(config=None, datastore_o=None):
                                  # Don't link to hosting when we're on the hosting environment
                                  hosted_sticky=os.getenv("SALTED_PASS", False) == False,
                                  guid=datastore.data['app_guid'],
-                                 queued_uuids=[uuid for p,uuid in update_q.queue])
+                                 queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue])

         if session.get('share-link'):
@@ -596,25 +596,16 @@ def changedetection_app(config=None, datastore_o=None):
                     using_default_check_time = False
                     break

            # Use the default if it's the same as system-wide.
            if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
                extra_update_obj['fetch_backend'] = None

            # Ignore text
            form_ignore_text = form.ignore_text.data
            datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
-           # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
-           if form_ignore_text:
-               if len(datastore.data['watching'][uuid].history):
-                   extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
-           # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
-           if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
-               if len(datastore.data['watching'][uuid].history):
-                   extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

            # Be sure proxy value is None
            if datastore.proxy_list is not None and form.data['proxy'] == '':
                extra_update_obj['proxy'] = None
@@ -632,7 +623,7 @@ def changedetection_app(config=None, datastore_o=None):
            datastore.needs_write_urgent = True

            # Queue the watch for immediate recheck, with a higher priority
-           update_q.put((1, uuid))
+           update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))

            # Diff page [edit] link should go back to diff page
            if request.args.get("next") and request.args.get("next") == 'diff':
@@ -764,8 +755,11 @@ def changedetection_app(config=None, datastore_o=None):
    @login_required
    def import_page():
        remaining_urls = []
+
+       from changedetectionio import forms
+       form = forms.importForm(request.form)
        if request.method == 'POST':
-           from .importer import import_url_list, import_distill_io_json
+           from .importer import import_url_list, import_distill_io_json, import_changedetection_io_zip

            # URL List import
            if request.values.get('urls') and len(request.values.get('urls').strip()):
@@ -773,7 +767,7 @@ def changedetection_app(config=None, datastore_o=None):
                importer = import_url_list()
                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
                for uuid in importer.new_uuids:
-                   update_q.put((1, uuid))
+                   update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                if len(importer.remaining_data) == 0:
                    return redirect(url_for('index'))
@@ -786,12 +780,22 @@ def changedetection_app(config=None, datastore_o=None):
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
-                   update_q.put((1, uuid))
+                   update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+
+           if request.files.get("backup_zip_file"):
+               if not form.validate():
+                   flash("An error occurred, please see below.", "error")
+               else:
+                   d_importer = import_changedetection_io_zip()
+                   d_importer.run(data=None, flash=flash, datastore=datastore)
+                   for uuid in d_importer.new_uuids:
+                       # Queue without priority, we will examine their own rule to find out if it should be checked
+                       update_q.put(queuedWatchMetaData.PrioritizedItem(item={'uuid': uuid, 'skip_when_checksum_same': True}))

        # Could be some remaining, or we could be on GET
        output = render_template("import.html",
+                                form=form,
                                 import_url_list_remaining="\n".join(remaining_urls),
                                 original_distill_json=''
                                 )
@@ -1151,7 +1155,7 @@ def changedetection_app(config=None, datastore_o=None):
        if not add_paused and new_uuid:
            # Straight into the queue.
-           update_q.put((1, new_uuid))
+           update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
            flash("Watch added.")

        if add_paused:
@@ -1188,7 +1192,7 @@ def changedetection_app(config=None, datastore_o=None):
        uuid = list(datastore.data['watching'].keys()).pop()
        new_uuid = datastore.clone(uuid)
-       update_q.put((5, new_uuid))
+       update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
        flash('Cloned.')
        return redirect(url_for('index'))
@@ -1196,7 +1200,7 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/api/checknow", methods=['GET'])
    @login_required
    def form_watch_checknow():
+       # Forced recheck will skip the 'skip if content is the same' rule
        tag = request.args.get('tag')
        uuid = request.args.get('uuid')
        i = 0
@@ -1205,11 +1209,9 @@ def changedetection_app(config=None, datastore_o=None):
        for t in running_update_threads:
            running_uuids.append(t.current_uuid)

-       # @todo check thread is running and skip
        if uuid:
            if uuid not in running_uuids:
-               update_q.put((1, uuid))
+               update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
                i = 1

        elif tag != None:
@@ -1217,14 +1219,14 @@ def changedetection_app(config=None, datastore_o=None):
            for watch_uuid, watch in datastore.data['watching'].items():
                if (tag != None and tag in watch['tag']):
                    if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
-                       update_q.put((1, watch_uuid))
+                       update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                        i += 1
        else:
            # No tag, no uuid, add everything.
            for watch_uuid, watch in datastore.data['watching'].items():
                if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
-                   update_q.put((1, watch_uuid))
+                   update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                    i += 1
        flash("{} watches are queued for rechecking.".format(i))
        return redirect(url_for('index', tag=tag))
@@ -1271,6 +1273,14 @@ def changedetection_app(config=None, datastore_o=None):
                datastore.data['watching'][uuid.strip()]['notification_muted'] = False
            flash("{} watches un-muted".format(len(uuids)))

+       elif (op == 'recheck'):
+           for uuid in uuids:
+               uuid = uuid.strip()
+               if datastore.data['watching'].get(uuid):
+                   # Recheck and require a full reprocessing
+                   update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+           flash("{} watches queued for rechecking".format(len(uuids)))
+
        elif (op == 'notification-default'):
            from changedetectionio.notification import (
                default_notification_format_for_watch
@@ -1344,7 +1354,7 @@ def changedetection_app(config=None, datastore_o=None):
    app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps')

    import changedetectionio.blueprint.price_data_follower as price_data_follower
-   app.register_blueprint(price_data_follower.construct_blueprint(datastore), url_prefix='/price_data_follower')
+   app.register_blueprint(price_data_follower.construct_blueprint(datastore, update_q), url_prefix='/price_data_follower')

    # @todo handle ctrl break
@@ -1452,7 +1462,11 @@ def ticker_thread_check_time_launch_checks():
    watch_uuid_list = []
    while True:
        try:
-           watch_uuid_list = datastore.data['watching'].keys()
+           # Get a list of watches sorted by last_checked, [1] because it gets passed a tuple
+           # This is so we examine the most over-due first
+           for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
+               watch_uuid_list.append(k[0])
        except RuntimeError as e:
            # RuntimeError: dictionary changed size during iteration
            time.sleep(0.1)
@@ -1492,7 +1506,7 @@ def ticker_thread_check_time_launch_checks():
            seconds_since_last_recheck = now - watch['last_checked']

            if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
-               if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
+               if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]:

                    # Proxies can be set to have a limit on seconds between which they can be called
                    watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
@@ -1523,8 +1537,9 @@ def ticker_thread_check_time_launch_checks():
                        priority,
                        watch.jitter_seconds,
                        now - watch['last_checked']))
+
                    # Into the queue with you
-                   update_q.put((priority, uuid))
+                   update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                    # Reset for next time
                    watch.jitter_seconds = 0
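
A note on the pattern above: both the overview page and the ticker thread now read update_q.queue directly. A minimal standalone sketch of that idiom (not project code): .queue is the PriorityQueue's backing list, so it can be inspected as a best-effort, unlocked membership test without consuming any entries.

from queue import PriorityQueue

def is_already_queued(update_q: PriorityQueue, uuid: str) -> bool:
    # Peek at the heap's backing list; nothing is removed from the queue
    return uuid in [entry.item['uuid'] for entry in update_q.queue]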

View File

@@ -1,3 +1,4 @@
+from changedetectionio import queuedWatchMetaData
 from flask_restful import abort, Resource
 from flask import request, make_response
 import validators
@@ -24,7 +25,7 @@ class Watch(Resource):
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
-           self.update_q.put((1, uuid))
+           self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return "OK", 200

        # Return without history, get that via another API call
@@ -100,7 +101,7 @@ class CreateWatch(Resource):
        extras = {'title': json_data['title'].strip()} if json_data.get('title') else {}
        new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras)
-       self.update_q.put((1, new_uuid))
+       self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
        return {'uuid': new_uuid}, 201

    # Return concise list of available watches and some very basic info
@@ -118,7 +119,7 @@ class CreateWatch(Resource):
        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
-               self.update_q.put((1, uuid))
+               self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return {'status': "OK"}, 200

        return list, 200

View File

@@ -75,15 +75,13 @@ class steppable_browser_interface():
    def action_goto_url(self, url, optional_value):
        # self.page.set_viewport_size({"width": 1280, "height": 5000})
        now = time.time()
-       response = self.page.goto(url, timeout=0, wait_until='domcontentloaded')
-       print("Time to goto URL", time.time() - now)
+       response = self.page.goto(url, timeout=0, wait_until='commit')

        # Wait_until = commit
        # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
        # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
        # This seemed to solve nearly all 'TimeoutErrors'
-       extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
-       self.page.wait_for_timeout(extra_wait * 1000)
+       print("Time to goto URL ", time.time() - now)

    def action_click_element_containing_text(self, selector=None, value=''):
        if not len(value.strip()):
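
The change above swaps Playwright's 'domcontentloaded' wait for 'commit'. A minimal standalone sketch of the approach (sync API; the URL and the 5-second delay are placeholders): return as soon as the document starts loading, then sleep a fixed time instead of trusting load heuristics.

from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    # 'commit': done when the network response arrives and the document starts loading
    page.goto("https://example.com", timeout=0, wait_until='commit')
    page.wait_for_timeout(5 * 1000)  # arbitrary settle time instead of 'load'/'networkidle'
    print(page.title())
    browser.close()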

View File

@@ -3,11 +3,13 @@ from distutils.util import strtobool
 from flask import Blueprint, flash, redirect, url_for
 from flask_login import login_required
 from changedetectionio.store import ChangeDetectionStore
+from changedetectionio import queuedWatchMetaData
+from queue import PriorityQueue

 PRICE_DATA_TRACK_ACCEPT = 'accepted'
 PRICE_DATA_TRACK_REJECT = 'rejected'

-def construct_blueprint(datastore: ChangeDetectionStore):
+def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue):

    price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
@@ -15,6 +17,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
    def accept(uuid):
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
+       update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
        return redirect(url_for("form_watch_checknow", uuid=uuid))

View File

@@ -23,6 +23,9 @@ class Non200ErrorCodeReceived(Exception):
            self.page_text = html_tools.html_to_text(page_html)
        return

+class checksumFromPreviousCheckWasTheSame(Exception):
+   def __init__(self):
+       return

 class JSActionExceptions(Exception):
    def __init__(self, status_code, url, screenshot, message=''):
@@ -39,7 +42,7 @@ class BrowserStepsStepTimout(Exception):

 class PageUnloadable(Exception):
-   def __init__(self, status_code, url, screenshot=False, message=False):
+   def __init__(self, status_code, url, message, screenshot=False):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
@@ -286,6 +289,8 @@ class base_html_playwright(Fetcher):
                proxy=self.proxy,
                # This is needed to enable JavaScript execution on GitHub and others
                bypass_csp=True,
+               # Can't think why we need the service workers for our use case?
+               service_workers='block',
                # Should never be needed
                accept_downloads=False
            )
@@ -294,24 +299,34 @@ class base_html_playwright(Fetcher):
            if len(request_headers):
                context.set_extra_http_headers(request_headers)

-           try:
-               self.page.set_default_navigation_timeout(90000)
-               self.page.set_default_timeout(90000)
+           self.page.set_default_navigation_timeout(90000)
+           self.page.set_default_timeout(90000)

-               # Listen for all console events and handle errors
-               self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
+           # Listen for all console events and handle errors
+           self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))

-               # Bug - never set viewport size BEFORE page.goto
-
-               # Waits for the next navigation. Using Python context manager
-               # prevents a race condition between clicking and waiting for a navigation.
-               with self.page.expect_navigation():
-                   response = self.page.goto(url, wait_until='load')
+           # Goto page
+           try:
+               # Wait_until = commit
+               # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
+               # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
+               # This seemed to solve nearly all 'TimeoutErrors'
+               response = self.page.goto(url, wait_until='commit')
+           except playwright._impl._api_types.Error as e:
+               # Retry once - https://github.com/browserless/chrome/issues/2485
+               # Sometimes errors related to invalid cert's and other can be random
+               print("Content Fetcher > retrying request got error - ", str(e))
+               time.sleep(1)
+               response = self.page.goto(url, wait_until='commit')
+           except Exception as e:
+               print("Content Fetcher > Other exception when page.goto", str(e))
+               context.close()
+               browser.close()
+               raise PageUnloadable(url=url, status_code=None, message=str(e))

+           # Execute any browser steps
+           try:
                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
                self.page.wait_for_timeout(extra_wait * 1000)
@@ -324,17 +339,15 @@ class base_html_playwright(Fetcher):
                # This can be ok, we will try to grab what we could retrieve
                pass
            except Exception as e:
-               print("other exception when page.goto")
-               print(str(e))
+               print("Content Fetcher > Other exception when executing custom JS code", str(e))
                context.close()
                browser.close()
-               raise PageUnloadable(url=url, status_code=None)
+               raise PageUnloadable(url=url, status_code=None, message=str(e))

            if response is None:
                context.close()
                browser.close()
-               print("response object was none")
+               print("Content Fetcher > Response object was none")
                raise EmptyReply(url=url, status_code=None)

            # Bug 2(?) Set the viewport size AFTER loading the page
@@ -353,7 +366,7 @@ class base_html_playwright(Fetcher):
            if len(self.page.content().strip()) == 0:
                context.close()
                browser.close()
-               print("Content was empty")
+               print("Content Fetcher > Content was empty")
                raise EmptyReply(url=url, status_code=None)

            # Bug 2(?) Set the viewport size AFTER loading the page
@@ -498,7 +511,7 @@ class base_html_webdriver(Fetcher):
        try:
            self.driver.quit()
        except Exception as e:
-           print("Exception in chrome shutdown/quit" + str(e))
+           print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))

    # "html_requests" is listed as the default fetcher in store.py!

View File

@@ -1,4 +1,5 @@
 import hashlib
+import json
 import logging
 import os
 import re
@@ -6,6 +7,7 @@ import urllib3

 from changedetectionio import content_fetcher, html_tools
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
+from copy import deepcopy

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -38,8 +40,7 @@ class perform_site_check():
        return regex

-   def run(self, uuid):
-       from copy import deepcopy
+   def run(self, uuid, skip_when_checksum_same=True):
        changed_detected = False
        screenshot = False  # as bytes
        stripped_text_from_html = ""
@@ -122,6 +123,14 @@ class perform_site_check():
            self.screenshot = fetcher.screenshot
            self.xpath_data = fetcher.xpath_data

+           # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
+           # Saves a lot of CPU
+           update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
+           if skip_when_checksum_same:
+               if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
+                   raise content_fetcher.checksumFromPreviousCheckWasTheSame()
+
            # Fetching complete, now filters
            # @todo move to class / maybe inside of fetcher abstract base?
@@ -159,6 +168,14 @@ class perform_site_check():
                include_filters_rule.append("json:$")
                has_filter_rule = True

+       if is_json:
+           # Sort the JSON so we dont get false alerts when the content is just re-ordered
+           try:
+               fetcher.content = json.dumps(json.loads(fetcher.content), sort_keys=True)
+           except Exception as e:
+               # Might have just been a snippet, or otherwise bad JSON, continue
+               pass
+
        if has_filter_rule:
            json_filter_prefixes = ['json:', 'jq:']
            for filter in include_filters_rule:
@@ -166,6 +183,8 @@ class perform_site_check():
                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                    is_html = False

        if is_html or is_source:
            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text

View File

@@ -3,6 +3,7 @@ import re

 from wtforms import (
     BooleanField,
+    FileField,
     Form,
     IntegerField,
     RadioField,
@@ -425,6 +426,14 @@ class watchForm(commonSettingsForm):
            result = False
        return result

+
+class importForm(Form):
+   #backup_zip_file = FileField("File", validators=[validators.regexp('\.zip$', flags=re.IGNORECASE)])
+   backup_zip_file = FileField("File")
+   def validate_backup_zip_file(form, field):
+       if field.data:
+           x=1
+
 # datastore.data['settings']['requests']..
 class globalSettingsRequestForm(Form):
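
validate_backup_zip_file() is left as a stub (x=1). A hypothetical fleshed-out version (the check and message are illustrative, not from the PR) could reject non-ZIP uploads by inspecting the multipart file directly, since the upload arrives via request.files rather than request.form:

from flask import request
from wtforms import FileField, Form, ValidationError

class importForm(Form):
    backup_zip_file = FileField("File")

    def validate_backup_zip_file(form, field):
        # FileField isn't populated from request.form, so read the upload itself
        f = request.files.get('backup_zip_file')
        if f and not f.filename.lower().endswith('.zip'):
            raise ValidationError('Please upload a .zip backup file')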

View File

@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from flask import request, url_for, current_app
 import time
 import validators
@@ -20,6 +21,26 @@ class Importer():
            datastore):
        pass

+
+class import_changedetection_io_zip(Importer):
+   def run(self,
+           data,
+           flash,
+           datastore,
+           ):
+       # `data` should be none, we will hit up request directly
+       import zipfile
+       import io
+
+       with zipfile.ZipFile(io.BytesIO(request.files["backup_zip_file"].read()), 'r') as zf:
+           p = zf.namelist()
+           for file in zf.namelist():
+               x=1
+
+
 class import_url_list(Importer):
    """

View File

@@ -27,7 +27,6 @@ class model(dict):
            'base_url' : None,
            'extract_title_as_title': False,
            'empty_pages_are_a_change': False,
-           'css_dark_mode': False,
            'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
            'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
            'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum

View File

@@ -14,51 +14,52 @@ from changedetectionio.notification import (

 class model(dict):
     __newest_history_key = None
-    __history_n=0
+    __history_n = 0

     __base_config = {
-        #'history': {}, # Dict of timestamp and output stripped filename (removed)
-        #'newest_history_key': 0, (removed, taken from history.txt index)
+        # 'history': {}, # Dict of timestamp and output stripped filename (removed)
+        # 'newest_history_key': 0, (removed, taken from history.txt index)
         'body': None,
         'check_unique_lines': False, # On change-detected, compare against all history if its something new
         'check_count': 0,
         'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
         'extract_text': [], # Extract text by regex after filters
         'extract_title_as_title': False,
         'fetch_backend': None,
         'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
         'has_ldjson_price_data': None,
         'track_ldjson_price_data': None,
         'headers': {}, # Extra headers to send
         'ignore_text': [], # List of text to ignore when calculating the comparison checksum
         'include_filters': [],
         'last_checked': 0,
         'last_error': False,
         'last_viewed': 0, # history key value of the last viewed via the [diff] link
         'method': 'GET',
         # Custom notification content
         'notification_body': None,
         'notification_format': default_notification_format_for_watch,
         'notification_muted': False,
         'notification_title': None,
         'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
         'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
         'paused': False,
         'previous_md5': False,
+        'previous_md5_before_filters': False, # Used for skipping changedetection entirely
         'proxy': None, # Preferred proxy connection
         'subtractive_selectors': [],
         'tag': None,
         'text_should_not_be_present': [], # Text that should not present
         # Re #110, so then if this is set to None, we know to use the default value instead
         # Requires setting to None on submit if it's the same as the default
         # Should be all None by default, so we use the system default in this case.
         'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
         'title': None,
         'trigger_text': [], # List of text or regex to wait for until a change is detected
         'url': None,
         'uuid': str(uuid.uuid4()),
         'webdriver_delay': None,
         'webdriver_js_execute_code': None, # Run before change-detection
     }

     jitter_seconds = 0

     def __init__(self, *arg, **kw):

View File

@@ -0,0 +1,10 @@
+from dataclasses import dataclass, field
+from typing import Any
+
+# So that we can queue some metadata in `item`
+# https://docs.python.org/3/library/queue.html#queue.PriorityQueue
+#
+@dataclass(order=True)
+class PrioritizedItem:
+    priority: int
+    item: Any = field(compare=False)
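
A small usage sketch (assuming the package is importable): PriorityQueue pops the lowest priority number first, and because item is declared with compare=False the dict payloads are never compared against each other; without that, two entries with equal priority would raise TypeError when the heap tried to order their dicts.

from queue import PriorityQueue
from changedetectionio import queuedWatchMetaData

q = PriorityQueue()
q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': 'cloned'}))      # stand-in uuids
q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': 'user-click'}))

assert q.get().item['uuid'] == 'user-click'  # priority 1 wins
assert q.get().item['uuid'] == 'cloned'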

View File

@@ -1,3 +1,6 @@
+// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
+// All rights reserved.
+
 // @file Scrape the page looking for elements of concern (%ELEMENTS%)
 // http://matatk.agrip.org.uk/tests/position-and-width/
 // https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
@@ -89,8 +92,8 @@ for (var i = 0; i < elements.length; i++) {
        continue
    }

-   // Forget really small ones
-   if (bbox['width'] < 10 && bbox['height'] < 10) {
+   // Skip really small ones, and where width or height ==0
+   if (bbox['width'] * bbox['height'] < 100) {
        continue;
    }
@@ -146,7 +149,6 @@ for (var i = 0; i < elements.length; i++) {
 }

 // Inject the current one set in the include_filters, which may be a CSS rule
 // used for displaying the current one in VisualSelector, where its not one we generated.
-
 if (include_filters.length) {
@@ -174,10 +176,23 @@ if (include_filters.length) {
        }

        if (q) {
-           bbox = q.getBoundingClientRect();
-           console.log("xpath_element_scraper: Got filter element, scroll from top was "+scroll_y)
-       } else {
-           console.log("xpath_element_scraper: filter element "+f+" was not found");
+           // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
+           if (q.hasOwnProperty('getBoundingClientRect')) {
+               bbox = q.getBoundingClientRect();
+               console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
+           } else {
+               try {
+                   // Try and see we can find its ownerElement
+                   bbox = q.ownerElement.getBoundingClientRect();
+                   console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
+               } catch (e) {
+                   console.log("xpath_element_scraper: error looking up ownerElement")
+               }
+           }
+       }
+
+       if (!q) {
+           console.log("xpath_element_scraper: filter element " + f + " was not found");
        }

        if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
@@ -192,5 +207,9 @@ if (include_filters.length) {
    }
 }

+// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
+// so that we dont select the wrapping element by mistake and be unable to select what we want
+size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
+
 // Window.width required for proper scaling in the frontend
 return {'size_pos': size_pos, 'browser_width': window.innerWidth};
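
The new sort means the click handler can scan candidates front-to-back and take the first hit. The same idea reduced to a few lines of Python (the data is made up):

boxes = [
    {'selector': 'div.wrapper', 'width': 300, 'height': 200},
    {'selector': 'span.price', 'width': 80, 'height': 20},
]
boxes.sort(key=lambda b: b['width'] * b['height'])
assert boxes[0]['selector'] == 'span.price'  # smallest area first, so the innermost match wins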

View File

@@ -1,4 +1,5 @@
-// Horrible proof of concept code :)
+// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
+// All rights reserved.
 // yes - this is really a hack, if you are a front-ender and want to help, please get in touch!

 $(document).ready(function () {
@@ -177,9 +178,10 @@ $(document).ready(function () {
            // Basically, find the most 'deepest'
            var found = 0;
            ctx.fillStyle = 'rgba(205,0,0,0.35)';
-           for (var i = selector_data['size_pos'].length; i !== 0; i--) {
+           // Will be sorted by smallest width*height first
+           for (var i = 0; i < selector_data['size_pos'].length; i++) {
                // draw all of them? let them choose somehow?
-               var sel = selector_data['size_pos'][i - 1];
+               var sel = selector_data['size_pos'][i];
                // If we are in a bounding-box
                if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale
                    &&
@@ -195,7 +197,7 @@ $(document).ready(function () {
                    // no need to keep digging
                    // @todo or, O to go out/up, I to go in
                    // or double click to go up/out the selector?
-                   current_selected_i = i - 1;
+                   current_selected_i = i;
                    found += 1;
                    break;
                }

View File

@@ -1,5 +1,6 @@
 {% extends 'base.html' %}
 {% block content %}
+{% from '_helpers.jinja' import render_field %}
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>

 <div class="edit-form monospaced-textarea">
@@ -7,11 +8,12 @@
        <ul>
            <li class="tab" id=""><a href="#url-list">URL List</a></li>
            <li class="tab"><a href="#distill-io">Distill.io</a></li>
+           <li class="tab"><a href="#changedetection-io">Changedetection.io</a></li>
        </ul>
    </div>

    <div class="box-wrap inner">
-       <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
+       <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST" enctype="multipart/form-data">
            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
            <div class="tab-pane-inner" id="url-list">
                <fieldset class="pure-group">
@@ -77,6 +79,12 @@
 " rows="25">{{ original_distill_json }}</textarea>
                </fieldset>
            </div>
+           <div class="tab-pane-inner" id="changedetection-io">
+               Upload your changedetection.io backup ZIP here<br>
+               <fieldset class="pure-group">
+                   {{ render_field(form.backup_zip_file) }}
+               </fieldset>
+           </div>
            <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
        </form>

View File

@@ -32,6 +32,7 @@
                <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button>
                <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="mute">Mute</button>
                <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unmute">UnMute</button>
+               <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button>
                <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button>
                <button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
            </div>

View File

@@ -394,6 +394,48 @@ def check_json_ext_filter(json_filter, client, live_server):
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

+def test_ignore_json_order(client, live_server):
+   # A change in order shouldn't trigger a notification
+   with open("test-datastore/endpoint-content.txt", "w") as f:
+       f.write('{"hello" : 123, "world": 123}')
+
+   # Add our URL to the import page
+   test_url = url_for('test_endpoint', content_type="application/json", _external=True)
+   res = client.post(
+       url_for("import_page"),
+       data={"urls": test_url},
+       follow_redirects=True
+   )
+   assert b"1 Imported" in res.data
+   time.sleep(2)
+
+   with open("test-datastore/endpoint-content.txt", "w") as f:
+       f.write('{"world" : 123, "hello": 123}')
+
+   # Trigger a check
+   client.get(url_for("form_watch_checknow"), follow_redirects=True)
+   time.sleep(2)
+
+   res = client.get(url_for("index"))
+   assert b'unviewed' not in res.data
+
+   # Just to be sure it still works
+   with open("test-datastore/endpoint-content.txt", "w") as f:
+       f.write('{"world" : 123, "hello": 124}')
+
+   # Trigger a check
+   client.get(url_for("form_watch_checknow"), follow_redirects=True)
+   time.sleep(2)
+
+   res = client.get(url_for("index"))
+   assert b'unviewed' in res.data
+
+   res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+   assert b'Deleted' in res.data
+
 def test_check_jsonpath_ext_filter(client, live_server):
    check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)

View File

@@ -4,6 +4,7 @@ import queue
 import time

 from changedetectionio import content_fetcher
+from changedetectionio import queuedWatchMetaData
 from changedetectionio.fetch_site_status import FilterNotFoundInResponse

 # A single update worker
@@ -157,11 +158,12 @@ class update_worker(threading.Thread):
        while not self.app.config.exit.is_set():
            try:
-               priority, uuid = self.q.get(block=False)
+               queued_item_data = self.q.get(block=False)
            except queue.Empty:
                pass
            else:
+               uuid = queued_item_data.item.get('uuid')
                self.current_uuid = uuid

                if uuid in list(self.datastore.data['watching'].keys()):
@@ -171,11 +173,11 @@ class update_worker(threading.Thread):
                    update_obj = {}
                    xpath_data = False
                    process_changedetection_results = True
-                   print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
+                   print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
                    now = time.time()

                    try:
-                       changed_detected, update_obj, contents = update_handler.run(uuid)
+                       changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                        # We then convert/.decode('utf-8') for the notification etc
@@ -241,6 +243,10 @@ class update_worker(threading.Thread):
                        process_changedetection_results = True

+                   except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
+                       # Yes fine, so nothing todo
+                       pass
+
                    except content_fetcher.BrowserStepsStepTimout as e:
                        if not self.datastore.data['watching'].get(uuid):
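
Taken together with the run() change in fetch_site_status above, the new except branch closes the loop. A hedged, self-contained sketch of the control flow (the checksums and uuid are stand-ins):

class checksumFromPreviousCheckWasTheSame(Exception):
    pass

def run(uuid, skip_when_checksum_same=True):
    stored_md5 = 'd41d8cd98f00b204e9800998ecf8427e'  # stand-in: checksum saved last run
    fresh_md5 = 'd41d8cd98f00b204e9800998ecf8427e'   # stand-in: checksum of the new fetch
    if skip_when_checksum_same and fresh_md5 == stored_md5:
        raise checksumFromPreviousCheckWasTheSame()
    return True, {}, b'...'

try:
    changed_detected, update_obj, contents = run('abc-123')
except checksumFromPreviousCheckWasTheSame:
    # Same content as last time, so skip all filtering and diffing work
    pass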