Fixing page update

highlight ignore lines
Fix labels
2025-11-05 17:16:12 +00:00 · 2024-10-10 13:18:02 +02:00 · 2024-10-10 13:12:23 +02:00 · 2024-10-10 12:53:56 +02:00 · 2024-10-10 12:27:25 +02:00 · 2024-10-10 11:47:13 +02:00
27 changed files with 114 additions and 378 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,4 @@
 recursive-include changedetectionio/api *
-recursive-include changedetectionio/apprise_plugin *
 recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/content_fetchers *
 recursive-include changedetectionio/model *
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.47.03'
+__version__ = '0.46.04'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -58,7 +58,7 @@ class Watch(Resource):
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
            self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):

        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
        if new_uuid:
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
            return {'uuid': new_uuid}, 201
        else:
            return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):

        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return {'status': "OK"}, 200

        return list, 200
--- a/changedetectionio/blueprint/check_proxies/init.py
+++ b/changedetectionio/blueprint/check_proxies/init.py
@@ -1,7 +1,4 @@
-import importlib
 from concurrent.futures import ThreadPoolExecutor
-
-from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio.store import ChangeDetectionStore

 from functools import wraps
@@ -33,6 +30,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def long_task(uuid, preferred_proxy):
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
+        from changedetectionio.processors.text_json_diff import text_json_diff
        from changedetectionio.safe_jinja import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}
@@ -40,12 +38,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        contents = ''
        now = time.time()
        try:
-            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
-            update_handler = processor_module.perform_site_check(datastore=datastore,
-                                                                 watch_uuid=uuid
-                                                                 )
-
-            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
+            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
+            update_handler.call_browser()
        # title, size is len contents not len xfer
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
@@ -54,7 +48,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
-        except FilterNotFoundInResponse:
+        except text_json_diff.FilterNotFoundInResponse:
            status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
        except content_fetcher_exceptions.EmptyReply as e:
            if e.status_code == 403 or e.status_code == 401:
--- a/changedetectionio/blueprint/price_data_follower/init.py
+++ b/changedetectionio/blueprint/price_data_follower/init.py
@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
        datastore.data['watching'][uuid].clear_watch()
-        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
        return redirect(url_for("index"))

    @login_required
--- a/changedetectionio/content_fetchers/requests.py
+++ b/changedetectionio/content_fetchers/requests.py
@@ -75,7 +75,6 @@ class fetcher(Fetcher):
        self.headers = r.headers

        if not r.content or not len(r.content):
-            logger.debug(f"Requests returned empty content for '{url}'")
            if not empty_pages_are_a_change:
                raise EmptyReply(url=url, status_code=r.status_code)
            else:
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -788,6 +788,7 @@ def changedetection_app(config=None, datastore_o=None):
            # Recast it if need be to right data Watch handler
            watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
+
            flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")

            # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
@@ -795,7 +796,7 @@ def changedetection_app(config=None, datastore_o=None):
            datastore.needs_write_urgent = True

            # Queue the watch for immediate recheck, with a higher priority
-            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))

            # Diff page [edit] link should go back to diff page
            if request.args.get("next") and request.args.get("next") == 'diff':
@@ -976,7 +977,7 @@ def changedetection_app(config=None, datastore_o=None):
                importer = import_url_list()
                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
                for uuid in importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                if len(importer.remaining_data) == 0:
                    return redirect(url_for('index'))
@@ -989,7 +990,7 @@ def changedetection_app(config=None, datastore_o=None):
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

            # XLSX importer
            if request.files and request.files.get('xlsx_file'):
@@ -1013,7 +1014,7 @@ def changedetection_app(config=None, datastore_o=None):
                    w_importer.run(data=file, flash=flash, datastore=datastore)

                for uuid in w_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

        # Could be some remaining, or we could be on GET
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1442,7 +1443,7 @@ def changedetection_app(config=None, datastore_o=None):
        new_uuid = datastore.clone(uuid)
        if new_uuid:
            if not datastore.data['watching'].get(uuid).get('paused'):
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
            flash('Cloned.')

        return redirect(url_for('index'))
@@ -1463,7 +1464,7 @@ def changedetection_app(config=None, datastore_o=None):

        if uuid:
            if uuid not in running_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            i = 1

        elif tag:
@@ -1474,7 +1475,7 @@ def changedetection_app(config=None, datastore_o=None):
                        continue
                    if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                        update_q.put(
-                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
+                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
                        )
                        i += 1

@@ -1484,8 +1485,9 @@ def changedetection_app(config=None, datastore_o=None):
                if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                    if with_errors and not watch.get('last_error'):
                        continue
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                    i += 1
+
        flash(f"{i} watches queued for rechecking.")
        return redirect(url_for('index', tag=tag))

@@ -1542,7 +1544,7 @@ def changedetection_app(config=None, datastore_o=None):
                uuid = uuid.strip()
                if datastore.data['watching'].get(uuid):
                    # Recheck and require a full reprocessing
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            flash("{} watches queued for rechecking".format(len(uuids)))

        elif (op == 'clear-errors'):
@@ -1866,7 +1868,7 @@ def ticker_thread_check_time_launch_checks():
                        f"{now - watch['last_checked']:0.2f}s since last checked")

                    # Into the queue with you
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                    # Reset for next time
                    watch.jitter_seconds = 0
--- a/changedetectionio/processors/init.py
+++ b/changedetectionio/processors/init.py
@@ -18,7 +18,6 @@ class difference_detection_processor():
    screenshot = None
    watch = None
    xpath_data = None
-    preferred_proxy = None

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        super().__init__(*args, **kwargs)
@@ -27,8 +26,7 @@ class difference_detection_processor():
        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()

-    def call_browser(self, preferred_proxy_id=None):
-
+    def call_browser(self):
        from requests.structures import CaseInsensitiveDict

        # Protect against file:// access
@@ -44,7 +42,7 @@ class difference_detection_processor():
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
-        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
+        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@@ -157,7 +155,7 @@ class difference_detection_processor():
        # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -27,27 +27,22 @@ def _search_prop_by_value(matches, value):
                return prop[1]  # Yield the desired value and exit the function

 def _deduplicate_prices(data):
-    import re
+    seen = set()
+    unique_data = []

-    '''
-    Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159"
-    Get all the values, clean it and add it to a set then return the unique values
-    '''
-    unique_data = set()
-
-    # Return the complete 'datum' where its price was not seen before
    for datum in data:
+        # Convert 'value' to float if it can be a numeric string, otherwise leave it as is
+        try:
+            normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
+        except ValueError:
+            normalized_value = datum.value

-        if isinstance(datum.value, list):
-            # Process each item in the list
-            normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value])
-            unique_data.update(normalized_value)
-        else:
-            # Process single value
-            v = float(re.sub(r'[^\d.]', '', str(datum.value)))
-            unique_data.add(v)
-
-    return list(unique_data)
+        # If the normalized value hasn't been seen yet, add it to unique data
+        if normalized_value not in seen:
+            unique_data.append(datum)
+            seen.add(normalized_value)
+    
+    return unique_data


 # should return Restock()
@@ -88,13 +83,14 @@ def get_itemprop_availability(html_content) -> Restock:
        if price_result:
            # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
            # parse that for the UI?
-            if len(price_result) > 1 and len(price_result) > 1:
+            prices_found = set(str(item.value).replace('$', '') for item in price_result)
+            if len(price_result) > 1 and len(prices_found) > 1:
                # See of all prices are different, in the case that one product has many embedded data types with the same price
                # One might have $121.95 and another 121.95 etc
-                logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
+                logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
                raise MoreThanOnePriceFound()

-            value['price'] = price_result[0]
+            value['price'] = price_result[0].value

        pricecurrency_result = pricecurrency_parse.find(data)
        if pricecurrency_result:
@@ -144,7 +140,7 @@ class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
        import hashlib

        if not watch:
@@ -224,7 +220,7 @@ class perform_site_check(difference_detection_processor):
            itemprop_availability['original_price'] = itemprop_availability.get('price')
            update_obj['restock']["original_price"] = itemprop_availability.get('price')

-        if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
+        if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
            raise ProcessorException(
                message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
                url=watch.get('url'),
--- a/changedetectionio/processors/text_json_diff/init.py
+++ b/changedetectionio/processors/text_json_diff/init.py
@@ -11,7 +11,10 @@ def _task(watch, update_handler):

    try:
        # The slow process (we run 2 of these in parallel)
-        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
+        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
+            watch=watch,
+            skip_when_checksum_same=False,
+        )
    except FilterNotFoundInResponse as e:
        text_after_filter = f"Filter not found in HTML: {str(e)}"
    except ReplyWithContentButNoText as e:
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -35,7 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
        changed_detected = False
        html_content = ""
        screenshot = False  # as bytes
@@ -58,6 +58,9 @@ class perform_site_check(difference_detection_processor):
        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
        # Saves a lot of CPU
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
+        if skip_when_checksum_same:
+            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
+                raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

        # Fetching complete, now filters

@@ -208,7 +211,6 @@ class perform_site_check(difference_detection_processor):
        # @todo whitespace coming from missing rtrim()?
        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
        # Rewrite's the processing text based on only what diff result they want to see
-
        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
            from changedetectionio import diff
@@ -331,21 +333,13 @@ class perform_site_check(difference_detection_processor):
            if result:
                blocked = True

+        # The main thing that all this at the moment comes down to :)
+        if watch.get('previous_md5') != fetched_md5:
+            changed_detected = True

        # Looks like something changed, but did it match all the rules?
        if blocked:
            changed_detected = False
-        else:
-            # The main thing that all this at the moment comes down to :)
-            if watch.get('previous_md5') != fetched_md5:
-                changed_detected = True
-
-            # Always record the new checksum
-            update_obj["previous_md5"] = fetched_md5
-
-            # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
-            if not watch.get('previous_md5'):
-                watch['previous_md5'] = fetched_md5

        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

@@ -365,6 +359,12 @@ class perform_site_check(difference_detection_processor):
                else:
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")

+        # Always record the new checksum
+        update_obj["previous_md5"] = fetched_md5
+
+        # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
+        if not watch.get('previous_md5'):
+            watch['previous_md5'] = fetched_md5

        # stripped_text_from_html - Everything after filters and NO 'ignored' content
        return changed_detected, update_obj, stripped_text_from_html
--- a/changedetectionio/static/js/recheck-proxy.js
+++ b/changedetectionio/static/js/recheck-proxy.js
@@ -1,14 +1,14 @@
 $(function () {
    /* add container before each proxy location to show status */
-    var isActive = false;

-    function setup_html_widget() {
-        var option_li = $('.fetch-backend-proxy li').filter(function () {
-            return $("input", this)[0].value.length > 0;
-        });
-        $(option_li).prepend('<div class="proxy-status"></div>');
-        $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
-    }
+    var option_li = $('.fetch-backend-proxy li').filter(function() {
+        return $("input",this)[0].value.length >0;
+    });
+
+    //var option_li = $('.fetch-backend-proxy li');
+    var isActive = false;
+    $(option_li).prepend('<div class="proxy-status"></div>');
+    $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');

    function set_proxy_check_status(proxy_key, state) {
        // select input by value name
@@ -59,14 +59,8 @@ $(function () {
    }

    $('#check-all-proxies').click(function (e) {
-
        e.preventDefault()
-
-        if (!$('body').hasClass('proxy-check-active')) {
-            setup_html_widget();
-            $('body').addClass('proxy-check-active');
-        }
-
+        $('body').addClass('proxy-check-active');
        $('.proxy-check-details').html('');
        $('.proxy-status').html('<span class="spinner"></span>').fadeIn();
        $('.proxy-timing').html('');
--- a/changedetectionio/static/js/tabs.js
+++ b/changedetectionio/static/js/tabs.js
@@ -26,7 +26,8 @@ function set_active_tab() {
    if (tab.length) {
        tab[0].parentElement.className = "active";
    }
-
+    // hash could move the page down
+    window.scrollTo(0, 0);
 }

 function focus_error_tab() {
--- a/changedetectionio/static/styles/scss/parts/_extra_proxies.scss
+++ b/changedetectionio/static/styles/scss/parts/_extra_proxies.scss
@@ -25,19 +25,15 @@ ul#requests-extra_proxies {

 body.proxy-check-active {
  #request {
-    // Padding set by flex layout
-    /*
    .proxy-status {
      width: 2em;
    }
-    */

    .proxy-check-details {
      font-size: 80%;
      color: #555;
      display: block;
-      padding-left: 2em;
-      max-width: 500px;
+      padding-left: 4em;
    }

    .proxy-timing {
--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -147,14 +147,8 @@ body.spinner-active {
  }
 }

-
-.tab-pane-inner {
-  // .tab-pane-inner will have the #id that the tab button jumps/anchors to
-  scroll-margin-top: 200px;
-}
-
 section.content {
-  padding-top: 100px;
+  padding-top: 5em;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;
@@ -937,7 +931,6 @@ $form-edge-padding: 20px;
 }

 .tab-pane-inner {
-
  &:not(:target) {
    display: none;
  }
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -119,22 +119,19 @@ ul#requests-extra_proxies {
  #request label[for=proxy] {
    display: inline-block; }

-body.proxy-check-active #request {
-  /*
-    .proxy-status {
-      width: 2em;
-    }
-    */ }
-  body.proxy-check-active #request .proxy-check-details {
-    font-size: 80%;
-    color: #555;
-    display: block;
-    padding-left: 2em;
-    max-width: 500px; }
-  body.proxy-check-active #request .proxy-timing {
-    font-size: 80%;
-    padding-left: 1rem;
-    color: var(--color-link); }
+body.proxy-check-active #request .proxy-status {
+  width: 2em; }
+
+body.proxy-check-active #request .proxy-check-details {
+  font-size: 80%;
+  color: #555;
+  display: block;
+  padding-left: 4em; }
+
+body.proxy-check-active #request .proxy-timing {
+  font-size: 80%;
+  padding-left: 1rem;
+  color: var(--color-link); }

 #recommended-proxy {
  display: grid;
@@ -605,11 +602,8 @@ body.spinner-active #pure-menu-horizontal-spinner {
    background-color: var(--color-background-menu-link-hover);
    color: var(--color-text-menu-link-hover); }

-.tab-pane-inner {
-  scroll-margin-top: 200px; }
-
 section.content {
-  padding-top: 100px;
+  padding-top: 5em;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;
--- a/changedetectionio/tests/itemprop_test_examples/README.md
+++ b/changedetectionio/tests/itemprop_test_examples/README.md
@@ -1,6 +0,0 @@
-# A list of real world examples!
-
-Always the price should be 666.66 for our tests
-
-see test_restock_itemprop.py::test_special_prop_examples
-
--- a/changedetectionio/tests/itemprop_test_examples/a.txt
+++ b/changedetectionio/tests/itemprop_test_examples/a.txt
@@ -1,25 +0,0 @@
-<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
-     data-testid="price-section"
-     data-optly-product-tile-price-section="true"><span
-        class="PriceRange ProductPrice variant-huge" itemprop="offers"
-        itemscope="" itemtype="http://schema.org/Offer"><div
-        class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
-        aria-hidden="true" class="Price variant-huge" data-testid="price"
-        itemprop="price"><sup class="sup" data-testid="price-symbol"
-                              itemprop="priceCurrency" content="AUD">$</sup><span
-        class="dollars" data-testid="price-value" itemprop="price"
-        content="155.55">155.55</span><span class="extras"><span class="sup"
-                                                              data-testid="price-sup"></span></span></span></span>
-</div>
-
-<script type="application/ld+json">{
-                                "@type": "Product",
-                                "@context": "https://schema.org",
-                                "name": "test",
-                                "description": "test",
-                                "offers": {
-                                    "@type": "Offer",
-                                    "priceCurrency": "AUD",
-                                    "price": 155.55
-                                },
-                            }</script>
--- a/changedetectionio/tests/proxy_list/test_proxy.py
+++ b/changedetectionio/tests/proxy_list/test_proxy.py
@@ -16,4 +16,4 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    )

    assert b"1 Imported" in res.data
-    wait_for_all_checks(client)
+    time.sleep(3)
--- a/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
+++ b/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-import json
 import os
 from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks


 def set_response():
@@ -19,6 +18,7 @@ def set_response():
        f.write(data)
    time.sleep(1)

+
 def test_socks5(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
    set_response()
@@ -79,24 +79,3 @@ def test_socks5(client, live_server, measure_memory_usage):

    # Should see the proper string
    assert "Awesome, you made it".encode('utf-8') in res.data
-
-    # PROXY CHECKER WIDGET CHECK - this needs more checking
-    uuid = extract_UUID_from_client(client)
-
-    res = client.get(
-        url_for("check_proxies.start_check", uuid=uuid),
-        follow_redirects=True
-    )
-    # It's probably already finished super fast :(
-    #assert b"RUNNING" in res.data
-    
-    wait_for_all_checks(client)
-    res = client.get(
-        url_for("check_proxies.get_recheck_status", uuid=uuid),
-        follow_redirects=True
-    )
-    assert b"OK" in res.data
-
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
-
--- a/changedetectionio/tests/test_add_replace_remove_filter.py
+++ b/changedetectionio/tests/test_add_replace_remove_filter.py
@@ -77,8 +77,6 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory

    # The trigger line is REMOVED,  this should trigger
    set_original(excluding='The golden line')
-
-    # Check in the processor here what's going on, its triggering empty-reply and no change.
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
@@ -153,6 +151,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa

    # A line thats not the trigger should not trigger anything
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
    assert b'1 watches queued for rechecking.' in res.data

    wait_for_all_checks(client)
@@ -174,5 +173,6 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
        assert b'-Oh yes please-' in response
        assert '网站监测 内容更新了'.encode('utf-8') in response

+
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_block_while_text_present.py
+++ b/changedetectionio/tests/test_block_while_text_present.py
@@ -65,8 +65,11 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
    live_server_setup(live_server)
    # Use a mix of case in ZzZ to prove it works case-insensitive.
    ignore_text = "out of stoCk\r\nfoobar"
+
    set_original_ignore_response()

+    # Give the endpoint time to spin up
+    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
@@ -124,24 +127,13 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data

-    # 2548
-    # Going back to the ORIGINAL should NOT trigger a change
-    set_original_ignore_response()
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
-    res = client.get(url_for("index"))
-    assert b'unviewed' not in res.data

-
-    # Now we set a change where the text is gone AND its different content, it should now trigger
+    # Now we set a change where the text is gone; it should now trigger
    set_modified_response_minus_block_text()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

-
-
-
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_element_removal.py
+++ b/changedetectionio/tests/test_element_removal.py
@@ -5,7 +5,7 @@ import time
 from flask import url_for

 from ..html_tools import *
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup


 def test_setup(live_server):
@@ -119,10 +119,12 @@ across multiple lines


 def test_element_removal_full(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
+    sleep_time_for_fetch_thread = 3

    set_original_response()

+    # Give the endpoint time to spin up
+    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for("test_endpoint", _external=True)
@@ -130,8 +132,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
        url_for("import_page"), data={"urls": test_url}, follow_redirects=True
    )
    assert b"1 Imported" in res.data
-    wait_for_all_checks(client)
-
+    time.sleep(1)
    # Goto the edit page, add the filter data
    # Not sure why \r needs to be added - absent of the #changetext this is not necessary
    subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
@@ -147,7 +148,6 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
        follow_redirects=True,
    )
    assert b"Updated watch." in res.data
-    wait_for_all_checks(client)

    # Check it saved
    res = client.get(
@@ -156,10 +156,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
    assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data

    # Trigger a check
-    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    assert b'1 watches queued for rechecking.' in res.data
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)

-    wait_for_all_checks(client)
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)

    # so that we set the state to 'unviewed' after all the edits
    client.get(url_for("diff_history_page", uuid="first"))
@@ -168,11 +168,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
    set_modified_response()

    # Trigger a check
-    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    assert b'1 watches queued for rechecking.' in res.data
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
-    wait_for_all_checks(client)
+    time.sleep(sleep_time_for_fetch_thread)

    # There should not be an unviewed change, as changes should be removed
    res = client.get(url_for("index"))
--- a/changedetectionio/tests/test_live_preview.py
+++ b/changedetectionio/tests/test_live_preview.py
@@ -1,78 +0,0 @@
-#!/usr/bin/env python3
-
-from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
-
-
-def set_response():
-
-    data = f"""<html>
-       <body>Awesome, you made it<br>
-yeah the socks request worked<br>
-something to ignore<br>
-something to trigger<br>
-     </body>
-     </html>
-    """
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write(data)
-
-def test_content_filter_live_preview(client, live_server, measure_memory_usage):
-    live_server_setup(live_server)
-    set_response()
-
-    test_url = url_for('test_endpoint', _external=True)
-
-    res = client.post(
-        url_for("form_quick_watch_add"),
-        data={"url": test_url, "tags": ''},
-        follow_redirects=True
-    )
-    uuid = extract_UUID_from_client(client)
-    res = client.post(
-        url_for("edit_page", uuid=uuid),
-        data={
-            "include_filters": "",
-            "fetch_backend": 'html_requests',
-            "ignore_text": "something to ignore",
-            "trigger_text": "something to trigger",
-            "url": test_url,
-        },
-        follow_redirects=True
-    )
-    assert b"Updated watch." in res.data
-    wait_for_all_checks(client)
-
-    # The endpoint is a POST and accepts the form values to override the watch preview
-    import json
-
-    # DEFAULT OUTPUT WITHOUT ANYTHING UPDATED/CHANGED - SHOULD SEE THE WATCH DEFAULTS
-    res = client.post(
-        url_for("watch_get_preview_rendered", uuid=uuid)
-    )
-    default_return = json.loads(res.data.decode('utf-8'))
-    assert default_return.get('after_filter')
-    assert default_return.get('before_filter')
-    assert default_return.get('ignore_line_numbers') == [3] # "something to ignore" line 3
-    assert default_return.get('trigger_line_numbers') == [4] # "something to trigger" line 4
-
-    # SEND AN UPDATE AND WE SHOULD SEE THE OUTPUT CHANGE SO WE KNOW TO HIGHLIGHT NEW STUFF
-    res = client.post(
-        url_for("watch_get_preview_rendered", uuid=uuid),
-        data={
-            "include_filters": "",
-            "fetch_backend": 'html_requests',
-            "ignore_text": "sOckS", # Also be sure case insensitive works
-            "trigger_text": "AweSOme",
-            "url": test_url,
-        },
-    )
-    reply = json.loads(res.data.decode('utf-8'))
-    assert reply.get('after_filter')
-    assert reply.get('before_filter')
-    assert reply.get('ignore_line_numbers') == [2]  # Ignored - "socks" on line 2
-    assert reply.get('trigger_line_numbers') == [1]  # Triggers "Awesome" in line 1
-
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_preview_endpoints.py
+++ b/changedetectionio/tests/test_preview_endpoints.py
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-
-import time
-from flask import url_for
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
-
-
-# `subtractive_selectors` should still work in `source:` type requests
-def test_fetch_pdf(client, live_server, measure_memory_usage):
-    import shutil
-    shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")
-
-    live_server_setup(live_server)
-    test_url = url_for('test_pdf_endpoint', _external=True)
-    # Add our URL to the import page
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
-
-    wait_for_all_checks(client)
-
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    # PDF header should not be there (it was converted to text)
-    assert b'PDF' not in res.data[:10]
-    assert b'hello world' in res.data
-
-    # So we know if the file changes in other ways
-    import hashlib
-    original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
-    # We should have one
-    assert len(original_md5) > 0
-    # And it's going to be in the document
-    assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data
-
-    shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
-    changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
-    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    assert b'1 watches queued for rechecking.' in res.data
-
-    wait_for_all_checks(client)
-
-    # Now something should be ready, indicated by having a 'unviewed' class
-    res = client.get(url_for("index"))
-    assert b'unviewed' in res.data
-
-    # The original checksum should be not be here anymore (cdio adds it to the bottom of the text)
-
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert original_md5.encode('utf-8') not in res.data
-    assert changed_md5.encode('utf-8') in res.data
-
-    res = client.get(
-        url_for("diff_history_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert original_md5.encode('utf-8') in res.data
-    assert changed_md5.encode('utf-8') in res.data
-
-    assert b'here is a change' in res.data
--- a/changedetectionio/tests/test_restock_itemprop.py
+++ b/changedetectionio/tests/test_restock_itemprop.py
@@ -3,7 +3,7 @@ import os
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
+from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
 from ..notification import default_notification_format

 instock_props = [
@@ -413,31 +413,3 @@ def test_data_sanity(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"))
    assert test_url2.encode('utf-8') in res.data
-
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
-
-# All examples should give a prive of 666.66
-def test_special_prop_examples(client, live_server):
-    import glob
-    #live_server_setup(live_server)
-
-    test_url = url_for('test_endpoint', _external=True)
-    check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
-    files = glob.glob(check_path)
-    assert files
-    for test_example_filename in files:
-        with open(test_example_filename, 'r') as example_f:
-            with open("test-datastore/endpoint-content.txt", "w") as test_f:
-                test_f.write(f"<html><body>{example_f.read()}</body></html>")
-
-            # Now fetch it and check the price worked
-            client.post(
-                url_for("form_quick_watch_add"),
-                data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
-                follow_redirects=True
-            )
-            wait_for_all_checks(client)
-            res = client.get(url_for("index"))
-            assert b'ception' not in res.data
-            assert b'155.55' in res.data
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -260,6 +260,9 @@ class update_worker(threading.Thread):
                    try:
                        # Processor is what we are using for detecting the "Change"
                        processor = watch.get('processor', 'text_json_diff')
+                        # Optional flag on the queued item: abort processing when the content checksum is the same as the last fetch
+                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
+

                        # Init a new 'difference_detection_processor', first look in processors
                        processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -275,7 +278,10 @@ class update_worker(threading.Thread):

                        update_handler.call_browser()

-                        changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
+                        changed_detected, update_obj, contents = update_handler.run_changedetection(
+                            watch=watch,
+                            skip_when_checksum_same=skip_when_same_checksum,
+                        )

                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
Author	SHA1	Message	Date
dgtlmoon	73d9373879	Fixing page update	2024-10-10 13:18:02 +02:00
dgtlmoon	d32640d892	highlight ignore lines	2024-10-10 13:12:23 +02:00
dgtlmoon	7ee249e2ff	Fix labels	2024-10-10 12:53:56 +02:00
dgtlmoon	5d753f59c4	Unique line test wasnt considering whitespace changes!	2024-10-10 12:27:25 +02:00
dgtlmoon	090f5d7725	fix test	2024-10-10 11:47:13 +02:00
dgtlmoon	7869a7745a	Fixing whitespace cleanup - didnt work as expected!!	2024-10-10 09:25:52 +02:00
dgtlmoon	de34f0ad83	Fix bad comment	2024-10-09 18:52:27 +02:00
dgtlmoon	fabbb3733a	Stop html_tools.strip_ignore_text from chewing newlines	2024-10-09 18:49:18 +02:00
dgtlmoon	deadf881b0	is now str not bytes	2024-10-09 18:05:08 +02:00
dgtlmoon	77ef42c367	oops	2024-10-09 15:11:56 +02:00
dgtlmoon	5d1f317e30	WIP	2024-10-09 15:09:29 +02:00
dgtlmoon	5ed7f43f6e	Fix test	2024-10-09 13:21:07 +02:00
dgtlmoon	3b6ae70c9c	Misc fixes - juggling utf-8 not needed (its utf-16 by default python string)	2024-10-09 13:11:04 +02:00