Merge branch 'master' into add-system-info-api

Basic system info/system state API
Build - Fix syntax in container build test (#1050 )
2025-11-22 17:36:09 +00:00 · 2022-10-23 18:24:12 +02:00 · 2022-10-23 18:21:36 +02:00 · 2022-10-23 16:02:13 +02:00 · 2022-10-23 15:54:19 +02:00 · 2022-10-23 11:26:32 +02:00
26 changed files with 371 additions and 94 deletions
--- a/.github/workflows/test-container-build.yml
+++ b/.github/workflows/test-container-build.yml
@@ -0,0 +1,55 @@
 name: ChangeDetection.io Container Build Test
 # Triggers the workflow on push or pull request events, limited by the path filters below
 # This line doesn't work, even though it is the documented one
 #on: [push, pull_request]
 on:
  push:
    paths:
      - requirements.txt
      - Dockerfile
  pull_request:
    paths:
      - requirements.txt
      - Dockerfile
  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
 jobs:
  test-container-build:
    runs-on: ubuntu-latest
    steps:
        - uses: actions/checkout@v2
        - name: Set up Python 3.9
          uses: actions/setup-python@v2
          with:
            # Quoted so the version is always read as a string, never as a float
            python-version: '3.9'
        # Just test that the build works, some libraries won't compile on ARM/rPi etc
        - name: Set up QEMU
          uses: docker/setup-qemu-action@v1
          with:
            image: tonistiigi/binfmt:latest
            platforms: all
        - name: Set up Docker Buildx
          id: buildx
          uses: docker/setup-buildx-action@v1
          with:
            install: true
            version: latest
            driver-opts: image=moby/buildkit:master
        - name: Test that the docker containers can build
          id: docker_build
          uses: docker/build-push-action@v2
          # https://github.com/docker/build-push-action#customizing
          with:
            context: ./
            file: ./Dockerfile
            # Comma-separated target list; no trailing comma, which would add an empty entry
            platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64
            cache-from: type=local,src=/tmp/.buildx-cache
            cache-to: type=local,dest=/tmp/.buildx-cache
--- a/.github/workflows/test-only.yml
+++ b/.github/workflows/test-only.yml
@@ -1,28 +1,25 @@
-name: ChangeDetection.io Test
+name: ChangeDetection.io App Test
 # Triggers the workflow on push or pull request events
 on: [push, pull_request]
 jobs:
-  test-build:
+  test-application:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: 3.9
      - name: Show env vars
        run: set
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 pytest
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
          if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
@@ -39,7 +36,4 @@ jobs:
          # Each test is totally isolated and performs its own cleanup/reset
          cd changedetectionio; ./run_all_tests.sh
      # https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
      # https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
      # https://github.com/docker/buildx/issues/495#issuecomment-918925854
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch.
 Please be sure that all new functionality has a matching test!
-Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example
+Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
 ```
 pip3 install -r requirements-dev.txt
--- a/17
+++ b/17
@@ -5,13 +5,14 @@ FROM python:3.8-slim as builder
 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    libssl-dev \
+    g++ \
    libffi-dev \
    gcc \
    libc-dev \
    libffi-dev \
    libssl-dev \
    libxslt-dev \
-    zlib1g-dev \
+    make \
-    g++
+    zlib1g-dev
 RUN mkdir /install
 WORKDIR /install
@@ -22,9 +23,14 @@ RUN pip install --target=/dependencies -r /requirements.txt
 # Playwright is an alternative to Selenium
 # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
-RUN pip install --target=/dependencies playwright~=1.25 \
+RUN pip install --target=/dependencies playwright~=1.26 \
    || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
 RUN pip install --target=/dependencies jq~=1.3 \
    || echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."
 # Final image stage
 FROM python:3.8-slim
@@ -58,6 +64,7 @@ EXPOSE 5000
 # The actual flask app
 COPY changedetectionio /app/changedetectionio
 # The eventlet server wrapper
 COPY changedetection.py /app/changedetection.py
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,6 +2,7 @@ recursive-include changedetectionio/api *
 recursive-include changedetectionio/templates *
 recursive-include changedetectionio/static *
 recursive-include changedetectionio/model *
 recursive-include changedetectionio/tests *
 include changedetection.py
 global-exclude *.pyc
 global-exclude node_modules
--- a/README-pip.md
+++ b/README-pip.md
@@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
 #### Key Features
 - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
+- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
 - Switch between fast non-JS and Chrome JS based "fetchers"
 - Easily specify how often a site should be checked
 - Execute JS before extracting text (Good for logging in, see examples in the UI!)
--- a/README.md
+++ b/README.md
@@ -47,13 +47,15 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
 #### Key Features
 - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
+- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
 - Switch between fast non-JS and Chrome JS based "fetchers"
 - Easily specify how often a site should be checked
 - Execute JS before extracting text (Good for logging in, see examples in the UI!)
 - Override Request Headers, Specify `POST` or `GET` and other methods
 - Use the "Visual Selector" to help target specific elements
 - Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
 We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
 ## Screenshots
@@ -119,8 +121,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
 ## Filters
 XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
 XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools. 
 (We support LXML `re:test`, `re:math` and `re:replace`.)
 ## Notifications
@@ -149,7 +151,7 @@ Now you can also customise your notification content!
 ## JSON API Monitoring
-Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector.
+Detect changes and monitor data in JSON API's by using either JSONPath or jq to filter, parse, and restructure JSON as needed.
 ![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-filter-field-example.png)
@@ -157,9 +159,20 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
 ![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-diff-example.png)
 ### JSONPath or jq?
 For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specific information on jq.
 One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc.
 See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples
 Note: `jq` library must be added separately (`pip3 install jq`)
 ### Parse JSON embedded in HTML!
-When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites. 
+When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites. 
 ```
 <html>
@@ -169,7 +182,7 @@ When you enable a `json:` filter, you can even automatically extract and parse e
 </script>
 ```  
-`json:$.price` would give `23.50`, or you can extract the whole structure
+`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
 ## Proxy configuration
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1
-__version__ = '0.39.19.1'
+__version__ = '0.39.20.4'
 datastore = None
@@ -194,6 +194,9 @@ def changedetection_app(config=None, datastore_o=None):
    watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
    watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
@@ -636,20 +639,27 @@ def changedetection_app(config=None, datastore_o=None):
            # Only works reliably with Playwright
            visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
            # JQ is difficult to install on windows and must be manually added (outside requirements.txt)
            jq_support = True
            try:
                import jq
            except ModuleNotFoundError:
                jq_support = False
            output = render_template("edit.html",
                                     uuid=uuid,
                                     watch=datastore.data['watching'][uuid],
                                     form=form,
                                     has_empty_checktime=using_default_check_time,
                                     has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
                                     using_global_webdriver_wait=default['webdriver_delay'] is None,
                                     current_base_url=datastore.data['settings']['application']['base_url'],
                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                                     form=form,
                                     has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
                                     has_empty_checktime=using_default_check_time,
                                     jq_support=jq_support,
                                     playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
                                     settings_application=datastore.data['settings']['application'],
                                     using_global_webdriver_wait=default['webdriver_delay'] is None,
                                     uuid=uuid,
                                     visualselector_data_is_ready=visualselector_data_is_ready,
                                     visualselector_enabled=visualselector_enabled,
-                                     playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
+                                     watch=datastore.data['watching'][uuid],
                                     )
        return output
@@ -809,8 +819,10 @@ def changedetection_app(config=None, datastore_o=None):
        newest_file = history[dates[-1]]
        # Read as binary and force decode as UTF-8
        # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
        try:
-            with open(newest_file, 'r') as f:
+            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
                newest_version_file_contents = f.read()
        except Exception as e:
            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
@@ -823,7 +835,7 @@ def changedetection_app(config=None, datastore_o=None):
            previous_file = history[dates[-2]]
        try:
-            with open(previous_file, 'r') as f:
+            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
                previous_version_file_contents = f.read()
        except Exception as e:
            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
@@ -900,7 +912,7 @@ def changedetection_app(config=None, datastore_o=None):
        timestamp = list(watch.history.keys())[-1]
        filename = watch.history[timestamp]
        try:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
                tmp = f.readlines()
                # Get what needs to be highlighted
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -122,3 +122,33 @@ class CreateWatch(Resource):
            return {'status': "OK"}, 200
        return list, 200
 class SystemInfo(Resource):
    """API resource that reports basic system state.

    The GET response contains the update queue size, the UUIDs of watches that
    look overdue for a check, the uptime in seconds and the number of watches.
    """
    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        # Queue whose current size is reported as 'queue_size'
        self.update_q = kwargs['update_q']
    # NOTE(review): auth.check_token presumably enforces the API token - confirm in the auth module
    @auth.check_token
    def get(self):
        """Return a ``(dict, 200)`` tuple describing the current system state.

        A watch is reported as overdue when the time since its ``last_checked``
        value exceeds its own threshold, falling back to the datastore-wide
        threshold when the watch does not define one.
        """
        import time
        # Single clock reading so every comparison and the uptime use the same instant
        now = time.time()
        watches = self.datastore.data.get('watching', {})
        overdue_watches = []
        # Check all watches and report which have not been checked but should have been
        for uuid, watch in watches.items():
            # see if now - last_checked is greater than the time that should have been
            # this is not super accurate (maybe they just edited it) but better than nothing
            t = watch.threshold_seconds()
            if not t:
                t = self.datastore.threshold_seconds
            # A missing or None 'last_checked' is treated as 0 (never checked) instead of raising TypeError
            last_checked = watch.get('last_checked') or 0
            time_since_check = now - last_checked
            if time_since_check > t:
                overdue_watches.append(uuid)
        return {
                   'queue_size': self.update_q.qsize(),
                   'overdue_watches': overdue_watches,
                   'uptime': round(now - self.datastore.start_time, 2),
                   'watch_count': len(watches)
               }, 200
--- a/changedetectionio/changedetection.py
+++ b/changedetectionio/changedetection.py
@@ -102,6 +102,14 @@ def main():
                    has_password=datastore.data['settings']['application']['password'] != False
                    )
    # Monitored websites will not receive a Referer header
    # when a user clicks on an outgoing link.
    @app.after_request
    def hide_referrer(response):
        """Set ``Referrer-Policy: no-referrer`` on the response when the HIDE_REFERER env var is set.

        Registered through ``app.after_request``; the response object is returned in every case.
        """
        hide_requested = os.getenv("HIDE_REFERER", False)
        if not hide_requested:
            return response
        response.headers["Referrer-Policy"] = "no-referrer"
        return response
    # Proxy sub-directory support
    # Set environment var USE_X_SETTINGS=1 on this script
    # And then in your proxy_pass settings
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -316,6 +316,7 @@ class base_html_playwright(Fetcher):
        import playwright._impl._api_types
        from playwright._impl._api_types import Error, TimeoutError
        response = None
        with sync_playwright() as p:
            browser_type = getattr(p, self.browser_type)
@@ -373,8 +374,11 @@ class base_html_playwright(Fetcher):
                print("response object was none")
                raise EmptyReply(url=url, status_code=None)
-            # Bug 2(?) Set the viewport size AFTER loading the page
+
-            page.set_viewport_size({"width": 1280, "height": 1024})
+            # Removed browser-set-size, seemed to be needed to make screenshots work reliably in older playwright versions
            # Was causing exceptions like 'waiting for page but content is changing' etc
            # https://www.browserstack.com/docs/automate/playwright/change-browser-window-size 1280x720 should be the default
            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
            time.sleep(extra_wait)
@@ -398,6 +402,13 @@ class base_html_playwright(Fetcher):
                    raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
                else:
                    # JS eval was run, now we also wait some time if possible to let the page settle
                    if self.render_extract_delay:
                        page.wait_for_timeout(self.render_extract_delay * 1000)
            page.wait_for_timeout(500)
            self.content = page.content()
            self.status_code = response.status
            self.headers = response.all_headers()
@@ -514,8 +525,6 @@ class base_html_webdriver(Fetcher):
            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
        self.screenshot = self.driver.get_screenshot_as_png()
        # @todo - how to check this? is it possible?
        self.status_code = 200
        # @todo somehow we should try to get this working for WebDriver
@@ -526,6 +535,8 @@ class base_html_webdriver(Fetcher):
        self.content = self.driver.page_source
        self.headers = {}
        self.screenshot = self.driver.get_screenshot_as_png()
    # Does the connection to the webdriver work? run a test connection.
    def is_ready(self):
        from selenium import webdriver
@@ -564,6 +575,11 @@ class html_requests(Fetcher):
            ignore_status_codes=False,
            current_css_filter=None):
        # Make requests use a more modern looking user-agent
        if not 'User-Agent' in request_headers:
            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
        proxies = {}
        # Allows override the proxy on a per-request basis
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@@ -141,8 +141,9 @@ class perform_site_check():
            has_filter_rule = True
        if has_filter_rule:
-            if 'json:' in css_filter_rule:
+            json_filter_prefixes = ['json:', 'jq:']
-                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
+            if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
                is_html = False
        if is_html or is_source:
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -303,6 +303,25 @@ class ValidateCSSJSONXPATHInput(object):
                # Re #265 - maybe in the future fetch the page and offer a
                # warning/notice that its possible the rule doesnt yet match anything?
                if not self.allow_json:
                    raise ValidationError("jq not permitted in this field!")
            if 'jq:' in line:
                # jq is imported lazily here; when the module is absent the rule is rejected
                try:
                    import jq
                except ModuleNotFoundError:
                    # `jq` requires full compilation in windows and so isn't generally available
                    raise ValidationError("jq support not found")
                # Strip the prefix so only the jq program itself is compiled
                jq_expression = line.replace('jq:', '')
                try:
                    jq.compile(jq_expression)
                except (ValueError) as e:
                    message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
                    raise ValidationError(message % (jq_expression, str(e)))
                except Exception:
                    # Any other failure is reported generically; KeyboardInterrupt/SystemExit are no longer swallowed
                    raise ValidationError("A system-error occurred when validating your jq expression")
 class quickWatchForm(Form):
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -1,11 +1,11 @@
 import json
 from typing import List
 from bs4 import BeautifulSoup
 from jsonpath_ng.ext import parse
 import re
 from inscriptis import get_text
 from inscriptis.model.config import ParserConfig
 from jsonpath_ng.ext import parse
 from typing import List
 import json
 import re
 class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
@@ -79,19 +79,35 @@ def extract_element(find='title', html_content=''):
    return element_text
 #
-def _parse_json(json_data, jsonpath_filter):
+def _parse_json(json_data, json_filter):
-    s=[]
+    if 'json:' in json_filter:
-    jsonpath_expression = parse(jsonpath_filter.replace('json:', ''))
+        jsonpath_expression = parse(json_filter.replace('json:', ''))
-    match = jsonpath_expression.find(json_data)
+        match = jsonpath_expression.find(json_data)
        return _get_stripped_text_from_json_match(match)
    if 'jq:' in json_filter:
        try:
            import jq
        except ModuleNotFoundError:
            # `jq` requires full compilation in windows and so isn't generally available
            raise Exception("jq not support not found")
        jq_expression = jq.compile(json_filter.replace('jq:', ''))
        match = jq_expression.input(json_data).all()
        return _get_stripped_text_from_json_match(match)
 def _get_stripped_text_from_json_match(match):
    s = []
    # More than one result, we will return it as a JSON list.
    if len(match) > 1:
        for i in match:
-            s.append(i.value)
+            s.append(i.value if hasattr(i, 'value') else i)
    # Single value, use just the value, as it could be later used in a token in notifications.
    if len(match) == 1:
-        s = match[0].value
+        s = match[0].value if hasattr(match[0], 'value') else match[0]
    # Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
    if not match:
@@ -103,16 +119,16 @@ def _parse_json(json_data, jsonpath_filter):
    return stripped_text_from_html
-def extract_json_as_string(content, jsonpath_filter):
+def extract_json_as_string(content, json_filter):
    stripped_text_from_html = False
    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
    try:
-        stripped_text_from_html = _parse_json(json.loads(content), jsonpath_filter)
+        stripped_text_from_html = _parse_json(json.loads(content), json_filter)
    except json.JSONDecodeError:
-        # Foreach <script json></script> blob.. just return the first that matches jsonpath_filter
+        # Foreach <script json></script> blob.. just return the first that matches json_filter
        s = []
        soup = BeautifulSoup(content, 'html.parser')
        bs_result = soup.findAll('script')
@@ -131,7 +147,7 @@ def extract_json_as_string(content, jsonpath_filter):
                # Just skip it
                continue
            else:
-                stripped_text_from_html = _parse_json(json_data, jsonpath_filter)
+                stripped_text_from_html = _parse_json(json_data, json_filter)
                if stripped_text_from_html:
                    break
--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@@ -13,10 +13,6 @@ class model(dict):
            'watching': {},
            'settings': {
                'headers': {
                    'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                    'Accept-Encoding': 'gzip, deflate',  # No support for brolti in python requests yet.
                    'Accept-Language': 'en-GB,en-US;q=0.9,en;'
                },
                'requests': {
                    'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -118,7 +118,10 @@ class model(dict):
        if os.path.isfile(fname):
            logging.debug("Reading history index " + str(time.time()))
            with open(fname, "r") as f:
-                tmp_history = dict(i.strip().split(',', 2) for i in f.readlines())
+                for i in f.readlines():
                    if ',' in i:
                        k, v = i.strip().split(',', 2)
                        tmp_history[k] = v
        if len(tmp_history):
            self.__newest_history_key = list(tmp_history.keys())[-1]
@@ -151,28 +154,30 @@ class model(dict):
        import uuid
        import logging
-        output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
+        output_path = os.path.join(self.__datastore_path, self['uuid'])
        self.ensure_data_dir_exists()
        snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
        snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
        logging.debug("Saving history text {}".format(snapshot_fname))
        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
        # most sites are utf-8 and some are even broken utf-8
        with open(snapshot_fname, 'wb') as f:
            f.write(contents)
            f.close()
        # Append to index
        # @todo check last char was \n
-        index_fname = "{}/history.txt".format(output_path)
+        index_fname = os.path.join(output_path, "history.txt")
        with open(index_fname, 'a') as f:
            f.write("{},{}\n".format(timestamp, snapshot_fname))
            f.close()
        self.__newest_history_key = timestamp
-        self.__history_n+=1
+        self.__history_n += 1
-        #@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
+        # @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
        return snapshot_fname
    @property
--- a/changedetectionio/run_all_tests.sh
+++ b/changedetectionio/run_all_tests.sh
@@ -9,6 +9,8 @@
 # exit when any command fails
 set -e
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 find tests/test_*py -type f|while read test_name
 do
  echo "TEST RUNNING $test_name"
@@ -23,6 +25,13 @@ export BASE_URL="https://really-unique-domain.io"
 pytest tests/test_notification.py
 ## JQ + JSON: filter test
 # jq is not available on windows and we should just test it when the package is installed
 # this will re-test with jq support
 pip3 install jq~=1.3
 pytest tests/test_jsonpath_jq_selector.py
 # Now for the selenium and playwright/browserless fetchers
 # Note - this is not UI functional tests - just checking that each one can fetch the content
@@ -38,7 +47,9 @@ docker kill $$-test_selenium
 echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
 # Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
-pip3 install playwright~=1.24
+PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
 echo "using $PLAYWRIGHT_VERSION"
 pip3 install "$PLAYWRIGHT_VERSION"
 docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable
 # takes a while to spin up
 sleep 5
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -30,14 +30,14 @@ class ChangeDetectionStore:
    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
-        self.needs_write = False
+        self.__data = App.model()
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        self.needs_write = False
        self.proxy_list = None
        self.start_time = time.time()
        self.stop_thread = False
        self.__data = App.model()
        # Base definition for all watchers
        # deepcopy part of #569 - not sure why its needed exactly
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
@@ -81,8 +81,6 @@ class ChangeDetectionStore:
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            if include_default_watches:
                print("Creating JSON store at", self.datastore_path)
                self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
                self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
                self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
@@ -577,3 +575,11 @@ class ChangeDetectionStore:
                continue
        return
    # We incorrectly used common header overrides that should only apply to Requests
    # These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium
    def update_7(self):
        """Remove the four formerly hard-coded request headers from the stored settings.

        Only entries with a truthy value are deleted; empty or absent entries
        and all other headers are left untouched.
        """
        stored_headers = self.data['settings']['headers']
        # These were hard-coded in early versions
        legacy_names = ('User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language')
        for header_name in legacy_names:
            if not stored_headers.get(header_name):
                continue
            del stored_headers[header_name]
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -77,6 +77,7 @@
                        <span class="pure-form-message-inline">
                            <p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
                            <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
                            Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
                        </span>
                    </div>
                {% if form.proxy %}
@@ -183,8 +184,16 @@ User-Agent: wonderbra 1.0") }}
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
-                        <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required,  <a
+                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
-                                href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
+                            <ul>
                                <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
                                {% if jq_support %}
                                <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
                                {% else %}
                                <li>jq support not installed</li>
                                {% endif %}
                            </ul>
                        </li>
                        <li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
                            <ul>
                                <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
@@ -193,7 +202,7 @@ User-Agent: wonderbra 1.0") }}
                            </ul>
                            </li>
                    </ul>
-                    Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
+                    Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
                </span>
                    </div>
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@@ -99,6 +99,8 @@
                        <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
                        <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
                    </span>
                    <br/>
                    Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
                </div>
                <fieldset class="pure-group" id="webdriver-override-options">
                    <div class="pure-form-message-inline">
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@@ -147,6 +147,16 @@ def test_api_simple(client, live_server):
    # @todo how to handle None/default global values?
    assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"
    # basic systeminfo check
    res = client.get(
        url_for("systeminfo"),
        headers={'x-api-key': api_key},
    )
    info = json.loads(res.data)
    assert info.get('watch_count') == 1
    assert info.get('uptime') > 0.5
    # Finally delete the watch
    res = client.delete(
        url_for("watch", uuid=watch_uuid),
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -2,10 +2,15 @@
 # coding=utf-8
 import time
-from flask import url_for
+from flask import url_for, escape
 from . util import live_server_setup
 import pytest
 jq_support = True
 try:
    import jq
 except ModuleNotFoundError:
    jq_support = False
 def test_setup(live_server):
    """Prepare the live test server by calling live_server_setup on the fixture."""
    live_server_setup(live_server)
@@ -36,16 +41,28 @@ and it can also be repeated
    from .. import html_tools
    # See that we can find the second <script> one, which is not broken, and matches our filter
-    text = html_tools.extract_json_as_string(content, "$.offers.price")
+    text = html_tools.extract_json_as_string(content, "json:$.offers.price")
    assert text == "23.5"
-    text = html_tools.extract_json_as_string('{"id":5}', "$.id")
+    # also check for jq
    if jq_support:
        text = html_tools.extract_json_as_string(content, "jq:.offers.price")
        assert text == "23.5"
        text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
        assert text == "5"
    text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
    assert text == "5"
    # When nothing at all is found, it should throw JSONNotFound
    # Which is caught and shown to the user in the watch-overview table
    with pytest.raises(html_tools.JSONNotFound) as e_info:
-        html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id")
+        html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
    if jq_support:
        with pytest.raises(html_tools.JSONNotFound) as e_info:
            html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
 def set_original_ext_response():
    data = """
@@ -66,6 +83,7 @@ def set_original_ext_response():
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(data)
    return None
 def set_modified_ext_response():
    data = """
@@ -86,6 +104,7 @@ def set_modified_ext_response():
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(data)
    return None
 def set_original_response():
    test_return_data = """
@@ -184,10 +203,10 @@ def test_check_json_without_filter(client, live_server):
    assert b'&#34;&lt;b&gt;' in res.data
    assert res.data.count(b'{\n') >= 2
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
-def test_check_json_filter(client, live_server):
+def check_json_filter(json_filter, client, live_server):
    json_filter = 'json:boss.name'
    set_original_response()
    # Give the endpoint time to spin up
@@ -226,7 +245,7 @@ def test_check_json_filter(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
-    assert bytes(json_filter.encode('utf-8')) in res.data
+    assert bytes(escape(json_filter).encode('utf-8')) in res.data
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -252,10 +271,17 @@ def test_check_json_filter(client, live_server):
    # And #462 - check we see the proper utf-8 string there
    assert "Örnsköldsvik".encode('utf-8') in res.data
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
-def test_check_json_filter_bool_val(client, live_server):
+def test_check_jsonpath_filter(client, live_server):
-    json_filter = "json:$['available']"
+    check_json_filter('json:boss.name', client, live_server)
 def test_check_jq_filter(client, live_server):
    """Run the shared JSON filter check using the jq selector ``jq:.boss.name``.

    When the optional ``jq`` module could not be imported (``jq_support`` is
    False) the test is reported as skipped instead of silently passing, so a
    missing jq install is visible in the test report.
    """
    if not jq_support:
        pytest.skip("jq is not installed")
    check_json_filter('jq:.boss.name', client, live_server)
 def check_json_filter_bool_val(json_filter, client, live_server):
    set_original_response()
    # Give the endpoint time to spin up
@@ -304,14 +330,22 @@ def test_check_json_filter_bool_val(client, live_server):
    # But the change should be there, though it's hard to test that the change was detected because it will show old and new versions
    assert b'false' in res.data
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
 def test_check_jsonpath_filter_bool_val(client, live_server):
    """Run the shared boolean-value filter check with the JSONPath selector ``json:$['available']``."""
    check_json_filter_bool_val("json:$['available']", client, live_server)
 def test_check_jq_filter_bool_val(client, live_server):
    """Run the shared boolean-value filter check with the jq selector ``jq:.available``.

    Reported as skipped (rather than passing without running anything) when
    the optional ``jq`` module could not be imported.
    """
    if not jq_support:
        pytest.skip("jq is not installed")
    check_json_filter_bool_val("jq:.available", client, live_server)
 # Re #265 - Extended JSON selector test
 # Stuff to consider here
 # - Selector should be allowed to return empty when it doesn't match (people might wait for some condition)
 # - The 'diff' tab could show the old and new content
 # - Form should let us enter a selector that doesn't (yet) match anything
-def test_check_json_ext_filter(client, live_server):
+def check_json_ext_filter(json_filter, client, live_server):
    json_filter = 'json:$[?(@.status==Sold)]'
    set_original_ext_response()
    # Give the endpoint time to spin up
@@ -350,7 +384,7 @@ def test_check_json_ext_filter(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
-    assert bytes(json_filter.encode('utf-8')) in res.data
+    assert bytes(escape(json_filter).encode('utf-8')) in res.data
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -376,3 +410,12 @@ def test_check_json_ext_filter(client, live_server):
    assert b'ForSale' not in res.data
    assert b'Sold' in res.data
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
 def test_check_jsonpath_ext_filter(client, live_server):
    """Run the shared extended filter check with the JSONPath selector ``json:$[?(@.status==Sold)]``."""
    check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
 def test_check_jq_ext_filter(client, live_server):
    """Run the shared extended filter check with a jq ``select``/``contains`` expression.

    Reported as skipped (rather than passing without running anything) when
    the optional ``jq`` module could not be imported.
    """
    if not jq_support:
        pytest.skip("jq is not installed")
    check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
--- a/changedetectionio/tests/visualselector/test_fetch_data.py
+++ b/changedetectionio/tests/visualselector/test_fetch_data.py
@@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
    live_server_setup(live_server)
    time.sleep(1)
-    # Add our URL to the import page, maybe better to use something we control?
+    # Add our URL to the import page, because the docker container (playwright/selenium) won't be able to connect to our usual test url
-    # We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
+    test_url = "https://changedetection.io/ci-test/test-runjs.html"
-    test_url = 'https://news.ycombinator.com'
+
    res = client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
@@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):
    res = client.post(
        url_for("edit_page", uuid="first", unpause_on_save=1),
-        data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
+        data={
              "url": test_url,
              "tag": "",
              "headers": "",
              'fetch_backend': "html_webdriver",
              'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
        },
        follow_redirects=True
    )
    assert b"unpaused" in res.data
    time.sleep(1)
    wait_for_all_checks(client)
    uuid = extract_UUID_from_client(client)
    # Check the JS execute code before extract worked
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    assert b'I smell JavaScript' in res.data
    assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
    assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -45,6 +45,9 @@ services:
  #        Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
  #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
  #      - USE_X_SETTINGS=1
  #
  #        Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
  #      - HIDE_REFERER=true
      # Comment out ports: when using behind a reverse proxy, enable networks: etc.
      ports:
--- a/docs/proxy-example.jpg
+++ b/docs/proxy-example.jpg
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-flask~= 2.0
+flask ~= 2.0
 flask_wtf
-eventlet>=0.31.0
+eventlet >= 0.31.0
 validators
-timeago ~=1.0
+timeago ~= 1.0
 inscriptis ~= 2.2
 feedgen ~= 0.9
 flask-login ~= 0.5
@@ -10,15 +10,20 @@ flask_restful
 pytz
 # Set these versions together to avoid a RequestsDependencyWarning
-requests[socks] ~= 2.26
+# >= 2.26 also adds Brotli support if brotli is installed
 brotli ~= 1.0
 requests[socks] ~= 2.28
 urllib3 > 1.26
 chardet > 2.3.0
 wtforms ~= 3.0
 jsonpath-ng ~= 1.5.3
 # jq not available on Windows so must be installed manually
 # Notification library
-apprise ~= 1.0.0
+apprise ~= 1.1.0
 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 paho-mqtt
@@ -42,3 +47,4 @@ selenium ~= 4.1.0
 werkzeug ~= 2.0.0
 # playwright is installed at Dockerfile build time because it's not available on all platforms
Author	SHA1	Message	Date
dgtlmoon	acb3fa0841	Merge branch 'master' into add-system-info-api	2022-10-23 18:24:12 +02:00
dgtlmoon	614431ff07	Basic system info/system state API	2022-10-23 18:21:36 +02:00
dgtlmoon	492bbce6b6	Build - Fix syntax in container build test (#1050 )	2022-10-23 16:02:13 +02:00
dgtlmoon	0394a56be5	Building - Test container build on PR	2022-10-23 15:54:19 +02:00
Entepotenz	7839551d6b	Testing - Use same version of playwright while running tests as in production builds (#1047 )	2022-10-23 11:26:32 +02:00
Entepotenz	9c5588c791	update path for validation in the CONTRIBUTING.md (#1046 )	2022-10-23 11:25:29 +02:00
dgtlmoon	5a43a350de	History index safety check - Be sure that only valid history index lines are read (#1042 )	2022-10-19 22:41:13 +02:00
Michael McMillan	3c31f023ce	Option to Hide the Referer header from monitored websites. (#996 )	2022-10-18 09:16:22 +02:00
dgtlmoon	4cbcc59461	0.39.20.4	2022-10-17 18:36:47 +02:00
dgtlmoon	4be0260381	Better cross platform file handling in diff and preview (#1034 )	2022-10-17 18:36:22 +02:00
dgtlmoon	957a3c1c16	0.39.20.3	2022-10-17 17:43:35 +02:00
dgtlmoon	85897e0bf9	Windows - diff file handling improvements (#1031 )	2022-10-17 17:40:28 +02:00
dgtlmoon	63095f70ea	Also include tests in pip build	2022-10-17 17:13:15 +02:00
dgtlmoon	8d5b0b5576	Update README.md	2022-10-12 10:51:39 +02:00
dgtlmoon	1b077abd93	0.39.20.2	2022-10-12 09:53:59 +02:00
dgtlmoon	32ea1a8721	Windows - JQ - Make library optional so it doesnt break Windows pip installs (#1009 )	2022-10-12 09:53:16 +02:00
dgtlmoon	fff32cef0d	Adding test - Test the 'execute JS before changedetection' (#1006 )	2022-10-11 14:40:36 +02:00
dgtlmoon	8fb146f3e4	0.39.20.1	2022-10-09 23:05:35 +02:00
dgtlmoon	770b0faa45	Code - check containers build when Dockerfile or requirements.txt changes (#1005 )	2022-10-09 22:58:01 +02:00
dgtlmoon	f6faa90340	Adding `make` to Dockerfile build as required by jq for ARM devices	2022-10-09 22:29:18 +02:00
dgtlmoon	669fd3ae0b	Dont use default Requests `user-agent` and `accept` headers in playwright+selenium requests, breaks sites such as united.com. (#1004 )	2022-10-09 18:25:36 +02:00
dgtlmoon	17d37fb626	0.39.20	2022-10-09 16:13:32 +02:00
Yusef Ouda	dfa7fc3a81	Adds support for jq JSON path querying engine (#1001 )	2022-10-09 16:12:45 +02:00
dgtlmoon	cd467df97a	Adding link to BrightData Proxy info (#1003 )	2022-10-09 15:51:57 +02:00
dgtlmoon	71bc2fed82	Remove quotationspage default watch	2022-10-09 14:06:07 +02:00
Hmmbob	738fcfe01c	Notification library: Bump apprise to 1.1.0 (signal, opsgenie, pagerduty, bark and mailto fixes, adds support for BulkSMS and SMSEagle) (#1002 )	2022-10-09 11:42:51 +02:00
dgtlmoon	3ebb2ab9ba	Selenium fetcher - screenshot should be taken after 'wait' time, not before #873	2022-09-25 11:05:07 +02:00
dgtlmoon	ac98bc9144	Upgrade Playwright to 1.26	2022-09-24 23:51:26 +02:00
dgtlmoon	3705ce6681	Render Extract Configurable Delay Seconds should also apply after executing any JS #958	2022-09-24 23:48:03 +02:00
dgtlmoon	f7ea99412f	Re #958 - remove change screensize, should be in 1280x720 default, was causing "Unable to retrieve content because the page is navigating and changing the content." on some sites	2022-09-19 14:02:32 +02:00
dgtlmoon	d4715e2bc8	Tidy up proxies.json logic, adding tests (#955 )	2022-09-19 13:14:35 +02:00
dgtlmoon	8567a83c47	Update README.md - Include BrightData suggestion	2022-09-16 13:21:01 +02:00