Compare commits

..

32 Commits

Author SHA1 Message Date
dgtlmoon
acb3fa0841 Merge branch 'master' into add-system-info-api 2022-10-23 18:24:12 +02:00
dgtlmoon
614431ff07 Basic system info/system state API 2022-10-23 18:21:36 +02:00
dgtlmoon
492bbce6b6 Build - Fix syntax in container build test (#1050) 2022-10-23 16:02:13 +02:00
dgtlmoon
0394a56be5 Building - Test container build on PR 2022-10-23 15:54:19 +02:00
Entepotenz
7839551d6b Testing - Use same version of playwright while running tests as in production builds (#1047) 2022-10-23 11:26:32 +02:00
Entepotenz
9c5588c791 update path for validation in the CONTRIBUTING.md (#1046) 2022-10-23 11:25:29 +02:00
dgtlmoon
5a43a350de History index safety check - Be sure that only valid history index lines are read (#1042) 2022-10-19 22:41:13 +02:00
Michael McMillan
3c31f023ce Option to Hide the Referer header from monitored websites. (#996) 2022-10-18 09:16:22 +02:00
dgtlmoon
4cbcc59461 0.39.20.4 2022-10-17 18:36:47 +02:00
dgtlmoon
4be0260381 Better cross platform file handling in diff and preview (#1034) 2022-10-17 18:36:22 +02:00
dgtlmoon
957a3c1c16 0.39.20.3 2022-10-17 17:43:35 +02:00
dgtlmoon
85897e0bf9 Windows - diff file handling improvements (#1031) 2022-10-17 17:40:28 +02:00
dgtlmoon
63095f70ea Also include tests in pip build 2022-10-17 17:13:15 +02:00
dgtlmoon
8d5b0b5576 Update README.md 2022-10-12 10:51:39 +02:00
dgtlmoon
1b077abd93 0.39.20.2 2022-10-12 09:53:59 +02:00
dgtlmoon
32ea1a8721 Windows - JQ - Make library optional so it doesnt break Windows pip installs (#1009) 2022-10-12 09:53:16 +02:00
dgtlmoon
fff32cef0d Adding test - Test the 'execute JS before changedetection' (#1006) 2022-10-11 14:40:36 +02:00
dgtlmoon
8fb146f3e4 0.39.20.1 2022-10-09 23:05:35 +02:00
dgtlmoon
770b0faa45 Code - check containers build when Dockerfile or requirements.txt changes (#1005) 2022-10-09 22:58:01 +02:00
dgtlmoon
f6faa90340 Adding make to Dockerfile build as required by jq for ARM devices 2022-10-09 22:29:18 +02:00
dgtlmoon
669fd3ae0b Dont use default Requests user-agent and accept headers in playwright+selenium requests, breaks sites such as united.com. (#1004) 2022-10-09 18:25:36 +02:00
dgtlmoon
17d37fb626 0.39.20 2022-10-09 16:13:32 +02:00
Yusef Ouda
dfa7fc3a81 Adds support for jq JSON path querying engine (#1001) 2022-10-09 16:12:45 +02:00
dgtlmoon
cd467df97a Adding link to BrightData Proxy info (#1003) 2022-10-09 15:51:57 +02:00
dgtlmoon
71bc2fed82 Remove quotationspage default watch 2022-10-09 14:06:07 +02:00
Hmmbob
738fcfe01c Notification library: Bump apprise to 1.1.0 (signal, opsgenie, pagerduty, bark and mailto fixes, adds support for BulkSMS and SMSEagle) (#1002) 2022-10-09 11:42:51 +02:00
dgtlmoon
3ebb2ab9ba Selenium fetcher - screenshot should be taken after 'wait' time, not before #873 2022-09-25 11:05:07 +02:00
dgtlmoon
ac98bc9144 Upgrade Playwright to 1.26 2022-09-24 23:51:26 +02:00
dgtlmoon
3705ce6681 Render Extract Configurable Delay Seconds should also apply after executing any JS #958 2022-09-24 23:48:03 +02:00
dgtlmoon
f7ea99412f Re #958 - remove change screensize, should be in 1280x720 default, was causing "Unable to retrieve content because the page is navigating and changing the content." on some sites 2022-09-19 14:02:32 +02:00
dgtlmoon
d4715e2bc8 Tidy up proxies.json logic, adding tests (#955) 2022-09-19 13:14:35 +02:00
dgtlmoon
8567a83c47 Update README.md - Include BrightData suggestion 2022-09-16 13:21:01 +02:00
26 changed files with 371 additions and 94 deletions

View File

@@ -0,0 +1,55 @@
name: ChangeDetection.io Container Build Test
# Triggers the workflow on push or pull request events
# This line doesnt work, even tho it is the documented one
#on: [push, pull_request]
on:
push:
paths:
- requirements.txt
- Dockerfile
pull_request:
paths:
- requirements.txt
- Dockerfile
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile
jobs:
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
# Just test that the build works, some libraries won't compile on ARM/rPi etc
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
with:
install: true
version: latest
driver-opts: image=moby/buildkit:master
- name: Test that the docker containers can build
id: docker_build
uses: docker/build-push-action@v2
# https://github.com/docker/build-push-action#customizing
with:
context: ./
file: ./Dockerfile
platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache

View File

@@ -1,28 +1,25 @@
name: ChangeDetection.io Test name: ChangeDetection.io App Test
# Triggers the workflow on push or pull request events # Triggers the workflow on push or pull request events
on: [push, pull_request] on: [push, pull_request]
jobs: jobs:
test-build: test-application:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Python 3.9 - name: Set up Python 3.9
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:
python-version: 3.9 python-version: 3.9
- name: Show env vars
run: set
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install flake8 pytest pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Lint with flake8 - name: Lint with flake8
run: | run: |
# stop the build if there are Python syntax errors or undefined names # stop the build if there are Python syntax errors or undefined names
@@ -39,7 +36,4 @@ jobs:
# Each test is totally isolated and performs its own cleanup/reset # Each test is totally isolated and performs its own cleanup/reset
cd changedetectionio; ./run_all_tests.sh cd changedetectionio; ./run_all_tests.sh
# https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
# https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
# https://github.com/docker/buildx/issues/495#issuecomment-918925854

View File

@@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch.
Please be sure that all new functionality has a matching test! Please be sure that all new functionality has a matching test!
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
``` ```
pip3 install -r requirements-dev pip3 install -r requirements-dev

View File

@@ -5,13 +5,14 @@ FROM python:3.8-slim as builder
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
libssl-dev \ g++ \
libffi-dev \
gcc \ gcc \
libc-dev \ libc-dev \
libffi-dev \
libssl-dev \
libxslt-dev \ libxslt-dev \
zlib1g-dev \ make \
g++ zlib1g-dev
RUN mkdir /install RUN mkdir /install
WORKDIR /install WORKDIR /install
@@ -22,9 +23,14 @@ RUN pip install --target=/dependencies -r /requirements.txt
# Playwright is an alternative to Selenium # Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
RUN pip install --target=/dependencies playwright~=1.25 \ RUN pip install --target=/dependencies playwright~=1.26 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
RUN pip install --target=/dependencies jq~=1.3 \
|| echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."
# Final image stage # Final image stage
FROM python:3.8-slim FROM python:3.8-slim
@@ -58,6 +64,7 @@ EXPOSE 5000
# The actual flask app # The actual flask app
COPY changedetectionio /app/changedetectionio COPY changedetectionio /app/changedetectionio
# The eventlet server wrapper # The eventlet server wrapper
COPY changedetection.py /app/changedetection.py COPY changedetection.py /app/changedetection.py

View File

@@ -2,6 +2,7 @@ recursive-include changedetectionio/api *
recursive-include changedetectionio/templates * recursive-include changedetectionio/templates *
recursive-include changedetectionio/static * recursive-include changedetectionio/static *
recursive-include changedetectionio/model * recursive-include changedetectionio/model *
recursive-include changedetectionio/tests *
include changedetection.py include changedetection.py
global-exclude *.pyc global-exclude *.pyc
global-exclude node_modules global-exclude node_modules

View File

@@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
#### Key Features #### Key Features
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules - Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers" - Switch between fast non-JS and Chrome JS based "fetchers"
- Easily specify how often a site should be checked - Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!) - Execute JS before extracting text (Good for logging in, see examples in the UI!)

View File

@@ -47,13 +47,15 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
#### Key Features #### Key Features
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules - Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers" - Switch between fast non-JS and Chrome JS based "fetchers"
- Easily specify how often a site should be checked - Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!) - Execute JS before extracting text (Good for logging in, see examples in the UI!)
- Override Request Headers, Specify `POST` or `GET` and other methods - Override Request Headers, Specify `POST` or `GET` and other methods
- Use the "Visual Selector" to help target specific elements - Use the "Visual Selector" to help target specific elements
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
## Screenshots ## Screenshots
@@ -119,8 +121,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
## Filters ## Filters
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML `re:test`, `re:math` and `re:replace`.) (We support LXML `re:test`, `re:math` and `re:replace`.)
## Notifications ## Notifications
@@ -149,7 +151,7 @@ Now you can also customise your notification content!
## JSON API Monitoring ## JSON API Monitoring
Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector. Detect changes and monitor data in JSON API's by using either JSONPath or jq to filter, parse, and restructure JSON as needed.
![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-filter-field-example.png) ![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-filter-field-example.png)
@@ -157,9 +159,20 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-diff-example.png) ![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-diff-example.png)
### JSONPath or jq?
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specifc information on jq.
One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc.
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples
Note: `jq` library must be added separately (`pip3 install jq`)
### Parse JSON embedded in HTML! ### Parse JSON embedded in HTML!
When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites. When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
``` ```
<html> <html>
@@ -169,7 +182,7 @@ When you enable a `json:` filter, you can even automatically extract and parse e
</script> </script>
``` ```
`json:$.price` would give `23.50`, or you can extract the whole structure `json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
## Proxy configuration ## Proxy configuration

View File

@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools from changedetectionio import html_tools
from changedetectionio.api import api_v1 from changedetectionio.api import api_v1
__version__ = '0.39.19.1' __version__ = '0.39.20.4'
datastore = None datastore = None
@@ -194,6 +194,9 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>', watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
@@ -636,20 +639,27 @@ def changedetection_app(config=None, datastore_o=None):
# Only works reliably with Playwright # Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver' visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
output = render_template("edit.html", output = render_template("edit.html",
uuid=uuid,
watch=datastore.data['watching'][uuid],
form=form,
has_empty_checktime=using_default_check_time,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
using_global_webdriver_wait=default['webdriver_delay'] is None,
current_base_url=datastore.data['settings']['application']['base_url'], current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
settings_application=datastore.data['settings']['application'], settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
visualselector_data_is_ready=visualselector_data_is_ready, visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled, visualselector_enabled=visualselector_enabled,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False) watch=datastore.data['watching'][uuid],
) )
return output return output
@@ -809,8 +819,10 @@ def changedetection_app(config=None, datastore_o=None):
newest_file = history[dates[-1]] newest_file = history[dates[-1]]
# Read as binary and force decode as UTF-8
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
try: try:
with open(newest_file, 'r') as f: with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
newest_version_file_contents = f.read() newest_version_file_contents = f.read()
except Exception as e: except Exception as e:
newest_version_file_contents = "Unable to read {}.\n".format(newest_file) newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
@@ -823,7 +835,7 @@ def changedetection_app(config=None, datastore_o=None):
previous_file = history[dates[-2]] previous_file = history[dates[-2]]
try: try:
with open(previous_file, 'r') as f: with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
previous_version_file_contents = f.read() previous_version_file_contents = f.read()
except Exception as e: except Exception as e:
previous_version_file_contents = "Unable to read {}.\n".format(previous_file) previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
@@ -900,7 +912,7 @@ def changedetection_app(config=None, datastore_o=None):
timestamp = list(watch.history.keys())[-1] timestamp = list(watch.history.keys())[-1]
filename = watch.history[timestamp] filename = watch.history[timestamp]
try: try:
with open(filename, 'r') as f: with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
tmp = f.readlines() tmp = f.readlines()
# Get what needs to be highlighted # Get what needs to be highlighted

View File

@@ -122,3 +122,33 @@ class CreateWatch(Resource):
return {'status': "OK"}, 200 return {'status': "OK"}, 200
return list, 200 return list, 200
class SystemInfo(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency
self.datastore = kwargs['datastore']
self.update_q = kwargs['update_q']
@auth.check_token
def get(self):
import time
overdue_watches = []
# Check all watches and report which have not been checked but should have been
for uuid, watch in self.datastore.data.get('watching', {}).items():
# see if now - last_checked is greater than the time that should have been
# this is not super accurate (maybe they just edited it) but better than nothing
t = watch.threshold_seconds()
if not t:
t = self.datastore.threshold_seconds
time_since_check = time.time() - watch.get('last_checked')
if time_since_check > t:
overdue_watches.append(uuid)
return {
'queue_size': self.update_q.qsize(),
'overdue_watches': overdue_watches,
'uptime': round(time.time() - self.datastore.start_time, 2),
'watch_count': len(self.datastore.data.get('watching', {}))
}, 200

View File

@@ -102,6 +102,14 @@ def main():
has_password=datastore.data['settings']['application']['password'] != False has_password=datastore.data['settings']['application']['password'] != False
) )
# Monitored websites will not receive a Referer header
# when a user clicks on an outgoing link.
@app.after_request
def hide_referrer(response):
if os.getenv("HIDE_REFERER", False):
response.headers["Referrer-Policy"] = "no-referrer"
return response
# Proxy sub-directory support # Proxy sub-directory support
# Set environment var USE_X_SETTINGS=1 on this script # Set environment var USE_X_SETTINGS=1 on this script
# And then in your proxy_pass settings # And then in your proxy_pass settings

View File

@@ -316,6 +316,7 @@ class base_html_playwright(Fetcher):
import playwright._impl._api_types import playwright._impl._api_types
from playwright._impl._api_types import Error, TimeoutError from playwright._impl._api_types import Error, TimeoutError
response = None response = None
with sync_playwright() as p: with sync_playwright() as p:
browser_type = getattr(p, self.browser_type) browser_type = getattr(p, self.browser_type)
@@ -373,8 +374,11 @@ class base_html_playwright(Fetcher):
print("response object was none") print("response object was none")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
page.set_viewport_size({"width": 1280, "height": 1024}) # Removed browser-set-size, seemed to be needed to make screenshots work reliably in older playwright versions
# Was causing exceptions like 'waiting for page but content is changing' etc
# https://www.browserstack.com/docs/automate/playwright/change-browser-window-size 1280x720 should be the default
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
time.sleep(extra_wait) time.sleep(extra_wait)
@@ -398,6 +402,13 @@ class base_html_playwright(Fetcher):
raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url) raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
else:
# JS eval was run, now we also wait some time if possible to let the page settle
if self.render_extract_delay:
page.wait_for_timeout(self.render_extract_delay * 1000)
page.wait_for_timeout(500)
self.content = page.content() self.content = page.content()
self.status_code = response.status self.status_code = response.status
self.headers = response.all_headers() self.headers = response.all_headers()
@@ -514,8 +525,6 @@ class base_html_webdriver(Fetcher):
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
self.screenshot = self.driver.get_screenshot_as_png()
# @todo - how to check this? is it possible? # @todo - how to check this? is it possible?
self.status_code = 200 self.status_code = 200
# @todo somehow we should try to get this working for WebDriver # @todo somehow we should try to get this working for WebDriver
@@ -526,6 +535,8 @@ class base_html_webdriver(Fetcher):
self.content = self.driver.page_source self.content = self.driver.page_source
self.headers = {} self.headers = {}
self.screenshot = self.driver.get_screenshot_as_png()
# Does the connection to the webdriver work? run a test connection. # Does the connection to the webdriver work? run a test connection.
def is_ready(self): def is_ready(self):
from selenium import webdriver from selenium import webdriver
@@ -564,6 +575,11 @@ class html_requests(Fetcher):
ignore_status_codes=False, ignore_status_codes=False,
current_css_filter=None): current_css_filter=None):
# Make requests use a more modern looking user-agent
if not 'User-Agent' in request_headers:
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
proxies = {} proxies = {}
# Allows override the proxy on a per-request basis # Allows override the proxy on a per-request basis

View File

@@ -141,8 +141,9 @@ class perform_site_check():
has_filter_rule = True has_filter_rule = True
if has_filter_rule: if has_filter_rule:
if 'json:' in css_filter_rule: json_filter_prefixes = ['json:', 'jq:']
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule) if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
is_html = False is_html = False
if is_html or is_source: if is_html or is_source:

View File

@@ -303,6 +303,25 @@ class ValidateCSSJSONXPATHInput(object):
# Re #265 - maybe in the future fetch the page and offer a # Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything? # warning/notice that its possible the rule doesnt yet match anything?
if not self.allow_json:
raise ValidationError("jq not permitted in this field!")
if 'jq:' in line:
try:
import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise ValidationError("jq not support not found")
input = line.replace('jq:', '')
try:
jq.compile(input)
except (ValueError) as e:
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
raise ValidationError(message % (input, str(e)))
except:
raise ValidationError("A system-error occurred when validating your jq expression")
class quickWatchForm(Form): class quickWatchForm(Form):

View File

@@ -1,11 +1,11 @@
import json
from typing import List
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
import re
from inscriptis import get_text from inscriptis import get_text
from inscriptis.model.config import ParserConfig from inscriptis.model.config import ParserConfig
from jsonpath_ng.ext import parse
from typing import List
import json
import re
class FilterNotFoundInResponse(ValueError): class FilterNotFoundInResponse(ValueError):
def __init__(self, msg): def __init__(self, msg):
@@ -79,19 +79,35 @@ def extract_element(find='title', html_content=''):
return element_text return element_text
# #
def _parse_json(json_data, jsonpath_filter): def _parse_json(json_data, json_filter):
s=[] if 'json:' in json_filter:
jsonpath_expression = parse(jsonpath_filter.replace('json:', '')) jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data) match = jsonpath_expression.find(json_data)
return _get_stripped_text_from_json_match(match)
if 'jq:' in json_filter:
try:
import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise Exception("jq not support not found")
jq_expression = jq.compile(json_filter.replace('jq:', ''))
match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match)
def _get_stripped_text_from_json_match(match):
s = []
# More than one result, we will return it as a JSON list. # More than one result, we will return it as a JSON list.
if len(match) > 1: if len(match) > 1:
for i in match: for i in match:
s.append(i.value) s.append(i.value if hasattr(i, 'value') else i)
# Single value, use just the value, as it could be later used in a token in notifications. # Single value, use just the value, as it could be later used in a token in notifications.
if len(match) == 1: if len(match) == 1:
s = match[0].value s = match[0].value if hasattr(match[0], 'value') else match[0]
# Re #257 - Better handling where it does not exist, in the case the original 's' value was False.. # Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
if not match: if not match:
@@ -103,16 +119,16 @@ def _parse_json(json_data, jsonpath_filter):
return stripped_text_from_html return stripped_text_from_html
def extract_json_as_string(content, jsonpath_filter): def extract_json_as_string(content, json_filter):
stripped_text_from_html = False stripped_text_from_html = False
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson> # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
try: try:
stripped_text_from_html = _parse_json(json.loads(content), jsonpath_filter) stripped_text_from_html = _parse_json(json.loads(content), json_filter)
except json.JSONDecodeError: except json.JSONDecodeError:
# Foreach <script json></script> blob.. just return the first that matches jsonpath_filter # Foreach <script json></script> blob.. just return the first that matches json_filter
s = [] s = []
soup = BeautifulSoup(content, 'html.parser') soup = BeautifulSoup(content, 'html.parser')
bs_result = soup.findAll('script') bs_result = soup.findAll('script')
@@ -131,7 +147,7 @@ def extract_json_as_string(content, jsonpath_filter):
# Just skip it # Just skip it
continue continue
else: else:
stripped_text_from_html = _parse_json(json_data, jsonpath_filter) stripped_text_from_html = _parse_json(json_data, json_filter)
if stripped_text_from_html: if stripped_text_from_html:
break break

View File

@@ -13,10 +13,6 @@ class model(dict):
'watching': {}, 'watching': {},
'settings': { 'settings': {
'headers': { 'headers': {
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
}, },
'requests': { 'requests': {
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds 'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds

View File

@@ -118,7 +118,10 @@ class model(dict):
if os.path.isfile(fname): if os.path.isfile(fname):
logging.debug("Reading history index " + str(time.time())) logging.debug("Reading history index " + str(time.time()))
with open(fname, "r") as f: with open(fname, "r") as f:
tmp_history = dict(i.strip().split(',', 2) for i in f.readlines()) for i in f.readlines():
if ',' in i:
k, v = i.strip().split(',', 2)
tmp_history[k] = v
if len(tmp_history): if len(tmp_history):
self.__newest_history_key = list(tmp_history.keys())[-1] self.__newest_history_key = list(tmp_history.keys())[-1]
@@ -151,28 +154,30 @@ class model(dict):
import uuid import uuid
import logging import logging
output_path = "{}/{}".format(self.__datastore_path, self['uuid']) output_path = os.path.join(self.__datastore_path, self['uuid'])
self.ensure_data_dir_exists() self.ensure_data_dir_exists()
snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
logging.debug("Saving history text {}".format(snapshot_fname)) logging.debug("Saving history text {}".format(snapshot_fname))
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
# most sites are utf-8 and some are even broken utf-8
with open(snapshot_fname, 'wb') as f: with open(snapshot_fname, 'wb') as f:
f.write(contents) f.write(contents)
f.close() f.close()
# Append to index # Append to index
# @todo check last char was \n # @todo check last char was \n
index_fname = "{}/history.txt".format(output_path) index_fname = os.path.join(output_path, "history.txt")
with open(index_fname, 'a') as f: with open(index_fname, 'a') as f:
f.write("{},{}\n".format(timestamp, snapshot_fname)) f.write("{},{}\n".format(timestamp, snapshot_fname))
f.close() f.close()
self.__newest_history_key = timestamp self.__newest_history_key = timestamp
self.__history_n+=1 self.__history_n += 1
#@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status # @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
return snapshot_fname return snapshot_fname
@property @property

View File

@@ -9,6 +9,8 @@
# exit when any command fails # exit when any command fails
set -e set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name find tests/test_*py -type f|while read test_name
do do
echo "TEST RUNNING $test_name" echo "TEST RUNNING $test_name"
@@ -23,6 +25,13 @@ export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py pytest tests/test_notification.py
## JQ + JSON: filter test
# jq is not available on windows and we should just test it when the package is installed
# this will re-test with jq support
pip3 install jq~=1.3
pytest tests/test_jsonpath_jq_selector.py
# Now for the selenium and playwright/browserless fetchers # Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content # Note - this is not UI functional tests - just checking that each one can fetch the content
@@ -38,7 +47,9 @@ docker kill $$-test_selenium
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..." echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt # Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
pip3 install playwright~=1.24 PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
echo "using $PLAYWRIGHT_VERSION"
pip3 install "$PLAYWRIGHT_VERSION"
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
# takes a while to spin up # takes a while to spin up
sleep 5 sleep 5

View File

@@ -30,14 +30,14 @@ class ChangeDetectionStore:
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"): def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
# Should only be active for docker # Should only be active for docker
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO) # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
self.needs_write = False self.__data = App.model()
self.datastore_path = datastore_path self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path) self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
self.needs_write = False
self.proxy_list = None self.proxy_list = None
self.start_time = time.time()
self.stop_thread = False self.stop_thread = False
self.__data = App.model()
# Base definition for all watchers # Base definition for all watchers
# deepcopy part of #569 - not sure why its needed exactly # deepcopy part of #569 - not sure why its needed exactly
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={})) self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
@@ -81,8 +81,6 @@ class ChangeDetectionStore:
except (FileNotFoundError, json.decoder.JSONDecodeError): except (FileNotFoundError, json.decoder.JSONDecodeError):
if include_default_watches: if include_default_watches:
print("Creating JSON store at", self.datastore_path) print("Creating JSON store at", self.datastore_path)
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io') self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
@@ -577,3 +575,11 @@ class ChangeDetectionStore:
continue continue
return return
# We incorrectly used common header overrides that should only apply to Requests
# These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium
def update_7(self):
# These were hard-coded in early versions
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
if self.data['settings']['headers'].get(v):
del self.data['settings']['headers'][v]

View File

@@ -77,6 +77,7 @@
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p> <p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p> <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
</span> </span>
</div> </div>
{% if form.proxy %} {% if form.proxy %}
@@ -183,8 +184,16 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li> <ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, <li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
<ul> <ul>
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a <li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
@@ -193,7 +202,7 @@ User-Agent: wonderbra 1.0") }}
</ul> </ul>
</li> </li>
</ul> </ul>
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/> href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
</span> </span>
</div> </div>

View File

@@ -99,6 +99,8 @@
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p> <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p> <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
</span> </span>
<br/>
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
</div> </div>
<fieldset class="pure-group" id="webdriver-override-options"> <fieldset class="pure-group" id="webdriver-override-options">
<div class="pure-form-message-inline"> <div class="pure-form-message-inline">

View File

@@ -147,6 +147,16 @@ def test_api_simple(client, live_server):
# @todo how to handle None/default global values? # @todo how to handle None/default global values?
assert watch['history_n'] == 2, "Found replacement history section, which is in its own API" assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"
# basic systeminfo check
res = client.get(
url_for("systeminfo"),
headers={'x-api-key': api_key},
)
info = json.loads(res.data)
assert info.get('watch_count') == 1
assert info.get('uptime') > 0.5
# Finally delete the watch # Finally delete the watch
res = client.delete( res = client.delete(
url_for("watch", uuid=watch_uuid), url_for("watch", uuid=watch_uuid),

View File

@@ -2,10 +2,15 @@
# coding=utf-8 # coding=utf-8
import time import time
from flask import url_for from flask import url_for, escape
from . util import live_server_setup from . util import live_server_setup
import pytest import pytest
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
@@ -36,16 +41,28 @@ and it can also be repeated
from .. import html_tools from .. import html_tools
# See that we can find the second <script> one, which is not broken, and matches our filter # See that we can find the second <script> one, which is not broken, and matches our filter
text = html_tools.extract_json_as_string(content, "$.offers.price") text = html_tools.extract_json_as_string(content, "json:$.offers.price")
assert text == "23.5" assert text == "23.5"
text = html_tools.extract_json_as_string('{"id":5}', "$.id") # also check for jq
if jq_support:
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
assert text == "23.5"
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
assert text == "5" assert text == "5"
# When nothing at all is found, it should throw JSONNOTFound # When nothing at all is found, it should throw JSONNOTFound
# Which is caught and shown to the user in the watch-overview table # Which is caught and shown to the user in the watch-overview table
with pytest.raises(html_tools.JSONNotFound) as e_info: with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id") html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
if jq_support:
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
def set_original_ext_response(): def set_original_ext_response():
data = """ data = """
@@ -66,6 +83,7 @@ def set_original_ext_response():
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data) f.write(data)
return None
def set_modified_ext_response(): def set_modified_ext_response():
data = """ data = """
@@ -86,6 +104,7 @@ def set_modified_ext_response():
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data) f.write(data)
return None
def set_original_response(): def set_original_response():
test_return_data = """ test_return_data = """
@@ -184,10 +203,10 @@ def test_check_json_without_filter(client, live_server):
assert b'&#34;&lt;b&gt;' in res.data assert b'&#34;&lt;b&gt;' in res.data
assert res.data.count(b'{\n') >= 2 assert res.data.count(b'{\n') >= 2
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_json_filter(client, live_server): def check_json_filter(json_filter, client, live_server):
json_filter = 'json:boss.name'
set_original_response() set_original_response()
# Give the endpoint time to spin up # Give the endpoint time to spin up
@@ -226,7 +245,7 @@ def test_check_json_filter(client, live_server):
res = client.get( res = client.get(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
) )
assert bytes(json_filter.encode('utf-8')) in res.data assert bytes(escape(json_filter).encode('utf-8')) in res.data
# Trigger a check # Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -252,10 +271,17 @@ def test_check_json_filter(client, live_server):
# And #462 - check we see the proper utf-8 string there # And #462 - check we see the proper utf-8 string there
assert "Örnsköldsvik".encode('utf-8') in res.data assert "Örnsköldsvik".encode('utf-8') in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_json_filter_bool_val(client, live_server): def test_check_jsonpath_filter(client, live_server):
json_filter = "json:$['available']" check_json_filter('json:boss.name', client, live_server)
def test_check_jq_filter(client, live_server):
if jq_support:
check_json_filter('jq:.boss.name', client, live_server)
def check_json_filter_bool_val(json_filter, client, live_server):
set_original_response() set_original_response()
# Give the endpoint time to spin up # Give the endpoint time to spin up
@@ -304,14 +330,22 @@ def test_check_json_filter_bool_val(client, live_server):
# But the change should be there, tho its hard to test the change was detected because it will show old and new versions # But the change should be there, tho its hard to test the change was detected because it will show old and new versions
assert b'false' in res.data assert b'false' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_jsonpath_filter_bool_val(client, live_server):
check_json_filter_bool_val("json:$['available']", client, live_server)
def test_check_jq_filter_bool_val(client, live_server):
if jq_support:
check_json_filter_bool_val("jq:.available", client, live_server)
# Re #265 - Extended JSON selector test # Re #265 - Extended JSON selector test
# Stuff to consider here # Stuff to consider here
# - Selector should be allowed to return empty when it doesnt match (people might wait for some condition) # - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
# - The 'diff' tab could show the old and new content # - The 'diff' tab could show the old and new content
# - Form should let us enter a selector that doesnt (yet) match anything # - Form should let us enter a selector that doesnt (yet) match anything
def test_check_json_ext_filter(client, live_server): def check_json_ext_filter(json_filter, client, live_server):
json_filter = 'json:$[?(@.status==Sold)]'
set_original_ext_response() set_original_ext_response()
# Give the endpoint time to spin up # Give the endpoint time to spin up
@@ -350,7 +384,7 @@ def test_check_json_ext_filter(client, live_server):
res = client.get( res = client.get(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
) )
assert bytes(json_filter.encode('utf-8')) in res.data assert bytes(escape(json_filter).encode('utf-8')) in res.data
# Trigger a check # Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -376,3 +410,12 @@ def test_check_json_ext_filter(client, live_server):
assert b'ForSale' not in res.data assert b'ForSale' not in res.data
assert b'Sold' in res.data assert b'Sold' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_jsonpath_ext_filter(client, live_server):
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
def test_check_jq_ext_filter(client, live_server):
if jq_support:
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)

View File

@@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
live_server_setup(live_server) live_server_setup(live_server)
time.sleep(1) time.sleep(1)
# Add our URL to the import page, maybe better to use something we control? # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket test_url = "https://changedetection.io/ci-test/test-runjs.html"
test_url = 'https://news.ycombinator.com'
res = client.post( res = client.post(
url_for("form_quick_watch_add"), url_for("form_quick_watch_add"),
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
@@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):
res = client.post( res = client.post(
url_for("edit_page", uuid="first", unpause_on_save=1), url_for("edit_page", uuid="first", unpause_on_save=1),
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"}, data={
"url": test_url,
"tag": "",
"headers": "",
'fetch_backend': "html_webdriver",
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
},
follow_redirects=True follow_redirects=True
) )
assert b"unpaused" in res.data assert b"unpaused" in res.data
time.sleep(1) time.sleep(1)
wait_for_all_checks(client) wait_for_all_checks(client)
uuid = extract_UUID_from_client(client) uuid = extract_UUID_from_client(client)
# Check the JS execute code before extract worked
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'I smell JavaScript' in res.data
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"

View File

@@ -45,6 +45,9 @@ services:
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;` # Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory # More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1 # - USE_X_SETTINGS=1
#
# Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
# - HIDE_REFERER=true
# Comment out ports: when using behind a reverse proxy , enable networks: etc. # Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports: ports:

BIN
docs/proxy-example.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -1,8 +1,8 @@
flask~= 2.0 flask ~= 2.0
flask_wtf flask_wtf
eventlet>=0.31.0 eventlet >= 0.31.0
validators validators
timeago ~=1.0 timeago ~= 1.0
inscriptis ~= 2.2 inscriptis ~= 2.2
feedgen ~= 0.9 feedgen ~= 0.9
flask-login ~= 0.5 flask-login ~= 0.5
@@ -10,15 +10,20 @@ flask_restful
pytz pytz
# Set these versions together to avoid a RequestsDependencyWarning # Set these versions together to avoid a RequestsDependencyWarning
requests[socks] ~= 2.26 # >= 2.26 also adds Brotli support if brotli is installed
brotli ~= 1.0
requests[socks] ~= 2.28
urllib3 > 1.26 urllib3 > 1.26
chardet > 2.3.0 chardet > 2.3.0
wtforms ~= 3.0 wtforms ~= 3.0
jsonpath-ng ~= 1.5.3 jsonpath-ng ~= 1.5.3
# jq not available on Windows so must be installed manually
# Notification library # Notification library
apprise ~= 1.0.0 apprise ~= 1.1.0
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt paho-mqtt
@@ -42,3 +47,4 @@ selenium ~= 4.1.0
werkzeug ~= 2.0.0 werkzeug ~= 2.0.0
# playwright is installed at Dockerfile build time because it's not available on all platforms # playwright is installed at Dockerfile build time because it's not available on all platforms