mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-05 17:16:12 +00:00
Compare commits
26 Commits
proxy-impr
...
windows-di
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f16aa1b658 | ||
|
|
6d02110473 | ||
|
|
957a3c1c16 | ||
|
|
85897e0bf9 | ||
|
|
63095f70ea | ||
|
|
c782a7e5c8 | ||
|
|
d4b241720a | ||
|
|
8d5b0b5576 | ||
|
|
1b077abd93 | ||
|
|
32ea1a8721 | ||
|
|
fff32cef0d | ||
|
|
8fb146f3e4 | ||
|
|
770b0faa45 | ||
|
|
f6faa90340 | ||
|
|
669fd3ae0b | ||
|
|
17d37fb626 | ||
|
|
dfa7fc3a81 | ||
|
|
cd467df97a | ||
|
|
71bc2fed82 | ||
|
|
738fcfe01c | ||
|
|
3ebb2ab9ba | ||
|
|
ac98bc9144 | ||
|
|
3705ce6681 | ||
|
|
f7ea99412f | ||
|
|
d4715e2bc8 | ||
|
|
8567a83c47 |
46
.github/workflows/test-container-build.yml
vendored
Normal file
46
.github/workflows/test-container-build.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
name: ChangeDetection.io Container Build Test
|
||||
|
||||
# Triggers the workflow on push or pull request events
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- requirements.txt
|
||||
- Dockerfile
|
||||
|
||||
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
|
||||
# @todo: some kind of path filter for requirements.txt and Dockerfile
|
||||
jobs:
|
||||
test-container-build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.9
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v1
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
- name: Test that the docker containers can build
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v2
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
12
.github/workflows/test-only.yml
vendored
12
.github/workflows/test-only.yml
vendored
@@ -1,28 +1,25 @@
|
||||
name: ChangeDetection.io Test
|
||||
name: ChangeDetection.io App Test
|
||||
|
||||
# Triggers the workflow on push or pull request events
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
test-build:
|
||||
test-application:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.9
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
|
||||
- name: Show env vars
|
||||
run: set
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
|
||||
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
@@ -39,7 +36,4 @@ jobs:
|
||||
# Each test is totally isolated and performs its own cleanup/reset
|
||||
cd changedetectionio; ./run_all_tests.sh
|
||||
|
||||
# https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
|
||||
# https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
|
||||
|
||||
# https://github.com/docker/buildx/issues/495#issuecomment-918925854
|
||||
|
||||
16
Dockerfile
16
Dockerfile
@@ -5,13 +5,14 @@ FROM python:3.8-slim as builder
|
||||
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libssl-dev \
|
||||
libffi-dev \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
zlib1g-dev \
|
||||
g++
|
||||
make \
|
||||
zlib1g-dev
|
||||
|
||||
RUN mkdir /install
|
||||
WORKDIR /install
|
||||
@@ -22,9 +23,14 @@ RUN pip install --target=/dependencies -r /requirements.txt
|
||||
|
||||
# Playwright is an alternative to Selenium
|
||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||
RUN pip install --target=/dependencies playwright~=1.25 \
|
||||
RUN pip install --target=/dependencies playwright~=1.26 \
|
||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||
|
||||
|
||||
RUN pip install --target=/dependencies jq~=1.3 \
|
||||
|| echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."
|
||||
|
||||
|
||||
# Final image stage
|
||||
FROM python:3.8-slim
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ recursive-include changedetectionio/api *
|
||||
recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/static *
|
||||
recursive-include changedetectionio/model *
|
||||
recursive-include changedetectionio/tests *
|
||||
include changedetection.py
|
||||
global-exclude *.pyc
|
||||
global-exclude node_modules
|
||||
|
||||
@@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
||||
#### Key Features
|
||||
|
||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||
- Easily specify how often a site should be checked
|
||||
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||
|
||||
23
README.md
23
README.md
@@ -47,13 +47,15 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
||||
#### Key Features
|
||||
|
||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||
- Easily specify how often a site should be checked
|
||||
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||
- Override Request Headers, Specify `POST` or `GET` and other methods
|
||||
- Use the "Visual Selector" to help target specific elements
|
||||
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
|
||||
|
||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
|
||||
|
||||
## Screenshots
|
||||
|
||||
@@ -119,8 +121,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
|
||||
|
||||
|
||||
## Filters
|
||||
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
||||
|
||||
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
||||
(We support LXML `re:test`, `re:match` and `re:replace`.)
|
||||
|
||||
## Notifications
|
||||
@@ -149,7 +151,7 @@ Now you can also customise your notification content!
|
||||
|
||||
## JSON API Monitoring
|
||||
|
||||
Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector.
|
||||
Detect changes and monitor data in JSON APIs by using either JSONPath or jq to filter, parse, and restructure JSON as needed.
|
||||
|
||||

|
||||
|
||||
@@ -157,9 +159,20 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
|
||||
|
||||

|
||||
|
||||
### JSONPath or jq?
|
||||
|
||||
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to its built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specific information on jq.
|
||||
|
||||
One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc.
|
||||
|
||||
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples
|
||||
|
||||
Note: the `jq` library must be installed separately (`pip3 install jq`).
|
||||
|
||||
|
||||
### Parse JSON embedded in HTML!
|
||||
|
||||
When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
|
||||
When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside an HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
|
||||
|
||||
```
|
||||
<html>
|
||||
@@ -169,7 +182,7 @@ When you enable a `json:` filter, you can even automatically extract and parse e
|
||||
</script>
|
||||
```
|
||||
|
||||
`json:$.price` would give `23.50`, or you can extract the whole structure
|
||||
`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
|
||||
|
||||
## Proxy configuration
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.api import api_v1
|
||||
|
||||
__version__ = '0.39.19.1'
|
||||
__version__ = '0.39.20.3'
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -636,20 +636,27 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Only works reliably with Playwright
|
||||
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
|
||||
jq_support = True
|
||||
try:
|
||||
import jq
|
||||
except ModuleNotFoundError:
|
||||
jq_support = False
|
||||
|
||||
output = render_template("edit.html",
|
||||
uuid=uuid,
|
||||
watch=datastore.data['watching'][uuid],
|
||||
form=form,
|
||||
has_empty_checktime=using_default_check_time,
|
||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
using_global_webdriver_wait=default['webdriver_delay'] is None,
|
||||
current_base_url=datastore.data['settings']['application']['base_url'],
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
form=form,
|
||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
has_empty_checktime=using_default_check_time,
|
||||
jq_support=jq_support,
|
||||
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
using_global_webdriver_wait=default['webdriver_delay'] is None,
|
||||
uuid=uuid,
|
||||
visualselector_data_is_ready=visualselector_data_is_ready,
|
||||
visualselector_enabled=visualselector_enabled,
|
||||
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
watch=datastore.data['watching'][uuid],
|
||||
)
|
||||
|
||||
return output
|
||||
@@ -809,8 +816,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
newest_file = history[dates[-1]]
|
||||
|
||||
# Read as binary and force decode as UTF-8
|
||||
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
|
||||
try:
|
||||
with open(newest_file, 'r') as f:
|
||||
with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
newest_version_file_contents = f.read()
|
||||
except Exception as e:
|
||||
newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
|
||||
@@ -823,7 +832,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
previous_file = history[dates[-2]]
|
||||
|
||||
try:
|
||||
with open(previous_file, 'r') as f:
|
||||
with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
previous_version_file_contents = f.read()
|
||||
except Exception as e:
|
||||
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
|
||||
@@ -900,7 +909,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
timestamp = list(watch.history.keys())[-1]
|
||||
filename = watch.history[timestamp]
|
||||
try:
|
||||
with open(filename, 'r') as f:
|
||||
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
tmp = f.readlines()
|
||||
|
||||
# Get what needs to be highlighted
|
||||
|
||||
@@ -316,6 +316,7 @@ class base_html_playwright(Fetcher):
|
||||
import playwright._impl._api_types
|
||||
from playwright._impl._api_types import Error, TimeoutError
|
||||
response = None
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser_type = getattr(p, self.browser_type)
|
||||
|
||||
@@ -373,8 +374,11 @@ class base_html_playwright(Fetcher):
|
||||
print("response object was none")
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
|
||||
# Bug 2(?) Set the viewport size AFTER loading the page
|
||||
page.set_viewport_size({"width": 1280, "height": 1024})
|
||||
|
||||
# Removed browser-set-size, seemed to be needed to make screenshots work reliably in older playwright versions
|
||||
# Was causing exceptions like 'waiting for page but content is changing' etc
|
||||
# https://www.browserstack.com/docs/automate/playwright/change-browser-window-size 1280x720 should be the default
|
||||
|
||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||
time.sleep(extra_wait)
|
||||
|
||||
@@ -398,6 +402,13 @@ class base_html_playwright(Fetcher):
|
||||
|
||||
raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
|
||||
|
||||
else:
|
||||
# JS eval was run, now we also wait some time if possible to let the page settle
|
||||
if self.render_extract_delay:
|
||||
page.wait_for_timeout(self.render_extract_delay * 1000)
|
||||
|
||||
page.wait_for_timeout(500)
|
||||
|
||||
self.content = page.content()
|
||||
self.status_code = response.status
|
||||
self.headers = response.all_headers()
|
||||
@@ -514,8 +525,6 @@ class base_html_webdriver(Fetcher):
|
||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
|
||||
self.screenshot = self.driver.get_screenshot_as_png()
|
||||
|
||||
# @todo - how to check this? is it possible?
|
||||
self.status_code = 200
|
||||
# @todo somehow we should try to get this working for WebDriver
|
||||
@@ -526,6 +535,8 @@ class base_html_webdriver(Fetcher):
|
||||
self.content = self.driver.page_source
|
||||
self.headers = {}
|
||||
|
||||
self.screenshot = self.driver.get_screenshot_as_png()
|
||||
|
||||
# Does the connection to the webdriver work? run a test connection.
|
||||
def is_ready(self):
|
||||
from selenium import webdriver
|
||||
@@ -564,6 +575,11 @@ class html_requests(Fetcher):
|
||||
ignore_status_codes=False,
|
||||
current_css_filter=None):
|
||||
|
||||
# Make requests use a more modern looking user-agent
|
||||
if not 'User-Agent' in request_headers:
|
||||
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
||||
|
||||
proxies = {}
|
||||
|
||||
# Allows override the proxy on a per-request basis
|
||||
|
||||
@@ -141,8 +141,9 @@ class perform_site_check():
|
||||
has_filter_rule = True
|
||||
|
||||
if has_filter_rule:
|
||||
if 'json:' in css_filter_rule:
|
||||
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
|
||||
json_filter_prefixes = ['json:', 'jq:']
|
||||
if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
|
||||
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
|
||||
is_html = False
|
||||
|
||||
if is_html or is_source:
|
||||
|
||||
@@ -303,6 +303,25 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
|
||||
# Re #265 - maybe in the future fetch the page and offer a
|
||||
# warning/notice that its possible the rule doesnt yet match anything?
|
||||
if not self.allow_json:
|
||||
raise ValidationError("jq not permitted in this field!")
|
||||
|
||||
if 'jq:' in line:
|
||||
try:
|
||||
import jq
|
||||
except ModuleNotFoundError:
|
||||
# `jq` requires full compilation in windows and so isn't generally available
|
||||
raise ValidationError("jq not support not found")
|
||||
|
||||
input = line.replace('jq:', '')
|
||||
|
||||
try:
|
||||
jq.compile(input)
|
||||
except (ValueError) as e:
|
||||
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
|
||||
raise ValidationError(message % (input, str(e)))
|
||||
except:
|
||||
raise ValidationError("A system-error occurred when validating your jq expression")
|
||||
|
||||
|
||||
class quickWatchForm(Form):
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from jsonpath_ng.ext import parse
|
||||
import re
|
||||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
from jsonpath_ng.ext import parse
|
||||
from typing import List
|
||||
import json
|
||||
import re
|
||||
|
||||
class FilterNotFoundInResponse(ValueError):
|
||||
def __init__(self, msg):
|
||||
@@ -79,19 +79,35 @@ def extract_element(find='title', html_content=''):
|
||||
return element_text
|
||||
|
||||
#
|
||||
def _parse_json(json_data, jsonpath_filter):
|
||||
s=[]
|
||||
jsonpath_expression = parse(jsonpath_filter.replace('json:', ''))
|
||||
match = jsonpath_expression.find(json_data)
|
||||
def _parse_json(json_data, json_filter):
|
||||
if 'json:' in json_filter:
|
||||
jsonpath_expression = parse(json_filter.replace('json:', ''))
|
||||
match = jsonpath_expression.find(json_data)
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
if 'jq:' in json_filter:
|
||||
|
||||
try:
|
||||
import jq
|
||||
except ModuleNotFoundError:
|
||||
# `jq` requires full compilation in windows and so isn't generally available
|
||||
raise Exception("jq not support not found")
|
||||
|
||||
jq_expression = jq.compile(json_filter.replace('jq:', ''))
|
||||
match = jq_expression.input(json_data).all()
|
||||
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
def _get_stripped_text_from_json_match(match):
|
||||
s = []
|
||||
# More than one result, we will return it as a JSON list.
|
||||
if len(match) > 1:
|
||||
for i in match:
|
||||
s.append(i.value)
|
||||
s.append(i.value if hasattr(i, 'value') else i)
|
||||
|
||||
# Single value, use just the value, as it could be later used in a token in notifications.
|
||||
if len(match) == 1:
|
||||
s = match[0].value
|
||||
s = match[0].value if hasattr(match[0], 'value') else match[0]
|
||||
|
||||
# Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
|
||||
if not match:
|
||||
@@ -103,16 +119,16 @@ def _parse_json(json_data, jsonpath_filter):
|
||||
|
||||
return stripped_text_from_html
|
||||
|
||||
def extract_json_as_string(content, jsonpath_filter):
|
||||
def extract_json_as_string(content, json_filter):
|
||||
|
||||
stripped_text_from_html = False
|
||||
|
||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
|
||||
try:
|
||||
stripped_text_from_html = _parse_json(json.loads(content), jsonpath_filter)
|
||||
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
||||
except json.JSONDecodeError:
|
||||
|
||||
# Foreach <script json></script> blob.. just return the first that matches jsonpath_filter
|
||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||
s = []
|
||||
soup = BeautifulSoup(content, 'html.parser')
|
||||
bs_result = soup.findAll('script')
|
||||
@@ -131,7 +147,7 @@ def extract_json_as_string(content, jsonpath_filter):
|
||||
# Just skip it
|
||||
continue
|
||||
else:
|
||||
stripped_text_from_html = _parse_json(json_data, jsonpath_filter)
|
||||
stripped_text_from_html = _parse_json(json_data, json_filter)
|
||||
if stripped_text_from_html:
|
||||
break
|
||||
|
||||
|
||||
@@ -13,10 +13,6 @@ class model(dict):
|
||||
'watching': {},
|
||||
'settings': {
|
||||
'headers': {
|
||||
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
||||
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
||||
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
||||
},
|
||||
'requests': {
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
|
||||
@@ -151,28 +151,30 @@ class model(dict):
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
|
||||
output_path = os.path.join(self.__datastore_path, self['uuid'])
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
|
||||
|
||||
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
||||
logging.debug("Saving history text {}".format(snapshot_fname))
|
||||
|
||||
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
|
||||
# most sites are utf-8 and some are even broken utf-8
|
||||
with open(snapshot_fname, 'wb') as f:
|
||||
f.write(contents)
|
||||
f.close()
|
||||
|
||||
# Append to index
|
||||
# @todo check last char was \n
|
||||
index_fname = "{}/history.txt".format(output_path)
|
||||
index_fname = os.path.join(output_path, "history.txt")
|
||||
with open(index_fname, 'a') as f:
|
||||
f.write("{},{}\n".format(timestamp, snapshot_fname))
|
||||
f.close()
|
||||
|
||||
self.__newest_history_key = timestamp
|
||||
self.__history_n+=1
|
||||
self.__history_n += 1
|
||||
|
||||
#@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
|
||||
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
|
||||
return snapshot_fname
|
||||
|
||||
@property
|
||||
|
||||
@@ -23,6 +23,13 @@ export BASE_URL="https://really-unique-domain.io"
|
||||
pytest tests/test_notification.py
|
||||
|
||||
|
||||
## JQ + JSON: filter test
|
||||
# jq is not available on windows and we should just test it when the package is installed
|
||||
# this will re-test with jq support
|
||||
pip3 install jq~=1.3
|
||||
pytest tests/test_jsonpath_jq_selector.py
|
||||
|
||||
|
||||
# Now for the selenium and playwright/browserless fetchers
|
||||
# Note - this is not UI functional tests - just checking that each one can fetch the content
|
||||
|
||||
|
||||
@@ -81,8 +81,6 @@ class ChangeDetectionStore:
|
||||
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
||||
if include_default_watches:
|
||||
print("Creating JSON store at", self.datastore_path)
|
||||
|
||||
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
|
||||
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
|
||||
self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
|
||||
|
||||
@@ -577,3 +575,11 @@ class ChangeDetectionStore:
|
||||
continue
|
||||
return
|
||||
|
||||
|
||||
# We incorrectly used common header overrides that should only apply to Requests
|
||||
# These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium
|
||||
def update_7(self):
|
||||
# These were hard-coded in early versions
|
||||
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
|
||||
if self.data['settings']['headers'].get(v):
|
||||
del self.data['settings']['headers'][v]
|
||||
|
||||
@@ -77,6 +77,7 @@
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
|
||||
</span>
|
||||
</div>
|
||||
{% if form.proxy %}
|
||||
@@ -183,8 +184,16 @@ User-Agent: wonderbra 1.0") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
|
||||
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
||||
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
||||
<ul>
|
||||
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
|
||||
{% if jq_support %}
|
||||
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
|
||||
{% else %}
|
||||
<li>jq support not installed</li>
|
||||
{% endif %}
|
||||
</ul>
|
||||
</li>
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
|
||||
<ul>
|
||||
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
|
||||
@@ -193,7 +202,7 @@ User-Agent: wonderbra 1.0") }}
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
|
||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -99,6 +99,8 @@
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
<br/>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options">
|
||||
<div class="pure-form-message-inline">
|
||||
|
||||
@@ -2,10 +2,15 @@
|
||||
# coding=utf-8
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from flask import url_for, escape
|
||||
from . util import live_server_setup
|
||||
import pytest
|
||||
jq_support = True
|
||||
|
||||
try:
|
||||
import jq
|
||||
except ModuleNotFoundError:
|
||||
jq_support = False
|
||||
|
||||
def test_setup(live_server):
|
||||
live_server_setup(live_server)
|
||||
@@ -36,16 +41,28 @@ and it can also be repeated
|
||||
from .. import html_tools
|
||||
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(content, "$.offers.price")
|
||||
text = html_tools.extract_json_as_string(content, "json:$.offers.price")
|
||||
assert text == "23.5"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "$.id")
|
||||
# also check for jq
|
||||
if jq_support:
|
||||
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
|
||||
assert text == "23.5"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
|
||||
assert text == "5"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
|
||||
assert text == "5"
|
||||
|
||||
# When nothing at all is found, it should throw JSONNOTFound
|
||||
# Which is caught and shown to the user in the watch-overview table
|
||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id")
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
|
||||
|
||||
if jq_support:
|
||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
|
||||
|
||||
def set_original_ext_response():
|
||||
data = """
|
||||
@@ -66,6 +83,7 @@ def set_original_ext_response():
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
return None
|
||||
|
||||
def set_modified_ext_response():
|
||||
data = """
|
||||
@@ -86,6 +104,7 @@ def set_modified_ext_response():
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
return None
|
||||
|
||||
def set_original_response():
|
||||
test_return_data = """
|
||||
@@ -184,10 +203,10 @@ def test_check_json_without_filter(client, live_server):
|
||||
assert b'"<b>' in res.data
|
||||
assert res.data.count(b'{\n') >= 2
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_json_filter(client, live_server):
|
||||
json_filter = 'json:boss.name'
|
||||
|
||||
def check_json_filter(json_filter, client, live_server):
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
@@ -226,7 +245,7 @@ def test_check_json_filter(client, live_server):
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid="first"),
|
||||
)
|
||||
assert bytes(json_filter.encode('utf-8')) in res.data
|
||||
assert bytes(escape(json_filter).encode('utf-8')) in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
@@ -252,10 +271,17 @@ def test_check_json_filter(client, live_server):
|
||||
# And #462 - check we see the proper utf-8 string there
|
||||
assert "Örnsköldsvik".encode('utf-8') in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_json_filter_bool_val(client, live_server):
|
||||
json_filter = "json:$['available']"
|
||||
def test_check_jsonpath_filter(client, live_server):
|
||||
check_json_filter('json:boss.name', client, live_server)
|
||||
|
||||
def test_check_jq_filter(client, live_server):
|
||||
if jq_support:
|
||||
check_json_filter('jq:.boss.name', client, live_server)
|
||||
|
||||
def check_json_filter_bool_val(json_filter, client, live_server):
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
@@ -304,14 +330,22 @@ def test_check_json_filter_bool_val(client, live_server):
|
||||
# But the change should be there, though it's hard to test that the change was detected because it will show both old and new versions
|
||||
assert b'false' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_jsonpath_filter_bool_val(client, live_server):
|
||||
check_json_filter_bool_val("json:$['available']", client, live_server)
|
||||
|
||||
def test_check_jq_filter_bool_val(client, live_server):
|
||||
if jq_support:
|
||||
check_json_filter_bool_val("jq:.available", client, live_server)
|
||||
|
||||
# Re #265 - Extended JSON selector test
|
||||
# Stuff to consider here
|
||||
# - Selector should be allowed to return empty when it doesn't match (people might wait for some condition)
|
||||
# - The 'diff' tab could show the old and new content
|
||||
# - Form should let us enter a selector that doesn't (yet) match anything
|
||||
def test_check_json_ext_filter(client, live_server):
|
||||
json_filter = 'json:$[?(@.status==Sold)]'
|
||||
|
||||
def check_json_ext_filter(json_filter, client, live_server):
|
||||
set_original_ext_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
@@ -350,7 +384,7 @@ def test_check_json_ext_filter(client, live_server):
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid="first"),
|
||||
)
|
||||
assert bytes(json_filter.encode('utf-8')) in res.data
|
||||
assert bytes(escape(json_filter).encode('utf-8')) in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
@@ -376,3 +410,12 @@ def test_check_json_ext_filter(client, live_server):
|
||||
assert b'ForSale' not in res.data
|
||||
assert b'Sold' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_jsonpath_ext_filter(client, live_server):
|
||||
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
|
||||
|
||||
def test_check_jq_ext_filter(client, live_server):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
@@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page, maybe better to use something we control?
|
||||
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
|
||||
test_url = 'https://news.ycombinator.com'
|
||||
# Add our URL to the import page, because the docker container (playwright/selenium) won't be able to connect to our usual test URL
|
||||
test_url = "https://changedetection.io/ci-test/test-runjs.html"
|
||||
|
||||
res = client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
||||
@@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):
|
||||
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first", unpause_on_save=1),
|
||||
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
|
||||
data={
|
||||
"url": test_url,
|
||||
"tag": "",
|
||||
"headers": "",
|
||||
'fetch_backend': "html_webdriver",
|
||||
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"unpaused" in res.data
|
||||
time.sleep(1)
|
||||
wait_for_all_checks(client)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
|
||||
# Check the JS execute code before extract worked
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'I smell JavaScript' in res.data
|
||||
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
||||
|
||||
|
||||
BIN
docs/proxy-example.jpg
Normal file
BIN
docs/proxy-example.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
@@ -10,15 +10,20 @@ flask_restful
|
||||
pytz
|
||||
|
||||
# Set these versions together to avoid a RequestsDependencyWarning
|
||||
requests[socks] ~= 2.26
|
||||
# >= 2.26 also adds Brotli support if brotli is installed
|
||||
brotli ~= 1.0
|
||||
requests[socks] ~= 2.28
|
||||
|
||||
urllib3 > 1.26
|
||||
chardet > 2.3.0
|
||||
|
||||
wtforms ~= 3.0
|
||||
jsonpath-ng ~= 1.5.3
|
||||
|
||||
# jq not available on Windows so must be installed manually
|
||||
|
||||
# Notification library
|
||||
apprise ~= 1.0.0
|
||||
apprise ~= 1.1.0
|
||||
|
||||
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
||||
paho-mqtt
|
||||
|
||||
49
x
Normal file
49
x
Normal file
@@ -0,0 +1,49 @@
|
||||
diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py
|
||||
index c745dd3e..19873cce 100644
|
||||
--- a/changedetectionio/__init__.py
|
||||
+++ b/changedetectionio/__init__.py
|
||||
@@ -819,8 +819,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Read as binary and force decode as UTF-8
|
||||
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
|
||||
try:
|
||||
- with open(newest_file, 'rb') as f:
|
||||
- newest_version_file_contents = f.read().decode('utf-8')
|
||||
+ with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
+ newest_version_file_contents = f.read()
|
||||
except Exception as e:
|
||||
newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
|
||||
|
||||
@@ -832,8 +832,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
previous_file = history[dates[-2]]
|
||||
|
||||
try:
|
||||
- with open(previous_file, 'rb') as f:
|
||||
- previous_version_file_contents = f.read().decode('utf-8')
|
||||
+ with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
+ previous_version_file_contents = f.read()
|
||||
except Exception as e:
|
||||
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
|
||||
|
||||
@@ -909,7 +909,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
timestamp = list(watch.history.keys())[-1]
|
||||
filename = watch.history[timestamp]
|
||||
try:
|
||||
- with open(filename, 'r') as f:
|
||||
+ with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
tmp = f.readlines()
|
||||
|
||||
# Get what needs to be highlighted
|
||||
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
|
||||
index 9a87ad71..566eb88e 100644
|
||||
--- a/changedetectionio/model/Watch.py
|
||||
+++ b/changedetectionio/model/Watch.py
|
||||
@@ -158,7 +158,8 @@ class model(dict):
|
||||
|
||||
logging.debug("Saving history text {}".format(snapshot_fname))
|
||||
|
||||
- # in /diff/ we are going to assume for now that it's UTF-8 when reading
|
||||
+ # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
|
||||
+ # most sites are utf-8 and some are even broken utf-8
|
||||
with open(snapshot_fname, 'wb') as f:
|
||||
f.write(contents)
|
||||
f.close()
|
||||
Reference in New Issue
Block a user