Compare commits


11 Commits

33 changed files with 354 additions and 226 deletions

.github/test/Dockerfile-alpine (vendored, new file)

@@ -0,0 +1,31 @@
+# Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile
+# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
+# Some packages wont install via pypi because they dont have a wheel available under this architecture.
+FROM ghcr.io/linuxserver/baseimage-alpine:3.16
+ENV PYTHONUNBUFFERED=1
+COPY requirements.txt /requirements.txt
+RUN \
+  apk add --update --no-cache --virtual=build-dependencies \
+    cargo \
+    g++ \
+    gcc \
+    libc-dev \
+    libffi-dev \
+    libxslt-dev \
+    make \
+    openssl-dev \
+    py3-wheel \
+    python3-dev \
+    zlib-dev && \
+  apk add --update --no-cache \
+    libxslt \
+    python3 \
+    py3-pip && \
+  echo "**** pip3 install test of changedetection.io ****" && \
+  pip3 install -U pip wheel setuptools && \
+  pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.16/ -r /requirements.txt && \
+  apk del --purge \
+    build-dependencies
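The same check can be run locally from the repository root with something like `docker build -f .github/test/Dockerfile-alpine -t alpine-build-test .` (the tag name is just an example; `-f` and `-t` are standard Docker flags).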


@@ -43,6 +43,16 @@ jobs:
           version: latest
           driver-opts: image=moby/buildkit:master
+      # https://github.com/dgtlmoon/changedetection.io/pull/1067
+      # Check we can still build under alpine/musl
+      - name: Test that the docker containers can build (musl via alpine check)
+        id: docker_build_musl
+        uses: docker/build-push-action@v2
+        with:
+          context: ./
+          file: ./.github/test/Dockerfile-alpine
+          platforms: linux/amd64,linux/arm64
       - name: Test that the docker containers can build
         id: docker_build
         uses: docker/build-push-action@v2

@@ -53,3 +63,4 @@ jobs:
           platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
           cache-from: type=local,src=/tmp/.buildx-cache
           cache-to: type=local,dest=/tmp/.buildx-cache


@@ -23,14 +23,10 @@ RUN pip install --target=/dependencies -r /requirements.txt

 # Playwright is an alternative to Selenium
 # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
+# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
 RUN pip install --target=/dependencies playwright~=1.26 \
     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."

-RUN pip install --target=/dependencies jq~=1.3 \
-    || echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."

 # Final image stage
 FROM python:3.8-slim


@@ -1,6 +1,7 @@
 ## Web Site Change Detection, Monitoring and Notification.

-Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
+_Live your data-life pro-actively, track website and JSON content changes, trigger notifications via Discord, Email, Slack, Telegram, API calls and 70+ more._

 [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)

@@ -8,8 +9,6 @@ Live your data-life pro-actively, track website content changes and receive noti
 ![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master)

-Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more

 [**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_

 - Chrome browser included.

@@ -167,9 +166,6 @@ One big advantage of `jq` is that you can use logic in your JSON filter, such as
 See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples

-Note: `jq` library must be added separately (`pip3 install jq`)

 ### Parse JSON embedded in HTML!

 When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
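As a rough illustration of the kind of logic a `jq:` rule allows (the sample document and the rule are invented for this example; `html_tools.extract_json_as_string` and its `content`/`json_filter` parameters appear elsewhere in this diff, and the optional `jq` package must be installed):

    from changedetectionio import html_tools

    api_response = '{"items": [{"name": "a", "price": 10}, {"name": "b", "price": 25}]}'
    # Keep only items costing more than 20 - conditional logic like this
    # is not possible with a plain JSONPath expression
    text = html_tools.extract_json_as_string(content=api_response,
                                             json_filter='jq:.items[] | select(.price > 20)')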


@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.39.20.4'
+__version__ = '0.39.21.1'

 datastore = None

@@ -199,8 +199,6 @@ def changedetection_app(config=None, datastore_o=None):
     # Setup cors headers to allow all domains
     # https://flask-cors.readthedocs.io/en/latest/
     # CORS(app)

@@ -601,7 +599,7 @@ def changedetection_app(config=None, datastore_o=None):
                 extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

             # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
-            if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
+            if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
                 if len(datastore.data['watching'][uuid].history):
                     extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

@@ -1309,8 +1307,8 @@ def changedetection_app(config=None, datastore_o=None):
     threading.Thread(target=notification_runner).start()

-    # Check for new release version, but not when running in test/build
-    if not os.getenv("GITHUB_REF", False):
+    # Check for new release version, but not when running in test/build or pytest
+    if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True:
         threading.Thread(target=check_for_new_version).start()

     return app


@@ -102,8 +102,8 @@ def main():
                          has_password=datastore.data['settings']['application']['password'] != False
                          )

-    # Monitored websites will not receive a Referer header
-    # when a user clicks on an outgoing link.
+    # Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
+    # @Note: Incompatible with password login (and maybe other features) for now, submit a PR!
     @app.after_request
     def hide_referrer(response):
         if os.getenv("HIDE_REFERER", False):


@@ -164,16 +164,16 @@ class Fetcher():
            }

-           // inject the current one set in the css_filter, which may be a CSS rule
+           // inject the current one set in the include_filters, which may be a CSS rule
            // used for displaying the current one in VisualSelector, where its not one we generated.
-           if (css_filter.length) {
+           if (include_filters.length) {
               q=false;
               try {
                   // is it xpath?
-                  if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) {
-                      q=document.evaluate(css_filter.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+                  if (include_filters.startsWith('/') || include_filters.startsWith('xpath:')) {
+                      q=document.evaluate(include_filters.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                   } else {
-                      q=document.querySelector(css_filter);
+                      q=document.querySelector(include_filters);
                   }
               } catch (e) {
                   // Maybe catch DOMException and alert?

@@ -186,7 +186,7 @@ class Fetcher():
            if (bbox && bbox['width'] >0 && bbox['height']>0) {
                size_pos.push({
-                   xpath: css_filter,
+                   xpath: include_filters,
                    width: bbox['width'],
                    height: bbox['height'],
                    left: bbox['left'],

@@ -220,7 +220,7 @@ class Fetcher():
             request_body,
             request_method,
             ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):
         # Should set self.error, self.status_code and self.content
         pass

@@ -310,7 +310,7 @@ class base_html_playwright(Fetcher):
             request_body,
             request_method,
             ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

         from playwright.sync_api import sync_playwright
         import playwright._impl._api_types

@@ -413,10 +413,10 @@ class base_html_playwright(Fetcher):
             self.status_code = response.status
             self.headers = response.all_headers()

-            if current_css_filter is not None:
-                page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
+            if current_include_filters is not None:
+                page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
             else:
-                page.evaluate("var css_filter=''")
+                page.evaluate("var include_filters=''")

             self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")

@@ -497,7 +497,7 @@ class base_html_webdriver(Fetcher):
             request_body,
             request_method,
             ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

         from selenium import webdriver
         from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

@@ -573,7 +573,7 @@ class html_requests(Fetcher):
             request_body,
             request_method,
             ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

         # Make requests use a more modern looking user-agent
         if not 'User-Agent' in request_headers:


@@ -10,6 +10,12 @@ from changedetectionio import content_fetcher, html_tools

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

+class FilterNotFoundInResponse(ValueError):
+    def __init__(self, msg):
+        ValueError.__init__(self, msg)

 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
 class perform_site_check():

@@ -104,7 +110,7 @@ class perform_site_check():
             if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
                 fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']

-            fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
+            fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['include_filters'])
             fetcher.quit()

             self.screenshot = fetcher.screenshot

@@ -128,24 +134,25 @@ class perform_site_check():
         is_html = False
         is_json = False

-        css_filter_rule = watch['css_filter']
+        include_filters_rule = watch['include_filters']
         subtractive_selectors = watch.get(
             "subtractive_selectors", []
         ) + self.datastore.data["settings"]["application"].get(
             "global_subtractive_selectors", []
         )

-        has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
+        has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
         has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())

         if is_json and not has_filter_rule:
-            css_filter_rule = "json:$"
+            include_filters_rule.append("json:$")
             has_filter_rule = True

         if has_filter_rule:
             json_filter_prefixes = ['json:', 'jq:']
-            if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
-                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
+            for filter in include_filters_rule:
+                if any(prefix in filter for prefix in json_filter_prefixes):
+                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                     is_html = False

         if is_html or is_source:

@@ -161,18 +168,28 @@ class perform_site_check():
             else:
                 # Then we assume HTML
                 if has_filter_rule:
-                    # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
-                    if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
-                        html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
-                                                               html_content=fetcher.content)
-                    else:
-                        # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
-                        html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
+                    html_content = ""
+                    for filter_rule in include_filters_rule:
+                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
+                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
+                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
+                                                                    html_content=fetcher.content,
+                                                                    append_pretty_line_formatting=not is_source)
+                        else:
+                            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+                            html_content += html_tools.include_filters(include_filters=filter_rule,
+                                                                       html_content=fetcher.content,
+                                                                       append_pretty_line_formatting=not is_source)
+
+                    if not html_content.strip():
+                        raise FilterNotFoundInResponse(include_filters_rule)

                 if has_subtractive_selectors:
                     html_content = html_tools.element_removal(subtractive_selectors, html_content)

-                if not is_source:
+                if is_source:
+                    stripped_text_from_html = html_content
+
+                else:
                     # extract text
                     stripped_text_from_html = \
                         html_tools.html_to_text(

@@ -182,12 +199,6 @@ class perform_site_check():
                                 "render_anchor_tag_content", False)
                             )

-                elif is_source:
-                    stripped_text_from_html = html_content

-        # Re #340 - return the content before the 'ignore text' was applied
-        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

         # Re #340 - return the content before the 'ignore text' was applied
         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
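A minimal sketch of how the new multi-filter loop behaves, using the renamed helpers from this change set (the sample HTML and rules are invented for illustration):

    from changedetectionio import html_tools

    content = "<html><body><div id='a'>A</div><span class='b'>B</span></body></html>"
    include_filters_rule = ["#a", "xpath://span[@class='b']"]

    html_content = ""
    for filter_rule in include_filters_rule:
        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
                                                    html_content=content,
                                                    append_pretty_line_formatting=True)
        else:
            html_content += html_tools.include_filters(include_filters=filter_rule,
                                                       html_content=content,
                                                       append_pretty_line_formatting=True)

    # Each rule's matches are concatenated in order; an all-empty result now
    # raises FilterNotFoundInResponse instead of silently producing an empty diff.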


@@ -349,7 +349,7 @@ class watchForm(commonSettingsForm):

     time_between_check = FormField(TimeBetweenCheckForm)

-    css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')
+    include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')

     subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
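Since `include_filters` now uses the same `StringListField` type as `subtractive_selectors`, the textarea presumably arrives as a list with one rule per line, roughly equivalent to this sketch (field internals are an assumption, not shown in this diff):

    raw = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"  # as posted by the edit form
    include_filters = [line.strip() for line in raw.splitlines() if line.strip()]
    # -> ['#blob-a', "xpath://*[contains(@id,'blob-b')]"]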


@@ -7,26 +7,30 @@ from typing import List
 import json
 import re

-class FilterNotFoundInResponse(ValueError):
-    def __init__(self, msg):
-        ValueError.__init__(self, msg)
+# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
+TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"

 class JSONNotFound(ValueError):
     def __init__(self, msg):
         ValueError.__init__(self, msg)

 # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
-def css_filter(css_filter, html_content):
+def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
     soup = BeautifulSoup(html_content, "html.parser")
     html_block = ""
-    r = soup.select(css_filter, separator="")
-    if len(html_content) > 0 and len(r) == 0:
-        raise FilterNotFoundInResponse(css_filter)
-    for item in r:
-        html_block += str(item)
-    return html_block + "\n"
+    r = soup.select(include_filters, separator="")

+    for element in r:
+        # When there's more than 1 match, then add the suffix to separate each line
+        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
+        # (This way each 'match' reliably has a new-line in the diff)
+        # Divs are converted to 4 whitespaces by inscriptis
+        if append_pretty_line_formatting and len(html_block) and not element.name in (['br', 'hr', 'div', 'p']):
+            html_block += TEXT_FILTER_LIST_LINE_SUFFIX
+        html_block += str(element)
+
+    return html_block

 def subtractive_css_selector(css_selector, html_content):
     soup = BeautifulSoup(html_content, "html.parser")

@@ -42,25 +46,29 @@ def element_removal(selectors: List[str], html_content):

 # Return str Utf-8 of matched rules
-def xpath_filter(xpath_filter, html_content):
+def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False):
     from lxml import etree, html

     tree = html.fromstring(bytes(html_content, encoding='utf-8'))
     html_block = ""

     r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
-    if len(html_content) > 0 and len(r) == 0:
-        raise FilterNotFoundInResponse(xpath_filter)

     #@note: //title/text() wont work where <title>CDATA..

     for element in r:
+        # When there's more than 1 match, then add the suffix to separate each line
+        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
+        # (This way each 'match' reliably has a new-line in the diff)
+        # Divs are converted to 4 whitespaces by inscriptis
+        if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
+            html_block += TEXT_FILTER_LIST_LINE_SUFFIX
+
         if type(element) == etree._ElementStringResult:
-            html_block += str(element) + "<br/>"
+            html_block += str(element)
         elif type(element) == etree._ElementUnicodeResult:
-            html_block += str(element) + "<br/>"
+            html_block += str(element)
         else:
-            html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"
+            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')

     return html_block
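A quick sketch of what `append_pretty_line_formatting` buys, assuming the `include_filters` function above and the `inscriptis` package (the sample HTML is invented):

    from inscriptis import get_text

    content = "<html><body><span class='parts'>Block A</span><span class='parts'>Block B</span></body></html>"
    html_blob = include_filters(include_filters=".parts", html_content=content,
                                append_pretty_line_formatting=True)
    # A "<br/>" (TEXT_FILTER_LIST_LINE_SUFFIX) is inserted between the two inline <span>
    # matches, so each match should render as its own line in the text diff:
    assert get_text(html_blob).splitlines() == ["Block A", "Block B"]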


@@ -103,12 +103,12 @@ class import_distill_io_json(Importer):
                 pass
             except IndexError:
                 pass

+            extras['include_filters'] = []
             try:
-                extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
                 if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
-                    extras['css_filter'] = 'xpath:' + extras['css_filter']
+                    extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
+                else:
+                    extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
             except KeyError:
                 pass
             except IndexError:


@@ -16,42 +16,43 @@ class model(dict):
     __newest_history_key = None
     __history_n=0

     __base_config = {
-        'url': None,
-        'tag': None,
-        'last_checked': 0,
-        'paused': False,
-        'last_viewed': 0,  # history key value of the last viewed via the [diff] link
-        #'newest_history_key': 0,
-        'title': None,
-        'previous_md5': False,
-        'uuid': str(uuid.uuid4()),
-        'headers': {},  # Extra headers to send
+        #'history': {},  # Dict of timestamp and output stripped filename (removed)
+        #'newest_history_key': 0, (removed, taken from history.txt index)
         'body': None,
-        'method': 'GET',
-        #'history': {},  # Dict of timestamp and output stripped filename
+        'check_unique_lines': False,  # On change-detected, compare against all history if its something new
+        'check_count': 0,
+        'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
+        'extract_text': [],  # Extract text by regex after filters
+        'extract_title_as_title': False,
+        'fetch_backend': None,
+        'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
+        'headers': {},  # Extra headers to send
         'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
+        'include_filters': [],
+        'last_checked': 0,
+        'last_error': False,
+        'last_viewed': 0,  # history key value of the last viewed via the [diff] link
+        'method': 'GET',
         # Custom notification content
-        'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
-        'notification_title': None,
         'notification_body': None,
         'notification_format': default_notification_format_for_watch,
         'notification_muted': False,
-        'css_filter': '',
-        'last_error': False,
-        'extract_text': [],  # Extract text by regex after filters
-        'subtractive_selectors': [],
-        'trigger_text': [],  # List of text or regex to wait for until a change is detected
-        'text_should_not_be_present': [],  # Text that should not present
-        'fetch_backend': None,
-        'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
-        'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
-        'extract_title_as_title': False,
-        'check_unique_lines': False,  # On change-detected, compare against all history if its something new
+        'notification_title': None,
+        'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
+        'paused': False,
+        'previous_md5': False,
         'proxy': None,  # Preferred proxy connection
+        'subtractive_selectors': [],
+        'tag': None,
+        'text_should_not_be_present': [],  # Text that should not present
         # Re #110, so then if this is set to None, we know to use the default value instead
         # Requires setting to None on submit if it's the same as the default
         # Should be all None by default, so we use the system default in this case.
         'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
+        'title': None,
+        'trigger_text': [],  # List of text or regex to wait for until a change is detected
+        'url': None,
+        'uuid': str(uuid.uuid4()),
         'webdriver_delay': None,
         'webdriver_js_execute_code': None,  # Run before change-detection
     }

@@ -185,6 +186,12 @@ class model(dict):
     def save_history_text(self, contents, timestamp):
         self.ensure_data_dir_exists()

+        # Small hack so that we sleep just enough to allow 1 second between history snapshots
+        # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
+        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
+            time.sleep(timestamp - self.__newest_history_key)

         snapshot_fname = "{}.txt".format(str(uuid.uuid4()))

         # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
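The sleep hack above exists because `history.txt` keys snapshots by whole epoch seconds, so two saves inside the same second would collide (illustrative numbers only):

    ts1, ts2 = 1660000000.25, 1660000000.75
    assert int(ts1) == int(ts2)  # both would map to the key 1660000000 in history.txt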


@@ -24,14 +24,6 @@ echo "RUNNING WITH BASE_URL SET"
 export BASE_URL="https://really-unique-domain.io"
 pytest tests/test_notification.py

-## JQ + JSON: filter test
-# jq is not available on windows and we should just test it when the package is installed
-# this will re-test with jq support
-pip3 install jq~=1.3
-pytest tests/test_jsonpath_jq_selector.py

 # Now for the selenium and playwright/browserless fetchers
 # Note - this is not UI functional tests - just checking that each one can fetch the content


@@ -50,7 +50,7 @@ $(document).ready(function() {
         state_clicked=false;
         ctx.clearRect(0, 0, c.width, c.height);
         xctx.clearRect(0, 0, c.width, c.height);
-        $("#css_filter").val('');
+        $("#include_filters").val('');
     });

@@ -68,7 +68,7 @@ $(document).ready(function() {
         xctx = c.getContext("2d");
         // redline highlight context
         ctx = c.getContext("2d");
-        current_default_xpath =$("#css_filter").val();
+        current_default_xpath =$("#include_filters").val();
         fetch_data();
         $('#selector-canvas').off("mousemove mousedown");
         // screenshot_url defined in the edit.html template

@@ -205,9 +205,9 @@ $(document).ready(function() {
             var sel = selector_data['size_pos'][current_selected_i];
             if (sel[0] == '/') {
                 // @todo - not sure just checking / is right
-                $("#css_filter").val('xpath:'+sel.xpath);
+                $("#include_filters").val('xpath:'+sel.xpath);
             } else {
-                $("#css_filter").val(sel.xpath);
+                $("#include_filters").val(sel.xpath);
             }
             xctx.fillStyle = 'rgba(205,205,205,0.95)';
             xctx.strokeStyle = 'rgba(225,0,0,0.9)';


@@ -27,6 +27,8 @@ class ChangeDetectionStore:
     # For when we edit, we should write to disk
     needs_write_urgent = False

+    __version_check = True

     def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
         # Should only be active for docker
         # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)

@@ -37,7 +39,6 @@ class ChangeDetectionStore:
         self.proxy_list = None
         self.start_time = time.time()
         self.stop_thread = False

         # Base definition for all watchers
         # deepcopy part of #569 - not sure why its needed exactly
         self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))

@@ -81,8 +82,13 @@ class ChangeDetectionStore:
         except (FileNotFoundError, json.decoder.JSONDecodeError):
             if include_default_watches:
                 print("Creating JSON store at", self.datastore_path)
-                self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
-                self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
+                self.add_watch(url='https://news.ycombinator.com/',
+                               tag='Tech news',
+                               extras={'fetch_backend': 'html_requests'})
+                self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
+                               tag='changedetection.io',
+                               extras={'fetch_backend': 'html_requests'})

         self.__data['version_tag'] = version_tag

@@ -281,17 +287,31 @@ class ChangeDetectionStore:
             res = r.json()

             # List of permissible attributes we accept from the wild internet
-            for k in ['url', 'tag',
-                      'paused', 'title',
-                      'previous_md5', 'headers',
-                      'body', 'method',
-                      'ignore_text', 'css_filter',
-                      'subtractive_selectors', 'trigger_text',
-                      'extract_title_as_title', 'extract_text',
+            for k in [
+                      'body',
+                      'css_filter',
+                      'extract_text',
+                      'extract_title_as_title',
+                      'headers',
+                      'ignore_text',
+                      'include_filters',
+                      'method',
+                      'paused',
+                      'previous_md5',
+                      'subtractive_selectors',
+                      'tag',
                       'text_should_not_be_present',
-                      'webdriver_js_execute_code']:
+                      'title',
+                      'trigger_text',
+                      'webdriver_js_execute_code',
+                      'url',
+                      ]:
                 if res.get(k):
-                    apply_extras[k] = res[k]
+                    if k != 'css_filter':
+                        apply_extras[k] = res[k]
+                    else:
+                        # We renamed the field and made it a list
+                        apply_extras['include_filters'] = [res['css_filter']]

         except Exception as e:
             logging.error("Error fetching metadata for shared watch link", url, str(e))

@@ -320,6 +340,7 @@ class ChangeDetectionStore:
         if write_to_disk_now:
             self.sync_to_json()

         return new_uuid

     def visualselector_data_is_ready(self, watch_uuid):

@@ -583,3 +604,14 @@ class ChangeDetectionStore:
         for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
             if self.data['settings']['headers'].get(v):
                 del self.data['settings']['headers'][v]

+    # Convert filters to a list of filters css_filter -> include_filters
+    def update_8(self):
+        for uuid, watch in self.data['watching'].items():
+            try:
+                existing_filter = watch.get('css_filter', '')
+                if existing_filter:
+                    watch['include_filters'] = [existing_filter]
+            except:
+                continue
+
+        return
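Effect of the `update_8()` schema migration above, sketched on a minimal watch dict (note the old `css_filter` key is left in place; only the new list form is added):

    watch = {'css_filter': '#price'}
    existing_filter = watch.get('css_filter', '')
    if existing_filter:
        watch['include_filters'] = [existing_filter]
    # watch -> {'css_filter': '#price', 'include_filters': ['#price']}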


@@ -174,15 +174,17 @@ User-Agent: wonderbra 1.0") }}
                     </div>
                 </fieldset>
                 <div class="pure-control-group">
-                    {% set field = render_field(form.css_filter,
-                            placeholder=".class-name or #some-id, or other CSS selector rule.",
+                    {% set field = render_field(form.include_filters,
+                            rows=5,
+                            placeholder="#example
+xpath://body/div/span[contains(@class, 'example-class')]",
                             class="m-d")
                     %}
                     {{ field }}
                     {% if '/text()' in field %}
                       <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br/>
                     {% endif %}
-                    <span class="pure-form-message-inline">
+                    <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
                     <ul>
                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).


@@ -41,7 +41,7 @@ def app(request):
     cleanup(datastore_path)

-    app_config = {'datastore_path': datastore_path}
+    app_config = {'datastore_path': datastore_path, 'disable_checkver' : True}
     cleanup(app_config['datastore_path'])
     datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
     app = changedetection_app(app_config, datastore)


@@ -24,7 +24,7 @@ def test_preferred_proxy(client, live_server):
     res = client.post(
         url_for("edit_page", uuid="first"),
         data={
-            "css_filter": "",
+            "include_filters": "",
             "fetch_backend": "html_requests",
             "headers": "",
             "proxy": "proxy-two",


@@ -23,7 +23,7 @@ def test_basic_auth(client, live_server):
     # Check form validation
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data


@@ -3,7 +3,7 @@
 import time
 from flask import url_for
 from urllib.request import urlopen
-from .util import set_original_response, set_modified_response, live_server_setup
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks

 sleep_time_for_fetch_thread = 3

@@ -36,7 +36,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
     client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

     # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("index"))

@@ -69,7 +69,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
     res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
     assert b'1 watches are queued for rechecking.' in res.data

-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

     # Now something should be ready, indicated by having a 'unviewed' class
     res = client.get(url_for("index"))

@@ -98,14 +98,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
     assert b'which has this one new line' in res.data
     assert b'Which is across multiple lines' not in res.data

-    time.sleep(2)
+    wait_for_all_checks(client)

     # Do this a few times.. ensures we dont accidently set the status
     for n in range(2):
         client.get(url_for("form_watch_checknow"), follow_redirects=True)

         # Give the thread time to pick it up
-        time.sleep(sleep_time_for_fetch_thread)
+        wait_for_all_checks(client)

         # It should report nothing found (no new 'unviewed' class)
         res = client.get(url_for("index"))

@@ -125,7 +125,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
     )

     client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)
     res = client.get(url_for("index"))
     assert b'unviewed' in res.data


@@ -46,22 +46,23 @@ def set_modified_response():

 # Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
-def test_css_filter_output():
-    from changedetectionio import fetch_site_status
+def test_include_filters_output():
     from inscriptis import get_text

     # Check text with sub-parts renders correctly
     content = """<html> <body><div id="thingthing" > Some really <b>bold</b> text </div> </body> </html>"""
-    html_blob = css_filter(css_filter="#thingthing", html_content=content)
+    html_blob = include_filters(include_filters="#thingthing", html_content=content)
     text = get_text(html_blob)
     assert text == " Some really bold text"

     content = """<html> <body>
     <p>foo bar blah</p>
-    <div class="parts">Block A</div> <div class="parts">Block B</div></body>
+    <DIV class="parts">Block A</DiV> <div class="parts">Block B</DIV></body>
     </html>
     """
-    html_blob = css_filter(css_filter=".parts", html_content=content)
+    # in xPath this would be //*[@class='parts']
+    html_blob = include_filters(include_filters=".parts", html_content=content)
     text = get_text(html_blob)

     # Divs are converted to 4 whitespaces by inscriptis

@@ -69,10 +70,10 @@ def test_css_filter_output():

 # Tests the whole stack works with the CSS Filter
-def test_check_markup_css_filter_restriction(client, live_server):
+def test_check_markup_include_filters_restriction(client, live_server):
     sleep_time_for_fetch_thread = 3

-    css_filter = "#sametext"
+    include_filters = "#sametext"

     set_original_response()

@@ -98,7 +99,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data

@@ -107,7 +108,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
     res = client.get(
         url_for("edit_page", uuid="first"),
     )
-    assert bytes(css_filter.encode('utf-8')) in res.data
+    assert bytes(include_filters.encode('utf-8')) in res.data

     # Trigger a check
     client.get(url_for("form_watch_checknow"), follow_redirects=True)

@@ -126,3 +127,58 @@ def test_check_markup_css_filter_restriction(client, live_server):
     # Because it should be looking at only that 'sametext' id
     res = client.get(url_for("index"))
     assert b'unviewed' in res.data
+
+# Tests the whole stack works with the CSS Filter
+def test_check_multiple_filters(client, live_server):
+    sleep_time_for_fetch_thread = 3
+    include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<html><body>
+     <div id="blob-a">Blob A</div>
+     <div id="blob-b">Blob B</div>
+     <div id="blob-c">Blob C</div>
+     </body>
+     </html>
+    """)
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    time.sleep(1)
+
+    # Goto the edit page, add our ignore text
+    # Add our URL to the import page
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"include_filters": include_filters,
+              "url": test_url,
+              "tag": "",
+              "headers": "",
+              'fetch_backend': "html_requests"},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    # Only the two blobs should be here
+    assert b"Blob A" in res.data  # CSS was ok
+    assert b"Blob B" in res.data  # xPath was ok
+    assert b"Blob C" not in res.data  # Should not be included


@@ -88,7 +88,7 @@ def test_check_filter_multiline(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": '',
+        data={"include_filters": '',
               'extract_text': '/something.+?6 billion.+?lines/si',
               "url": test_url,
               "tag": "",

@@ -116,7 +116,7 @@ def test_check_filter_multiline(client, live_server):
 def test_check_filter_and_regex_extract(client, live_server):
     sleep_time_for_fetch_thread = 3
-    css_filter = ".changetext"
+    include_filters = ".changetext"

     set_original_response()

@@ -143,7 +143,7 @@ def test_check_filter_and_regex_extract(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": css_filter,
+        data={"include_filters": include_filters,
               'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
               "url": test_url,
               "tag": "",


@@ -92,7 +92,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
               "tag": "my tag",
               "title": "my title",
               "headers": "",
-              "css_filter": '.ticket-available',
+              "include_filters": '.ticket-available',
               "fetch_backend": "html_requests"})

     res = client.post(


@@ -76,7 +76,7 @@ def run_filter_test(client, content_filter):
               "title": "my title",
               "headers": "",
               "filter_failure_notification_send": 'y',
-              "css_filter": content_filter,
+              "include_filters": content_filter,
               "fetch_backend": "html_requests"})

     res = client.post(

@@ -95,7 +95,7 @@ def run_filter_test(client, content_filter):
     time.sleep(3)

     # We should see something in the frontend
-    assert b'Warning, filter' in res.data
+    assert b'Warning, no filters were found' in res.data

     # Now it should exist and contain our "filter not found" alert
     assert os.path.isfile("test-datastore/notification.txt")

@@ -131,7 +131,7 @@ def run_filter_test(client, content_filter):
 def test_setup(live_server):
     live_server_setup(live_server)

-def test_check_css_filter_failure_notification(client, live_server):
+def test_check_include_filters_failure_notification(client, live_server):
     set_original_response()
     time.sleep(1)
     run_filter_test(client, '#nope-doesnt-exist')


@@ -132,7 +132,7 @@ def set_original_response():
     return None

-def set_response_with_html():
+def set_json_response_with_html():
     test_return_data = """
     {
       "test": [

@@ -176,7 +176,7 @@ def set_modified_response():
 def test_check_json_without_filter(client, live_server):
     # Request a JSON document from a application/json source containing HTML
     # and be sure it doesn't get chewed up by instriptis
-    set_response_with_html()
+    set_json_response_with_html()

     # Give the endpoint time to spin up
     time.sleep(1)

@@ -189,9 +189,6 @@ def test_check_json_without_filter(client, live_server):
         follow_redirects=True
     )

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(3)

@@ -200,6 +197,7 @@ def test_check_json_without_filter(client, live_server):
         follow_redirects=True
     )

+    # Should still see '"html": "<b>"'
     assert b'&#34;&lt;b&gt;' in res.data
     assert res.data.count(b'{\n') >= 2

@@ -221,9 +219,6 @@ def check_json_filter(json_filter, client, live_server):
     )
     assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(3)

@@ -231,7 +226,7 @@ def check_json_filter(json_filter, client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": json_filter,
+        data={"include_filters": json_filter,
               "url": test_url,
               "tag": "",
               "headers": "",

@@ -247,9 +242,6 @@ def check_json_filter(json_filter, client, live_server):
     )
     assert bytes(escape(json_filter).encode('utf-8')) in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(3)

     # Make a change

@@ -301,7 +293,7 @@ def check_json_filter_bool_val(json_filter, client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": json_filter,
+        data={"include_filters": json_filter,
               "url": test_url,
               "tag": "",
               "headers": "",

@@ -311,11 +303,6 @@ def check_json_filter_bool_val(json_filter, client, live_server):
     )
     assert b"Updated watch." in res.data

-    time.sleep(3)

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(3)

     # Make a change

@@ -360,9 +347,6 @@ def check_json_ext_filter(json_filter, client, live_server):
     )
     assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(3)

@@ -370,7 +354,7 @@ def check_json_ext_filter(json_filter, client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": json_filter,
+        data={"include_filters": json_filter,
               "url": test_url,
               "tag": "",
               "headers": "",

@@ -386,9 +370,6 @@ def check_json_ext_filter(json_filter, client, live_server):
     )
     assert bytes(escape(json_filter).encode('utf-8')) in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(3)

     # Make a change


@@ -14,7 +14,7 @@ def test_share_watch(client, live_server):
     live_server_setup(live_server)

     test_url = url_for('test_endpoint', _external=True)
-    css_filter = ".nice-filter"
+    include_filters = ".nice-filter"

     # Add our URL to the import page
     res = client.post(

@@ -29,7 +29,7 @@ def test_share_watch(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data

@@ -37,7 +37,7 @@ def test_share_watch(client, live_server):
     res = client.get(
         url_for("edit_page", uuid="first"),
     )
-    assert bytes(css_filter.encode('utf-8')) in res.data
+    assert bytes(include_filters.encode('utf-8')) in res.data

     # click share the link
     res = client.get(

@@ -73,4 +73,8 @@ def test_share_watch(client, live_server):
     res = client.get(
         url_for("edit_page", uuid="first"),
     )
-    assert bytes(css_filter.encode('utf-8')) in res.data
+    assert bytes(include_filters.encode('utf-8')) in res.data
+
+    # Check it saved the URL
+    res = client.get(url_for("index"))
+    assert bytes(test_url.encode('utf-8')) in res.data


@@ -57,10 +57,9 @@ def test_check_basic_change_detection_functionality_source(client, live_server):

+# `subtractive_selectors` should still work in `source:` type requests
 def test_check_ignore_elements(client, live_server):
     set_original_response()
     time.sleep(2)
     test_url = 'source:'+url_for('test_endpoint', _external=True)

     # Add our URL to the import page

@@ -77,9 +76,9 @@ def test_check_ignore_elements(client, live_server):
     #####################
     # We want <span> and <p> ONLY, but ignore span with .foobar-detection

-    res = client.post(
+    client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
+        data={"include_filters": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )

@@ -89,7 +88,6 @@ def test_check_ignore_elements(client, live_server):
         url_for("preview_page", uuid="first"),
         follow_redirects=True
     )

     assert b'foobar-detection' not in res.data
     assert b'&lt;br' not in res.data
     assert b'&lt;p' in res.data


@@ -49,7 +49,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server):
         url_for("edit_page", uuid="first"),
         data={"trigger_text": "/cool.stuff/",
               "url": test_url,
-              "css_filter": '#in-here',
+              "include_filters": '#in-here',
               "fetch_backend": "html_requests"},
         follow_redirects=True
     )


@@ -22,7 +22,7 @@ def test_check_watch_field_storage(client, live_server):
         url_for("edit_page", uuid="first"),
         data={"notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n",
               "time_between_check-minutes": 126,
-              "css_filter": ".fooclass",
+              "include_filters": ".fooclass",
              "title": "My title",
               "ignore_text": "ignore this",
               "url": test_url,
View File
@@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server):
     time.sleep(1)
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server):
     time.sleep(1)
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -182,9 +182,6 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
     )
     assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)
@@ -192,7 +189,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
     # Add our URL to the import page
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -230,10 +227,11 @@ def test_xpath_validation(client, live_server):
         follow_redirects=True
     )
     assert b"1 Imported" in res.data
+    time.sleep(2)
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"is not a valid XPath expression" in res.data
@@ -242,7 +240,7 @@ def test_xpath_validation(client, live_server):
 # actually only really used by the distll.io importer, but could be handy too
-def test_check_with_prefix_css_filter(client, live_server):
+def test_check_with_prefix_include_filters(client, live_server):
     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data
@@ -263,7 +261,7 @@ def test_check_with_prefix_css_filter(client, live_server):
     res = client.post(
         url_for("edit_page", uuid="first"),
-        data={"css_filter": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
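The `xpath:` prefix lets one field accept either selector language. A self-contained sketch of that dispatch idea using lxml (illustrative, not the app's actual routine):

    from lxml import etree, html as lxml_html
    from lxml.cssselect import CSSSelector

    def apply_filter(rule, document):
        # 'xpath:' prefix selects XPath; anything else is treated as CSS
        tree = lxml_html.fromstring(document)
        if rule.startswith('xpath:'):
            matches = tree.xpath(rule[len('xpath:'):])
        else:
            matches = CSSSelector(rule)(tree)
        return [etree.tostring(el, encoding='unicode') for el in matches]

    print(apply_filter("xpath://*[contains(@class, 'sametext')]",
                       "<div class='sametext'>Some text</div>"))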
View File
@@ -86,6 +86,7 @@ def extract_UUID_from_client(client):
 def wait_for_all_checks(client):
     # Loop waiting until done..
     attempt=0
+    time.sleep(0.1)
     while attempt < 60:
         time.sleep(1)
         res = client.get(url_for("index"))
View File
@@ -4,7 +4,7 @@ import queue
 import time
 from changedetectionio import content_fetcher
-from changedetectionio.html_tools import FilterNotFoundInResponse
+from changedetectionio.fetch_site_status import FilterNotFoundInResponse

 # A single update worker
 #
@@ -91,8 +91,8 @@ class update_worker(threading.Thread):
             return

         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
-                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
-                        watch['css_filter'],
+                    'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
+                        ", ".join(watch['include_filters']),
                         threshold),
                     'notification_format': 'text'}
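Because the watch now stores a list, the message body joins it into one readable string, e.g.:

    # Illustrative values; the real list comes from the watch settings
    include_filters = ["#price", "//div[@id='stock']"]
    print("Your configured CSS/xPath filters of '{}' did not appear on the page"
          .format(", ".join(include_filters)))
    # -> Your configured CSS/xPath filters of '#price, //div[@id='stock']' did not appear on the page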
@@ -189,7 +189,7 @@ class update_worker(threading.Thread):
             if not self.datastore.data['watching'].get(uuid):
                 continue
-            err_text = "Warning, filter '{}' not found".format(str(e))
+            err_text = "Warning, no filters were found, no change detection ran."
             self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                # So that we get a trigger when the content is added again
                                                                'previous_md5': ''})
@@ -282,10 +282,12 @@ class update_worker(threading.Thread):
                 self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                 self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

             # Always record that we atleast tried
+            count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
             self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
-                                                               'last_checked': round(time.time())})
+                                                               'last_checked': round(time.time()),
+                                                               'check_count': count
+                                                               })

             # Always save the screenshot if it's available
             if update_handler.screenshot:
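The new `check_count` uses the standard dict-get-with-default idiom, so a watch without the key starts counting from zero:

    watch = {}  # a watch record that has never been checked
    count = watch.get('check_count', 0) + 1
    watch['check_count'] = count
    assert watch['check_count'] == 1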
View File
@@ -50,5 +50,9 @@ werkzeug ~= 2.0.0
 jinja2~=3.1
 jinja2-time

+# https://peps.python.org/pep-0508/#environment-markers
+# https://github.com/dgtlmoon/changedetection.io/pull/1009
+jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux"
+
 # playwright is installed at Dockerfile build time because it's not available on all platforms
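The `;` clause on the jq line is a PEP 508 environment marker: pip evaluates it against the target interpreter and platform and silently skips the requirement when it is false, which is why jq is only installed on Linux with Python 3.8+. Markers can be checked directly with the `packaging` library:

    from packaging.markers import Marker

    marker = Marker('python_version >= "3.8" and sys_platform == "linux"')
    print(marker.evaluate())  # True on Linux + Python 3.8 or newer, False elsewhere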