Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-10-30 14:17:40 +00:00)

Compare commits: 30 commits, jinja2-tem...hours-day-
Commits in this comparison:

- b2b8c3f288
- ca3b351bae
- 83add91f78
- b7e0f0a5e4
- fedb16c242
- 61f0ac2937
- 2d948ea6d1
- dee0c735e6
- 9fa98f4ec6
- b3b4b5d3f1
- a3f9ac0a6f
- fcda5a0818
- 3920e613b9
- d023aa982e
- c341baf71b
- fca66eb558
- 359fc48fb4
- d0efeb9770
- 3416532cd6
- defc7a340e
- c197c062e1
- 77b59809ca
- f90b170e68
- c93ca1841c
- 57f604dff1
- 8499468749
- 7f6a13ea6c
- 9874f0cbc7
- 72834a42fd
- 724cb17224
.github/test/Dockerfile-alpine (vendored, new file, 31 lines)
@@ -0,0 +1,31 @@
# Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
# Some packages won't install via PyPI because they don't have a wheel available under this architecture.

FROM ghcr.io/linuxserver/baseimage-alpine:3.16
ENV PYTHONUNBUFFERED=1

COPY requirements.txt /requirements.txt

RUN \
  apk add --update --no-cache --virtual=build-dependencies \
    cargo \
    g++ \
    gcc \
    libc-dev \
    libffi-dev \
    libxslt-dev \
    make \
    openssl-dev \
    py3-wheel \
    python3-dev \
    zlib-dev && \
  apk add --update --no-cache \
    libxslt \
    python3 \
    py3-pip && \
  echo "**** pip3 install test of changedetection.io ****" && \
  pip3 install -U pip wheel setuptools && \
  pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.16/ -r /requirements.txt && \
  apk del --purge \
    build-dependencies
.github/workflows/test-container-build.yml (vendored, 11 lines changed)

@@ -43,6 +43,16 @@ jobs:
          version: latest
          driver-opts: image=moby/buildkit:master

      # https://github.com/dgtlmoon/changedetection.io/pull/1067
      # Check we can still build under alpine/musl
      - name: Test that the docker containers can build (musl via alpine check)
        id: docker_build_musl
        uses: docker/build-push-action@v2
        with:
          context: ./
          file: ./.github/test/Dockerfile-alpine
          platforms: linux/amd64,linux/arm64

      - name: Test that the docker containers can build
        id: docker_build
        uses: docker/build-push-action@v2

@@ -53,3 +63,4 @@ jobs:
          platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache
Dockerfile

@@ -23,14 +23,10 @@ RUN pip install --target=/dependencies -r /requirements.txt

# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
RUN pip install --target=/dependencies playwright~=1.26 \
    || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."

RUN pip install --target=/dependencies jq~=1.3 \
    || echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."

# Final image stage
FROM python:3.8-slim
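The `|| echo "WARN: ..."` pattern above makes Playwright and jq best-effort installs rather than hard requirements on architectures without wheels. A minimal sketch of how an application can degrade gracefully when such an optional package is missing (the `JQ_SUPPORT` flag and helper are illustrative, not taken from this codebase):

```python
# Guarded import: keep running when an optional dependency failed to install
try:
    import jq  # may be absent on arm/v6, arm/v7 or musl builds
    JQ_SUPPORT = True
except ImportError:
    JQ_SUPPORT = False

def apply_jq_filter(rule, document):
    # Hypothetical helper: refuse 'jq:' rules when the package is unavailable
    if not JQ_SUPPORT:
        raise RuntimeError("jq support is not installed, 'jq:' filter rules are disabled")
    return jq.compile(rule).input(document).first()
```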
README.md (12 lines changed)

@@ -1,6 +1,7 @@
## Web Site Change Detection, Monitoring and Notification.

Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
_Live your data-life pro-actively, Detect website changes and perform meaningful actions, trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._

[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)

@@ -8,8 +9,6 @@ Live your data-life pro-actively, track website content changes and receive noti



Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more

[**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_

- Chrome browser included.

@@ -167,9 +166,6 @@ One big advantage of `jq` is that you can use logic in your JSON filter, such as

See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples

Note: `jq` library must be added separately (`pip3 install jq`)

### Parse JSON embedded in HTML!

When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.

@@ -184,9 +180,9 @@ When you enable a `json:` or `jq:` filter, you can even automatically extract an

`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure

## Proxy configuration
## Proxy Configuration

See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration , we also support using [BrightData proxy services where possible](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support)

## Raspberry Pi support?
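To illustrate the two JSON filter styles the README documents (a sketch using the optional `jq` package and `jsonpath-ng`, which provides the JSONPath side; the sample document is invented):

```python
import json

import jq                      # optional: pip3 install jq
from jsonpath_ng import parse  # JSONPath engine

doc = json.loads('{"offers": {"price": 23.50, "priceCurrency": "USD"}}')

# jq style, as in a `jq:.offers.price` filter rule
print(jq.compile(".offers.price").input(doc).first())        # 23.5

# JSONPath style, as in a `json:$.offers.price` filter rule
print([m.value for m in parse("$.offers.price").find(doc)])  # [23.5]
```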
changedetectionio/__init__.py

@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1

__version__ = '0.39.20.4'
__version__ = '0.39.21.1'

datastore = None

@@ -199,8 +199,6 @@ def changedetection_app(config=None, datastore_o=None):

    # Setup cors headers to allow all domains
    # https://flask-cors.readthedocs.io/en/latest/
    # CORS(app)

@@ -568,23 +566,12 @@ def changedetection_app(config=None, datastore_o=None):
        for p in datastore.proxy_list:
            form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

        if request.method == 'POST' and form.validate():
            extra_update_obj = {}

            if request.args.get('unpause_on_save'):
                extra_update_obj['paused'] = False

            # Re #110, if they submit the same as the default value, set it to None, so we continue to follow the default
            # Assume we use the default value, unless something relevant is different, then use the form value
            # values could be None, 0 etc.
            # Set to None unless the next for: says that something is different
            extra_update_obj['time_between_check'] = dict.fromkeys(form.time_between_check.data)
            for k, v in form.time_between_check.data.items():
                if v and v != datastore.data['settings']['requests']['time_between_check'][k]:
                    extra_update_obj['time_between_check'] = form.time_between_check.data
                    using_default_check_time = False
                    break

            # Use the default if it's the same as system-wide
            if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
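The `dict.fromkeys(...)` call above is the heart of the Re #110 behaviour: every interval field is reset to None first, and the submitted values survive only if something differs from the system-wide default, so the watch otherwise keeps following the global setting. A condensed sketch with invented values:

```python
system_default = {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}
submitted      = {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}

# dict.fromkeys keeps the keys but drops every value to None ...
normalised = dict.fromkeys(submitted)

# ... then the submitted values are only kept when they differ from the default
if any(v and v != system_default[k] for k, v in submitted.items()):
    normalised = submitted

print(normalised)  # all None here, so this watch keeps inheriting the global default
```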
@@ -601,7 +588,7 @@ def changedetection_app(config=None, datastore_o=None):
                extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

            # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
            if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
            if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
                if len(datastore.data['watching'][uuid].history):
                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

@@ -734,13 +721,19 @@ def changedetection_app(config=None, datastore_o=None):
        else:
            flash("An error occurred, please see below.", "error")

        import datetime
        datetime = datetime.datetime.now(pytz.timezone(datastore.data['settings']['application'].get('timezone')))

        output = render_template("settings.html",
                                 form=form,
                                 current_base_url = datastore.data['settings']['application']['base_url'],
                                 hide_remove_pass=os.getenv("SALTED_PASS", False),
                                 api_key=datastore.data['settings']['application'].get('api_access_token'),
                                 current_base_url=datastore.data['settings']['application']['base_url'],
                                 datetime=str(datetime),
                                 emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                                 settings_application=datastore.data['settings']['application'])
                                 form=form,
                                 hide_remove_pass=os.getenv("SALTED_PASS", False),
                                 settings_application=datastore.data['settings']['application'],
                                 timezone=datastore.data['settings']['application'].get('timezone')
                                 )

        return output

@@ -987,9 +980,6 @@ def changedetection_app(config=None, datastore_o=None):

        # create a ZipFile object
        backupname = "changedetection-backup-{}.zip".format(int(time.time()))

        # We only care about UUIDS from the current index file
        uuids = list(datastore.data['watching'].keys())
        backup_filepath = os.path.join(datastore_o.datastore_path, backupname)

        with zipfile.ZipFile(backup_filepath, "w",

@@ -1005,12 +995,12 @@ def changedetection_app(config=None, datastore_o=None):
            # Add the flask app secret
            zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")

            # Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip.
            for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'):
                parent_p = txt_file_path.parent
                if parent_p.name in uuids:
                    zipObj.write(txt_file_path,
                                 arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''),
            # Add any data in the watch data directory.
            for uuid, w in datastore.data['watching'].items():
                for f in Path(w.watch_data_dir).glob('*'):
                    zipObj.write(f,
                                 # Use the full path to access the file, but make the file 'relative' in the Zip.
                                 arcname=os.path.join(f.parts[-2], f.parts[-1]),
                                 compress_type=zipfile.ZIP_DEFLATED,
                                 compresslevel=8)
@@ -1312,8 +1302,8 @@ def changedetection_app(config=None, datastore_o=None):

    threading.Thread(target=notification_runner).start()

    # Check for new release version, but not when running in test/build
    if not os.getenv("GITHUB_REF", False):
    # Check for new release version, but not when running in test/build or pytest
    if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True:
        threading.Thread(target=check_for_new_version).start()

    return app

@@ -1453,8 +1443,12 @@ def ticker_thread_check_time_launch_checks():
            seconds_since_last_recheck = now - watch['last_checked']

            if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
                if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:

                if not watch.is_schedule_permitted:
                    # Skip if the schedule (day of week and time) isn't permitted
                    continue

                if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
                    # Proxies can be set to have a limit on seconds between which they can be called
                    watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
                    if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()):
changedetectionio/changedetection.py

@@ -102,8 +102,8 @@ def main():
                       has_password=datastore.data['settings']['application']['password'] != False
                       )

    # Monitored websites will not receive a Referer header
    # when a user clicks on an outgoing link.
    # Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
    # @Note: Incompatible with password login (and maybe other features) for now, submit a PR!
    @app.after_request
    def hide_referrer(response):
        if os.getenv("HIDE_REFERER", False):
changedetectionio/content_fetcher.py

@@ -164,16 +164,16 @@ class Fetcher():
            }

            // inject the current one set in the css_filter, which may be a CSS rule
            // inject the current one set in the include_filters, which may be a CSS rule
            // used for displaying the current one in VisualSelector, where it's not one we generated.
            if (css_filter.length) {
            if (include_filters.length) {
                q=false;
                try {
                    // is it xpath?
                    if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) {
                        q=document.evaluate(css_filter.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                    if (include_filters.startsWith('/') || include_filters.startsWith('xpath:')) {
                        q=document.evaluate(include_filters.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                    } else {
                        q=document.querySelector(css_filter);
                        q=document.querySelector(include_filters);
                    }
                } catch (e) {
                    // Maybe catch DOMException and alert?

@@ -186,7 +186,7 @@ class Fetcher():

            if (bbox && bbox['width'] >0 && bbox['height']>0) {
                size_pos.push({
                    xpath: css_filter,
                    xpath: include_filters,
                    width: bbox['width'],
                    height: bbox['height'],
                    left: bbox['left'],

@@ -220,7 +220,7 @@ class Fetcher():
            request_body,
            request_method,
            ignore_status_codes=False,
            current_css_filter=None):
            current_include_filters=None):
        # Should set self.error, self.status_code and self.content
        pass

@@ -310,7 +310,7 @@ class base_html_playwright(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
            current_css_filter=None):
            current_include_filters=None):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types

@@ -413,10 +413,10 @@ class base_html_playwright(Fetcher):
            self.status_code = response.status
            self.headers = response.all_headers()

            if current_css_filter is not None:
                page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
            if current_include_filters is not None:
                page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
            else:
                page.evaluate("var css_filter=''")
                page.evaluate("var include_filters=''")

            self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")

@@ -497,7 +497,7 @@ class base_html_webdriver(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
            current_css_filter=None):
            current_include_filters=None):

        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

@@ -573,7 +573,7 @@ class html_requests(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
            current_css_filter=None):
            current_include_filters=None):

        # Make requests use a more modern looking user-agent
        if not 'User-Agent' in request_headers:
changedetectionio/fetch_site_status.py

@@ -10,6 +10,12 @@ from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)


# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check():

@@ -65,7 +71,9 @@ class perform_site_check():
            request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')

        timeout = self.datastore.data['settings']['requests'].get('timeout')
        url = watch.get('url')

        url = watch.link

        request_body = self.datastore.data['watching'][uuid].get('body')
        request_method = self.datastore.data['watching'][uuid].get('method')
        ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)

@@ -102,7 +110,7 @@ class perform_site_check():
            if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
                fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']

        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['include_filters'])
        fetcher.quit()

        self.screenshot = fetcher.screenshot

@@ -126,25 +134,26 @@ class perform_site_check():
        is_html = False
        is_json = False

        css_filter_rule = watch['css_filter']
        include_filters_rule = watch['include_filters']
        subtractive_selectors = watch.get(
            "subtractive_selectors", []
        ) + self.datastore.data["settings"]["application"].get(
            "global_subtractive_selectors", []
        )

        has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
        has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
        has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())

        if is_json and not has_filter_rule:
            css_filter_rule = "json:$"
            include_filters_rule.append("json:$")
            has_filter_rule = True

        if has_filter_rule:
            json_filter_prefixes = ['json:', 'jq:']
            if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
                is_html = False
            for filter in include_filters_rule:
                if any(prefix in filter for prefix in json_filter_prefixes):
                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                    is_html = False

        if is_html or is_source:

@@ -159,18 +168,28 @@ class perform_site_check():
            else:
                # Then we assume HTML
                if has_filter_rule:
                    # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                    if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
                        html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
                                                               html_content=fetcher.content)
                    else:
                        # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
                        html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
                    html_content = ""
                    for filter_rule in include_filters_rule:
                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
                                                                    html_content=fetcher.content,
                                                                    append_pretty_line_formatting=not is_source)
                        else:
                            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
                            html_content += html_tools.include_filters(include_filters=filter_rule,
                                                                       html_content=fetcher.content,
                                                                       append_pretty_line_formatting=not is_source)

                    if not html_content.strip():
                        raise FilterNotFoundInResponse(include_filters_rule)

                if has_subtractive_selectors:
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)

                if not is_source:
                if is_source:
                    stripped_text_from_html = html_content
                else:
                    # extract text
                    stripped_text_from_html = \
                        html_tools.html_to_text(

@@ -180,12 +199,6 @@ class perform_site_check():
                            "render_anchor_tag_content", False)
                        )

                elif is_source:
                    stripped_text_from_html = html_content

        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
changedetectionio/forms.py

@@ -1,5 +1,5 @@
import re

import pytz
from wtforms import (
    BooleanField,
    Field,

@@ -8,9 +8,11 @@ from wtforms import (
    PasswordField,
    RadioField,
    SelectField,
    SelectMultipleField,
    StringField,
    SubmitField,
    TextAreaField,
    TimeField,
    fields,
    validators,
    widgets,
@@ -97,6 +99,44 @@ class TimeBetweenCheckForm(Form):
    seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
    # @todo add total seconds minimum validator = minimum_seconds_recheck_time

class MultiCheckboxDayOfWeekField(SelectMultipleField):
    widget = widgets.ListWidget(prefix_label=False)
    option_widget = widgets.CheckboxInput()

class TimeScheduleCheckLimitForm(Form):
    # @todo must be a better python way to do this c/i list
    c=[]
    i=0
    for d in ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']:
        c.append((i, d))
        i+=1
    day_of_week = MultiCheckboxDayOfWeekField('', coerce=int, choices=c)
    from_time = TimeField('From', validators=[validators.Optional()])
    until_time = TimeField('Until', validators=[validators.Optional()])

    def validate(self, **kwargs):
        if not super().validate():
            return False

        result = True

        f = self.data.get('from_time')
        u = self.data.get('until_time')
        if f and u:
            import time
            f = time.strptime(str(f), '%H:%M:%S')
            u = time.strptime(str(u), '%H:%M:%S')
            if f >= u:
                # @todo doesn't present
                self.from_time.errors.append('From time must be LESS than the until/end time')
                result = False

        if len(self.data.get('day_of_week', [])) == 0:
            self.day_of_week.errors.append('No day selected')
            result = False

        return result

# Separated by key:value
class StringDictKeyValue(StringField):
    widget = widgets.TextArea()
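As an aside on the `@todo` about the c/i list: `enumerate()` builds the same `(index, label)` pairs in one expression, and the validator's from/until ordering check works because `time.strptime` returns comparable struct_time tuples. A sketch under those assumptions:

```python
import time

# Equivalent of the manual c/i loop above, without the counter variable
choices = list(enumerate(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']))
# -> [(0, 'Mon'), (1, 'Tue'), ..., (6, 'Sun')]

# The ordering check the validator performs on the submitted times
f = time.strptime('09:00:00', '%H:%M:%S')
u = time.strptime('17:30:00', '%H:%M:%S')
assert f < u  # 'From' must be earlier than 'Until'
```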
@@ -347,27 +387,22 @@ class watchForm(commonSettingsForm):
    url = fields.URLField('URL', validators=[validateURL()])
    tag = StringField('Group tag', [validators.Optional()], default='')

    time_between_check = FormField(TimeBetweenCheckForm)

    css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')

    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])

    extract_text = StringListField('Extract text', [ValidateListRegex()])

    title = StringField('Title', default='')

    ignore_text = StringListField('Ignore text', [ValidateListRegex()])
    headers = StringDictKeyValue('Request headers')
    body = TextAreaField('Request body', [validators.Optional()])
    method = SelectField('Request method', choices=valid_method, default=default_method)
    ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
    check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
    extract_text = StringListField('Extract text', [ValidateListRegex()])
    headers = StringDictKeyValue('Request headers')
    ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
    ignore_text = StringListField('Ignore text', [ValidateListRegex()])
    include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
    method = SelectField('Request method', choices=valid_method, default=default_method)
    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
    text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])

    time_between_check = FormField(TimeBetweenCheckForm)
    time_schedule_check_limit = FormField(TimeScheduleCheckLimitForm)
    time_use_system_default = BooleanField('Use system/default check time', default=False, validators=[validators.Optional()])
    title = StringField('Title', default='')
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
    webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})

    proxy = RadioField('Proxy')
@@ -389,10 +424,10 @@ class watchForm(commonSettingsForm):

        return result


# datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form):
    time_between_check = FormField(TimeBetweenCheckForm)
    time_schedule_check_limit = FormField(TimeScheduleCheckLimitForm)
    proxy = RadioField('Proxy')
    jitter_seconds = IntegerField('Random jitter seconds ± check',
                                  render_kw={"style": "width: 5em;"},

@@ -401,21 +436,21 @@ class globalSettingsRequestForm(Form):
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):

    base_url = StringField('Base URL', validators=[validators.Optional()])
    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
    ignore_whitespace = BooleanField('Ignore whitespace')
    removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
    empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
    render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
    password = SaltyPasswordField()

    base_url = StringField('Base URL', validators=[validators.Optional()])
    empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
                                                                  render_kw={"style": "width: 5em;"},
                                                                  validators=[validators.NumberRange(min=0,
                                                                                                     message="Should contain zero or more attempts")])
    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
    ignore_whitespace = BooleanField('Ignore whitespace')
    password = SaltyPasswordField()
    removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
    render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
    timezone = SelectField('Timezone', choices=pytz.all_timezones)


class globalSettingsForm(Form):
changedetectionio/html_tools.py

@@ -7,26 +7,30 @@ from typing import List
import json
import re

class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"

class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)


# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def css_filter(css_filter, html_content):
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
    soup = BeautifulSoup(html_content, "html.parser")
    html_block = ""
    r = soup.select(css_filter, separator="")
    if len(html_content) > 0 and len(r) == 0:
        raise FilterNotFoundInResponse(css_filter)
    for item in r:
        html_block += str(item)
    r = soup.select(include_filters, separator="")

    return html_block + "\n"
    for element in r:
        # When there's more than 1 match, then add the suffix to separate each line
        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
        # (This way each 'match' reliably has a new-line in the diff)
        # Divs are converted to 4 whitespaces by inscriptis
        if append_pretty_line_formatting and len(html_block) and not element.name in (['br', 'hr', 'div', 'p']):
            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

        html_block += str(element)

    return html_block
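A quick usage sketch of the reworked `include_filters()` helper above, showing how a second match is preceded by the `<br/>` suffix so Inscriptis later renders each match on its own line (the HTML sample is invented):

```python
from changedetectionio import html_tools

html = '<div><span class="price">10.99</span><span class="price">11.50</span></div>'

block = html_tools.include_filters(include_filters='.price',
                                   html_content=html,
                                   append_pretty_line_formatting=True)
print(block)
# <span class="price">10.99</span><br/><span class="price">11.50</span>
```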
def subtractive_css_selector(css_selector, html_content):
    soup = BeautifulSoup(html_content, "html.parser")

@@ -42,25 +46,29 @@ def element_removal(selectors: List[str], html_content):


# Return str UTF-8 of matched rules
def xpath_filter(xpath_filter, html_content):
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False):
    from lxml import etree, html

    tree = html.fromstring(bytes(html_content, encoding='utf-8'))
    html_block = ""

    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
    if len(html_content) > 0 and len(r) == 0:
        raise FilterNotFoundInResponse(xpath_filter)

    # @note: //title/text() won't work where <title>CDATA..

    for element in r:
        # When there's more than 1 match, then add the suffix to separate each line
        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
        # (This way each 'match' reliably has a new-line in the diff)
        # Divs are converted to 4 whitespaces by inscriptis
        if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

        if type(element) == etree._ElementStringResult:
            html_block += str(element) + "<br/>"
            html_block += str(element)
        elif type(element) == etree._ElementUnicodeResult:
            html_block += str(element) + "<br/>"
            html_block += str(element)
        else:
            html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"
            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')

    return html_block
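And the XPath counterpart; the `re:` namespace registered above maps to EXSLT regular expressions, so rules like `//div[re:test(@class, 'price')]` are also possible. A sketch with an invented document:

```python
from changedetectionio import html_tools

html = "<html><body><h1>Price drop</h1><p>Now 23.50</p></body></html>"

block = html_tools.xpath_filter(xpath_filter="//p",
                                html_content=html,
                                append_pretty_line_formatting=True)
print(block)  # <p>Now 23.50</p> serialised via etree.tostring(..., pretty_print=True)
```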
changedetectionio/importer.py

@@ -103,12 +103,12 @@ class import_distill_io_json(Importer):
                pass
            except IndexError:
                pass

            extras['include_filters'] = []
            try:
                extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
                if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
                    extras['css_filter'] = 'xpath:' + extras['css_filter']

                    extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                else:
                    extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
            except KeyError:
                pass
            except IndexError:
changedetectionio/model/App.py

@@ -17,29 +17,31 @@ class model(dict):
        'requests': {
            'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds
            'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
            'time_schedule_check_limit': {'day_of_week': [0, 1, 2, 3, 4, 5, 6], 'time_from': '', 'time_until': ''},
            'jitter_seconds': 0,
            'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections
            'proxy': None  # Preferred proxy connection
        },
        'application': {
            # Custom notification content
            'api_access_token_enabled': True,
            'password': False,
            'base_url' : None,
            'extract_title_as_title': False,
            'empty_pages_are_a_change': False,
            'extract_title_as_title': False,
            'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
            'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
            'global_ignore_text': [],  # List of text to ignore when calculating the comparison checksum
            'global_subtractive_selectors': [],
            'ignore_whitespace': True,
            'render_anchor_tag_content': False,
            'notification_urls': [],  # Apprise URL list
            # Custom notification content
            'notification_title': default_notification_title,
            'notification_body': default_notification_body,
            'notification_format': default_notification_format,
            'notification_title': default_notification_title,
            'notification_urls': [],  # Apprise URL list
            'password': False,
            'render_anchor_tag_content': False,
            'schema_version' : 0,
            'webdriver_delay': None  # Extra delay in seconds before extracting text
            'timezone': 'UTC',
            'webdriver_delay': None,  # Extra delay in seconds before extracting text
        }
    }
}
changedetectionio/model/Watch.py

@@ -1,6 +1,8 @@
import os
import uuid as uuid_builder
from distutils.util import strtobool
import logging
import os
import time
import uuid

minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -14,42 +16,44 @@ class model(dict):
    __newest_history_key = None
    __history_n = 0
    __base_config = {
        'url': None,
        'tag': None,
        'last_checked': 0,
        'paused': False,
        'last_viewed': 0,  # history key value of the last viewed via the [diff] link
        #'newest_history_key': 0,
        'title': None,
        'previous_md5': False,
        'uuid': str(uuid_builder.uuid4()),
        'headers': {},  # Extra headers to send
        #'history': {},  # Dict of timestamp and output stripped filename (removed)
        #'newest_history_key': 0, (removed, taken from history.txt index)
        'body': None,
        'method': 'GET',
        #'history': {},  # Dict of timestamp and output stripped filename
        'check_unique_lines': False,  # On change-detected, compare against all history if it's something new
        'check_count': 0,
        'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
        'extract_text': [],  # Extract text by regex after filters
        'extract_title_as_title': False,
        'fetch_backend': None,
        'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
        'headers': {},  # Extra headers to send
        'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
        # Custom notification content
        'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
        'notification_title': None,
        'include_filters': [],
        'last_checked': 0,
        'last_error': False,
        'last_viewed': 0,  # history key value of the last viewed via the [diff] link
        'method': 'GET',
        # Custom notification content
        'notification_body': None,
        'notification_format': default_notification_format_for_watch,
        'notification_muted': False,
        'css_filter': '',
        'last_error': False,
        'extract_text': [],  # Extract text by regex after filters
        'subtractive_selectors': [],
        'trigger_text': [],  # List of text or regex to wait for until a change is detected
        'text_should_not_be_present': [],  # Text that should not be present
        'fetch_backend': None,
        'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
        'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
        'extract_title_as_title': False,
        'check_unique_lines': False,  # On change-detected, compare against all history if it's something new
        'notification_title': None,
        'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
        'paused': False,
        'previous_md5': False,
        'proxy': None,  # Preferred proxy connection
        'subtractive_selectors': [],
        'tag': None,
        'text_should_not_be_present': [],  # Text that should not be present
        # Re #110, so then if this is set to None, we know to use the default value instead
        # Requires setting to None on submit if it's the same as the default
        # Should be all None by default, so we use the system default in this case.
        'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
        'time_schedule_check_limit': {'day_of_week': [0, 1, 2, 3, 4, 5, 6], 'time_from': '', 'time_until': ''},
        'title': None,
        'trigger_text': [],  # List of text or regex to wait for until a change is detected
        'url': None,
        'uuid': str(uuid.uuid4()),
        'webdriver_delay': None,
        'webdriver_js_execute_code': None,  # Run before change-detection
    }
@@ -60,7 +64,7 @@ class model(dict):
        self.update(self.__base_config)
        self.__datastore_path = kw['datastore_path']

        self['uuid'] = str(uuid_builder.uuid4())
        self['uuid'] = str(uuid.uuid4())

        del kw['datastore_path']
@@ -82,10 +86,19 @@ class model(dict):
        return False

    def ensure_data_dir_exists(self):
        target_path = os.path.join(self.__datastore_path, self['uuid'])
        if not os.path.isdir(target_path):
            print("> Creating data dir {}".format(target_path))
            os.mkdir(target_path)
        if not os.path.isdir(self.watch_data_dir):
            print("> Creating data dir {}".format(self.watch_data_dir))
            os.mkdir(self.watch_data_dir)

    @property
    def link(self):
        url = self.get('url', '')
        if '{%' in url or '{{' in url:
            from jinja2 import Environment
            # Jinja2 available in URLs along with https://pypi.org/project/jinja2-time/
            jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
            return str(jinja2_env.from_string(url).render())
        return url
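This `link` property is what the jinja2-tem side of this branch enables: Jinja2 (plus the jinja2-time extension) is rendered into the watched URL at fetch time. A sketch with an invented URL:

```python
from jinja2 import Environment

jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
url = "https://example.com/report?date={% now 'utc', '%Y-%m-%d' %}"
print(jinja2_env.from_string(url).render())
# e.g. https://example.com/report?date=2022-10-04
```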
    @property
    def label(self):

@@ -109,18 +122,39 @@ class model(dict):

    @property
    def history(self):
        """History index is just a text file as a list
        {watch-uuid}/history.txt

        contains a list like

        {epoch-time},{filename}\n

        We read in this list as the history information

        """
        tmp_history = {}
        import logging
        import time

        # Read the history file as a dict
        fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
        fname = os.path.join(self.watch_data_dir, "history.txt")
        if os.path.isfile(fname):
            logging.debug("Reading history index " + str(time.time()))
            with open(fname, "r") as f:
                for i in f.readlines():
                    if ',' in i:
                        k, v = i.strip().split(',', 2)

                        # The index history could contain a relative path, so we need to make the full path
                        # so that python can read it
                        if not '/' in v and not '\'' in v:
                            v = os.path.join(self.watch_data_dir, v)
                        else:
                            # It's possible that they moved the datadir on older versions
                            # So the snapshot exists but is in a different path
                            snapshot_fname = v.split('/')[-1]
                            proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
                            if not os.path.exists(v) and os.path.exists(proposed_new_path):
                                v = proposed_new_path

                        tmp_history[k] = v

        if len(tmp_history):
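The history.txt format described in the docstring is one `{epoch-time},{filename}` pair per line; a standalone sketch of the same parse (the contents are invented):

```python
lines = [
    "1664890902,9063f123-6969-4a58-b11c-47765a0d0b9a.txt",
    "1664977302,c7a9ca19-4242-4a58-b11c-e2ffd0a84c12.txt",
]

tmp_history = {}
for i in lines:
    if ',' in i:
        k, v = i.strip().split(',', 2)  # key = epoch seconds, value = snapshot filename
        tmp_history[k] = v

print(tmp_history["1664890902"])  # 9063f123-6969-4a58-b11c-47765a0d0b9a.txt
```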
@@ -132,7 +166,7 @@ class model(dict):

    @property
    def has_history(self):
        fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
        fname = os.path.join(self.watch_data_dir, "history.txt")
        return os.path.isfile(fname)

    # Returns the newest key, but if there's only 1 record, then it's counted as not being new, so return 0.
@@ -151,25 +185,25 @@ class model(dict):
    # Save some text file to the appropriate path and bump the history
    # result_obj from fetch_site_status.run()
    def save_history_text(self, contents, timestamp):
        import uuid
        import logging

        output_path = os.path.join(self.__datastore_path, self['uuid'])

        self.ensure_data_dir_exists()
        snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))

        logging.debug("Saving history text {}".format(snapshot_fname))
        # Small hack so that we sleep just enough to allow 1 second between history snapshots
        # this is because history.txt indexes/keys snapshots by epoch seconds and we don't want dupe keys
        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
            time.sleep(timestamp - self.__newest_history_key)

        snapshot_fname = "{}.txt".format(str(uuid.uuid4()))

        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
        # most sites are utf-8 and some are even broken utf-8
        with open(snapshot_fname, 'wb') as f:
        with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
            f.write(contents)
            f.close()

        # Append to index
        # @todo check last char was \n
        index_fname = os.path.join(output_path, "history.txt")
        index_fname = os.path.join(self.watch_data_dir, "history.txt")
        with open(index_fname, 'a') as f:
            f.write("{},{}\n".format(timestamp, snapshot_fname))
            f.close()
@@ -195,6 +229,11 @@ class model(dict):
            seconds += x * n
        return seconds

    def is_schedule_permitted(self):
        """According to the current day of week and time, is this watch queueable?"""

        return True

    # Iterate over all history texts and see if something new exists
    def lines_contain_something_unique_compared_to_history(self, lines: list):
        local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
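`is_schedule_permitted` is still a stub here (it always returns True), but the `time_schedule_check_limit` structure added on this branch suggests the shape of the eventual check. A hypothetical sketch only, not the committed implementation:

```python
import datetime

def is_schedule_permitted(schedule, now=None):
    """Hypothetical: is `now` inside the permitted day-of-week/time window?"""
    now = now or datetime.datetime.now()
    if now.weekday() not in schedule['day_of_week']:  # Monday == 0
        return False
    if schedule['time_from'] and schedule['time_until']:
        # Zero-padded HH:MM:SS strings compare correctly as plain text
        current = now.strftime('%H:%M:%S')
        return schedule['time_from'] <= current <= schedule['time_until']
    return True

schedule = {'day_of_week': [0, 1, 2, 3, 4], 'time_from': '09:00:00', 'time_until': '17:00:00'}
print(is_schedule_permitted(schedule))
```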
@@ -210,14 +249,14 @@ class model(dict):
        return not local_lines.issubset(existing_history)

    def get_screenshot(self):
        fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
        fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
        if os.path.isfile(fname):
            return fname

        return False

    def __get_file_ctime(self, filename):
        fname = os.path.join(self.__datastore_path, self['uuid'], filename)
        fname = os.path.join(self.watch_data_dir, filename)
        if os.path.isfile(fname):
            return int(os.path.getmtime(fname))
        return False
@@ -242,9 +281,14 @@ class model(dict):
    def snapshot_error_screenshot_ctime(self):
        return self.__get_file_ctime('last-error-screenshot.png')

    @property
    def watch_data_dir(self):
        # The base dir of the watch data
        return os.path.join(self.__datastore_path, self['uuid'])

    def get_error_text(self):
        """Return the text saved from a previous request that resulted in a non-200 error"""
        fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
        fname = os.path.join(self.watch_data_dir, "last-error.txt")
        if os.path.isfile(fname):
            with open(fname, 'r') as f:
                return f.read()

@@ -252,7 +296,7 @@ class model(dict):

    def get_error_snapshot(self):
        """Return path to the screenshot that resulted in a non-200 error"""
        fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
        fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
        if os.path.isfile(fname):
            return fname
        return False
run_basic_tests.sh

@@ -24,14 +24,6 @@ echo "RUNNING WITH BASE_URL SET"
export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py


## JQ + JSON: filter test
# jq is not available on Windows, so we should only test it when the package is installed
# this will re-test with jq support
pip3 install jq~=1.3
pytest tests/test_jsonpath_jq_selector.py


# Now for the selenium and playwright/browserless fetchers
# Note - these are not UI functional tests - just checking that each one can fetch the content
visual-selector.js

@@ -50,7 +50,7 @@ $(document).ready(function() {
        state_clicked=false;
        ctx.clearRect(0, 0, c.width, c.height);
        xctx.clearRect(0, 0, c.width, c.height);
        $("#css_filter").val('');
        $("#include_filters").val('');
    });

@@ -68,7 +68,7 @@ $(document).ready(function() {
        xctx = c.getContext("2d");
        // redline highlight context
        ctx = c.getContext("2d");
        current_default_xpath =$("#css_filter").val();
        current_default_xpath =$("#include_filters").val();
        fetch_data();
        $('#selector-canvas').off("mousemove mousedown");
        // screenshot_url defined in the edit.html template

@@ -205,9 +205,9 @@ $(document).ready(function() {
        var sel = selector_data['size_pos'][current_selected_i];
        if (sel[0] == '/') {
            // @todo - not sure just checking / is right
            $("#css_filter").val('xpath:'+sel.xpath);
            $("#include_filters").val('xpath:'+sel.xpath);
        } else {
            $("#css_filter").val(sel.xpath);
            $("#include_filters").val(sel.xpath);
        }
        xctx.fillStyle = 'rgba(205,205,205,0.95)';
        xctx.strokeStyle = 'rgba(225,0,0,0.9)';
styles.css

@@ -132,7 +132,7 @@ body:after, body:before {

.fetch-error {
  padding-top: 1em;
  font-size: 60%;
  font-size: 80%;
  max-width: 400px;
  display: block; }

@@ -480,6 +480,22 @@ ul {
.time-check-widget tr input[type="number"] {
  width: 5em; }

.pure-control-group table label {
  color: #333;
  font-weight: normal; }

.time-schedule-check-limit-widget tr {
  display: inline-block; }

.time-schedule-check-limit-widget li {
  text-decoration: none; }

.time-schedule-check-limit-widget ul {
  padding-left: 0px; }
  .time-schedule-check-limit-widget ul li {
    display: inline-block;
    width: 3em; }

#selector-wrapper {
  height: 600px;
  overflow-y: scroll;
styles.scss

@@ -156,7 +156,7 @@ body:after, body:before {

.fetch-error {
  padding-top: 1em;
  font-size: 60%;
  font-size: 80%;
  max-width: 400px;
  display: block;
}

@@ -677,6 +677,29 @@ ul {
      }
    }
  }
  .pure-control-group table label {
    color: #333;
    font-weight: normal;
  }

  .time-schedule-check-limit-widget {
    tr {
      display: inline-block;
    }

    li {
      text-decoration: none;
    }

    ul {
      padding-left: 0px;
      li {
        display: inline-block;
        width: 3em;
      }
    }
  }


#selector-wrapper {
  height: 600px;

@@ -803,4 +826,4 @@ ul {
    padding: 0.5rem;
    border-radius: 5px;
    color: #ff3300;
  }
}
changedetectionio/store.py

@@ -27,6 +27,8 @@ class ChangeDetectionStore:
    # For when we edit, we should write to disk
    needs_write_urgent = False

    __version_check = True

    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)

@@ -37,7 +39,6 @@ class ChangeDetectionStore:
        self.proxy_list = None
        self.start_time = time.time()
        self.stop_thread = False

        # Base definition for all watchers
        # deepcopy part of #569 - not sure why it's needed exactly
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))

@@ -81,8 +82,13 @@ class ChangeDetectionStore:
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            if include_default_watches:
                print("Creating JSON store at", self.datastore_path)
                self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
                self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
                self.add_watch(url='https://news.ycombinator.com/',
                               tag='Tech news',
                               extras={'fetch_backend': 'html_requests'})

                self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
                               tag='changedetection.io',
                               extras={'fetch_backend': 'html_requests'})

        self.__data['version_tag'] = version_tag
@@ -266,7 +272,7 @@ class ChangeDetectionStore:
        extras = {}
        # should always be str
        if tag is None or not tag:
            tag=''
            tag = ''

        # In case these are copied across, assume it's a reference and deepcopy()
        apply_extras = deepcopy(extras)

@@ -281,17 +287,31 @@ class ChangeDetectionStore:
                res = r.json()

                # List of permissible attributes we accept from the wild internet
                for k in ['url', 'tag',
                          'paused', 'title',
                          'previous_md5', 'headers',
                          'body', 'method',
                          'ignore_text', 'css_filter',
                          'subtractive_selectors', 'trigger_text',
                          'extract_title_as_title', 'extract_text',
                          'text_should_not_be_present',
                          'webdriver_js_execute_code']:
                for k in [
                    'body',
                    'css_filter',
                    'extract_text',
                    'extract_title_as_title',
                    'headers',
                    'ignore_text',
                    'include_filters',
                    'method',
                    'paused',
                    'previous_md5',
                    'subtractive_selectors',
                    'tag',
                    'text_should_not_be_present',
                    'title',
                    'trigger_text',
                    'webdriver_js_execute_code',
                    'url',
                ]:
                    if res.get(k):
                        apply_extras[k] = res[k]
                        if k != 'css_filter':
                            apply_extras[k] = res[k]
                        else:
                            # We renamed the field and made it a list
                            apply_extras['include_filters'] = [res['css_filter']]

            except Exception as e:
                logging.error("Error fetching metadata for shared watch link", url, str(e))
@@ -314,12 +334,13 @@ class ChangeDetectionStore:
                del apply_extras[k]

        new_watch.update(apply_extras)
        self.__data['watching'][new_uuid]=new_watch
        self.__data['watching'][new_uuid] = new_watch

        self.__data['watching'][new_uuid].ensure_data_dir_exists()

        if write_to_disk_now:
            self.sync_to_json()

        return new_uuid

    def visualselector_data_is_ready(self, watch_uuid):
@@ -583,3 +604,14 @@ class ChangeDetectionStore:
        for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
            if self.data['settings']['headers'].get(v):
                del self.data['settings']['headers'][v]

    # Convert filters to a list of filters css_filter -> include_filters
    def update_8(self):
        for uuid, watch in self.data['watching'].items():
            try:
                existing_filter = watch.get('css_filter', '')
                if existing_filter:
                    watch['include_filters'] = [existing_filter]
            except:
                continue
        return
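`update_8` follows the store's incremental-migration convention: numbered `update_N` methods are applied in order and `schema_version` (defaulted to 0 in the settings above) records how far the datastore has been migrated. A minimal sketch of that pattern, not the project's exact runner:

```python
class Store:
    def __init__(self):
        self.schema_version = 7  # e.g. loaded from the existing datastore

    def update_8(self):
        print("converting css_filter -> include_filters")

    def run_updates(self):
        # Apply every update_N above the current schema version, lowest first
        pending = sorted(int(name.split('_')[1])
                         for name in dir(self) if name.startswith('update_'))
        for n in pending:
            if n > self.schema_version:
                getattr(self, 'update_{}'.format(n))()
                self.schema_version = n

Store().run_updates()  # runs update_8 once, then records schema_version = 8
```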
changedetectionio/templates/edit.html

@@ -40,7 +40,8 @@
    <fieldset>
        <div class="pure-control-group">
            {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
            <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span>
            <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
            <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
        </div>
        <div class="pure-control-group">
            {{ render_field(form.title, class="m-d") }}
@@ -50,14 +51,15 @@
            <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
        </div>
        <div class="pure-control-group">
            {{ render_checkbox_field(form.time_use_system_default) }}
            <div style="opacity: 0.5">
                {{ render_field(form.time_between_check, class="time-check-widget") }}
                {{ render_field(form.time_schedule_check_limit, class="time-schedule-check-limit-widget") }}
                @todo - add 'use default' checkbox
            </div>
            {% if has_empty_checktime %}
                <span class="pure-form-message-inline">Currently using the <a
                        href="{{ url_for('settings_page', uuid=uuid) }}">default global settings</a>, change to another value if you want to be specific.</span>
            {% else %}
                <span class="pure-form-message-inline">Set to blank to use the <a
                        href="{{ url_for('settings_page', uuid=uuid) }}">default global settings</a>.</span>
            {% endif %}

        </div>
        <div class="pure-control-group">
            {{ render_checkbox_field(form.extract_title_as_title) }}
@@ -173,15 +175,17 @@ User-Agent: wonderbra 1.0") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
<div class="pure-control-group">
|
||||
{% set field = render_field(form.css_filter,
|
||||
placeholder=".class-name or #some-id, or other CSS selector rule.",
|
||||
{% set field = render_field(form.include_filters,
|
||||
rows=5,
|
||||
placeholder="#example
|
||||
xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
class="m-d")
|
||||
%}
|
||||
{{ field }}
|
||||
{% if '/text()' in field %}
|
||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/>
|
||||
{% endif %}
|
||||
<span class="pure-form-message-inline">
|
||||
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
||||
|
||||
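The new include_filters textarea accepts one rule per line and mixes plain CSS selectors with xpath:-prefixed expressions, as the placeholder above suggests. A rough sketch of how such input could be split and dispatched (an approximation for illustration, not the project's actual html_tools code):

# Sketch: split the multi-line filter field into (engine, expression) pairs.
def parse_filter_rules(raw):
    rules = []
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        if line.startswith('xpath:'):
            rules.append(('xpath', line[len('xpath:'):]))
        else:
            rules.append(('css', line))
    return rules

print(parse_filter_rules("#example\nxpath://body/div/span[contains(@class, 'example-class')]"))
# [('css', '#example'), ('xpath', "//body/div/span[contains(@class, 'example-class')]")]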
@@ -21,6 +21,7 @@
<li class="tab"><a href="#fetching">Fetching</a></li>
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#date-time">Date & Time</a></li>
</ul>
</div>
<div class="box-wrap inner">
@@ -30,6 +31,7 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.requests.form.time_between_check, class="time-check-widget") }}
{{ render_field(form.requests.form.time_schedule_check_limit, class="time-schedule-check-limit-widget") }}
<span class="pure-form-message-inline">Default time for all watches, when the watch does not have a specific time setting.</span>
</div>
<div class="pure-control-group">
@@ -91,7 +93,6 @@
</div>
</fieldset>
</div>

<div class="tab-pane-inner" id="fetching">
<div class="pure-control-group inline-radio">
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
@@ -170,6 +171,19 @@ nav
</div>
</div>
</div>
<div class="tab-pane-inner" id="date-time">
<fieldset>
<div class="field-group">
{{ render_field(form.application.form.timezone) }}
</div>
<div class="field-group">
<p>
<label>Local time</label> {{ datetime }}<br/>
<label>Configured timezone:</label> {{ timezone }}<br/>
</p>
</div>
</fieldset>
</div>

<div id="actions">
<div class="pure-control-group">

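The new Date & Time tab prints the current local time next to the configured timezone. With pytz (listed in requirements.txt further down in this changeset) the two template values could be produced roughly like this; the actual view wiring is not part of this diff, so treat the names as illustrative:

from datetime import datetime
import pytz

# Sketch: compute the values the Date & Time tab displays.
# 'Europe/Berlin' stands in for whatever timezone the user configured.
timezone = 'Europe/Berlin'
local_now = datetime.now(pytz.timezone(timezone))
print("Local time:", local_now.strftime('%Y-%m-%d %H:%M:%S %Z'))
print("Configured timezone:", timezone)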
@@ -87,7 +87,7 @@
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>

{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}

@@ -41,7 +41,7 @@ def app(request):

cleanup(datastore_path)

app_config = {'datastore_path': datastore_path}
app_config = {'datastore_path': datastore_path, 'disable_checkver' : True}
cleanup(app_config['datastore_path'])
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
app = changedetection_app(app_config, datastore)

@@ -24,7 +24,7 @@ def test_preferred_proxy(client, live_server):
res = client.post(
url_for("edit_page", uuid="first"),
data={
"css_filter": "",
"include_filters": "",
"fetch_backend": "html_requests",
"headers": "",
"proxy": "proxy-two",

@@ -23,7 +23,7 @@ def test_basic_auth(client, live_server):
# Check form validation
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data

@@ -3,7 +3,7 @@
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks

sleep_time_for_fetch_thread = 3

@@ -36,7 +36,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
@@ -69,7 +69,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches are queued for rechecking.' in res.data

time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)

# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
@@ -98,14 +98,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
assert b'which has this one new line' in res.data
assert b'Which is across multiple lines' not in res.data

time.sleep(2)
wait_for_all_checks(client)

# Do this a few times.. ensures we don't accidentally set the status
for n in range(2):
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
@@ -125,7 +125,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
)

client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)

res = client.get(url_for("index"))
assert b'unviewed' in res.data

@@ -1,18 +1,31 @@
#!/usr/bin/python3

import time
from .util import set_original_response, set_modified_response, live_server_setup
from flask import url_for
from urllib.request import urlopen
from . util import set_original_response, set_modified_response, live_server_setup
from zipfile import ZipFile
import re
import time


def test_backup(client, live_server):

live_server_setup(live_server)

set_original_response()

# Give the endpoint time to spin up
time.sleep(1)

# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)

assert b"1 Imported" in res.data
time.sleep(3)

res = client.get(
url_for("get_backup"),
follow_redirects=True
@@ -20,6 +33,19 @@ def test_backup(client, live_server):

# Should get the right zip content type
assert res.content_type == "application/zip"

# Should be PK/ZIP stream
assert res.data.count(b'PK') >= 2

# ZipFile from buffer seems non-obvious, just save it instead
with open("download.zip", 'wb') as f:
f.write(res.data)

zip = ZipFile('download.zip')
l = zip.namelist()
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
newlist = list(filter(uuid4hex.match, l)) # Read Note below

# Should be two txt files in the archive (history and the snapshot)
assert len(newlist) == 2

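The "ZipFile from buffer seems non-obvious" comment above can be sidestepped: ZipFile accepts any file-like object, so wrapping the response bytes in io.BytesIO avoids the temporary download.zip entirely (a sketch, assuming the raw zip bytes come from res.data as in the test):

import io
from zipfile import ZipFile

# Sketch: open the backup straight from the response body, no temp file needed.
def namelist_from_bytes(zip_bytes):
    with ZipFile(io.BytesIO(zip_bytes)) as backup:
        return backup.namelist()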
@@ -46,22 +46,23 @@ def set_modified_response():


# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
def test_css_filter_output():
from changedetectionio import fetch_site_status
def test_include_filters_output():
from inscriptis import get_text

# Check text with sub-parts renders correctly
content = """<html> <body><div id="thingthing" > Some really <b>bold</b> text </div> </body> </html>"""
html_blob = css_filter(css_filter="#thingthing", html_content=content)
html_blob = include_filters(include_filters="#thingthing", html_content=content)
text = get_text(html_blob)
assert text == " Some really bold text"

content = """<html> <body>
<p>foo bar blah</p>
<div class="parts">Block A</div> <div class="parts">Block B</div></body>
<DIV class="parts">Block A</DiV> <div class="parts">Block B</DIV></body>
</html>
"""
html_blob = css_filter(css_filter=".parts", html_content=content)

# in xPath this would be //*[@class='parts']
html_blob = include_filters(include_filters=".parts", html_content=content)
text = get_text(html_blob)

# Divs are converted to 4 whitespaces by inscriptis
@@ -69,10 +70,10 @@ def test_css_filter_output():


# Tests the whole stack works with the CSS Filter
def test_check_markup_css_filter_restriction(client, live_server):
def test_check_markup_include_filters_restriction(client, live_server):
sleep_time_for_fetch_thread = 3

css_filter = "#sametext"
include_filters = "#sametext"

set_original_response()

@@ -98,7 +99,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -107,7 +108,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
res = client.get(
url_for("edit_page", uuid="first"),
)
assert bytes(css_filter.encode('utf-8')) in res.data
assert bytes(include_filters.encode('utf-8')) in res.data

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -126,3 +127,58 @@ def test_check_markup_css_filter_restriction(client, live_server):
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("index"))
assert b'unviewed' in res.data


# Tests the whole stack works with multiple filters (CSS and XPath together)
def test_check_multiple_filters(client, live_server):
sleep_time_for_fetch_thread = 3

include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html><body>
<div id="blob-a">Blob A</div>
<div id="blob-b">Blob B</div>
<div id="blob-c">Blob C</div>
</body>
</html>
""")

# Give the endpoint time to spin up
time.sleep(1)

# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(1)

# Goto the edit page, add our ignore text
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"include_filters": include_filters,
"url": test_url,
"tag": "",
"headers": "",
'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data

# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)

res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)

# Only the two blobs should be here
assert b"Blob A" in res.data # CSS was ok
assert b"Blob B" in res.data # xPath was ok
assert b"Blob C" not in res.data # Should not be included

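Why both blobs survive while Blob C is dropped: each line of include_filters is run through its own engine and the matched fragments are concatenated. A rough approximation using bs4 and lxml (both already in requirements.txt); this only sketches the idea and is not the project's actual html_tools implementation:

from bs4 import BeautifulSoup
from lxml import etree, html as lxml_html

# Sketch: apply a mixed list of CSS / xpath: rules, join all matched fragments.
def apply_include_filters(rules, html_content):
    out = []
    for rule in rules:
        if rule.startswith('xpath:'):
            root = lxml_html.fromstring(html_content)
            for node in root.xpath(rule[len('xpath:'):]):
                out.append(etree.tostring(node, encoding='unicode'))
        else:
            soup = BeautifulSoup(html_content, 'html.parser')
            for node in soup.select(rule):
                out.append(str(node))
    return "\n".join(out)

content = '<div id="blob-a">Blob A</div><div id="blob-b">Blob B</div><div id="blob-c">Blob C</div>'
print(apply_include_filters(['#blob-a', "xpath://*[contains(@id,'blob-b')]"], content))
# Blob A and Blob B are returned; Blob C matches no rule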
@@ -88,7 +88,7 @@ def test_check_filter_multiline(client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": '',
data={"include_filters": '',
'extract_text': '/something.+?6 billion.+?lines/si',
"url": test_url,
"tag": "",
@@ -116,7 +116,7 @@ def test_check_filter_multiline(client, live_server):

def test_check_filter_and_regex_extract(client, live_server):
sleep_time_for_fetch_thread = 3
css_filter = ".changetext"
include_filters = ".changetext"

set_original_response()

@@ -143,7 +143,7 @@ def test_check_filter_and_regex_extract(client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": css_filter,
data={"include_filters": include_filters,
'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
"url": test_url,
"tag": "",

@@ -92,7 +92,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
"tag": "my tag",
"title": "my title",
"headers": "",
"css_filter": '.ticket-available',
"include_filters": '.ticket-available',
"fetch_backend": "html_requests"})

res = client.post(

@@ -76,7 +76,7 @@ def run_filter_test(client, content_filter):
"title": "my title",
"headers": "",
"filter_failure_notification_send": 'y',
"css_filter": content_filter,
"include_filters": content_filter,
"fetch_backend": "html_requests"})

res = client.post(
@@ -95,7 +95,7 @@ def run_filter_test(client, content_filter):
time.sleep(3)

# We should see something in the frontend
assert b'Warning, filter' in res.data
assert b'Warning, no filters were found' in res.data

# Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt")
@@ -131,7 +131,7 @@ def run_filter_test(client, content_filter):
def test_setup(live_server):
live_server_setup(live_server)

def test_check_css_filter_failure_notification(client, live_server):
def test_check_include_filters_failure_notification(client, live_server):
set_original_response()
time.sleep(1)
run_filter_test(client, '#nope-doesnt-exist')

33 changedetectionio/tests/test_jinja2.py Normal file
@@ -0,0 +1,33 @@
#!/usr/bin/python3

import time
from flask import url_for
from .util import live_server_setup


# Test that a jinja2 time template in the watch URL gets evaluated on fetch
def test_jinja2_in_url_query(client, live_server):
live_server_setup(live_server)

# Give the endpoint time to spin up
time.sleep(1)

# Add our URL to the import page
test_url = url_for('test_return_query', _external=True)

# because url_for() will URL-encode the var, but we don't here
full_url = "{}?{}".format(test_url,
"date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}", )
res = client.post(
url_for("form_quick_watch_add"),
data={"url": full_url, "tag": "test"},
follow_redirects=True
)
assert b"Watch added" in res.data
time.sleep(3)
# It should report nothing found (no new 'unviewed' class)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'date=2' in res.data
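The {% now %} tag that this test embeds in the watch URL comes from the jinja2-time extension (added to requirements.txt in this changeset). The same substitution can be reproduced outside the app like so (a sketch; 'jinja2_time.TimeExtension' is the extension path jinja2-time documents):

from jinja2 import Environment

# Sketch: evaluate the same template the test appends to the URL.
env = Environment(extensions=['jinja2_time.TimeExtension'])
tpl = env.from_string(
    "date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}")
print(tpl.render())  # e.g. date=2022.10.30, which satisfies assert b'date=2'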
@@ -132,7 +132,7 @@ def set_original_response():
return None


def set_response_with_html():
def set_json_response_with_html():
test_return_data = """
{
"test": [
@@ -176,7 +176,7 @@ def set_modified_response():
def test_check_json_without_filter(client, live_server):
# Request a JSON document from an application/json source containing HTML
# and be sure it doesn't get chewed up by inscriptis
set_response_with_html()
set_json_response_with_html()

# Give the endpoint time to spin up
time.sleep(1)
@@ -189,9 +189,6 @@ def test_check_json_without_filter(client, live_server):
follow_redirects=True
)

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(3)

@@ -200,6 +197,7 @@ def test_check_json_without_filter(client, live_server):
follow_redirects=True
)

# Should still see '"html": "<b>"'
assert b'"<b>' in res.data
assert res.data.count(b'{\n') >= 2

@@ -221,9 +219,6 @@ def check_json_filter(json_filter, client, live_server):
)
assert b"1 Imported" in res.data

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(3)

@@ -231,7 +226,7 @@ def check_json_filter(json_filter, client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": json_filter,
data={"include_filters": json_filter,
"url": test_url,
"tag": "",
"headers": "",
@@ -247,9 +242,6 @@ def check_json_filter(json_filter, client, live_server):
)
assert bytes(escape(json_filter).encode('utf-8')) in res.data

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(3)
# Make a change
@@ -301,7 +293,7 @@ def check_json_filter_bool_val(json_filter, client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": json_filter,
data={"include_filters": json_filter,
"url": test_url,
"tag": "",
"headers": "",
@@ -311,11 +303,6 @@ def check_json_filter_bool_val(json_filter, client, live_server):
)
assert b"Updated watch." in res.data

time.sleep(3)

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(3)
# Make a change
@@ -360,9 +347,6 @@ def check_json_ext_filter(json_filter, client, live_server):
)
assert b"1 Imported" in res.data

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(3)

@@ -370,7 +354,7 @@ def check_json_ext_filter(json_filter, client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": json_filter,
data={"include_filters": json_filter,
"url": test_url,
"tag": "",
"headers": "",
@@ -386,9 +370,6 @@ def check_json_ext_filter(json_filter, client, live_server):
)
assert bytes(escape(json_filter).encode('utf-8')) in res.data

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(3)
# Make a change

@@ -14,7 +14,7 @@ def test_share_watch(client, live_server):
live_server_setup(live_server)

test_url = url_for('test_endpoint', _external=True)
css_filter = ".nice-filter"
include_filters = ".nice-filter"

# Add our URL to the import page
res = client.post(
@@ -29,7 +29,7 @@ def test_share_watch(client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -37,7 +37,7 @@ def test_share_watch(client, live_server):
res = client.get(
url_for("edit_page", uuid="first"),
)
assert bytes(css_filter.encode('utf-8')) in res.data
assert bytes(include_filters.encode('utf-8')) in res.data

# click share the link
res = client.get(
@@ -73,4 +73,8 @@ def test_share_watch(client, live_server):
res = client.get(
url_for("edit_page", uuid="first"),
)
assert bytes(css_filter.encode('utf-8')) in res.data
assert bytes(include_filters.encode('utf-8')) in res.data

# Check it saved the URL
res = client.get(url_for("index"))
assert bytes(test_url.encode('utf-8')) in res.data

@@ -57,10 +57,9 @@ def test_check_basic_change_detection_functionality_source(client, live_server):

# `subtractive_selectors` should still work in `source:` type requests
def test_check_ignore_elements(client, live_server):
set_original_response()

time.sleep(2)
test_url = 'source:'+url_for('test_endpoint', _external=True)
# Add our URL to the import page
@@ -77,9 +76,9 @@ def test_check_ignore_elements(client, live_server):
#####################
# We want <span> and <p> ONLY, but ignore span with .foobar-detection

res = client.post(
client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
data={"include_filters": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
follow_redirects=True
)

@@ -89,7 +88,6 @@ def test_check_ignore_elements(client, live_server):
url_for("preview_page", uuid="first"),
follow_redirects=True
)

assert b'foobar-detection' not in res.data
assert b'<br' not in res.data
assert b'<p' in res.data
@@ -49,7 +49,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server):
url_for("edit_page", uuid="first"),
data={"trigger_text": "/cool.stuff/",
"url": test_url,
"css_filter": '#in-here',
"include_filters": '#in-here',
"fetch_backend": "html_requests"},
follow_redirects=True
)

@@ -22,7 +22,7 @@ def test_check_watch_field_storage(client, live_server):
url_for("edit_page", uuid="first"),
data={ "notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n",
"time_between_check-minutes": 126,
"css_filter" : ".fooclass",
"include_filters" : ".fooclass",
"title" : "My title",
"ignore_text" : "ignore this",
"url": test_url,

@@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server):
time.sleep(1)
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server):
time.sleep(1)
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -182,9 +182,6 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
)
assert b"1 Imported" in res.data

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)

@@ -192,7 +189,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -230,10 +227,11 @@ def test_xpath_validation(client, live_server):
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(2)

res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"is not a valid XPath expression" in res.data
@@ -242,7 +240,7 @@ def test_xpath_validation(client, live_server):


# actually only really used by the distill.io importer, but could be handy too
def test_check_with_prefix_css_filter(client, live_server):
def test_check_with_prefix_include_filters(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@@ -263,7 +261,7 @@ def test_check_with_prefix_css_filter(client, live_server):

res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
data={"include_filters": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)


@@ -86,6 +86,7 @@ def extract_UUID_from_client(client):
def wait_for_all_checks(client):
# Loop waiting until done..
attempt=0
time.sleep(0.1)
while attempt < 60:
time.sleep(1)
res = client.get(url_for("index"))
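The wait_for_all_checks helper shown above replaces the tests' fixed sleeps with a bounded polling loop; the hunk ends before the loop body does. A sketch of the complete idea, noting that the exact busy-marker string the real helper inspects is an assumption here:

import time
from flask import url_for

# Sketch: poll the index page until no watch still reports as busy,
# giving up after roughly 60 seconds. 'Checking now' is the assumed marker.
def wait_for_all_checks(client):
    attempt = 0
    time.sleep(0.1)
    while attempt < 60:
        time.sleep(1)
        res = client.get(url_for("index"))
        if b'Checking now' not in res.data:
            break
        attempt += 1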
@@ -159,5 +160,10 @@ def live_server_setup(live_server):
ret = " ".join([auth.username, auth.password, auth.type])
return ret

# Just return some GET var
@live_server.app.route('/test-return-query', methods=['GET'])
def test_return_query():
return request.query_string

live_server.start()


@@ -4,7 +4,7 @@ import queue
import time

from changedetectionio import content_fetcher
from changedetectionio.html_tools import FilterNotFoundInResponse
from changedetectionio.fetch_site_status import FilterNotFoundInResponse

# A single update worker
#
@@ -91,8 +91,8 @@ class update_worker(threading.Thread):
return

n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
watch['css_filter'],
'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
", ".join(watch['include_filters']),
threshold),
'notification_format': 'text'}

@@ -189,7 +189,7 @@ class update_worker(threading.Thread):
if not self.datastore.data['watching'].get(uuid):
continue

err_text = "Warning, filter '{}' not found".format(str(e))
err_text = "Warning, no filters were found, no change detection ran."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
# So that we get a trigger when the content is added again
'previous_md5': ''})
@@ -282,10 +282,12 @@ class update_worker(threading.Thread):
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})


# Always record that we at least tried
count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time())})
'last_checked': round(time.time()),
'check_count': count
})

# Always save the screenshot if it's available
if update_handler.screenshot:

@@ -1,36 +1,36 @@
flask ~= 2.0
flask~=2.0
flask_wtf
eventlet >= 0.31.0
eventlet>=0.31.0
validators
timeago ~= 1.0
inscriptis ~= 2.2
feedgen ~= 0.9
flask-login ~= 0.5
timeago~=1.0
inscriptis~=2.2
feedgen~=0.9
flask-login~=0.5
flask_restful
pytz

# Set these versions together to avoid a RequestsDependencyWarning
# >= 2.26 also adds Brotli support if brotli is installed
brotli ~= 1.0
requests[socks] ~= 2.28
brotli~=1.0
requests[socks] ~=2.28

urllib3 > 1.26
chardet > 2.3.0
urllib3>1.26
chardet>2.3.0

wtforms ~= 3.0
jsonpath-ng ~= 1.5.3
wtforms~=3.0
jsonpath-ng~=1.5.3

# jq not available on Windows so must be installed manually

# Notification library
apprise ~= 1.1.0
apprise~=1.1.0

# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt

# Pinned version of cryptography otherwise
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
cryptography ~= 3.4
cryptography~=3.4

# Used for CSS filtering
bs4
@@ -39,12 +39,20 @@ bs4
lxml

# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
selenium ~= 4.1.0
selenium~=4.1.0

# https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849
# ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security'
# need to revisit flask login versions
werkzeug ~= 2.0.0
werkzeug~=2.0.0

# Templating, so far just in the URLs but in the future can be for the notifications also
jinja2~=3.1
jinja2-time

# https://peps.python.org/pep-0508/#environment-markers
# https://github.com/dgtlmoon/changedetection.io/pull/1009
jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux"
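The suffix on the jq line above is a PEP 508 environment marker: pip evaluates it against the installing interpreter and simply skips the package elsewhere (hence the "not available on Windows" note). The marker can be checked by hand with the packaging library (a sketch; packaging ships with pip rather than being one of this project's pinned requirements):

from packaging.markers import Marker

# Sketch: evaluate the requirements.txt marker against this interpreter.
marker = Marker('python_version >= "3.8" and sys_platform == "linux"')
print(marker.evaluate())  # True only on Linux with Python 3.8 or newer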

# playwright is installed at Dockerfile build time because it's not available on all platforms