Compare commits

..

1 Commits

Author SHA1 Message Date
dgtlmoon
b57fd92a6d 'Save chrome screenshot' checkbox never used, removing, we always save the screenshot. 2022-08-16 15:58:22 +02:00
29 changed files with 132 additions and 442 deletions

View File

@@ -7,20 +7,6 @@ assignees: 'dgtlmoon'
---
**DO NOT USE THIS FORM TO REPORT THAT A PARTICULAR WEBSITE IS NOT SCRAPING/WATCHING AS EXPECTED**
This form is only for direct bugs and feature requests todo directly with the software.
Please report watched websites (full URL and _any_ settings) that do not work with changedetection.io as expected [**IN THE DISCUSSION FORUMS**](https://github.com/dgtlmoon/changedetection.io/discussions) or your report will be deleted
CONSIDER TAKING OUT A SUBSCRIPTION FOR A SMALL PRICE PER MONTH, YOU GET THE BENEFIT OF USING OUR PAID PROXIES AND FURTHERING THE DEVELOPMENT OF CHANGEDETECTION.IO
THANK YOU
**Describe the bug**
A clear and concise description of what the bug is.

View File

@@ -1,16 +1,23 @@
## Web Site Change Detection, Monitoring and Notification.
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
# changedetection.io
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master)
Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more
## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS.
[**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
_Know when web pages change! Stay ontop of new information! get notifications when important website content changes_
Live your data-life *pro-actively* instead of *re-actively*.
Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
**Get your own private instance now! Let us host it for you!**
[**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
@@ -39,18 +46,7 @@ Know when important content changes, we support notifications via Discord, Teleg
- Monitor HTML source code for unexpected changes, strengthen your PCI compliance
- You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!</a>_
#### Key Features
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
- Switch between fast non-JS and Chrome JS based "fetchers"
- Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
- Override Request Headers, Specify `POST` or `GET` and other methods
- Use the "Visual Selector" to help target specific elements
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver!</a>_
## Screenshots

View File

@@ -44,7 +44,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.39.18'
__version__ = '0.39.17.2'
datastore = None
@@ -500,10 +500,10 @@ def changedetection_app(config=None, datastore_o=None):
import hashlib
from .fetch_processor import json_html_plaintext
from changedetectionio import fetch_site_status
# Get the most recent one
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
newest_history_key = datastore.get_val(uuid, 'newest_history_key')
# 0 means that theres only one, so that there should be no 'unviewed' history available
if newest_history_key == 0:
@@ -514,7 +514,7 @@ def changedetection_app(config=None, datastore_o=None):
encoding='utf-8') as file:
raw_content = file.read()
handler = json_html_plaintext.perform_site_check(datastore=datastore)
handler = fetch_site_status.perform_site_check(datastore=datastore)
stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text'])
@@ -1186,36 +1186,6 @@ def changedetection_app(config=None, datastore_o=None):
flash("{} watches are queued for rechecking.".format(i))
return redirect(url_for('index', tag=tag))
@app.route("/form/checkbox-operations", methods=['POST'])
@login_required
def form_watch_list_checkbox_operations():
op = request.form['op']
uuids = request.form.getlist('uuids')
if (op == 'delete'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.delete(uuid.strip())
flash("{} watches deleted".format(len(uuids)))
if (op == 'pause'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = True
flash("{} watches paused".format(len(uuids)))
if (op == 'unpause'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = False
flash("{} watches unpaused".format(len(uuids)))
return redirect(url_for('index'))
@app.route("/api/share-url", methods=['GET'])
@login_required
def form_share_put_watch():
@@ -1415,18 +1385,13 @@ def ticker_thread_check_time_launch_checks():
seconds_since_last_recheck = now - watch['last_checked']
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
priority = int(time.time())
print(
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
uuid,
watch['last_checked'],
now,
priority,
watch.jitter_seconds,
now - watch['last_checked']))
print("> Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid,
watch['last_checked'],
now,
watch.jitter_seconds,
now - watch['last_checked']))
# Into the queue with you
update_q.put((priority, uuid))
update_q.put((5, uuid))
# Reset for next time
watch.jitter_seconds = 0

View File

@@ -31,12 +31,11 @@ class JSActionExceptions(Exception):
return
class PageUnloadable(Exception):
def __init__(self, status_code, url, screenshot=False, message=False):
def __init__(self, status_code, url, screenshot=False):
# Set this so we can use it in other parts of the app
self.status_code = status_code
self.url = url
self.screenshot = screenshot
self.message = message
return
class EmptyReply(Exception):
@@ -293,15 +292,7 @@ class base_html_playwright(Fetcher):
# allow per-watch proxy selection override
if proxy_override:
# https://playwright.dev/docs/network#http-proxy
from urllib.parse import urlparse
parsed = urlparse(proxy_override)
proxy_url = "{}://{}:{}".format(parsed.scheme, parsed.hostname, parsed.port)
self.proxy = {'server': proxy_url}
if parsed.username:
self.proxy['username'] = parsed.username
if parsed.password:
self.proxy['password'] = parsed.password
self.proxy = {'server': proxy_override}
def run(self,
url,
@@ -365,7 +356,7 @@ class base_html_playwright(Fetcher):
print(str(e))
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None, message=e.message)
raise PageUnloadable(url=url, status_code=None)
if response is None:
context.close()

View File

@@ -1,41 +0,0 @@
class fetch_processor():
contents = b''
screenshot = None
history_artifact_suffix = 'txt'
"""
base class for all fetch processors
- json_html_plaintext
- image (future)
"""
def __init__(self, *args, datastore, **kwargs):
super().__init__(*args, **kwargs)
self.datastore = datastore
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
# if watch.proxy use that
# fetcher.proxy_override = watch.proxy or main config proxy
# Allows override the proxy on a per-request basis
# ALWAYS use the first one is nothing selected
def set_proxy_from_list(self, watch):
proxy_args = None
if self.datastore.proxy_list is None:
return None
# If its a valid one
if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
proxy_args = watch['proxy']
# not valid (including None), try the system one
else:
system_proxy = self.datastore.data['settings']['requests']['proxy']
# Is not None and exists
if any([system_proxy in p for p in self.datastore.proxy_list]):
proxy_args = system_proxy
# Fallback - Did not resolve anything, use the first available
if proxy_args is None:
proxy_args = self.datastore.proxy_list[0][0]
return proxy_args

View File

@@ -9,13 +9,42 @@ from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from . import fetch_processor
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check(fetch_processor):
class perform_site_check():
xpath_data = None
def __init__(self, *args, datastore, **kwargs):
super().__init__(*args, **kwargs)
self.datastore = datastore
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
# if watch.proxy use that
# fetcher.proxy_override = watch.proxy or main config proxy
# Allows override the proxy on a per-request basis
# ALWAYS use the first one is nothing selected
def set_proxy_from_list(self, watch):
proxy_args = None
if self.datastore.proxy_list is None:
return None
# If its a valid one
if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
proxy_args = watch['proxy']
# not valid (including None), try the system one
else:
system_proxy = self.datastore.data['settings']['requests']['proxy']
# Is not None and exists
if any([system_proxy in p for p in self.datastore.proxy_list]):
proxy_args = system_proxy
# Fallback - Did not resolve anything, use the first available
if proxy_args is None:
proxy_args = self.datastore.proxy_list[0][0]
return proxy_args
# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "foobar(?i)" type configuration
@@ -32,11 +61,13 @@ class perform_site_check(fetch_processor):
def run(self, uuid):
timestamp = int(time.time()) # used for storage etc too
changed_detected = False
screenshot = False # as bytes
stripped_text_from_html = ""
watch = self.datastore.data['watching'].get(uuid)
watch = self.datastore.data['watching'][uuid]
# Protect against file:// access
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
@@ -47,7 +78,7 @@ class perform_site_check(fetch_processor):
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False}
extra_headers =self.datastore.data['watching'][uuid].get('headers')
extra_headers = self.datastore.get_val(uuid, 'headers')
# Tweak the base config with the per-watch ones
request_headers = self.datastore.data['settings']['headers'].copy()
@@ -60,9 +91,9 @@ class perform_site_check(fetch_processor):
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
timeout = self.datastore.data['settings']['requests']['timeout']
url = watch.get('url')
request_body = self.datastore.data['watching'][uuid].get('body')
request_method = self.datastore.data['watching'][uuid].get('method')
url = self.datastore.get_val(uuid, 'url')
request_body = self.datastore.get_val(uuid, 'body')
request_method = self.datastore.get_val(uuid, 'method')
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
# source: support
@@ -96,9 +127,6 @@ class perform_site_check(fetch_processor):
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
fetcher.quit()
self.screenshot = fetcher.screenshot
self.xpath_data = fetcher.xpath_data
# Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base?
@@ -284,6 +312,4 @@ class perform_site_check(fetch_processor):
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
self.contents = text_content_before_ignored_filter
return changed_detected, update_obj
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data

View File

@@ -355,8 +355,6 @@ class watchForm(commonSettingsForm):
filter_failure_notification_send = BooleanField(
'Send a notification when the filter can no longer be found on the page', default=False)
notification_use_default = BooleanField('Use default/system notification settings', default=True)
def validate(self, **kwargs):
if not super().validate():
return False

View File

@@ -35,7 +35,6 @@ class model(dict):
'notification_title': default_notification_title,
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'notification_use_default': True, # Use default for new
'notification_muted': False,
'css_filter': '',
'last_error': False,
@@ -148,7 +147,9 @@ class model(dict):
bump = self.history
return self.__newest_history_key
def save_history_artifact(self, contents: bytes, timestamp, suffix='txt'):
# Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp):
import uuid
import logging
@@ -156,8 +157,8 @@ class model(dict):
self.ensure_data_dir_exists()
snapshot_fname = "{}/{}.{}".format(output_path, uuid.uuid4(), suffix)
logging.debug("Saving history artifact {}".format(snapshot_fname))
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
logging.debug("Saving history text {}".format(snapshot_fname))
with open(snapshot_fname, 'wb') as f:
f.write(contents)

View File

@@ -38,14 +38,13 @@ docker kill $$-test_selenium
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
pip3 install playwright~=1.24
pip3 install playwright~=1.22
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
# takes a while to spin up
sleep 5
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py
pytest tests/visualselector/test_fetch_data.py
unset PLAYWRIGHT_DRIVER_URL
docker kill $$-test_browserless

View File

@@ -22,18 +22,5 @@ $(function () {
});
});
// checkboxes - check all
$("#check-all").click(function (e) {
$('input[type=checkbox]').not(this).prop('checked', this.checked);
});
// checkboxes - show/hide buttons
$("input[type=checkbox]").click(function (e) {
if ($('input[type=checkbox]:checked').length) {
$('#checkbox-operations').slideDown();
} else {
$('#checkbox-operations').slideUp();
}
});
});

View File

@@ -1,7 +1,7 @@
$(document).ready(function () {
function toggle_fetch_backend() {
$(document).ready(function() {
function toggle() {
if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
if (playwright_enabled) {
if(playwright_enabled) {
// playwright supports headers, so hide everything else
// See #664
$('#requests-override-options #request-method').hide();
@@ -13,8 +13,12 @@ $(document).ready(function () {
// selenium/webdriver doesnt support anything afaik, hide it all
$('#requests-override-options').hide();
}
$('#webdriver-override-options').show();
} else {
$('#requests-override-options').show();
$('#requests-override-options *:hidden').show();
$('#webdriver-override-options').hide();
@@ -22,27 +26,8 @@ $(document).ready(function () {
}
$('input[name="fetch_backend"]').click(function (e) {
toggle_fetch_backend();
toggle();
});
toggle_fetch_backend();
toggle();
function toggle_default_notifications() {
var n=$('#notification_urls, #notification_title, #notification_body, #notification_format');
if ($('#notification_use_default').is(':checked')) {
$('#notification-field-group').fadeOut();
$(n).each(function (e) {
$(this).attr('readonly', true);
});
} else {
$('#notification-field-group').show();
$(n).each(function (e) {
$(this).attr('readonly', false);
});
}
}
$('#notification_use_default').click(function (e) {
toggle_default_notifications();
});
toggle_default_notifications();
});

View File

@@ -555,13 +555,3 @@ ul {
.snapshot-age.error {
background-color: #ff0000;
color: #fff; }
#checkbox-operations {
background: rgba(0, 0, 0, 0.05);
padding: 1em;
border-radius: 10px;
margin-bottom: 1em;
display: none; }
.checkbox-uuid > * {
vertical-align: middle; }

View File

@@ -774,15 +774,3 @@ ul {
}
}
#checkbox-operations {
background: rgba(0, 0, 0, 0.05);
padding: 1em;
border-radius: 10px;
margin-bottom: 1em;
display: none;
}
.checkbox-uuid {
> * {
vertical-align: middle;
}
}

View File

@@ -244,6 +244,10 @@ class ChangeDetectionStore:
return False
def get_val(self, uuid, val):
# Probably their should be dict...
return self.data['watching'][uuid].get(val)
# Remove a watchs data but keep the entry (URL etc)
def clear_watch_history(self, uuid):
import pathlib
@@ -337,8 +341,6 @@ class ChangeDetectionStore:
# Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
if not self.data['watching'].get(watch_uuid):
return
if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png")
@@ -352,20 +354,18 @@ class ChangeDetectionStore:
f.close()
def save_error_text(self, watch_uuid, contents):
if not self.data['watching'].get(watch_uuid):
return
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
with open(target_path, 'w') as f:
f.write(contents)
def save_xpath_data(self, watch_uuid, data, as_error=False):
if not self.data['watching'].get(watch_uuid):
return
if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
with open(target_path, 'w') as f:
f.write(json.dumps(data))
@@ -535,28 +535,4 @@ class ChangeDetectionStore:
del(watch['last_changed'])
except:
continue
return
def update_5(self):
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
default_notification_title,
)
for uuid, watch in self.data['watching'].items():
try:
# If it's all the same to the system settings, then prefer system notification settings
# include \r\n -> \n incase they already hit submit and the browser put \r in
if watch.get('notification_body').replace('\r\n', '\n') == default_notification_body.replace('\r\n', '\n') and \
watch.get('notification_format') == default_notification_format and \
watch.get('notification_title').replace('\r\n', '\n') == default_notification_title.replace('\r\n', '\n') and \
watch.get('notification_urls') == self.__data['settings']['application']['notification_urls']:
watch['notification_use_default'] = True
else:
watch['notification_use_default'] = False
except:
continue
return

View File

@@ -135,11 +135,9 @@ User-Agent: wonderbra 1.0") }}
</div>
<div class="tab-pane-inner" id="notifications">
<strong>Note: <i>These settings override the global settings for this watch.</i></strong>
<fieldset>
<div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.notification_use_default) }}
</div>
<div class="field-group" id="notification-field-group">
<div class="field-group">
{{ render_common_settings_form(form, current_base_url, emailprefix) }}
</div>
</fieldset>

View File

@@ -57,7 +57,6 @@
</br>
{% if is_html_webdriver %}
{% if screenshot %}
<div class="snapshot-age">{{watch.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
{% else %}
No screenshot available just yet! Try rechecking the page.

View File

@@ -24,14 +24,6 @@
</fieldset>
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
</form>
<form class="pure-form" action="{{ url_for('form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div id="checkbox-operations">
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="pause">Pause</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button>
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
</div>
<div>
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
{% for tag in tags %}
@@ -49,7 +41,7 @@
<table class="pure-table pure-table-striped watch-table">
<thead>
<tr>
<th><input style="vertical-align: middle" type="checkbox" id="check-all"/> #</th>
<th>#</th>
<th></th>
{% set link_order = "desc" if sort_order else "asc" %}
{% set arrow_span = "" %}
@@ -74,7 +66,7 @@
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
{% if watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}unviewed{% endif %}
{% if watch.uuid in queued_uuids %}queued{% endif %}">
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} "/> <span>{{ loop.index }}</span></td>
<td class="inline">{{ loop.index }}</td>
<td class="inline watch-controls">
<a class="state-{{'on' if watch.paused }}" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks"/></a>
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
@@ -137,6 +129,5 @@
#}
</div>
</form>
</div>
{% endblock %}

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks
from ..util import live_server_setup
import logging
@@ -29,8 +29,14 @@ def test_fetch_webdriver_content(client, live_server):
assert b"1 Imported" in res.data
time.sleep(3)
wait_for_all_checks(client)
attempt = 0
while attempt < 20:
res = client.get(url_for("index"))
if not b'Checking now' in res.data:
break
logging.getLogger().info("Waiting for check to not say 'Checking now'..")
time.sleep(3)
attempt += 1
res = client.get(

View File

@@ -47,6 +47,7 @@ def set_modified_response():
# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
def test_css_filter_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text
# Check text with sub-parts renders correctly

View File

@@ -71,6 +71,7 @@ def set_modified_response():
def test_element_removal_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text
# Check text with sub-parts renders correctly

View File

@@ -1,5 +1,7 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
from changedetectionio import html_tools
@@ -9,7 +11,7 @@ def test_setup(live_server):
# Unit test of the stripper
# Always we are dealing in utf-8
def test_strip_regex_text_func():
from ..fetch_processor import json_html_plaintext
from changedetectionio import fetch_site_status
test_content = """
but sometimes we want to remove the lines.
@@ -21,7 +23,7 @@ def test_strip_regex_text_func():
ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]
fetcher = json_html_plaintext.perform_site_check(datastore=False)
fetcher = fetch_site_status.perform_site_check(datastore=False)
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"but 1 lines" in stripped_content

View File

@@ -11,7 +11,7 @@ def test_setup(live_server):
# Unit test of the stripper
# Always we are dealing in utf-8
def test_strip_text_func():
from ..fetch_processor import json_html_plaintext
from changedetectionio import fetch_site_status
test_content = """
Some content
@@ -23,9 +23,7 @@ def test_strip_text_func():
ignore_lines = ["sometimes"]
from ..fetch_processor import json_html_plaintext
fetcher = json_html_plaintext.perform_site_check(datastore=False)
fetcher = fetch_site_status.perform_site_check(datastore=False)
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"sometimes" not in stripped_content

View File

@@ -71,7 +71,6 @@ def test_check_notification(client, live_server):
"url": test_url,
"tag": "my tag",
"title": "my title",
# No 'notification_use_default' here, so it's effectively False/off
"headers": "",
"fetch_backend": "html_requests"})
@@ -216,82 +215,3 @@ def test_notification_validation(client, live_server):
url_for("form_delete", uuid="all"),
follow_redirects=True
)
# Check that the default VS watch specific notification is hit
def test_check_notification_use_default(client, live_server):
set_original_response()
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tag": ''},
follow_redirects=True
)
assert b"Watch added" in res.data
## Setup the local one and enable it
res = client.post(
url_for("edit_page", uuid="first"),
data={"notification_urls": notification_url,
"notification_title": "watch-notification",
"notification_body": "watch-body",
'notification_use_default': "True",
"notification_format": "Text",
"url": test_url,
"tag": "my tag",
"title": "my title",
"headers": "",
"fetch_backend": "html_requests"},
follow_redirects=True
)
res = client.post(
url_for("settings_page"),
data={"application-notification_title": "global-notifications-title",
"application-notification_body": "global-notifications-body\n",
"application-notification_format": "Text",
"application-notification_urls": notification_url,
"requests-time_between_check-minutes": 180,
"fetch_backend": "html_requests"
},
follow_redirects=True
)
# A change should by default trigger a notification of the global-notifications
time.sleep(1)
set_modified_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(2)
with open("test-datastore/notification.txt", "r") as f:
assert 'global-notifications-title' in f.read()
## Setup the local one and enable it
res = client.post(
url_for("edit_page", uuid="first"),
data={"notification_urls": notification_url,
"notification_title": "watch-notification",
"notification_body": "watch-body",
# No 'notification_use_default' here, so it's effectively False/off = "dont use default, use this one"
"notification_format": "Text",
"url": test_url,
"tag": "my tag",
"title": "my title",
"headers": "",
"fetch_backend": "html_requests"},
follow_redirects=True
)
set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(2)
assert os.path.isfile("test-datastore/notification.txt")
with open("test-datastore/notification.txt", "r") as f:
assert 'watch-notification' in f.read()
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),
follow_redirects=True
)

View File

@@ -2,8 +2,6 @@
from flask import make_response, request
from flask import url_for
import logging
import time
def set_original_response():
test_return_data = """<html>
@@ -70,31 +68,6 @@ def extract_api_key_from_UI(client):
api_key = m.group(1)
return api_key.strip()
# kinda funky, but works for now
def extract_UUID_from_client(client):
import re
res = client.get(
url_for("index"),
)
# <span id="api-key">{{api_key}}</span>
m = re.search('edit/(.+?)"', str(res.data))
uuid = m.group(1)
return uuid.strip()
def wait_for_all_checks(client):
# Loop waiting until done..
attempt=0
while attempt < 60:
time.sleep(1)
res = client.get(url_for("index"))
if not b'Checking now' in res.data:
break
logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
attempt += 1
def live_server_setup(live_server):
@live_server.app.route('/test-endpoint')
@@ -160,4 +133,3 @@ def live_server_setup(live_server):
return ret
live_server.start()

View File

@@ -1,2 +0,0 @@
"""Tests for the app."""

View File

@@ -1,3 +0,0 @@
#!/usr/bin/python3
from .. import conftest

View File

@@ -1,40 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
def test_visual_selector_content_ready(client, live_server):
import os
import json
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
live_server_setup(live_server)
time.sleep(1)
# Add our URL to the import page, maybe better to use something we control?
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
test_url = 'https://news.ycombinator.com'
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
follow_redirects=True
)
assert b"Watch added in Paused state, saving will unpause" in res.data
res = client.post(
url_for("edit_page", uuid="first", unpause_on_save=1),
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
follow_redirects=True
)
assert b"unpaused" in res.data
time.sleep(1)
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
# Open it and see if it roughly looks correct
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f:
json.load(f)

View File

@@ -41,7 +41,7 @@ class update_worker(threading.Thread):
)
# Did it have any notification alerts to hit?
if not watch.get('notification_use_default') and len(watch['notification_urls']):
if len(watch['notification_urls']):
print(">>> Notifications queued for UUID from watch {}".format(watch_uuid))
n_object['notification_urls'] = watch['notification_urls']
n_object['notification_title'] = watch['notification_title']
@@ -49,7 +49,7 @@ class update_worker(threading.Thread):
n_object['notification_format'] = watch['notification_format']
# No? maybe theres a global setting, queue them all
elif watch.get('notification_use_default') and len(self.datastore.data['settings']['application']['notification_urls']):
elif len(self.datastore.data['settings']['application']['notification_urls']):
print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(watch_uuid))
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title']
@@ -117,9 +117,9 @@ class update_worker(threading.Thread):
os.unlink(full_path)
def run(self):
from .fetch_processor import json_html_plaintext
from changedetectionio import fetch_site_status
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
while not self.app.config.exit.is_set():
@@ -132,20 +132,21 @@ class update_worker(threading.Thread):
self.current_uuid = uuid
if uuid in list(self.datastore.data['watching'].keys()):
update_handler = None # Interface object
changed_detected = False
contents = b''
screenshot = False
update_obj= {}
xpath_data = False
process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
now = time.time()
try:
update_handler = json_html_plaintext.perform_site_check(datastore=self.datastore)
changed_detected, update_obj = update_handler.run(uuid)
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc
if not isinstance(update_handler.contents, (bytes, bytearray)):
if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e))
@@ -222,9 +223,6 @@ class update_worker(threading.Thread):
'last_check_status': e.status_code})
except content_fetcher.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly"
if e.message:
err_text = "{} - {}".format(err_text, e.message)
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
@@ -236,9 +234,6 @@ class update_worker(threading.Thread):
# Other serious error
process_changedetection_results = False
else:
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
if not self.datastore.data['watching'].get(uuid):
continue
# Mark that we never had any failures
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
@@ -246,15 +241,20 @@ class update_worker(threading.Thread):
self.cleanup_error_artifacts(uuid)
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
if not self.datastore.data['watching'].get(uuid):
continue
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results:
try:
watch = self.datastore.data['watching'][uuid]
fname = "" # Saved history text filename
# For the FIRST time we check a site, or a change detected, save the snapshot.
if changed_detected or not watch['last_checked']:
# A change was detected
watch.save_history_artifact(contents=update_handler.contents, timestamp=str(round(time.time())), suffix=update_handler.history_artifact_suffix)
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
@@ -280,10 +280,10 @@ class update_worker(threading.Thread):
'last_checked': round(time.time())})
# Always save the screenshot if it's available
if update_handler.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
if update_handler.xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
if screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
if xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
self.current_uuid = None # Done

Binary file not shown.

Before

Width:  |  Height:  |  Size: 209 KiB

After

Width:  |  Height:  |  Size: 190 KiB