Compare commits

...

25 Commits

Author SHA1 Message Date
dgtlmoon
00ac8645f7 Merge branch 'master' into fetchers-abstract-graphic-compare 2022-10-11 15:12:16 +02:00
dgtlmoon
bef4b40d7f cmake? 2022-10-11 15:03:56 +02:00
dgtlmoon
6dd26226bc Merge branch 'adding-test-webdriver_js_execute_code' into fetchers-abstract-graphic-compare 2022-10-11 14:26:12 +02:00
dgtlmoon
779e9c1780 Test that the 'execute JS before' works 2022-10-11 14:21:52 +02:00
dgtlmoon
d9ed04ee56 fix bad test 2022-10-11 13:52:48 +02:00
dgtlmoon
0f63dca9f7 Merge branch 'master' into fetchers-abstract-graphic-compare 2022-10-11 13:06:49 +02:00
dgtlmoon
da7f613e9f tidyups 2022-09-19 17:34:56 +02:00
dgtlmoon
bb03879aad tweaks for merge 2022-09-19 14:24:40 +02:00
dgtlmoon
d23a39a7d8 Merge branch 'master' into fetchers-abstract-graphic-compare 2022-09-19 14:09:50 +02:00
dgtlmoon
338b4dacd0 WIP 2022-09-13 15:09:53 +02:00
dgtlmoon
c0fcae0076 WIP 2022-09-13 13:44:11 +02:00
dgtlmoon
0e0bd93234 WIP 2022-09-13 09:52:29 +02:00
dgtlmoon
c5b0c19836 WIP 2022-09-12 16:48:53 +02:00
dgtlmoon
c00459e18f WIP slider 2022-09-12 16:12:35 +02:00
dgtlmoon
41db6652fe WIP 2022-09-12 16:04:58 +02:00
dgtlmoon
20869a13b3 tweaks to saving 2022-09-12 12:45:29 +02:00
dgtlmoon
97c2cd633d WIP 2022-09-12 12:13:03 +02:00
dgtlmoon
9244e2fb9c sorting lines 2022-09-12 10:06:45 +02:00
dgtlmoon
a86cbd8b7a Merge branch 'master' into fetchers-abstract 2022-09-12 10:04:23 +02:00
dgtlmoon
f35d91e4fb Cleaner history suffix handling 2022-08-31 19:22:26 +02:00
dgtlmoon
687cf9beb4 More tidyup 2022-08-31 18:11:18 +02:00
dgtlmoon
f59b198ffb fetch right class 2022-08-31 18:02:22 +02:00
dgtlmoon
518bdf5a3f move this 2022-08-31 18:00:53 +02:00
dgtlmoon
dcd09359eb cleanup 2022-08-31 17:57:02 +02:00
dgtlmoon
425f8ea632 Abstract out the fetch handlers for different fetch types 2022-08-31 17:52:32 +02:00
23 changed files with 774 additions and 98 deletions

View File

@@ -5,6 +5,7 @@ FROM python:3.8-slim as builder
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
g++ \
gcc \
libc-dev \

View File

@@ -396,18 +396,20 @@ def changedetection_app(config=None, datastore_o=None):
existing_tags = datastore.get_all_tags()
form = forms.quickWatchForm(request.form)
webdriver_enabled = True if os.getenv('PLAYWRIGHT_DRIVER_URL', False) or os.getenv('PLAYWRIGHT_DRIVER_URL', False) else False
output = render_template("watch-overview.html",
form=form,
watches=sorted_watches,
tags=existing_tags,
active_tag=limit_tag,
app_rss_token=datastore.data['settings']['application']['rss_access_token'],
has_unviewed=datastore.has_unviewed,
# Don't link to hosting when we're on the hosting environment
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
form=form,
guid=datastore.data['app_guid'],
queued_uuids=[uuid for p,uuid in update_q.queue])
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
queued_uuids=[uuid for p, uuid in update_q.queue],
tags=existing_tags,
watches=sorted_watches,
webdriver_enabled=webdriver_enabled
)
if session.get('share-link'):
del(session['share-link'])
@@ -489,7 +491,7 @@ def changedetection_app(config=None, datastore_o=None):
import hashlib
from changedetectionio import fetch_site_status
from .fetch_processor import json_html_plaintext
# Get the most recent one
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
@@ -503,7 +505,7 @@ def changedetection_app(config=None, datastore_o=None):
encoding='utf-8') as file:
raw_content = file.read()
handler = fetch_site_status.perform_site_check(datastore=datastore)
handler = json_html_plaintext.perform_site_check(datastore=datastore)
stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text'])
@@ -636,20 +638,31 @@ def changedetection_app(config=None, datastore_o=None):
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
watch = datastore.data['watching'].get(uuid)
# Which tabs to show/hide ?
enabled_tabs = []
if watch.get('fetch_processor') == 'json_html_plaintext' or not watch.get('fetch_processor'):
enabled_tabs.append('visual-selector')
enabled_tabs.append('text-filters-and-triggers')
if watch.get('fetch_processor') == 'image':
enabled_tabs.append('visual-selector')
output = render_template("edit.html",
uuid=uuid,
watch=datastore.data['watching'][uuid],
form=form,
has_empty_checktime=using_default_check_time,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
using_global_webdriver_wait=default['webdriver_delay'] is None,
current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
enabled_tabs = enabled_tabs,
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
watch=watch,
)
return output
@@ -782,6 +795,86 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/diff/image/<string:uuid>", methods=['GET'])
@login_required
def diff_image_history_page(uuid):
    """Render the image comparison page ("diff-image.html") for one watch.

    The newest snapshot is compared against an older one. The older one is
    taken from the ``previous_version`` query argument (the name of the
    version selector in the template) when it names a known, non-newest
    snapshot; otherwise the second-newest snapshot is used.

    :param uuid: UUID of the watch, or the literal 'first' to pick a watch
                 automatically (mostly for testing).
    :return: The rendered page, or a redirect to the index with a flashed
             error when the watch is unknown or has fewer than two snapshots.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        known_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, keep 'first' so the lookup below reports the
        # bad link instead of crashing on pop() from an empty list
        if known_uuids:
            uuid = known_uuids.pop()

    extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('index'))

    history = watch.history
    dates = list(history.keys())

    if len(dates) < 2:
        flash("Not enough saved change detection snapshots to produce a report.", "error")
        return redirect(url_for('index'))

    # Honour the version chosen in the page's selector; anything missing or
    # unknown falls back to the snapshot just before the newest one
    previous_version = request.args.get('previous_version')
    if previous_version not in dates[:-1]:
        previous_version = dates[-2]

    datastore.set_last_viewed(uuid, time.time())

    return render_template("diff-image.html",
                           watch=watch,
                           extra_stylesheets=extra_stylesheets,
                           versions=dates[:-1],  # All except current/last
                           uuid=uuid,
                           newest_version_timestamp=dates[-1],
                           current_previous_version=str(previous_version),
                           current_diff_url=watch['url'],
                           extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
                           left_sticky=True,
                           last_error=watch['last_error'],
                           last_error_text=watch.get_error_text(),
                           last_error_screenshot=watch.get_error_snapshot()
                           )
@app.route("/preview/image/<string:uuid>", methods=['GET'])
@login_required
def preview_image_history_page(uuid):
    """Render the preview page ("preview-image.html") for a watch's newest snapshot.

    :param uuid: UUID of the watch, or the literal 'first' to pick a watch
                 automatically (mostly for testing).
    :return: The rendered page, or a redirect to the index with a flashed
             error when the watch is unknown or has no snapshots yet.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        known_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, keep 'first' so the lookup below reports the
        # bad link instead of crashing on pop() from an empty list
        if known_uuids:
            uuid = known_uuids.pop()

    extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('index'))

    history = watch.history
    dates = list(history.keys())

    if not dates:
        flash("Not enough saved change detection snapshots to produce a report.", "error")
        return redirect(url_for('index'))

    return render_template("preview-image.html",
                           watch=watch,
                           extra_stylesheets=extra_stylesheets,
                           uuid=uuid,
                           current_diff_url=watch['url'],
                           newest_history_key=watch.newest_history_key,
                           extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
                           left_sticky=True,
                           last_error=watch['last_error'],
                           last_error_text=watch.get_error_text(),
                           last_error_screenshot=watch.get_error_snapshot()
                           )
@app.route("/diff/<string:uuid>", methods=['GET'])
@login_required
def diff_history_page(uuid):
@@ -947,6 +1040,67 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/preview/image/<string:uuid>/<string:history_timestamp>")
@login_required
def render_single_image(uuid, history_timestamp):
    """Serve one stored snapshot image of a watch, uncached.

    :param uuid: UUID of the watch.
    :param history_timestamp: History key of the snapshot to serve. An empty
        value or the literal string 'None' selects the second-newest snapshot
        (or the only one, when just a single snapshot exists).
    :return: A response carrying the raw image bytes; 404 when the watch or
             the requested snapshot does not exist.
    """
    import mimetypes
    from flask import abort

    watch = datastore.data['watching'].get(uuid)
    if watch is None:
        abort(404)

    dates = list(watch.history.keys())
    if not dates:
        abort(404)

    if not history_timestamp or history_timestamp == 'None':
        # Prefer the snapshot before the newest; with one snapshot use that one
        history_timestamp = dates[-2] if len(dates) >= 2 else dates[-1]

    try:
        filename = watch.history[history_timestamp]
    except KeyError:
        abort(404)

    with open(filename, 'rb') as f:
        img = f.read()

    # Snapshots are stored with a suffix matching their detected type (png, jpeg, ...),
    # so derive the header from the filename; keep PNG as the historical default
    guessed_type = mimetypes.guess_type(filename)[0]
    content_type = guessed_type if guessed_type and guessed_type.startswith('image/') else 'image/png'

    response = make_response(img)
    response.headers['Content-type'] = content_type
    response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
    response.headers['Pragma'] = 'no-cache'
    response.headers['Expires'] = '0'
    return response
# Diff renderer for images
# Renders the diff which includes the red box around what changes
# We always compare the newest against whatever compare_date we are given
@app.route("/diff/image/<string:uuid>/<string:compare_date>")
@login_required
def render_diff_image(uuid, compare_date):
    """Serve the newest snapshot with the regions that differ from an older one outlined.

    :param uuid: UUID of the watch.
    :param compare_date: History key of the older snapshot to compare against.
        An empty value or the literal string 'None' selects the second-newest.
    :return: A JPEG response with the rendered diff. When the diff cannot be
             rendered (ValueError) the newest snapshot is returned unmodified.
             Responds 404 for an unknown watch or snapshot, and redirects to
             the index when fewer than two snapshots exist.
    """
    import mimetypes
    from changedetectionio import image_diff
    from flask import abort, make_response

    watch = datastore.data['watching'].get(uuid)
    if watch is None:
        abort(404)

    dates = list(watch.history.keys())
    if len(dates) < 2:
        flash("Not enough saved change detection snapshots to produce a report.", "error")
        return redirect(url_for('index'))

    if not compare_date or compare_date == 'None':
        compare_date = dates[-2]

    new_img = watch.history[watch.newest_history_key]
    try:
        prev_img = watch.history[compare_date]
    except KeyError:
        abort(404)

    # render_diff() always encodes its result as JPEG
    content_type = 'image/jpeg'
    try:
        img = image_diff.render_diff(new_img, prev_img)
    except ValueError as e:
        print ("EXCEPTION: Diff image - got exception {} reverting to raw image without rendering difference".format(str(e)))
        with open(new_img, 'rb') as f:
            img = f.read()
        # The raw snapshot is not necessarily a JPEG, label it by its stored suffix
        guessed_type = mimetypes.guess_type(new_img)[0]
        if guessed_type and guessed_type.startswith('image/'):
            content_type = guessed_type

    resp = make_response(img)
    resp.headers['Content-Type'] = content_type
    return resp
@app.route("/settings/notification-logs", methods=['GET'])
@login_required
def notification_logs():
@@ -1095,12 +1249,24 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
url = request.form.get('url').strip()
if datastore.url_exists(url):
flash('The URL {} already exists'.format(url), "error")
return redirect(url_for('index'))
add_paused = request.form.get('edit_and_watch_submit_button') != None
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
fetch_processor = request.form.get('fetch_processor')
extras = {'paused': add_paused}
if fetch_processor:
extras['fetch_processor']=fetch_processor
if fetch_processor == 'image':
extras['fetch_backend'] = 'html_webdriver'
new_uuid = datastore.add_watch(url=url,
tag=request.form.get('tag').strip(),
extras=extras
)
if not add_paused and new_uuid:

View File

@@ -21,7 +21,6 @@ class Non200ErrorCodeReceived(Exception):
self.page_text = html_tools.html_to_text(page_html)
return
class JSActionExceptions(Exception):
def __init__(self, status_code, url, screenshot, message=''):
self.status_code = status_code
@@ -66,13 +65,14 @@ class ReplyWithContentButNoText(Exception):
return
class Fetcher():
error = None
status_code = None
content = None
headers = None
error = None
fetcher_description = "No description"
headers = None
raw_content = None
status_code = None
webdriver_js_execute_code = None
xpath_element_js = """
// Include the getXpath script directly, easier than fetching
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
@@ -202,6 +202,7 @@ class Fetcher():
# Will be needed in the future by the VisualSelector, always get this where possible.
screenshot = False
element_screenshot = None
system_http_proxy = os.getenv('HTTP_PROXY')
system_https_proxy = os.getenv('HTTPS_PROXY')
@@ -310,7 +311,8 @@ class base_html_playwright(Fetcher):
request_body,
request_method,
ignore_status_codes=False,
current_css_filter=None):
current_css_filter=None
):
from playwright.sync_api import sync_playwright
import playwright._impl._api_types
@@ -410,11 +412,18 @@ class base_html_playwright(Fetcher):
page.wait_for_timeout(500)
self.content = page.content()
self.raw_content = page.content()
self.status_code = response.status
self.headers = response.all_headers()
if current_css_filter is not None:
if current_css_filter is not None and len(current_css_filter):
page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
el = page.locator(current_css_filter)
if el:
el.scroll_into_view_if_needed()
self.element_screenshot = el.screenshot()
else:
page.evaluate("var css_filter=''")
@@ -429,9 +438,9 @@ class base_html_playwright(Fetcher):
# acceptable screenshot quality here
try:
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
#page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
# The actual screenshot
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 82)))
except Exception as e:
context.close()
browser.close()
@@ -533,6 +542,7 @@ class base_html_webdriver(Fetcher):
# @todo - dom wait loaded?
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = self.driver.page_source
self.raw_content = self.driver.page_source
self.headers = {}
self.screenshot = self.driver.get_screenshot_as_png()
@@ -619,6 +629,7 @@ class html_requests(Fetcher):
self.status_code = r.status_code
self.content = r.text
self.raw_content = r.content
self.headers = r.headers

View File

@@ -0,0 +1,12 @@
# (value, label) pairs offered as the "Compare as" choices; the first entry is the default
available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Graphically by image or web-page')]


class fetch_processor():
    """
    Base class for all fetch processors
    - json_html_plaintext
    - image
    """
    # Result of the most recent run, filled in by the concrete processor
    contents = b''
    # Screenshot taken by the fetcher during the most recent run, if any
    screenshot = None
    # Datastore handed to the concrete processor's constructor
    datastore = None

View File

@@ -0,0 +1,130 @@
import hashlib
import imagehash
from PIL import Image
import io
import logging
import os
import re
import time
import urllib3
# fetch processor for requesting and comparing a single image
# can use both requests and playwright/selenium
# - imagehash for change detection (or https://github.com/dgtlmoon/changedetection.io/pull/419/files#diff-7d3854710a6c0faead783f75850100a4c4b69409309200d3a83692dc9783bf6eR17 ?)
# - skimage.metrics import structural_similarity for viewing the diff
from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from . import fetch_processor
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check(fetch_processor):
    """Fetch processor that detects changes by comparing a perceptual hash of an image.

    The image is either the fetched resource itself (when the response is an
    image and was received as bytes) or a screenshot taken by the browser
    fetcher - the element matched by the watch's CSS filter when available,
    otherwise the whole page.
    """
    # Element/xpath information from the last fetch, used by the visual selector
    xpath_data = None

    def __init__(self, *args, datastore, **kwargs):
        self.datastore = datastore
        super().__init__(*args, **kwargs)

    def run(self, uuid):
        """Fetch the watch and compare its image hash against the stored one.

        :param uuid: UUID of the watch to check.
        :return: Tuple ``(changed_detected, update_obj)`` where ``update_obj``
                 holds the values to store back on the watch.
        :raises Exception: when the watch is not using the 'html_webdriver'
                 backend, or when a file:// URL is requested without
                 ALLOW_FILE_URI being set.
        """
        changed_detected = False

        watch = self.datastore.data['watching'].get(uuid)

        if watch.get('fetch_backend') != 'html_webdriver':
            raise Exception(
                "Requires a Chrome compatible fetcher enabled."
            )

        # Protect against file:// access
        if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
            raise Exception(
                "file:// type access is denied for security reasons."
            )

        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

        # A watch without extra headers must not break the merge below
        extra_headers = self.datastore.data['watching'][uuid].get('headers') or {}

        # Tweak the base config with the per-watch ones
        request_headers = self.datastore.data['settings']['headers'].copy()
        request_headers.update(extra_headers)

        # https://github.com/psf/requests/issues/4525
        # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
        # do this by accident.
        if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
            request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')

        timeout = self.datastore.data['settings']['requests']['timeout']
        url = watch.get('url')
        request_body = self.datastore.data['watching'][uuid].get('body')
        request_method = self.datastore.data['watching'][uuid].get('method')
        ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)

        prefer_backend = watch['fetch_backend']
        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        else:
            # If the klass doesnt exist, just use a default
            klass = getattr(content_fetcher, "html_requests")

        proxy_args = self.datastore.get_preferred_proxy_for_watch(uuid)
        fetcher = klass(proxy_override=proxy_args)

        fetcher.run(
            ignore_status_codes=ignore_status_codes,
            request_body=request_body,
            request_headers=request_headers,
            request_method=request_method,
            current_css_filter=watch.get('css_filter'),
            timeout=timeout,
            url=url
        )

        fetcher.quit()

        # if not image/foobar in mimetype
        # raise content_fetcher.NotAnImage(mimetype) ?
        # or better to try load with PIL and catch exception?
        update_obj["last_check_status"] = fetcher.get_last_status_code()

        # Not every fetcher reports response headers (the Selenium based one sets
        # them to an empty dict), so a missing content-type must not raise
        content_type = (fetcher.headers or {}).get('content-type') or ''

        # Browser based fetchers store the page source (text) in raw_content,
        # which cannot be decoded as an image - only trust real bytes
        if 'image' in content_type and isinstance(fetcher.raw_content, bytes):
            self.contents = fetcher.raw_content
        else:
            self.contents = fetcher.element_screenshot if fetcher.element_screenshot else fetcher.screenshot

        # Used for visual-selector
        self.xpath_data = fetcher.xpath_data
        self.screenshot = fetcher.screenshot

        now = time.time()
        image = Image.open(io.BytesIO(self.contents))

        # @todo different choice?
        # https://github.com/JohannesBuchner/imagehash#references
        fetched_hash = str(imagehash.average_hash(image))
        print(uuid, "Time to image hash", time.time() - now)

        # The main thing that all this at the moment comes down to :)
        if watch['previous_md5'] != fetched_hash:
            changed_detected = True

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_hash

        # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
        if not watch.get('previous_md5'):
            watch['previous_md5'] = fetched_hash

        return changed_detected, update_obj

View File

@@ -9,16 +9,18 @@ from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from . import fetch_processor
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check():
class perform_site_check(fetch_processor):
screenshot = None
xpath_data = None
def __init__(self, *args, datastore, **kwargs):
super().__init__(*args, **kwargs)
self.datastore = datastore
super().__init__(*args, **kwargs)
# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "foobar(?i)" type configuration
@@ -294,4 +296,6 @@ class perform_site_check():
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter
self.contents = text_content_before_ignored_filter
return changed_detected, update_obj

View File

@@ -321,8 +321,11 @@ class ValidateCSSJSONXPATHInput(object):
class quickWatchForm(Form):
from . import fetch_processor
url = fields.URLField('URL', validators=[validateURL()])
tag = StringField('Group tag', [validators.Optional()])
fetch_processor = RadioField(u'Compare as', choices=fetch_processor.available_fetchers, default=fetch_processor.available_fetchers[0][0])
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})

View File

@@ -0,0 +1,44 @@
from skimage.metrics import structural_similarity as compare_ssim
import argparse
import imutils
import cv2
# From https://www.pyimagesearch.com/2017/06/19/image-difference-with-opencv-and-python/
def render_diff(fpath_imageA, fpath_imageB):
    """Return image A as JPEG bytes with the regions that differ from image B outlined.

    The two images are compared in grayscale using the Structural Similarity
    Index (SSIM); every contiguous differing region gets a 1px red bounding
    box drawn onto image A.

    :param fpath_imageA: Path of the image that is annotated and returned.
    :param fpath_imageB: Path of the image to compare against.
    :return: JPEG encoded bytes of the annotated image A.
    :raises ValueError: when either file cannot be read as an image.
    """
    import time
    now = time.time()

    imageA = cv2.imread(fpath_imageA)
    imageB = cv2.imread(fpath_imageB)

    # cv2.imread() reports an unreadable/missing file by returning None instead
    # of raising; fail with a clear error rather than an opaque one further down
    for fpath, image in ((fpath_imageA, imageA), (fpath_imageB, imageB)):
        if image is None:
            raise ValueError("Unable to read image {}".format(fpath))

    # convert the images to grayscale
    grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
    grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)

    # compute the Structural Similarity Index (SSIM) between the two
    # images, ensuring that the difference image is returned
    (score, diff) = compare_ssim(grayA, grayB, full=True)
    diff = (diff * 255).astype("uint8")
    print("SSIM: {}".format(score))

    # threshold the difference image, followed by finding contours to
    # obtain the regions of the two input images that differ
    thresh = cv2.threshold(diff, 0, 255,
                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    # outline every differing region; only image A is returned, so only it is annotated
    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        cv2.rectangle(imageA, (x, y), (x + w, y + h), (0, 0, 255), 1)

    print ("Image comparison processing time", time.time()-now)
    return cv2.imencode('.jpg', imageA)[1].tobytes()

View File

@@ -14,42 +14,43 @@ class model(dict):
__newest_history_key = None
__history_n=0
__base_config = {
'url': None,
'tag': None,
'last_checked': 0,
'paused': False,
'last_viewed': 0, # history key value of the last viewed via the [diff] link
#'newest_history_key': 0,
'title': None,
'previous_md5': False,
'uuid': str(uuid_builder.uuid4()),
'headers': {}, # Extra headers to send
'body': None,
'method': 'GET',
#'history': {}, # Dict of timestamp and output stripped filename
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
# Custom notification content
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'notification_title': None,
'notification_body': None,
'notification_format': default_notification_format_for_watch,
'notification_muted': False,
'css_filter': '',
'last_error': False,
'extract_text': [], # Extract text by regex after filters
'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected
'text_should_not_be_present': [], # Text that should not present
'fetch_backend': None,
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'extract_title_as_title': False,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'proxy': None, # Preferred proxy connection
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
# Should be all None by default, so we use the system default in this case.
#'history': {}, # Dict of timestamp and output stripped filename
#'newest_history_key': 0,
'body': None,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'css_filter': '',
'extract_text': [], # Extract text by regex after filters
'extract_title_as_title': False,
'fetch_backend': None,
'fetch_processor': 'json_html_plaintext', # json_html_plaintext, image, rendered_webpage
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'headers': {}, # Extra headers to send
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
'last_checked': 0,
'last_error': False,
'last_viewed': 0, # history key value of the last viewed via the [diff] link
'method': 'GET',
'notification_body': None,
'notification_format': default_notification_format_for_watch,
'notification_muted': False,
'notification_title': None,
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'paused': False,
'previous_md5': False,
'proxy': None, # Preferred proxy connection
'subtractive_selectors': [],
'tag': None,
'text_should_not_be_present': [], # Text that should not present
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
'title': None,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': None,
'uuid': str(uuid_builder.uuid4()),
'webdriver_delay': None,
'webdriver_js_execute_code': None, # Run before change-detection
}
@@ -145,18 +146,25 @@ class model(dict):
bump = self.history
return self.__newest_history_key
# Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp):
def save_history_artifact(self, contents: bytes, timestamp):
import uuid
import logging
import magic
import re
suffix = 'bin'
# detect extension type
mtype = magic.from_buffer(contents, mime=True)
if mtype:
r = re.search(r'image/(\w+)', mtype, re.IGNORECASE)
if r:
suffix = r.group(1)
output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
self.ensure_data_dir_exists()
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
logging.debug("Saving history text {}".format(snapshot_fname))
snapshot_fname = "{}/{}.{}".format(output_path, uuid.uuid4(), suffix)
logging.debug("Saving history artifact {}".format(snapshot_fname))
with open(snapshot_fname, 'wb') as f:
f.write(contents)

View File

@@ -0,0 +1,149 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 19.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg
version="1.1"
id="Layer_1"
x="0px"
y="0px"
viewBox="0 0 20.745352 20.745251"
xml:space="preserve"
width="20.745352"
height="20.745251"
sodipodi:docname="picture-frame.svg"
inkscape:version="1.1.1 (1:1.1+202109281949+c3084ef5ed)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg"><sodipodi:namedview
id="namedview31"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
showgrid="false"
fit-margin-top="0"
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:zoom="24.215073"
inkscape:cx="11.810825"
inkscape:cy="10.158962"
inkscape:window-width="1920"
inkscape:window-height="1056"
inkscape:window-x="1920"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="g1325" /><defs
id="defs57">
</defs>
<g
id="g22"
transform="translate(-141.68664,-143.32441)">
<g
id="g986"
transform="matrix(0.09174031,0,0,0.09174031,139.41786,139.41786)"><g
id="g1313" /><g
id="g18">
<g
id="g1325"
transform="matrix(1.0989302,0,0,1.0989302,-30.889712,-13.037446)"><g
id="g1413"><rect
x="58.112999"
y="58.112999"
style="fill:#95e1d3"
width="190.77765"
height="190.77299"
id="rect4"
rx="0"
ry="0" /><polygon
style="fill:#eaffd0"
points="117.389,248.887 183.138,135.007 248.887,248.887 "
id="polygon6" /><polygon
style="fill:#eaffd0"
points="100.26,175.887 58.113,248.887 117.389,248.887 129.898,227.221 "
id="polygon8" /><circle
style="fill:#fce38a"
cx="141.82001"
cy="119.433"
r="16.547001"
id="circle10" /><path
style="fill:#414042"
d="M 248.887,50.613 H 58.113 c -4.142,0 -7.5,3.357 -7.5,7.5 v 190.773 c 0,4.118 3.362,7.5 7.5,7.5 h 59.276 131.498 c 4.06,0 7.5,-3.304 7.5,-7.5 V 58.113 c 0,-4.142 -3.358,-7.5 -7.5,-7.5 z m -7.5,15 v 155.283 l -51.754,-89.64 c -2.886,-4.998 -10.11,-4.988 -12.99,0 l -46.745,80.965 -23.143,-40.085 c -2.886,-4.998 -10.11,-4.988 -12.99,0 l -28.151,48.76 V 65.613 Z m -141.127,125.274 20.978,36.335 -7.823,13.549 -0.356,0.616 H 71.103 Z m 30.12,50.5 6.013,-10.415 c 0.001,-0.002 0.002,-0.004 0.003,-0.006 l 46.742,-80.959 52.759,91.38 z"
id="path14" /><path
style="fill:#414042"
d="m 141.82,143.48 c 13.259,0 24.046,-10.787 24.046,-24.047 0,-13.26 -10.787,-24.047 -24.046,-24.047 -13.259,0 -24.046,10.787 -24.046,24.047 0,13.26 10.786,24.047 24.046,24.047 z m 0,-33.093 c 4.988,0 9.046,4.059 9.046,9.047 0,4.988 -4.058,9.047 -9.046,9.047 -4.988,0 -9.046,-4.059 -9.046,-9.047 -0.001,-4.989 4.057,-9.047 9.046,-9.047 z"
id="path16" /></g></g>
</g></g>
</g>
<g
id="g24"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g26"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g28"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g30"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g32"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g34"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g36"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g38"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g40"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g42"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g44"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g46"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g48"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g50"
transform="translate(-141.68664,-143.32441)">
</g>
<g
id="g52"
transform="translate(-141.68664,-143.32441)">
</g>
</svg>

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

@@ -578,3 +578,15 @@ ul {
display: inline;
height: 26px;
vertical-align: middle; }
/* #quickwatch-fetch-processor: small white text; its list is shown as an
   inline row without bullets or padding, and labels get a pointer cursor
   on hover. */
#quickwatch-fetch-processor {
color: #fff;
font-size: 80%; }
#quickwatch-fetch-processor ul {
padding: 0px;
list-style-type: none; }
#quickwatch-fetch-processor ul li {
display: inline-block;
margin-right: 1em; }
#quickwatch-fetch-processor ul li label:hover {
cursor: pointer; }

View File

@@ -803,4 +803,24 @@ ul {
padding: 0.5rem;
border-radius: 5px;
color: #ff3300;
}
}
/* #quickwatch-fetch-processor: small white text; its list is shown as an
   inline row without bullets or padding, and labels get a pointer cursor
   on hover. */
#quickwatch-fetch-processor {
color: #fff;
font-size: 80%;
ul {
padding: 0px;
list-style-type: none;
li {
display: inline-block;
margin-right: 1em;
label {
&:hover {
cursor: pointer;
}
}
}
}
}

View File

@@ -0,0 +1,64 @@
{% extends 'base.html' %}
{% block content %}
<div id="settings">
<h1>Differences</h1>
<form class="pure-form " action="" method="GET">
<fieldset>
{% if versions|length >= 1 %}
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
<select id="diff-version" name="previous_version">
{% for version in versions %}
<option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
{{version}}
</option>
{% endfor %}
</select>
<button type="submit" class="pure-button pure-button-primary">Go</button>
{% endif %}
</fieldset>
</form>
</div>
<div id="diff-ui">
<script
defer
src="https://unpkg.com/img-comparison-slider@7/dist/index.js"
></script>
<link
rel="stylesheet"
href="https://unpkg.com/img-comparison-slider@7/dist/styles.css"
/>
<img-comparison-slider>
<img slot="first" src="{{ url_for('render_diff_image', uuid=uuid, compare_date=current_previous_version) }}" />
<img slot="second" src="{{ url_for('render_single_image', uuid=uuid, history_timestamp=current_previous_version) }}" />
</img-comparison-slider>
</div>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.js')}}"></script>
<script defer="">
/* Once the page has loaded, show the snapshot timestamps in the browser's local date/time format */
window.onload = function() {
/* Show the newest version's timestamp (seconds, hence *1000 for milliseconds) as a local date/time */
var current_v = document.getElementById("current-v-date");
var dateObject = new Date({{ newest_version_timestamp }}*1000);
current_v.innerHTML=dateObject.toLocaleString();
/* Relabel each option of the version selector: its value is a timestamp in seconds, the label becomes the local date/time */
var diffList=document.getElementById("diff-version");
/* The selector is only rendered when there are versions to choose from, so it may be absent */
if (typeof(diffList) != 'undefined' && diffList != null) {
for (var option of diffList.options) {
var dateObject = new Date(option.value*1000);
option.label=dateObject.toLocaleString();
}
}
}
</script>
{% endblock %}

View File

@@ -25,7 +25,9 @@
<ul>
<li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li>
{% if 'visual-selector' in enabled_tabs %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
{%endif%}
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
<li class="tab"><a href="#notifications">Notifications</a></li>
</ul>
@@ -155,6 +157,7 @@ User-Agent: wonderbra 1.0") }}
</div>
<div class="tab-pane-inner" id="filters-and-triggers">
{% if 'text-filters-and-triggers' in enabled_tabs %}
<div class="pure-control-group">
<strong>Pro-tips:</strong><br/>
<ul>
@@ -166,12 +169,14 @@ User-Agent: wonderbra 1.0") }}
</li>
</ul>
</div>
<fieldset>
<div class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</div>
</fieldset>
{% endif %}
<div class="pure-control-group">
{% set field = render_field(form.css_filter,
placeholder=".class-name or #some-id, or other CSS selector rule.",
@@ -202,6 +207,9 @@ User-Agent: wonderbra 1.0") }}
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
</span>
</div>
{% if 'text-filters-and-triggers' in enabled_tabs %}
<div class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
@@ -277,6 +285,8 @@ Unavailable") }}
</span>
</div>
</fieldset>
{% endif %}
</div>
<div class="tab-pane-inner visual-selector-ui" id="visualselector">

View File

@@ -0,0 +1,11 @@
{% extends 'base.html' %}
{% block content %}
{# Preview page: shows the single image stored under the newest history key. #}
<div id="settings">
    <h1>Preview</h1>
</div>
<div id="diff-ui">
    <img style="max-width: 100%" alt="Most recent snapshot image" src="{{ url_for('render_single_image', uuid=uuid, history_timestamp=newest_history_key) }}" />
</div>
{% endblock %}

View File

@@ -15,10 +15,18 @@
<div>
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
<span>
{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
{{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
</span>
{% if webdriver_enabled %}
<div id="quickwatch-fetch-processor">
{{ render_field(form.fetch_processor) }}
</div>
{% endif %}
</div>
<div>
{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
{{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
</div>
</div>
</fieldset>
@@ -87,11 +95,11 @@
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
<a class="external" title="Open in new window" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" alt="Share" title="Share"/></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Fetching with Chrome" title="Fetching with Chrome"/>{% endif %}
{%if watch.fetch_processor == "image" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='picture-frame.svg')}}" alt="Comparing graphically" title="Comparing graphically"/>{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}</div>
{% endif %}
@@ -114,10 +122,20 @@
class="recheck pure-button button-small pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
<a href="{{ url_for('edit_page', uuid=watch.uuid)}}" class="pure-button button-small pure-button-primary">Edit</a>
{% if watch.history_n >= 2 %}
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
{% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
<a href="{{ url_for('diff_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
{% else %}
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
{% endif %}
{% else %}
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
{% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
<a href="{{ url_for('preview_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Preview</a>
{% else %}
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
{% endif %}
{% endif %}
{% endif %}
</td>

View File

@@ -47,7 +47,6 @@ def set_modified_response():
# Test that the CSS extraction works as we expect; what matters here is the correct placement of newlines (\n).
def test_css_filter_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text
# Check text with sub-parts renders correctly

View File

@@ -71,7 +71,6 @@ def set_modified_response():
def test_element_removal_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text
# Check text with sub-parts renders correctly

View File

@@ -1,7 +1,5 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
from changedetectionio import html_tools
@@ -11,7 +9,7 @@ def test_setup(live_server):
# Unit test of the stripper
# We are always dealing with UTF-8
def test_strip_regex_text_func():
from changedetectionio import fetch_site_status
from ..fetch_processor import json_html_plaintext
test_content = """
but sometimes we want to remove the lines.
@@ -23,7 +21,7 @@ def test_strip_regex_text_func():
ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]
fetcher = fetch_site_status.perform_site_check(datastore=False)
fetcher = json_html_plaintext.perform_site_check(datastore=False)
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"but 1 lines" in stripped_content

View File

@@ -11,7 +11,7 @@ def test_setup(live_server):
# Unit test of the stripper
# We are always dealing with UTF-8
def test_strip_text_func():
from changedetectionio import fetch_site_status
from ..fetch_processor import json_html_plaintext
test_content = """
Some content
@@ -23,7 +23,9 @@ def test_strip_text_func():
ignore_lines = ["sometimes"]
fetcher = fetch_site_status.perform_site_check(datastore=False)
from ..fetch_processor import json_html_plaintext
fetcher = json_html_plaintext.perform_site_check(datastore=False)
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"sometimes" not in stripped_content

View File

@@ -92,7 +92,6 @@ def wait_for_all_checks(client):
if not b'Checking now' in res.data:
break
logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
attempt += 1
def live_server_setup(live_server):

View File

@@ -120,10 +120,6 @@ class update_worker(threading.Thread):
os.unlink(full_path)
def run(self):
from changedetectionio import fetch_site_status
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
while not self.app.config.exit.is_set():
try:
@@ -135,21 +131,34 @@ class update_worker(threading.Thread):
self.current_uuid = uuid
if uuid in list(self.datastore.data['watching'].keys()):
update_handler = None # Interface object
changed_detected = False
contents = b''
screenshot = False
update_obj= {}
xpath_data = False
update_obj = {}
process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
watch = self.datastore.data['watching'].get(uuid)
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, watch.get('url')))
now = time.time()
try:
changed_detected, update_obj, contents = update_handler.run(uuid)
update_handler = None
if watch.get('fetch_processor') == 'image':
from .fetch_processor import image as processor_image
update_handler = processor_image.perform_site_check(datastore=self.datastore)
elif watch.get('fetch_processor') == 'rendered_webpage':
from .fetch_processor import image as processor_rendered_webpage
update_handler = processor_rendered_webpage.perform_site_check(datastore=self.datastore)
else:
# Anything else for now will be `json_html_plaintext`
from .fetch_processor import json_html_plaintext as processor_json_html_plaintext
update_handler = processor_json_html_plaintext.perform_site_check(datastore=self.datastore)
changed_detected, update_obj = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc
if not isinstance(contents, (bytes, bytearray)):
if not isinstance(update_handler.contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e))
@@ -256,13 +265,12 @@ class update_worker(threading.Thread):
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results:
try:
watch = self.datastore.data['watching'][uuid]
fname = "" # Saved history text filename
watch = self.datastore.data['watching'].get(uuid)
# For the FIRST time we check a site, or a change detected, save the snapshot.
if changed_detected or not watch['last_checked']:
# A change was detected
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
watch.save_history_artifact(contents=update_handler.contents, timestamp=str(round(time.time())))
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

View File

@@ -46,3 +46,11 @@ selenium ~= 4.1.0
werkzeug ~= 2.0.0
# playwright is installed at Dockerfile build time because it's not available on all platforms
imagehash ~= 4.3.0
pillow
scikit-image
imutils
opencv-python
python-magic