mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-04 16:45:57 +00:00
Compare commits
20 Commits
0.45.10
...
image-bina
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9b036d7b19 | ||
|
|
0761984bcd | ||
|
|
e73721a3f0 | ||
|
|
86fc9d669f | ||
|
|
7a66b69158 | ||
|
|
ddd7b2772d | ||
|
|
305060f79c | ||
|
|
cfcf59d009 | ||
|
|
af25b824a0 | ||
|
|
a29085fa18 | ||
|
|
d7832d735d | ||
|
|
7d1c4d7673 | ||
|
|
6e00f0e025 | ||
|
|
4f536bb559 | ||
|
|
38d8aa8d28 | ||
|
|
dec47d5c43 | ||
|
|
cec24fe2c1 | ||
|
|
f4bc0aa2ba | ||
|
|
499c4797da | ||
|
|
9bc71d187e |
@@ -695,6 +695,10 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
@app.route("/diff/<string:uuid>", methods=['GET'])
|
@app.route("/diff/<string:uuid>", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
def diff_history_page(uuid):
|
def diff_history_page(uuid):
|
||||||
|
from changedetectionio import content_fetcher
|
||||||
|
|
||||||
|
newest_version_file_contents = ""
|
||||||
|
previous_version_file_contents = ""
|
||||||
|
|
||||||
# More for testing, possible to return the first/only
|
# More for testing, possible to return the first/only
|
||||||
if uuid == 'first':
|
if uuid == 'first':
|
||||||
@@ -720,21 +724,28 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
# Save the current newest history as the most recently viewed
|
# Save the current newest history as the most recently viewed
|
||||||
datastore.set_last_viewed(uuid, dates[0])
|
datastore.set_last_viewed(uuid, dates[0])
|
||||||
newest_file = watch['history'][dates[0]]
|
|
||||||
with open(newest_file, 'r') as f:
|
|
||||||
newest_version_file_contents = f.read()
|
|
||||||
|
|
||||||
previous_version = request.args.get('previous_version')
|
previous_version = request.args.get('previous_version')
|
||||||
try:
|
if ('content-type' in watch and content_fetcher.supported_binary_type(watch['content-type'])):
|
||||||
previous_file = watch['history'][previous_version]
|
template = "diff-image.html"
|
||||||
except KeyError:
|
else:
|
||||||
# Not present, use a default value, the second one in the sorted list.
|
newest_file = watch['history'][dates[0]]
|
||||||
previous_file = watch['history'][dates[1]]
|
with open(newest_file, 'r') as f:
|
||||||
|
newest_version_file_contents = f.read()
|
||||||
|
|
||||||
with open(previous_file, 'r') as f:
|
try:
|
||||||
previous_version_file_contents = f.read()
|
previous_file = watch['history'][previous_version]
|
||||||
|
except KeyError:
|
||||||
|
# Not present, use a default value, the second one in the sorted list.
|
||||||
|
previous_file = watch['history'][dates[1]]
|
||||||
|
|
||||||
output = render_template("diff.html", watch_a=watch,
|
with open(previous_file, 'r') as f:
|
||||||
|
previous_version_file_contents = f.read()
|
||||||
|
|
||||||
|
template = "diff.html"
|
||||||
|
|
||||||
|
output = render_template(template,
|
||||||
|
watch_a=watch,
|
||||||
newest=newest_version_file_contents,
|
newest=newest_version_file_contents,
|
||||||
previous=previous_version_file_contents,
|
previous=previous_version_file_contents,
|
||||||
extra_stylesheets=extra_stylesheets,
|
extra_stylesheets=extra_stylesheets,
|
||||||
@@ -751,6 +762,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
@app.route("/preview/<string:uuid>", methods=['GET'])
|
@app.route("/preview/<string:uuid>", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
def preview_page(uuid):
|
def preview_page(uuid):
|
||||||
|
from changedetectionio import content_fetcher
|
||||||
|
|
||||||
# More for testing, possible to return the first/only
|
# More for testing, possible to return the first/only
|
||||||
if uuid == 'first':
|
if uuid == 'first':
|
||||||
@@ -765,14 +777,25 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
return redirect(url_for('index'))
|
return redirect(url_for('index'))
|
||||||
|
|
||||||
newest = list(watch['history'].keys())[-1]
|
newest = list(watch['history'].keys())[-1]
|
||||||
with open(watch['history'][newest], 'r') as f:
|
fname = watch['history'][newest]
|
||||||
content = f.readlines()
|
|
||||||
|
if ('content-type' in watch and content_fetcher.supported_binary_type(watch['content-type'])):
|
||||||
|
template = "preview-image.html"
|
||||||
|
content = fname
|
||||||
|
else:
|
||||||
|
template = "preview.html"
|
||||||
|
try:
|
||||||
|
with open(fname, 'r') as f:
|
||||||
|
content = f.read()
|
||||||
|
except:
|
||||||
|
content = "Cant read {}".format(fname)
|
||||||
|
|
||||||
output = render_template("preview.html",
|
output = render_template("preview.html",
|
||||||
content=content,
|
content=content,
|
||||||
extra_stylesheets=extra_stylesheets,
|
extra_stylesheets=extra_stylesheets,
|
||||||
current_diff_url=watch['url'],
|
current_diff_url=watch['url'],
|
||||||
uuid=uuid)
|
uuid=uuid,
|
||||||
|
watch=watch)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@app.route("/settings/notification-logs", methods=['GET'])
|
@app.route("/settings/notification-logs", methods=['GET'])
|
||||||
@@ -783,6 +806,50 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
logs=notification_debug_log if len(notification_debug_log) else ["No errors or warnings detected"])
|
logs=notification_debug_log if len(notification_debug_log) else ["No errors or warnings detected"])
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
# Serve one stored snapshot of a watch as an image.
# The literal string 'None' for datestr selects the first entry of the watch history.
@app.route("/diff/show-image/<string:uuid>/<string:datestr>")
@login_required
def show_single_image(uuid, datestr):
    """Return the bytes of a single history snapshot as an image response.

    Args:
        uuid: Key of the watch in ``datastore.data['watching']``.
        datestr: Key into ``watch['history']``. The literal string ``'None'``
            selects the first history entry instead.

    Returns:
        A Flask response whose body is the snapshot file's bytes. An unknown
        watch, an unknown history key, an empty history or a missing snapshot
        file yields a 404 rather than an unhandled exception.
    """
    from flask import abort, make_response

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        abort(404)

    history = watch['history']
    if datestr is None or datestr == 'None':
        # Fall back to the first recorded snapshot; there is none for a new watch.
        datestr = next(iter(history.keys()), None)
        if datestr is None:
            abort(404)

    try:
        fname = history[datestr]
    except KeyError:
        abort(404)

    try:
        with open(fname, 'rb') as f:
            image_bytes = f.read()
    except FileNotFoundError:
        abort(404)

    # Prefer the content type recorded on the watch, but only when it is an
    # image type: serving stored bytes under e.g. text/html would let the
    # browser interpret fetched content as a page.
    content_type = watch.get('content-type') or ''
    if not content_type.startswith('image/'):
        content_type = 'image/jpeg'

    resp = make_response(image_bytes)
    resp.headers['Content-Type'] = content_type
    return resp
|
||||||
|
|
||||||
|
# Render an image which contains the diff of two images.
# We always compare the newest snapshot against whatever compare_date we are given.
@app.route("/diff/image/<string:uuid>/<string:compare_date>")
@login_required
def render_diff_image(uuid, compare_date):
    """Return a JPEG highlighting the differences between two snapshots.

    Args:
        uuid: Key of the watch in ``datastore.data['watching']``.
        compare_date: Key into ``watch['history']`` of the snapshot to compare
            the newest snapshot against. The literal string ``'None'`` selects
            the first history entry instead.

    Returns:
        A Flask response with ``Content-Type: image/jpeg`` whose body is the
        output of ``image_diff.render_diff``. An unknown watch, an unknown
        history key or an empty history yields a 404 rather than an unhandled
        exception.
    """
    from changedetectionio import image_diff
    from flask import abort, make_response

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        abort(404)

    history = watch['history']
    history_keys = list(history.keys())
    if not history_keys:
        # Nothing has been fetched yet, so there is nothing to compare.
        abort(404)

    newest = history_keys[-1]

    # @todo this is weird
    if compare_date is None or compare_date == 'None':
        compare_date = history_keys[0]

    try:
        prev_img = history[compare_date]
    except KeyError:
        abort(404)

    new_img = history[newest]
    img = image_diff.render_diff(new_img, prev_img)

    resp = make_response(img)
    # render_diff() always encodes its result as JPEG.
    resp.headers['Content-Type'] = 'image/jpeg'
    return resp
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/<string:uuid>/snapshot/current", methods=['GET'])
|
@app.route("/api/<string:uuid>/snapshot/current", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
def api_snapshot(uuid):
|
def api_snapshot(uuid):
|
||||||
|
|||||||
@@ -5,8 +5,9 @@ from selenium import webdriver
|
|||||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
||||||
from selenium.common.exceptions import WebDriverException
|
from selenium.common.exceptions import WebDriverException
|
||||||
import urllib3.exceptions
|
|
||||||
|
|
||||||
|
# image/jpeg etc
|
||||||
|
supported_binary_types = ['image']
|
||||||
|
|
||||||
class EmptyReply(Exception):
|
class EmptyReply(Exception):
|
||||||
def __init__(self, status_code, url):
|
def __init__(self, status_code, url):
|
||||||
@@ -51,6 +52,15 @@ class Fetcher():
|
|||||||
# def return_diff(self, stream_a, stream_b):
|
# def return_diff(self, stream_a, stream_b):
|
||||||
# return
|
# return
|
||||||
|
|
||||||
|
# Assume we dont support it as binary if its not in our list
def supported_binary_type(content_type):
    """Tell whether a Content-Type value names a binary type we support.

    Only the major type (the part before the first ``/``) is compared,
    case-insensitively, against the module-level ``supported_binary_types``
    list, so ``image/jpeg`` and ``IMAGE/PNG; charset=binary`` both match
    the ``image`` entry.

    Args:
        content_type: Raw Content-Type header value; may be empty or None.

    Returns:
        bool: True when the major type is in ``supported_binary_types``.
        False otherwise, including for an empty or missing content type,
        which cannot be "in our list".
    """
    # Not a binary thing we support? then use text (also used for JSON/XML etc)
    # @todo - future - use regex for matching
    if not content_type:
        return False

    major_type = content_type.lower().strip().split('/')[0]
    return major_type in {entry.lower() for entry in supported_binary_types}
|
||||||
|
|
||||||
def available_fetchers():
|
def available_fetchers():
|
||||||
import inspect
|
import inspect
|
||||||
from changedetectionio import content_fetcher
|
from changedetectionio import content_fetcher
|
||||||
@@ -156,15 +166,18 @@ class html_requests(Fetcher):
|
|||||||
verify=False)
|
verify=False)
|
||||||
|
|
||||||
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
|
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
|
||||||
# Return bytes here
|
|
||||||
html = r.text
|
if not supported_binary_type(r.headers.get('Content-Type', '')):
|
||||||
|
content = r.text
|
||||||
|
else:
|
||||||
|
content = r.content
|
||||||
|
|
||||||
# @todo test this
|
# @todo test this
|
||||||
# @todo maybe you really want to test zero-byte return pages?
|
# @todo maybe you really want to test zero-byte return pages?
|
||||||
if not r or not html or not len(html):
|
if not r or not content or not len(content):
|
||||||
raise EmptyReply(url=url, status_code=r.status_code)
|
raise EmptyReply(url=url, status_code=r.status_code)
|
||||||
|
|
||||||
self.status_code = r.status_code
|
self.status_code = r.status_code
|
||||||
self.content = html
|
self.content = content
|
||||||
self.headers = r.headers
|
self.headers = r.headers
|
||||||
|
|
||||||
|
|||||||
@@ -55,10 +55,13 @@ class perform_site_check():
|
|||||||
|
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
stripped_text_from_html = ""
|
stripped_text_from_html = ""
|
||||||
|
fetched_md5 = ""
|
||||||
|
|
||||||
|
original_content_before_filters = False
|
||||||
|
|
||||||
watch = self.datastore.data['watching'][uuid]
|
watch = self.datastore.data['watching'][uuid]
|
||||||
# Unset any existing notification error
|
|
||||||
|
|
||||||
|
# Unset any existing notification error
|
||||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||||
|
|
||||||
extra_headers = self.datastore.get_val(uuid, 'headers')
|
extra_headers = self.datastore.get_val(uuid, 'headers')
|
||||||
@@ -92,6 +95,7 @@ class perform_site_check():
|
|||||||
|
|
||||||
fetcher = klass()
|
fetcher = klass()
|
||||||
fetcher.run(url, timeout, request_headers, request_body, request_method)
|
fetcher.run(url, timeout, request_headers, request_body, request_method)
|
||||||
|
|
||||||
# Fetching complete, now filters
|
# Fetching complete, now filters
|
||||||
# @todo move to class / maybe inside of fetcher abstract base?
|
# @todo move to class / maybe inside of fetcher abstract base?
|
||||||
|
|
||||||
@@ -101,26 +105,39 @@ class perform_site_check():
|
|||||||
# - Do we convert to JSON?
|
# - Do we convert to JSON?
|
||||||
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
|
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
|
||||||
# return content().textfilter().jsonextract().checksumcompare() ?
|
# return content().textfilter().jsonextract().checksumcompare() ?
|
||||||
|
update_obj['content-type'] = fetcher.headers.get('Content-Type', '').lower().strip()
|
||||||
is_json = fetcher.headers.get('Content-Type', '') == 'application/json'
|
|
||||||
is_html = not is_json
|
# Could be 'application/json; charset=utf-8' etc
|
||||||
|
is_json = 'application/json' in update_obj['content-type']
|
||||||
|
is_text_or_html = 'text/' in update_obj['content-type'] # text/plain , text/html etc
|
||||||
|
is_binary = not is_text_or_html and content_fetcher.supported_binary_type(update_obj['content-type'])
|
||||||
css_filter_rule = watch['css_filter']
|
css_filter_rule = watch['css_filter']
|
||||||
|
|
||||||
has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
|
has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
|
||||||
|
|
||||||
|
# Auto-detect application/json, make it reformat the JSON to something nice
|
||||||
if is_json and not has_filter_rule:
|
if is_json and not has_filter_rule:
|
||||||
css_filter_rule = "json:$"
|
css_filter_rule = "json:$"
|
||||||
has_filter_rule = True
|
has_filter_rule = True
|
||||||
|
|
||||||
if has_filter_rule:
|
##### CONVERT THE INPUT TO TEXT, EXTRACT THE PARTS THAT NEED TO BE FILTERED
|
||||||
if 'json:' in css_filter_rule:
|
|
||||||
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
|
|
||||||
is_html = False
|
|
||||||
|
|
||||||
if is_html:
|
# Dont depend on the content-type header here, maybe it's not present
|
||||||
|
if 'json:' in css_filter_rule:
|
||||||
|
is_json = True
|
||||||
|
rule = css_filter_rule.replace('json:', '')
|
||||||
|
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content,
|
||||||
|
jsonpath_filter=rule).encode('utf-8')
|
||||||
|
is_text_or_html = False
|
||||||
|
original_content_before_filters = stripped_text_from_html
|
||||||
|
|
||||||
|
if is_text_or_html:
|
||||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
html_content = fetcher.content
|
html_content = fetcher.content
|
||||||
if not fetcher.headers.get('Content-Type', '') == 'text/plain':
|
if 'text/plain' in update_obj['content-type']:
|
||||||
|
stripped_text_from_html = html_content
|
||||||
|
|
||||||
|
# Assume it's HTML if it's not text/plain
|
||||||
|
if not 'text/plain' in update_obj['content-type']:
|
||||||
if has_filter_rule:
|
if has_filter_rule:
|
||||||
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||||
if css_filter_rule[0] == '/':
|
if css_filter_rule[0] == '/':
|
||||||
@@ -128,35 +145,52 @@ class perform_site_check():
|
|||||||
else:
|
else:
|
||||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
||||||
|
|
||||||
# get_text() via inscriptis
|
# get_text() via inscriptis
|
||||||
stripped_text_from_html = get_text(html_content)
|
stripped_text_from_html = get_text(html_content)
|
||||||
else:
|
|
||||||
# Don't run get_text or xpath/css filters on plaintext
|
|
||||||
stripped_text_from_html = html_content
|
|
||||||
|
|
||||||
# Re #340 - return the content before the 'ignore text' was applied
|
# Extract title as title
|
||||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
|
||||||
|
if not watch['title'] or not len(watch['title']):
|
||||||
|
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
|
||||||
|
|
||||||
|
# Re #340 - return the content before the 'ignore text' was applied
|
||||||
|
original_content_before_filters = stripped_text_from_html.encode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
||||||
# in the future we'll implement other mechanisms.
|
# in the future we'll implement other mechanisms.
|
||||||
|
|
||||||
update_obj["last_check_status"] = fetcher.get_last_status_code()
|
update_obj["last_check_status"] = fetcher.get_last_status_code()
|
||||||
|
|
||||||
# If there's text to skip
|
######## AFTER FILTERING, STRIP OUT IGNORE TEXT
|
||||||
# @todo we could abstract out the get_text() to handle this cleaner
|
if is_text_or_html:
|
||||||
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
||||||
if len(text_to_ignore):
|
if len(text_to_ignore):
|
||||||
stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
||||||
else:
|
else:
|
||||||
stripped_text_from_html = stripped_text_from_html.encode('utf8')
|
stripped_text_from_html = stripped_text_from_html.encode('utf8')
|
||||||
|
|
||||||
|
|
||||||
|
######## CALCULATE CHECKSUM FOR DIFF DETECTION
|
||||||
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||||
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
if is_text_or_html:
|
||||||
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
|
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||||
else:
|
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
|
||||||
|
else:
|
||||||
|
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
|
||||||
|
|
||||||
|
if is_json:
|
||||||
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
|
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
|
||||||
|
|
||||||
|
# Goal here in the future is to be able to abstract out different content type checks into their own class
|
||||||
|
|
||||||
|
if is_binary:
|
||||||
|
# @todo - use some actual image hash here where possible, audio hash, etc etc
|
||||||
|
m = hashlib.sha256()
|
||||||
|
m.update(fetcher.content)
|
||||||
|
fetched_md5 = m.hexdigest()
|
||||||
|
original_content_before_filters = fetcher.content
|
||||||
|
|
||||||
# On the first run of a site, watch['previous_md5'] will be an empty string, set it the current one.
|
# On the first run of a site, watch['previous_md5'] will be an empty string, set it the current one.
|
||||||
if not len(watch['previous_md5']):
|
if not len(watch['previous_md5']):
|
||||||
watch['previous_md5'] = fetched_md5
|
watch['previous_md5'] = fetched_md5
|
||||||
@@ -164,36 +198,30 @@ class perform_site_check():
|
|||||||
|
|
||||||
blocked_by_not_found_trigger_text = False
|
blocked_by_not_found_trigger_text = False
|
||||||
|
|
||||||
if len(watch['trigger_text']):
|
# Trigger text can apply to JSON parsed documents too
|
||||||
blocked_by_not_found_trigger_text = True
|
if is_text_or_html or is_json:
|
||||||
for line in watch['trigger_text']:
|
if len(watch['trigger_text']):
|
||||||
# Because JSON wont serialize a re.compile object
|
blocked_by_not_found_trigger_text = True
|
||||||
if line[0] == '/' and line[-1] == '/':
|
for line in watch['trigger_text']:
|
||||||
regex = re.compile(line.strip('/'), re.IGNORECASE)
|
# Because JSON wont serialize a re.compile object
|
||||||
# Found it? so we don't wait for it anymore
|
if line[0] == '/' and line[-1] == '/':
|
||||||
r = re.search(regex, str(stripped_text_from_html))
|
regex = re.compile(line.strip('/'), re.IGNORECASE)
|
||||||
if r:
|
# Found it? so we don't wait for it anymore
|
||||||
|
r = re.search(regex, str(stripped_text_from_html))
|
||||||
|
if r:
|
||||||
|
blocked_by_not_found_trigger_text = False
|
||||||
|
break
|
||||||
|
|
||||||
|
elif line.lower() in str(stripped_text_from_html).lower():
|
||||||
|
# We found it don't wait for it.
|
||||||
blocked_by_not_found_trigger_text = False
|
blocked_by_not_found_trigger_text = False
|
||||||
break
|
break
|
||||||
|
|
||||||
elif line.lower() in str(stripped_text_from_html).lower():
|
|
||||||
# We found it don't wait for it.
|
|
||||||
blocked_by_not_found_trigger_text = False
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
|
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
|
||||||
changed_detected = True
|
changed_detected = True
|
||||||
update_obj["previous_md5"] = fetched_md5
|
update_obj["previous_md5"] = fetched_md5
|
||||||
update_obj["last_changed"] = timestamp
|
update_obj["last_changed"] = timestamp
|
||||||
|
|
||||||
|
|
||||||
# Extract title as title
|
# original_content_before_filters is returned for saving the data to disk
|
||||||
if is_html:
|
return changed_detected, update_obj, original_content_before_filters
|
||||||
if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
|
|
||||||
if not watch['title'] or not len(watch['title']):
|
|
||||||
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
|
|
||||||
|
|
||||||
|
|
||||||
return changed_detected, update_obj, text_content_before_ignored_filter
|
|
||||||
|
|||||||
41
changedetectionio/image_diff.py
Normal file
41
changedetectionio/image_diff.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# import the necessary packages
|
||||||
|
from skimage.metrics import structural_similarity as compare_ssim
|
||||||
|
import argparse
|
||||||
|
import imutils
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
# From https://www.pyimagesearch.com/2017/06/19/image-difference-with-opencv-and-python/
|
||||||
|
def render_diff(fpath_imageA, fpath_imageB):
    """Render imageA with boxes drawn around the regions that differ from imageB.

    Both images are converted to grayscale, compared with the Structural
    Similarity Index (SSIM), and the thresholded difference map is turned
    into contours whose bounding boxes are drawn onto imageA.

    Args:
        fpath_imageA: Path of the image that is annotated and returned.
        fpath_imageB: Path of the image to compare against.

    Returns:
        bytes: imageA, annotated with one rectangle per differing region,
        encoded as JPEG.

    Raises:
        ValueError: If either path cannot be read as an image.
    """
    import logging

    imageA = cv2.imread(fpath_imageA)
    imageB = cv2.imread(fpath_imageB)

    # cv2.imread() reports failure by returning None instead of raising,
    # which would otherwise surface later as an opaque cvtColor error.
    for path, image in ((fpath_imageA, imageA), (fpath_imageB, imageB)):
        if image is None:
            raise ValueError("Could not read image file: {}".format(path))

    # SSIM requires both inputs to have identical dimensions; scale the
    # comparison image to the size of the one we are going to return.
    if imageA.shape[:2] != imageB.shape[:2]:
        height, width = imageA.shape[:2]
        imageB = cv2.resize(imageB, (width, height))

    # convert the images to grayscale
    grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
    grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)

    # compute the Structural Similarity Index (SSIM) between the two
    # images, ensuring that the difference image is returned
    score, diff = compare_ssim(grayA, grayB, full=True)
    diff = (diff * 255).astype("uint8")
    logging.getLogger(__name__).debug("SSIM: %s", score)

    # threshold the difference image, followed by finding contours to
    # obtain the regions of the two input images that differ
    thresh = cv2.threshold(diff, 0, 255,
                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    # Draw the bounding box of every differing region onto the image we
    # return (colour is given in BGR order, so this is red).
    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        cv2.rectangle(imageA, (x, y), (x + w, y + h), (0, 0, 255), 2)

    return cv2.imencode('.jpg', imageA)[1].tobytes()
|
||||||
@@ -372,7 +372,9 @@ class ChangeDetectionStore:
|
|||||||
if not os.path.isdir(output_path):
|
if not os.path.isdir(output_path):
|
||||||
mkdir(output_path)
|
mkdir(output_path)
|
||||||
|
|
||||||
fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
suffix = "stripped.txt"
|
||||||
|
|
||||||
|
fname = "{}/{}.{}".format(output_path, uuid.uuid4(), suffix)
|
||||||
with open(fname, 'wb') as f:
|
with open(fname, 'wb') as f:
|
||||||
f.write(contents)
|
f.write(contents)
|
||||||
f.close()
|
f.close()
|
||||||
|
|||||||
59
changedetectionio/templates/diff-image.html
Normal file
59
changedetectionio/templates/diff-image.html
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
|
||||||
|
<div id="settings">
|
||||||
|
<h1>Differences</h1>
|
||||||
|
<form class="pure-form " action="" method="GET">
|
||||||
|
<fieldset>
|
||||||
|
{% if versions|length >= 1 %}
|
||||||
|
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
|
||||||
|
<select id="diff-version" name="previous_version">
|
||||||
|
{% for version in versions %}
|
||||||
|
<option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
|
||||||
|
{{version}}
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
<button type="submit" class="pure-button pure-button-primary">Go</button>
|
||||||
|
{% endif %}
|
||||||
|
</fieldset>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="diff-ui">
|
||||||
|
<img style="max-width: 100%" src="{{ url_for('render_diff_image', uuid=uuid, compare_date=current_previous_version) }}" />
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<span style="width: 50%">
|
||||||
|
<img style="max-width: 100%" src="{{ url_for('show_single_image', uuid=uuid, datestr=newest_version_timestamp) }}" />
|
||||||
|
</span>
|
||||||
|
<span style="width: 50%">
|
||||||
|
<img style="max-width: 100%" src="{{ url_for('show_single_image', uuid=uuid, datestr=current_previous_version) }}" />
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.js')}}"></script>
|
||||||
|
|
||||||
|
<script defer="">
|
||||||
|
window.onload = function() {
|
||||||
|
/* Set current version date as local time in the browser also */
|
||||||
|
var current_v = document.getElementById("current-v-date");
|
||||||
|
var dateObject = new Date({{ newest_version_timestamp }}*1000);
|
||||||
|
current_v.innerHTML=dateObject.toLocaleString();
|
||||||
|
|
||||||
|
/* Convert what is options from UTC time.time() to local browser time */
|
||||||
|
var diffList=document.getElementById("diff-version");
|
||||||
|
if (typeof(diffList) != 'undefined' && diffList != null) {
|
||||||
|
for (var option of diffList.options) {
|
||||||
|
var dateObject = new Date(option.value*1000);
|
||||||
|
option.label=dateObject.toLocaleString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
13
changedetectionio/templates/preview-image.html
Normal file
13
changedetectionio/templates/preview-image.html
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
|
||||||
|
<div id="settings">
|
||||||
|
<h1>Current</h1>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="diff-ui">
|
||||||
|
image goes here
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
<tbody>
|
<tbody>
|
||||||
<tr>
|
<tr>
|
||||||
<td id="diff-col">
|
<td id="diff-col">
|
||||||
<span id="result">{% for row in content %}{{row}}{% endfor %}</span>
|
<span id="result">{{content}}</span>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|||||||
@@ -100,6 +100,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
|||||||
# It should have picked up the <title>
|
# It should have picked up the <title>
|
||||||
assert b'head title' in res.data
|
assert b'head title' in res.data
|
||||||
|
|
||||||
|
|
||||||
|
# be sure the HTML converter worked
|
||||||
|
res = client.get(url_for("preview_page", uuid="first"))
|
||||||
|
assert b'<html>' not in res.data
|
||||||
|
|
||||||
|
res = client.get(url_for("preview_page", uuid="first"))
|
||||||
|
assert b'Some initial text' in res.data
|
||||||
|
|
||||||
#
|
#
|
||||||
# Cleanup everything
|
# Cleanup everything
|
||||||
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||||
|
|||||||
56
changedetectionio/tests/test_binary_fetch.py
Normal file
56
changedetectionio/tests/test_binary_fetch.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
import secrets
|
||||||
|
from flask import url_for
|
||||||
|
from . util import live_server_setup
|
||||||
|
|
||||||
|
|
||||||
|
def test_binary_file_change(client, live_server):
    """Check that a change in a binary (image) response is detected.

    The test endpoint serves whatever is in ``test-datastore/test.bin``.
    The watch is checked twice against the same payload (no change
    expected), then the payload is replaced and a third check must flag
    the watch as 'unviewed'.
    """
    binary_path = "test-datastore/test.bin"
    sleep_time_for_fetch_thread = 3

    def write_random_payload():
        # Fresh random bytes each call, so consecutive payloads differ.
        with open(binary_path, "wb") as f:
            f.write(secrets.token_bytes())

    write_random_payload()

    live_server_setup(live_server)

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_binaryfile_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Trigger a check
    client.get(url_for("api_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # Trigger a second check against the unchanged payload
    client.get(url_for("api_watch_checknow"), follow_redirects=True)

    # Wait for the re-check to finish; otherwise the assertion below only
    # reflects the first check and the payload could be rewritten mid-fetch.
    time.sleep(sleep_time_for_fetch_thread)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'/test-binary-endpoint' in res.data

    # Make a change
    write_random_payload()

    # Trigger a check
    client.get(url_for("api_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # The changed payload should now be reported (the 'unviewed' class appears)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
|
||||||
@@ -37,6 +37,16 @@ def set_modified_response():
|
|||||||
|
|
||||||
def live_server_setup(live_server):
|
def live_server_setup(live_server):
|
||||||
|
|
||||||
|
@live_server.app.route('/test-binary-endpoint')
def test_binaryfile_endpoint():
    """Serve the current bytes of test-datastore/test.bin as an image/jpeg response."""
    from flask import make_response

    # The payload lives in a file rather than a global variable: a global
    # did not seem to work here, so the test rewrites the file instead.
    with open("test-datastore/test.bin", "rb") as binary_file:
        payload = binary_file.read()

    response = make_response(payload)
    response.headers['Content-Type'] = 'image/jpeg'
    return response
|
||||||
|
|
||||||
@live_server.app.route('/test-endpoint')
|
@live_server.app.route('/test-endpoint')
|
||||||
def test_endpoint():
|
def test_endpoint():
|
||||||
|
|||||||
@@ -42,7 +42,6 @@ class update_worker(threading.Thread):
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||||
|
|
||||||
# Re #342
|
# Re #342
|
||||||
@@ -135,8 +134,8 @@ class update_worker(threading.Thread):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||||
print("!!!! Exception in update_worker !!!\n", e)
|
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||||
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||||
finally:
|
finally:
|
||||||
# Always record that we atleast tried
|
# Always record that we atleast tried
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||||
|
|||||||
Reference in New Issue
Block a user