mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 00:27:48 +00:00 
			
		
		
		
	Compare commits
	
		
			20 Commits
		
	
	
		
			test-speed
			...
			image-bina
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					9b036d7b19 | ||
| 
						 | 
					0761984bcd | ||
| 
						 | 
					e73721a3f0 | ||
| 
						 | 
					86fc9d669f | ||
| 
						 | 
					7a66b69158 | ||
| 
						 | 
					ddd7b2772d | ||
| 
						 | 
					305060f79c | ||
| 
						 | 
					cfcf59d009 | ||
| 
						 | 
					af25b824a0 | ||
| 
						 | 
					a29085fa18 | ||
| 
						 | 
					d7832d735d | ||
| 
						 | 
					7d1c4d7673 | ||
| 
						 | 
					6e00f0e025 | ||
| 
						 | 
					4f536bb559 | ||
| 
						 | 
					38d8aa8d28 | ||
| 
						 | 
					dec47d5c43 | ||
| 
						 | 
					cec24fe2c1 | ||
| 
						 | 
					f4bc0aa2ba | ||
| 
						 | 
					499c4797da | ||
| 
						 | 
					9bc71d187e | 
@@ -695,6 +695,10 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
    @app.route("/diff/<string:uuid>", methods=['GET'])
 | 
			
		||||
    @login_required
 | 
			
		||||
    def diff_history_page(uuid):
 | 
			
		||||
        from changedetectionio import content_fetcher
 | 
			
		||||
 | 
			
		||||
        newest_version_file_contents = ""
 | 
			
		||||
        previous_version_file_contents = ""
 | 
			
		||||
 | 
			
		||||
        # More for testing, possible to return the first/only
 | 
			
		||||
        if uuid == 'first':
 | 
			
		||||
@@ -720,21 +724,28 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
 | 
			
		||||
        # Save the current newest history as the most recently viewed
 | 
			
		||||
        datastore.set_last_viewed(uuid, dates[0])
 | 
			
		||||
        newest_file = watch['history'][dates[0]]
 | 
			
		||||
        with open(newest_file, 'r') as f:
 | 
			
		||||
            newest_version_file_contents = f.read()
 | 
			
		||||
 | 
			
		||||
        previous_version = request.args.get('previous_version')
 | 
			
		||||
        try:
 | 
			
		||||
            previous_file = watch['history'][previous_version]
 | 
			
		||||
        except KeyError:
 | 
			
		||||
            # Not present, use a default value, the second one in the sorted list.
 | 
			
		||||
            previous_file = watch['history'][dates[1]]
 | 
			
		||||
        if ('content-type' in watch and content_fetcher.supported_binary_type(watch['content-type'])):
 | 
			
		||||
            template = "diff-image.html"
 | 
			
		||||
        else:
 | 
			
		||||
            newest_file = watch['history'][dates[0]]
 | 
			
		||||
            with open(newest_file, 'r') as f:
 | 
			
		||||
                newest_version_file_contents = f.read()
 | 
			
		||||
 | 
			
		||||
        with open(previous_file, 'r') as f:
 | 
			
		||||
            previous_version_file_contents = f.read()
 | 
			
		||||
            try:
 | 
			
		||||
                previous_file = watch['history'][previous_version]
 | 
			
		||||
            except KeyError:
 | 
			
		||||
                # Not present, use a default value, the second one in the sorted list.
 | 
			
		||||
                previous_file = watch['history'][dates[1]]
 | 
			
		||||
 | 
			
		||||
        output = render_template("diff.html", watch_a=watch,
 | 
			
		||||
            with open(previous_file, 'r') as f:
 | 
			
		||||
                previous_version_file_contents = f.read()
 | 
			
		||||
 | 
			
		||||
            template = "diff.html"
 | 
			
		||||
 | 
			
		||||
        output = render_template(template,
 | 
			
		||||
                                 watch_a=watch,
 | 
			
		||||
                                 newest=newest_version_file_contents,
 | 
			
		||||
                                 previous=previous_version_file_contents,
 | 
			
		||||
                                 extra_stylesheets=extra_stylesheets,
 | 
			
		||||
@@ -751,6 +762,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
    @app.route("/preview/<string:uuid>", methods=['GET'])
 | 
			
		||||
    @login_required
 | 
			
		||||
    def preview_page(uuid):
 | 
			
		||||
        from changedetectionio import content_fetcher
 | 
			
		||||
 | 
			
		||||
        # More for testing, possible to return the first/only
 | 
			
		||||
        if uuid == 'first':
 | 
			
		||||
@@ -765,14 +777,25 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
            return redirect(url_for('index'))
 | 
			
		||||
 | 
			
		||||
        newest = list(watch['history'].keys())[-1]
 | 
			
		||||
        with open(watch['history'][newest], 'r') as f:
 | 
			
		||||
            content = f.readlines()
 | 
			
		||||
        fname = watch['history'][newest]
 | 
			
		||||
 | 
			
		||||
        if ('content-type' in watch and content_fetcher.supported_binary_type(watch['content-type'])):
 | 
			
		||||
            template = "preview-image.html"
 | 
			
		||||
            content = fname
 | 
			
		||||
        else:
 | 
			
		||||
            template = "preview.html"
 | 
			
		||||
            try:
 | 
			
		||||
                with open(fname, 'r') as f:
 | 
			
		||||
                    content = f.read()
 | 
			
		||||
            except:
 | 
			
		||||
                content = "Cant read {}".format(fname)
 | 
			
		||||
 | 
			
		||||
        output = render_template("preview.html",
 | 
			
		||||
                                 content=content,
 | 
			
		||||
                                 extra_stylesheets=extra_stylesheets,
 | 
			
		||||
                                 current_diff_url=watch['url'],
 | 
			
		||||
                                 uuid=uuid)
 | 
			
		||||
                                 uuid=uuid,
 | 
			
		||||
                                 watch=watch)
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
    @app.route("/settings/notification-logs", methods=['GET'])
 | 
			
		||||
@@ -783,6 +806,50 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
                                 logs=notification_debug_log if len(notification_debug_log) else ["No errors or warnings detected"])
 | 
			
		||||
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Serve one stored snapshot image of a watch, unmodified.
    # `datestr` is a key of watch['history']; the literal string 'None' selects the first history entry.
    @app.route("/diff/show-image/<string:uuid>/<string:datestr>")
    @login_required
    def show_single_image(uuid, datestr):
        """Return the raw bytes of a single history snapshot as an image response.

        :param uuid: key of the watch in datastore.data['watching']
        :param datestr: key of the snapshot in watch['history'], or 'None' for the first entry
        :return: a response containing the file contents, served as image/jpeg

        Responds with 404 when the watch, the requested history entry, or any
        history at all does not exist (these previously surfaced as a 500).
        """
        from flask import abort, make_response

        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            abort(404)

        history = watch['history']

        if datestr == 'None' or datestr is None:
            history_dates = list(history.keys())
            if not history_dates:
                # Nothing has been saved for this watch yet
                abort(404)
            datestr = history_dates[0]

        try:
            fname = history[datestr]
        except KeyError:
            abort(404)

        with open(fname, 'rb') as f:
            resp = make_response(f.read())

        # @todo assumption here about the type, re-encode? detect?
        resp.headers['Content-Type'] = 'image/jpeg'
        return resp
 | 
			
		||||
 | 
			
		||||
    # render an image which contains the diff of two images
    # We always compare the newest against whatever compare_date we are given
    @app.route("/diff/image/<string:uuid>/<string:compare_date>")
    @login_required
    def render_diff_image(uuid, compare_date):
        """Return an image highlighting where the newest snapshot differs from another one.

        :param uuid: key of the watch in datastore.data['watching']
        :param compare_date: key in watch['history'] of the snapshot to compare the
            newest one against, or 'None' for the first history entry
        :return: a response containing the bytes produced by image_diff.render_diff(),
            served as image/jpeg

        Responds with 404 when the watch, the requested history entry, or any
        history at all does not exist (these previously surfaced as a 500).
        """
        from changedetectionio import image_diff
        from flask import abort, make_response

        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            abort(404)

        history = watch['history']
        history_dates = list(history.keys())
        if not history_dates:
            # Nothing has been saved for this watch yet, so there is nothing to compare
            abort(404)

        newest = history_dates[-1]

        # @todo this is weird
        if compare_date == 'None' or compare_date is None:
            compare_date = history_dates[0]

        try:
            prev_img = history[compare_date]
        except KeyError:
            abort(404)

        new_img = history[newest]
        img = image_diff.render_diff(new_img, prev_img)

        resp = make_response(img)
        resp.headers['Content-Type'] = 'image/jpeg'
        return resp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    @app.route("/api/<string:uuid>/snapshot/current", methods=['GET'])
 | 
			
		||||
    @login_required
 | 
			
		||||
    def api_snapshot(uuid):
 | 
			
		||||
 
 | 
			
		||||
@@ -5,8 +5,9 @@ from selenium import webdriver
 | 
			
		||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 | 
			
		||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
 | 
			
		||||
from selenium.common.exceptions import WebDriverException
 | 
			
		||||
import urllib3.exceptions
 | 
			
		||||
 | 
			
		||||
# image/jpeg etc
 | 
			
		||||
supported_binary_types = ['image']
 | 
			
		||||
 | 
			
		||||
class EmptyReply(Exception):
 | 
			
		||||
    def __init__(self, status_code, url):
 | 
			
		||||
@@ -51,6 +52,15 @@ class Fetcher():
 | 
			
		||||
#    def return_diff(self, stream_a, stream_b):
 | 
			
		||||
#        return
 | 
			
		||||
 | 
			
		||||
# Assume we dont support it as binary if its not in our list
 | 
			
		||||
def supported_binary_type(content_type):
    """Return True when `content_type` names a type we handle as binary.

    Only the major type (the part before the '/') is compared,
    case-insensitively, against the module-level `supported_binary_types`
    list, so 'image/jpeg' and 'IMAGE/PNG' both match an 'image' entry.

    A missing or empty content type is never reported as binary, so callers
    fall back to text handling (also used for JSON/XML etc).

    :param content_type: value of a Content-Type header; may be '' or None
    :return: True if the major type is in `supported_binary_types`, else False
    """
    # No content type at all is not in our list either, so don't claim support
    # (an empty value used to fall through and be reported as binary).
    if not content_type:
        return False

    # @todo - future - use regex for matching
    major_type = content_type.lower().strip().split('/')[0]
    return major_type in (string.lower() for string in supported_binary_types)
 | 
			
		||||
 | 
			
		||||
def available_fetchers():
 | 
			
		||||
        import inspect
 | 
			
		||||
        from changedetectionio import content_fetcher
 | 
			
		||||
@@ -156,15 +166,18 @@ class html_requests(Fetcher):
 | 
			
		||||
                         verify=False)
 | 
			
		||||
 | 
			
		||||
        # https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
 | 
			
		||||
        # Return bytes here
 | 
			
		||||
        html = r.text
 | 
			
		||||
 | 
			
		||||
        if not supported_binary_type(r.headers.get('Content-Type', '')):
 | 
			
		||||
            content = r.text
 | 
			
		||||
        else:
 | 
			
		||||
            content = r.content
 | 
			
		||||
 | 
			
		||||
        # @todo test this
 | 
			
		||||
        # @todo maybe you really want to test zero-byte return pages?
 | 
			
		||||
        if not r or not html or not len(html):
 | 
			
		||||
        if not r or not content or not len(content):
 | 
			
		||||
            raise EmptyReply(url=url, status_code=r.status_code)
 | 
			
		||||
 | 
			
		||||
        self.status_code = r.status_code
 | 
			
		||||
        self.content = html
 | 
			
		||||
        self.content = content
 | 
			
		||||
        self.headers = r.headers
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -55,10 +55,13 @@ class perform_site_check():
 | 
			
		||||
 | 
			
		||||
        changed_detected = False
 | 
			
		||||
        stripped_text_from_html = ""
 | 
			
		||||
        fetched_md5 = ""
 | 
			
		||||
 | 
			
		||||
        original_content_before_filters = False
 | 
			
		||||
 | 
			
		||||
        watch = self.datastore.data['watching'][uuid]
 | 
			
		||||
        # Unset any existing notification error
 | 
			
		||||
 | 
			
		||||
        # Unset any existing notification error
 | 
			
		||||
        update_obj = {'last_notification_error': False, 'last_error': False}
 | 
			
		||||
 | 
			
		||||
        extra_headers = self.datastore.get_val(uuid, 'headers')
 | 
			
		||||
@@ -92,6 +95,7 @@ class perform_site_check():
 | 
			
		||||
 | 
			
		||||
            fetcher = klass()
 | 
			
		||||
            fetcher.run(url, timeout, request_headers, request_body, request_method)
 | 
			
		||||
 | 
			
		||||
            # Fetching complete, now filters
 | 
			
		||||
            # @todo move to class / maybe inside of fetcher abstract base?
 | 
			
		||||
 | 
			
		||||
@@ -101,26 +105,39 @@ class perform_site_check():
 | 
			
		||||
            #  - Do we convert to JSON?
 | 
			
		||||
            # https://stackoverflow.com/questions/41817578/basic-method-chaining ?
 | 
			
		||||
            # return content().textfilter().jsonextract().checksumcompare() ?
 | 
			
		||||
 | 
			
		||||
            is_json = fetcher.headers.get('Content-Type', '') == 'application/json'
 | 
			
		||||
            is_html = not is_json
 | 
			
		||||
            update_obj['content-type'] = fetcher.headers.get('Content-Type', '').lower().strip()
 | 
			
		||||
            
 | 
			
		||||
            # Could be 'application/json; charset=utf-8' etc
 | 
			
		||||
            is_json = 'application/json' in update_obj['content-type']
 | 
			
		||||
            is_text_or_html = 'text/' in update_obj['content-type'] # text/plain , text/html etc
 | 
			
		||||
            is_binary = not is_text_or_html and content_fetcher.supported_binary_type(update_obj['content-type'])
 | 
			
		||||
            css_filter_rule = watch['css_filter']
 | 
			
		||||
 | 
			
		||||
            has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
 | 
			
		||||
 | 
			
		||||
            # Auto-detect application/json, make it reformat the JSON to something nice
 | 
			
		||||
            if is_json and not has_filter_rule:
 | 
			
		||||
                css_filter_rule = "json:$"
 | 
			
		||||
                has_filter_rule = True
 | 
			
		||||
 | 
			
		||||
            if has_filter_rule:
 | 
			
		||||
                if 'json:' in css_filter_rule:
 | 
			
		||||
                    stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
 | 
			
		||||
                    is_html = False
 | 
			
		||||
            ##### CONVERT THE INPUT TO TEXT, EXTRACT THE PARTS THAT NEED TO BE FILTERED
 | 
			
		||||
 | 
			
		||||
            if is_html:
 | 
			
		||||
            # Dont depend on the content-type header here, maybe it's not present
 | 
			
		||||
            if 'json:' in css_filter_rule:
 | 
			
		||||
                is_json = True
 | 
			
		||||
                rule = css_filter_rule.replace('json:', '')
 | 
			
		||||
                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content,
 | 
			
		||||
                                                                            jsonpath_filter=rule).encode('utf-8')
 | 
			
		||||
                is_text_or_html = False
 | 
			
		||||
                original_content_before_filters = stripped_text_from_html
 | 
			
		||||
 | 
			
		||||
            if is_text_or_html:
 | 
			
		||||
                # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
 | 
			
		||||
                html_content = fetcher.content
 | 
			
		||||
                if not fetcher.headers.get('Content-Type', '') == 'text/plain':
 | 
			
		||||
                if 'text/plain' in update_obj['content-type']:
 | 
			
		||||
                    stripped_text_from_html = html_content
 | 
			
		||||
 | 
			
		||||
                # Assume it's HTML if it's not text/plain
 | 
			
		||||
                if not 'text/plain' in update_obj['content-type']:
 | 
			
		||||
                    if has_filter_rule:
 | 
			
		||||
                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
 | 
			
		||||
                        if css_filter_rule[0] == '/':
 | 
			
		||||
@@ -128,35 +145,52 @@ class perform_site_check():
 | 
			
		||||
                        else:
 | 
			
		||||
                            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
 | 
			
		||||
                            html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
 | 
			
		||||
 | 
			
		||||
                    # get_text() via inscriptis
 | 
			
		||||
                    stripped_text_from_html = get_text(html_content)
 | 
			
		||||
                else:
 | 
			
		||||
                    # Don't run get_text or xpath/css filters on plaintext
 | 
			
		||||
                    stripped_text_from_html = html_content
 | 
			
		||||
 | 
			
		||||
            # Re #340 - return the content before the 'ignore text' was applied
 | 
			
		||||
            text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
 | 
			
		||||
                    # Extract title as title
 | 
			
		||||
                    if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
 | 
			
		||||
                        if not watch['title'] or not len(watch['title']):
 | 
			
		||||
                            update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
 | 
			
		||||
 | 
			
		||||
                # Re #340 - return the content before the 'ignore text' was applied
 | 
			
		||||
                original_content_before_filters = stripped_text_from_html.encode('utf-8')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            # We rely on the actual text in the html output.. many sites have random script vars etc,
 | 
			
		||||
            # in the future we'll implement other mechanisms.
 | 
			
		||||
 | 
			
		||||
            update_obj["last_check_status"] = fetcher.get_last_status_code()
 | 
			
		||||
 | 
			
		||||
            # If there's text to skip
 | 
			
		||||
            # @todo we could abstract out the get_text() to handle this cleaner
 | 
			
		||||
            text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
 | 
			
		||||
            if len(text_to_ignore):
 | 
			
		||||
                stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore)
 | 
			
		||||
            else:
 | 
			
		||||
                stripped_text_from_html = stripped_text_from_html.encode('utf8')
 | 
			
		||||
            ######## AFTER FILTERING, STRIP OUT IGNORE TEXT
 | 
			
		||||
            if is_text_or_html:
 | 
			
		||||
                text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
 | 
			
		||||
                if len(text_to_ignore):
 | 
			
		||||
                    stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore)
 | 
			
		||||
                else:
 | 
			
		||||
                    stripped_text_from_html = stripped_text_from_html.encode('utf8')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            ######## CALCULATE CHECKSUM FOR DIFF DETECTION
 | 
			
		||||
            # Re #133 - if we should strip whitespaces from triggering the change detected comparison
 | 
			
		||||
            if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
 | 
			
		||||
                fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
 | 
			
		||||
            else:
 | 
			
		||||
            if is_text_or_html:
 | 
			
		||||
                if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
 | 
			
		||||
                    fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
 | 
			
		||||
                else:
 | 
			
		||||
                    fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
 | 
			
		||||
 | 
			
		||||
            if is_json:
 | 
			
		||||
                fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
 | 
			
		||||
 | 
			
		||||
            # Goal here in the future is to be able to abstract out different content type checks into their own class
 | 
			
		||||
 | 
			
		||||
            if is_binary:
 | 
			
		||||
                # @todo - use some actual image hash here where possible, audio hash, etc etc
 | 
			
		||||
                m = hashlib.sha256()
 | 
			
		||||
                m.update(fetcher.content)
 | 
			
		||||
                fetched_md5 = m.hexdigest()
 | 
			
		||||
                original_content_before_filters = fetcher.content
 | 
			
		||||
 | 
			
		||||
            # On the first run of a site, watch['previous_md5'] will be an empty string, set it the current one.
 | 
			
		||||
            if not len(watch['previous_md5']):
 | 
			
		||||
                watch['previous_md5'] = fetched_md5
 | 
			
		||||
@@ -164,36 +198,30 @@ class perform_site_check():
 | 
			
		||||
 | 
			
		||||
            blocked_by_not_found_trigger_text = False
 | 
			
		||||
 | 
			
		||||
            if len(watch['trigger_text']):
 | 
			
		||||
                blocked_by_not_found_trigger_text = True
 | 
			
		||||
                for line in watch['trigger_text']:
 | 
			
		||||
                    # Because JSON wont serialize a re.compile object
 | 
			
		||||
                    if line[0] == '/' and line[-1] == '/':
 | 
			
		||||
                        regex = re.compile(line.strip('/'), re.IGNORECASE)
 | 
			
		||||
                        # Found it? so we don't wait for it anymore
 | 
			
		||||
                        r = re.search(regex, str(stripped_text_from_html))
 | 
			
		||||
                        if r:
 | 
			
		||||
            # Trigger text can apply to JSON parsed documents too
 | 
			
		||||
            if is_text_or_html or is_json:
 | 
			
		||||
                if len(watch['trigger_text']):
 | 
			
		||||
                    blocked_by_not_found_trigger_text = True
 | 
			
		||||
                    for line in watch['trigger_text']:
 | 
			
		||||
                        # Because JSON wont serialize a re.compile object
 | 
			
		||||
                        if line[0] == '/' and line[-1] == '/':
 | 
			
		||||
                            regex = re.compile(line.strip('/'), re.IGNORECASE)
 | 
			
		||||
                            # Found it? so we don't wait for it anymore
 | 
			
		||||
                            r = re.search(regex, str(stripped_text_from_html))
 | 
			
		||||
                            if r:
 | 
			
		||||
                                blocked_by_not_found_trigger_text = False
 | 
			
		||||
                                break
 | 
			
		||||
 | 
			
		||||
                        elif line.lower() in str(stripped_text_from_html).lower():
 | 
			
		||||
                            # We found it don't wait for it.
 | 
			
		||||
                            blocked_by_not_found_trigger_text = False
 | 
			
		||||
                            break
 | 
			
		||||
 | 
			
		||||
                    elif line.lower() in str(stripped_text_from_html).lower():
 | 
			
		||||
                        # We found it don't wait for it.
 | 
			
		||||
                        blocked_by_not_found_trigger_text = False
 | 
			
		||||
                        break
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
 | 
			
		||||
                changed_detected = True
 | 
			
		||||
                update_obj["previous_md5"] = fetched_md5
 | 
			
		||||
                update_obj["last_changed"] = timestamp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            # Extract title as title
 | 
			
		||||
            if is_html:
 | 
			
		||||
                if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
 | 
			
		||||
                    if not watch['title'] or not len(watch['title']):
 | 
			
		||||
                        update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        return changed_detected, update_obj, text_content_before_ignored_filter
 | 
			
		||||
        # original_content_before_filters is returned for saving the data to disk
 | 
			
		||||
        return changed_detected, update_obj, original_content_before_filters
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										41
									
								
								changedetectionio/image_diff.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								changedetectionio/image_diff.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,41 @@
 | 
			
		||||
# import the necessary packages
 | 
			
		||||
from skimage.metrics import structural_similarity as compare_ssim
 | 
			
		||||
import argparse
 | 
			
		||||
import imutils
 | 
			
		||||
import cv2
 | 
			
		||||
 | 
			
		||||
# From https://www.pyimagesearch.com/2017/06/19/image-difference-with-opencv-and-python/
 | 
			
		||||
def render_diff(fpath_imageA, fpath_imageB):
    """Return JPEG bytes of image A with red boxes around the regions that differ from image B.

    The two files are loaded with OpenCV, converted to grayscale and compared
    with the Structural Similarity Index (SSIM). The SSIM difference map is
    thresholded, its external contours are located, and the bounding box of
    each contour is drawn onto image A.

    :param fpath_imageA: path of the image that is annotated and returned
    :param fpath_imageB: path of the image to compare against
    :return: the annotated image A, JPEG-encoded, as bytes
    :raises ValueError: if either path cannot be read as an image
    """
    imageA = cv2.imread(fpath_imageA)
    imageB = cv2.imread(fpath_imageB)

    # cv2.imread() does not raise for a missing or undecodable file, it returns
    # None; fail here with a clear message instead of deep inside cvtColor().
    for fpath, image in ((fpath_imageA, imageA), (fpath_imageB, imageB)):
        if image is None:
            raise ValueError("Unable to read image: {}".format(fpath))

    # SSIM needs both inputs to have the same dimensions; snapshots taken at
    # different times may differ in size, so scale B to match A.
    if imageA.shape[:2] != imageB.shape[:2]:
        height, width = imageA.shape[:2]
        imageB = cv2.resize(imageB, (width, height))

    # convert the images to grayscale
    grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
    grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)

    # compute the Structural Similarity Index (SSIM) between the two
    # images, ensuring that the difference image is returned
    (score, diff) = compare_ssim(grayA, grayB, full=True)
    diff = (diff * 255).astype("uint8")
    print("SSIM: {}".format(score))

    # threshold the difference image, followed by finding contours to
    # obtain the regions of the two input images that differ
    thresh = cv2.threshold(diff, 0, 255,
                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    # loop over the contours
    for c in cnts:
        # compute the bounding box of the contour and draw it on image A
        # (the only image that is returned) to show where the images differ
        (x, y, w, h) = cv2.boundingRect(c)
        cv2.rectangle(imageA, (x, y), (x + w, y + h), (0, 0, 255), 2)

    return cv2.imencode('.jpg', imageA)[1].tobytes()
 | 
			
		||||
@@ -372,7 +372,9 @@ class ChangeDetectionStore:
 | 
			
		||||
        if not os.path.isdir(output_path):
 | 
			
		||||
            mkdir(output_path)
 | 
			
		||||
 | 
			
		||||
        fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
 | 
			
		||||
        suffix = "stripped.txt"
 | 
			
		||||
 | 
			
		||||
        fname = "{}/{}.{}".format(output_path, uuid.uuid4(), suffix)
 | 
			
		||||
        with open(fname, 'wb') as f:
 | 
			
		||||
            f.write(contents)
 | 
			
		||||
            f.close()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										59
									
								
								changedetectionio/templates/diff-image.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								changedetectionio/templates/diff-image.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,59 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
 | 
			
		||||
{% block content %}
 | 
			
		||||
 | 
			
		||||
<div id="settings">
 | 
			
		||||
    <h1>Differences</h1>
 | 
			
		||||
    <form class="pure-form " action="" method="GET">
 | 
			
		||||
        <fieldset>
 | 
			
		||||
            {% if versions|length >= 1 %}
 | 
			
		||||
            <label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
 | 
			
		||||
            <select id="diff-version" name="previous_version">
 | 
			
		||||
                {% for version in versions %}
 | 
			
		||||
                <option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
 | 
			
		||||
                    {{version}}
 | 
			
		||||
                </option>
 | 
			
		||||
                {% endfor %}
 | 
			
		||||
            </select>
 | 
			
		||||
            <button type="submit" class="pure-button pure-button-primary">Go</button>
 | 
			
		||||
            {% endif %}
 | 
			
		||||
        </fieldset>
 | 
			
		||||
    </form>
 | 
			
		||||
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
<div id="diff-ui">
 | 
			
		||||
  <img style="max-width: 100%" src="{{ url_for('render_diff_image', uuid=uuid, compare_date=current_previous_version) }}" />
 | 
			
		||||
 | 
			
		||||
    <div>
 | 
			
		||||
        <span style="width: 50%">
 | 
			
		||||
            <img style="max-width: 100%" src="{{ url_for('show_single_image', uuid=uuid, datestr=newest_version_timestamp) }}" />
 | 
			
		||||
        </span>
 | 
			
		||||
        <span style="width: 50%">
 | 
			
		||||
            <img style="max-width: 100%" src="{{ url_for('show_single_image', uuid=uuid, datestr=current_previous_version) }}" />
 | 
			
		||||
        </span>
 | 
			
		||||
    </div>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.js')}}"></script>
 | 
			
		||||
 | 
			
		||||
<script defer="">
 | 
			
		||||
window.onload = function() {
 | 
			
		||||
    /* Set current version date as local time in the browser also */
 | 
			
		||||
    var current_v = document.getElementById("current-v-date");
 | 
			
		||||
    var dateObject = new Date({{ newest_version_timestamp }}*1000);
 | 
			
		||||
    current_v.innerHTML=dateObject.toLocaleString();
 | 
			
		||||
 | 
			
		||||
    /* Convert what is options from UTC time.time() to local browser time */
 | 
			
		||||
    var diffList=document.getElementById("diff-version");
 | 
			
		||||
    if (typeof(diffList) != 'undefined' && diffList != null) {
 | 
			
		||||
        for (var option of diffList.options) {
 | 
			
		||||
          var dateObject = new Date(option.value*1000);
 | 
			
		||||
          option.label=dateObject.toLocaleString();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
</script>
 | 
			
		||||
 | 
			
		||||
{% endblock %}
 | 
			
		||||
							
								
								
									
										13
									
								
								changedetectionio/templates/preview-image.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								changedetectionio/templates/preview-image.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,13 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
 | 
			
		||||
{% block content %}
 | 
			
		||||
 | 
			
		||||
<div id="settings">
 | 
			
		||||
    <h1>Current</h1>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
<div id="diff-ui">
 | 
			
		||||
    image goes here
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
{% endblock %}
 | 
			
		||||
@@ -11,7 +11,7 @@
 | 
			
		||||
        <tbody>
 | 
			
		||||
        <tr>
 | 
			
		||||
            <td id="diff-col">
 | 
			
		||||
                <span id="result">{% for row in content %}{{row}}{% endfor %}</span>
 | 
			
		||||
                <span id="result">{{content}}</span>
 | 
			
		||||
            </td>
 | 
			
		||||
        </tr>
 | 
			
		||||
        </tbody>
 | 
			
		||||
 
 | 
			
		||||
@@ -100,6 +100,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
 | 
			
		||||
    # It should have picked up the <title>
 | 
			
		||||
    assert b'head title' in res.data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # be sure the HTML converter worked
 | 
			
		||||
    res = client.get(url_for("preview_page", uuid="first"))
 | 
			
		||||
    assert b'<html>' not in res.data
 | 
			
		||||
 | 
			
		||||
    res = client.get(url_for("preview_page", uuid="first"))
 | 
			
		||||
    assert b'Some initial text' in res.data
 | 
			
		||||
 | 
			
		||||
    #
 | 
			
		||||
    # Cleanup everything
 | 
			
		||||
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										56
									
								
								changedetectionio/tests/test_binary_fetch.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								changedetectionio/tests/test_binary_fetch.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,56 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
import secrets
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from . util import live_server_setup
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_binary_file_change(client, live_server):
 | 
			
		||||
    with open("test-datastore/test.bin", "wb") as f:
 | 
			
		||||
        f.write(secrets.token_bytes())
 | 
			
		||||
 | 
			
		||||
    live_server_setup(live_server)
 | 
			
		||||
 | 
			
		||||
    sleep_time_for_fetch_thread = 3
 | 
			
		||||
 | 
			
		||||
    # Give the endpoint time to spin up
 | 
			
		||||
    time.sleep(1)
 | 
			
		||||
 | 
			
		||||
    # Add our URL to the import page
 | 
			
		||||
    test_url = url_for('test_binaryfile_endpoint', _external=True)
 | 
			
		||||
    res = client.post(
 | 
			
		||||
        url_for("import_page"),
 | 
			
		||||
        data={"urls": test_url},
 | 
			
		||||
        follow_redirects=True
 | 
			
		||||
    )
 | 
			
		||||
    assert b"1 Imported" in res.data
 | 
			
		||||
 | 
			
		||||
    # Trigger a check
 | 
			
		||||
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # Give the thread time to pick it up
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
 | 
			
		||||
    # Trigger a check
 | 
			
		||||
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' not in res.data
 | 
			
		||||
    assert b'/test-binary-endpoint' in res.data
 | 
			
		||||
 | 
			
		||||
    #  Make a change
 | 
			
		||||
    with open("test-datastore/test.bin", "wb") as f:
 | 
			
		||||
        f.write(secrets.token_bytes())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Trigger a check
 | 
			
		||||
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # Give the thread time to pick it up
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
 | 
			
		||||
    # It should now report a change (the 'unviewed' class is present)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' in res.data
 | 
			
		||||
@@ -37,6 +37,16 @@ def set_modified_response():
 | 
			
		||||
 | 
			
		||||
def live_server_setup(live_server):
 | 
			
		||||
 | 
			
		||||
    @live_server.app.route('/test-binary-endpoint')
 | 
			
		||||
    def test_binaryfile_endpoint():
 | 
			
		||||
 | 
			
		||||
        from flask import make_response
 | 
			
		||||
 | 
			
		||||
        # Tried using a global var here but didn't seem to work, so reading from a file instead.
 | 
			
		||||
        with open("test-datastore/test.bin", "rb") as f:
 | 
			
		||||
            resp = make_response(f.read())
 | 
			
		||||
            resp.headers['Content-Type'] = 'image/jpeg'
 | 
			
		||||
            return resp
 | 
			
		||||
 | 
			
		||||
    @live_server.app.route('/test-endpoint')
 | 
			
		||||
    def test_endpoint():
 | 
			
		||||
 
 | 
			
		||||
@@ -42,7 +42,6 @@ class update_worker(threading.Thread):
 | 
			
		||||
                    now = time.time()
 | 
			
		||||
 | 
			
		||||
                    try:
 | 
			
		||||
 | 
			
		||||
                        changed_detected, update_obj, contents = update_handler.run(uuid)
 | 
			
		||||
 | 
			
		||||
                        # Re #342
 | 
			
		||||
@@ -135,8 +134,8 @@ class update_worker(threading.Thread):
 | 
			
		||||
 | 
			
		||||
                        except Exception as e:
 | 
			
		||||
                            # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
 | 
			
		||||
                            print("!!!! Exception in update_worker !!!\n", e)
 | 
			
		||||
 | 
			
		||||
                            self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
 | 
			
		||||
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
 | 
			
		||||
                    finally:
 | 
			
		||||
                        # Always record that we at least tried
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user