mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 14:47:21 +00:00 
			
		
		
		
	Compare commits
	
		
			13 Commits
		
	
	
		
			fix-appris
			...
			refactor-f
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 73d9373879 | ||
|   | d32640d892 | ||
|   | 7ee249e2ff | ||
|   | 5d753f59c4 | ||
|   | 090f5d7725 | ||
|   | 7869a7745a | ||
|   | de34f0ad83 | ||
|   | fabbb3733a | ||
|   | deadf881b0 | ||
|   | 77ef42c367 | ||
|   | 5d1f317e30 | ||
|   | 5ed7f43f6e | ||
|   | 3b6ae70c9c | 
| @@ -17,7 +17,6 @@ | ||||
| </script> | ||||
|  | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script> | ||||
| <!--<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>--> | ||||
| <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script> | ||||
|  | ||||
| <div class="edit-form monospaced-textarea"> | ||||
|   | ||||
| @@ -476,7 +476,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|  | ||||
|     title = StringField('Title', default='') | ||||
|  | ||||
|     ignore_text = StringListField('Remove lines containing', [ValidateListRegex()]) | ||||
|     ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()]) | ||||
|     headers = StringDictKeyValue('Request headers') | ||||
|     body = TextAreaField('Request body', [validators.Optional()]) | ||||
|     method = SelectField('Request method', choices=valid_method, default=default_method) | ||||
|   | ||||
| @@ -3,11 +3,11 @@ from lxml import etree | ||||
| import json | ||||
| import re | ||||
|  | ||||
|  | ||||
| # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis | ||||
| TEXT_FILTER_LIST_LINE_SUFFIX = "<br>" | ||||
|  | ||||
| TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ') | ||||
| PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$' | ||||
|  | ||||
| # 'price' , 'lowPrice', 'highPrice' are usually under here | ||||
| # All of those may or may not appear on different websites - I didn't find a way to do case-insensitive searching here | ||||
| LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] | ||||
| @@ -326,6 +326,7 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None | ||||
| #          - "line numbers" return a list of line numbers that match (int list) | ||||
| # | ||||
| # wordlist - list of regex's (str) or words (str) | ||||
| # Preserves all linefeeds and other whitespace; it's not the job of this function to remove that | ||||
| def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     i = 0 | ||||
|     output = [] | ||||
| @@ -341,32 +342,30 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|         else: | ||||
|             ignore_text.append(k.strip()) | ||||
|  | ||||
|     for line in content.splitlines(): | ||||
|     for line in content.splitlines(keepends=True): | ||||
|         i += 1 | ||||
|         # Always ignore blank lines in this mode. (when this function gets called) | ||||
|         got_match = False | ||||
|         if len(line.strip()): | ||||
|             for l in ignore_text: | ||||
|                 if l.lower() in line.lower(): | ||||
|         for l in ignore_text: | ||||
|             if l.lower() in line.lower(): | ||||
|                 got_match = True | ||||
|  | ||||
|         if not got_match: | ||||
|             for r in ignore_regex: | ||||
|                 if r.search(line): | ||||
|                     got_match = True | ||||
|  | ||||
|             if not got_match: | ||||
|                 for r in ignore_regex: | ||||
|                     if r.search(line): | ||||
|                         got_match = True | ||||
|  | ||||
|             if not got_match: | ||||
|                 # Not ignored | ||||
|                 output.append(line.encode('utf8')) | ||||
|             else: | ||||
|                 ignored_line_numbers.append(i) | ||||
|  | ||||
|         if not got_match: | ||||
|             # Not ignored, and should preserve "keepends" | ||||
|             output.append(line) | ||||
|         else: | ||||
|             ignored_line_numbers.append(i) | ||||
|  | ||||
|     # Used for finding out what to highlight | ||||
|     if mode == "line numbers": | ||||
|         return ignored_line_numbers | ||||
|  | ||||
|     return "\n".encode('utf8').join(output) | ||||
|     return ''.join(output) | ||||
|  | ||||
| def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|     from xml.sax.saxutils import escape as xml_escape | ||||
|   | ||||
| @@ -6,6 +6,8 @@ import re | ||||
| from pathlib import Path | ||||
| from loguru import logger | ||||
|  | ||||
| from ..html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
|  | ||||
| # Allowable protocols, protects against javascript: etc | ||||
| # file:// is further checked by ALLOW_FILE_URI | ||||
| SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):' | ||||
| @@ -312,13 +314,13 @@ class model(watch_base): | ||||
|             dest = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|             if not os.path.exists(dest): | ||||
|                 with open(dest, 'wb') as f: | ||||
|                     f.write(brotli.compress(contents, mode=brotli.MODE_TEXT)) | ||||
|                     f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)) | ||||
|         else: | ||||
|             snapshot_fname = f"{snapshot_id}.txt" | ||||
|             dest = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|             if not os.path.exists(dest): | ||||
|                 with open(dest, 'wb') as f: | ||||
|                     f.write(contents) | ||||
|                     f.write(contents.encode('utf-8')) | ||||
|  | ||||
|         # Append to index | ||||
|         # @todo check last char was \n | ||||
| @@ -350,14 +352,32 @@ class model(watch_base): | ||||
|         return seconds | ||||
|  | ||||
|     # Iterate over all history texts and see if something new exists | ||||
|     def lines_contain_something_unique_compared_to_history(self, lines: list): | ||||
|         local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) | ||||
|     # Always applying .strip() to start/end but optionally replace any other whitespace | ||||
|     def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False): | ||||
|         local_lines = [] | ||||
|         if lines: | ||||
|             if ignore_whitespace: | ||||
|                 if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk | ||||
|                     local_lines = set([l.translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines]) | ||||
|                 else: | ||||
|                     local_lines = set([l.decode('utf-8').translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines]) | ||||
|             else: | ||||
|                 if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk | ||||
|                     local_lines = set([l.strip().lower() for l in lines]) | ||||
|                 else: | ||||
|                     local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) | ||||
|  | ||||
|  | ||||
|         # Compare each lines (set) against each history text file (set) looking for something new.. | ||||
|         existing_history = set({}) | ||||
|         for k, v in self.history.items(): | ||||
|             content = self.get_history_snapshot(k) | ||||
|             alist = set([line.strip().lower() for line in content.splitlines()]) | ||||
|  | ||||
|             if ignore_whitespace: | ||||
|                 alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()]) | ||||
|             else: | ||||
|                 alist = set([line.strip().lower() for line in content.splitlines()]) | ||||
|  | ||||
|             existing_history = existing_history.union(alist) | ||||
|  | ||||
|         # Check that everything in local_lines(new stuff) already exists in existing_history - it should | ||||
|   | ||||
| @@ -307,4 +307,4 @@ class perform_site_check(difference_detection_processor): | ||||
|         # Always record the new checksum | ||||
|         update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|         return changed_detected, update_obj, snapshot_content.encode('utf-8').strip() | ||||
|         return changed_detected, update_obj, snapshot_content.strip() | ||||
|   | ||||
| @@ -46,6 +46,9 @@ def prepare_filter_prevew(datastore, watch_uuid): | ||||
|  | ||||
|     text_after_filter = '' | ||||
|     text_before_filter = '' | ||||
|     trigger_line_numbers = [] | ||||
|     ignore_line_numbers = [] | ||||
|  | ||||
|     tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid)) | ||||
|  | ||||
|     if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir): | ||||
| @@ -72,7 +75,7 @@ def prepare_filter_prevew(datastore, watch_uuid): | ||||
|                                                                  ) | ||||
|             # Use the last loaded HTML as the input | ||||
|             update_handler.datastore = datastore | ||||
|             update_handler.fetcher.content = decompressed_data | ||||
|             update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string | ||||
|             update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') | ||||
|  | ||||
|             # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk | ||||
| @@ -84,9 +87,7 @@ def prepare_filter_prevew(datastore, watch_uuid): | ||||
|                 text_after_filter = future1.result() | ||||
|                 text_before_filter = future2.result() | ||||
|  | ||||
|     trigger_line_numbers = [] | ||||
|     try: | ||||
|  | ||||
|         trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter, | ||||
|                                                             wordlist=tmp_watch['trigger_text'], | ||||
|                                                             mode='line numbers' | ||||
| @@ -94,6 +95,15 @@ def prepare_filter_prevew(datastore, watch_uuid): | ||||
|     except Exception as e: | ||||
|         text_before_filter = f"Error: {str(e)}" | ||||
|  | ||||
|     try: | ||||
|         text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter, | ||||
|                                                            wordlist=text_to_ignore, | ||||
|                                                            mode='line numbers' | ||||
|                                                            ) | ||||
|     except Exception as e: | ||||
|         text_before_filter = f"Error: {str(e)}" | ||||
|  | ||||
|     logger.trace(f"Parsed in {time.time() - now:.3f}s") | ||||
|  | ||||
|     return jsonify( | ||||
| @@ -102,6 +112,7 @@ def prepare_filter_prevew(datastore, watch_uuid): | ||||
|             'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter, | ||||
|             'duration': time.time() - now, | ||||
|             'trigger_line_numbers': trigger_line_numbers, | ||||
|             'ignore_line_numbers': ignore_line_numbers, | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ import re | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio.processors import difference_detection_processor | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| from loguru import logger | ||||
| @@ -36,7 +36,6 @@ class PDFToHTMLToolNotFound(ValueError): | ||||
| class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|     def run_changedetection(self, watch, skip_when_checksum_same=True): | ||||
|  | ||||
|         changed_detected = False | ||||
|         html_content = "" | ||||
|         screenshot = False  # as bytes | ||||
| @@ -205,18 +204,9 @@ class perform_site_check(difference_detection_processor): | ||||
|         if watch.get('trim_text_whitespace'): | ||||
|             stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()) | ||||
|  | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         # Also used to calculate/show what was removed | ||||
|         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') | ||||
|         text_content_before_ignored_filter = stripped_text_from_html | ||||
|  | ||||
|         # @todo whitespace coming from missing rtrim()? | ||||
|         # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about. | ||||
| @@ -235,12 +225,12 @@ class perform_site_check(difference_detection_processor): | ||||
|                                              line_feed_sep="\n", | ||||
|                                              include_change_type_prefix=False) | ||||
|  | ||||
|             watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter) | ||||
|             watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8')) | ||||
|  | ||||
|             if not rendered_diff and stripped_text_from_html: | ||||
|                 # We had some content, but no differences were found | ||||
|                 # Store our new file as the MD5 so it will trigger in the future | ||||
|                 c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest() | ||||
|                 c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8') | ||||
|             else: | ||||
|                 stripped_text_from_html = rendered_diff | ||||
| @@ -261,14 +251,6 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         update_obj["last_check_status"] = self.fetcher.get_last_status_code() | ||||
|  | ||||
|         # If there's text to skip | ||||
|         # @todo we could abstract out the get_text() to handle this cleaner | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         if len(text_to_ignore): | ||||
|             stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
|         else: | ||||
|             stripped_text_from_html = stripped_text_from_html.encode('utf8') | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = watch.get('extract_text', []) | ||||
|         if len(extract_text) > 0: | ||||
| @@ -277,39 +259,53 @@ class perform_site_check(difference_detection_processor): | ||||
|                 # in case they specified something in '/.../x' | ||||
|                 if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): | ||||
|                     regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) | ||||
|                     result = re.findall(regex.encode('utf-8'), stripped_text_from_html) | ||||
|                     result = re.findall(regex, stripped_text_from_html) | ||||
|  | ||||
|                     for l in result: | ||||
|                         if type(l) is tuple: | ||||
|                             # @todo - some formatter option default (between groups) | ||||
|                             regex_matched_output += list(l) + [b'\n'] | ||||
|                             regex_matched_output += list(l) + ['\n'] | ||||
|                         else: | ||||
|                             # @todo - some formatter option default (between each ungrouped result) | ||||
|                             regex_matched_output += [l] + [b'\n'] | ||||
|                             regex_matched_output += [l] + ['\n'] | ||||
|                 else: | ||||
|                     # Doesn't look like regex, just hunt for plaintext and return that which matches | ||||
|                     # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes | ||||
|                     r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE) | ||||
|                     r = re.compile(re.escape(s_re), re.IGNORECASE) | ||||
|                     res = r.findall(stripped_text_from_html) | ||||
|                     if res: | ||||
|                         for match in res: | ||||
|                             regex_matched_output += [match] + [b'\n'] | ||||
|                             regex_matched_output += [match] + ['\n'] | ||||
|  | ||||
|             ########################################################## | ||||
|             stripped_text_from_html = b'' | ||||
|             text_content_before_ignored_filter = b'' | ||||
|             stripped_text_from_html = '' | ||||
|  | ||||
|             if regex_matched_output: | ||||
|                 # @todo some formatter for presentation? | ||||
|                 stripped_text_from_html = b''.join(regex_matched_output) | ||||
|                 text_content_before_ignored_filter = stripped_text_from_html | ||||
|                 stripped_text_from_html = ''.join(regex_matched_output) | ||||
|  | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|  | ||||
| ### CALCULATE MD5 | ||||
|         # If there's text to ignore | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         text_for_checksuming = stripped_text_from_html | ||||
|         if text_to_ignore: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
|  | ||||
|         # Re #133 - if we should strip whitespaces from triggering the change detected comparison | ||||
|         if self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest() | ||||
|         if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|         else: | ||||
|             fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest() | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         blocked = False | ||||
| @@ -349,7 +345,13 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         if changed_detected: | ||||
|             if watch.get('check_unique_lines', False): | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) | ||||
|                 ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace') | ||||
|  | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                     lines=stripped_text_from_html.splitlines(), | ||||
|                     ignore_whitespace=ignore_whitespace | ||||
|                 ) | ||||
|  | ||||
|                 # One or more lines? unsure? | ||||
|                 if not has_unique_lines: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
|   | ||||
| @@ -42,8 +42,12 @@ function request_textpreview_update() { | ||||
|                 { | ||||
|                     'color': '#ee0000', | ||||
|                     'lines': data['trigger_line_numbers'] | ||||
|                 }, | ||||
|                 { | ||||
|                     'color': '#757575', | ||||
|                     'lines': data['ignore_line_numbers'] | ||||
|                 } | ||||
|             ]); | ||||
|             ]) | ||||
|     }).fail(function (error) { | ||||
|         if (error.statusText === 'abort') { | ||||
|             console.log('Request was aborted due to a new request being fired.'); | ||||
| @@ -76,8 +80,8 @@ $(document).ready(function () { | ||||
|         $('body').toggleClass('preview-text-enabled') | ||||
|         request_textpreview_update(); | ||||
|         const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off'; | ||||
|         $('textarea:visible')[method]('keyup blur', request_textpreview_update.throttle(1000)); | ||||
|         $('input:visible')[method]('keyup blur change', request_textpreview_update.throttle(1000)); | ||||
|         $('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000)); | ||||
|         $('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000)); | ||||
|         $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000)); | ||||
|     }); | ||||
|     $('.minitabs-wrapper').miniTabs({ | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from flask import ( | ||||
|     flash | ||||
| ) | ||||
|  | ||||
| from .html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
| from . model import App, Watch | ||||
| from copy import deepcopy, copy | ||||
| from os import path, unlink | ||||
| @@ -750,17 +751,17 @@ class ChangeDetectionStore: | ||||
|     def update_5(self): | ||||
|         # If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings | ||||
|         # In other words - the watch notification_title and notification_body are not needed if they are the same as the default one | ||||
|         current_system_body = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n ")) | ||||
|         current_system_title = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n ")) | ||||
|         current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE) | ||||
|         current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE) | ||||
|         for uuid, watch in self.data['watching'].items(): | ||||
|             try: | ||||
|                 watch_body = watch.get('notification_body', '') | ||||
|                 if watch_body and watch_body.translate(str.maketrans('', '', "\r\n ")) == current_system_body: | ||||
|                 if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body: | ||||
|                     # Looks the same as the default one, so unset it | ||||
|                     watch['notification_body'] = None | ||||
|  | ||||
|                 watch_title = watch.get('notification_title', '') | ||||
|                 if watch_title and watch_title.translate(str.maketrans('', '', "\r\n ")) == current_system_title: | ||||
|                 if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title: | ||||
|                     # Looks the same as the default one, so unset it | ||||
|                     watch['notification_title'] = None | ||||
|             except Exception as e: | ||||
|   | ||||
| @@ -26,7 +26,6 @@ | ||||
| </script> | ||||
| <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script> | ||||
| {% if playwright_enabled %} | ||||
| @@ -330,9 +329,9 @@ nav | ||||
|                         {{ render_checkbox_field(form.filter_text_added) }} | ||||
|                         {{ render_checkbox_field(form.filter_text_replaced) }} | ||||
|                         {{ render_checkbox_field(form.filter_text_removed) }} | ||||
|                     <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span> | ||||
|                     <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br> | ||||
|                     <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> | ||||
|                     <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br> | ||||
|                     <span class="pure-form-message-inline"> So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br> | ||||
|                     <span class="pure-form-message-inline"> When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.check_unique_lines) }} | ||||
| @@ -371,7 +370,7 @@ nav | ||||
| ") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                             <li>Matching text will be <strong>removed</strong> from the text snapshot</li> | ||||
|                             <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it won't trigger a change)</li> | ||||
|                             <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> | ||||
|                             <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> | ||||
|                             <li>Changing this will affect the comparison checksum which may trigger an alert</li> | ||||
|   | ||||
| @@ -172,7 +172,7 @@ nav | ||||
|                     <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br> | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                             <li>Matching text will be <strong>removed</strong> from the text snapshot</li> | ||||
|                             <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it won't trigger a change)</li> | ||||
|                             <li>Note: This is applied globally in addition to the per-watch rules.</li> | ||||
|                             <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> | ||||
|                             <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> | ||||
|   | ||||
| @@ -71,7 +71,7 @@ def test_setup(client, live_server, measure_memory_usage): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_check_filter_multiline(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|    # live_server_setup(live_server) | ||||
|     set_multiline_response() | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|   | ||||
| @@ -33,13 +33,17 @@ def test_strip_regex_text_func(): | ||||
|  | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines) | ||||
|  | ||||
|     assert b"but 1 lines" in stripped_content | ||||
|     assert b"igNORe-cAse text" not in stripped_content | ||||
|     assert b"but 1234 lines" not in stripped_content | ||||
|     assert b"really" not in stripped_content | ||||
|     assert b"not this" not in stripped_content | ||||
|     assert "but 1 lines" in stripped_content | ||||
|     assert "igNORe-cAse text" not in stripped_content | ||||
|     assert "but 1234 lines" not in stripped_content | ||||
|     assert "really" not in stripped_content | ||||
|     assert "not this" not in stripped_content | ||||
|  | ||||
|     # Check line number reporting | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers") | ||||
|     assert stripped_content == [2, 5, 6, 7, 8, 10] | ||||
|  | ||||
|     # Check that linefeeds are preserved when there are is no matching ignores | ||||
|     content = "some text\n\nand other text\n" | ||||
|     stripped_content = html_tools.strip_ignore_text(content, ignore_lines) | ||||
|     assert content == stripped_content | ||||
|   | ||||
| @@ -22,10 +22,15 @@ def test_strip_text_func(): | ||||
|     ignore_lines = ["sometimes"] | ||||
|  | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines) | ||||
|     assert "sometimes" not in stripped_content | ||||
|     assert "Some content" in stripped_content | ||||
|  | ||||
|     assert b"sometimes" not in stripped_content | ||||
|     assert b"Some content" in stripped_content | ||||
|     # Check that line feeds dont get chewed up when something is found | ||||
|     test_content = "Some initial text\n\nWhich is across multiple lines\n\nZZZZz\n\n\nSo let's see what happens." | ||||
|     ignore = ['something irrelevent but just to check', 'XXXXX', 'YYYYY', 'ZZZZZ'] | ||||
|  | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore) | ||||
|     assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens." | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| @@ -141,8 +146,6 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     # Just to be sure.. set a regular modified change.. | ||||
|     set_modified_original_ignore_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
| @@ -153,17 +156,17 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa | ||||
|  | ||||
|     res = client.get(url_for("preview_page", uuid="first")) | ||||
|  | ||||
|     # Should no longer be in the preview | ||||
|     assert b'new ignore stuff' not in res.data | ||||
|     # SHOULD BE be in the preview, it was added in set_modified_original_ignore_response() | ||||
|     # and we have "new ignore stuff" in ignore_text | ||||
|     # it is only ignored, it is not removed (it will be highlighted too) | ||||
|     assert b'new ignore stuff' in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| # When adding some ignore text, it should not trigger a change, even if something else on that line changes | ||||
| def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     #live_server_setup(live_server) | ||||
|     ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ" | ||||
|     set_original_ignore_response() | ||||
|  | ||||
| @@ -172,6 +175,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|         url_for("settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-global_ignore_text": ignore_text, | ||||
|             'application-fetch_backend': "html_requests" | ||||
|         }, | ||||
| @@ -192,9 +196,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|  | ||||
|     # Goto the edit page of the item, add our ignore text | ||||
|     # Add our URL to the import page | ||||
|     #Adding some ignore text should not trigger a change | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"}, | ||||
| @@ -210,20 +212,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # so that we are sure everything is viewed and in a known 'nothing changed' state | ||||
|     res = client.get(url_for("diff_history_page", uuid="first")) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     # It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
| ##### | ||||
|  | ||||
|  | ||||
|     #  Make a change which includes the ignore text | ||||
|     # Make a change which includes the ignore text, it should be ignored and no 'change' triggered | ||||
|     # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list | ||||
|     set_modified_ignore_response() | ||||
|  | ||||
|     # Trigger a check | ||||
| @@ -233,6 +230,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|  | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|   | ||||
| @@ -18,12 +18,13 @@ class TestDiffBuilder(unittest.TestCase): | ||||
|  | ||||
|         watch['last_viewed'] = 110 | ||||
|  | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         # Contents from the browser are always returned from the browser/requests/etc as str, str is basically UTF-16 in python | ||||
|         watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4())) | ||||
|  | ||||
|         p = watch.get_next_snapshot_key_to_last_viewed | ||||
|         assert p == "112", "Correct last-viewed timestamp was detected" | ||||
|   | ||||
| @@ -286,8 +286,8 @@ class update_worker(threading.Thread): | ||||
|                         # Re #342 | ||||
|                         # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. | ||||
|                         # We then convert/.decode('utf-8') for the notification etc | ||||
|                         if not isinstance(contents, (bytes, bytearray)): | ||||
|                             raise Exception("Error - returned data from the fetch handler SHOULD be bytes") | ||||
| #                        if not isinstance(contents, (bytes, bytearray)): | ||||
| #                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes") | ||||
|                     except PermissionError as e: | ||||
|                         logger.critical(f"File permission error updating file, watch: {uuid}") | ||||
|                         logger.critical(str(e)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user