mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			19 Commits
		
	
	
		
			skip_when_
			...
			2548-trigg
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 16a85e2a60 | ||
|   | ecafa27833 | ||
|   | f7d4e58613 | ||
|   | 5bb47e47db | ||
|   | 03151da68e | ||
|   | a16a70229d | ||
|   | 9476c1076b | ||
|   | a4959b5971 | ||
|   | a278fa22f2 | ||
|   | d39530b261 | ||
|   | d4b4355ff5 | ||
|   | c1c8de3104 | ||
|   | 5a768d7db3 | ||
|   | f38429ec93 | ||
|   | 783926962d | ||
|   | 6cd1d50a4f | ||
|   | 54a4970a4c | ||
|   | fd00453e6d | ||
|   | 2842ffb205 | ||
| @@ -37,6 +37,7 @@ RUN pip install --target=/dependencies playwright~=1.41.2 \ | ||||
|  | ||||
| # Final image stage | ||||
| FROM python:${PYTHON_VERSION}-slim-bookworm | ||||
| LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io" | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     libxslt1.1 \ | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| recursive-include changedetectionio/apprise_plugin * | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/model * | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.46.04' | ||||
| __version__ = '0.47.03' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
|   | ||||
| @@ -58,7 +58,7 @@ class Watch(Resource): | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
|         if request.args.get('recheck'): | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             return "OK", 200 | ||||
|         if request.args.get('paused', '') == 'paused': | ||||
|             self.datastore.data['watching'].get(uuid).pause() | ||||
| @@ -246,7 +246,7 @@ class CreateWatch(Resource): | ||||
|  | ||||
|         new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags) | ||||
|         if new_uuid: | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|             return {'uuid': new_uuid}, 201 | ||||
|         else: | ||||
|             return "Invalid or unsupported URL", 400 | ||||
| @@ -303,7 +303,7 @@ class CreateWatch(Resource): | ||||
|  | ||||
|         if request.args.get('recheck_all'): | ||||
|             for uuid in self.datastore.data['watching'].keys(): | ||||
|                 self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|                 self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             return {'status': "OK"}, 200 | ||||
|  | ||||
|         return list, 200 | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| # include the decorator | ||||
| from apprise.decorators import notify | ||||
| from loguru import logger | ||||
|  | ||||
| @notify(on="delete") | ||||
| @notify(on="deletes") | ||||
| @@ -64,10 +65,12 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs): | ||||
|             auth = (URLBase.unquote(results.get('user'))) | ||||
|  | ||||
|     # Try to auto-guess if it's JSON | ||||
|     h = 'application/json; charset=utf-8' | ||||
|     try: | ||||
|         json.loads(body) | ||||
|         headers['Content-Type'] = 'application/json; charset=utf-8' | ||||
|         headers['Content-Type'] = h | ||||
|     except ValueError as e: | ||||
|         logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}") | ||||
|         pass | ||||
|  | ||||
|     r(results.get('url'), | ||||
|   | ||||
| @@ -1,4 +1,7 @@ | ||||
| import importlib | ||||
| from concurrent.futures import ThreadPoolExecutor | ||||
|  | ||||
| from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
|  | ||||
| from functools import wraps | ||||
| @@ -30,7 +33,6 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     def long_task(uuid, preferred_proxy): | ||||
|         import time | ||||
|         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions | ||||
|         from changedetectionio.processors.text_json_diff import text_json_diff | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|  | ||||
|         status = {'status': '', 'length': 0, 'text': ''} | ||||
| @@ -38,8 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         contents = '' | ||||
|         now = time.time() | ||||
|         try: | ||||
|             update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid) | ||||
|             update_handler.call_browser() | ||||
|             processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor") | ||||
|             update_handler = processor_module.perform_site_check(datastore=datastore, | ||||
|                                                                  watch_uuid=uuid | ||||
|                                                                  ) | ||||
|  | ||||
|             update_handler.call_browser(preferred_proxy_id=preferred_proxy) | ||||
|         # title, size is len contents not len xfer | ||||
|         except content_fetcher_exceptions.Non200ErrorCodeReceived as e: | ||||
|             if e.status_code == 404: | ||||
| @@ -48,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                 status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"}) | ||||
|             else: | ||||
|                 status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"}) | ||||
|         except text_json_diff.FilterNotFoundInResponse: | ||||
|         except FilterNotFoundInResponse: | ||||
|             status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"}) | ||||
|         except content_fetcher_exceptions.EmptyReply as e: | ||||
|             if e.status_code == 403 or e.status_code == 401: | ||||
|   | ||||
| @@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT | ||||
|         datastore.data['watching'][uuid]['processor'] = 'restock_diff' | ||||
|         datastore.data['watching'][uuid].clear_watch() | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|         return redirect(url_for("index")) | ||||
|  | ||||
|     @login_required | ||||
|   | ||||
| @@ -17,7 +17,6 @@ | ||||
| </script> | ||||
|  | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script> | ||||
| <!--<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>--> | ||||
| <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script> | ||||
|  | ||||
| <div class="edit-form monospaced-textarea"> | ||||
|   | ||||
| @@ -75,6 +75,7 @@ class fetcher(Fetcher): | ||||
|         self.headers = r.headers | ||||
|  | ||||
|         if not r.content or not len(r.content): | ||||
|             logger.debug(f"Requests returned empty content for '{url}'") | ||||
|             if not empty_pages_are_a_change: | ||||
|                 raise EmptyReply(url=url, status_code=r.status_code) | ||||
|             else: | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import datetime | ||||
| import importlib | ||||
|  | ||||
| import flask_login | ||||
| import locale | ||||
| @@ -12,9 +11,7 @@ import threading | ||||
| import time | ||||
| import timeago | ||||
|  | ||||
| from .content_fetchers.exceptions import ReplyWithContentButNoText | ||||
| from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor | ||||
| from .processors.text_json_diff.processor import FilterNotFoundInResponse | ||||
| from .safe_jinja import render as jinja_render | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from copy import deepcopy | ||||
| @@ -791,7 +788,6 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # Recast it if need be to right data Watch handler | ||||
|             watch_class = get_custom_watch_obj_for_processor(form.data.get('processor')) | ||||
|             datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid]) | ||||
|  | ||||
|             flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.") | ||||
|  | ||||
|             # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds | ||||
| @@ -799,7 +795,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             datastore.needs_write_urgent = True | ||||
|  | ||||
|             # Queue the watch for immediate recheck, with a higher priority | ||||
|             update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|             update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|             # Diff page [edit] link should go back to diff page | ||||
|             if request.args.get("next") and request.args.get("next") == 'diff': | ||||
| @@ -980,7 +976,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 importer = import_url_list() | ||||
|                 importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff')) | ||||
|                 for uuid in importer.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|                 if len(importer.remaining_data) == 0: | ||||
|                     return redirect(url_for('index')) | ||||
| @@ -993,7 +989,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 d_importer = import_distill_io_json() | ||||
|                 d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) | ||||
|                 for uuid in d_importer.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|             # XLSX importer | ||||
|             if request.files and request.files.get('xlsx_file'): | ||||
| @@ -1017,7 +1013,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                     w_importer.run(data=file, flash=flash, datastore=datastore) | ||||
|  | ||||
|                 for uuid in w_importer.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|         # Could be some remaining, or we could be on GET | ||||
|         form = forms.importForm(formdata=request.form if request.method == 'POST' else None) | ||||
| @@ -1381,78 +1377,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def watch_get_preview_rendered(uuid): | ||||
|         from flask import jsonify | ||||
|         '''For when viewing the "preview" of the rendered text from inside of Edit''' | ||||
|         now = time.time() | ||||
|         import brotli | ||||
|         from . import forms | ||||
|  | ||||
|         text_after_filter = '' | ||||
|         tmp_watch = deepcopy(datastore.data['watching'].get(uuid)) | ||||
|  | ||||
|         if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir): | ||||
|             # Splice in the temporary stuff from the form | ||||
|             form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None, | ||||
|                                                        data=request.form | ||||
|                                                        ) | ||||
|             # Only update vars that came in via the AJAX post | ||||
|             p = {k: v for k, v in form.data.items() if k in request.form.keys()} | ||||
|             tmp_watch.update(p) | ||||
|  | ||||
|             latest_filename = next(reversed(tmp_watch.history)) | ||||
|             html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br") | ||||
|             with open(html_fname, 'rb') as f: | ||||
|                 decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8') | ||||
|  | ||||
|                 # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser() | ||||
|                 processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor") | ||||
|                 update_handler = processor_module.perform_site_check(datastore=datastore, | ||||
|                                                                      watch_uuid=uuid # probably not needed anymore anyway? | ||||
|                                                                      ) | ||||
|                 # Use the last loaded HTML as the input | ||||
|                 update_handler.fetcher.content = decompressed_data | ||||
|                 update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') | ||||
|                 try: | ||||
|                     changed_detected, update_obj, text_after_filter = update_handler.run_changedetection( | ||||
|                         watch=tmp_watch, | ||||
|                         skip_when_checksum_same=False, | ||||
|                     ) | ||||
|                 except FilterNotFoundInResponse as e: | ||||
|                     text_after_filter = f"Filter not found in HTML: {str(e)}" | ||||
|                 except ReplyWithContentButNoText as e: | ||||
|                     text_after_filter = f"Filter found but no text (empty result)" | ||||
|                 except Exception as e: | ||||
|                     text_after_filter = f"Error: {str(e)}" | ||||
|  | ||||
|             if not text_after_filter.strip(): | ||||
|                 text_after_filter = 'Empty content' | ||||
|  | ||||
|         # because run_changedetection always returns bytes due to saving the snapshots etc | ||||
|         text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter | ||||
|  | ||||
|         do_anchor = datastore.data["settings"]["application"].get("render_anchor_tag_content", False) | ||||
|  | ||||
|         trigger_line_numbers = [] | ||||
|         try: | ||||
|             text_before_filter = html_tools.html_to_text(html_content=decompressed_data, | ||||
|                                                          render_anchor_tag_content=do_anchor) | ||||
|  | ||||
|             trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter, | ||||
|                                                                 wordlist=tmp_watch['trigger_text'], | ||||
|                                                                 mode='line numbers' | ||||
|                                                                 ) | ||||
|         except Exception as e: | ||||
|             text_before_filter = f"Error: {str(e)}" | ||||
|  | ||||
|         logger.trace(f"Parsed in {time.time() - now:.3f}s") | ||||
|  | ||||
|         return jsonify( | ||||
|             { | ||||
|                 'after_filter': text_after_filter, | ||||
|                 'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter, | ||||
|                 'trigger_line_numbers': trigger_line_numbers | ||||
|             } | ||||
|         ) | ||||
|         from .processors.text_json_diff import prepare_filter_prevew | ||||
|         return prepare_filter_prevew(watch_uuid=uuid, datastore=datastore) | ||||
|  | ||||
|  | ||||
|     @app.route("/form/add/quickwatch", methods=['POST']) | ||||
| @@ -1515,7 +1442,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         new_uuid = datastore.clone(uuid) | ||||
|         if new_uuid: | ||||
|             if not datastore.data['watching'].get(uuid).get('paused'): | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid})) | ||||
|             flash('Cloned.') | ||||
|  | ||||
|         return redirect(url_for('index')) | ||||
| @@ -1536,7 +1463,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         if uuid: | ||||
|             if uuid not in running_uuids: | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             i = 1 | ||||
|  | ||||
|         elif tag: | ||||
| @@ -1547,7 +1474,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                         continue | ||||
|                     if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                         update_q.put( | ||||
|                             queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}) | ||||
|                             queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}) | ||||
|                         ) | ||||
|                         i += 1 | ||||
|  | ||||
| @@ -1557,9 +1484,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                     if with_errors and not watch.get('last_error'): | ||||
|                         continue | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})) | ||||
|                     i += 1 | ||||
|  | ||||
|         flash(f"{i} watches queued for rechecking.") | ||||
|         return redirect(url_for('index', tag=tag)) | ||||
|  | ||||
| @@ -1616,7 +1542,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     # Recheck and require a full reprocessing | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             flash("{} watches queued for rechecking".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'clear-errors'): | ||||
| @@ -1940,7 +1866,7 @@ def ticker_thread_check_time_launch_checks(): | ||||
|                         f"{now - watch['last_checked']:0.2f}s since last checked") | ||||
|  | ||||
|                     # Into the queue with you | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid})) | ||||
|  | ||||
|                     # Reset for next time | ||||
|                     watch.jitter_seconds = 0 | ||||
|   | ||||
| @@ -476,7 +476,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|  | ||||
|     title = StringField('Title', default='') | ||||
|  | ||||
|     ignore_text = StringListField('Remove lines containing', [ValidateListRegex()]) | ||||
|     ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()]) | ||||
|     headers = StringDictKeyValue('Request headers') | ||||
|     body = TextAreaField('Request body', [validators.Optional()]) | ||||
|     method = SelectField('Request method', choices=valid_method, default=default_method) | ||||
|   | ||||
| @@ -3,11 +3,11 @@ from lxml import etree | ||||
| import json | ||||
| import re | ||||
|  | ||||
|  | ||||
| # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis | ||||
| TEXT_FILTER_LIST_LINE_SUFFIX = "<br>" | ||||
|  | ||||
| TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ') | ||||
| PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$' | ||||
|  | ||||
| # 'price' , 'lowPrice', 'highPrice' are usually under here | ||||
| # All of those may or may not appear on different websites - I didn't find a way to do case-insensitive searching here | ||||
| LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] | ||||
| @@ -326,6 +326,7 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None | ||||
| #          - "line numbers" return a list of line numbers that match (int list) | ||||
| # | ||||
| # wordlist - list of regex's (str) or words (str) | ||||
| # Preserves all linefeeds and other whitespace; it's not the job of this function to remove them | ||||
| def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     i = 0 | ||||
|     output = [] | ||||
| @@ -341,32 +342,30 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|         else: | ||||
|             ignore_text.append(k.strip()) | ||||
|  | ||||
|     for line in content.splitlines(): | ||||
|     for line in content.splitlines(keepends=True): | ||||
|         i += 1 | ||||
|         # Always ignore blank lines in this mode. (when this function gets called) | ||||
|         got_match = False | ||||
|         if len(line.strip()): | ||||
|             for l in ignore_text: | ||||
|                 if l.lower() in line.lower(): | ||||
|         for l in ignore_text: | ||||
|             if l.lower() in line.lower(): | ||||
|                 got_match = True | ||||
|  | ||||
|         if not got_match: | ||||
|             for r in ignore_regex: | ||||
|                 if r.search(line): | ||||
|                     got_match = True | ||||
|  | ||||
|             if not got_match: | ||||
|                 for r in ignore_regex: | ||||
|                     if r.search(line): | ||||
|                         got_match = True | ||||
|  | ||||
|             if not got_match: | ||||
|                 # Not ignored | ||||
|                 output.append(line.encode('utf8')) | ||||
|             else: | ||||
|                 ignored_line_numbers.append(i) | ||||
|  | ||||
|         if not got_match: | ||||
|             # Not ignored, and should preserve "keepends" | ||||
|             output.append(line) | ||||
|         else: | ||||
|             ignored_line_numbers.append(i) | ||||
|  | ||||
|     # Used for finding out what to highlight | ||||
|     if mode == "line numbers": | ||||
|         return ignored_line_numbers | ||||
|  | ||||
|     return "\n".encode('utf8').join(output) | ||||
|     return ''.join(output) | ||||
|  | ||||
| def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|     from xml.sax.saxutils import escape as xml_escape | ||||
|   | ||||
| @@ -6,6 +6,8 @@ import re | ||||
| from pathlib import Path | ||||
| from loguru import logger | ||||
|  | ||||
| from ..html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
|  | ||||
| # Allowable protocols, protects against javascript: etc | ||||
| # file:// is further checked by ALLOW_FILE_URI | ||||
| SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):' | ||||
| @@ -36,8 +38,9 @@ class model(watch_base): | ||||
|     jitter_seconds = 0 | ||||
|  | ||||
|     def __init__(self, *arg, **kw): | ||||
|         self.__datastore_path = kw['datastore_path'] | ||||
|         del kw['datastore_path'] | ||||
|         self.__datastore_path = kw.get('datastore_path') | ||||
|         if kw.get('datastore_path'): | ||||
|             del kw['datastore_path'] | ||||
|         super(model, self).__init__(*arg, **kw) | ||||
|         if kw.get('default'): | ||||
|             self.update(kw['default']) | ||||
| @@ -171,6 +174,10 @@ class model(watch_base): | ||||
|         """ | ||||
|         tmp_history = {} | ||||
|  | ||||
|         # In the case we are only using the watch for processing without history | ||||
|         if not self.watch_data_dir: | ||||
|             return [] | ||||
|  | ||||
|         # Read the history file as a dict | ||||
|         fname = os.path.join(self.watch_data_dir, "history.txt") | ||||
|         if os.path.isfile(fname): | ||||
| @@ -307,13 +314,13 @@ class model(watch_base): | ||||
|             dest = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|             if not os.path.exists(dest): | ||||
|                 with open(dest, 'wb') as f: | ||||
|                     f.write(brotli.compress(contents, mode=brotli.MODE_TEXT)) | ||||
|                     f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)) | ||||
|         else: | ||||
|             snapshot_fname = f"{snapshot_id}.txt" | ||||
|             dest = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|             if not os.path.exists(dest): | ||||
|                 with open(dest, 'wb') as f: | ||||
|                     f.write(contents) | ||||
|                     f.write(contents.encode('utf-8')) | ||||
|  | ||||
|         # Append to index | ||||
|         # @todo check last char was \n | ||||
| @@ -345,14 +352,32 @@ class model(watch_base): | ||||
|         return seconds | ||||
|  | ||||
|     # Iterate over all history texts and see if something new exists | ||||
|     def lines_contain_something_unique_compared_to_history(self, lines: list): | ||||
|         local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) | ||||
|     # Always applying .strip() to start/end but optionally replace any other whitespace | ||||
|     def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False): | ||||
|         local_lines = [] | ||||
|         if lines: | ||||
|             if ignore_whitespace: | ||||
|                 if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk | ||||
|                     local_lines = set([l.translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines]) | ||||
|                 else: | ||||
|                     local_lines = set([l.decode('utf-8').translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines]) | ||||
|             else: | ||||
|                 if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk | ||||
|                     local_lines = set([l.strip().lower() for l in lines]) | ||||
|                 else: | ||||
|                     local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) | ||||
|  | ||||
|  | ||||
|         # Compare each lines (set) against each history text file (set) looking for something new.. | ||||
|         existing_history = set({}) | ||||
|         for k, v in self.history.items(): | ||||
|             content = self.get_history_snapshot(k) | ||||
|             alist = set([line.strip().lower() for line in content.splitlines()]) | ||||
|  | ||||
|             if ignore_whitespace: | ||||
|                 alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()]) | ||||
|             else: | ||||
|                 alist = set([line.strip().lower() for line in content.splitlines()]) | ||||
|  | ||||
|             existing_history = existing_history.union(alist) | ||||
|  | ||||
|         # Check that everything in local_lines(new stuff) already exists in existing_history - it should | ||||
| @@ -396,8 +421,8 @@ class model(watch_base): | ||||
|     @property | ||||
|     def watch_data_dir(self): | ||||
|         # The base dir of the watch data | ||||
|         return os.path.join(self.__datastore_path, self['uuid']) | ||||
|      | ||||
|         return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None | ||||
|  | ||||
|     def get_error_text(self): | ||||
|         """Return the text saved from a previous request that resulted in a non-200 error""" | ||||
|         fname = os.path.join(self.watch_data_dir, "last-error.txt") | ||||
|   | ||||
| @@ -18,6 +18,7 @@ class difference_detection_processor(): | ||||
|     screenshot = None | ||||
|     watch = None | ||||
|     xpath_data = None | ||||
|     preferred_proxy = None | ||||
|  | ||||
|     def __init__(self, *args, datastore, watch_uuid, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
| @@ -26,7 +27,8 @@ class difference_detection_processor(): | ||||
|         # Generic fetcher that should be extended (requests, playwright etc) | ||||
|         self.fetcher = Fetcher() | ||||
|  | ||||
|     def call_browser(self): | ||||
|     def call_browser(self, preferred_proxy_id=None): | ||||
|  | ||||
|         from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
|         # Protect against file:// access | ||||
| @@ -42,7 +44,7 @@ class difference_detection_processor(): | ||||
|         prefer_fetch_backend = self.watch.get('fetch_backend', 'system') | ||||
|  | ||||
|         # Proxy ID "key" | ||||
|         preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid')) | ||||
|         preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid')) | ||||
|  | ||||
|         # Pluggable content self.fetcher | ||||
|         if not prefer_fetch_backend or prefer_fetch_backend == 'system': | ||||
| @@ -155,7 +157,7 @@ class difference_detection_processor(): | ||||
|         # After init, call run_changedetection() which will do the actual change-detection | ||||
|  | ||||
|     @abstractmethod | ||||
|     def run_changedetection(self, watch, skip_when_checksum_same: bool = True): | ||||
|     def run_changedetection(self, watch): | ||||
|         update_obj = {'last_notification_error': False, 'last_error': False} | ||||
|         some_data = 'xxxxx' | ||||
|         update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() | ||||
|   | ||||
| @@ -27,22 +27,27 @@ def _search_prop_by_value(matches, value): | ||||
|                 return prop[1]  # Yield the desired value and exit the function | ||||
|  | ||||
| def _deduplicate_prices(data): | ||||
|     seen = set() | ||||
|     unique_data = [] | ||||
|     import re | ||||
|  | ||||
|     ''' | ||||
|     Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159" | ||||
|     Get all the values, clean it and add it to a set then return the unique values | ||||
|     ''' | ||||
|     unique_data = set() | ||||
|  | ||||
|     # Return the complete 'datum' where its price was not seen before | ||||
|     for datum in data: | ||||
|         # Convert 'value' to float if it can be a numeric string, otherwise leave it as is | ||||
|         try: | ||||
|             normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value | ||||
|         except ValueError: | ||||
|             normalized_value = datum.value | ||||
|  | ||||
|         # If the normalized value hasn't been seen yet, add it to unique data | ||||
|         if normalized_value not in seen: | ||||
|             unique_data.append(datum) | ||||
|             seen.add(normalized_value) | ||||
|      | ||||
|     return unique_data | ||||
|         if isinstance(datum.value, list): | ||||
|             # Process each item in the list | ||||
|             normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value]) | ||||
|             unique_data.update(normalized_value) | ||||
|         else: | ||||
|             # Process single value | ||||
|             v = float(re.sub(r'[^\d.]', '', str(datum.value))) | ||||
|             unique_data.add(v) | ||||
|  | ||||
|     return list(unique_data) | ||||
|  | ||||
|  | ||||
| # should return Restock() | ||||
| @@ -83,14 +88,13 @@ def get_itemprop_availability(html_content) -> Restock: | ||||
|         if price_result: | ||||
|             # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and | ||||
|             # parse that for the UI? | ||||
|             prices_found = set(str(item.value).replace('$', '') for item in price_result) | ||||
|             if len(price_result) > 1 and len(prices_found) > 1: | ||||
|             if len(price_result) > 1 and len(price_result) > 1: | ||||
|                 # See of all prices are different, in the case that one product has many embedded data types with the same price | ||||
|                 # One might have $121.95 and another 121.95 etc | ||||
|                 logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.") | ||||
|                 logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.") | ||||
|                 raise MoreThanOnePriceFound() | ||||
|  | ||||
|             value['price'] = price_result[0].value | ||||
|             value['price'] = price_result[0] | ||||
|  | ||||
|         pricecurrency_result = pricecurrency_parse.find(data) | ||||
|         if pricecurrency_result: | ||||
| @@ -140,7 +144,7 @@ class perform_site_check(difference_detection_processor): | ||||
|     screenshot = None | ||||
|     xpath_data = None | ||||
|  | ||||
|     def run_changedetection(self, watch, skip_when_checksum_same=True): | ||||
|     def run_changedetection(self, watch): | ||||
|         import hashlib | ||||
|  | ||||
|         if not watch: | ||||
| @@ -220,7 +224,7 @@ class perform_site_check(difference_detection_processor): | ||||
|             itemprop_availability['original_price'] = itemprop_availability.get('price') | ||||
|             update_obj['restock']["original_price"] = itemprop_availability.get('price') | ||||
|  | ||||
|         if not self.fetcher.instock_data and not itemprop_availability.get('availability'): | ||||
|         if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'): | ||||
|             raise ProcessorException( | ||||
|                 message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.", | ||||
|                 url=watch.get('url'), | ||||
| @@ -237,6 +241,14 @@ class perform_site_check(difference_detection_processor): | ||||
|             update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False | ||||
|             logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.") | ||||
|  | ||||
|         # Very often websites will lie about the 'availability' in the metadata, so if the scraped version says its NOT in stock, use that. | ||||
|         if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock': | ||||
|             if update_obj['restock'].get('in_stock'): | ||||
|                 logger.warning( | ||||
|                     f"Lie detected in the availability machine data!! when scraping said its not in stock!! itemprop was '{itemprop_availability}' and scraped from browser was '{self.fetcher.instock_data}' update obj was {update_obj['restock']} ") | ||||
|                 logger.warning(f"Setting instock to FALSE, scraper found '{self.fetcher.instock_data}' in the body but metadata reported not-in-stock") | ||||
|                 update_obj['restock']["in_stock"] = False | ||||
|  | ||||
|         # What we store in the snapshot | ||||
|         price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else "" | ||||
|         snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}" | ||||
| @@ -299,4 +311,4 @@ class perform_site_check(difference_detection_processor): | ||||
|         # Always record the new checksum | ||||
|         update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|         return changed_detected, update_obj, snapshot_content.encode('utf-8').strip() | ||||
|         return changed_detected, update_obj, snapshot_content.strip() | ||||
|   | ||||
| @@ -0,0 +1,115 @@ | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
|  | ||||
def _task(watch, update_handler):
    """Run one change-detection pass for *watch* and return the resulting text as ``str``.

    Failures never propagate: a missing filter, an empty filter result or any
    other exception is turned into a human readable message that is returned
    in place of the text.

    :param watch: watch object handed to ``update_handler.run_changedetection()``
    :param update_handler: processor instance exposing ``run_changedetection(watch=...)``
    :return: the text after filtering, an error message, or 'Empty content'
    """
    from changedetectionio.content_fetchers.exceptions import ReplyWithContentButNoText
    from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse

    try:
        # The slow process (we run 2 of these in parallel)
        outcome = update_handler.run_changedetection(watch=watch)
        # Only the third item (the text) is of interest here
        _changed_detected, _update_obj, rendered_text = outcome
    except FilterNotFoundInResponse as exc:
        rendered_text = f"Filter not found in HTML: {str(exc)}"
    except ReplyWithContentButNoText:
        rendered_text = "Filter found but no text (empty result)"
    except Exception as exc:
        rendered_text = f"Error: {str(exc)}"

    # .strip() works for both str and bytes, so this check happens before any decoding
    if not rendered_text.strip():
        return 'Empty content'

    # run_changedetection may hand back bytes (snapshots are saved as bytes), callers want str
    if isinstance(rendered_text, bytes):
        return rendered_text.decode('utf-8')

    return rendered_text
|  | ||||
|  | ||||
def prepare_filter_prevew(datastore, watch_uuid):
    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])

    Re-runs the text_json_diff processor against the last HTML snapshot stored on
    disk for the watch, once with the (unsaved) form values from the current
    request applied and once with a blank watch that has no filters, so the UI
    can show the text before and after filtering.

    :param datastore: application datastore (``datastore.data['watching']`` / ``['settings']`` are read)
    :param watch_uuid: UUID of the watch being edited
    :return: flask JSON response with the keys ``after_filter``, ``before_filter``,
             ``duration``, ``trigger_line_numbers`` and ``ignore_line_numbers``
    '''
    from changedetectionio import forms, html_tools
    from changedetectionio.model.Watch import model as watch_model
    from concurrent.futures import ProcessPoolExecutor
    from copy import deepcopy
    from flask import request, jsonify
    import brotli
    import importlib
    import os
    import time

    started_at = time.time()

    text_after_filter = ''
    text_before_filter = ''
    trigger_line_numbers = []
    ignore_line_numbers = []

    # Work on a copy so the form values spliced in below never touch the stored watch
    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))

    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
        # Splice in the temporary stuff from the form
        submitted = request.form if request.method == 'POST' else None
        form = forms.processor_text_json_diff_form(formdata=submitted, data=request.form)

        # Only update vars that came in via the AJAX post
        posted_fields = {}
        for field_name, field_value in form.data.items():
            if field_name in request.form.keys():
                posted_fields[field_name] = field_value
        tmp_watch.update(posted_fields)

        # Same URL, but none of the filters - gives the "before" text
        blank_watch_no_filters = watch_model()
        blank_watch_no_filters['url'] = tmp_watch.get('url')

        # Last key of the history is used as the name of the snapshot to load
        latest_filename = next(reversed(tmp_watch.history))
        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")

        with open(html_fname, 'rb') as snapshot_file:
            raw_snapshot = snapshot_file.read()
            if html_fname.endswith('.br'):
                raw_snapshot = brotli.decompress(raw_snapshot)
            decompressed_data = raw_snapshot.decode('utf-8')

            # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
            update_handler = processor_module.perform_site_check(
                datastore=datastore,
                watch_uuid=tmp_watch.get('uuid'),  # probably not needed anymore anyway?
            )

            # Use the last loaded HTML as the input
            update_handler.datastore = datastore
            # str() because playwright/puppeteer/requests return string
            update_handler.fetcher.content = str(decompressed_data)
            update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')

            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters
            # but also with the same HTML from disk. Done in parallel processes because it could take some time.
            with ProcessPoolExecutor(max_workers=2) as executor:
                filtered_job = executor.submit(_task, tmp_watch, update_handler)
                unfiltered_job = executor.submit(_task, blank_watch_no_filters, update_handler)

                text_after_filter = filtered_job.result()
                text_before_filter = unfiltered_job.result()

    # Any failure below is reported to the UI through the 'before_filter' text
    try:
        trigger_line_numbers = html_tools.strip_ignore_text(
            content=text_after_filter,
            wordlist=tmp_watch['trigger_text'],
            mode='line numbers',
        )
    except Exception as exc:
        text_before_filter = f"Error: {str(exc)}"

    try:
        global_ignore_text = datastore.data['settings']['application'].get('global_ignore_text', [])
        text_to_ignore = tmp_watch.get('ignore_text', []) + global_ignore_text
        ignore_line_numbers = html_tools.strip_ignore_text(
            content=text_after_filter,
            wordlist=text_to_ignore,
            mode='line numbers',
        )
    except Exception as exc:
        text_before_filter = f"Error: {str(exc)}"

    logger.trace(f"Parsed in {time.time() - started_at:.3f}s")

    if isinstance(text_before_filter, bytes):
        text_before_filter = text_before_filter.decode('utf-8')

    payload = {
        'after_filter': text_after_filter,
        'before_filter': text_before_filter,
        'duration': time.time() - started_at,
        'trigger_line_numbers': trigger_line_numbers,
        'ignore_line_numbers': ignore_line_numbers,
    }
    return jsonify(payload)
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ import re | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio.processors import difference_detection_processor | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| from loguru import logger | ||||
| @@ -35,8 +35,7 @@ class PDFToHTMLToolNotFound(ValueError): | ||||
| # (set_proxy_from_list) | ||||
| class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|     def run_changedetection(self, watch, skip_when_checksum_same=True): | ||||
|  | ||||
|     def run_changedetection(self, watch): | ||||
|         changed_detected = False | ||||
|         html_content = "" | ||||
|         screenshot = False  # as bytes | ||||
| @@ -59,9 +58,6 @@ class perform_site_check(difference_detection_processor): | ||||
|         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run | ||||
|         # Saves a lot of CPU | ||||
|         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() | ||||
|         if skip_when_checksum_same: | ||||
|             if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'): | ||||
|                 raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame() | ||||
|  | ||||
|         # Fetching complete, now filters | ||||
|  | ||||
| @@ -205,22 +201,14 @@ class perform_site_check(difference_detection_processor): | ||||
|         if watch.get('trim_text_whitespace'): | ||||
|             stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()) | ||||
|  | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         # Also used to calculate/show what was removed | ||||
|         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') | ||||
|         text_content_before_ignored_filter = stripped_text_from_html | ||||
|  | ||||
|         # @todo whitespace coming from missing rtrim()? | ||||
|         # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about. | ||||
|         # Rewrite's the processing text based on only what diff result they want to see | ||||
|  | ||||
|         if watch.has_special_diff_filter_options_set() and len(watch.history.keys()): | ||||
|             # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences | ||||
|             from changedetectionio import diff | ||||
| @@ -235,12 +223,12 @@ class perform_site_check(difference_detection_processor): | ||||
|                                              line_feed_sep="\n", | ||||
|                                              include_change_type_prefix=False) | ||||
|  | ||||
|             watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter) | ||||
|             watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8')) | ||||
|  | ||||
|             if not rendered_diff and stripped_text_from_html: | ||||
|                 # We had some content, but no differences were found | ||||
|                 # Store our new file as the MD5 so it will trigger in the future | ||||
|                 c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest() | ||||
|                 c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8') | ||||
|             else: | ||||
|                 stripped_text_from_html = rendered_diff | ||||
| @@ -261,14 +249,6 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         update_obj["last_check_status"] = self.fetcher.get_last_status_code() | ||||
|  | ||||
|         # If there's text to skip | ||||
|         # @todo we could abstract out the get_text() to handle this cleaner | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         if len(text_to_ignore): | ||||
|             stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
|         else: | ||||
|             stripped_text_from_html = stripped_text_from_html.encode('utf8') | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = watch.get('extract_text', []) | ||||
|         if len(extract_text) > 0: | ||||
| @@ -277,39 +257,53 @@ class perform_site_check(difference_detection_processor): | ||||
|                 # incase they specified something in '/.../x' | ||||
|                 if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): | ||||
|                     regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) | ||||
|                     result = re.findall(regex.encode('utf-8'), stripped_text_from_html) | ||||
|                     result = re.findall(regex, stripped_text_from_html) | ||||
|  | ||||
|                     for l in result: | ||||
|                         if type(l) is tuple: | ||||
|                             # @todo - some formatter option default (between groups) | ||||
|                             regex_matched_output += list(l) + [b'\n'] | ||||
|                             regex_matched_output += list(l) + ['\n'] | ||||
|                         else: | ||||
|                             # @todo - some formatter option default (between each ungrouped result) | ||||
|                             regex_matched_output += [l] + [b'\n'] | ||||
|                             regex_matched_output += [l] + ['\n'] | ||||
|                 else: | ||||
|                     # Doesnt look like regex, just hunt for plaintext and return that which matches | ||||
|                     # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes | ||||
|                     r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE) | ||||
|                     r = re.compile(re.escape(s_re), re.IGNORECASE) | ||||
|                     res = r.findall(stripped_text_from_html) | ||||
|                     if res: | ||||
|                         for match in res: | ||||
|                             regex_matched_output += [match] + [b'\n'] | ||||
|                             regex_matched_output += [match] + ['\n'] | ||||
|  | ||||
|             ########################################################## | ||||
|             stripped_text_from_html = b'' | ||||
|             text_content_before_ignored_filter = b'' | ||||
|             stripped_text_from_html = '' | ||||
|  | ||||
|             if regex_matched_output: | ||||
|                 # @todo some formatter for presentation? | ||||
|                 stripped_text_from_html = b''.join(regex_matched_output) | ||||
|                 text_content_before_ignored_filter = stripped_text_from_html | ||||
|                 stripped_text_from_html = ''.join(regex_matched_output) | ||||
|  | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|  | ||||
| ### CALCULATE MD5 | ||||
|         # If there's text to ignore | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         text_for_checksuming = stripped_text_from_html | ||||
|         if text_to_ignore: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
|  | ||||
|         # Re #133 - if we should strip whitespaces from triggering the change detected comparison | ||||
|         if self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest() | ||||
|         if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|         else: | ||||
|             fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest() | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         blocked = False | ||||
| @@ -337,19 +331,33 @@ class perform_site_check(difference_detection_processor): | ||||
|             if result: | ||||
|                 blocked = True | ||||
|  | ||||
|         # The main thing that all this at the moment comes down to :) | ||||
|         if watch.get('previous_md5') != fetched_md5: | ||||
|             changed_detected = True | ||||
|  | ||||
|         # Looks like something changed, but did it match all the rules? | ||||
|         if blocked: | ||||
|             changed_detected = False | ||||
|         else: | ||||
|             # The main thing that all this at the moment comes down to :) | ||||
|             if watch.get('previous_md5') != fetched_md5: | ||||
|                 changed_detected = True | ||||
|  | ||||
|             # Always record the new checksum | ||||
|             update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|             # On the first run of a site, watch['previous_md5'] will be None, set it the current one. | ||||
|             if not watch.get('previous_md5'): | ||||
|                 watch['previous_md5'] = fetched_md5 | ||||
|  | ||||
|         logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") | ||||
|  | ||||
|         if changed_detected: | ||||
|             if watch.get('check_unique_lines', False): | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) | ||||
|                 ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace') | ||||
|  | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                     lines=stripped_text_from_html.splitlines(), | ||||
|                     ignore_whitespace=ignore_whitespace | ||||
|                 ) | ||||
|  | ||||
|                 # One or more lines? unsure? | ||||
|                 if not has_unique_lines: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
| @@ -357,12 +365,6 @@ class perform_site_check(difference_detection_processor): | ||||
|                 else: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") | ||||
|  | ||||
|         # Always record the new checksum | ||||
|         update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|         # On the first run of a site, watch['previous_md5'] will be None, set it the current one. | ||||
|         if not watch.get('previous_md5'): | ||||
|             watch['previous_md5'] = fetched_md5 | ||||
|  | ||||
|         # stripped_text_from_html - Everything after filters and NO 'ignored' content | ||||
|         return changed_detected, update_obj, stripped_text_from_html | ||||
|   | ||||
| @@ -1,56 +0,0 @@ | ||||
/**
 * debounce
 * Returns a wrapper that postpones calling the original function until the
 * wrapper has not been called for the given number of milliseconds. Every new
 * call cancels the pending one, so only the last call's arguments are used.
 * @param {integer} milliseconds Quiet period to wait after the last call
 *     before the original function is called.
 * @param {object} [context] What "this" refers to inside the original function;
 *     defaults to the "this" of the wrapper call.
 * @return {function} The debounced wrapper.
 */
Function.prototype.debounce = function (milliseconds, context) {
    var target = this;
    var pending = null;

    return function () {
        var receiver = context || this;
        var callArgs = arguments;

        // A call is already waiting - drop it, this one replaces it
        if (pending) {
            clearTimeout(pending);
        }

        pending = setTimeout(function () {
            target.apply(receiver, callArgs);
            pending = null;
        }, milliseconds);
    };
};
|  | ||||
/**
 * throttle
 * Returns a wrapper that calls the original function immediately, then ignores
 * further calls until the given number of milliseconds has passed since the
 * last call that went through.
 * @param {integer} milliseconds Minimum time between two calls of the
 *     original function.
 * @param {object} [context] What "this" refers to inside the original function;
 *     defaults to the "this" of the wrapper call.
 * @return {function} The throttled wrapper.
 */
Function.prototype.throttle = function (milliseconds, context) {
    var target = this;
    var lastRun = null;

    return function () {
        var current = Date.now();
        var due = !lastRun || current - lastRun >= milliseconds;

        // Still inside the quiet window - swallow this call
        if (!due) {
            return;
        }

        lastRun = current;
        target.apply(context || this, arguments);
    };
};
| @@ -1,64 +1,106 @@ | ||||
| (function($) { | ||||
| (function ($) { | ||||
|     /** | ||||
|      * debounce | ||||
|      * @param {integer} milliseconds This param indicates the number of milliseconds | ||||
|      *     to wait after the last call before calling the original function. | ||||
|      * @param {object} What "this" refers to in the returned function. | ||||
|      * @return {function} This returns a function that when called will wait the | ||||
|      *     indicated number of milliseconds after the last call before | ||||
|      *     calling the original function. | ||||
|      */ | ||||
|     Function.prototype.debounce = function (milliseconds, context) { | ||||
|         var baseFunction = this, | ||||
|             timer = null, | ||||
|             wait = milliseconds; | ||||
|  | ||||
| /* | ||||
|     $('#code-block').highlightLines([ | ||||
|       { | ||||
|         'color': '#dd0000', | ||||
|         'lines': [10, 12] | ||||
|       }, | ||||
|       { | ||||
|         'color': '#ee0000', | ||||
|         'lines': [15, 18] | ||||
|       } | ||||
|     ]); | ||||
|   }); | ||||
| */ | ||||
|         return function () { | ||||
|             var self = context || this, | ||||
|                 args = arguments; | ||||
|  | ||||
|   $.fn.highlightLines = function(configurations) { | ||||
|     return this.each(function() { | ||||
|       const $pre = $(this); | ||||
|       const textContent = $pre.text(); | ||||
|       const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings | ||||
|             function complete() { | ||||
|                 baseFunction.apply(self, args); | ||||
|                 timer = null; | ||||
|             } | ||||
|  | ||||
|       // Build a map of line numbers to styles | ||||
|       const lineStyles = {}; | ||||
|             if (timer) { | ||||
|                 clearTimeout(timer); | ||||
|             } | ||||
|  | ||||
|       configurations.forEach(config => { | ||||
|         const { color, lines: lineNumbers } = config; | ||||
|         lineNumbers.forEach(lineNumber => { | ||||
|           lineStyles[lineNumber] = color; | ||||
|             timer = setTimeout(complete, wait); | ||||
|         }; | ||||
|     }; | ||||
|  | ||||
|     /** | ||||
|      * throttle | ||||
|      * @param {integer} milliseconds This param indicates the number of milliseconds | ||||
|      *     to wait between calls before calling the original function. | ||||
|      * @param {object} What "this" refers to in the returned function. | ||||
|      * @return {function} This returns a function that when called will wait the | ||||
|      *     indicated number of milliseconds between calls before | ||||
|      *     calling the original function. | ||||
|      */ | ||||
|     Function.prototype.throttle = function (milliseconds, context) { | ||||
|         var baseFunction = this, | ||||
|             lastEventTimestamp = null, | ||||
|             limit = milliseconds; | ||||
|  | ||||
|         return function () { | ||||
|             var self = context || this, | ||||
|                 args = arguments, | ||||
|                 now = Date.now(); | ||||
|  | ||||
|             if (!lastEventTimestamp || now - lastEventTimestamp >= limit) { | ||||
|                 lastEventTimestamp = now; | ||||
|                 baseFunction.apply(self, args); | ||||
|             } | ||||
|         }; | ||||
|     }; | ||||
|  | ||||
|     $.fn.highlightLines = function (configurations) { | ||||
|         return this.each(function () { | ||||
|             const $pre = $(this); | ||||
|             const textContent = $pre.text(); | ||||
|             const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings | ||||
|  | ||||
|             // Build a map of line numbers to styles | ||||
|             const lineStyles = {}; | ||||
|  | ||||
|             configurations.forEach(config => { | ||||
|                 const {color, lines: lineNumbers} = config; | ||||
|                 lineNumbers.forEach(lineNumber => { | ||||
|                     lineStyles[lineNumber] = color; | ||||
|                 }); | ||||
|             }); | ||||
|  | ||||
|             // Function to escape HTML characters | ||||
|             function escapeHtml(text) { | ||||
|                 return text.replace(/[&<>"'`=\/]/g, function (s) { | ||||
|                     return "&#" + s.charCodeAt(0) + ";"; | ||||
|                 }); | ||||
|             } | ||||
|  | ||||
|             // Process each line | ||||
|             const processedLines = lines.map((line, index) => { | ||||
|                 const lineNumber = index + 1; // Line numbers start at 1 | ||||
|                 const escapedLine = escapeHtml(line); | ||||
|                 const color = lineStyles[lineNumber]; | ||||
|  | ||||
|                 if (color) { | ||||
|                     // Wrap the line in a span with inline style | ||||
|                     return `<span style="background-color: ${color}">${escapedLine}</span>`; | ||||
|                 } else { | ||||
|                     return escapedLine; | ||||
|                 } | ||||
|             }); | ||||
|  | ||||
|             // Join the lines back together | ||||
|             const newContent = processedLines.join('\n'); | ||||
|  | ||||
|             // Set the new content as HTML | ||||
|             $pre.html(newContent); | ||||
|         }); | ||||
|       }); | ||||
|  | ||||
|       // Function to escape HTML characters | ||||
|       function escapeHtml(text) { | ||||
|         return text.replace(/[&<>"'`=\/]/g, function(s) { | ||||
|           return "&#" + s.charCodeAt(0) + ";"; | ||||
|         }); | ||||
|       } | ||||
|  | ||||
|       // Process each line | ||||
|       const processedLines = lines.map((line, index) => { | ||||
|         const lineNumber = index + 1; // Line numbers start at 1 | ||||
|         const escapedLine = escapeHtml(line); | ||||
|         const color = lineStyles[lineNumber]; | ||||
|  | ||||
|         if (color) { | ||||
|           // Wrap the line in a span with inline style | ||||
|           return `<span style="background-color: ${color}">${escapedLine}</span>`; | ||||
|         } else { | ||||
|           return escapedLine; | ||||
|         } | ||||
|       }); | ||||
|  | ||||
|       // Join the lines back together | ||||
|       const newContent = processedLines.join('\n'); | ||||
|  | ||||
|       // Set the new content as HTML | ||||
|       $pre.html(newContent); | ||||
|     }); | ||||
|   }; | ||||
|    $.fn.miniTabs = function(tabsConfig, options) { | ||||
|     }; | ||||
|     $.fn.miniTabs = function (tabsConfig, options) { | ||||
|         const settings = { | ||||
|             tabClass: 'minitab', | ||||
|             tabsContainerClass: 'minitabs', | ||||
| @@ -66,10 +108,10 @@ | ||||
|             ...(options || {}) | ||||
|         }; | ||||
|  | ||||
|         return this.each(function() { | ||||
|         return this.each(function () { | ||||
|             const $wrapper = $(this); | ||||
|             const $contents = $wrapper.find('div[id]').hide(); | ||||
|             const $tabsContainer = $('<div>', { class: settings.tabsContainerClass }).prependTo($wrapper); | ||||
|             const $tabsContainer = $('<div>', {class: settings.tabsContainerClass}).prependTo($wrapper); | ||||
|  | ||||
|             // Generate tabs | ||||
|             Object.entries(tabsConfig).forEach(([tabTitle, contentSelector], index) => { | ||||
| @@ -84,7 +126,7 @@ | ||||
|             }); | ||||
|  | ||||
|             // Tab click event | ||||
|             $tabsContainer.on('click', `.${settings.tabClass}`, function(e) { | ||||
|             $tabsContainer.on('click', `.${settings.tabClass}`, function (e) { | ||||
|                 e.preventDefault(); | ||||
|                 const $tab = $(this); | ||||
|                 const target = $tab.data('target'); | ||||
| @@ -103,7 +145,7 @@ | ||||
|     // Object to store ongoing requests by namespace | ||||
|     const requests = {}; | ||||
|  | ||||
|     $.abortiveSingularAjax = function(options) { | ||||
|     $.abortiveSingularAjax = function (options) { | ||||
|         const namespace = options.namespace || 'default'; | ||||
|  | ||||
|         // Abort the current request in this namespace if it's still ongoing | ||||
|   | ||||
| @@ -1,14 +1,14 @@ | ||||
| $(function () { | ||||
|     /* add container before each proxy location to show status */ | ||||
|  | ||||
|     var option_li = $('.fetch-backend-proxy li').filter(function() { | ||||
|         return $("input",this)[0].value.length >0; | ||||
|     }); | ||||
|  | ||||
|     //var option_li = $('.fetch-backend-proxy li'); | ||||
|     var isActive = false; | ||||
|     $(option_li).prepend('<div class="proxy-status"></div>'); | ||||
|     $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>'); | ||||
|  | ||||
|     function setup_html_widget() { | ||||
|         var option_li = $('.fetch-backend-proxy li').filter(function () { | ||||
|             return $("input", this)[0].value.length > 0; | ||||
|         }); | ||||
|         $(option_li).prepend('<div class="proxy-status"></div>'); | ||||
|         $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>'); | ||||
|     } | ||||
|  | ||||
|     function set_proxy_check_status(proxy_key, state) { | ||||
|         // select input by value name | ||||
| @@ -59,8 +59,14 @@ $(function () { | ||||
|     } | ||||
|  | ||||
|     $('#check-all-proxies').click(function (e) { | ||||
|  | ||||
|         e.preventDefault() | ||||
|         $('body').addClass('proxy-check-active'); | ||||
|  | ||||
|         if (!$('body').hasClass('proxy-check-active')) { | ||||
|             setup_html_widget(); | ||||
|             $('body').addClass('proxy-check-active'); | ||||
|         } | ||||
|  | ||||
|         $('.proxy-check-details').html(''); | ||||
|         $('.proxy-status').html('<span class="spinner"></span>').fadeIn(); | ||||
|         $('.proxy-timing').html(''); | ||||
|   | ||||
| @@ -26,8 +26,7 @@ function set_active_tab() { | ||||
|     if (tab.length) { | ||||
|         tab[0].parentElement.className = "active"; | ||||
|     } | ||||
|     // hash could move the page down | ||||
|     window.scrollTo(0, 0); | ||||
|  | ||||
| } | ||||
|  | ||||
| function focus_error_tab() { | ||||
|   | ||||
| @@ -49,4 +49,9 @@ $(document).ready(function () { | ||||
|         $("#overlay").toggleClass('visible'); | ||||
|         heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)'; | ||||
|     }); | ||||
|  | ||||
|     setInterval(function () { | ||||
|         $('body').toggleClass('spinner-active', $.active > 0); | ||||
|     }, 2000); | ||||
|  | ||||
| }); | ||||
|   | ||||
| @@ -26,25 +26,28 @@ function request_textpreview_update() { | ||||
|         data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val(); | ||||
|     }); | ||||
|  | ||||
|     $('body').toggleClass('spinner-active', 1); | ||||
|  | ||||
|     $.abortiveSingularAjax({ | ||||
|         type: "POST", | ||||
|         url: preview_text_edit_filters_url, | ||||
|         data: data, | ||||
|         namespace: 'watchEdit' | ||||
|     }).done(function (data) { | ||||
|         console.debug(data['duration']) | ||||
|         $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']); | ||||
|  | ||||
|         $('#filters-and-triggers #text-preview-inner') | ||||
|             .text(data['after_filter']) | ||||
|             .highlightLines([ | ||||
|                 { | ||||
|                     'color': '#ee0000', | ||||
|                     'lines': data['trigger_line_numbers'] | ||||
|                 }, | ||||
|                 { | ||||
|                     'color': '#757575', | ||||
|                     'lines': data['ignore_line_numbers'] | ||||
|                 } | ||||
|             ]); | ||||
|  | ||||
|  | ||||
|  | ||||
|             ]) | ||||
|     }).fail(function (error) { | ||||
|         if (error.statusText === 'abort') { | ||||
|             console.log('Request was aborted due to a new request being fired.'); | ||||
| @@ -73,18 +76,13 @@ $(document).ready(function () { | ||||
|     $("#text-preview-inner").css('max-height', (vh-300)+"px"); | ||||
|     $("#text-preview-before-inner").css('max-height', (vh-300)+"px"); | ||||
|  | ||||
|     // Realtime preview of 'Filters & Text' setup | ||||
|     var debounced_request_textpreview_update = request_textpreview_update.debounce(100); | ||||
|  | ||||
|     $("#activate-text-preview").click(function (e) { | ||||
|         $('body').toggleClass('preview-text-enabled') | ||||
|         request_textpreview_update(); | ||||
|  | ||||
|         const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off'; | ||||
|         $("#text-preview-refresh")[method]('click', debounced_request_textpreview_update); | ||||
|         $('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update); | ||||
|         $('input:visible')[method]('keyup blur change', debounced_request_textpreview_update); | ||||
|         $("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update); | ||||
|         $('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000)); | ||||
|         $('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000)); | ||||
|         $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000)); | ||||
|     }); | ||||
|     $('.minitabs-wrapper').miniTabs({ | ||||
|         "Content after filters": "#text-preview-inner", | ||||
|   | ||||
| @@ -25,15 +25,19 @@ ul#requests-extra_proxies { | ||||
|  | ||||
| body.proxy-check-active { | ||||
|   #request { | ||||
|     // Padding set by flex layout | ||||
|     /* | ||||
|     .proxy-status { | ||||
|       width: 2em; | ||||
|     } | ||||
|     */ | ||||
|  | ||||
|     .proxy-check-details { | ||||
|       font-size: 80%; | ||||
|       color: #555; | ||||
|       display: block; | ||||
|       padding-left: 4em; | ||||
|       padding-left: 2em; | ||||
|       max-width: 500px; | ||||
|     } | ||||
|  | ||||
|     .proxy-timing { | ||||
|   | ||||
| @@ -7,6 +7,16 @@ | ||||
|     border-top: none; | ||||
|   } | ||||
|  | ||||
|   .minitabs-content { | ||||
|     width: 100%; | ||||
|     display: flex; | ||||
|     > div { | ||||
|       flex: 1 1 auto; | ||||
|       min-width: 0; | ||||
|       overflow: scroll; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   .minitabs { | ||||
|     display: flex; | ||||
|     border-bottom: 1px solid #ccc; | ||||
|   | ||||
| @@ -42,9 +42,8 @@ body.preview-text-enabled { | ||||
|     color: var(--color-text-input); | ||||
|     font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */ | ||||
|     font-size: 70%; | ||||
|     overflow-x: scroll; | ||||
|     word-break: break-word; | ||||
|     white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */ | ||||
|     overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */ | ||||
|   } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -106,10 +106,34 @@ button.toggle-button { | ||||
|   padding: 5px; | ||||
|   display: flex; | ||||
|   justify-content: space-between; | ||||
|   border-bottom: 2px solid var(--color-menu-accent); | ||||
|   align-items: center; | ||||
| } | ||||
|  | ||||
| #pure-menu-horizontal-spinner { | ||||
|   height: 3px; | ||||
|   background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000); | ||||
|   background-size: 400% 400%; | ||||
|   width: 100%; | ||||
|   animation: gradient 200s ease infinite; | ||||
| } | ||||
|  | ||||
| body.spinner-active { | ||||
|   #pure-menu-horizontal-spinner { | ||||
|     animation: gradient 1s ease infinite; | ||||
|   } | ||||
| } | ||||
|  | ||||
| @keyframes gradient { | ||||
| 	0% { | ||||
| 		background-position: 0% 50%; | ||||
| 	} | ||||
| 	50% { | ||||
| 		background-position: 100% 50%; | ||||
| 	} | ||||
| 	100% { | ||||
| 		background-position: 0% 50%; | ||||
| 	} | ||||
| } | ||||
| .pure-menu-heading { | ||||
|   color: var(--color-text-menu-heading); | ||||
| } | ||||
| @@ -123,8 +147,14 @@ button.toggle-button { | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| .tab-pane-inner { | ||||
|   // .tab-pane-inner will have the #id that the tab button jumps/anchors to | ||||
|   scroll-margin-top: 200px; | ||||
| } | ||||
|  | ||||
| section.content { | ||||
|   padding-top: 5em; | ||||
|   padding-top: 100px; | ||||
|   padding-bottom: 1em; | ||||
|   flex-direction: column; | ||||
|   display: flex; | ||||
| @@ -907,6 +937,7 @@ $form-edge-padding: 20px; | ||||
| } | ||||
|  | ||||
| .tab-pane-inner { | ||||
|  | ||||
|   &:not(:target) { | ||||
|     display: none; | ||||
|   } | ||||
|   | ||||
| @@ -119,19 +119,22 @@ ul#requests-extra_proxies { | ||||
|   #request label[for=proxy] { | ||||
|     display: inline-block; } | ||||
|  | ||||
| body.proxy-check-active #request .proxy-status { | ||||
|   width: 2em; } | ||||
|  | ||||
| body.proxy-check-active #request .proxy-check-details { | ||||
|   font-size: 80%; | ||||
|   color: #555; | ||||
|   display: block; | ||||
|   padding-left: 4em; } | ||||
|  | ||||
| body.proxy-check-active #request .proxy-timing { | ||||
|   font-size: 80%; | ||||
|   padding-left: 1rem; | ||||
|   color: var(--color-link); } | ||||
| body.proxy-check-active #request { | ||||
|   /* | ||||
|     .proxy-status { | ||||
|       width: 2em; | ||||
|     } | ||||
|     */ } | ||||
|   body.proxy-check-active #request .proxy-check-details { | ||||
|     font-size: 80%; | ||||
|     color: #555; | ||||
|     display: block; | ||||
|     padding-left: 2em; | ||||
|     max-width: 500px; } | ||||
|   body.proxy-check-active #request .proxy-timing { | ||||
|     font-size: 80%; | ||||
|     padding-left: 1rem; | ||||
|     color: var(--color-link); } | ||||
|  | ||||
| #recommended-proxy { | ||||
|   display: grid; | ||||
| @@ -434,6 +437,13 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark { | ||||
|     padding: 20px; | ||||
|     border: 1px solid #ccc; | ||||
|     border-top: none; } | ||||
|   .minitabs-wrapper .minitabs-content { | ||||
|     width: 100%; | ||||
|     display: flex; } | ||||
|     .minitabs-wrapper .minitabs-content > div { | ||||
|       flex: 1 1 auto; | ||||
|       min-width: 0; | ||||
|       overflow: scroll; } | ||||
|   .minitabs-wrapper .minitabs { | ||||
|     display: flex; | ||||
|     border-bottom: 1px solid #ccc; } | ||||
| @@ -488,11 +498,9 @@ body.preview-text-enabled { | ||||
|     font-family: "Courier New", Courier, monospace; | ||||
|     /* Sets the font to a monospace type */ | ||||
|     font-size: 70%; | ||||
|     overflow-x: scroll; | ||||
|     word-break: break-word; | ||||
|     white-space: pre-wrap; | ||||
|     /* Preserves whitespace and line breaks like <pre> */ | ||||
|     overflow-wrap: break-word; | ||||
|     /* Allows long words to break and wrap to the next line */ } | ||||
|     /* Preserves whitespace and line breaks like <pre> */ } | ||||
|  | ||||
| #activate-text-preview { | ||||
|   right: 0; | ||||
| @@ -568,9 +576,26 @@ button.toggle-button { | ||||
|   padding: 5px; | ||||
|   display: flex; | ||||
|   justify-content: space-between; | ||||
|   border-bottom: 2px solid var(--color-menu-accent); | ||||
|   align-items: center; } | ||||
|  | ||||
| #pure-menu-horizontal-spinner { | ||||
|   height: 3px; | ||||
|   background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000); | ||||
|   background-size: 400% 400%; | ||||
|   width: 100%; | ||||
|   animation: gradient 200s ease infinite; } | ||||
|  | ||||
| body.spinner-active #pure-menu-horizontal-spinner { | ||||
|   animation: gradient 1s ease infinite; } | ||||
|  | ||||
| @keyframes gradient { | ||||
|   0% { | ||||
|     background-position: 0% 50%; } | ||||
|   50% { | ||||
|     background-position: 100% 50%; } | ||||
|   100% { | ||||
|     background-position: 0% 50%; } } | ||||
|  | ||||
| .pure-menu-heading { | ||||
|   color: var(--color-text-menu-heading); } | ||||
|  | ||||
| @@ -580,8 +605,11 @@ button.toggle-button { | ||||
|     background-color: var(--color-background-menu-link-hover); | ||||
|     color: var(--color-text-menu-link-hover); } | ||||
|  | ||||
| .tab-pane-inner { | ||||
|   scroll-margin-top: 200px; } | ||||
|  | ||||
| section.content { | ||||
|   padding-top: 5em; | ||||
|   padding-top: 100px; | ||||
|   padding-bottom: 1em; | ||||
|   flex-direction: column; | ||||
|   display: flex; | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from flask import ( | ||||
|     flash | ||||
| ) | ||||
|  | ||||
| from .html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
| from . model import App, Watch | ||||
| from copy import deepcopy, copy | ||||
| from os import path, unlink | ||||
| @@ -750,17 +751,17 @@ class ChangeDetectionStore: | ||||
|     def update_5(self): | ||||
|         # If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings | ||||
|         # In other words - the watch notification_title and notification_body are not needed if they are the same as the default one | ||||
|         current_system_body = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n ")) | ||||
|         current_system_title = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n ")) | ||||
|         current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE) | ||||
|         current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE) | ||||
|         for uuid, watch in self.data['watching'].items(): | ||||
|             try: | ||||
|                 watch_body = watch.get('notification_body', '') | ||||
|                 if watch_body and watch_body.translate(str.maketrans('', '', "\r\n ")) == current_system_body: | ||||
|                 if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body: | ||||
|                     # Looks the same as the default one, so unset it | ||||
|                     watch['notification_body'] = None | ||||
|  | ||||
|                 watch_title = watch.get('notification_title', '') | ||||
|                 if watch_title and watch_title.translate(str.maketrans('', '', "\r\n ")) == current_system_title: | ||||
|                 if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title: | ||||
|                     # Looks the same as the default one, so unset it | ||||
|                     watch['notification_title'] = None | ||||
|             except Exception as e: | ||||
|   | ||||
| @@ -35,7 +35,9 @@ | ||||
|  | ||||
|   <body class=""> | ||||
|     <div class="header"> | ||||
|       <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu"> | ||||
|     <div class="pure-menu-fixed" style="width: 100%;"> | ||||
|       <div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu"> | ||||
|  | ||||
|         {% if has_password and not current_user.is_authenticated %} | ||||
|           <a class="pure-menu-heading" href="https://changedetection.io" rel="noopener"> | ||||
|             <strong>Change</strong>Detection.io</a> | ||||
| @@ -129,7 +131,12 @@ | ||||
|           </li> | ||||
|         </ul> | ||||
|       </div> | ||||
|       <div id="pure-menu-horizontal-spinner"></div> | ||||
|       </div> | ||||
|  | ||||
|     </div> | ||||
|  | ||||
|  | ||||
|     {% if hosted_sticky %} | ||||
|       <div class="sticky-tab" id="hosted-sticky"> | ||||
|         <a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a> | ||||
|   | ||||
| @@ -26,7 +26,6 @@ | ||||
| </script> | ||||
| <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script> | ||||
| {% if playwright_enabled %} | ||||
| @@ -330,9 +329,9 @@ nav | ||||
|                         {{ render_checkbox_field(form.filter_text_added) }} | ||||
|                         {{ render_checkbox_field(form.filter_text_replaced) }} | ||||
|                         {{ render_checkbox_field(form.filter_text_removed) }} | ||||
|                     <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span> | ||||
|                     <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br> | ||||
|                     <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> | ||||
|                     <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br> | ||||
|                     <span class="pure-form-message-inline"> So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br> | ||||
|                     <span class="pure-form-message-inline"> When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.check_unique_lines) }} | ||||
| @@ -371,7 +370,7 @@ nav | ||||
| ") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                             <li>Matching text will be <strong>removed</strong> from the text snapshot</li> | ||||
|                             <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li> | ||||
|                             <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> | ||||
|                             <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> | ||||
|                             <li>Changing this will affect the comparison checksum which may trigger an alert</li> | ||||
| @@ -398,7 +397,9 @@ Unavailable") }} | ||||
|                 </fieldset> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }} | ||||
|                         {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/ | ||||
|  or | ||||
| keyword") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                     <ul> | ||||
|                         <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match; | ||||
| @@ -424,14 +425,15 @@ Unavailable") }} | ||||
|                     </script> | ||||
|                     <br> | ||||
|                     {#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#} | ||||
|  | ||||
|                     <div class="minitabs-wrapper"> | ||||
|                         <div id="text-preview-inner" class="monospace-preview"> | ||||
|                             <p>Loading...</p> | ||||
|                         </div> | ||||
|                         <div id="text-preview-before-inner" style="display: none;" class="monospace-preview"> | ||||
|                             <p>Loading...</p> | ||||
|                         </div> | ||||
|                       <div class="minitabs-content"> | ||||
|                           <div id="text-preview-inner" class="monospace-preview"> | ||||
|                               <p>Loading...</p> | ||||
|                           </div> | ||||
|                           <div id="text-preview-before-inner" style="display: none;" class="monospace-preview"> | ||||
|                               <p>Loading...</p> | ||||
|                           </div> | ||||
|                       </div> | ||||
|                     </div> | ||||
|             </div> | ||||
|           </div> | ||||
|   | ||||
| @@ -172,7 +172,7 @@ nav | ||||
|                     <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br> | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                             <li>Matching text will be <strong>removed</strong> from the text snapshot</li> | ||||
|                             <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li> | ||||
|                             <li>Note: This is applied globally in addition to the per-watch rules.</li> | ||||
|                             <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> | ||||
|                             <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> | ||||
|   | ||||
							
								
								
									
										6
									
								
								changedetectionio/tests/itemprop_test_examples/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								changedetectionio/tests/itemprop_test_examples/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| # A list of real world examples! | ||||
|  | ||||
| Always the price should be 666.66 for our tests | ||||
|  | ||||
| see test_restock_itemprop.py::test_special_prop_examples | ||||
|  | ||||
							
								
								
									
										25
									
								
								changedetectionio/tests/itemprop_test_examples/a.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								changedetectionio/tests/itemprop_test_examples/a.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| <div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection" | ||||
|      data-testid="price-section" | ||||
|      data-optly-product-tile-price-section="true"><span | ||||
|         class="PriceRange ProductPrice variant-huge" itemprop="offers" | ||||
|         itemscope="" itemtype="http://schema.org/Offer"><div | ||||
|         class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span | ||||
|         aria-hidden="true" class="Price variant-huge" data-testid="price" | ||||
|         itemprop="price"><sup class="sup" data-testid="price-symbol" | ||||
|                               itemprop="priceCurrency" content="AUD">$</sup><span | ||||
|         class="dollars" data-testid="price-value" itemprop="price" | ||||
|         content="155.55">155.55</span><span class="extras"><span class="sup" | ||||
|                                                               data-testid="price-sup"></span></span></span></span> | ||||
| </div> | ||||
|  | ||||
| <script type="application/ld+json">{ | ||||
|                                 "@type": "Product", | ||||
|                                 "@context": "https://schema.org", | ||||
|                                 "name": "test", | ||||
|                                 "description": "test", | ||||
|                                 "offers": { | ||||
|                                     "@type": "Offer", | ||||
|                                     "priceCurrency": "AUD", | ||||
|                                     "price": 155.55 | ||||
|                                 }, | ||||
|                             }</script> | ||||
| @@ -16,4 +16,4 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(3) | ||||
|     wait_for_all_checks(client) | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import json | ||||
| import os | ||||
| from flask import url_for | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
|  | ||||
| def set_response(): | ||||
| @@ -18,7 +19,6 @@ def set_response(): | ||||
|         f.write(data) | ||||
|     time.sleep(1) | ||||
|  | ||||
|  | ||||
| def test_socks5(client, live_server, measure_memory_usage): | ||||
|     live_server_setup(live_server) | ||||
|     set_response() | ||||
| @@ -79,3 +79,24 @@ def test_socks5(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Should see the proper string | ||||
|     assert "Awesome, you made it".encode('utf-8') in res.data | ||||
|  | ||||
|     # PROXY CHECKER WIDGET CHECK - this needs more checking | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("check_proxies.start_check", uuid=uuid), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     # It's probably already finished super fast :( | ||||
|     #assert b"RUNNING" in res.data | ||||
|      | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get( | ||||
|         url_for("check_proxies.get_recheck_status", uuid=uuid), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"OK" in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
|   | ||||
| @@ -77,6 +77,8 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|  | ||||
|     # The trigger line is REMOVED,  this should trigger | ||||
|     set_original(excluding='The golden line') | ||||
|  | ||||
|     # Check in the processor here what's going on, it's triggering empty-reply and no change. | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
| @@ -151,7 +153,6 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|  | ||||
|     # A line that's not the trigger should not trigger anything | ||||
|     res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     assert b'1 watches queued for rechecking.' in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
| @@ -173,6 +174,5 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|         assert b'-Oh yes please-' in response | ||||
|         assert '网站监测 内容更新了'.encode('utf-8') in response | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|   | ||||
| @@ -65,11 +65,8 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|     live_server_setup(live_server) | ||||
|     # Use a mix of case in ZzZ to prove it works case-insensitive. | ||||
|     ignore_text = "out of stoCk\r\nfoobar" | ||||
|  | ||||
|     set_original_ignore_response() | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
| @@ -127,13 +124,24 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|     # 2548 | ||||
|     # Going back to the ORIGINAL should NOT trigger a change | ||||
|     set_original_ignore_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|     # Now we set a change where the text is gone, it should now trigger | ||||
|  | ||||
|     # Now we set a change where the text is gone AND its different content, it should now trigger | ||||
|     set_modified_response_minus_block_text() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|   | ||||
| @@ -5,7 +5,7 @@ import time | ||||
| from flask import url_for | ||||
|  | ||||
| from ..html_tools import * | ||||
| from .util import live_server_setup | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def test_setup(live_server): | ||||
| @@ -119,12 +119,10 @@ across multiple lines | ||||
|  | ||||
|  | ||||
| def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     set_original_response() | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for("test_endpoint", _external=True) | ||||
| @@ -132,7 +130,8 @@ def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|         url_for("import_page"), data={"urls": test_url}, follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(1) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Goto the edit page, add the filter data | ||||
|     # Not sure why \r needs to be added - absent of the #changetext this is not necessary | ||||
|     subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext" | ||||
| @@ -148,6 +147,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|         follow_redirects=True, | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Check it saved | ||||
|     res = client.get( | ||||
| @@ -156,10 +156,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|     assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'1 watches queued for rechecking.' in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # so that we set the state to 'unviewed' after all the edits | ||||
|     client.get(url_for("diff_history_page", uuid="first")) | ||||
| @@ -168,10 +168,11 @@ def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|     set_modified_response() | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'1 watches queued for rechecking.' in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # There should not be an unviewed change, as changes should be removed | ||||
|     res = client.get(url_for("index")) | ||||
|   | ||||
| @@ -71,7 +71,7 @@ def test_setup(client, live_server, measure_memory_usage): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_check_filter_multiline(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|    # live_server_setup(live_server) | ||||
|     set_multiline_response() | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|   | ||||
| @@ -33,13 +33,17 @@ def test_strip_regex_text_func(): | ||||
|  | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines) | ||||
|  | ||||
|     assert b"but 1 lines" in stripped_content | ||||
|     assert b"igNORe-cAse text" not in stripped_content | ||||
|     assert b"but 1234 lines" not in stripped_content | ||||
|     assert b"really" not in stripped_content | ||||
|     assert b"not this" not in stripped_content | ||||
|     assert "but 1 lines" in stripped_content | ||||
|     assert "igNORe-cAse text" not in stripped_content | ||||
|     assert "but 1234 lines" not in stripped_content | ||||
|     assert "really" not in stripped_content | ||||
|     assert "not this" not in stripped_content | ||||
|  | ||||
|     # Check line number reporting | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers") | ||||
|     assert stripped_content == [2, 5, 6, 7, 8, 10] | ||||
|  | ||||
|     # Check that linefeeds are preserved when there are no matching ignores | ||||
|     content = "some text\n\nand other text\n" | ||||
|     stripped_content = html_tools.strip_ignore_text(content, ignore_lines) | ||||
|     assert content == stripped_content | ||||
|   | ||||
| @@ -22,10 +22,15 @@ def test_strip_text_func(): | ||||
|     ignore_lines = ["sometimes"] | ||||
|  | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines) | ||||
|     assert "sometimes" not in stripped_content | ||||
|     assert "Some content" in stripped_content | ||||
|  | ||||
|     assert b"sometimes" not in stripped_content | ||||
|     assert b"Some content" in stripped_content | ||||
|     # Check that line feeds don't get chewed up when something is found | ||||
|     test_content = "Some initial text\n\nWhich is across multiple lines\n\nZZZZz\n\n\nSo let's see what happens." | ||||
|     ignore = ['something irrelevent but just to check', 'XXXXX', 'YYYYY', 'ZZZZZ'] | ||||
|  | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore) | ||||
|     assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens." | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| @@ -141,8 +146,6 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     # Just to be sure.. set a regular modified change.. | ||||
|     set_modified_original_ignore_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
| @@ -153,17 +156,17 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa | ||||
|  | ||||
|     res = client.get(url_for("preview_page", uuid="first")) | ||||
|  | ||||
|     # Should no longer be in the preview | ||||
|     assert b'new ignore stuff' not in res.data | ||||
|     # SHOULD BE in the preview, it was added in set_modified_original_ignore_response() | ||||
|     # and we have "new ignore stuff" in ignore_text | ||||
|     # it is only ignored, it is not removed (it will be highlighted too) | ||||
|     assert b'new ignore stuff' in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| # When adding some ignore text, it should not trigger a change, even if something else on that line changes | ||||
| def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     #live_server_setup(live_server) | ||||
|     ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ" | ||||
|     set_original_ignore_response() | ||||
|  | ||||
| @@ -172,6 +175,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|         url_for("settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-global_ignore_text": ignore_text, | ||||
|             'application-fetch_backend': "html_requests" | ||||
|         }, | ||||
| @@ -192,9 +196,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|  | ||||
|     # Goto the edit page of the item, add our ignore text | ||||
|     # Add our URL to the import page | ||||
|     #Adding some ignore text should not trigger a change | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"}, | ||||
| @@ -210,20 +212,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # so that we are sure everything is viewed and in a known 'nothing changed' state | ||||
|     res = client.get(url_for("diff_history_page", uuid="first")) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     # It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
| ##### | ||||
|  | ||||
|  | ||||
|     #  Make a change which includes the ignore text | ||||
|     # Make a change which includes the ignore text, it should be ignored and no 'change' triggered | ||||
|     # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list | ||||
|     set_modified_ignore_response() | ||||
|  | ||||
|     # Trigger a check | ||||
| @@ -233,6 +230,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|  | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|   | ||||
							
								
								
									
										78
									
								
								changedetectionio/tests/test_live_preview.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								changedetectionio/tests/test_live_preview.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| from flask import url_for | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
|  | ||||
| def set_response(): | ||||
|  | ||||
|     data = f"""<html> | ||||
|        <body>Awesome, you made it<br> | ||||
| yeah the socks request worked<br> | ||||
| something to ignore<br> | ||||
| something to trigger<br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(data) | ||||
|  | ||||
| def test_content_filter_live_preview(client, live_server, measure_memory_usage): | ||||
|     live_server_setup(live_server) | ||||
|     set_response() | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": ''}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid=uuid), | ||||
|         data={ | ||||
|             "include_filters": "", | ||||
|             "fetch_backend": 'html_requests', | ||||
|             "ignore_text": "something to ignore", | ||||
|             "trigger_text": "something to trigger", | ||||
|             "url": test_url, | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # The endpoint is a POST and accepts the form values to override the watch preview | ||||
|     import json | ||||
|  | ||||
|     # DEFAULT OUTPUT WITHOUT ANYTHING UPDATED/CHANGED - SHOULD SEE THE WATCH DEFAULTS | ||||
|     res = client.post( | ||||
|         url_for("watch_get_preview_rendered", uuid=uuid) | ||||
|     ) | ||||
|     default_return = json.loads(res.data.decode('utf-8')) | ||||
|     assert default_return.get('after_filter') | ||||
|     assert default_return.get('before_filter') | ||||
|     assert default_return.get('ignore_line_numbers') == [3] # "something to ignore" line 3 | ||||
|     assert default_return.get('trigger_line_numbers') == [4] # "something to trigger" line 4 | ||||
|  | ||||
|     # SEND AN UPDATE AND WE SHOULD SEE THE OUTPUT CHANGE SO WE KNOW TO HIGHLIGHT NEW STUFF | ||||
|     res = client.post( | ||||
|         url_for("watch_get_preview_rendered", uuid=uuid), | ||||
|         data={ | ||||
|             "include_filters": "", | ||||
|             "fetch_backend": 'html_requests', | ||||
|             "ignore_text": "sOckS", # Also be sure case insensitive works | ||||
|             "trigger_text": "AweSOme", | ||||
|             "url": test_url, | ||||
|         }, | ||||
|     ) | ||||
|     reply = json.loads(res.data.decode('utf-8')) | ||||
|     assert reply.get('after_filter') | ||||
|     assert reply.get('before_filter') | ||||
|     assert reply.get('ignore_line_numbers') == [2]  # Ignored - "socks" on line 2 | ||||
|     assert reply.get('trigger_line_numbers') == [1]  # Triggers "Awesome" in line 1 | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
							
								
								
									
										72
									
								
								changedetectionio/tests/test_preview_endpoints.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								changedetectionio/tests/test_preview_endpoints.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| # A fetched PDF should be converted to text, with its document checksum shown in the preview | ||||
| def test_fetch_pdf(client, live_server, measure_memory_usage): | ||||
|     import shutil | ||||
|     shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf") | ||||
|  | ||||
|     live_server_setup(live_server) | ||||
|     test_url = url_for('test_pdf_endpoint', _external=True) | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # PDF header should not be there (it was converted to text) | ||||
|     assert b'PDF' not in res.data[:10] | ||||
|     assert b'hello world' in res.data | ||||
|  | ||||
|     # So we know if the file changes in other ways | ||||
|     import hashlib | ||||
|     original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() | ||||
|     # We should have one | ||||
|     assert len(original_md5) > 0 | ||||
|     # And it's going to be in the document | ||||
|     assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data | ||||
|  | ||||
|     shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf") | ||||
|     changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() | ||||
|     res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'1 watches queued for rechecking.' in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Now something should be ready, indicated by having a 'unviewed' class | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     # The original checksum should not be here anymore (cdio adds it to the bottom of the text) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert original_md5.encode('utf-8') not in res.data | ||||
|     assert changed_md5.encode('utf-8') in res.data | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("diff_history_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert original_md5.encode('utf-8') in res.data | ||||
|     assert changed_md5.encode('utf-8') in res.data | ||||
|  | ||||
|     assert b'here is a change' in res.data | ||||
| @@ -3,7 +3,7 @@ import os | ||||
| import time | ||||
|  | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output | ||||
| from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output | ||||
| from ..notification import default_notification_format | ||||
|  | ||||
| instock_props = [ | ||||
| @@ -413,3 +413,31 @@ def test_data_sanity(client, live_server): | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first")) | ||||
|     assert test_url2.encode('utf-8') in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| # All examples should give a price of 155.55 | ||||
| def test_special_prop_examples(client, live_server): | ||||
|     import glob | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt") | ||||
|     files = glob.glob(check_path) | ||||
|     assert files | ||||
|     for test_example_filename in files: | ||||
|         with open(test_example_filename, 'r') as example_f: | ||||
|             with open("test-datastore/endpoint-content.txt", "w") as test_f: | ||||
|                 test_f.write(f"<html><body>{example_f.read()}</body></html>") | ||||
|  | ||||
|             # Now fetch it and check the price worked | ||||
|             client.post( | ||||
|                 url_for("form_quick_watch_add"), | ||||
|                 data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, | ||||
|                 follow_redirects=True | ||||
|             ) | ||||
|             wait_for_all_checks(client) | ||||
|             res = client.get(url_for("index")) | ||||
|             assert b'ception' not in res.data | ||||
|             assert b'155.55' in res.data | ||||
|   | ||||
| @@ -18,12 +18,13 @@ class TestDiffBuilder(unittest.TestCase): | ||||
|  | ||||
|         watch['last_viewed'] = 110 | ||||
|  | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents=b"hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         # Contents are always returned from the browser/requests/etc as str (in Python, a sequence of Unicode code points) | ||||
|         watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4())) | ||||
|         watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4())) | ||||
|  | ||||
|         p = watch.get_next_snapshot_key_to_last_viewed | ||||
|         assert p == "112", "Correct last-viewed timestamp was detected" | ||||
|   | ||||
| @@ -260,9 +260,6 @@ class update_worker(threading.Thread): | ||||
|                     try: | ||||
|                         # Processor is what we are using for detecting the "Change" | ||||
|                         processor = watch.get('processor', 'text_json_diff') | ||||
|                         # Abort processing when the content was the same as the last fetch | ||||
|                         skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same') | ||||
|  | ||||
|  | ||||
|                         # Init a new 'difference_detection_processor', first look in processors | ||||
|                         processor_module_name = f"changedetectionio.processors.{processor}.processor" | ||||
| @@ -278,16 +275,13 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                         update_handler.call_browser() | ||||
|  | ||||
|                         changed_detected, update_obj, contents = update_handler.run_changedetection( | ||||
|                             watch=watch, | ||||
|                             skip_when_checksum_same=skip_when_same_checksum, | ||||
|                         ) | ||||
|                         changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch) | ||||
|  | ||||
|                         # Re #342 | ||||
|                         # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. | ||||
|                         # We then convert/.decode('utf-8') for the notification etc | ||||
|                         if not isinstance(contents, (bytes, bytearray)): | ||||
|                             raise Exception("Error - returned data from the fetch handler SHOULD be bytes") | ||||
| #                        if not isinstance(contents, (bytes, bytearray)): | ||||
| #                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes") | ||||
|                     except PermissionError as e: | ||||
|                         logger.critical(f"File permission error updating file, watch: {uuid}") | ||||
|                         logger.critical(str(e)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user