Compare commits

...

22 Commits

Author  SHA1  Message  Date
dgtlmoon  16a85e2a60  Closes #2548  2024-10-14 11:11:00 +02:00
dgtlmoon  ecafa27833  UI - More work on tab buttons hiding behind menu/header :-)  2024-10-11 22:54:09 +02:00
dgtlmoon  f7d4e58613  0.47.03  2024-10-11 17:33:00 +02:00
dgtlmoon  5bb47e47db  Remove same checksum skip check - saved a little CPU but added a lot of complexity (#2700)  2024-10-11 17:28:42 +02:00
dgtlmoon  03151da68e  UI - Fix scroll offset / tab buttons hiding behind menu/header  2024-10-11 16:04:08 +02:00
dgtlmoon  a16a70229d  0.47.01  2024-10-11 15:02:17 +02:00
dgtlmoon  9476c1076b  Adding missing apprise_plugin for pypi/pip based installs  2024-10-11 15:01:27 +02:00
dgtlmoon  a4959b5971  0.47.00  2024-10-11 13:04:56 +02:00
dgtlmoon  a278fa22f2  Restock multiprice improvements (#2698)  2024-10-11 11:43:35 +02:00
dgtlmoon  d39530b261  Test - Simple test for live preview  2024-10-11 11:07:12 +02:00
dgtlmoon  d4b4355ff5  Adding test for proxy checker/scanner (#2697)  2024-10-11 09:52:55 +02:00
dgtlmoon  c1c8de3104  Fixing proxy checker (#2696)  2024-10-11 00:19:19 +02:00
dgtlmoon  5a768d7db3  UTF-8 handling fixes, Improvements to whitespace filtering (#2691)  2024-10-10 14:59:39 +02:00
dgtlmoon  f38429ec93  Testing - Tidyup (#2693)  2024-10-10 12:45:23 +02:00
dgtlmoon  783926962d  Filters & Text - Preview refactor/improvements (#2689)  2024-10-09 09:17:32 +02:00
Marc  6cd1d50a4f  Build - Add image source label to Dockerfile (Better Renovate and others support) (#2690)  2024-10-09 08:30:23 +02:00
dgtlmoon  54a4970a4c  Custom JSON/POST Notifications - Log when it could not apply the application/json content-type header  2024-10-08 09:48:38 +02:00
dgtlmoon  fd00453e6d  UI - Filters live preview - improvements to layout  2024-10-08 08:59:10 +02:00
dgtlmoon  2842ffb205  Restock - Use the scraped 'Not in stock' product status over the metadata version (many website lie in the metadata) (#2684)  2024-10-07 20:10:35 +02:00
dgtlmoon  ec4e2f5649  UI - Better 40x error message (#2685)  2024-10-07 16:52:19 +02:00
dgtlmoon  fe8e3d1cb1  Visual Selector - Including <button> (#2686)  2024-10-07 16:52:04 +02:00
dgtlmoon  69fbafbdb7  Stock/not-in-stock scraper - slight reliability improvement (#2687)  2024-10-07 16:51:47 +02:00
49 changed files with 859 additions and 448 deletions

View File

@@ -37,6 +37,7 @@ RUN pip install --target=/dependencies playwright~=1.41.2 \
 # Final image stage
 FROM python:${PYTHON_VERSION}-slim-bookworm
+LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libxslt1.1 \

View File

@@ -1,4 +1,5 @@
 recursive-include changedetectionio/api *
+recursive-include changedetectionio/apprise_plugin *
 recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/content_fetchers *
 recursive-include changedetectionio/model *

View File

@@ -2,7 +2,7 @@
 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.46.04'
+__version__ = '0.47.03'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError

View File

@@ -58,7 +58,7 @@ class Watch(Resource):
             abort(404, message='No watch exists with the UUID of {}'.format(uuid))

         if request.args.get('recheck'):
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             return "OK", 200
         if request.args.get('paused', '') == 'paused':
             self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):
         new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
         if new_uuid:
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
             return {'uuid': new_uuid}, 201
         else:
             return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):
         if request.args.get('recheck_all'):
             for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             return {'status': "OK"}, 200

         return list, 200
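
Note on the skip_when_checksum_same removal (#2700): every producer in this comparison now queues only the watch UUID; the flag, and the code paths that consumed it, are gone. A self-contained sketch of the queueing pattern, assuming PrioritizedItem is a dataclass that orders only on priority (which matches how it is used in the diff; the payload UUID is illustrative):

    import queue
    from dataclasses import dataclass, field

    @dataclass(order=True)
    class PrioritizedItem:
        priority: int
        item: dict = field(compare=False)  # payload never participates in ordering

    update_q = queue.PriorityQueue()
    # After this change the payload carries only the UUID - no 'skip_when_checksum_same' flag
    update_q.put(PrioritizedItem(priority=1, item={'uuid': 'deadbeef-uuid'}))
    print(update_q.get().item)  # {'uuid': 'deadbeef-uuid'}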

View File

@@ -1,5 +1,6 @@
 # include the decorator
 from apprise.decorators import notify
+from loguru import logger

 @notify(on="delete")
 @notify(on="deletes")
@@ -64,10 +65,12 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
         auth = (URLBase.unquote(results.get('user')))

     # Try to auto-guess if it's JSON
+    h = 'application/json; charset=utf-8'
     try:
         json.loads(body)
-        headers['Content-Type'] = 'application/json; charset=utf-8'
+        headers['Content-Type'] = h
     except ValueError as e:
+        logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
         pass

     r(results.get('url'),
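
The hoisted h variable exists so the new warning can name the exact header that failed to apply. The auto-guess itself is only "does the body parse as JSON"; a minimal standalone sketch of that logic (the function name is illustrative):

    import json

    def guess_json_content_type(body: str) -> dict:
        headers = {}
        h = 'application/json; charset=utf-8'
        try:
            json.loads(body)
            headers['Content-Type'] = h
        except ValueError:
            # Body is not valid JSON - leave the content type unset and let the caller log it
            pass
        return headers

    print(guess_json_content_type('{"status": "ok"}'))  # {'Content-Type': 'application/json; charset=utf-8'}
    print(guess_json_content_type('plain text'))        # {}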

View File

@@ -1,4 +1,7 @@
+import importlib
 from concurrent.futures import ThreadPoolExecutor
+
+from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio.store import ChangeDetectionStore
 from functools import wraps
@@ -30,7 +33,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
     def long_task(uuid, preferred_proxy):
         import time
         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
-        from changedetectionio.processors.text_json_diff import text_json_diff
         from changedetectionio.safe_jinja import render as jinja_render

         status = {'status': '', 'length': 0, 'text': ''}
@@ -38,8 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         contents = ''
         now = time.time()
         try:
-            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
-            update_handler.call_browser()
+            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+            update_handler = processor_module.perform_site_check(datastore=datastore,
+                                                                 watch_uuid=uuid
+                                                                 )
+            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
         # title, size is len contents not len xfer
         except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
             if e.status_code == 404:
@@ -48,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                 status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
             else:
                 status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
-        except text_json_diff.FilterNotFoundInResponse:
+        except FilterNotFoundInResponse:
             status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
         except content_fetcher_exceptions.EmptyReply as e:
             if e.status_code == 403 or e.status_code == 401:
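
Swapping the static processor import for importlib.import_module appears intended to defer loading the processor until call time (avoiding import-order problems between the blueprint and the processor package). The pattern in isolation, using a stdlib module so the sketch runs anywhere (the diff resolves "changedetectionio.processors.text_json_diff.processor" instead):

    import importlib

    # Resolve a module by dotted name at call time instead of import time
    module_name = "json"  # stand-in for the processor module named in the diff
    mod = importlib.import_module(module_name)
    print(mod.loads('{"price": 159}'))  # {'price': 159}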

View File

@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
         datastore.data['watching'][uuid]['processor'] = 'restock_diff'
         datastore.data['watching'][uuid].clear_watch()
-        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
         return redirect(url_for("index"))

     @login_required

View File

@@ -17,7 +17,6 @@
 </script>
 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
-<!--<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>-->
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>

 <div class="edit-form monospaced-textarea">

View File

@@ -4,7 +4,9 @@ from loguru import logger
 from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
 import os

-visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary'
+# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
+visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'

 # available_fetchers() will scan this implementation looking for anything starting with html_
 # this information is used in the form selections

View File

@@ -75,6 +75,7 @@ class fetcher(Fetcher):
         self.headers = r.headers

         if not r.content or not len(r.content):
+            logger.debug(f"Requests returned empty content for '{url}'")
             if not empty_pages_are_a_change:
                 raise EmptyReply(url=url, status_code=r.status_code)
             else:

View File

@@ -154,10 +154,14 @@ function isItemInStock() {
             }

             elementText = "";
-            if (element.tagName.toLowerCase() === "input") {
-                elementText = element.value.toLowerCase().trim();
-            } else {
-                elementText = getElementBaseText(element);
+            try {
+                if (element.tagName.toLowerCase() === "input") {
+                    elementText = element.value.toLowerCase().trim();
+                } else {
+                    elementText = getElementBaseText(element);
+                }
+            } catch (e) {
+                console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
             }

             if (elementText.length) {

View File

@@ -1,7 +1,6 @@
 #!/usr/bin/env python3

 import datetime
-import importlib
 import flask_login
 import locale
@@ -12,9 +11,7 @@ import threading
 import time
 import timeago

-from .content_fetchers.exceptions import ReplyWithContentButNoText
 from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
-from .processors.text_json_diff.processor import FilterNotFoundInResponse
 from .safe_jinja import render as jinja_render
 from changedetectionio.strtobool import strtobool
 from copy import deepcopy
@@ -791,7 +788,6 @@ def changedetection_app(config=None, datastore_o=None):
             # Recast it if need be to right data Watch handler
             watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
             datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
-
             flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")

             # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
@@ -799,7 +795,7 @@ def changedetection_app(config=None, datastore_o=None):
             datastore.needs_write_urgent = True

             # Queue the watch for immediate recheck, with a higher priority
-            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

             # Diff page [edit] link should go back to diff page
             if request.args.get("next") and request.args.get("next") == 'diff':
@@ -980,7 +976,7 @@ def changedetection_app(config=None, datastore_o=None):
             importer = import_url_list()
             importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
             for uuid in importer.new_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

             if len(importer.remaining_data) == 0:
                 return redirect(url_for('index'))
@@ -993,7 +989,7 @@ def changedetection_app(config=None, datastore_o=None):
             d_importer = import_distill_io_json()
             d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
             for uuid in d_importer.new_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

         # XLSX importer
         if request.files and request.files.get('xlsx_file'):
@@ -1017,7 +1013,7 @@ def changedetection_app(config=None, datastore_o=None):
             w_importer.run(data=file, flash=flash, datastore=datastore)
             for uuid in w_importer.new_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

         # Could be some remaining, or we could be on GET
         form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1381,78 +1377,9 @@ def changedetection_app(config=None, datastore_o=None):
     @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
     @login_optionally_required
     def watch_get_preview_rendered(uuid):
-        from flask import jsonify
         '''For when viewing the "preview" of the rendered text from inside of Edit'''
-        now = time.time()
-        import brotli
-        from . import forms
-
-        text_after_filter = ''
-        tmp_watch = deepcopy(datastore.data['watching'].get(uuid))
-        if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
-            # Splice in the temporary stuff from the form
-            form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
-                                                       data=request.form
-                                                       )
-            # Only update vars that came in via the AJAX post
-            p = {k: v for k, v in form.data.items() if k in request.form.keys()}
-            tmp_watch.update(p)
-
-            latest_filename = next(reversed(tmp_watch.history))
-            html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
-            with open(html_fname, 'rb') as f:
-                decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
-
-                # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
-                processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
-                update_handler = processor_module.perform_site_check(datastore=datastore,
-                                                                     watch_uuid=uuid  # probably not needed anymore anyway?
-                                                                     )
-                # Use the last loaded HTML as the input
-                update_handler.fetcher.content = decompressed_data
-                update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
-                try:
-                    changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
-                        watch=tmp_watch,
-                        skip_when_checksum_same=False,
-                    )
-                except FilterNotFoundInResponse as e:
-                    text_after_filter = f"Filter not found in HTML: {str(e)}"
-                except ReplyWithContentButNoText as e:
-                    text_after_filter = f"Filter found but no text (empty result)"
-                except Exception as e:
-                    text_after_filter = f"Error: {str(e)}"
-
-                if not text_after_filter.strip():
-                    text_after_filter = 'Empty content'
-
-                # because run_changedetection always returns bytes due to saving the snapshots etc
-                text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter
-
-                do_anchor = datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
-
-                trigger_line_numbers = []
-                try:
-                    text_before_filter = html_tools.html_to_text(html_content=decompressed_data,
-                                                                 render_anchor_tag_content=do_anchor)
-                    trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
-                                                                        wordlist=tmp_watch['trigger_text'],
-                                                                        mode='line numbers'
-                                                                        )
-                except Exception as e:
-                    text_before_filter = f"Error: {str(e)}"
-
-        logger.trace(f"Parsed in {time.time() - now:.3f}s")
-
-        return jsonify(
-            {
-                'after_filter': text_after_filter,
-                'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
-                'trigger_line_numbers': trigger_line_numbers
-            }
-        )
+        from .processors.text_json_diff import prepare_filter_prevew
+        return prepare_filter_prevew(watch_uuid=uuid, datastore=datastore)

     @app.route("/form/add/quickwatch", methods=['POST'])
@@ -1515,7 +1442,7 @@ def changedetection_app(config=None, datastore_o=None):
         new_uuid = datastore.clone(uuid)
         if new_uuid:
             if not datastore.data['watching'].get(uuid).get('paused'):
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
             flash('Cloned.')
         return redirect(url_for('index'))
@@ -1536,7 +1463,7 @@ def changedetection_app(config=None, datastore_o=None):
         if uuid:
             if uuid not in running_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             i = 1

         elif tag:
@@ -1547,7 +1474,7 @@ def changedetection_app(config=None, datastore_o=None):
                     continue
                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                     update_q.put(
-                        queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
+                        queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
                     )
                     i += 1
@@ -1557,9 +1484,8 @@ def changedetection_app(config=None, datastore_o=None):
                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                     if with_errors and not watch.get('last_error'):
                         continue
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                     i += 1
-
         flash(f"{i} watches queued for rechecking.")
         return redirect(url_for('index', tag=tag))
@@ -1616,7 +1542,7 @@ def changedetection_app(config=None, datastore_o=None):
                 uuid = uuid.strip()
                 if datastore.data['watching'].get(uuid):
                     # Recheck and require a full reprocessing
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             flash("{} watches queued for rechecking".format(len(uuids)))

         elif (op == 'clear-errors'):
@@ -1940,7 +1866,7 @@ def ticker_thread_check_time_launch_checks():
                             f"{now - watch['last_checked']:0.2f}s since last checked")

                 # Into the queue with you
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))

                 # Reset for next time
                 watch.jitter_seconds = 0

View File

@@ -476,7 +476,7 @@ class processor_text_json_diff_form(commonSettingsForm):

     title = StringField('Title', default='')

-    ignore_text = StringListField('Remove lines containing', [ValidateListRegex()])
+    ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()])
     headers = StringDictKeyValue('Request headers')
     body = TextAreaField('Request body', [validators.Optional()])
     method = SelectField('Request method', choices=valid_method, default=default_method)

View File

@@ -3,11 +3,11 @@ from lxml import etree
 import json
 import re

 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
-
+TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
 PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'

 # 'price' , 'lowPrice', 'highPrice' are usually under here
 # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
 LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -326,6 +326,7 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
 # - "line numbers" return a list of line numbers that match (int list)
 #
 # wordlist - list of regex's (str) or words (str)
+# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
 def strip_ignore_text(content, wordlist, mode="content"):
     i = 0
     output = []
@@ -341,32 +342,30 @@ def strip_ignore_text(content, wordlist, mode="content"):
         else:
             ignore_text.append(k.strip())

-    for line in content.splitlines():
+    for line in content.splitlines(keepends=True):
         i += 1
         # Always ignore blank lines in this mode. (when this function gets called)
         got_match = False
-        if len(line.strip()):
-            for l in ignore_text:
-                if l.lower() in line.lower():
-                    got_match = True
-
-            if not got_match:
-                for r in ignore_regex:
-                    if r.search(line):
-                        got_match = True
-
-        if not got_match:
-            # Not ignored
-            output.append(line.encode('utf8'))
-        else:
-            ignored_line_numbers.append(i)
+        for l in ignore_text:
+            if l.lower() in line.lower():
+                got_match = True
+
+        if not got_match:
+            for r in ignore_regex:
+                if r.search(line):
+                    got_match = True
+
+        if not got_match:
+            # Not ignored, and should preserve "keepends"
+            output.append(line)
+        else:
+            ignored_line_numbers.append(i)

     # Used for finding out what to highlight
     if mode == "line numbers":
         return ignored_line_numbers

-    return "\n".encode('utf8').join(output)
+    return ''.join(output)

 def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
     from xml.sax.saxutils import escape as xml_escape

View File

@@ -6,6 +6,8 @@ import re
 from pathlib import Path
 from loguru import logger

+from ..html_tools import TRANSLATE_WHITESPACE_TABLE
+
 # Allowable protocols, protects against javascript: etc
 # file:// is further checked by ALLOW_FILE_URI
 SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
@@ -36,8 +38,9 @@ class model(watch_base):
     jitter_seconds = 0

     def __init__(self, *arg, **kw):
-        self.__datastore_path = kw['datastore_path']
-        del kw['datastore_path']
+        self.__datastore_path = kw.get('datastore_path')
+        if kw.get('datastore_path'):
+            del kw['datastore_path']
         super(model, self).__init__(*arg, **kw)
         if kw.get('default'):
             self.update(kw['default'])
@@ -171,6 +174,10 @@ class model(watch_base):
         """
         tmp_history = {}

+        # In the case we are only using the watch for processing without history
+        if not self.watch_data_dir:
+            return []
+
         # Read the history file as a dict
         fname = os.path.join(self.watch_data_dir, "history.txt")
         if os.path.isfile(fname):
@@ -307,13 +314,13 @@ class model(watch_base):
             dest = os.path.join(self.watch_data_dir, snapshot_fname)
             if not os.path.exists(dest):
                 with open(dest, 'wb') as f:
-                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
+                    f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
         else:
             snapshot_fname = f"{snapshot_id}.txt"
             dest = os.path.join(self.watch_data_dir, snapshot_fname)
             if not os.path.exists(dest):
                 with open(dest, 'wb') as f:
-                    f.write(contents)
+                    f.write(contents.encode('utf-8'))

         # Append to index
         # @todo check last char was \n
@@ -345,14 +352,32 @@ class model(watch_base):
         return seconds

     # Iterate over all history texts and see if something new exists
-    def lines_contain_something_unique_compared_to_history(self, lines: list):
-        local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
+    # Always applying .strip() to start/end but optionally replace any other whitespace
+    def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
+        local_lines = []
+        if lines:
+            if ignore_whitespace:
+                if isinstance(lines[0], str):  # Can be either str or bytes depending on what was on the disk
+                    local_lines = set([l.translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
+                else:
+                    local_lines = set([l.decode('utf-8').translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
+            else:
+                if isinstance(lines[0], str):  # Can be either str or bytes depending on what was on the disk
+                    local_lines = set([l.strip().lower() for l in lines])
+                else:
+                    local_lines = set([l.decode('utf-8').strip().lower() for l in lines])

         # Compare each lines (set) against each history text file (set) looking for something new..
         existing_history = set({})
         for k, v in self.history.items():
             content = self.get_history_snapshot(k)
-            alist = set([line.strip().lower() for line in content.splitlines()])
+
+            if ignore_whitespace:
+                alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
+            else:
+                alist = set([line.strip().lower() for line in content.splitlines()])
+
             existing_history = existing_history.union(alist)

         # Check that everything in local_lines(new stuff) already exists in existing_history - it should
@@ -396,8 +421,8 @@ class model(watch_base):
     @property
     def watch_data_dir(self):
         # The base dir of the watch data
-        return os.path.join(self.__datastore_path, self['uuid'])
+        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None

     def get_error_text(self):
         """Return the text saved from a previous request that resulted in a non-200 error"""
         fname = os.path.join(self.watch_data_dir, "last-error.txt")
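
TRANSLATE_WHITESPACE_TABLE (added to html_tools above) is a str.translate deletion table for carriage returns, linefeeds, tabs and spaces, so with ignore_whitespace=True two lines that differ only in spacing normalise to the same key before the set comparison. A quick demonstration:

    TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')

    a = " In  Stock:\tTrue \n"
    b = "In Stock: True"
    # Both collapse to 'instock:true', so whitespace alone is never "something unique"
    assert a.translate(TRANSLATE_WHITESPACE_TABLE).lower() == b.translate(TRANSLATE_WHITESPACE_TABLE).lower()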

View File

@@ -18,6 +18,7 @@ class difference_detection_processor():
     screenshot = None
     watch = None
     xpath_data = None
+    preferred_proxy = None

     def __init__(self, *args, datastore, watch_uuid, **kwargs):
         super().__init__(*args, **kwargs)
@@ -26,7 +27,8 @@ class difference_detection_processor():
         # Generic fetcher that should be extended (requests, playwright etc)
         self.fetcher = Fetcher()

-    def call_browser(self):
+    def call_browser(self, preferred_proxy_id=None):
+
         from requests.structures import CaseInsensitiveDict

         # Protect against file:// access
@@ -42,7 +44,7 @@ class difference_detection_processor():
         prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

         # Proxy ID "key"
-        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
+        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

         # Pluggable content self.fetcher
         if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@@ -155,7 +157,7 @@ class difference_detection_processor():
     # After init, call run_changedetection() which will do the actual change-detection

     @abstractmethod
-    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
+    def run_changedetection(self, watch):
         update_obj = {'last_notification_error': False, 'last_error': False}
         some_data = 'xxxxx'
         update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

View File

@@ -27,22 +27,27 @@ def _search_prop_by_value(matches, value):
             return prop[1]  # Yield the desired value and exit the function

 def _deduplicate_prices(data):
-    seen = set()
-    unique_data = []
-
-    # Return the complete 'datum' where its price was not seen before
+    import re
+
+    '''
+    Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159"
+    Get all the values, clean it and add it to a set then return the unique values
+    '''
+    unique_data = set()
+
     for datum in data:
-        # Convert 'value' to float if it can be a numeric string, otherwise leave it as is
-        try:
-            normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
-        except ValueError:
-            normalized_value = datum.value
-
-        # If the normalized value hasn't been seen yet, add it to unique data
-        if normalized_value not in seen:
-            unique_data.append(datum)
-            seen.add(normalized_value)
-
-    return unique_data
+        if isinstance(datum.value, list):
+            # Process each item in the list
+            normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value])
+            unique_data.update(normalized_value)
+        else:
+            # Process single value
+            v = float(re.sub(r'[^\d.]', '', str(datum.value)))
+            unique_data.add(v)
+
+    return list(unique_data)

 # should return Restock()
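
The new _deduplicate_prices normalises every entry, list or scalar, to a bare float before set-deduplication, so '$159', '159', 159 and '$ 159' all collapse to one value. A worked example (standalone, with SimpleNamespace standing in for the jsonpath result objects):

    import re
    from types import SimpleNamespace

    data = [SimpleNamespace(value=['$159', '159', 159, '$ 159']),
            SimpleNamespace(value='189.95')]

    unique_data = set()
    for datum in data:
        if isinstance(datum.value, list):
            unique_data.update(float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value)
        else:
            unique_data.add(float(re.sub(r'[^\d.]', '', str(datum.value))))

    print(sorted(unique_data))  # [159.0, 189.95]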
@@ -83,14 +88,13 @@ def get_itemprop_availability(html_content) -> Restock:
         if price_result:
             # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
             # parse that for the UI?
-            prices_found = set(str(item.value).replace('$', '') for item in price_result)
-            if len(price_result) > 1 and len(prices_found) > 1:
+            if len(price_result) > 1 and len(price_result) > 1:
                 # See of all prices are different, in the case that one product has many embedded data types with the same price
                 # One might have $121.95 and another 121.95 etc
-                logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
+                logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
                 raise MoreThanOnePriceFound()

-            value['price'] = price_result[0].value
+            value['price'] = price_result[0]

         pricecurrency_result = pricecurrency_parse.find(data)
         if pricecurrency_result:
@@ -140,7 +144,7 @@ class perform_site_check(difference_detection_processor):
     screenshot = None
     xpath_data = None

-    def run_changedetection(self, watch, skip_when_checksum_same=True):
+    def run_changedetection(self, watch):
         import hashlib

         if not watch:
@@ -220,7 +224,7 @@ class perform_site_check(difference_detection_processor):
                 itemprop_availability['original_price'] = itemprop_availability.get('price')
                 update_obj['restock']["original_price"] = itemprop_availability.get('price')

-        if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
+        if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
             raise ProcessorException(
                 message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
                 url=watch.get('url'),
@@ -237,6 +241,14 @@ class perform_site_check(difference_detection_processor):
             update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
             logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.")

+        # Very often websites will lie about the 'availability' in the metadata, so if the scraped version says its NOT in stock, use that.
+        if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock':
+            if update_obj['restock'].get('in_stock'):
+                logger.warning(f"Lie detected in the availability machine data!! when scraping said its not in stock!! itemprop was '{itemprop_availability}' and scraped from browser was '{self.fetcher.instock_data}' update obj was {update_obj['restock']} ")
+                logger.warning(f"Setting instock to FALSE, scraper found '{self.fetcher.instock_data}' in the body but metadata reported not-in-stock")
+                update_obj['restock']["in_stock"] = False
+
         # What we store in the snapshot
         price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
         snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"
@@ -299,4 +311,4 @@ class perform_site_check(difference_detection_processor):
         # Always record the new checksum
         update_obj["previous_md5"] = fetched_md5

-        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
+        return changed_detected, update_obj, snapshot_content.strip()

View File

@@ -0,0 +1,115 @@
+from loguru import logger
+
+
+def _task(watch, update_handler):
+    from changedetectionio.content_fetchers.exceptions import ReplyWithContentButNoText
+    from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
+
+    text_after_filter = ''
+
+    try:
+        # The slow process (we run 2 of these in parallel)
+        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
+    except FilterNotFoundInResponse as e:
+        text_after_filter = f"Filter not found in HTML: {str(e)}"
+    except ReplyWithContentButNoText as e:
+        text_after_filter = f"Filter found but no text (empty result)"
+    except Exception as e:
+        text_after_filter = f"Error: {str(e)}"
+
+    if not text_after_filter.strip():
+        text_after_filter = 'Empty content'
+
+    # because run_changedetection always returns bytes due to saving the snapshots etc
+    text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter
+
+    return text_after_filter
+
+
+def prepare_filter_prevew(datastore, watch_uuid):
+    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
+    from changedetectionio import forms, html_tools
+    from changedetectionio.model.Watch import model as watch_model
+    from concurrent.futures import ProcessPoolExecutor
+    from copy import deepcopy
+    from flask import request, jsonify
+    import brotli
+    import importlib
+    import os
+    import time
+
+    now = time.time()
+
+    text_after_filter = ''
+    text_before_filter = ''
+    trigger_line_numbers = []
+    ignore_line_numbers = []
+
+    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
+
+    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
+        # Splice in the temporary stuff from the form
+        form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
+                                                   data=request.form
+                                                   )
+
+        # Only update vars that came in via the AJAX post
+        p = {k: v for k, v in form.data.items() if k in request.form.keys()}
+        tmp_watch.update(p)
+        blank_watch_no_filters = watch_model()
+        blank_watch_no_filters['url'] = tmp_watch.get('url')
+
+        latest_filename = next(reversed(tmp_watch.history))
+        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
+        with open(html_fname, 'rb') as f:
+            decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
+
+            # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
+            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+            update_handler = processor_module.perform_site_check(datastore=datastore,
+                                                                 watch_uuid=tmp_watch.get('uuid')  # probably not needed anymore anyway?
+                                                                 )
+            # Use the last loaded HTML as the input
+            update_handler.datastore = datastore
+            update_handler.fetcher.content = str(decompressed_data)  # str() because playwright/puppeteer/requests return string
+            update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
+
+            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
+            # Do this as a parallel process because it could take some time
+            with ProcessPoolExecutor(max_workers=2) as executor:
+                future1 = executor.submit(_task, tmp_watch, update_handler)
+                future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
+
+                text_after_filter = future1.result()
+                text_before_filter = future2.result()
+
+        try:
+            trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
+                                                                wordlist=tmp_watch['trigger_text'],
+                                                                mode='line numbers'
+                                                                )
+        except Exception as e:
+            text_before_filter = f"Error: {str(e)}"
+
+        try:
+            text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
+            ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
+                                                               wordlist=text_to_ignore,
+                                                               mode='line numbers'
+                                                               )
+        except Exception as e:
+            text_before_filter = f"Error: {str(e)}"
+
+    logger.trace(f"Parsed in {time.time() - now:.3f}s")
+
+    return jsonify(
+        {
+            'after_filter': text_after_filter,
+            'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
+            'duration': time.time() - now,
+            'trigger_line_numbers': trigger_line_numbers,
+            'ignore_line_numbers': ignore_line_numbers,
+        }
+    )
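
This new module renders the same snapshot twice, once through the user's filters and once through a blank watch, and runs the two CPU-heavy passes in parallel with ProcessPoolExecutor. The concurrency pattern in isolation (a sketch; slow_render stands in for _task, which has to remain a picklable top-level function for process pools):

    from concurrent.futures import ProcessPoolExecutor

    def slow_render(label):
        # Stand-in for _task(watch, update_handler): each call is an expensive filter pass
        return f"rendered with {label}"

    if __name__ == '__main__':
        with ProcessPoolExecutor(max_workers=2) as executor:
            future1 = executor.submit(slow_render, 'user filters')
            future2 = executor.submit(slow_render, 'no filters')
            text_after_filter = future1.result()
            text_before_filter = future2.result()
        print(text_before_filter, '|', text_after_filter)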

View File

@@ -7,7 +7,7 @@ import re
 import urllib3

 from changedetectionio.processors import difference_detection_processor
-from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
+from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from loguru import logger
@@ -35,8 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run_changedetection(self, watch, skip_when_checksum_same=True):
-
+    def run_changedetection(self, watch):
         changed_detected = False
         html_content = ""
         screenshot = False  # as bytes
@@ -59,9 +58,6 @@ class perform_site_check(difference_detection_processor):
         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
         # Saves a lot of CPU
         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
-        if skip_when_checksum_same:
-            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
-                raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

         # Fetching complete, now filters
@@ -205,22 +201,14 @@ class perform_site_check(difference_detection_processor):
         if watch.get('trim_text_whitespace'):
             stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())

-        if watch.get('remove_duplicate_lines'):
-            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
-
-        if watch.get('sort_text_alphabetically'):
-            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
-            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
-            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
-            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
-
         # Re #340 - return the content before the 'ignore text' was applied
         # Also used to calculate/show what was removed
-        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
+        text_content_before_ignored_filter = stripped_text_from_html

         # @todo whitespace coming from missing rtrim()?
         # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
         # Rewrite's the processing text based on only what diff result they want to see
         if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
             # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
             from changedetectionio import diff
@@ -235,12 +223,12 @@ class perform_site_check(difference_detection_processor):
                                                      line_feed_sep="\n",
                                                      include_change_type_prefix=False)

-            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)
+            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))

             if not rendered_diff and stripped_text_from_html:
                 # We had some content, but no differences were found
                 # Store our new file as the MD5 so it will trigger in the future
-                c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest()
+                c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
             else:
                 stripped_text_from_html = rendered_diff
@@ -261,14 +249,6 @@ class perform_site_check(difference_detection_processor):

         update_obj["last_check_status"] = self.fetcher.get_last_status_code()

-        # If there's text to skip
-        # @todo we could abstract out the get_text() to handle this cleaner
-        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
-        if len(text_to_ignore):
-            stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
-        else:
-            stripped_text_from_html = stripped_text_from_html.encode('utf8')
-
         # 615 Extract text by regex
         extract_text = watch.get('extract_text', [])
         if len(extract_text) > 0:
@@ -277,39 +257,53 @@ class perform_site_check(difference_detection_processor):
                 # incase they specified something in '/.../x'
                 if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
                     regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
-                    result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
+                    result = re.findall(regex, stripped_text_from_html)

                     for l in result:
                         if type(l) is tuple:
                             # @todo - some formatter option default (between groups)
-                            regex_matched_output += list(l) + [b'\n']
+                            regex_matched_output += list(l) + ['\n']
                         else:
                             # @todo - some formatter option default (between each ungrouped result)
-                            regex_matched_output += [l] + [b'\n']
+                            regex_matched_output += [l] + ['\n']
                 else:
                     # Doesnt look like regex, just hunt for plaintext and return that which matches
                     # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
-                    r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE)
+                    r = re.compile(re.escape(s_re), re.IGNORECASE)
                     res = r.findall(stripped_text_from_html)
                     if res:
                         for match in res:
-                            regex_matched_output += [match] + [b'\n']
+                            regex_matched_output += [match] + ['\n']

             ##########################################################
-            stripped_text_from_html = b''
-            text_content_before_ignored_filter = b''
+            stripped_text_from_html = ''
+
             if regex_matched_output:
                 # @todo some formatter for presentation?
-                stripped_text_from_html = b''.join(regex_matched_output)
-                text_content_before_ignored_filter = stripped_text_from_html
+                stripped_text_from_html = ''.join(regex_matched_output)
+
+        if watch.get('remove_duplicate_lines'):
+            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
+
+        if watch.get('sort_text_alphabetically'):
+            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
+            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
+            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
+            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+
+        ### CALCULATE MD5
+        # If there's text to ignore
+        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
+        text_for_checksuming = stripped_text_from_html
+        if text_to_ignore:
+            text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)

         # Re #133 - if we should strip whitespaces from triggering the change detected comparison
-        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
-            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
+        if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
+            fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
         else:
-            fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
+            fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()

         ############ Blocking rules, after checksum #################
         blocked = False
@@ -337,19 +331,33 @@ class perform_site_check(difference_detection_processor):
if result: if result:
blocked = True blocked = True
# The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5:
changed_detected = True
# Looks like something changed, but did it match all the rules? # Looks like something changed, but did it match all the rules?
if blocked: if blocked:
changed_detected = False changed_detected = False
else:
# The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5:
changed_detected = True
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if changed_detected: if changed_detected:
if watch.get('check_unique_lines', False): if watch.get('check_unique_lines', False):
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
lines=stripped_text_from_html.splitlines(),
ignore_whitespace=ignore_whitespace
)
# One or more lines? unsure? # One or more lines? unsure?
if not has_unique_lines: if not has_unique_lines:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
@@ -357,12 +365,6 @@ class perform_site_check(difference_detection_processor):
else: else:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
# stripped_text_from_html - Everything after filters and NO 'ignored' content # stripped_text_from_html - Everything after filters and NO 'ignored' content
return changed_detected, update_obj, stripped_text_from_html return changed_detected, update_obj, stripped_text_from_html
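A note on the new checksum path: with everything handled as str rather than bytes, the whitespace-insensitive MD5 boils down to one str.translate() pass before hashing. A minimal sketch, assuming TRANSLATE_WHITESPACE_TABLE is a str.maketrans() deletion table covering \r, \n, \t and space (mirroring the old bytes version b'\r\n\t '; the real table is defined in changedetectionio's html_tools):

import hashlib

# Assumed definition - the real one lives in changedetectionio/html_tools.py
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')

def checksum(text: str, ignore_whitespace: bool = True) -> str:
    # MD5 of the filtered text, optionally with all whitespace removed first
    if ignore_whitespace:
        text = text.translate(TRANSLATE_WHITESPACE_TABLE)
    return hashlib.md5(text.encode('utf-8')).hexdigest()

# Reformatted/re-wrapped text no longer counts as a change:
assert checksum("hello world") == checksum("hello\n\t world ")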

View File

@@ -1,56 +0,0 @@
/**
* debounce
* @param {integer} milliseconds This param indicates the number of milliseconds
* to wait after the last call before calling the original function.
* @param {object} What "this" refers to in the returned function.
* @return {function} This returns a function that when called will wait the
* indicated number of milliseconds after the last call before
* calling the original function.
*/
Function.prototype.debounce = function (milliseconds, context) {
var baseFunction = this,
timer = null,
wait = milliseconds;
return function () {
var self = context || this,
args = arguments;
function complete() {
baseFunction.apply(self, args);
timer = null;
}
if (timer) {
clearTimeout(timer);
}
timer = setTimeout(complete, wait);
};
};
/**
* throttle
* @param {integer} milliseconds This param indicates the number of milliseconds
* to wait between calls before calling the original function.
* @param {object} What "this" refers to in the returned function.
* @return {function} This returns a function that when called will wait the
* indicated number of milliseconds between calls before
* calling the original function.
*/
Function.prototype.throttle = function (milliseconds, context) {
var baseFunction = this,
lastEventTimestamp = null,
limit = milliseconds;
return function () {
var self = context || this,
args = arguments,
now = Date.now();
if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
lastEventTimestamp = now;
baseFunction.apply(self, args);
}
};
};

View File

@@ -1,64 +1,106 @@
-(function($) {
-/*
-    $('#code-block').highlightLines([
-      {
-        'color': '#dd0000',
-        'lines': [10, 12]
-      },
-      {
-        'color': '#ee0000',
-        'lines': [15, 18]
-      }
-    ]);
-  });
-*/
-    $.fn.highlightLines = function(configurations) {
-        return this.each(function() {
+(function ($) {
+    /**
+     * debounce
+     * @param {integer} milliseconds This param indicates the number of milliseconds
+     *     to wait after the last call before calling the original function.
+     * @param {object} What "this" refers to in the returned function.
+     * @return {function} This returns a function that when called will wait the
+     *     indicated number of milliseconds after the last call before
+     *     calling the original function.
+     */
+    Function.prototype.debounce = function (milliseconds, context) {
+        var baseFunction = this,
+            timer = null,
+            wait = milliseconds;
+
+        return function () {
+            var self = context || this,
+                args = arguments;
+
+            function complete() {
+                baseFunction.apply(self, args);
+                timer = null;
+            }
+
+            if (timer) {
+                clearTimeout(timer);
+            }
+
+            timer = setTimeout(complete, wait);
+        };
+    };
+
+    /**
+     * throttle
+     * @param {integer} milliseconds This param indicates the number of milliseconds
+     *     to wait between calls before calling the original function.
+     * @param {object} What "this" refers to in the returned function.
+     * @return {function} This returns a function that when called will wait the
+     *     indicated number of milliseconds between calls before
+     *     calling the original function.
+     */
+    Function.prototype.throttle = function (milliseconds, context) {
+        var baseFunction = this,
+            lastEventTimestamp = null,
+            limit = milliseconds;
+
+        return function () {
+            var self = context || this,
+                args = arguments,
+                now = Date.now();
+
+            if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
+                lastEventTimestamp = now;
+                baseFunction.apply(self, args);
+            }
+        };
+    };
+
+    $.fn.highlightLines = function (configurations) {
+        return this.each(function () {
            const $pre = $(this);
            const textContent = $pre.text();
            const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings

            // Build a map of line numbers to styles
            const lineStyles = {};

            configurations.forEach(config => {
-               const { color, lines: lineNumbers } = config;
+               const {color, lines: lineNumbers} = config;
                lineNumbers.forEach(lineNumber => {
                    lineStyles[lineNumber] = color;
                });
            });

            // Function to escape HTML characters
            function escapeHtml(text) {
-               return text.replace(/[&<>"'`=\/]/g, function(s) {
+               return text.replace(/[&<>"'`=\/]/g, function (s) {
                    return "&#" + s.charCodeAt(0) + ";";
                });
            }

            // Process each line
            const processedLines = lines.map((line, index) => {
                const lineNumber = index + 1; // Line numbers start at 1
                const escapedLine = escapeHtml(line);
                const color = lineStyles[lineNumber];
                if (color) {
                    // Wrap the line in a span with inline style
                    return `<span style="background-color: ${color}">${escapedLine}</span>`;
                } else {
                    return escapedLine;
                }
            });

            // Join the lines back together
            const newContent = processedLines.join('\n');

            // Set the new content as HTML
            $pre.html(newContent);
        });
    };

-   $.fn.miniTabs = function(tabsConfig, options) {
+   $.fn.miniTabs = function (tabsConfig, options) {
        const settings = {
            tabClass: 'minitab',
            tabsContainerClass: 'minitabs',
@@ -66,10 +108,10 @@
            ...(options || {})
        };

-       return this.each(function() {
+       return this.each(function () {
            const $wrapper = $(this);
            const $contents = $wrapper.find('div[id]').hide();
-           const $tabsContainer = $('<div>', { class: settings.tabsContainerClass }).prependTo($wrapper);
+           const $tabsContainer = $('<div>', {class: settings.tabsContainerClass}).prependTo($wrapper);

            // Generate tabs
            Object.entries(tabsConfig).forEach(([tabTitle, contentSelector], index) => {
@@ -84,7 +126,7 @@
            });

            // Tab click event
-           $tabsContainer.on('click', `.${settings.tabClass}`, function(e) {
+           $tabsContainer.on('click', `.${settings.tabClass}`, function (e) {
                e.preventDefault();
                const $tab = $(this);
                const target = $tab.data('target');
@@ -103,7 +145,7 @@
    // Object to store ongoing requests by namespace
    const requests = {};

-   $.abortiveSingularAjax = function(options) {
+   $.abortiveSingularAjax = function (options) {
        const namespace = options.namespace || 'default';

        // Abort the current request in this namespace if it's still ongoing

View File

@@ -1,14 +1,14 @@
 $(function () {
     /* add container before each proxy location to show status */
-    var option_li = $('.fetch-backend-proxy li').filter(function() {
-        return $("input",this)[0].value.length >0;
-    });
-    //var option_li = $('.fetch-backend-proxy li');
     var isActive = false;
-    $(option_li).prepend('<div class="proxy-status"></div>');
-    $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
+
+    function setup_html_widget() {
+        var option_li = $('.fetch-backend-proxy li').filter(function () {
+            return $("input", this)[0].value.length > 0;
+        });
+        $(option_li).prepend('<div class="proxy-status"></div>');
+        $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
+    }

     function set_proxy_check_status(proxy_key, state) {
         // select input by value name
@@ -59,8 +59,14 @@ $(function () {
     }

     $('#check-all-proxies').click(function (e) {
         e.preventDefault()
-        $('body').addClass('proxy-check-active');
+
+        if (!$('body').hasClass('proxy-check-active')) {
+            setup_html_widget();
+            $('body').addClass('proxy-check-active');
+        }
+
         $('.proxy-check-details').html('');
         $('.proxy-status').html('<span class="spinner"></span>').fadeIn();
         $('.proxy-timing').html('');

View File

@@ -26,8 +26,7 @@ function set_active_tab() {
     if (tab.length) {
         tab[0].parentElement.className = "active";
     }
-    // hash could move the page down
-    window.scrollTo(0, 0);
 }

 function focus_error_tab() {

View File

@@ -49,4 +49,9 @@ $(document).ready(function () {
$("#overlay").toggleClass('visible'); $("#overlay").toggleClass('visible');
heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)'; heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)';
}); });
setInterval(function () {
$('body').toggleClass('spinner-active', $.active > 0);
}, 2000);
}); });

View File

@@ -26,25 +26,28 @@ function request_textpreview_update() {
         data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
     });

+    $('body').toggleClass('spinner-active', 1);
+
     $.abortiveSingularAjax({
         type: "POST",
         url: preview_text_edit_filters_url,
         data: data,
         namespace: 'watchEdit'
     }).done(function (data) {
-        console.debug(data['duration'])
         $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);
         $('#filters-and-triggers #text-preview-inner')
             .text(data['after_filter'])
             .highlightLines([
                 {
                     'color': '#ee0000',
                     'lines': data['trigger_line_numbers']
+                },
+                {
+                    'color': '#757575',
+                    'lines': data['ignore_line_numbers']
                 }
-            ]);
+            ])
     }).fail(function (error) {
         if (error.statusText === 'abort') {
             console.log('Request was aborted due to a new request being fired.');
@@ -73,18 +76,13 @@ $(document).ready(function () {
     $("#text-preview-inner").css('max-height', (vh-300)+"px");
     $("#text-preview-before-inner").css('max-height', (vh-300)+"px");

-    // Realtime preview of 'Filters & Text' setup
-    var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
-
     $("#activate-text-preview").click(function (e) {
         $('body').toggleClass('preview-text-enabled')
         request_textpreview_update();
         const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
-        $("#text-preview-refresh")[method]('click', debounced_request_textpreview_update);
-        $('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update);
-        $('input:visible')[method]('keyup blur change', debounced_request_textpreview_update);
-        $("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update);
+        $('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000));
+        $('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000));
+        $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000));
     });

     $('.minitabs-wrapper').miniTabs({
         "Content after filters": "#text-preview-inner",

View File

@@ -25,15 +25,19 @@ ul#requests-extra_proxies {
 body.proxy-check-active {
   #request {
+    // Padding set by flex layout
+    /*
     .proxy-status {
       width: 2em;
     }
+    */

     .proxy-check-details {
       font-size: 80%;
       color: #555;
       display: block;
-      padding-left: 4em;
+      padding-left: 2em;
+      max-width: 500px;
     }

     .proxy-timing {
View File

@@ -7,6 +7,16 @@
   border-top: none;
 }

+.minitabs-content {
+  width: 100%;
+  display: flex;
+  > div {
+    flex: 1 1 auto;
+    min-width: 0;
+    overflow: scroll;
+  }
+}
+
 .minitabs {
   display: flex;
   border-bottom: 1px solid #ccc;

View File

@@ -42,9 +42,8 @@ body.preview-text-enabled {
     color: var(--color-text-input);
     font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
     font-size: 70%;
-    overflow-x: scroll;
+    word-break: break-word;
     white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
-    overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
   }
 }

View File

@@ -106,10 +106,34 @@ button.toggle-button {
   padding: 5px;
   display: flex;
   justify-content: space-between;
+  border-bottom: 2px solid var(--color-menu-accent);
   align-items: center;
 }

+#pure-menu-horizontal-spinner {
+  height: 3px;
+  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
+  background-size: 400% 400%;
+  width: 100%;
+  animation: gradient 200s ease infinite;
+}
+
+body.spinner-active {
+  #pure-menu-horizontal-spinner {
+    animation: gradient 1s ease infinite;
+  }
+}
+
+@keyframes gradient {
+  0% {
+    background-position: 0% 50%;
+  }
+  50% {
+    background-position: 100% 50%;
+  }
+  100% {
+    background-position: 0% 50%;
+  }
+}
+
 .pure-menu-heading {
   color: var(--color-text-menu-heading);
 }
@@ -123,8 +147,14 @@ button.toggle-button {
   }
 }

+.tab-pane-inner {
+  // .tab-pane-inner will have the #id that the tab button jumps/anchors to
+  scroll-margin-top: 200px;
+}
+
 section.content {
-  padding-top: 5em;
+  padding-top: 100px;
   padding-bottom: 1em;
   flex-direction: column;
   display: flex;
@@ -907,6 +937,7 @@ $form-edge-padding: 20px;
 }

 .tab-pane-inner {
   &:not(:target) {
     display: none;
   }

View File

@@ -119,19 +119,22 @@ ul#requests-extra_proxies {
 #request label[for=proxy] {
   display: inline-block; }

-body.proxy-check-active #request .proxy-status {
-  width: 2em; }
+body.proxy-check-active #request {
+  /*
+  .proxy-status {
+    width: 2em;
+  }
+  */ }

 body.proxy-check-active #request .proxy-check-details {
   font-size: 80%;
   color: #555;
   display: block;
-  padding-left: 4em; }
+  padding-left: 2em;
+  max-width: 500px; }

 body.proxy-check-active #request .proxy-timing {
   font-size: 80%;
   padding-left: 1rem;
   color: var(--color-link); }

 #recommended-proxy {
   display: grid;
@@ -434,6 +437,13 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {
   padding: 20px;
   border: 1px solid #ccc;
   border-top: none; }

+.minitabs-wrapper .minitabs-content {
+  width: 100%;
+  display: flex; }
+  .minitabs-wrapper .minitabs-content > div {
+    flex: 1 1 auto;
+    min-width: 0;
+    overflow: scroll; }
+
 .minitabs-wrapper .minitabs {
   display: flex;
   border-bottom: 1px solid #ccc; }
@@ -488,11 +498,9 @@ body.preview-text-enabled {
     font-family: "Courier New", Courier, monospace;
     /* Sets the font to a monospace type */
     font-size: 70%;
-    overflow-x: scroll;
+    word-break: break-word;
     white-space: pre-wrap;
-    /* Preserves whitespace and line breaks like <pre> */
-    overflow-wrap: break-word;
-    /* Allows long words to break and wrap to the next line */ }
+    /* Preserves whitespace and line breaks like <pre> */ }

 #activate-text-preview {
   right: 0;
@@ -568,9 +576,26 @@ button.toggle-button {
   padding: 5px;
   display: flex;
   justify-content: space-between;
+  border-bottom: 2px solid var(--color-menu-accent);
   align-items: center; }

+#pure-menu-horizontal-spinner {
+  height: 3px;
+  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
+  background-size: 400% 400%;
+  width: 100%;
+  animation: gradient 200s ease infinite; }
+
+body.spinner-active #pure-menu-horizontal-spinner {
+  animation: gradient 1s ease infinite; }
+
+@keyframes gradient {
+  0% {
+    background-position: 0% 50%; }
+  50% {
+    background-position: 100% 50%; }
+  100% {
+    background-position: 0% 50%; } }
+
 .pure-menu-heading {
   color: var(--color-text-menu-heading); }
@@ -580,8 +605,11 @@ button.toggle-button {
   background-color: var(--color-background-menu-link-hover);
   color: var(--color-text-menu-link-hover); }

+.tab-pane-inner {
+  scroll-margin-top: 200px; }
+
 section.content {
-  padding-top: 5em;
+  padding-top: 100px;
   padding-bottom: 1em;
   flex-direction: column;
   display: flex;

View File

@@ -4,6 +4,7 @@ from flask import (
     flash
 )

+from .html_tools import TRANSLATE_WHITESPACE_TABLE
 from . model import App, Watch
 from copy import deepcopy, copy
 from os import path, unlink
@@ -750,17 +751,17 @@ class ChangeDetectionStore:
     def update_5(self):
         # If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings
         # In other words - the watch notification_title and notification_body are not needed if they are the same as the default one
-        current_system_body = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
-        current_system_title = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
+        current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
+        current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
         for uuid, watch in self.data['watching'].items():
             try:
                 watch_body = watch.get('notification_body', '')
-                if watch_body and watch_body.translate(str.maketrans('', '', "\r\n ")) == current_system_body:
+                if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
                     # Looks the same as the default one, so unset it
                     watch['notification_body'] = None

                 watch_title = watch.get('notification_title', '')
-                if watch_title and watch_title.translate(str.maketrans('', '', "\r\n ")) == current_system_title:
+                if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
                     # Looks the same as the default one, so unset it
                     watch['notification_title'] = None
             except Exception as e:
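Illustrative of the migration above: both sides are squashed through the same whitespace-deletion table, so a per-watch notification body that differs from the system default only in line endings or spacing is treated as identical and unset. A small sketch with made-up notification text (the table definition is assumed, as noted earlier):

# Assumed table, matching the old str.maketrans('', '', "\r\n ") plus tab
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')

current_system_body = "New change detected!\r\n{{watch_url}}\r\n"
watch_body = "New change detected!  {{watch_url}}"

# Equal modulo whitespace -> the per-watch override is redundant and can be set to None
assert (watch_body.translate(TRANSLATE_WHITESPACE_TABLE)
        == current_system_body.translate(TRANSLATE_WHITESPACE_TABLE))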

View File

@@ -35,7 +35,9 @@
<body class=""> <body class="">
<div class="header"> <div class="header">
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu"> <div class="pure-menu-fixed" style="width: 100%;">
<div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
{% if has_password and not current_user.is_authenticated %} {% if has_password and not current_user.is_authenticated %}
<a class="pure-menu-heading" href="https://changedetection.io" rel="noopener"> <a class="pure-menu-heading" href="https://changedetection.io" rel="noopener">
<strong>Change</strong>Detection.io</a> <strong>Change</strong>Detection.io</a>
@@ -129,7 +131,12 @@
</li> </li>
</ul> </ul>
</div> </div>
<div id="pure-menu-horizontal-spinner"></div>
</div>
</div> </div>
{% if hosted_sticky %} {% if hosted_sticky %}
<div class="sticky-tab" id="hosted-sticky"> <div class="sticky-tab" id="hosted-sticky">
<a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a> <a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a>

View File

@@ -26,7 +26,6 @@
 </script>
 <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
-<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
 {% if playwright_enabled %}
@@ -330,9 +329,9 @@ nav
 {{ render_checkbox_field(form.filter_text_added) }}
 {{ render_checkbox_field(form.filter_text_replaced) }}
 {{ render_checkbox_field(form.filter_text_removed) }}
-<span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
-<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
-<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
+<span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br>
+<span class="pure-form-message-inline">&nbsp;So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
+<span class="pure-form-message-inline">&nbsp;When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
 </fieldset>
 <fieldset class="pure-control-group">
 {{ render_checkbox_field(form.check_unique_lines) }}
@@ -371,7 +370,7 @@ nav
 ") }}
 <span class="pure-form-message-inline">
     <ul>
-        <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
+        <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
         <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
         <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
         <li>Changing this will affect the comparison checksum which may trigger an alert</li>
@@ -398,7 +397,9 @@ Unavailable") }}
 </fieldset>
 <fieldset>
     <div class="pure-control-group">
-        {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
+        {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
+or
+keyword") }}
         <span class="pure-form-message-inline">
             <ul>
                 <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
@@ -424,14 +425,15 @@ Unavailable") }}
 </script>
 <br>
 {#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
 <div class="minitabs-wrapper">
-    <div id="text-preview-inner" class="monospace-preview">
-        <p>Loading...</p>
-    </div>
-    <div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
-        <p>Loading...</p>
-    </div>
+    <div class="minitabs-content">
+        <div id="text-preview-inner" class="monospace-preview">
+            <p>Loading...</p>
+        </div>
+        <div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
+            <p>Loading...</p>
+        </div>
+    </div>
 </div>
 </div>
 </div>
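The new placeholder hints at the two forms 'Extract text' accepts: a /regex/ applied line by line, or a plain keyword matched case-insensitively. A rough sketch of that behaviour, simplified from the processor change shown earlier in this changeset (the real code also handles flags and byte/str details):

import re

def extract_text(stripped_text: str, s_re: str) -> str:
    # Keep only the lines (for /regex/) or literal matches (for plain keywords) the expression hits
    if s_re.startswith('/') and s_re.endswith('/'):
        r = re.compile(s_re[1:-1], re.IGNORECASE)
        out = [line for line in stripped_text.splitlines() if r.search(line)]
    else:
        # Plain keyword: case-insensitive literal match
        r = re.compile(re.escape(s_re), re.IGNORECASE)
        out = r.findall(stripped_text)
    return '\n'.join(out)

print(extract_text("1 online\n5 comments so far\nhello", "/\\d+ comments/"))  # -> "5 comments so far"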

View File

@@ -172,7 +172,7 @@ nav
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br> <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li>Matching text will be <strong>removed</strong> from the text snapshot</li> <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
<li>Note: This is applied globally in addition to the per-watch rules.</li> <li>Note: This is applied globally in addition to the per-watch rules.</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>

View File

@@ -0,0 +1,6 @@
# A list of real world examples!
Always the price should be 666.66 for our tests
see test_restock_itemprop.py::test_special_prop_examples

View File

@@ -0,0 +1,25 @@
<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
data-testid="price-section"
data-optly-product-tile-price-section="true"><span
class="PriceRange ProductPrice variant-huge" itemprop="offers"
itemscope="" itemtype="http://schema.org/Offer"><div
class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
aria-hidden="true" class="Price variant-huge" data-testid="price"
itemprop="price"><sup class="sup" data-testid="price-symbol"
itemprop="priceCurrency" content="AUD">$</sup><span
class="dollars" data-testid="price-value" itemprop="price"
content="155.55">155.55</span><span class="extras"><span class="sup"
data-testid="price-sup"></span></span></span></span>
</div>
<script type="application/ld+json">{
"@type": "Product",
"@context": "https://schema.org",
"name": "test",
"description": "test",
"offers": {
"@type": "Offer",
"priceCurrency": "AUD",
"price": 155.55
},
}</script>

View File

@@ -16,4 +16,4 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     )
     assert b"1 Imported" in res.data

-    time.sleep(3)
+    wait_for_all_checks(client)

View File

@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
+import json
 import os
 from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client


 def set_response():
@@ -18,7 +19,6 @@ def set_response():
         f.write(data)
     time.sleep(1)

 def test_socks5(client, live_server, measure_memory_usage):
     live_server_setup(live_server)
     set_response()
@@ -79,3 +79,24 @@ def test_socks5(client, live_server, measure_memory_usage):

     # Should see the proper string
     assert "Awesome, you made it".encode('utf-8') in res.data

+    # PROXY CHECKER WIDGET CHECK - this needs more checking
+    uuid = extract_UUID_from_client(client)
+
+    res = client.get(
+        url_for("check_proxies.start_check", uuid=uuid),
+        follow_redirects=True
+    )
+    # It's probably already finished super fast :(
+    #assert b"RUNNING" in res.data
+
+    wait_for_all_checks(client)
+    res = client.get(
+        url_for("check_proxies.get_recheck_status", uuid=uuid),
+        follow_redirects=True
+    )
+    assert b"OK" in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

View File

@@ -77,6 +77,8 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
     # The trigger line is REMOVED, this should trigger
     set_original(excluding='The golden line')

+    # Check in the processor here what's going on, its triggering empty-reply and no change.
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("index"))
@@ -151,7 +153,6 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):

     # A line thats not the trigger should not trigger anything
     res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
     assert b'1 watches queued for rechecking.' in res.data
     wait_for_all_checks(client)
@@ -173,6 +174,5 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):
     assert b'-Oh yes please-' in response
     assert '网站监测 内容更新了'.encode('utf-8') in response

     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data

View File

@@ -65,11 +65,8 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
     live_server_setup(live_server)

     # Use a mix of case in ZzZ to prove it works case-insensitive.
     ignore_text = "out of stoCk\r\nfoobar"
     set_original_ignore_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)

     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
@@ -127,13 +124,24 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measure_memory_usage):
     assert b'unviewed' not in res.data
     assert b'/test-endpoint' in res.data

+    # 2548
+    # Going back to the ORIGINAL should NOT trigger a change
+    set_original_ignore_response()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
-    # Now we set a change where the text is gone, it should now trigger
+    # Now we set a change where the text is gone AND its different content, it should now trigger
     set_modified_response_minus_block_text()
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("index"))
     assert b'unviewed' in res.data

     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data

View File

@@ -5,7 +5,7 @@ import time
from flask import url_for from flask import url_for
from ..html_tools import * from ..html_tools import *
from .util import live_server_setup from .util import live_server_setup, wait_for_all_checks
def test_setup(live_server): def test_setup(live_server):
@@ -119,12 +119,10 @@ across multiple lines
def test_element_removal_full(client, live_server, measure_memory_usage): def test_element_removal_full(client, live_server, measure_memory_usage):
sleep_time_for_fetch_thread = 3 #live_server_setup(live_server)
set_original_response() set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for("test_endpoint", _external=True) test_url = url_for("test_endpoint", _external=True)
@@ -132,7 +130,8 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
url_for("import_page"), data={"urls": test_url}, follow_redirects=True url_for("import_page"), data={"urls": test_url}, follow_redirects=True
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(1) wait_for_all_checks(client)
# Goto the edit page, add the filter data # Goto the edit page, add the filter data
# Not sure why \r needs to be added - absent of the #changetext this is not necessary # Not sure why \r needs to be added - absent of the #changetext this is not necessary
subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext" subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
@@ -148,6 +147,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
follow_redirects=True, follow_redirects=True,
) )
assert b"Updated watch." in res.data assert b"Updated watch." in res.data
wait_for_all_checks(client)
# Check it saved # Check it saved
res = client.get( res = client.get(
@@ -156,10 +156,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data
# Trigger a check # Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True) res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
# Give the thread time to pick it up wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)
# so that we set the state to 'unviewed' after all the edits # so that we set the state to 'unviewed' after all the edits
client.get(url_for("diff_history_page", uuid="first")) client.get(url_for("diff_history_page", uuid="first"))
@@ -168,10 +168,11 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
set_modified_response() set_modified_response()
# Trigger a check # Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True) res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread) wait_for_all_checks(client)
# There should not be an unviewed change, as changes should be removed # There should not be an unviewed change, as changes should be removed
res = client.get(url_for("index")) res = client.get(url_for("index"))

View File

@@ -71,7 +71,7 @@ def test_setup(client, live_server, measure_memory_usage):
     live_server_setup(live_server)

 def test_check_filter_multiline(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
+    # live_server_setup(live_server)
     set_multiline_response()

     # Add our URL to the import page
# Add our URL to the import page # Add our URL to the import page

View File

@@ -33,13 +33,17 @@ def test_strip_regex_text_func():
     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

-    assert b"but 1 lines" in stripped_content
-    assert b"igNORe-cAse text" not in stripped_content
-    assert b"but 1234 lines" not in stripped_content
-    assert b"really" not in stripped_content
-    assert b"not this" not in stripped_content
+    assert "but 1 lines" in stripped_content
+    assert "igNORe-cAse text" not in stripped_content
+    assert "but 1234 lines" not in stripped_content
+    assert "really" not in stripped_content
+    assert "not this" not in stripped_content

     # Check line number reporting
     stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
     assert stripped_content == [2, 5, 6, 7, 8, 10]

+    # Check that linefeeds are preserved when there are is no matching ignores
+    content = "some text\n\nand other text\n"
+    stripped_content = html_tools.strip_ignore_text(content, ignore_lines)
+    assert content == stripped_content
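A sketch of the contract these assertions pin down: each line is tested case-insensitively against the ignore list, matching lines are dropped from the checksum text (or reported by number), and blank lines and linefeeds elsewhere survive untouched. This is a simplified stand-in - the real html_tools.strip_ignore_text also understands /regex/ entries:

def strip_ignore_text(content: str, ignore_lines: list, mode: str = "output"):
    # Simplified sketch: case-insensitive substring matching only
    ignored_line_numbers = []
    kept = []
    for i, line in enumerate(content.splitlines(), start=1):
        if any(ig.lower() in line.lower() for ig in ignore_lines):
            ignored_line_numbers.append(i)
        else:
            kept.append(line)
    if mode == "line numbers":
        return ignored_line_numbers
    result = "\n".join(kept)
    # Preserve a trailing linefeed so unmatched content round-trips byte-identically
    if content.endswith("\n"):
        result += "\n"
    return result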

View File

@@ -22,10 +22,15 @@ def test_strip_text_func():
ignore_lines = ["sometimes"] ignore_lines = ["sometimes"]
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines) stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert "sometimes" not in stripped_content
assert "Some content" in stripped_content
assert b"sometimes" not in stripped_content # Check that line feeds dont get chewed up when something is found
assert b"Some content" in stripped_content test_content = "Some initial text\n\nWhich is across multiple lines\n\nZZZZz\n\n\nSo let's see what happens."
ignore = ['something irrelevent but just to check', 'XXXXX', 'YYYYY', 'ZZZZZ']
stripped_content = html_tools.strip_ignore_text(test_content, ignore)
assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."
def set_original_ignore_response(): def set_original_ignore_response():
test_return_data = """<html> test_return_data = """<html>
@@ -141,8 +146,6 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Just to be sure.. set a regular modified change.. # Just to be sure.. set a regular modified change..
set_modified_original_ignore_response() set_modified_original_ignore_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -153,17 +156,17 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
res = client.get(url_for("preview_page", uuid="first")) res = client.get(url_for("preview_page", uuid="first"))
# Should no longer be in the preview # SHOULD BE be in the preview, it was added in set_modified_original_ignore_response()
assert b'new ignore stuff' not in res.data # and we have "new ignore stuff" in ignore_text
# it is only ignored, it is not removed (it will be highlighted too)
assert b'new ignore stuff' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
# When adding some ignore text, it should not trigger a change, even if something else on that line changes
def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage): def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ" ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
set_original_ignore_response() set_original_ignore_response()
@@ -172,6 +175,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
url_for("settings_page"), url_for("settings_page"),
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-global_ignore_text": ignore_text, "application-global_ignore_text": ignore_text,
'application-fetch_backend': "html_requests" 'application-fetch_backend': "html_requests"
}, },
@@ -192,9 +196,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Give the thread time to pick it up # Give the thread time to pick it up
wait_for_all_checks(client) wait_for_all_checks(client)
#Adding some ignore text should not trigger a change
# Goto the edit page of the item, add our ignore text
# Add our URL to the import page
res = client.post( res = client.post(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"}, data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"},
@@ -210,20 +212,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Trigger a check # Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client) wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change
# so that we are sure everything is viewed and in a known 'nothing changed' state
res = client.get(url_for("diff_history_page", uuid="first"))
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data assert b'/test-endpoint' in res.data
#####
# Make a change which includes the ignore text, it should be ignored and no 'change' triggered
# Make a change which includes the ignore text # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list
set_modified_ignore_response() set_modified_ignore_response()
# Trigger a check # Trigger a check
@@ -233,6 +230,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# It should report nothing found (no new 'unviewed' class) # It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data assert b'/test-endpoint' in res.data

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def set_response():
data = f"""<html>
<body>Awesome, you made it<br>
yeah the socks request worked<br>
something to ignore<br>
something to trigger<br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)
def test_content_filter_live_preview(client, live_server, measure_memory_usage):
live_server_setup(live_server)
set_response()
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": ''},
follow_redirects=True
)
uuid = extract_UUID_from_client(client)
res = client.post(
url_for("edit_page", uuid=uuid),
data={
"include_filters": "",
"fetch_backend": 'html_requests',
"ignore_text": "something to ignore",
"trigger_text": "something to trigger",
"url": test_url,
},
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
# The endpoint is a POST and accepts the form values to override the watch preview
import json
# DEFAULT OUTPUT WITHOUT ANYTHING UPDATED/CHANGED - SHOULD SEE THE WATCH DEFAULTS
res = client.post(
url_for("watch_get_preview_rendered", uuid=uuid)
)
default_return = json.loads(res.data.decode('utf-8'))
assert default_return.get('after_filter')
assert default_return.get('before_filter')
assert default_return.get('ignore_line_numbers') == [3] # "something to ignore" line 3
assert default_return.get('trigger_line_numbers') == [4] # "something to trigger" line 4
# SEND AN UPDATE AND WE SHOULD SEE THE OUTPUT CHANGE SO WE KNOW TO HIGHLIGHT NEW STUFF
res = client.post(
url_for("watch_get_preview_rendered", uuid=uuid),
data={
"include_filters": "",
"fetch_backend": 'html_requests',
"ignore_text": "sOckS", # Also be sure case insensitive works
"trigger_text": "AweSOme",
"url": test_url,
},
)
reply = json.loads(res.data.decode('utf-8'))
assert reply.get('after_filter')
assert reply.get('before_filter')
assert reply.get('ignore_line_numbers') == [2] # Ignored - "socks" on line 2
assert reply.get('trigger_line_numbers') == [1] # Triggers "Awesome" in line 1
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
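For reference, the shape of the JSON reply the preview endpoint returns, with keys as exercised by this test (the line-number arrays feed the red/grey highlightLines() calls in the edit page JavaScript shown earlier):

# Illustrative reply payload; values here are placeholders
reply = {
    "before_filter": "...text before filters...",
    "after_filter": "...text after filters...",
    "trigger_line_numbers": [1],  # highlighted red (#ee0000)
    "ignore_line_numbers": [2],   # greyed out (#757575)
}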

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
# `subtractive_selectors` should still work in `source:` type requests
def test_fetch_pdf(client, live_server, measure_memory_usage):
import shutil
shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")
live_server_setup(live_server)
test_url = url_for('test_pdf_endpoint', _external=True)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# PDF header should not be there (it was converted to text)
assert b'PDF' not in res.data[:10]
assert b'hello world' in res.data
# So we know if the file changes in other ways
import hashlib
original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
# We should have one
assert len(original_md5) > 0
# And it's going to be in the document
assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data
shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data
# The original checksum should be not be here anymore (cdio adds it to the bottom of the text)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert original_md5.encode('utf-8') not in res.data
assert changed_md5.encode('utf-8') in res.data
res = client.get(
url_for("diff_history_page", uuid="first"),
follow_redirects=True
)
assert original_md5.encode('utf-8') in res.data
assert changed_md5.encode('utf-8') in res.data
assert b'here is a change' in res.data
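The checksum this test matches is just an uppercased MD5 of the raw PDF bytes, which changedetection.io appends to the extracted text as "Document checksum - <MD5>". A small helper showing the same calculation:

import hashlib

def file_md5_upper(path: str) -> str:
    # Same digest the test compares against the "Document checksum - ..." line
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest().upper()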

View File

@@ -3,7 +3,7 @@ import os
 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
+from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
 from ..notification import default_notification_format

 instock_props = [
@@ -413,3 +413,31 @@ def test_data_sanity(client, live_server):
     res = client.get(
         url_for("edit_page", uuid="first"))
     assert test_url2.encode('utf-8') in res.data

+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
+# All examples should give a prive of 666.66
+def test_special_prop_examples(client, live_server):
+    import glob
+    #live_server_setup(live_server)
+
+    test_url = url_for('test_endpoint', _external=True)
+    check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
+    files = glob.glob(check_path)
+    assert files
+    for test_example_filename in files:
+        with open(test_example_filename, 'r') as example_f:
+            with open("test-datastore/endpoint-content.txt", "w") as test_f:
+                test_f.write(f"<html><body>{example_f.read()}</body></html>")
+
+            # Now fetch it and check the price worked
+            client.post(
+                url_for("form_quick_watch_add"),
+                data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
+                follow_redirects=True
+            )
+            wait_for_all_checks(client)
+            res = client.get(url_for("index"))
+            assert b'ception' not in res.data
+            assert b'155.55' in res.data

View File

@@ -18,12 +18,13 @@ class TestDiffBuilder(unittest.TestCase):
     watch['last_viewed'] = 110

-    watch.save_history_text(contents=b"hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
-    watch.save_history_text(contents=b"hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
-    watch.save_history_text(contents=b"hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
-    watch.save_history_text(contents=b"hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
-    watch.save_history_text(contents=b"hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
-    watch.save_history_text(contents=b"hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))
+    # Contents from the browser are always returned from the browser/requests/etc as str, str is basically UTF-16 in python
+    watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
+    watch.save_history_text(contents="hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
+    watch.save_history_text(contents="hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
+    watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
+    watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
+    watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))

     p = watch.get_next_snapshot_key_to_last_viewed
     assert p == "112", "Correct last-viewed timestamp was detected"

View File

@@ -260,9 +260,6 @@ class update_worker(threading.Thread):
             try:
                 # Processor is what we are using for detecting the "Change"
                 processor = watch.get('processor', 'text_json_diff')

-                # Abort processing when the content was the same as the last fetch
-                skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')

                 # Init a new 'difference_detection_processor', first look in processors
                 processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -278,16 +275,13 @@ class update_worker(threading.Thread):

                 update_handler.call_browser()

-                changed_detected, update_obj, contents = update_handler.run_changedetection(
-                    watch=watch,
-                    skip_when_checksum_same=skip_when_same_checksum,
-                )
+                changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)

                 # Re #342
                 # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                 # We then convert/.decode('utf-8') for the notification etc
-                if not isinstance(contents, (bytes, bytearray)):
-                    raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
+                # if not isinstance(contents, (bytes, bytearray)):
+                #    raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
             except PermissionError as e:
                 logger.critical(f"File permission error updating file, watch: {uuid}")
                 logger.critical(str(e))
@@ -338,7 +332,8 @@ class update_worker(threading.Thread):
                     elif e.status_code == 500:
                         err_text = "Error - 500 (Internal server error) received from the web site"
                     else:
-                        err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
+                        extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
+                        err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"

                     if e.screenshot:
                         watch.save_screenshot(screenshot=e.screenshot, as_error=True)
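The new 4xx hint in isolation: any client-error status that falls through to the generic branch gets the "(Access denied or blocked)" suffix, while server errors keep the plain message (note the dedicated 403/404/500 branches above still run first):

def http_error_text(status_code: int) -> str:
    # Mirrors the fallback branch of the update_worker change above
    extra = ' (Access denied or blocked)' if str(status_code).startswith('4') else ''
    return f"Error - Request returned a HTTP error code {status_code}{extra}"

assert http_error_text(429) == "Error - Request returned a HTTP error code 429 (Access denied or blocked)"
assert http_error_text(502) == "Error - Request returned a HTTP error code 502"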