Compare commits

..

29 Commits

Author SHA1 Message Date
dgtlmoon
dc96a5ff69 WIP 2022-12-19 15:02:41 +01:00
dgtlmoon
7a1d2d924e Dark mode - system setting var is not required (its cookie based) 2022-12-19 14:13:57 +01:00
dgtlmoon
c3731cf055 0.40.0.3 2022-12-19 12:41:52 +01:00
dgtlmoon
a287e5a86c Visual Selector - Select smallest/most precise element first, better filtering of zero size elements 2022-12-19 12:33:31 +01:00
dgtlmoon
235535c327 Fetching - Check the most overdue watch first (#1242) 2022-12-17 15:40:57 +01:00
dgtlmoon
44dc62da2d Overview list - Checkbox action "Recheck" 2022-12-16 18:35:09 +01:00
dgtlmoon
0c380c170f Playwright - Better error reporting and re-try fetch on fail once (#1238) 2022-12-16 18:06:14 +01:00
dgtlmoon
b7a2501d64 Fetching - Always sort the key order of JSON content for less false alerts (May cause an alert on upgrade, but will be better going forwards) #1219 2022-12-15 09:13:09 +01:00
dgtlmoon
e970fef991 Fetcher + VisualSelector - xPath filter with attribute filter was breaking the element finder 2022-12-14 19:06:49 +01:00
dgtlmoon
b76148a0f4 Fetcher - CPU usage - Skip processing if the previous checksum and the just fetched one was the same (#925) 2022-12-14 15:08:34 +01:00
dgtlmoon
93cc30437f Playwright+BrowserSteps - Fetch changes - Fetch simply after page starts rendering + delay seconds, disable service workers 2022-12-14 12:16:04 +01:00
dgtlmoon
6562d6e0d4 Improve ARM/rust build comment 2022-12-13 12:28:20 +01:00
dgtlmoon
6c217cc3b6 README.md - Improving JSONPath example for LD+JSON product data 2022-12-11 11:14:52 +01:00
dgtlmoon
f30cdf0674 0.40.0.2 2022-12-08 22:36:59 +01:00
dgtlmoon
14da0646a7 Price follower - Dont scan for ldjson data when 'no' was clicked on the suggestion (#1207) 2022-12-08 22:35:37 +01:00
dgtlmoon
b413cdecc7 Adding missing parts for pip build Re #1206 2022-12-08 21:54:55 +01:00
dgtlmoon
7bf52d9275 0.40.0 2022-12-08 20:09:42 +01:00
dgtlmoon
09e6624afd VisualSelector - Exclude items that are not interactable or visible 2022-12-08 20:08:41 +01:00
dgtlmoon
b58fd995b5 Automatically offer to track LD+JSON product price data (#1204) 2022-12-08 19:28:20 +01:00
dgtlmoon
f7bb8a0afa UI - favicon callback no longer needed 2022-12-07 12:14:36 +01:00
dgtlmoon
3e333496c1 Test cleanups (#1196) 2022-12-07 12:03:28 +01:00
Amro Hendawi
ee776a9627 Update runtime.txt (#1198) 2022-12-07 00:17:58 +01:00
dgtlmoon
65db4d68e3 Dark mode - HTML template tidy up (#1197) 2022-12-06 23:50:49 +01:00
dgtlmoon
74d93d10c3 UI - watch tags also known as watch tag / label 2022-12-06 23:16:22 +01:00
dgtlmoon
37aef0530a Notification templates - bug in update, was updating the main system instead of the watch notification_title incorrectly 2022-12-06 18:29:09 +01:00
dgtlmoon
f86763dc7a Extract data - minor improvement to example 2022-12-06 10:53:23 +01:00
dgtlmoon
13c25f9b92 Darkmode - Pause/Mute notification colour fix, re #1195 2022-12-06 10:49:24 +01:00
dgtlmoon
265f622e75 Notification - Support for standard API calls post:// posts:// get:// gets:// delete:// deletes:// put:// puts:// (#1194) 2022-12-05 20:49:08 +01:00
dgtlmoon
c12db2b725 Notifications - tokens/jinja2 templating (#1184) 2022-12-05 19:58:43 +01:00
40 changed files with 676 additions and 235 deletions

View File

@@ -1,7 +1,7 @@
# pip dependencies install stage # pip dependencies install stage
FROM python:3.8-slim as builder FROM python:3.8-slim as builder
# rustc compiler would be needed on ARM type devices but theres an issue with some deps not building.. # See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -31,8 +31,7 @@ RUN pip install --target=/dependencies playwright~=1.27.1 \
# Final image stage # Final image stage
FROM python:3.8-slim FROM python:3.8-slim
# Actual packages needed at runtime, usually due to the notification (apprise) backend # See `cryptography` pin comment in requirements.txt
# rustc compiler would be needed on ARM type devices but theres an issue with some deps not building..
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
# Re #93, #73, excluding rustc (adds another 430Mb~) # Re #93, #73, excluding rustc (adds another 430Mb~)

View File

@@ -1,9 +1,10 @@
recursive-include changedetectionio/api * recursive-include changedetectionio/api *
recursive-include changedetectionio/templates * recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/static *
recursive-include changedetectionio/model * recursive-include changedetectionio/model *
recursive-include changedetectionio/tests *
recursive-include changedetectionio/res * recursive-include changedetectionio/res *
recursive-include changedetectionio/static *
recursive-include changedetectionio/templates *
recursive-include changedetectionio/tests *
prune changedetectionio/static/package-lock.json prune changedetectionio/static/package-lock.json
prune changedetectionio/static/styles/node_modules prune changedetectionio/static/styles/node_modules
prune changedetectionio/static/styles/package-lock.json prune changedetectionio/static/styles/package-lock.json

View File

@@ -187,11 +187,29 @@ When you enable a `json:` or `jq:` filter, you can even automatically extract an
<html> <html>
... ...
<script type="application/ld+json"> <script type="application/ld+json">
{"@context":"http://schema.org","@type":"Product","name":"Nan Optipro Stage 1 Baby Formula 800g","price": 23.50 }
{
"@context":"http://schema.org/",
"@type":"Product",
"offers":{
"@type":"Offer",
"availability":"http://schema.org/InStock",
"price":"3949.99",
"priceCurrency":"USD",
"url":"https://www.newegg.com/p/3D5-000D-001T1"
},
"description":"Cobratype King Cobra Hero Desktop Gaming PC",
"name":"Cobratype King Cobra Hero Desktop Gaming PC",
"sku":"3D5-000D-001T1",
"itemCondition":"NewCondition"
}
</script> </script>
``` ```
`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure `json:$..price` or `jq:..price` would give `3949.99`, or you can extract the whole structure (use a JSONpath test website to validate with)
The application also supports notifying you that it can follow this information automatically
## Proxy Configuration ## Proxy Configuration

View File

@@ -10,6 +10,7 @@ import threading
import time import time
import timeago import timeago
from changedetectionio import queuedWatchMetaData
from copy import deepcopy from copy import deepcopy
from distutils.util import strtobool from distutils.util import strtobool
from feedgen.feed import FeedGenerator from feedgen.feed import FeedGenerator
@@ -35,7 +36,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools from changedetectionio import html_tools
from changedetectionio.api import api_v1 from changedetectionio.api import api_v1
__version__ = '0.39.22.1' __version__ = '0.40.0.3'
datastore = None datastore = None
@@ -95,6 +96,12 @@ def init_app_secret(datastore_path):
return secret return secret
@app.template_global()
def get_darkmode_state():
    """Return the dark-mode preference stored in the request cookie.

    Registered as a Jinja template global so templates can call it directly.

    Returns:
        The string 'true' when the `css_dark_mode` cookie holds a truthy
        value understood by `strtobool`, otherwise the string 'false'.
    """
    css_dark_mode = request.cookies.get('css_dark_mode', 'false')
    try:
        enabled = bool(css_dark_mode) and bool(strtobool(css_dark_mode))
    except ValueError:
        # The cookie is client-controlled; strtobool() raises ValueError for
        # anything it does not recognise, so fall back to light mode
        # instead of failing the template render.
        enabled = False
    return 'true' if enabled else 'false'
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread # We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
# running or something similar. # running or something similar.
@app.template_filter('format_last_checked_time') @app.template_filter('format_last_checked_time')
@@ -202,10 +209,6 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo', watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
def getDarkModeSetting():
css_dark_mode = request.cookies.get('css_dark_mode')
return True if (css_dark_mode == 'true' or css_dark_mode == True) else False
# Setup cors headers to allow all domains # Setup cors headers to allow all domains
# https://flask-cors.readthedocs.io/en/latest/ # https://flask-cors.readthedocs.io/en/latest/
# CORS(app) # CORS(app)
@@ -402,10 +405,8 @@ def changedetection_app(config=None, datastore_o=None):
sorted_watches.append(watch) sorted_watches.append(watch)
existing_tags = datastore.get_all_tags() existing_tags = datastore.get_all_tags()
form = forms.quickWatchForm(request.form) form = forms.quickWatchForm(request.form)
output = render_template("watch-overview.html", output = render_template("watch-overview.html",
dark_mode=getDarkModeSetting(),
form=form, form=form,
watches=sorted_watches, watches=sorted_watches,
tags=existing_tags, tags=existing_tags,
@@ -415,7 +416,7 @@ def changedetection_app(config=None, datastore_o=None):
# Don't link to hosting when we're on the hosting environment # Don't link to hosting when we're on the hosting environment
hosted_sticky=os.getenv("SALTED_PASS", False) == False, hosted_sticky=os.getenv("SALTED_PASS", False) == False,
guid=datastore.data['app_guid'], guid=datastore.data['app_guid'],
queued_uuids=[uuid for p,uuid in update_q.queue]) queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue])
if session.get('share-link'): if session.get('share-link'):
@@ -595,25 +596,16 @@ def changedetection_app(config=None, datastore_o=None):
using_default_check_time = False using_default_check_time = False
break break
# Use the default if its the same as system wide # Use the default if it's the same as system-wide.
if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']: if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
extra_update_obj['fetch_backend'] = None extra_update_obj['fetch_backend'] = None
# Ignore text # Ignore text
form_ignore_text = form.ignore_text.data form_ignore_text = form.ignore_text.data
datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form_ignore_text:
if len(datastore.data['watching'][uuid].history):
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
if len(datastore.data['watching'][uuid].history):
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
# Be sure proxy value is None # Be sure proxy value is None
if datastore.proxy_list is not None and form.data['proxy'] == '': if datastore.proxy_list is not None and form.data['proxy'] == '':
extra_update_obj['proxy'] = None extra_update_obj['proxy'] = None
@@ -631,7 +623,7 @@ def changedetection_app(config=None, datastore_o=None):
datastore.needs_write_urgent = True datastore.needs_write_urgent = True
# Queue the watch for immediate recheck, with a higher priority # Queue the watch for immediate recheck, with a higher priority
update_q.put((1, uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
# Diff page [edit] link should go back to diff page # Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff': if request.args.get("next") and request.args.get("next") == 'diff':
@@ -664,7 +656,6 @@ def changedetection_app(config=None, datastore_o=None):
browser_steps_config=browser_step_ui_config, browser_steps_config=browser_step_ui_config,
current_base_url=datastore.data['settings']['application']['base_url'], current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
dark_mode=getDarkModeSetting(),
form=form, form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time, has_empty_checktime=using_default_check_time,
@@ -752,7 +743,6 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("settings.html", output = render_template("settings.html",
form=form, form=form,
dark_mode=getDarkModeSetting(),
current_base_url = datastore.data['settings']['application']['base_url'], current_base_url = datastore.data['settings']['application']['base_url'],
hide_remove_pass=os.getenv("SALTED_PASS", False), hide_remove_pass=os.getenv("SALTED_PASS", False),
api_key=datastore.data['settings']['application'].get('api_access_token'), api_key=datastore.data['settings']['application'].get('api_access_token'),
@@ -765,8 +755,11 @@ def changedetection_app(config=None, datastore_o=None):
@login_required @login_required
def import_page(): def import_page():
remaining_urls = [] remaining_urls = []
from changedetectionio import forms
form = forms.importForm(request.form)
if request.method == 'POST': if request.method == 'POST':
from .importer import import_url_list, import_distill_io_json from .importer import import_url_list, import_distill_io_json, import_changedetection_io_zip
# URL List import # URL List import
if request.values.get('urls') and len(request.values.get('urls').strip()): if request.values.get('urls') and len(request.values.get('urls').strip()):
@@ -774,7 +767,7 @@ def changedetection_app(config=None, datastore_o=None):
importer = import_url_list() importer = import_url_list()
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore) importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
for uuid in importer.new_uuids: for uuid in importer.new_uuids:
update_q.put((1, uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
if len(importer.remaining_data) == 0: if len(importer.remaining_data) == 0:
return redirect(url_for('index')) return redirect(url_for('index'))
@@ -787,13 +780,22 @@ def changedetection_app(config=None, datastore_o=None):
d_importer = import_distill_io_json() d_importer = import_distill_io_json()
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids: for uuid in d_importer.new_uuids:
update_q.put((1, uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
if request.files.get("backup_zip_file"):
if not form.validate():
flash("An error occurred, please see below.", "error")
else:
d_importer = import_changedetection_io_zip()
d_importer.run(data=None, flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids:
# Queue without priority, we will examine their own rule to find out if it should be checked
update_q.put(queuedWatchMetaData.PrioritizedItem(item={'uuid': uuid, 'skip_when_checksum_same': True}))
# Could be some remaining, or we could be on GET # Could be some remaining, or we could be on GET
output = render_template("import.html", output = render_template("import.html",
dark_mode=getDarkModeSetting(), form=form,
import_url_list_remaining="\n".join(remaining_urls), import_url_list_remaining="\n".join(remaining_urls),
original_distill_json='' original_distill_json=''
) )
@@ -893,7 +895,6 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("diff.html", output = render_template("diff.html",
current_diff_url=watch['url'], current_diff_url=watch['url'],
current_previous_version=str(previous_version), current_previous_version=str(previous_version),
dark_mode=getDarkModeSetting(),
extra_stylesheets=extra_stylesheets, extra_stylesheets=extra_stylesheets,
extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
extract_form=extract_form, extract_form=extract_form,
@@ -944,7 +945,6 @@ def changedetection_app(config=None, datastore_o=None):
content=content, content=content,
history_n=watch.history_n, history_n=watch.history_n,
extra_stylesheets=extra_stylesheets, extra_stylesheets=extra_stylesheets,
dark_mode=getDarkModeSetting(),
# current_diff_url=watch['url'], # current_diff_url=watch['url'],
watch=watch, watch=watch,
uuid=uuid, uuid=uuid,
@@ -991,7 +991,6 @@ def changedetection_app(config=None, datastore_o=None):
content=content, content=content,
history_n=watch.history_n, history_n=watch.history_n,
extra_stylesheets=extra_stylesheets, extra_stylesheets=extra_stylesheets,
dark_mode=getDarkModeSetting(),
ignored_line_numbers=ignored_line_numbers, ignored_line_numbers=ignored_line_numbers,
triggered_line_numbers=trigger_line_numbers, triggered_line_numbers=trigger_line_numbers,
current_diff_url=watch['url'], current_diff_url=watch['url'],
@@ -1010,15 +1009,10 @@ def changedetection_app(config=None, datastore_o=None):
def notification_logs(): def notification_logs():
global notification_debug_log global notification_debug_log
output = render_template("notification-log.html", output = render_template("notification-log.html",
dark_mode=getDarkModeSetting(),
logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."]) logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
return output return output
@app.route("/favicon.ico", methods=['GET'])
def favicon():
return send_from_directory("static/images", path="favicon.ico")
# We're good but backups are even better! # We're good but backups are even better!
@app.route("/backup", methods=['GET']) @app.route("/backup", methods=['GET'])
@login_required @login_required
@@ -1161,7 +1155,7 @@ def changedetection_app(config=None, datastore_o=None):
if not add_paused and new_uuid: if not add_paused and new_uuid:
# Straight into the queue. # Straight into the queue.
update_q.put((1, new_uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.") flash("Watch added.")
if add_paused: if add_paused:
@@ -1198,7 +1192,7 @@ def changedetection_app(config=None, datastore_o=None):
uuid = list(datastore.data['watching'].keys()).pop() uuid = list(datastore.data['watching'].keys()).pop()
new_uuid = datastore.clone(uuid) new_uuid = datastore.clone(uuid)
update_q.put((5, new_uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.') flash('Cloned.')
return redirect(url_for('index')) return redirect(url_for('index'))
@@ -1206,7 +1200,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/api/checknow", methods=['GET']) @app.route("/api/checknow", methods=['GET'])
@login_required @login_required
def form_watch_checknow(): def form_watch_checknow():
# Forced recheck bypasses the 'skip if content is the same' rule (items are queued with 'skip_when_checksum_same': False)
tag = request.args.get('tag') tag = request.args.get('tag')
uuid = request.args.get('uuid') uuid = request.args.get('uuid')
i = 0 i = 0
@@ -1215,11 +1209,9 @@ def changedetection_app(config=None, datastore_o=None):
for t in running_update_threads: for t in running_update_threads:
running_uuids.append(t.current_uuid) running_uuids.append(t.current_uuid)
# @todo check thread is running and skip
if uuid: if uuid:
if uuid not in running_uuids: if uuid not in running_uuids:
update_q.put((1, uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
i = 1 i = 1
elif tag != None: elif tag != None:
@@ -1227,14 +1219,14 @@ def changedetection_app(config=None, datastore_o=None):
for watch_uuid, watch in datastore.data['watching'].items(): for watch_uuid, watch in datastore.data['watching'].items():
if (tag != None and tag in watch['tag']): if (tag != None and tag in watch['tag']):
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put((1, watch_uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
i += 1 i += 1
else: else:
# No tag, no uuid, add everything. # No tag, no uuid, add everything.
for watch_uuid, watch in datastore.data['watching'].items(): for watch_uuid, watch in datastore.data['watching'].items():
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put((1, watch_uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
i += 1 i += 1
flash("{} watches are queued for rechecking.".format(i)) flash("{} watches are queued for rechecking.".format(i))
return redirect(url_for('index', tag=tag)) return redirect(url_for('index', tag=tag))
@@ -1281,6 +1273,14 @@ def changedetection_app(config=None, datastore_o=None):
datastore.data['watching'][uuid.strip()]['notification_muted'] = False datastore.data['watching'][uuid.strip()]['notification_muted'] = False
flash("{} watches un-muted".format(len(uuids))) flash("{} watches un-muted".format(len(uuids)))
elif (op == 'recheck'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
flash("{} watches un-muted".format(len(uuids)))
elif (op == 'notification-default'): elif (op == 'notification-default'):
from changedetectionio.notification import ( from changedetectionio.notification import (
default_notification_format_for_watch default_notification_format_for_watch
@@ -1353,6 +1353,10 @@ def changedetection_app(config=None, datastore_o=None):
import changedetectionio.blueprint.browser_steps as browser_steps import changedetectionio.blueprint.browser_steps as browser_steps
app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps') app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps')
import changedetectionio.blueprint.price_data_follower as price_data_follower
app.register_blueprint(price_data_follower.construct_blueprint(datastore, update_q), url_prefix='/price_data_follower')
# @todo handle ctrl break # @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
threading.Thread(target=notification_runner).start() threading.Thread(target=notification_runner).start()
@@ -1458,7 +1462,11 @@ def ticker_thread_check_time_launch_checks():
watch_uuid_list = [] watch_uuid_list = []
while True: while True:
try: try:
watch_uuid_list = datastore.data['watching'].keys() # Get a list of watches sorted by last_checked, [1] because it gets passed a tuple
# This is so we examine the most over-due first
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked',0)):
watch_uuid_list.append(k[0])
except RuntimeError as e: except RuntimeError as e:
# RuntimeError: dictionary changed size during iteration # RuntimeError: dictionary changed size during iteration
time.sleep(0.1) time.sleep(0.1)
@@ -1498,7 +1506,7 @@ def ticker_thread_check_time_launch_checks():
seconds_since_last_recheck = now - watch['last_checked'] seconds_since_last_recheck = now - watch['last_checked']
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]:
# Proxies can be set to have a limit on seconds between which they can be called # Proxies can be set to have a limit on seconds between which they can be called
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid) watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
@@ -1529,8 +1537,9 @@ def ticker_thread_check_time_launch_checks():
priority, priority,
watch.jitter_seconds, watch.jitter_seconds,
now - watch['last_checked'])) now - watch['last_checked']))
# Into the queue with you # Into the queue with you
update_q.put((priority, uuid)) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
# Reset for next time # Reset for next time
watch.jitter_seconds = 0 watch.jitter_seconds = 0

View File

@@ -1,3 +1,4 @@
from changedetectionio import queuedWatchMetaData
from flask_restful import abort, Resource from flask_restful import abort, Resource
from flask import request, make_response from flask import request, make_response
import validators import validators
@@ -24,7 +25,7 @@ class Watch(Resource):
abort(404, message='No watch exists with the UUID of {}'.format(uuid)) abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if request.args.get('recheck'): if request.args.get('recheck'):
self.update_q.put((1, uuid)) self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
return "OK", 200 return "OK", 200
# Return without history, get that via another API call # Return without history, get that via another API call
@@ -100,7 +101,7 @@ class CreateWatch(Resource):
extras = {'title': json_data['title'].strip()} if json_data.get('title') else {} extras = {'title': json_data['title'].strip()} if json_data.get('title') else {}
new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras) new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras)
self.update_q.put((1, new_uuid)) self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201 return {'uuid': new_uuid}, 201
# Return concise list of available watches and some very basic info # Return concise list of available watches and some very basic info
@@ -118,7 +119,7 @@ class CreateWatch(Resource):
if request.args.get('recheck_all'): if request.args.get('recheck_all'):
for uuid in self.datastore.data['watching'].keys(): for uuid in self.datastore.data['watching'].keys():
self.update_q.put((1, uuid)) self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
return {'status': "OK"}, 200 return {'status': "OK"}, 200
return list, 200 return list, 200

View File

@@ -75,15 +75,13 @@ class steppable_browser_interface():
def action_goto_url(self, url, optional_value): def action_goto_url(self, url, optional_value):
# self.page.set_viewport_size({"width": 1280, "height": 5000}) # self.page.set_viewport_size({"width": 1280, "height": 5000})
now = time.time() now = time.time()
response = self.page.goto(url, timeout=0, wait_until='domcontentloaded') response = self.page.goto(url, timeout=0, wait_until='commit')
print("Time to goto URL", time.time() - now)
# Wait_until = commit # Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading. # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors' # This seemed to solve nearly all 'TimeoutErrors'
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) print("Time to goto URL ", time.time() - now)
self.page.wait_for_timeout(extra_wait * 1000)
def action_click_element_containing_text(self, selector=None, value=''): def action_click_element_containing_text(self, selector=None, value=''):
if not len(value.strip()): if not len(value.strip()):

View File

@@ -0,0 +1,33 @@
from distutils.util import strtobool
from flask import Blueprint, flash, redirect, url_for
from flask_login import login_required
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import queuedWatchMetaData
from queue import PriorityQueue
# Values stored in a watch's 'track_ldjson_price_data' field by the
# accept/reject views of this blueprint.
PRICE_DATA_TRACK_ACCEPT = 'accepted'
PRICE_DATA_TRACK_REJECT = 'rejected'
def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue):
    """Build the blueprint holding the accept/reject views for price-data tracking.

    Args:
        datastore: Application datastore; the views write the decision to
            ``datastore.data['watching'][uuid]['track_ldjson_price_data']``.
        update_q: Queue that receives a ``PrioritizedItem`` for the watch
            when tracking is accepted.

    Returns:
        The configured ``Blueprint`` with the ``accept`` and ``reject`` routes.
    """
    price_data_follower_blueprint = Blueprint('price_data_follower', __name__)

    # route() must be the OUTERMOST decorator: it registers whatever function it
    # receives. With @login_required listed above it, the unprotected view was
    # registered and the login check never ran.
    @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
    @login_required
    def accept(uuid):
        """Mark the watch as tracking LD+JSON price data and queue a full recheck."""
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        # skip_when_checksum_same=False forces reprocessing even if the content is unchanged
        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
        # NOTE(review): the redirect target also appears to queue this uuid - confirm the double queueing is intended
        return redirect(url_for("form_watch_checknow", uuid=uuid))

    @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
    @login_required
    def reject(uuid):
        """Record that price-data tracking was declined, then return to the index."""
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
        return redirect(url_for("index"))

    return price_data_follower_blueprint

View File

@@ -23,6 +23,9 @@ class Non200ErrorCodeReceived(Exception):
self.page_text = html_tools.html_to_text(page_html) self.page_text = html_tools.html_to_text(page_html)
return return
class checksumFromPreviousCheckWasTheSame(Exception):
    """Signal that a fetched checksum matched the one from the previous check.

    NOTE(review): presumably raised by the fetch/processing code so the caller
    can skip further processing - confirm against the raising site.
    """

    def __init__(self):
        # Carries no payload; still initialise the base Exception properly.
        super().__init__()
class JSActionExceptions(Exception): class JSActionExceptions(Exception):
def __init__(self, status_code, url, screenshot, message=''): def __init__(self, status_code, url, screenshot, message=''):
@@ -39,7 +42,7 @@ class BrowserStepsStepTimout(Exception):
class PageUnloadable(Exception): class PageUnloadable(Exception):
def __init__(self, status_code, url, screenshot=False, message=False): def __init__(self, status_code, url, message, screenshot=False):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
@@ -286,6 +289,8 @@ class base_html_playwright(Fetcher):
proxy=self.proxy, proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others # This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True, bypass_csp=True,
# Can't think why we need the service workers for our use case?
service_workers='block',
# Should never be needed # Should never be needed
accept_downloads=False accept_downloads=False
) )
@@ -294,24 +299,34 @@ class base_html_playwright(Fetcher):
if len(request_headers): if len(request_headers):
context.set_extra_http_headers(request_headers) context.set_extra_http_headers(request_headers)
try:
self.page.set_default_navigation_timeout(90000) self.page.set_default_navigation_timeout(90000)
self.page.set_default_timeout(90000) self.page.set_default_timeout(90000)
# Listen for all console events and handle errors # Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
# Bug - never set viewport size BEFORE page.goto # Goto page
try:
# Waits for the next navigation. Using Python context manager
# prevents a race condition between clicking and waiting for a navigation.
with self.page.expect_navigation():
response = self.page.goto(url, wait_until='load')
# Wait_until = commit # Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading. # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors' # This seemed to solve nearly all 'TimeoutErrors'
response = self.page.goto(url, wait_until='commit')
except playwright._impl._api_types.Error as e:
# Retry once - https://github.com/browserless/chrome/issues/2485
# Sometimes errors related to invalid cert's and other can be random
print ("Content Fetcher > retrying request got error - ", str(e))
time.sleep(1)
response = self.page.goto(url, wait_until='commit')
except Exception as e:
print ("Content Fetcher > Other exception when page.goto", str(e))
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e))
# Execute any browser steps
try:
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
self.page.wait_for_timeout(extra_wait * 1000) self.page.wait_for_timeout(extra_wait * 1000)
@@ -324,17 +339,15 @@ class base_html_playwright(Fetcher):
# This can be ok, we will try to grab what we could retrieve # This can be ok, we will try to grab what we could retrieve
pass pass
except Exception as e: except Exception as e:
print ("other exception when page.goto") print ("Content Fetcher > Other exception when executing custom JS code", str(e))
print (str(e))
context.close() context.close()
browser.close() browser.close()
raise PageUnloadable(url=url, status_code=None) raise PageUnloadable(url=url, status_code=None, message=str(e))
if response is None: if response is None:
context.close() context.close()
browser.close() browser.close()
print ("response object was none") print ("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page # Bug 2(?) Set the viewport size AFTER loading the page
@@ -353,7 +366,7 @@ class base_html_playwright(Fetcher):
if len(self.page.content().strip()) == 0: if len(self.page.content().strip()) == 0:
context.close() context.close()
browser.close() browser.close()
print ("Content was empty") print ("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page # Bug 2(?) Set the viewport size AFTER loading the page
@@ -498,7 +511,7 @@ class base_html_webdriver(Fetcher):
try: try:
self.driver.quit() self.driver.quit()
except Exception as e: except Exception as e:
print("Exception in chrome shutdown/quit" + str(e)) print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py! # "html_requests" is listed as the default fetcher in store.py!

View File

@@ -1,11 +1,13 @@
import hashlib import hashlib
import json
import logging import logging
import os import os
import re import re
import time
import urllib3 import urllib3
from changedetectionio import content_fetcher, html_tools from changedetectionio import content_fetcher, html_tools
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from copy import deepcopy
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -38,8 +40,7 @@ class perform_site_check():
return regex return regex
def run(self, uuid): def run(self, uuid, skip_when_checksum_same=True):
from copy import deepcopy
changed_detected = False changed_detected = False
screenshot = False # as bytes screenshot = False # as bytes
stripped_text_from_html = "" stripped_text_from_html = ""
@@ -122,6 +123,14 @@ class perform_site_check():
self.screenshot = fetcher.screenshot self.screenshot = fetcher.screenshot
self.xpath_data = fetcher.xpath_data self.xpath_data = fetcher.xpath_data
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
if skip_when_checksum_same:
if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
raise content_fetcher.checksumFromPreviousCheckWasTheSame()
# Fetching complete, now filters # Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base? # @todo move to class / maybe inside of fetcher abstract base?
@@ -140,7 +149,7 @@ class perform_site_check():
is_html = False is_html = False
is_json = False is_json = False
include_filters_rule = watch.get('include_filters', []) include_filters_rule = deepcopy(watch.get('include_filters', []))
# include_filters_rule = watch['include_filters'] # include_filters_rule = watch['include_filters']
subtractive_selectors = watch.get( subtractive_selectors = watch.get(
"subtractive_selectors", [] "subtractive_selectors", []
@@ -148,6 +157,10 @@ class perform_site_check():
"global_subtractive_selectors", [] "global_subtractive_selectors", []
) )
# Inject a virtual LD+JSON price tracker rule
if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
include_filters_rule.append(html_tools.LD_JSON_PRODUCT_OFFER_SELECTOR)
has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip()) has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip()) has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())
@@ -155,6 +168,14 @@ class perform_site_check():
include_filters_rule.append("json:$") include_filters_rule.append("json:$")
has_filter_rule = True has_filter_rule = True
if is_json:
# Sort the JSON so we dont get false alerts when the content is just re-ordered
try:
fetcher.content = json.dumps(json.loads(fetcher.content), sort_keys=True)
except Exception as e:
# Might have just been a snippet, or otherwise bad JSON, continue
pass
if has_filter_rule: if has_filter_rule:
json_filter_prefixes = ['json:', 'jq:'] json_filter_prefixes = ['json:', 'jq:']
for filter in include_filters_rule: for filter in include_filters_rule:
@@ -162,6 +183,8 @@ class perform_site_check():
stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter) stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
is_html = False is_html = False
if is_html or is_source: if is_html or is_source:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
@@ -173,9 +196,13 @@ class perform_site_check():
# Don't run get_text or xpath/css filters on plaintext # Don't run get_text or xpath/css filters on plaintext
stripped_text_from_html = html_content stripped_text_from_html = html_content
else: else:
# Does it have some ld+json price data? used for easier monitoring
update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(fetcher.content)
# Then we assume HTML # Then we assume HTML
if has_filter_rule: if has_filter_rule:
html_content = "" html_content = ""
for filter_rule in include_filters_rule: for filter_rule in include_filters_rule:
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.." # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'): if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):

View File

@@ -3,6 +3,7 @@ import re
from wtforms import ( from wtforms import (
BooleanField, BooleanField,
FileField,
Form, Form,
IntegerField, IntegerField,
RadioField, RadioField,
@@ -425,6 +426,14 @@ class watchForm(commonSettingsForm):
result = False result = False
return result return result
# datastore.data['settings']['requests']..
class importForm(Form):
    """Form carrying the single file-upload field used on the import page."""

    #backup_zip_file = FileField("File", validators=[validators.regexp('\.zip$', flags=re.IGNORECASE)])
    backup_zip_file = FileField("File")

    def validate_backup_zip_file(form, field):
        """Inline validator for `backup_zip_file`; currently accepts any upload."""
        if field.data:
            # TODO: validate the upload (e.g. the .zip extension hinted at in the
            # commented-out field definition above). Nothing is checked yet.
            pass
# datastore.data['settings']['requests'].. # datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form): class globalSettingsRequestForm(Form):

View File

@@ -10,6 +10,10 @@ import re
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>" TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"
# 'price' , 'lowPrice', 'highPrice' are usually under here
# all of those may or may not appear on different websites
LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"
class JSONNotFound(ValueError): class JSONNotFound(ValueError):
def __init__(self, msg): def __init__(self, msg):
ValueError.__init__(self, msg) ValueError.__init__(self, msg)
@@ -127,8 +131,10 @@ def _get_stripped_text_from_json_match(match):
return stripped_text_from_html return stripped_text_from_html
def extract_json_as_string(content, json_filter): # content - json
# json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
stripped_text_from_html = False stripped_text_from_html = False
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson> # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
@@ -139,8 +145,13 @@ def extract_json_as_string(content, json_filter):
# Foreach <script json></script> blob.. just return the first that matches json_filter # Foreach <script json></script> blob.. just return the first that matches json_filter
s = [] s = []
soup = BeautifulSoup(content, 'html.parser') soup = BeautifulSoup(content, 'html.parser')
if ensure_is_ldjson_info_type:
bs_result = soup.findAll('script', {"type": "application/ld+json"})
else:
bs_result = soup.findAll('script') bs_result = soup.findAll('script')
if not bs_result: if not bs_result:
raise JSONNotFound("No parsable JSON found in this document") raise JSONNotFound("No parsable JSON found in this document")
@@ -156,7 +167,14 @@ def extract_json_as_string(content, json_filter):
continue continue
else: else:
stripped_text_from_html = _parse_json(json_data, json_filter) stripped_text_from_html = _parse_json(json_data, json_filter)
if stripped_text_from_html: if ensure_is_ldjson_info_type:
# Could sometimes be list, string or something else random
if isinstance(json_data, dict):
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html:
break
elif stripped_text_from_html:
break break
if not stripped_text_from_html: if not stripped_text_from_html:
@@ -243,6 +261,18 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
return text_content return text_content
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
def has_ldjson_product_info(content):
    """Return True when `content` has LD+JSON product data matching the offer selector.

    Runs LD_JSON_PRODUCT_OFFER_SELECTOR against the document, restricted to
    ld+json blocks whose @type is "product".

    Returns False when no parsable JSON is found (JSONNotFound), which is an
    expected outcome for pages without embedded product data.
    """
    try:
        pricing_data = extract_json_as_string(content=content,
                                              json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR,
                                              ensure_is_ldjson_info_type="product")
    except JSONNotFound:
        # Totally fine
        return False

    return bool(pricing_data)
def workarounds_for_obfuscations(content): def workarounds_for_obfuscations(content):
""" """
Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis

View File

@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from flask import request, url_for, current_app
import time import time
import validators import validators
@@ -20,6 +21,26 @@ class Importer():
datastore): datastore):
pass pass
class import_changedetection_io_zip(Importer):
    """Importer for an uploaded changedetection.io backup ZIP (stub, imports nothing yet)."""

    def run(self,
            data,
            flash,
            datastore,
            ):
        """Open the uploaded ZIP from the current request and walk its members.

        `data` should be none, the upload is read directly from
        `request.files["backup_zip_file"]`. `flash` and `datastore` are not
        used yet.
        """
        import io
        import zipfile

        uploaded_bytes = request.files["backup_zip_file"].read()
        with zipfile.ZipFile(io.BytesIO(uploaded_bytes), 'r') as zf:
            for _member_name in zf.namelist():
                # TODO: restore each archive member into the datastore.
                pass
class import_url_list(Importer): class import_url_list(Importer):
""" """

View File

@@ -27,7 +27,6 @@ class model(dict):
'base_url' : None, 'base_url' : None,
'extract_title_as_title': False, 'extract_title_as_title': False,
'empty_pages_are_a_change': False, 'empty_pages_are_a_change': False,
'css_dark_mode': False,
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), 'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, 'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum 'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum

View File

@@ -14,10 +14,10 @@ from changedetectionio.notification import (
class model(dict): class model(dict):
__newest_history_key = None __newest_history_key = None
__history_n=0 __history_n = 0
__base_config = { __base_config = {
#'history': {}, # Dict of timestamp and output stripped filename (removed) # 'history': {}, # Dict of timestamp and output stripped filename (removed)
#'newest_history_key': 0, (removed, taken from history.txt index) # 'newest_history_key': 0, (removed, taken from history.txt index)
'body': None, 'body': None,
'check_unique_lines': False, # On change-detected, compare against all history if its something new 'check_unique_lines': False, # On change-detected, compare against all history if its something new
'check_count': 0, 'check_count': 0,
@@ -26,6 +26,8 @@ class model(dict):
'extract_title_as_title': False, 'extract_title_as_title': False,
'fetch_backend': None, 'fetch_backend': None,
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'has_ldjson_price_data': None,
'track_ldjson_price_data': None,
'headers': {}, # Extra headers to send 'headers': {}, # Extra headers to send
'ignore_text': [], # List of text to ignore when calculating the comparison checksum 'ignore_text': [], # List of text to ignore when calculating the comparison checksum
'include_filters': [], 'include_filters': [],
@@ -42,6 +44,7 @@ class model(dict):
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'paused': False, 'paused': False,
'previous_md5': False, 'previous_md5': False,
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
'proxy': None, # Preferred proxy connection 'proxy': None, # Preferred proxy connection
'subtractive_selectors': [], 'subtractive_selectors': [],
'tag': None, 'tag': None,

View File

@@ -0,0 +1,10 @@
from dataclasses import dataclass, field
from typing import Any
# So that we can queue some metadata in `item`
# https://docs.python.org/3/library/queue.html#queue.PriorityQueue
#
@dataclass(order=True)
class PrioritizedItem:
    """Queue entry that is ordered and compared by `priority` alone."""

    # Sort key; the only field that takes part in ordering and equality.
    priority: int
    # Arbitrary metadata carried with the entry; excluded from comparisons so
    # payloads that cannot be ordered (e.g. dicts) never break sorting.
    item: Any = field(compare=False)

View File

@@ -1,3 +1,6 @@
// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
// All rights reserved.
// @file Scrape the page looking for elements of concern (%ELEMENTS%) // @file Scrape the page looking for elements of concern (%ELEMENTS%)
// http://matatk.agrip.org.uk/tests/position-and-width/ // http://matatk.agrip.org.uk/tests/position-and-width/
// https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate // https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
@@ -81,8 +84,16 @@ var bbox;
for (var i = 0; i < elements.length; i++) { for (var i = 0; i < elements.length; i++) {
bbox = elements[i].getBoundingClientRect(); bbox = elements[i].getBoundingClientRect();
// Forget really small ones // Exclude items that are not interactable or visible
if (bbox['width'] < 10 && bbox['height'] < 10) { if(elements[i].style.opacity === "0") {
continue
}
if(elements[i].style.display === "none" || elements[i].style.pointerEvents === "none" ) {
continue
}
// Skip really small ones, and where width or height ==0
if (bbox['width'] * bbox['height'] < 100) {
continue; continue;
} }
@@ -138,7 +149,6 @@ for (var i = 0; i < elements.length; i++) {
} }
// Inject the current one set in the include_filters, which may be a CSS rule // Inject the current one set in the include_filters, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated. // used for displaying the current one in VisualSelector, where its not one we generated.
if (include_filters.length) { if (include_filters.length) {
@@ -166,10 +176,23 @@ if (include_filters.length) {
} }
if (q) { if (q) {
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
if (q.hasOwnProperty('getBoundingClientRect')) {
bbox = q.getBoundingClientRect(); bbox = q.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter element, scroll from top was "+scroll_y) console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
} else { } else {
console.log("xpath_element_scraper: filter element "+f+" was not found"); try {
// Try and see we can find its ownerElement
bbox = q.ownerElement.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) {
console.log("xpath_element_scraper: error looking up ownerElement")
}
}
}
if(!q) {
console.log("xpath_element_scraper: filter element " + f + " was not found");
} }
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
@@ -184,5 +207,9 @@ if (include_filters.length) {
} }
} }
// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
// so that we dont select the wrapping element by mistake and be unable to select what we want
size_pos.sort((a, b) => (a.width*a.height > b.width*b.height) ? 1 : -1)
// Window.width required for proper scaling in the frontend // Window.width required for proper scaling in the frontend
return {'size_pos': size_pos, 'browser_width': window.innerWidth}; return {'size_pos': size_pos, 'browser_width': window.innerWidth};

View File

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="83.39" height="89.648" enable-background="new 0 0 122.406 122.881" version="1.1" viewBox="0 0 83.39 89.648" xml:space="preserve" xmlns="http://www.w3.org/2000/svg"><g transform="translate(5e-4 -33.234)"><path d="m44.239 42.946-39.111 39.896 34.908 34.91 39.09-39.876-1.149-34.931zm-0.91791 42.273c0.979-0.979 1.507-1.99 1.577-3.027 0.077-1.043-0.248-2.424-0.967-4.135-0.725-1.717-1.348-3.346-1.87-4.885s-0.814-3.014-0.897-4.432c-0.07-1.42 0.134-2.768 0.624-4.045 0.477-1.279 1.348-2.545 2.607-3.804 2.099-2.099 4.535-3.123 7.314-3.065 2.773 0.063 5.457 1.158 8.04 3.294l2.881 3.034c1.946 2.607 2.799 5.33 2.557 8.166-0.235 2.83-1.532 5.426-3.893 7.785l-6.296-6.297c1.291-1.291 2.035-2.531 2.238-3.727 0.191-1.197-0.165-2.252-1.081-3.168-0.821-0.82-1.717-1.195-2.69-1.139-0.967 0.064-1.908 0.547-2.817 1.457-0.922 0.922-1.393 1.914-1.412 2.977s0.306 2.416 0.973 4.064c0.661 1.652 1.24 3.25 1.736 4.801 0.496 1.553 0.782 3.035 0.858 4.445 0.076 1.426-0.127 2.787-0.591 4.104-0.477 1.316-1.336 2.596-2.588 3.848-2.125 2.125-4.522 3.186-7.212 3.18s-5.311-1.063-7.855-3.16l-3.747 3.746-2.964-2.965 3.766-3.764c-2.423-2.996-3.568-5.998-3.447-9.02 0.127-3.014 1.476-5.813 4.045-8.383l6.278 6.277c-1.412 1.412-2.175 2.799-2.277 4.16-0.108 1.367 0.414 2.627 1.571 3.783 0.839 0.84 1.755 1.26 2.741 1.242 0.985-0.017 1.92-0.47 2.798-1.347zm21.127-46.435h17.457c-0.0269 2.2368 0.69936 16.025 0.69936 16.025l0.785 23.858c0.019 0.609-0.221 1.164-0.619 1.564l5e-3 4e-3 -41.236 42.022c-0.82213 0.8378-2.175 0.83-3.004 0l-37.913-37.91c-0.83-0.83-0.83-2.176 0-3.006l41.236-42.021c0.39287-0.42671 1.502-0.53568 1.502-0.53568zm18.011 11.59c-59.392-29.687-29.696-14.843 0 0z"/></g></svg>

After

Width:  |  Height:  |  Size: 1.7 KiB

View File

@@ -1,4 +1,5 @@
// Horrible proof of concept code :) // Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
// All rights reserved.
// yes - this is really a hack, if you are a front-ender and want to help, please get in touch! // yes - this is really a hack, if you are a front-ender and want to help, please get in touch!
$(document).ready(function () { $(document).ready(function () {
@@ -177,9 +178,10 @@ $(document).ready(function () {
// Basically, find the most 'deepest' // Basically, find the most 'deepest'
var found = 0; var found = 0;
ctx.fillStyle = 'rgba(205,0,0,0.35)'; ctx.fillStyle = 'rgba(205,0,0,0.35)';
for (var i = selector_data['size_pos'].length; i !== 0; i--) { // Will be sorted by smallest width*height first
for (var i = 0; i <= selector_data['size_pos'].length; i++) {
// draw all of them? let them choose somehow? // draw all of them? let them choose somehow?
var sel = selector_data['size_pos'][i - 1]; var sel = selector_data['size_pos'][i];
// If we are in a bounding-box // If we are in a bounding-box
if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale
&& &&
@@ -195,7 +197,7 @@ $(document).ready(function () {
// no need to keep digging // no need to keep digging
// @todo or, O to go out/up, I to go in // @todo or, O to go out/up, I to go in
// or double click to go up/out the selector? // or double click to go up/out the selector?
current_selected_i = i - 1; current_selected_i = i;
found += 1; found += 1;
break; break;
} }

View File

@@ -121,17 +121,19 @@ html[data-darkmode="true"] {
--color-icon-github-hover: var(--color-grey-700); --color-icon-github-hover: var(--color-grey-700);
--color-watch-table-error: var(--color-light-red); --color-watch-table-error: var(--color-light-red);
--color-watch-table-row-text: var(--color-grey-800); } --color-watch-table-row-text: var(--color-grey-800); }
html[data-darkmode="true"] .watch-controls img {
opacity: 0.4; }
html[data-darkmode="true"] .watch-table .unviewed {
color: #fff; }
html[data-darkmode="true"] .watch-table .unviewed.error {
color: var(--color-watch-table-error); }
html[data-darkmode="true"] .icon-spread { html[data-darkmode="true"] .icon-spread {
filter: hue-rotate(-10deg) brightness(1.5); } filter: hue-rotate(-10deg) brightness(1.5); }
html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after, html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after,
html[data-darkmode="true"] .watch-table .current-diff-url::after { html[data-darkmode="true"] .watch-table .current-diff-url::after {
filter: invert(0.5) hue-rotate(10deg) brightness(2); } filter: invert(0.5) hue-rotate(10deg) brightness(2); }
html[data-darkmode="true"] .watch-table .watch-controls .state-off img {
opacity: 0.3; }
html[data-darkmode="true"] .watch-table .watch-controls .state-on img {
opacity: 1.0; }
html[data-darkmode="true"] .watch-table .unviewed {
color: #fff; }
html[data-darkmode="true"] .watch-table .unviewed.error {
color: var(--color-watch-table-error); }
#diff-ui { #diff-ui {
background: var(--color-background); background: var(--color-background);

View File

@@ -140,18 +140,6 @@ html[data-darkmode="true"] {
--color-watch-table-error: var(--color-light-red); --color-watch-table-error: var(--color-light-red);
--color-watch-table-row-text: var(--color-grey-800); --color-watch-table-row-text: var(--color-grey-800);
// Anything that can't be manipulated through variables follows.
.watch-controls {
img {
opacity: 0.4;
}
}
.watch-table .unviewed {
color: #fff;
&.error {
color: var(--color-watch-table-error);
}
}
.icon-spread { .icon-spread {
filter: hue-rotate(-10deg) brightness(1.5); filter: hue-rotate(-10deg) brightness(1.5);
@@ -163,5 +151,25 @@ html[data-darkmode="true"] {
.current-diff-url::after { .current-diff-url::after {
filter: invert(.5) hue-rotate(10deg) brightness(2); filter: invert(.5) hue-rotate(10deg) brightness(2);
} }
.watch-controls {
.state-off {
img {
opacity: 0.3;
}
}
.state-on {
img {
opacity: 1.0;
}
}
}
.unviewed {
color: #fff;
&.error {
color: var(--color-watch-table-error);
}
}
} }
} }

View File

@@ -1009,3 +1009,30 @@ ul {
border-radius: 5px; border-radius: 5px;
color: var(--color-warning); color: var(--color-warning);
} }
/* automatic price following helpers */
.tracking-ldjson-price-data {
background-color: var(--color-background-button-green);
color: #000;
padding: 3px;
border-radius: 3px;
white-space: nowrap;
}
.ldjson-price-track-offer {
a.pure-button {
border-radius: 3px;
padding: 3px;
background-color: var(--color-background-button-green);
}
font-weight: bold;
font-style: italic;
}
.price-follow-tag-icon {
display: inline-block;
height: 0.8rem;
vertical-align: middle;
}

View File

@@ -124,17 +124,19 @@ html[data-darkmode="true"] {
--color-icon-github-hover: var(--color-grey-700); --color-icon-github-hover: var(--color-grey-700);
--color-watch-table-error: var(--color-light-red); --color-watch-table-error: var(--color-light-red);
--color-watch-table-row-text: var(--color-grey-800); } --color-watch-table-row-text: var(--color-grey-800); }
html[data-darkmode="true"] .watch-controls img {
opacity: 0.4; }
html[data-darkmode="true"] .watch-table .unviewed {
color: #fff; }
html[data-darkmode="true"] .watch-table .unviewed.error {
color: var(--color-watch-table-error); }
html[data-darkmode="true"] .icon-spread { html[data-darkmode="true"] .icon-spread {
filter: hue-rotate(-10deg) brightness(1.5); } filter: hue-rotate(-10deg) brightness(1.5); }
html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after, html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after,
html[data-darkmode="true"] .watch-table .current-diff-url::after { html[data-darkmode="true"] .watch-table .current-diff-url::after {
filter: invert(0.5) hue-rotate(10deg) brightness(2); } filter: invert(0.5) hue-rotate(10deg) brightness(2); }
html[data-darkmode="true"] .watch-table .watch-controls .state-off img {
opacity: 0.3; }
html[data-darkmode="true"] .watch-table .watch-controls .state-on img {
opacity: 1.0; }
html[data-darkmode="true"] .watch-table .unviewed {
color: #fff; }
html[data-darkmode="true"] .watch-table .unviewed.error {
color: var(--color-watch-table-error); }
/* spinner */ /* spinner */
.spinner, .spinner,
@@ -943,3 +945,24 @@ ul {
display: inline; display: inline;
height: 26px; height: 26px;
vertical-align: middle; } vertical-align: middle; }
/* automatic price following helpers */
.tracking-ldjson-price-data {
background-color: var(--color-background-button-green);
color: #000;
padding: 3px;
border-radius: 3px;
white-space: nowrap; }
.ldjson-price-track-offer {
font-weight: bold;
font-style: italic; }
.ldjson-price-track-offer a.pure-button {
border-radius: 3px;
padding: 3px;
background-color: var(--color-background-button-green); }
.price-follow-tag-icon {
display: inline-block;
height: 0.8rem;
vertical-align: middle; }

View File

@@ -250,12 +250,15 @@ class ChangeDetectionStore:
def clear_watch_history(self, uuid): def clear_watch_history(self, uuid):
import pathlib import pathlib
self.__data['watching'][uuid].update( self.__data['watching'][uuid].update({
{'last_checked': 0, 'last_checked': 0,
'has_ldjson_price_data': None,
'last_error': False,
'last_notification_error': False,
'last_viewed': 0, 'last_viewed': 0,
'previous_md5': False, 'previous_md5': False,
'last_notification_error': False, 'track_ldjson_price_data': None,
'last_error': False}) })
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"): for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
@@ -637,7 +640,7 @@ class ChangeDetectionStore:
n_title = watch.get('notification_title') n_title = watch.get('notification_title')
if n_title: if n_title:
self.data['settings']['application']['notification_title'] = re.sub(r, r'{{\1}}', n_title) watch['notification_title'] = re.sub(r, r'{{\1}}', n_title)
n_urls = watch.get('notification_urls') n_urls = watch.get('notification_urls')
if n_urls: if n_urls:

View File

@@ -72,7 +72,7 @@
</tr> </tr>
<tr> <tr>
<td><code>{{ '{{ watch_tag }}' }}</code></td> <td><code>{{ '{{ watch_tag }}' }}</code></td>
<td>The tag of the watch.</td> <td>The watch label / tag</td>
</tr> </tr>
<tr> <tr>
<td><code>{{ '{{ preview_url }}' }}</code></td> <td><code>{{ '{{ preview_url }}' }}</code></td>

View File

@@ -1,5 +1,5 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en" data-darkmode="{{ dark_mode|lower }}"> <html lang="en" data-darkmode="{{ get_darkmode_state() }}">
<head> <head>
<meta charset="utf-8"/> <meta charset="utf-8"/>

View File

@@ -125,7 +125,7 @@
<p> <p>
For example, to extract only the numbers from text &dash;</br> For example, to extract only the numbers from text &dash;</br>
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code></br> <strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code></br>
<strong>RegEx to extract:</strong> <code>Temperature ([0-9\.]+)</code><br/> <strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br/>
</p> </p>
<p> <p>
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a> <a href="https://RegExr.com/">Be sure to test your RegEx here.</a>

View File

@@ -1,5 +1,6 @@
{% extends 'base.html' %} {% extends 'base.html' %}
{% block content %} {% block content %}
{% from '_helpers.jinja' import render_field %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="edit-form monospaced-textarea"> <div class="edit-form monospaced-textarea">
@@ -7,11 +8,12 @@
<ul> <ul>
<li class="tab" id=""><a href="#url-list">URL List</a></li> <li class="tab" id=""><a href="#url-list">URL List</a></li>
<li class="tab"><a href="#distill-io">Distill.io</a></li> <li class="tab"><a href="#distill-io">Distill.io</a></li>
<li class="tab"><a href="#changedetection-io">Changedetection.io</a></li>
</ul> </ul>
</div> </div>
<div class="box-wrap inner"> <div class="box-wrap inner">
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST"> <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST" enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="tab-pane-inner" id="url-list"> <div class="tab-pane-inner" id="url-list">
<fieldset class="pure-group"> <fieldset class="pure-group">
@@ -77,6 +79,12 @@
" rows="25">{{ original_distill_json }}</textarea> " rows="25">{{ original_distill_json }}</textarea>
</fieldset> </fieldset>
</div> </div>
<div class="tab-pane-inner" id="changedetection-io">
Upload your changedetection.io backup ZIP here</br>
<fieldset class="pure-group">
{{ render_field(form.backup_zip_file) }}
</fieldset>
</div>
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button> <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
</form> </form>

View File

@@ -14,7 +14,7 @@
<div id="watch-add-wrapper-zone"> <div id="watch-add-wrapper-zone">
<div> <div>
{{ render_simple_field(form.url, placeholder="https://...", required=true) }} {{ render_simple_field(form.url, placeholder="https://...", required=true) }}
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }} {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch label / tag") }}
</div> </div>
<div> <div>
{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }} {{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
@@ -32,6 +32,7 @@
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button> <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="mute">Mute</button> <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="mute">Mute</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unmute">UnMute</button> <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unmute">UnMute</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button> <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button>
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button> <button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
</div> </div>
@@ -88,9 +89,9 @@
</td> </td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a> <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" /></a> <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" title="Create a link to share watch config with others" /></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %} {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}
{% if watch.last_error is defined and watch.last_error != False %} {% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}</div> <div class="fetch-error">{{ watch.last_error }}</div>
@@ -98,6 +99,12 @@
{% if watch.last_notification_error is defined and watch.last_notification_error != False %} {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
<div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div> <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
{% endif %} {% endif %}
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
<div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{% endif %}
{% if watch['track_ldjson_price_data'] == 'accepted' %}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="price-follow-tag-icon"/> Price</span>
{% endif %}
{% if not active_tag %} {% if not active_tag %}
<span class="watch-tag-list">{{ watch.tag}}</span> <span class="watch-tag-list">{{ watch.tag}}</span>
{% endif %} {% endif %}

View File

@@ -0,0 +1,146 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI
def set_response_with_ldjson():
    """Write the test endpoint content: an HTML page embedding a schema.org Product LD+JSON block."""
    page_with_ldjson = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
<script type="application/ld+json">
{
"@context":"https://schema.org/",
"@type":"Product",
"@id":"https://www.some-virtual-phone-shop.com/celular-iphone-14/p",
"name":"Celular Iphone 14 Pro Max 256Gb E Sim A16 Bionic",
"brand":{
"@type":"Brand",
"name":"APPLE"
},
"image":"https://www.some-virtual-phone-shop.com/15509426/image.jpg",
"description":"You dont need it",
"mpn":"111111",
"sku":"22222",
"offers":{
"@type":"AggregateOffer",
"lowPrice":8097000,
"highPrice":8099900,
"priceCurrency":"COP",
"offers":[
{
"@type":"Offer",
"price":8097000,
"priceCurrency":"COP",
"availability":"http://schema.org/InStock",
"sku":"102375961",
"itemCondition":"http://schema.org/NewCondition",
"seller":{
"@type":"Organization",
"name":"ajax"
}
}
],
"offerCount":1
}
}
</script>
</body>
</html>
"""
    # The file written here is what the tests' 'test_endpoint' URL is expected to serve
    # NOTE(review): the serving side is not visible in this chunk - confirm against the test utilities
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_with_ldjson)
def set_response_without_ldjson():
    """Write the test endpoint content: the same HTML page, but with no LD+JSON script block."""
    page_without_ldjson = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
</html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_without_ldjson)
# actually only really used by the distill.io importer, but could be handy too
def test_check_ldjson_price_autodetect(client, live_server):
    """Check the "follow embedded price data" offer end to end.

    Part one serves a page that contains LD+JSON product data, imports it,
    expects the offer on the index page, accepts it, rechecks, and then
    expects the offer to be gone, the price-tracking marker to be shown and
    the latest snapshot (fetched through the API) to contain the price data
    but not the page text.

    Part two serves a page without LD+JSON and expects no offer at all.
    """
    live_server_setup(live_server)

    # Give the endpoint time to spin up
    time.sleep(1)

    set_response_with_ldjson()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(3)

    # Should get a notice that it's available
    res = client.get(url_for("index"))
    assert b'ldjson-price-track-offer' in res.data

    # Accept it
    uuid = extract_UUID_from_client(client)
    # follow_redirects is an option of client.get(); handed to url_for() it would
    # only end up as a stray '?follow_redirects=True' query argument on the URL
    client.get(url_for('price_data_follower.accept', uuid=uuid), follow_redirects=True)
    time.sleep(2)

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(2)

    # Offer should be gone
    res = client.get(url_for("index"))
    assert b'Embedded price data' not in res.data
    assert b'tracking-ldjson-price-data' in res.data

    # and last snapshot (via API) should be just the price
    api_key = extract_api_key_from_UI(client)
    res = client.get(
        url_for("watchsinglehistory", uuid=uuid, timestamp='latest'),
        headers={'x-api-key': api_key},
    )

    # Should see this (dont know where the whitespace came from)
    assert b'"highPrice": 8099900' in res.data
    # And not this cause its not the ld-json
    assert b"So let's see what happens" not in res.data

    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

    ##########################################################################################
    # And we shouldnt see the offer
    set_response_without_ldjson()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(3)

    res = client.get(url_for("index"))
    assert b'ldjson-price-track-offer' not in res.data

    ##########################################################################################
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -121,7 +121,7 @@ def test_element_removal_full(client, live_server):
url_for("import_page"), data={"urls": test_url}, follow_redirects=True url_for("import_page"), data={"urls": test_url}, follow_redirects=True
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(1)
# Goto the edit page, add the filter data # Goto the edit page, add the filter data
# Not sure why \r needs to be added - absent of the #changetext this is not necessary # Not sure why \r needs to be added - absent of the #changetext this is not necessary
subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext" subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"

View File

@@ -38,9 +38,6 @@ def test_check_encoding_detection(client, live_server):
follow_redirects=True follow_redirects=True
) )
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(2) time.sleep(2)

View File

@@ -77,7 +77,8 @@ def test_DNS_errors(client, live_server):
time.sleep(3) time.sleep(3)
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'Name or service not known' in res.data found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
assert found_name_resolution_error
# Should always record that we tried # Should always record that we tried
assert bytes("just now".encode('utf-8')) in res.data assert bytes("just now".encode('utf-8')) in res.data

View File

@@ -101,9 +101,6 @@ def test_check_ignore_text_functionality(client, live_server):
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
@@ -199,9 +196,6 @@ def test_check_global_ignore_text_functionality(client, live_server):
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)

View File

@@ -69,8 +69,6 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server):
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
set_some_changed_response() set_some_changed_response()
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
@@ -104,9 +102,6 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server):
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
@@ -119,11 +114,9 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server):
) )
assert b"Updated watch." in res.data assert b"Updated watch." in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
# Make a change # Make a change
set_some_changed_response() set_some_changed_response()

View File

@@ -394,6 +394,48 @@ def check_json_ext_filter(json_filter, client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_ignore_json_order(client, live_server):
    """A JSON document whose keys are merely reordered must not be flagged as changed; a changed value must."""
    endpoint_path = "test-datastore/endpoint-content.txt"

    def serve(payload):
        # Replace what the test endpoint hands out on the next fetch
        with open(endpoint_path, "w") as handle:
            handle.write(payload)

    def recheck_and_load_index():
        # Trigger a check, wait for it, then return the overview page
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(2)
        return client.get(url_for("index"))

    # A change in order shouldn't trigger a notification
    serve('{"hello" : 123, "world": 123}')

    # Add our URL to the import page
    test_url = url_for('test_endpoint', content_type="application/json", _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    time.sleep(2)

    # Same keys and values, different order
    serve('{"world" : 123, "hello": 123}')
    overview = recheck_and_load_index()
    assert b'unviewed' not in overview.data

    # Just to be sure it still works
    serve('{"world" : 123, "hello": 124}')
    overview = recheck_and_load_index()
    assert b'unviewed' in overview.data

    delete_response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_response.data
def test_check_jsonpath_ext_filter(client, live_server): def test_check_jsonpath_ext_filter(client, live_server):
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)

View File

@@ -237,6 +237,7 @@ def test_check_notification(client, live_server):
) )
def test_notification_validation(client, live_server): def test_notification_validation(client, live_server):
time.sleep(1) time.sleep(1)
# re #242 - when you edited an existing new entry, it would not correctly show the notification settings # re #242 - when you edited an existing new entry, it would not correctly show the notification settings
@@ -266,43 +267,15 @@ def test_notification_validation(client, live_server):
# ) # )
# assert b"Notification Body and Title is required when a Notification URL is used" in res.data # assert b"Notification Body and Title is required when a Notification URL is used" in res.data
# Now adding a wrong token should give us an error
# Disabled for now
# res = client.post(
# url_for("settings_page"),
# data={"application-notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
# "application-notification_body": "Rubbish: {{rubbish}}\n",
# "application-notification_format": "Text",
# "application-notification_urls": "json://localhost/foobar",
# "requests-time_between_check-minutes": 180,
# "fetch_backend": "html_requests"
# },
# follow_redirects=True
# )
# assert bytes("Token 'rubbish' is not a valid token or is unknown".encode('utf-8')) in res.data
# And trying to define an invalid Jinja2 template should also throw an error
res = client.post(
url_for("settings_page"),
data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
"application-notification_body": "Rubbish: {{ rubbish }\n",
"application-notification_urls": "json://foobar.com",
"application-minutes_between_check": 180,
"application-fetch_backend": "html_requests"
},
follow_redirects=True
)
assert bytes("This is not a valid Jinja2 template".encode('utf-8')) in res.data
# cleanup for the next # cleanup for the next
client.get( client.get(
url_for("form_delete", uuid="all"), url_for("form_delete", uuid="all"),
follow_redirects=True follow_redirects=True
) )
def test_notification_jinja2(client, live_server):
#live_server_setup(live_server)
def test_notification_custom_endpoint_and_jinja2(client, live_server):
time.sleep(1) time.sleep(1)
# test_endpoint - that sends the contents of a file # test_endpoint - that sends the contents of a file
@@ -341,6 +314,7 @@ def test_notification_jinja2(client, live_server):
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(2) time.sleep(2)
with open("test-datastore/notification.txt", 'r') as f: with open("test-datastore/notification.txt", 'r') as f:
x=f.read() x=f.read()
j = json.loads(x) j = json.loads(x)
@@ -352,4 +326,6 @@ def test_notification_jinja2(client, live_server):
with open("test-datastore/notification-url.txt", 'r') as f: with open("test-datastore/notification-url.txt", 'r') as f:
notification_url = f.read() notification_url = f.read()
assert 'xxx=http' in notification_url assert 'xxx=http' in notification_url
os.unlink("test-datastore/notification-url.txt") os.unlink("test-datastore/notification-url.txt")

View File

@@ -20,6 +20,8 @@ def test_headers_in_request(client, live_server):
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(1)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
data={"urls": test_url}, data={"urls": test_url},
@@ -174,6 +176,7 @@ def test_method_in_request(client, live_server):
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(2)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
data={"urls": test_url}, data={"urls": test_url},
@@ -181,6 +184,8 @@ def test_method_in_request(client, live_server):
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(2)
# Attempt to add a method which is not valid # Attempt to add a method which is not valid
res = client.post( res = client.post(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
@@ -206,7 +211,7 @@ def test_method_in_request(client, live_server):
assert b"Updated watch." in res.data assert b"Updated watch." in res.data
# Give the thread time to pick up the first version # Give the thread time to pick up the first version
time.sleep(5) time.sleep(2)
# The service should echo back the request verb # The service should echo back the request verb
res = client.get( res = client.get(
@@ -217,7 +222,7 @@ def test_method_in_request(client, live_server):
# The test call service will return the verb as the body # The test call service will return the verb as the body
assert b"PATCH" in res.data assert b"PATCH" in res.data
time.sleep(5) time.sleep(2)
watches_with_method = 0 watches_with_method = 0
with open('test-datastore/url-watches.json') as f: with open('test-datastore/url-watches.json') as f:

View File

@@ -4,6 +4,7 @@ import queue
import time import time
from changedetectionio import content_fetcher from changedetectionio import content_fetcher
from changedetectionio import queuedWatchMetaData
from changedetectionio.fetch_site_status import FilterNotFoundInResponse from changedetectionio.fetch_site_status import FilterNotFoundInResponse
# A single update worker # A single update worker
@@ -157,11 +158,12 @@ class update_worker(threading.Thread):
while not self.app.config.exit.is_set(): while not self.app.config.exit.is_set():
try: try:
priority, uuid = self.q.get(block=False) queued_item_data = self.q.get(block=False)
except queue.Empty: except queue.Empty:
pass pass
else: else:
uuid = queued_item_data.item.get('uuid')
self.current_uuid = uuid self.current_uuid = uuid
if uuid in list(self.datastore.data['watching'].keys()): if uuid in list(self.datastore.data['watching'].keys()):
@@ -171,11 +173,11 @@ class update_worker(threading.Thread):
update_obj= {} update_obj= {}
xpath_data = False xpath_data = False
process_changedetection_results = True process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url'])) print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
now = time.time() now = time.time()
try: try:
changed_detected, update_obj, contents = update_handler.run(uuid) changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
# Re #342 # Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc # We then convert/.decode('utf-8') for the notification etc
@@ -241,6 +243,10 @@ class update_worker(threading.Thread):
process_changedetection_results = True process_changedetection_results = True
except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
# Yes fine, so nothing todo
pass
except content_fetcher.BrowserStepsStepTimout as e: except content_fetcher.BrowserStepsStepTimout as e:
if not self.datastore.data['watching'].get(uuid): if not self.datastore.data['watching'].get(uuid):

View File

@@ -29,8 +29,9 @@ apprise~=1.2.0
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt paho-mqtt
# Pinned version of cryptography otherwise # This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1"
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly # so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found"
# (introduced once apprise became a dep)
cryptography~=3.4 cryptography~=3.4
# Used for CSS filtering # Used for CSS filtering

View File

@@ -1 +1 @@
python-3.8.12 python-3.9.15