Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-11 03:56:26 +00:00)

Compare commits: jinja2-not...ticket-123 (21 commits)
| Author | SHA1 | Date |
|---|---|---|
| | b749d9a923 | |
| | b76148a0f4 | |
| | 93cc30437f | |
| | 6562d6e0d4 | |
| | 6c217cc3b6 | |
| | f30cdf0674 | |
| | 14da0646a7 | |
| | b413cdecc7 | |
| | 7bf52d9275 | |
| | 09e6624afd | |
| | b58fd995b5 | |
| | f7bb8a0afa | |
| | 3e333496c1 | |
| | ee776a9627 | |
| | 65db4d68e3 | |
| | 74d93d10c3 | |
| | 37aef0530a | |
| | f86763dc7a | |
| | 13c25f9b92 | |
| | 265f622e75 | |
| | c12db2b725 | |
Dockerfile

@@ -1,7 +1,7 @@
# pip dependencies install stage
FROM python:3.8-slim as builder

# rustc compiler would be needed on ARM type devices but theres an issue with some deps not building..
# See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

RUN apt-get update && apt-get install -y --no-install-recommends \

@@ -31,8 +31,7 @@ RUN pip install --target=/dependencies playwright~=1.27.1 \
# Final image stage
FROM python:3.8-slim

# Actual packages needed at runtime, usually due to the notification (apprise) backend
# rustc compiler would be needed on ARM type devices but theres an issue with some deps not building..
# See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

# Re #93, #73, excluding rustc (adds another 430Mb~)
MANIFEST.in

@@ -1,9 +1,10 @@
 recursive-include changedetectionio/api *
-recursive-include changedetectionio/templates *
-recursive-include changedetectionio/static *
 recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/model *
-recursive-include changedetectionio/tests *
+recursive-include changedetectionio/res *
+recursive-include changedetectionio/static *
+recursive-include changedetectionio/templates *
+recursive-include changedetectionio/tests *
 prune changedetectionio/static/package-lock.json
 prune changedetectionio/static/styles/node_modules
 prune changedetectionio/static/styles/package-lock.json
README.md (22 changed lines)

@@ -187,11 +187,29 @@ When you enable a `json:` or `jq:` filter, you can even automatically extract an
 <html>
 ...
 <script type="application/ld+json">
-{"@context":"http://schema.org","@type":"Product","name":"Nan Optipro Stage 1 Baby Formula 800g","price": 23.50 }
+{
+    "@context":"http://schema.org/",
+    "@type":"Product",
+    "offers":{
+        "@type":"Offer",
+        "availability":"http://schema.org/InStock",
+        "price":"3949.99",
+        "priceCurrency":"USD",
+        "url":"https://www.newegg.com/p/3D5-000D-001T1"
+    },
+    "description":"Cobratype King Cobra Hero Desktop Gaming PC",
+    "name":"Cobratype King Cobra Hero Desktop Gaming PC",
+    "sku":"3D5-000D-001T1",
+    "itemCondition":"NewCondition"
+}
 </script>
 ```

-`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
+`json:$..price` or `jq:..price` would give `3949.99`, or you can extract the whole structure (use a JSONpath test website to validate with)
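For illustration, here is a minimal Python sketch (not part of changedetection.io itself; it assumes the third-party `jsonpath-ng` package) of what a recursive `$..price` query resolves to:

```python
import json
from jsonpath_ng import parse  # pip install jsonpath-ng (illustrative helper, not a project dependency)

data = json.loads('''
{
  "@context": "http://schema.org/",
  "@type": "Product",
  "offers": {"@type": "Offer", "price": "3949.99", "priceCurrency": "USD"}
}
''')

# `$..price` walks the whole structure and matches any "price" key at any depth,
# unlike `$.price`, which only matches a top-level key
matches = parse('$..price').find(data)
print([m.value for m in matches])  # ['3949.99']
```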
The application also supports notifying you that it can follow this information automatically.

## Proxy Configuration
changedetectionio/__init__.py

@@ -10,6 +10,7 @@ import threading
 import time
 import timeago

+from changedetectionio import queuedWatchMetaData
 from copy import deepcopy
 from distutils.util import strtobool
 from feedgen.feed import FeedGenerator

@@ -35,7 +36,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.39.22.1'
+__version__ = '0.40.0.2'

 datastore = None

@@ -95,6 +96,12 @@ def init_app_secret(datastore_path):
     return secret

+@app.template_global()
+def get_darkmode_state():
+    css_dark_mode = request.cookies.get('css_dark_mode', 'false')
+    return 'true' if css_dark_mode and strtobool(css_dark_mode) else 'false'

 # We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
 # running or something similar.
 @app.template_filter('format_last_checked_time')
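Since the cookie arrives as a string, the new template global leans on `distutils.util.strtobool`; a small illustrative sketch (not from the repository) of the values it accepts, which matters because anything unrecognised raises `ValueError`:

```python
# strtobool() returns 1 for 'y', 'yes', 't', 'true', 'on', '1' and
# 0 for 'n', 'no', 'f', 'false', 'off', '0'; other strings raise ValueError,
# which is why the cookie read above defaults to 'false'.
from distutils.util import strtobool

assert strtobool('true') == 1
assert strtobool('false') == 0
assert strtobool('1') == 1
try:
    strtobool('maybe')
except ValueError:
    print("unparseable cookie values need a guard")
```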
@@ -202,10 +209,6 @@ def changedetection_app(config=None, datastore_o=None):
     watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
                            resource_class_kwargs={'datastore': datastore, 'update_q': update_q})

-    def getDarkModeSetting():
-        css_dark_mode = request.cookies.get('css_dark_mode')
-        return True if (css_dark_mode == 'true' or css_dark_mode == True) else False

     # Setup cors headers to allow all domains
     # https://flask-cors.readthedocs.io/en/latest/
     # CORS(app)

@@ -402,10 +405,8 @@ def changedetection_app(config=None, datastore_o=None):
             sorted_watches.append(watch)

         existing_tags = datastore.get_all_tags()

         form = forms.quickWatchForm(request.form)
         output = render_template("watch-overview.html",
-                                 dark_mode=getDarkModeSetting(),
                                  form=form,
                                  watches=sorted_watches,
                                  tags=existing_tags,

@@ -415,7 +416,7 @@ def changedetection_app(config=None, datastore_o=None):
                                  # Don't link to hosting when we're on the hosting environment
                                  hosted_sticky=os.getenv("SALTED_PASS", False) == False,
                                  guid=datastore.data['app_guid'],
-                                 queued_uuids=[uuid for p,uuid in update_q.queue])
+                                 queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue])

         if session.get('share-link'):

@@ -595,25 +596,16 @@ def changedetection_app(config=None, datastore_o=None):
                     using_default_check_time = False
                     break

-            # Use the default if its the same as system wide
+            # Use the default if it's the same as system-wide.
             if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
                 extra_update_obj['fetch_backend'] = None

             # Ignore text
             form_ignore_text = form.ignore_text.data
             datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text

             # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
             if form_ignore_text:
                 if len(datastore.data['watching'][uuid].history):
                     extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

             # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
             if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
                 if len(datastore.data['watching'][uuid].history):
                     extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

             # Be sure proxy value is None
             if datastore.proxy_list is not None and form.data['proxy'] == '':
                 extra_update_obj['proxy'] = None

@@ -631,7 +623,7 @@ def changedetection_app(config=None, datastore_o=None):
             datastore.needs_write_urgent = True

             # Queue the watch for immediate recheck, with a higher priority
-            update_q.put((1, uuid))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))

             # Diff page [edit] link should go back to diff page
             if request.args.get("next") and request.args.get("next") == 'diff':

@@ -664,7 +656,6 @@ def changedetection_app(config=None, datastore_o=None):
                              browser_steps_config=browser_step_ui_config,
                              current_base_url=datastore.data['settings']['application']['base_url'],
                              emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
-                             dark_mode=getDarkModeSetting(),
                              form=form,
                              has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
                              has_empty_checktime=using_default_check_time,

@@ -752,7 +743,6 @@ def changedetection_app(config=None, datastore_o=None):
         output = render_template("settings.html",
                                  form=form,
-                                 dark_mode=getDarkModeSetting(),
                                  current_base_url = datastore.data['settings']['application']['base_url'],
                                  hide_remove_pass=os.getenv("SALTED_PASS", False),
                                  api_key=datastore.data['settings']['application'].get('api_access_token'),

@@ -774,7 +764,7 @@ def changedetection_app(config=None, datastore_o=None):
             importer = import_url_list()
             importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
             for uuid in importer.new_uuids:
-                update_q.put((1, uuid))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

             if len(importer.remaining_data) == 0:
                 return redirect(url_for('index'))

@@ -787,13 +777,12 @@ def changedetection_app(config=None, datastore_o=None):
             d_importer = import_distill_io_json()
             d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
             for uuid in d_importer.new_uuids:
-                update_q.put((1, uuid))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

         # Could be some remaining, or we could be on GET
         output = render_template("import.html",
-                                 dark_mode=getDarkModeSetting(),
                                  import_url_list_remaining="\n".join(remaining_urls),
                                  original_distill_json=''
                                  )

@@ -893,7 +882,6 @@ def changedetection_app(config=None, datastore_o=None):
         output = render_template("diff.html",
                                  current_diff_url=watch['url'],
                                  current_previous_version=str(previous_version),
-                                 dark_mode=getDarkModeSetting(),
                                  extra_stylesheets=extra_stylesheets,
                                  extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
                                  extract_form=extract_form,

@@ -944,7 +932,6 @@ def changedetection_app(config=None, datastore_o=None):
                                  content=content,
                                  history_n=watch.history_n,
                                  extra_stylesheets=extra_stylesheets,
-                                 dark_mode=getDarkModeSetting(),
                                  # current_diff_url=watch['url'],
                                  watch=watch,
                                  uuid=uuid,

@@ -991,7 +978,6 @@ def changedetection_app(config=None, datastore_o=None):
                                  content=content,
                                  history_n=watch.history_n,
                                  extra_stylesheets=extra_stylesheets,
-                                 dark_mode=getDarkModeSetting(),
                                  ignored_line_numbers=ignored_line_numbers,
                                  triggered_line_numbers=trigger_line_numbers,
                                  current_diff_url=watch['url'],

@@ -1010,15 +996,10 @@ def changedetection_app(config=None, datastore_o=None):
     def notification_logs():
         global notification_debug_log
         output = render_template("notification-log.html",
-                                 dark_mode=getDarkModeSetting(),
                                  logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])

         return output

     @app.route("/favicon.ico", methods=['GET'])
     def favicon():
         return send_from_directory("static/images", path="favicon.ico")

     # We're good but backups are even better!
     @app.route("/backup", methods=['GET'])
     @login_required

@@ -1161,7 +1142,7 @@ def changedetection_app(config=None, datastore_o=None):
         if not add_paused and new_uuid:
             # Straight into the queue.
-            update_q.put((1, new_uuid))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
             flash("Watch added.")

         if add_paused:

@@ -1198,7 +1179,7 @@ def changedetection_app(config=None, datastore_o=None):
             uuid = list(datastore.data['watching'].keys()).pop()

         new_uuid = datastore.clone(uuid)
-        update_q.put((5, new_uuid))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
         flash('Cloned.')

         return redirect(url_for('index'))

@@ -1206,7 +1187,7 @@ def changedetection_app(config=None, datastore_o=None):
     @app.route("/api/checknow", methods=['GET'])
     @login_required
     def form_watch_checknow():

         # Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
         tag = request.args.get('tag')
         uuid = request.args.get('uuid')
         i = 0

@@ -1215,11 +1196,9 @@ def changedetection_app(config=None, datastore_o=None):
         for t in running_update_threads:
             running_uuids.append(t.current_uuid)

         # @todo check thread is running and skip

         if uuid:
             if uuid not in running_uuids:
-                update_q.put((1, uuid))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
             i = 1

         elif tag != None:

@@ -1227,14 +1206,14 @@ def changedetection_app(config=None, datastore_o=None):
             for watch_uuid, watch in datastore.data['watching'].items():
                 if (tag != None and tag in watch['tag']):
                     if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
-                        update_q.put((1, watch_uuid))
+                        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                         i += 1

         else:
             # No tag, no uuid, add everything.
             for watch_uuid, watch in datastore.data['watching'].items():
                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
-                    update_q.put((1, watch_uuid))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                     i += 1
         flash("{} watches are queued for rechecking.".format(i))
         return redirect(url_for('index', tag=tag))

@@ -1353,6 +1332,10 @@ def changedetection_app(config=None, datastore_o=None):
     import changedetectionio.blueprint.browser_steps as browser_steps
     app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps')

+    import changedetectionio.blueprint.price_data_follower as price_data_follower
+    app.register_blueprint(price_data_follower.construct_blueprint(datastore, update_q), url_prefix='/price_data_follower')

     # @todo handle ctrl break
     ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
     threading.Thread(target=notification_runner).start()

@@ -1498,7 +1481,7 @@ def ticker_thread_check_time_launch_checks():
             seconds_since_last_recheck = now - watch['last_checked']

             if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
-                if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
+                if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]:

                     # Proxies can be set to have a limit on seconds between which they can be called
                     watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)

@@ -1529,8 +1512,9 @@ def ticker_thread_check_time_launch_checks():
                         priority,
                         watch.jitter_seconds,
                         now - watch['last_checked']))

                     # Into the queue with you
-                    update_q.put((priority, uuid))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                     # Reset for next time
                     watch.jitter_seconds = 0
changedetectionio/api/api_v1.py

@@ -1,3 +1,4 @@
+from changedetectionio import queuedWatchMetaData
 from flask_restful import abort, Resource
 from flask import request, make_response
 import validators

@@ -24,7 +25,7 @@ class Watch(Resource):
             abort(404, message='No watch exists with the UUID of {}'.format(uuid))

         if request.args.get('recheck'):
-            self.update_q.put((1, uuid))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
             return "OK", 200

         # Return without history, get that via another API call

@@ -100,7 +101,7 @@ class CreateWatch(Resource):
         extras = {'title': json_data['title'].strip()} if json_data.get('title') else {}

         new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras)
-        self.update_q.put((1, new_uuid))
+        self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
         return {'uuid': new_uuid}, 201

     # Return concise list of available watches and some very basic info

@@ -118,7 +119,7 @@ class CreateWatch(Resource):
         if request.args.get('recheck_all'):
             for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put((1, uuid))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
             return {'status': "OK"}, 200

         return list, 200
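A hypothetical client-side sketch of the recheck endpoint touched above, using the `requests` package; the host, port, UUID and API key are placeholders, not values from this diff:

```python
import requests

api_key = 'your-api-key-from-settings'   # placeholder
base = 'http://localhost:5000/api/v1'    # placeholder host/port

# Per the diff, this queues the watch as PrioritizedItem(priority=1, skip_when_checksum_same=True)
# and the endpoint answers "OK" with status 200.
r = requests.get(f'{base}/watch/00000000-0000-0000-0000-000000000000',  # placeholder UUID
                 params={'recheck': '1'},
                 headers={'x-api-key': api_key})
print(r.status_code, r.text)
```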
changedetectionio/blueprint/browser_steps/browser_steps.py

@@ -75,15 +75,13 @@ class steppable_browser_interface():
     def action_goto_url(self, url, optional_value):
         # self.page.set_viewport_size({"width": 1280, "height": 5000})
         now = time.time()
-        response = self.page.goto(url, timeout=0, wait_until='domcontentloaded')
-        print("Time to goto URL", time.time() - now)
+        response = self.page.goto(url, timeout=0, wait_until='commit')
+
+        # Wait_until = commit
+        # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
+        # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
+        # This seemed to solve nearly all 'TimeoutErrors'
+        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
+        self.page.wait_for_timeout(extra_wait * 1000)
+        print("Time to goto URL ", time.time() - now)

     def action_click_element_containing_text(self, selector=None, value=''):
         if not len(value.strip()):
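A minimal standalone sketch of the navigation strategy this commit switches to, using Playwright's sync API (the URL is a placeholder): return as soon as the navigation is committed, then sleep a fixed, configurable delay instead of waiting for a `load` event that frequently timed out.

```python
import os
import time
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()

    start = time.time()
    # 'commit' resolves as soon as the response arrives and the document starts loading
    page.goto("https://example.com", timeout=0, wait_until='commit')

    # then wait an arbitrary number of seconds for the page to settle
    extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
    page.wait_for_timeout(extra_wait * 1000)

    print("Time to goto URL", time.time() - start)
    browser.close()
```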
changedetectionio/blueprint/price_data_follower/__init__.py (new file, 33 lines)

@@ -0,0 +1,33 @@
from distutils.util import strtobool
from flask import Blueprint, flash, redirect, url_for
from flask_login import login_required
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import queuedWatchMetaData
from queue import PriorityQueue

PRICE_DATA_TRACK_ACCEPT = 'accepted'
PRICE_DATA_TRACK_REJECT = 'rejected'

def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue):

    price_data_follower_blueprint = Blueprint('price_data_follower', __name__)

    @login_required
    @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
    def accept(uuid):
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
        return redirect(url_for("form_watch_checknow", uuid=uuid))

    @login_required
    @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
    def reject(uuid):
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
        return redirect(url_for("index"))

    return price_data_follower_blueprint
changedetectionio/content_fetcher.py

@@ -23,6 +23,9 @@ class Non200ErrorCodeReceived(Exception):
         self.page_text = html_tools.html_to_text(page_html)
         return

+class checksumFromPreviousCheckWasTheSame(Exception):
+    def __init__(self):
+        return

 class JSActionExceptions(Exception):
     def __init__(self, status_code, url, screenshot, message=''):

@@ -286,6 +289,8 @@ class base_html_playwright(Fetcher):
             proxy=self.proxy,
             # This is needed to enable JavaScript execution on GitHub and others
             bypass_csp=True,
+            # Can't think why we need the service workers for our use case?
+            service_workers='block',
             # Should never be needed
             accept_downloads=False
         )

@@ -306,8 +311,7 @@ class base_html_playwright(Fetcher):
         # Waits for the next navigation. Using Python context manager
         # prevents a race condition between clicking and waiting for a navigation.
         with self.page.expect_navigation():
-            response = self.page.goto(url, wait_until='load')
+            response = self.page.goto(url, wait_until='commit')
         # Wait_until = commit
         # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
         # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
changedetectionio/fetch_site_status.py

@@ -2,10 +2,11 @@ import hashlib
 import logging
 import os
 import re
 import time
 import urllib3

 from changedetectionio import content_fetcher, html_tools
+from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from copy import deepcopy

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -38,8 +39,7 @@ class perform_site_check():
         return regex

-    def run(self, uuid):
-        from copy import deepcopy
+    def run(self, uuid, skip_when_checksum_same=True):
         changed_detected = False
         screenshot = False  # as bytes
         stripped_text_from_html = ""

@@ -122,6 +122,14 @@ class perform_site_check():
         self.screenshot = fetcher.screenshot
         self.xpath_data = fetcher.xpath_data

+        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
+        # Saves a lot of CPU
+        update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
+        if skip_when_checksum_same:
+            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
+                raise content_fetcher.checksumFromPreviousCheckWasTheSame()
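A stripped-down sketch of this early-exit idea in isolation (plain Python, with a dict standing in for the real Watch model): hash the raw fetched body before any filtering, and bail out cheaply when nothing changed since the previous run.

```python
import hashlib

def should_skip(fetched_content: str, watch: dict, skip_when_checksum_same=True) -> bool:
    # Same scheme as the diff: md5 of the unfiltered body, compared to the stored value
    checksum = hashlib.md5(fetched_content.encode('utf-8')).hexdigest()
    return skip_when_checksum_same and checksum == watch.get('previous_md5_before_filters')

watch = {'previous_md5_before_filters': hashlib.md5(b'<html>same</html>').hexdigest()}
assert should_skip('<html>same</html>', watch) is True   # unchanged body -> skip all filtering
assert should_skip('<html>new</html>', watch) is False   # changed body -> run the full check
```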
         # Fetching complete, now filters
         # @todo move to class / maybe inside of fetcher abstract base?

@@ -140,7 +148,7 @@ class perform_site_check():
         is_html = False
         is_json = False

-        include_filters_rule = watch.get('include_filters', [])
+        include_filters_rule = deepcopy(watch.get('include_filters', []))
         # include_filters_rule = watch['include_filters']
         subtractive_selectors = watch.get(
             "subtractive_selectors", []

@@ -148,6 +156,10 @@ class perform_site_check():
             "global_subtractive_selectors", []
         )

+        # Inject a virtual LD+JSON price tracker rule
+        if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
+            include_filters_rule.append(html_tools.LD_JSON_PRODUCT_OFFER_SELECTOR)

         has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
         has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())

@@ -173,9 +185,13 @@ class perform_site_check():
             # Don't run get_text or xpath/css filters on plaintext
             stripped_text_from_html = html_content
         else:
+            # Does it have some ld+json price data? used for easier monitoring
+            update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(fetcher.content)

             # Then we assume HTML
             if has_filter_rule:
                 html_content = ""

                 for filter_rule in include_filters_rule:
                     # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                     if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
changedetectionio/html_tools.py

@@ -10,6 +10,10 @@ import re
 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"

+# 'price' , 'lowPrice', 'highPrice' are usually under here
+# all of those may or may not appear on different websites
+LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"

 class JSONNotFound(ValueError):
     def __init__(self, msg):
         ValueError.__init__(self, msg)

@@ -127,8 +131,10 @@ def _get_stripped_text_from_json_match(match):
     return stripped_text_from_html

-def extract_json_as_string(content, json_filter):
+# content - json
+# json_filter - ie json:$..price
+# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
+def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
     stripped_text_from_html = False

     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>

@@ -139,7 +145,12 @@ def extract_json_as_string(content, json_filter):
         # Foreach <script json></script> blob.. just return the first that matches json_filter
         s = []
         soup = BeautifulSoup(content, 'html.parser')
-        bs_result = soup.findAll('script')
+
+        if ensure_is_ldjson_info_type:
+            bs_result = soup.findAll('script', {"type": "application/ld+json"})
+        else:
+            bs_result = soup.findAll('script')

         if not bs_result:
             raise JSONNotFound("No parsable JSON found in this document")

@@ -156,7 +167,14 @@ def extract_json_as_string(content, json_filter):
                 continue
             else:
                 stripped_text_from_html = _parse_json(json_data, json_filter)
-                if stripped_text_from_html:
+                if ensure_is_ldjson_info_type:
+                    # Could sometimes be list, string or something else random
+                    if isinstance(json_data, dict):
+                        # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
+                        # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
+                        if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html:
+                            break
+                elif stripped_text_from_html:
                     break

         if not stripped_text_from_html:

@@ -243,6 +261,18 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
     return text_content

+# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
+def has_ldjson_product_info(content):
+    try:
+        pricing_data = extract_json_as_string(content=content, json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR, ensure_is_ldjson_info_type="product")
+    except JSONNotFound as e:
+        # Totally fine
+        return False
+    x = bool(pricing_data)
+    return x
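A hypothetical usage sketch of the new helper, assuming changedetection.io at this commit is installed so `html_tools` is importable; the HTML snippet is invented:

```python
from changedetectionio import html_tools  # assumes the package at this commit is on the path

page = '''
<html><body>
<script type="application/ld+json">
{"@context": "http://schema.org/", "@type": "Product",
 "offers": {"@type": "Offer", "price": "23.50"}}
</script>
</body></html>
'''

# True: a blob with @type == "product" exists and the $..offers selector matched something
print(html_tools.has_ldjson_product_info(page))
```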
def workarounds_for_obfuscations(content):
    """
    Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
changedetectionio/model/Watch.py

@@ -14,49 +14,52 @@ from changedetectionio.notification import (
 class model(dict):
     __newest_history_key = None
-    __history_n=0
+    __history_n = 0
     __base_config = {
-        #'history': {}, # Dict of timestamp and output stripped filename (removed)
-        #'newest_history_key': 0, (removed, taken from history.txt index)
+        # 'history': {}, # Dict of timestamp and output stripped filename (removed)
+        # 'newest_history_key': 0, (removed, taken from history.txt index)
         'body': None,
         'check_unique_lines': False,  # On change-detected, compare against all history if its something new
         'check_count': 0,
         'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
         'extract_text': [],  # Extract text by regex after filters
         'extract_title_as_title': False,
         'fetch_backend': None,
         'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
+        'has_ldjson_price_data': None,
+        'track_ldjson_price_data': None,
         'headers': {},  # Extra headers to send
         'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
         'include_filters': [],
         'last_checked': 0,
         'last_error': False,
         'last_viewed': 0,  # history key value of the last viewed via the [diff] link
         'method': 'GET',
         # Custom notification content
         'notification_body': None,
         'notification_format': default_notification_format_for_watch,
         'notification_muted': False,
         'notification_title': None,
         'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL
         'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
         'paused': False,
         'previous_md5': False,
+        'previous_md5_before_filters': False,  # Used for skipping changedetection entirely
         'proxy': None,  # Preferred proxy connection
         'subtractive_selectors': [],
         'tag': None,
         'text_should_not_be_present': [],  # Text that should not present
         # Re #110, so then if this is set to None, we know to use the default value instead
         # Requires setting to None on submit if it's the same as the default
         # Should be all None by default, so we use the system default in this case.
         'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
         'title': None,
         'trigger_text': [],  # List of text or regex to wait for until a change is detected
         'url': None,
         'uuid': str(uuid.uuid4()),
         'webdriver_delay': None,
         'webdriver_js_execute_code': None,  # Run before change-detection
     }
     jitter_seconds = 0

     def __init__(self, *arg, **kw):
changedetectionio/queuedWatchMetaData.py (new file, 10 lines)

@@ -0,0 +1,10 @@
from dataclasses import dataclass, field
from typing import Any

# So that we can queue some metadata in `item`
# https://docs.python.org/3/library/queue.html#queue.PriorityQueue
#
@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: Any=field(compare=False)
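A self-contained sketch of how this dataclass behaves on a `queue.PriorityQueue`: only `priority` participates in ordering, because `item` is declared with `compare=False`, so arbitrary metadata (UUID, skip flag) rides along without affecting the heap. The UUID values below are placeholders.

```python
from dataclasses import dataclass, field
from queue import PriorityQueue
from typing import Any

@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: Any = field(compare=False)  # excluded from comparisons, so any dict works

q = PriorityQueue()
q.put(PrioritizedItem(priority=5, item={'uuid': 'clone-of-watch'}))  # e.g. a cloned watch
q.put(PrioritizedItem(priority=1, item={'uuid': 'manual-recheck',
                                        'skip_when_checksum_same': False}))  # e.g. a forced recheck

first = q.get()
print(first.priority, first.item['uuid'])  # 1 manual-recheck -- lower number wins
```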
changedetectionio/res/xpath_element_scraper.js

@@ -81,6 +81,14 @@ var bbox;
 for (var i = 0; i < elements.length; i++) {
     bbox = elements[i].getBoundingClientRect();

+    // Exclude items that are not interactable or visible
+    if(elements[i].style.opacity === "0") {
+        continue
+    }
+    if(elements[i].style.display === "none" || elements[i].style.pointerEvents === "none" ) {
+        continue
+    }

     // Forget really small ones
     if (bbox['width'] < 10 && bbox['height'] < 10) {
         continue;

@@ -166,10 +174,23 @@ if (include_filters.length) {
     }

     if (q) {
-        bbox = q.getBoundingClientRect();
-        console.log("xpath_element_scraper: Got filter element, scroll from top was "+scroll_y)
-    } else {
-        console.log("xpath_element_scraper: filter element "+f+" was not found");
+        // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
+        if (q.hasOwnProperty('getBoundingClientRect')) {
+            bbox = q.getBoundingClientRect();
+            console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
+        } else {
+            try {
+                // Try and see we can find its ownerElement
+                bbox = q.ownerElement.getBoundingClientRect();
+                console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
+            } catch (e) {
+                console.log("xpath_element_scraper: error looking up ownerElement")
+            }
+        }
     }

+    if(!q) {
+        console.log("xpath_element_scraper: filter element " + f + " was not found");
+    }

     if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
changedetectionio/static/images/price-tag-icon.svg (new file, 2 lines, 1.7 KiB)

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="83.39" height="89.648" enable-background="new 0 0 122.406 122.881" version="1.1" viewBox="0 0 83.39 89.648" xml:space="preserve" xmlns="http://www.w3.org/2000/svg"><g transform="translate(5e-4 -33.234)"><path d="m44.239 42.946-39.111 39.896 34.908 34.91 39.09-39.876-1.149-34.931zm-0.91791 42.273c0.979-0.979 1.507-1.99 1.577-3.027 0.077-1.043-0.248-2.424-0.967-4.135-0.725-1.717-1.348-3.346-1.87-4.885s-0.814-3.014-0.897-4.432c-0.07-1.42 0.134-2.768 0.624-4.045 0.477-1.279 1.348-2.545 2.607-3.804 2.099-2.099 4.535-3.123 7.314-3.065 2.773 0.063 5.457 1.158 8.04 3.294l2.881 3.034c1.946 2.607 2.799 5.33 2.557 8.166-0.235 2.83-1.532 5.426-3.893 7.785l-6.296-6.297c1.291-1.291 2.035-2.531 2.238-3.727 0.191-1.197-0.165-2.252-1.081-3.168-0.821-0.82-1.717-1.195-2.69-1.139-0.967 0.064-1.908 0.547-2.817 1.457-0.922 0.922-1.393 1.914-1.412 2.977s0.306 2.416 0.973 4.064c0.661 1.652 1.24 3.25 1.736 4.801 0.496 1.553 0.782 3.035 0.858 4.445 0.076 1.426-0.127 2.787-0.591 4.104-0.477 1.316-1.336 2.596-2.588 3.848-2.125 2.125-4.522 3.186-7.212 3.18s-5.311-1.063-7.855-3.16l-3.747 3.746-2.964-2.965 3.766-3.764c-2.423-2.996-3.568-5.998-3.447-9.02 0.127-3.014 1.476-5.813 4.045-8.383l6.278 6.277c-1.412 1.412-2.175 2.799-2.277 4.16-0.108 1.367 0.414 2.627 1.571 3.783 0.839 0.84 1.755 1.26 2.741 1.242 0.985-0.017 1.92-0.47 2.798-1.347zm21.127-46.435h17.457c-0.0269 2.2368 0.69936 16.025 0.69936 16.025l0.785 23.858c0.019 0.609-0.221 1.164-0.619 1.564l5e-3 4e-3 -41.236 42.022c-0.82213 0.8378-2.175 0.83-3.004 0l-37.913-37.91c-0.83-0.83-0.83-2.176 0-3.006l41.236-42.021c0.39287-0.42671 1.502-0.53568 1.502-0.53568zm18.011 11.59c-59.392-29.687-29.696-14.843 0 0z"/></g></svg>
@@ -121,17 +121,19 @@ html[data-darkmode="true"] {
  --color-icon-github-hover: var(--color-grey-700);
  --color-watch-table-error: var(--color-light-red);
  --color-watch-table-row-text: var(--color-grey-800); }
  html[data-darkmode="true"] .watch-controls img {
    opacity: 0.4; }
  html[data-darkmode="true"] .watch-table .unviewed {
    color: #fff; }
  html[data-darkmode="true"] .watch-table .unviewed.error {
    color: var(--color-watch-table-error); }
  html[data-darkmode="true"] .icon-spread {
    filter: hue-rotate(-10deg) brightness(1.5); }
  html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after,
  html[data-darkmode="true"] .watch-table .current-diff-url::after {
    filter: invert(0.5) hue-rotate(10deg) brightness(2); }
  html[data-darkmode="true"] .watch-table .watch-controls .state-off img {
    opacity: 0.3; }
  html[data-darkmode="true"] .watch-table .watch-controls .state-on img {
    opacity: 1.0; }
  html[data-darkmode="true"] .watch-table .unviewed {
    color: #fff; }
  html[data-darkmode="true"] .watch-table .unviewed.error {
    color: var(--color-watch-table-error); }

#diff-ui {
  background: var(--color-background);
@@ -140,18 +140,6 @@ html[data-darkmode="true"] {
  --color-watch-table-error: var(--color-light-red);
  --color-watch-table-row-text: var(--color-grey-800);

  // Anything that can't be manipulated through variables follows.
  .watch-controls {
    img {
      opacity: 0.4;
    }
  }
  .watch-table .unviewed {
    color: #fff;
    &.error {
      color: var(--color-watch-table-error);
    }
  }

  .icon-spread {
    filter: hue-rotate(-10deg) brightness(1.5);

@@ -163,5 +151,25 @@ html[data-darkmode="true"] {
    .current-diff-url::after {
      filter: invert(.5) hue-rotate(10deg) brightness(2);
    }

    .watch-controls {
      .state-off {
        img {
          opacity: 0.3;
        }
      }
      .state-on {
        img {
          opacity: 1.0;
        }
      }
    }

    .unviewed {
      color: #fff;
      &.error {
        color: var(--color-watch-table-error);
      }
    }
  }
}

@@ -1009,3 +1009,30 @@ ul {
  border-radius: 5px;
  color: var(--color-warning);
}

/* automatic price following helpers */
.tracking-ldjson-price-data {
  background-color: var(--color-background-button-green);
  color: #000;
  padding: 3px;
  border-radius: 3px;
  white-space: nowrap;
}

.ldjson-price-track-offer {
  a.pure-button {
    border-radius: 3px;
    padding: 3px;
    background-color: var(--color-background-button-green);
  }

  font-weight: bold;
  font-style: italic;
}

.price-follow-tag-icon {
  display: inline-block;
  height: 0.8rem;
  vertical-align: middle;
}
@@ -124,17 +124,19 @@ html[data-darkmode="true"] {
  --color-icon-github-hover: var(--color-grey-700);
  --color-watch-table-error: var(--color-light-red);
  --color-watch-table-row-text: var(--color-grey-800); }
  html[data-darkmode="true"] .watch-controls img {
    opacity: 0.4; }
  html[data-darkmode="true"] .watch-table .unviewed {
    color: #fff; }
  html[data-darkmode="true"] .watch-table .unviewed.error {
    color: var(--color-watch-table-error); }
  html[data-darkmode="true"] .icon-spread {
    filter: hue-rotate(-10deg) brightness(1.5); }
  html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after,
  html[data-darkmode="true"] .watch-table .current-diff-url::after {
    filter: invert(0.5) hue-rotate(10deg) brightness(2); }
  html[data-darkmode="true"] .watch-table .watch-controls .state-off img {
    opacity: 0.3; }
  html[data-darkmode="true"] .watch-table .watch-controls .state-on img {
    opacity: 1.0; }
  html[data-darkmode="true"] .watch-table .unviewed {
    color: #fff; }
  html[data-darkmode="true"] .watch-table .unviewed.error {
    color: var(--color-watch-table-error); }

/* spinner */
.spinner,

@@ -943,3 +945,24 @@ ul {
  display: inline;
  height: 26px;
  vertical-align: middle; }

/* automatic price following helpers */
.tracking-ldjson-price-data {
  background-color: var(--color-background-button-green);
  color: #000;
  padding: 3px;
  border-radius: 3px;
  white-space: nowrap; }

.ldjson-price-track-offer {
  font-weight: bold;
  font-style: italic; }
  .ldjson-price-track-offer a.pure-button {
    border-radius: 3px;
    padding: 3px;
    background-color: var(--color-background-button-green); }

.price-follow-tag-icon {
  display: inline-block;
  height: 0.8rem;
  vertical-align: middle; }
changedetectionio/store.py

@@ -250,12 +250,15 @@ class ChangeDetectionStore:
     def clear_watch_history(self, uuid):
         import pathlib

-        self.__data['watching'][uuid].update(
-            {'last_checked': 0,
-             'last_viewed': 0,
-             'previous_md5': False,
-             'last_notification_error': False,
-             'last_error': False})
+        self.__data['watching'][uuid].update({
+            'last_checked': 0,
+            'has_ldjson_price_data': None,
+            'last_error': False,
+            'last_notification_error': False,
+            'last_viewed': 0,
+            'previous_md5': False,
+            'track_ldjson_price_data': None,
+        })

         # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
         for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):

@@ -637,7 +640,7 @@ class ChangeDetectionStore:
         n_title = watch.get('notification_title')
         if n_title:
-            self.data['settings']['application']['notification_title'] = re.sub(r, r'{{\1}}', n_title)
+            watch['notification_title'] = re.sub(r, r'{{\1}}', n_title)

         n_urls = watch.get('notification_urls')
         if n_urls:
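The token pattern `r` here is defined outside the visible hunk; a sketch with an assumed stand-in pattern shows the intent of this migration step (legacy single-brace tokens rewritten as Jinja2 double braces):

```python
import re

# Hypothetical stand-in for the repository's own pattern `r`, which is not shown in this hunk:
# match {word} tokens that are not already doubled.
r = r'(?<!{)\{(\w+)\}(?!})'

n_title = 'Change detected for {watch_url}'
print(re.sub(r, r'{{\1}}', n_title))  # Change detected for {{watch_url}}
```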
@@ -72,7 +72,7 @@
 </tr>
 <tr>
     <td><code>{{ '{{ watch_tag }}' }}</code></td>
-    <td>The tag of the watch.</td>
+    <td>The watch label / tag</td>
 </tr>
 <tr>
     <td><code>{{ '{{ preview_url }}' }}</code></td>
changedetectionio/templates/base.html

@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en" data-darkmode="{{ dark_mode|lower }}">
+<html lang="en" data-darkmode="{{ get_darkmode_state() }}">

 <head>
     <meta charset="utf-8"/>
@@ -125,7 +125,7 @@
 <p>
     For example, to extract only the numbers from text ‐</br>
     <strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code></br>
-    <strong>RegEx to extract:</strong> <code>Temperature ([0-9\.]+)</code><br/>
+    <strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br/>
 </p>
 <p>
     <a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
changedetectionio/templates/watch-overview.html

@@ -14,7 +14,7 @@
 <div id="watch-add-wrapper-zone">
     <div>
         {{ render_simple_field(form.url, placeholder="https://...", required=true) }}
-        {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
+        {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch label / tag") }}
     </div>
     <div>
         {{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}

@@ -88,9 +88,9 @@
         </td>
         <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
         <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
-        <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" /></a>
+        <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" title="Create a link to share watch config with others" /></a>

-        {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
+        {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}

         {% if watch.last_error is defined and watch.last_error != False %}
         <div class="fetch-error">{{ watch.last_error }}</div>

@@ -98,6 +98,12 @@
         {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
         <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
         {% endif %}
+        {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
+        <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
+        {% endif %}
+        {% if watch['track_ldjson_price_data'] == 'accepted' %}
+        <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="price-follow-tag-icon"/> Price</span>
+        {% endif %}
         {% if not active_tag %}
         <span class="watch-tag-list">{{ watch.tag}}</span>
         {% endif %}
changedetectionio/tests/test_automatic_follow_ldjson_price.py (new file, 146 lines)

@@ -0,0 +1,146 @@
#!/usr/bin/python3

import time
from flask import url_for
from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI

def set_response_with_ldjson():
    test_return_data = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
<script type="application/ld+json">
{
    "@context":"https://schema.org/",
    "@type":"Product",
    "@id":"https://www.some-virtual-phone-shop.com/celular-iphone-14/p",
    "name":"Celular Iphone 14 Pro Max 256Gb E Sim A16 Bionic",
    "brand":{
        "@type":"Brand",
        "name":"APPLE"
    },
    "image":"https://www.some-virtual-phone-shop.com/15509426/image.jpg",
    "description":"You dont need it",
    "mpn":"111111",
    "sku":"22222",
    "offers":{
        "@type":"AggregateOffer",
        "lowPrice":8097000,
        "highPrice":8099900,
        "priceCurrency":"COP",
        "offers":[
            {
                "@type":"Offer",
                "price":8097000,
                "priceCurrency":"COP",
                "availability":"http://schema.org/InStock",
                "sku":"102375961",
                "itemCondition":"http://schema.org/NewCondition",
                "seller":{
                    "@type":"Organization",
                    "name":"ajax"
                }
            }
        ],
        "offerCount":1
    }
}
</script>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
    return None

def set_response_without_ldjson():
    test_return_data = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
    return None

# actually only really used by the distll.io importer, but could be handy too
def test_check_ldjson_price_autodetect(client, live_server):
    live_server_setup(live_server)

    # Give the endpoint time to spin up
    time.sleep(1)

    set_response_with_ldjson()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(3)

    # Should get a notice that it's available
    res = client.get(url_for("index"))
    assert b'ldjson-price-track-offer' in res.data

    # Accept it
    uuid = extract_UUID_from_client(client)

    client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
    time.sleep(2)

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(2)
    # Offer should be gone
    res = client.get(url_for("index"))
    assert b'Embedded price data' not in res.data
    assert b'tracking-ldjson-price-data' in res.data

    # and last snapshop (via API) should be just the price
    api_key = extract_api_key_from_UI(client)
    res = client.get(
        url_for("watchsinglehistory", uuid=uuid, timestamp='latest'),
        headers={'x-api-key': api_key},
    )

    # Should see this (dont know where the whitespace came from)
    assert b'"highPrice": 8099900' in res.data
    # And not this cause its not the ld-json
    assert b"So let's see what happens" not in res.data

    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

    ##########################################################################################
    # And we shouldnt see the offer
    set_response_without_ldjson()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(3)
    res = client.get(url_for("index"))
    assert b'ldjson-price-track-offer' not in res.data

    ##########################################################################################
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -121,7 +121,7 @@ def test_element_removal_full(client, live_server):
         url_for("import_page"), data={"urls": test_url}, follow_redirects=True
     )
     assert b"1 Imported" in res.data

     time.sleep(1)
     # Goto the edit page, add the filter data
     # Not sure why \r needs to be added - absent of the #changetext this is not necessary
     subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"

@@ -38,9 +38,6 @@ def test_check_encoding_detection(client, live_server):
         follow_redirects=True
     )

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(2)

@@ -77,7 +77,8 @@ def test_DNS_errors(client, live_server):
     time.sleep(3)

     res = client.get(url_for("index"))
-    assert b'Name or service not known' in res.data
+    found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
+    assert found_name_resolution_error
     # Should always record that we tried
     assert bytes("just now".encode('utf-8')) in res.data

@@ -101,9 +101,6 @@ def test_check_ignore_text_functionality(client, live_server):
     )
     assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)

@@ -199,9 +196,6 @@ def test_check_global_ignore_text_functionality(client, live_server):
     )
     assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)

@@ -69,8 +69,6 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server):
     assert b"1 Imported" in res.data

     time.sleep(sleep_time_for_fetch_thread)
-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     set_some_changed_response()
     time.sleep(sleep_time_for_fetch_thread)

@@ -104,9 +102,6 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server):
     )
     assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)

@@ -119,11 +114,9 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server):
     )
     assert b"Updated watch." in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)

     # Make a change
     set_some_changed_response()
@@ -237,6 +237,7 @@ def test_check_notification(client, live_server):
     )

 def test_notification_validation(client, live_server):
+    time.sleep(1)

     # re #242 - when you edited an existing new entry, it would not correctly show the notification settings

@@ -266,43 +267,15 @@ def test_notification_validation(client, live_server):
     # )
     # assert b"Notification Body and Title is required when a Notification URL is used" in res.data

     # Now adding a wrong token should give us an error
     # Disabled for now
     # res = client.post(
     #     url_for("settings_page"),
     #     data={"application-notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
     #           "application-notification_body": "Rubbish: {{rubbish}}\n",
     #           "application-notification_format": "Text",
     #           "application-notification_urls": "json://localhost/foobar",
     #           "requests-time_between_check-minutes": 180,
     #           "fetch_backend": "html_requests"
     #           },
     #     follow_redirects=True
     # )
     # assert bytes("Token 'rubbish' is not a valid token or is unknown".encode('utf-8')) in res.data

     # And trying to define an invalid Jinja2 template should also throw an error
     res = client.post(
         url_for("settings_page"),
         data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
               "application-notification_body": "Rubbish: {{ rubbish }\n",
               "application-notification_urls": "json://foobar.com",
               "application-minutes_between_check": 180,
               "application-fetch_backend": "html_requests"
               },
         follow_redirects=True
     )
     assert bytes("This is not a valid Jinja2 template".encode('utf-8')) in res.data
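A sketch of the kind of check this test exercises, using Jinja2's own parser; the helper name is made up, but `TemplateSyntaxError` is the real exception jinja2 raises on an unbalanced template like `"Rubbish: {{ rubbish }"`:

```python
from jinja2 import Environment, TemplateSyntaxError

def is_valid_jinja2(template_text: str) -> bool:
    # Parsing alone is enough to validate syntax; no variables need to exist yet
    try:
        Environment().from_string(template_text)
        return True
    except TemplateSyntaxError:
        return False

assert is_valid_jinja2("Rubbish: {{ rubbish }}") is True   # balanced braces parse fine
assert is_valid_jinja2("Rubbish: {{ rubbish }") is False   # the broken template from the test
```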
     # cleanup for the next
     client.get(
         url_for("form_delete", uuid="all"),
         follow_redirects=True
     )

-def test_notification_jinja2(client, live_server):
-    #live_server_setup(live_server)
+def test_notification_custom_endpoint_and_jinja2(client, live_server):
+    time.sleep(1)

     # test_endpoint - that sends the contents of a file

@@ -341,6 +314,7 @@ def test_notification_jinja2(client, live_server):
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
     time.sleep(2)

     with open("test-datastore/notification.txt", 'r') as f:
         x = f.read()
     j = json.loads(x)

@@ -352,4 +326,6 @@ def test_notification_jinja2(client, live_server):
     with open("test-datastore/notification-url.txt", 'r') as f:
         notification_url = f.read()
     assert 'xxx=http' in notification_url
-    os.unlink("test-datastore/notification-url.txt")
+
+    os.unlink("test-datastore/notification-url.txt")
@@ -20,6 +20,8 @@ def test_headers_in_request(client, live_server):
     )
     assert b"1 Imported" in res.data

+    time.sleep(1)

     res = client.post(
         url_for("import_page"),
         data={"urls": test_url},

@@ -174,6 +176,7 @@ def test_method_in_request(client, live_server):
     )
     assert b"1 Imported" in res.data

+    time.sleep(2)
     res = client.post(
         url_for("import_page"),
         data={"urls": test_url},

@@ -181,6 +184,8 @@ def test_method_in_request(client, live_server):
     )
     assert b"1 Imported" in res.data

+    time.sleep(2)

     # Attempt to add a method which is not valid
     res = client.post(
         url_for("edit_page", uuid="first"),

@@ -206,7 +211,7 @@ def test_method_in_request(client, live_server):
     assert b"Updated watch." in res.data

     # Give the thread time to pick up the first version
-    time.sleep(5)
+    time.sleep(2)

     # The service should echo back the request verb
     res = client.get(

@@ -217,7 +222,7 @@ def test_method_in_request(client, live_server):
     # The test call service will return the verb as the body
     assert b"PATCH" in res.data

-    time.sleep(5)
+    time.sleep(2)

     watches_with_method = 0
     with open('test-datastore/url-watches.json') as f:
changedetectionio/update_worker.py

@@ -4,6 +4,7 @@ import queue
 import time

 from changedetectionio import content_fetcher
+from changedetectionio import queuedWatchMetaData
 from changedetectionio.fetch_site_status import FilterNotFoundInResponse

 # A single update worker

@@ -157,11 +158,12 @@ class update_worker(threading.Thread):
         while not self.app.config.exit.is_set():

             try:
-                priority, uuid = self.q.get(block=False)
+                queued_item_data = self.q.get(block=False)
             except queue.Empty:
                 pass

             else:
+                uuid = queued_item_data.item.get('uuid')
                 self.current_uuid = uuid

                 if uuid in list(self.datastore.data['watching'].keys()):

@@ -171,11 +173,11 @@ class update_worker(threading.Thread):
                     update_obj = {}
                     xpath_data = False
                     process_changedetection_results = True
-                    print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
+                    print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
                     now = time.time()

                     try:
-                        changed_detected, update_obj, contents = update_handler.run(uuid)
+                        changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
                         # Re #342
                         # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                         # We then convert/.decode('utf-8') for the notification etc

@@ -241,6 +243,10 @@ class update_worker(threading.Thread):
                         process_changedetection_results = True

+                    except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
+                        # Yes fine, so nothing todo
+                        pass

                     except content_fetcher.BrowserStepsStepTimout as e:

                         if not self.datastore.data['watching'].get(uuid):
requirements.txt

@@ -29,8 +29,9 @@ apprise~=1.2.0
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt

# Pinned version of cryptography otherwise
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1"
# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found"
# (introduced once apprise became a dep)
cryptography~=3.4

# Used for CSS filtering
runtime.txt

@@ -1 +1 @@
-python-3.8.12
+python-3.9.15