mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-12-09 01:25:38 +00:00)

Compare commits: dont-creat ... mark-selec
23 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 4648fe7b02 | |
| | e7ac356d99 | |
| | e874df4ffc | |
| | d1f44d0345 | |
| | 8536af0845 | |
| | 9076ba6bd3 | |
| | 43af18e2bc | |
| | ad75e8cdd0 | |
| | f604643356 | |
| | d5fd22f693 | |
| | 1d9d11b3f5 | |
| | f49464f451 | |
| | bc6bde4062 | |
| | 2863167f45 | |
| | ce3966c104 | |
| | d5f574ca17 | |
| | c96ece170a | |
| | 1fb90bbddc | |
| | 55b6ae86e8 | |
| | 66b892f770 | |
| | 3b80bb2f0e | |
| | e6d2d87b31 | |
| | 6e71088cde | |
.github/workflows/containers.yml (vendored, 6 changed lines)

@@ -98,7 +98,8 @@ jobs:
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
provenance: false
# Looks like this was disabled
# provenance: false

# A new tagged release is required, which builds :tag and :latest
- name: Build and push :tag
@@ -117,7 +118,8 @@ jobs:
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
provenance: false
# Looks like this was disabled
# provenance: false

- name: Image digest
run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}
.github/workflows/pypi.yml (vendored, file deleted, 38 lines)

@@ -1,38 +0,0 @@
name: PyPi Test and Push tagged release

# Triggers the workflow on push or pull request events
on:
workflow_run:
workflows: ["ChangeDetection.io Test"]
tags: '*.*'
types: [completed]


jobs:
test-build:
runs-on: ubuntu-latest
steps:

- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9


- name: Test that pip builds without error
run: |
pip3 --version
python3 -m pip install wheel
python3 setup.py bdist_wheel
python3 -m pip install dist/changedetection.io-*-none-any.whl --force
changedetection.io -d /tmp -p 10000 &
sleep 3
curl http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
killall -9 changedetection.io

# https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
# https://github.com/docker/buildx/issues/59 ? Needs to be one platform?

# https://github.com/docker/buildx/issues/495#issuecomment-918925854
#if: ${{ github.event_name == 'release'}}
.github/workflows/test-only.yml (vendored, 7 changed lines)

@@ -50,10 +50,13 @@ jobs:
run: |

# Selenium fetch
docker run -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'

# Playwright/Browserless fetch
docker run -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'

# restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'

- name: Test proxy interaction
run: |
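FLASK_SERVER_NAME, introduced in the restock test run above, is consumed further down this diff in changedetectionio/__init__.py. A minimal sketch of that pattern, assuming a plain Flask app; only the variable name comes from this changeset:

```python
import os
from flask import Flask

app = Flask(__name__)

# Only set SERVER_NAME when the environment provides one, so that URLs built
# with url_for(..., _external=True) resolve to a hostname other containers can
# reach (e.g. the "changedet" container name used by the restock test above).
if os.getenv('FLASK_SERVER_NAME'):
    app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME')
```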
.github/workflows/test-pip-build.yml (vendored, new file, 36 lines)

@@ -0,0 +1,36 @@
name: ChangeDetection.io PIP package test

# Triggers the workflow on push or pull request events

# This line doesnt work, even tho it is the documented one
on: [push, pull_request]

# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile
jobs:
test-pip-build-basics:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9


- name: Test that the basic pip built package runs without error
run: |
set -e
mkdir dist
pip3 install wheel
python3 setup.py bdist_wheel
pip3 install -r requirements.txt
rm ./changedetection.py
rm -rf changedetectio

pip3 install dist/changedetection.io*.whl
changedetection.io -d /tmp -p 10000 &
sleep 3
curl http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
killall -9 changedetection.io
@@ -1,6 +1,7 @@
recursive-include changedetectionio/api *
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/model *
recursive-include changedetectionio/processors *
recursive-include changedetectionio/res *
recursive-include changedetectionio/static *
recursive-include changedetectionio/templates *
@@ -1,4 +1,4 @@
## Web Site Change Detection, Monitoring and Notification.
## Web Site Change Detection, Restock monitoring and notifications.

**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**

@@ -65,6 +65,7 @@ Requires Playwright to be enabled.
- Get notified when certain keywords appear in Twitter search results
- Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
- Get alerts when new job positions are open on Bamboo HR and other job platforms
- Website defacement monitoring

_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!</a>_
@@ -36,7 +36,7 @@ from flask import (
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.api import api_v1
|
||||
|
||||
__version__ = '0.40.3'
|
||||
__version__ = '0.41.1'
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -64,6 +64,9 @@ app.config.exit = Event()
|
||||
|
||||
app.config['NEW_VERSION_AVAILABLE'] = False
|
||||
|
||||
if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME')
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
# Disables caching of the templates
|
||||
@@ -337,8 +340,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
if len(dates) < 2:
|
||||
continue
|
||||
|
||||
prev_fname = watch.history[dates[-2]]
|
||||
|
||||
if not watch.viewed:
|
||||
# Re #239 - GUID needs to be individual for each event
|
||||
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
||||
@@ -359,9 +360,12 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
|
||||
fe.title(title=watch_title)
|
||||
latest_fname = watch.history[dates[-1]]
|
||||
|
||||
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="<br>")
|
||||
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
|
||||
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
|
||||
include_equal=False,
|
||||
line_feed_sep="<br>")
|
||||
|
||||
fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
|
||||
type='CDATA')
|
||||
|
||||
@@ -426,15 +430,26 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
has_unviewed=datastore.has_unviewed,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||
tags=existing_tags,
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
|
||||
if session.get('share-link'):
|
||||
del(session['share-link'])
|
||||
return output
|
||||
|
||||
resp = make_response(output)
|
||||
|
||||
# The template can run on cookie or url query info
|
||||
if request.args.get('sort'):
|
||||
resp.set_cookie('sort', request.args.get('sort'))
|
||||
if request.args.get('order'):
|
||||
resp.set_cookie('order', request.args.get('order'))
|
||||
|
||||
return resp
|
||||
|
||||
|
||||
|
||||
# AJAX endpoint for sending a test
|
||||
@@ -459,11 +474,19 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
try:
|
||||
n_object = {'watch_url': request.form['window_url'],
|
||||
'notification_urls': request.form['notification_urls'].splitlines(),
|
||||
'notification_title': request.form['notification_title'].strip(),
|
||||
'notification_body': request.form['notification_body'].strip(),
|
||||
'notification_format': request.form['notification_format'].strip()
|
||||
'notification_urls': request.form['notification_urls'].splitlines()
|
||||
}
|
||||
|
||||
# Only use if present, if not set in n_object it should use the default system value
|
||||
if 'notification_format' in request.form and request.form['notification_format'].strip():
|
||||
n_object['notification_format'] = request.form.get('notification_format', '').strip()
|
||||
|
||||
if 'notification_title' in request.form and request.form['notification_title'].strip():
|
||||
n_object['notification_title'] = request.form.get('notification_title', '').strip()
|
||||
|
||||
if 'notification_body' in request.form and request.form['notification_body'].strip():
|
||||
n_object['notification_body'] = request.form.get('notification_body', '').strip()
|
||||
|
||||
notification_q.put(n_object)
|
||||
except Exception as e:
|
||||
return make_response({'error': str(e)}, 400)
|
||||
@@ -511,8 +534,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||
|
||||
def edit_page(uuid):
|
||||
from changedetectionio import forms
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
from . import forms
|
||||
from .blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
from . import processors
|
||||
|
||||
using_default_check_time = True
|
||||
# More for testing, possible to return the first/only
|
||||
@@ -527,6 +551,15 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
flash("No watch with the UUID %s found." % (uuid), "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
switch_processor = request.args.get('switch_processor')
|
||||
if switch_processor:
|
||||
for p in processors.available_processors():
|
||||
if p[0] == switch_processor:
|
||||
datastore.data['watching'][uuid]['processor'] = switch_processor
|
||||
flash(f"Switched to mode - {p[1]}.")
|
||||
datastore.clear_watch_history(uuid)
|
||||
redirect(url_for('edit_page', uuid=uuid))
|
||||
|
||||
# be sure we update with a copy instead of accidently editing the live object by reference
|
||||
default = deepcopy(datastore.data['watching'][uuid])
|
||||
|
||||
@@ -587,6 +620,16 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
if datastore.proxy_list is not None and form.data['proxy'] == '':
|
||||
extra_update_obj['proxy'] = None
|
||||
|
||||
# Unsetting all filter_text methods should make it go back to default
|
||||
# This particularly affects tests running
|
||||
if 'filter_text_added' in form.data and not form.data.get('filter_text_added') \
|
||||
and 'filter_text_replaced' in form.data and not form.data.get('filter_text_replaced') \
|
||||
and 'filter_text_removed' in form.data and not form.data.get('filter_text_removed'):
|
||||
extra_update_obj['filter_text_added'] = True
|
||||
extra_update_obj['filter_text_replaced'] = True
|
||||
extra_update_obj['filter_text_removed'] = True
|
||||
|
||||
|
||||
datastore.data['watching'][uuid].update(form.data)
|
||||
datastore.data['watching'][uuid].update(extra_update_obj)
|
||||
|
||||
@@ -633,6 +676,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
|
||||
|
||||
output = render_template("edit.html",
|
||||
available_processors=processors.available_processors(),
|
||||
browser_steps_config=browser_step_ui_config,
|
||||
current_base_url=datastore.data['settings']['application']['base_url'],
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
@@ -735,6 +779,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
@login_optionally_required
|
||||
def import_page():
|
||||
remaining_urls = []
|
||||
from . import forms
|
||||
|
||||
if request.method == 'POST':
|
||||
from .importer import import_url_list, import_distill_io_json
|
||||
|
||||
@@ -742,7 +788,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
importer = import_url_list()
|
||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
|
||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor'))
|
||||
for uuid in importer.new_uuids:
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
||||
|
||||
@@ -760,9 +806,12 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
||||
|
||||
|
||||
|
||||
form = forms.importForm(formdata=request.form if request.method == 'POST' else None,
|
||||
# data=default,
|
||||
)
|
||||
# Could be some remaining, or we could be on GET
|
||||
output = render_template("import.html",
|
||||
form=form,
|
||||
import_url_list_remaining="\n".join(remaining_urls),
|
||||
original_distill_json=''
|
||||
)
|
||||
@@ -828,28 +877,22 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Save the current newest history as the most recently viewed
|
||||
datastore.set_last_viewed(uuid, time.time())
|
||||
|
||||
newest_file = history[dates[-1]]
|
||||
|
||||
# Read as binary and force decode as UTF-8
|
||||
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
|
||||
try:
|
||||
with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
newest_version_file_contents = f.read()
|
||||
newest_version_file_contents = watch.get_history_snapshot(dates[-1])
|
||||
except Exception as e:
|
||||
newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
|
||||
newest_version_file_contents = "Unable to read {}.\n".format(dates[-1])
|
||||
|
||||
previous_version = request.args.get('previous_version')
|
||||
try:
|
||||
previous_file = history[previous_version]
|
||||
except KeyError:
|
||||
# Not present, use a default value, the second one in the sorted list.
|
||||
previous_file = history[dates[-2]]
|
||||
previous_timestamp = dates[-2]
|
||||
if previous_version:
|
||||
previous_timestamp = previous_version
|
||||
|
||||
try:
|
||||
with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
previous_version_file_contents = f.read()
|
||||
previous_version_file_contents = watch.get_history_snapshot(previous_timestamp)
|
||||
except Exception as e:
|
||||
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
|
||||
previous_version_file_contents = "Unable to read {}.\n".format(previous_timestamp)
|
||||
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
@@ -929,37 +972,35 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
return output
|
||||
|
||||
timestamp = list(watch.history.keys())[-1]
|
||||
filename = watch.history[timestamp]
|
||||
try:
|
||||
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
tmp = f.readlines()
|
||||
tmp = watch.get_history_snapshot(timestamp).splitlines()
|
||||
|
||||
# Get what needs to be highlighted
|
||||
ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
|
||||
# Get what needs to be highlighted
|
||||
ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
|
||||
|
||||
# .readlines will keep the \n, but we will parse it here again, in the future tidy this up
|
||||
ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
|
||||
wordlist=ignore_rules,
|
||||
mode='line numbers'
|
||||
)
|
||||
# .readlines will keep the \n, but we will parse it here again, in the future tidy this up
|
||||
ignored_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
|
||||
wordlist=ignore_rules,
|
||||
mode='line numbers'
|
||||
)
|
||||
|
||||
trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
|
||||
wordlist=watch['trigger_text'],
|
||||
mode='line numbers'
|
||||
)
|
||||
# Prepare the classes and lines used in the template
|
||||
i=0
|
||||
for l in tmp:
|
||||
classes=[]
|
||||
i+=1
|
||||
if i in ignored_line_numbers:
|
||||
classes.append('ignored')
|
||||
if i in trigger_line_numbers:
|
||||
classes.append('triggered')
|
||||
content.append({'line': l, 'classes': ' '.join(classes)})
|
||||
trigger_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
|
||||
wordlist=watch['trigger_text'],
|
||||
mode='line numbers'
|
||||
)
|
||||
# Prepare the classes and lines used in the template
|
||||
i=0
|
||||
for l in tmp:
|
||||
classes=[]
|
||||
i+=1
|
||||
if i in ignored_line_numbers:
|
||||
classes.append('ignored')
|
||||
if i in trigger_line_numbers:
|
||||
classes.append('triggered')
|
||||
content.append({'line': l, 'classes': ' '.join(classes)})
|
||||
|
||||
except Exception as e:
|
||||
content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''})
|
||||
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
|
||||
|
||||
output = render_template("preview.html",
|
||||
content=content,
|
||||
@@ -1126,7 +1167,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
return redirect(url_for('index'))
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
|
||||
processor = request.form.get('processor', 'text_json_diff')
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
|
||||
if new_uuid:
|
||||
if add_paused:
|
||||
@@ -1235,6 +1277,13 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
flash("{} watches unpaused".format(len(uuids)))
|
||||
|
||||
elif (op == 'mark-viewed'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.set_last_viewed(uuid, int(time.time()))
|
||||
flash("{} watches updated".format(len(uuids)))
|
||||
|
||||
elif (op == 'mute'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
|
||||
@@ -179,9 +179,7 @@ class WatchSingleHistory(Resource):
if timestamp == 'latest':
timestamp = list(watch.history.keys())[-1]

# @todo - Check for UTF-8 compatability
with open(watch.history[timestamp], 'r') as f:
content = f.read()
content = watch.get_history_snapshot(timestamp)

response = make_response(content, 200)
response.mimetype = "text/plain"
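A hedged usage sketch of the snapshot endpoint touched above. The route shape and the x-api-key header are assumptions based on the project's public API documentation, not shown in this diff; only the switch to watch.get_history_snapshot() comes from the hunk itself:

```python
import requests

BASE_URL = "http://localhost:5000"                       # assumption: local instance
API_KEY = "your-api-key"                                 # assumption: key from Settings > API
WATCH_UUID = "00000000-0000-0000-0000-000000000000"      # placeholder UUID

# 'latest' resolves to the newest history timestamp, as shown in the hunk above
resp = requests.get(f"{BASE_URL}/api/v1/watch/{WATCH_UUID}/history/latest",
                    headers={"x-api-key": API_KEY},
                    timeout=10)
resp.raise_for_status()
print(resp.text)   # plain-text snapshot, now read via watch.get_history_snapshot()
```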
@@ -78,18 +78,18 @@ class ReplyWithContentButNoText(Exception):
return

class Fetcher():
error = None
status_code = None
content = None
headers = None
browser_steps = None
browser_steps_screenshot_path = None

content = None
error = None
fetcher_description = "No description"
headers = None
status_code = None
webdriver_js_execute_code = None
xpath_element_js = ""

xpath_data = None
xpath_element_js = ""
instock_data = None
instock_data_js = ""

# Will be needed in the future by the VisualSelector, always get this where possible.
screenshot = False
@@ -103,6 +103,7 @@ class Fetcher():
from pkg_resources import resource_string
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')


@abstractmethod
@@ -373,7 +374,6 @@ class base_html_playwright(Fetcher):
raise EmptyReply(url=url, status_code=response.status)

self.status_code = response.status
self.content = self.page.content()
self.headers = response.all_headers()

# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
@@ -383,6 +383,7 @@ class base_html_playwright(Fetcher):
self.page.evaluate("var include_filters=''")

self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")

# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
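The Fetcher hunks above load res/stock-not-in-stock.js with pkg_resources and evaluate it inside the page. A small sketch of the same load-and-evaluate pattern using Playwright's sync API; it assumes the changedetectionio package is installed and uses a placeholder URL:

```python
from pkg_resources import resource_string
from playwright.sync_api import sync_playwright

# Same loading pattern as the Fetcher constructor above
instock_data_js = resource_string("changedetectionio", "res/stock-not-in-stock.js").decode('utf-8')

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.goto("https://example.com/some-product")   # placeholder URL
    # Wrap the script so it runs as an async function and returns its value,
    # mirroring self.page.evaluate("async () => {" + self.instock_data_js + "}")
    instock_data = page.evaluate("async () => {" + instock_data_js + "}")
    print(instock_data)   # 'Possibly in stock' or the out-of-stock phrase it matched
    browser.close()
```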
@@ -10,7 +10,7 @@ def same_slicer(l, a, b):
return l[a:b]

# like .compare but a little different output
def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True):
def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True, include_replaced=True, include_change_type_prefix=True):
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)

# @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?)
@@ -19,33 +19,38 @@ def customSequenceMatcher(before, after, include_equal=False, include_removed=Tr
g = before[alo:ahi]
yield g
elif include_removed and tag == 'delete':
g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
row_prefix = "(removed) " if include_change_type_prefix else ''
g = [ row_prefix + i for i in same_slicer(before, alo, ahi)]
yield g
elif tag == 'replace':
g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
g += ["(into) " + i for i in same_slicer(after, blo, bhi)]
elif include_replaced and tag == 'replace':
row_prefix = "(changed) " if include_change_type_prefix else ''
g = [row_prefix + i for i in same_slicer(before, alo, ahi)]
row_prefix = "(into) " if include_change_type_prefix else ''
g += [row_prefix + i for i in same_slicer(after, blo, bhi)]
yield g
elif include_added and tag == 'insert':
g = ["(added) " + i for i in same_slicer(after, blo, bhi)]
row_prefix = "(added) " if include_change_type_prefix else ''
g = [row_prefix + i for i in same_slicer(after, blo, bhi)]
yield g

# only_differences - only return info about the differences, no context
# line_feed_sep could be "<br>" or "<li>" or "\n" etc
def render_diff(previous_file, newest_file, include_equal=False, include_removed=True, include_added=True, line_feed_sep="\n"):
with open(newest_file, 'r') as f:
newest_version_file_contents = f.read()
newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, include_replaced=True, line_feed_sep="\n", include_change_type_prefix=True):

if previous_file:
with open(previous_file, 'r') as f:
previous_version_file_contents = f.read()
newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]

if previous_version_file_contents:
previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()]
else:
previous_version_file_contents = ""

rendered_diff = customSequenceMatcher(previous_version_file_contents,
newest_version_file_contents,
include_equal, include_removed, include_added)
rendered_diff = customSequenceMatcher(before=previous_version_file_contents,
after=newest_version_file_contents,
include_equal=include_equal,
include_removed=include_removed,
include_added=include_added,
include_replaced=include_replaced,
include_change_type_prefix=include_change_type_prefix)

# Recursively join lists
f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L])
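render_diff() now takes snapshot text directly instead of file paths. A short usage sketch of the new keyword signature; the sample strings are invented:

```python
from changedetectionio import diff

before = "price: 10.00\nin stock\nfree shipping"
after = "price: 12.50\nin stock"

html_diff = diff.render_diff(previous_version_file_contents=before,
                             newest_version_file_contents=after,
                             include_equal=False,              # only report differences
                             include_replaced=True,
                             include_change_type_prefix=True,
                             line_feed_sep="<br>")

# Roughly: "(changed) price: 10.00<br>(into) price: 12.50<br>(removed) free shipping"
print(html_diff)
```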
@@ -344,13 +344,15 @@ class ValidateCSSJSONXPATHInput(object):
raise ValidationError("A system-error occurred when validating your jq expression")

class quickWatchForm(Form):
from . import processors

url = fields.URLField('URL', validators=[validateURL()])
tag = StringField('Group tag', [validators.Optional()])
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})



# Common to a single watch and the global settings
class commonSettingsForm(Form):
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers()])
@@ -361,6 +363,10 @@ class commonSettingsForm(Form):
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1,
message="Should contain one or more seconds")])
class importForm(Form):
from . import processors
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
urls = TextAreaField('URLs')

class SingleBrowserStep(Form):

@@ -393,11 +399,19 @@ class watchForm(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)

filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
filter_text_removed = BooleanField('Removed lines', default=True)

# @todo this class could be moved to its own text_json_diff_watchForm and this goes to restock_diff_Watchform perhaps
in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True)

trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
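Both quickWatchForm and importForm above gain a processor RadioField fed by processors.available_processors(). A minimal standalone sketch of that wiring, with the choices hard-coded from the processor names elsewhere in this diff rather than introspected:

```python
from wtforms import Form, RadioField, StringField

# Stand-in for processors.available_processors(): (value, label) tuples
PROCESSOR_CHOICES = [
    ('text_json_diff', 'Webpage Text/HTML, JSON and PDF changes'),
    ('restock_diff', 'Re-stock detection for single product pages'),
]

class QuickWatchSketch(Form):
    url = StringField('URL')
    processor = RadioField('Processor', choices=PROCESSOR_CHOICES, default='text_json_diff')
```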
@@ -287,3 +287,18 @@ def workarounds_for_obfuscations(content):
content = re.sub('<!--\s+-->', '', content)

return content


def get_triggered_text(content, trigger_text):
triggered_text = []
result = strip_ignore_text(content=content,
wordlist=trigger_text,
mode="line numbers")

i = 1
for p in content.splitlines():
if i in result:
triggered_text.append(p)
i += 1

return triggered_text
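get_triggered_text() returns the matching lines themselves rather than line numbers, which later feeds the new triggered_text notification token. A small usage sketch with invented content:

```python
from changedetectionio import html_tools

content = "Widget Pro 3000\nOut of stock\nFree delivery over $50"

# trigger_text takes the same list format as the 'Trigger/wait for text' watch setting
hits = html_tools.get_triggered_text(content=content, trigger_text=['Out of stock'])
print(hits)   # expected: ['Out of stock'] - the full line(s) that matched
```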
@@ -29,6 +29,7 @@ class import_url_list(Importer):
data,
flash,
datastore,
processor=None
):

urls = data.split("\n")
@@ -52,7 +53,11 @@ class import_url_list(Importer):
# Flask wtform validators wont work with basic auth, use validators package
# Up to 5000 per batch so we dont flood the server
if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False)
extras = None
if processor:
extras = {'processor': processor}
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras)

if new_uuid:
# Straight into the queue.
self.new_uuids.append(new_uuid)
@@ -20,15 +20,21 @@ base_config = {
|
||||
'body': None,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'check_count': 0,
|
||||
'date_created': None,
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'extract_title_as_title': False,
|
||||
'fetch_backend': 'system',
|
||||
'fetch_backend': 'system', # plaintext, playwright etc
|
||||
'processor': 'text_json_diff', # could be restock_diff or others from .processors
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
'filter_text_added': True,
|
||||
'filter_text_replaced': True,
|
||||
'filter_text_removed': True,
|
||||
'has_ldjson_price_data': None,
|
||||
'track_ldjson_price_data': None,
|
||||
'headers': {}, # Extra headers to send
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'in_stock_only' : True, # Only trigger change on going to instock from out-of-stock
|
||||
'include_filters': [],
|
||||
'last_checked': 0,
|
||||
'last_error': False,
|
||||
@@ -239,9 +245,32 @@ class model(dict):
|
||||
bump = self.history
|
||||
return self.__newest_history_key
|
||||
|
||||
def get_history_snapshot(self, timestamp):
|
||||
import brotli
|
||||
filepath = self.history[timestamp]
|
||||
|
||||
# See if a brotli versions exists and switch to that
|
||||
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
|
||||
filepath = f"{filepath}.br"
|
||||
|
||||
# OR in the backup case that the .br does not exist, but the plain one does
|
||||
if filepath.endswith('.br') and not os.path.isfile(filepath):
|
||||
if os.path.isfile(filepath.replace('.br', '')):
|
||||
filepath = filepath.replace('.br', '')
|
||||
|
||||
if filepath.endswith('.br'):
|
||||
# Brotli doesnt have a fileheader to detect it, so we rely on filename
|
||||
# https://www.rfc-editor.org/rfc/rfc7932
|
||||
with open(filepath, 'rb') as f:
|
||||
return(brotli.decompress(f.read()).decode('utf-8'))
|
||||
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
return f.read()
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
def save_history_text(self, contents, timestamp, snapshot_id):
|
||||
import brotli
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
@@ -250,16 +279,21 @@ class model(dict):
|
||||
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
|
||||
time.sleep(timestamp - self.__newest_history_key)
|
||||
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
# Only write if it does not exist, this is so that we dont bother re-saving the same data by checksum under different filenames.
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(dest):
|
||||
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
|
||||
# most sites are utf-8 and some are even broken utf-8
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(contents)
|
||||
f.close()
|
||||
if not skip_brotli and len(contents) > threshold:
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(dest):
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
|
||||
else:
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(dest):
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(contents)
|
||||
|
||||
# Append to index
|
||||
# @todo check last char was \n
|
||||
@@ -296,7 +330,8 @@ class model(dict):
|
||||
# Compare each lines (set) against each history text file (set) looking for something new..
|
||||
existing_history = set({})
|
||||
for k, v in self.history.items():
|
||||
alist = set([line.decode('utf-8').strip().lower() for line in open(v, 'rb')])
|
||||
content = self.get_history_snapshot(k)
|
||||
alist = set([line.strip().lower() for line in content.splitlines()])
|
||||
existing_history = existing_history.union(alist)
|
||||
|
||||
# Check that everything in local_lines(new stuff) already exists in existing_history - it should
|
||||
@@ -311,17 +346,6 @@ class model(dict):
|
||||
# False is not an option for AppRise, must be type None
|
||||
return None
|
||||
|
||||
def get_screenshot_as_jpeg(self):
|
||||
|
||||
# Created by save_screenshot()
|
||||
fname = os.path.join(self.watch_data_dir, "last-screenshot.jpg")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
|
||||
# False is not an option for AppRise, must be type None
|
||||
return None
|
||||
|
||||
|
||||
def __get_file_ctime(self, filename):
|
||||
fname = os.path.join(self.watch_data_dir, filename)
|
||||
if os.path.isfile(fname):
|
||||
@@ -368,6 +392,7 @@ class model(dict):
|
||||
return fname
|
||||
return False
|
||||
|
||||
|
||||
def pause(self):
|
||||
self['paused'] = True
|
||||
|
||||
@@ -397,8 +422,8 @@ class model(dict):
|
||||
# self.history will be keyed with the full path
|
||||
for k, fname in self.history.items():
|
||||
if os.path.isfile(fname):
|
||||
with open(fname, "r") as f:
|
||||
contents = f.read()
|
||||
if True:
|
||||
contents = self.get_history_snapshot(k)
|
||||
res = re.findall(regex, contents, re.MULTILINE)
|
||||
if res:
|
||||
if not csv_writer:
|
||||
@@ -434,3 +459,38 @@ class model(dict):
|
||||
# Return list of tags, stripped and lowercase, used for searching
|
||||
def all_tags(self):
|
||||
return [s.strip().lower() for s in self.get('tag','').split(',')]
|
||||
|
||||
def has_special_diff_filter_options_set(self):
|
||||
|
||||
# All False - nothing would be done, so act like it's not processable
|
||||
if not self.get('filter_text_added', True) and not self.get('filter_text_replaced', True) and not self.get('filter_text_removed', True):
|
||||
return False
|
||||
|
||||
# Or one is set
|
||||
if not self.get('filter_text_added', True) or not self.get('filter_text_replaced', True) or not self.get('filter_text_removed', True):
|
||||
return True
|
||||
|
||||
# None is set
|
||||
return False
|
||||
|
||||
|
||||
def get_last_fetched_before_filters(self):
|
||||
import brotli
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
|
||||
if not os.path.isfile(filepath):
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
dates = list(self.history.keys())
|
||||
if len(dates):
|
||||
return self.get_history_snapshot(dates[-1])
|
||||
else:
|
||||
return ''
|
||||
|
||||
with open(filepath, 'rb') as f:
|
||||
return(brotli.decompress(f.read()).decode('utf-8'))
|
||||
|
||||
def save_last_fetched_before_filters(self, contents):
|
||||
import brotli
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
|
||||
|
||||
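Snapshots above the SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD byte count are now stored as .txt.br. A standalone sketch of the same write/read round-trip used by save_history_text() and get_history_snapshot(); the path and sample contents are illustrative:

```python
import os
import brotli

threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
contents = b"some extracted page text\n" * 100
dest = "/tmp/snapshot-example.txt"

# Write: only compress when the snapshot is large enough to be worth it
if len(contents) > threshold:
    dest += ".br"
    with open(dest, 'wb') as f:
        f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
else:
    with open(dest, 'wb') as f:
        f.write(contents)

# Read: brotli has no magic header, so the filename decides, as in get_history_snapshot()
if dest.endswith('.br'):
    with open(dest, 'rb') as f:
        text = brotli.decompress(f.read()).decode('utf-8')
else:
    with open(dest, 'r', encoding='utf-8', errors='ignore') as f:
        text = f.read()
print(len(text))
```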
@@ -5,17 +5,18 @@ import json

valid_tokens = {
'base_url': '',
'watch_url': '',
'watch_uuid': '',
'watch_title': '',
'watch_tag': '',
'current_snapshot': '',
'diff': '',
'diff_added': '',
'diff_removed': '',
'diff_full': '',
'diff_removed': '',
'diff_url': '',
'preview_url': '',
'current_snapshot': ''
'triggered_text': '',
'watch_tag': '',
'watch_title': '',
'watch_url': '',
'watch_uuid': '',
}

default_notification_format_for_watch = 'System default'
@@ -88,7 +89,7 @@ def process_notification(n_object, datastore):
n_body = jinja2_env.from_string(n_object.get('notification_body', default_notification_body)).render(**notification_parameters)
n_title = jinja2_env.from_string(n_object.get('notification_title', default_notification_title)).render(**notification_parameters)
n_format = valid_notification_formats.get(
n_object['notification_format'],
n_object.get('notification_format', default_notification_format),
valid_notification_formats[default_notification_format],
)

@@ -211,17 +212,18 @@ def create_notification_parameters(n_object, datastore):
tokens.update(
{
'base_url': base_url if base_url is not None else '',
'watch_url': watch_url,
'watch_uuid': uuid,
'watch_title': watch_title if watch_title is not None else '',
'watch_tag': watch_tag if watch_tag is not None else '',
'diff_url': diff_url,
'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else '',
'diff': n_object.get('diff', ''), # Null default in the case we use a test
'diff_added': n_object.get('diff_added', ''), # Null default in the case we use a test
'diff_removed': n_object.get('diff_removed', ''), # Null default in the case we use a test
'diff_full': n_object.get('diff_full', ''), # Null default in the case we use a test
'diff_removed': n_object.get('diff_removed', ''), # Null default in the case we use a test
'diff_url': diff_url,
'preview_url': preview_url,
'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else ''
'triggered_text': n_object.get('triggered_text', ''),
'watch_tag': watch_tag if watch_tag is not None else '',
'watch_title': watch_title if watch_title is not None else '',
'watch_url': watch_url,
'watch_uuid': uuid,
})

return tokens
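Notification titles and bodies are Jinja2 templates rendered against the token table above, which now includes triggered_text. A short sketch with made-up token values; the real code builds the full token set in create_notification_parameters():

```python
from jinja2 import Environment, BaseLoader

jinja2_env = Environment(loader=BaseLoader())

notification_parameters = {
    'watch_url': 'https://example.com/product',       # sample values only
    'watch_title': 'Example product',
    'triggered_text': 'Back in stock',
    'diff': '(added) Back in stock',
}

template = "Change detected in {{watch_title}} ({{watch_url}}):\n{{triggered_text}}\n\n{{diff}}"
print(jinja2_env.from_string(template).render(**notification_parameters))
```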
changedetectionio/processors/README.md (new file, 11 lines)

@@ -0,0 +1,11 @@
# Change detection post-processors

The concept here is to be able to switch between different domain specific problems to solve.

- `text_json_diff` The traditional text and JSON comparison handler
- `restock_diff` Only cares about detecting if a product looks like it has some text that suggests that it's out of stock, otherwise assumes that it's in stock.

Some suggestions for the future

- `graphical`
- `restock_and_price` - extract price AND stock text
changedetectionio/processors/__init__.py (new file, 24 lines)

@@ -0,0 +1,24 @@
from abc import abstractmethod
import hashlib


class difference_detection_processor():


    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @abstractmethod
    def run(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
        return changed_detected, update_obj, ''.encode('utf-8')


def available_processors():
    from . import restock_diff, text_json_diff
    x=[('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)]
    # @todo Make this smarter with introspection of sorts.
    return x
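available_processors() is a hard-coded list for now (the @todo notes introspection). A hypothetical third processor following the same contract would look roughly like the sketch below; the module and names are invented, and it would still need to be added to that list by hand:

```python
# hypothetical module: changedetectionio/processors/my_processor.py
import hashlib
from changedetectionio.processors import difference_detection_processor

name = 'My hypothetical processor'
description = 'Illustrates the run() contract only - not part of this changeset'

class perform_site_check(difference_detection_processor):

    def __init__(self, *args, datastore, **kwargs):
        super().__init__(*args, **kwargs)
        self.datastore = datastore

    def run(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        snapshot_text = "whatever this processor considers the current page state"
        update_obj['previous_md5'] = hashlib.md5(snapshot_text.encode('utf-8')).hexdigest()
        changed_detected = False   # a real processor compares against watch['previous_md5']
        return changed_detected, update_obj, snapshot_text.encode('utf-8')
```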
changedetectionio/processors/restock_diff.py (new file, 125 lines)
@@ -0,0 +1,125 @@
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import urllib3
|
||||
from . import difference_detection_processor
|
||||
from changedetectionio import content_fetcher
|
||||
from copy import deepcopy
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
name = 'Re-stock detection for single product pages'
|
||||
description = 'Detects if the product goes back to in-stock'
|
||||
|
||||
class perform_site_check(difference_detection_processor):
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
def __init__(self, *args, datastore, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.datastore = datastore
|
||||
|
||||
def run(self, uuid, skip_when_checksum_same=True):
|
||||
|
||||
# DeepCopy so we can be sure we don't accidently change anything by reference
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
# Protect against file:// access
|
||||
if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
||||
raise Exception(
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
# Unset any existing notification error
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
extra_headers = watch.get('headers', [])
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
request_headers = deepcopy(self.datastore.data['settings']['headers'])
|
||||
request_headers.update(extra_headers)
|
||||
|
||||
# https://github.com/psf/requests/issues/4525
|
||||
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
|
||||
# do this by accident.
|
||||
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||
|
||||
timeout = self.datastore.data['settings']['requests'].get('timeout')
|
||||
|
||||
url = watch.link
|
||||
|
||||
request_body = self.datastore.data['watching'][uuid].get('body')
|
||||
request_method = self.datastore.data['watching'][uuid].get('method')
|
||||
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
|
||||
|
||||
# Pluggable content fetcher
|
||||
prefer_backend = watch.get_fetch_backend
|
||||
if not prefer_backend or prefer_backend == 'system':
|
||||
prefer_backend = self.datastore.data['settings']['application']['fetch_backend']
|
||||
|
||||
if hasattr(content_fetcher, prefer_backend):
|
||||
klass = getattr(content_fetcher, prefer_backend)
|
||||
else:
|
||||
# If the klass doesnt exist, just use a default
|
||||
klass = getattr(content_fetcher, "html_requests")
|
||||
|
||||
proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
proxy_url = None
|
||||
if proxy_id:
|
||||
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
|
||||
print("UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||
|
||||
fetcher = klass(proxy_override=proxy_url)
|
||||
|
||||
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
||||
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
|
||||
if watch['webdriver_delay'] is not None:
|
||||
fetcher.render_extract_delay = watch.get('webdriver_delay')
|
||||
elif system_webdriver_delay is not None:
|
||||
fetcher.render_extract_delay = system_webdriver_delay
|
||||
|
||||
# Could be removed if requests/plaintext could also return some info?
|
||||
if prefer_backend != 'html_webdriver':
|
||||
raise Exception("Re-stock detection requires Chrome or compatible webdriver/playwright fetcher to work")
|
||||
|
||||
if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
|
||||
fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')
|
||||
|
||||
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
|
||||
fetcher.quit()
|
||||
|
||||
self.screenshot = fetcher.screenshot
|
||||
self.xpath_data = fetcher.xpath_data
|
||||
|
||||
# Track the content type
|
||||
update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
|
||||
update_obj["last_check_status"] = fetcher.get_last_status_code()
|
||||
|
||||
# Main detection method
|
||||
fetched_md5 = None
|
||||
if fetcher.instock_data:
|
||||
fetched_md5 = hashlib.md5(fetcher.instock_data.encode('utf-8')).hexdigest()
|
||||
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
|
||||
update_obj["in_stock"] = True if fetcher.instock_data == 'Possibly in stock' else False
|
||||
|
||||
|
||||
# The main thing that all this at the moment comes down to :)
|
||||
changed_detected = False
|
||||
|
||||
if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
|
||||
# Yes if we only care about it going to instock, AND we are in stock
|
||||
if watch.get('in_stock_only') and update_obj["in_stock"]:
|
||||
changed_detected = True
|
||||
|
||||
if not watch.get('in_stock_only'):
|
||||
# All cases
|
||||
changed_detected = True
|
||||
|
||||
# Always record the new checksum
|
||||
update_obj["previous_md5"] = fetched_md5
|
||||
|
||||
return changed_detected, update_obj, fetcher.instock_data.encode('utf-8')
|
||||
@@ -10,10 +10,14 @@ import urllib3
|
||||
from changedetectionio import content_fetcher, html_tools
|
||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
|
||||
from copy import deepcopy
|
||||
from . import difference_detection_processor
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
name = 'Webpage Text/HTML, JSON and PDF changes'
|
||||
description = 'Detects all text changes where possible'
|
||||
|
||||
class FilterNotFoundInResponse(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
@@ -25,7 +29,7 @@ class PDFToHTMLToolNotFound(ValueError):
|
||||
|
||||
# Some common stuff here that can be moved to a base class
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check():
|
||||
class perform_site_check(difference_detection_processor):
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
@@ -55,7 +59,7 @@ class perform_site_check():
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
|
||||
if not watch:
|
||||
return
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
# Protect against file:// access
|
||||
if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
||||
@@ -275,6 +279,34 @@ class perform_site_check():
|
||||
# Re #340 - return the content before the 'ignore text' was applied
|
||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
||||
|
||||
|
||||
# @todo whitespace coming from missing rtrim()?
|
||||
# stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
|
||||
# Rewrite's the processing text based on only what diff result they want to see
|
||||
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
|
||||
# Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
|
||||
from .. import diff
|
||||
# needs to not include (added) etc or it may get used twice
|
||||
# Replace the processed text with the preferred result
|
||||
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
|
||||
newest_version_file_contents=stripped_text_from_html,
|
||||
include_equal=False, # not the same lines
|
||||
include_added=watch.get('filter_text_added', True),
|
||||
include_removed=watch.get('filter_text_removed', True),
|
||||
include_replaced=watch.get('filter_text_replaced', True),
|
||||
line_feed_sep="\n",
|
||||
include_change_type_prefix=False)
|
||||
|
||||
watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
|
||||
|
||||
if not rendered_diff and stripped_text_from_html:
|
||||
# We had some content, but no differences were found
|
||||
# Store our new file as the MD5 so it will trigger in the future
|
||||
c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest()
|
||||
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
|
||||
else:
|
||||
stripped_text_from_html = rendered_diff
|
||||
|
||||
# Treat pages with no renderable text content as a change? No by default
|
||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
|
||||
@@ -333,6 +365,7 @@ class perform_site_check():
|
||||
blocked = True
|
||||
# Filter and trigger works the same, so reuse it
|
||||
# It should return the line numbers that match
|
||||
# Unblock flow if the trigger was found (some text remained after stripped what didnt match)
|
||||
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||
wordlist=trigger_text,
|
||||
mode="line numbers")
|
||||
changedetectionio/res/stock-not-in-stock.js (new file, 97 lines)
@@ -0,0 +1,97 @@
|
||||
function isItemInStock() {
|
||||
// @todo Pass these in so the same list can be used in non-JS fetchers
|
||||
const outOfStockTexts = [
|
||||
'0 in stock',
|
||||
'agotado',
|
||||
'artikel zurzeit vergriffen',
|
||||
'as soon as stock is available',
|
||||
'available for back order',
|
||||
'backordered',
|
||||
'brak na stanie',
|
||||
'brak w magazynie',
|
||||
'coming soon',
|
||||
'currently unavailable',
|
||||
'en rupture de stock',
|
||||
'item is no longer available',
|
||||
'message if back in stock',
|
||||
'nachricht bei',
|
||||
'nicht auf lager',
|
||||
'nicht lieferbar',
|
||||
'nicht zur verfügung',
|
||||
'no disponible temporalmente',
|
||||
'no longer in stock',
|
||||
'not available',
|
||||
'not in stock',
|
||||
'notify me when available',
|
||||
'não estamos a aceitar encomendas',
|
||||
'out of stock',
|
||||
'out-of-stock',
|
||||
'produkt niedostępny',
|
||||
'sold out',
|
||||
'temporarily out of stock',
|
||||
'temporarily unavailable',
|
||||
'we do not currently have an estimate of when this product will be back in stock.',
|
||||
'zur zeit nicht an lager',
|
||||
];
|
||||
|
||||
|
||||
const negateOutOfStockRegexs = [
|
||||
'[0-9] in stock'
|
||||
]
|
||||
var negateOutOfStockRegexs_r = [];
|
||||
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
|
||||
negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g'));
|
||||
}
|
||||
|
||||
|
||||
const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0);
|
||||
|
||||
// REGEXS THAT REALLY MEAN IT'S IN STOCK
|
||||
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) {
|
||||
const element = elementsWithZeroChildren[i];
|
||||
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
|
||||
var elementText="";
|
||||
if (element.tagName.toLowerCase() === "input") {
|
||||
elementText = element.value.toLowerCase();
|
||||
} else {
|
||||
elementText = element.textContent.toLowerCase();
|
||||
}
|
||||
|
||||
if (elementText.length) {
|
||||
// try which ones could mean its in stock
|
||||
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
|
||||
if (negateOutOfStockRegexs_r[i].test(elementText)) {
|
||||
return 'Possibly in stock';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
|
||||
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) {
|
||||
const element = elementsWithZeroChildren[i];
|
||||
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
|
||||
var elementText="";
|
||||
if (element.tagName.toLowerCase() === "input") {
|
||||
elementText = element.value.toLowerCase();
|
||||
} else {
|
||||
elementText = element.textContent.toLowerCase();
|
||||
}
|
||||
|
||||
if (elementText.length) {
|
||||
// and these mean its out of stock
|
||||
for (const outOfStockText of outOfStockTexts) {
|
||||
if (elementText.includes(outOfStockText)) {
|
||||
return elementText; // item is out of stock
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
|
||||
}
|
||||
|
||||
// returns the element text that makes it think it's out of stock
|
||||
return isItemInStock();
|
||||
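Whatever string isItemInStock() returns comes back to Python as fetcher.instock_data, and restock_diff.py above reduces it to a boolean. A condensed sketch of that hand-off; the sample value is illustrative:

```python
import hashlib

# fetcher.instock_data is the string returned by isItemInStock() in the page context
instock_data = 'Possibly in stock'   # or the matched out-of-stock phrase

update_obj = {}
if instock_data:
    fetched_md5 = hashlib.md5(instock_data.encode('utf-8')).hexdigest()
    # 'Possibly in stock' is the only value treated as "looks available"
    update_obj['in_stock'] = instock_data == 'Possibly in stock'
```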
@@ -28,3 +28,11 @@ pytest tests/test_notification.py
# Re-run with HIDE_REFERER set - could affect login
export HIDE_REFERER=True
pytest tests/test_access_control.py

# Re-run a few tests that will trigger brotli based storage
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
pytest tests/test_access_control.py
pytest tests/test_notification.py
pytest tests/test_backend.py
pytest tests/test_rss.py
pytest tests/test_unique_lines.py
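Setting SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD to a tiny value forces even small snapshots down the brotli-compressed storage path so those re-run tests exercise it. A rough sketch of the idea, with illustrative names and default only (the real save logic lives in the watch model and may differ):

```python
# Hedged sketch of threshold-based brotli snapshot storage; paths and the 1024 default are illustrative.
import os
import brotli

def save_snapshot(path_prefix: str, contents: str) -> str:
    threshold = int(os.getenv("SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD", 1024))
    data = contents.encode("utf-8")
    if len(data) > threshold:
        path = path_prefix + ".txt.br"
        with open(path, "wb") as f:
            # brotli.compress() returns the compressed bytes
            f.write(brotli.compress(data))
    else:
        path = path_prefix + ".txt"
        with open(path, "wb") as f:
            f.write(data)
    return path
```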
3
changedetectionio/static/images/generic-icon.svg
Normal file
3
changedetectionio/static/images/generic-icon.svg
Normal file
@@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg width="61.649mm" height="61.649mm" version="1.1" viewBox="0 0 61.649 61.649" xml:space="preserve" xmlns="http://www.w3.org/2000/svg"><g transform="translate(66.269 -15.463)" fill="#3056d3"><g transform="matrix(1.423 0 0 1.423 101.16 69.23)" fill="#3056d3"><g transform="matrix(.8229 0 0 .8229 -23.378 -2.3935)" fill="#3056d3"><path d="m-88.248-43.007a26.323 26.323 0 0 0-26.323 26.323 26.323 26.323 0 0 0 26.323 26.323 26.323 26.323 0 0 0 26.323-26.323 26.323 26.323 0 0 0-26.323-26.323zm0 2.8417a23.482 23.482 0 0 1 23.482 23.482 23.482 23.482 0 0 1-23.482 23.482 23.482 23.482 0 0 1-23.482-23.482 23.482 23.482 0 0 1 23.482-23.482z"/><g transform="matrix(.26458 0 0 .26458 -115.65 -44.085)"><path d="m33.02 64.43c0.35-0.05 2.04-0.13 2.04-0.13h25.53s3.17 0.32 3.67 0.53c2.5 1.05 3.98 1.89 6.04 3.57 0.72 0.58 4.12 4.01 4.12 4.01l51.67 57.39s1.61 1.65 1.97 1.94c1.2 0.97 2.48 1.96 3.98 2.32 0.5 0.12 2.72 0.21 2.72 0.21h27.32l-8.83-9.04s-1.31-1.65-1.44-1.94c-0.45-0.93-0.59-2.59-0.13-3.51 0.35-0.69 1.46-1.87 2.23-1.98 1.03-0.14 2.12-0.39 3.02 0.14 0.33 0.2 1.64 1.32 1.64 1.32l17.49 17.49s1.35 1.09 1.6 1.6c0.17 0.34 0.29 0.82 0.15 1.18-0.17 0.42-1.42 1.63-1.42 1.63l-0.94 0.98-15.69 16.37s-1.44 1.4-1.79 1.67c-0.76 0.6-1.99 0.89-2.96 0.9-1.03 0-2.62-1.11-3.26-1.91-0.6-0.76-1.1-2.22-0.77-3.13 0.16-0.45 1.28-1.85 1.28-1.85l11.36-11.3-29.47-0.02-1.68 0.09s-4.16-0.66-5.26-1.03c-1.63-0.56-3.44-1.82-4.75-2.93-0.39-0.33-1.8-1.92-1.8-1.92l-51.7-59.28s-2-2.06-2.43-2.43c-1.37-1.17-2-1.62-3.76-2.34-0.44-0.18-3.45-0.55-3.45-0.55l-24.13-0.22s-2.23-0.15-2.61-0.22c-1.08-0.21-2.16-1.07-2.81-1.83-0.79-0.92-0.59-3.06 0.06-4.09 0.57-0.89 2.14-1.52 3.19-1.66z"/><path d="m86.1 109.7-17.13 19.65s-2 2.06-2.43 2.43c-1.37 1.17-2 1.62-3.76 2.34-0.44 0.18-3.45 0.55-3.45 0.55l-24.13 0.22s-2.23 0.15-2.61 0.22c-1.08 0.21-2.16 1.07-2.81 1.83-0.79 0.92-0.59 3.06 0.06 4.09 0.57 0.89 2.14 1.52 3.19 1.66 0.35 0.05 2.04 0.13 2.04 0.13h25.53s3.17-0.32 3.67-0.53c2.5-1.05 3.98-1.89 6.04-3.57 0.72-0.58 4.12-4.01 4.12-4.01l17.38-19.3z"/><path d="m177.81 67.6c-0.17-0.42-1.42-1.63-1.42-1.63l-0.94-0.98-15.69-16.37s-1.44-1.4-1.79-1.67c-0.76-0.6-1.99-0.89-2.96-0.9-1.03 0-2.62 1.11-3.26 1.91-0.6 0.76-1.1 2.22-0.77 3.13 0.16 0.45 1.28 1.85 1.28 1.85l11.36 11.3-29.47 0.02-1.68-0.09s-4.16 0.66-5.26 1.03c-1.63 0.56-3.44 1.82-4.75 2.93-0.39 0.33-1.8 1.92-1.8 1.92l-18.91 21.69 5.98 5.98 18.38-20.41s1.61-1.65 1.97-1.94c1.2-0.97 2.48-1.96 3.98-2.32 0.5-0.12 2.72-0.21 2.72-0.21h27.32l-8.83 9.04s-1.31 1.65-1.44 1.94c-0.45 0.93-0.59 2.59-0.13 3.51 0.35 0.69 1.46 1.87 2.23 1.98 1.03 0.14 2.12 0.39 3.02-0.14 0.33-0.2 1.64-1.32 1.64-1.32l17.49-17.49s1.35-1.09 1.6-1.6c0.17-0.34 0.29-0.82 0.15-1.18z"/></g></g></g></g></svg>
After Width: | Height: | Size: 2.7 KiB |
@@ -26,9 +26,6 @@ $(document).ready(function() {
    data = {
      window_url : window.location.href,
      notification_urls : $('.notification-urls').val(),
      notification_title : $('.notification-title').val(),
      notification_body : $('.notification-body').val(),
      notification_format : $('.notification-format').val(),
    }
    for (key in data) {
      if (!data[key].length) {
@@ -241,6 +241,10 @@ body:before {
  font-size: 85%;
}

.button-xsmall {
  font-size: 70%;
}

.fetch-error {
  padding-top: 1em;
  font-size: 80%;
@@ -889,6 +893,21 @@ body.full-width {
      font-size: .875em;
    }
  }
  .text-filtering {
    h3 {
      margin-top: 0;
    }
    border: 1px solid #ccc;
    padding: 1rem;
    border-radius: 5px;
    margin-bottom: 1rem;
    fieldset:last-of-type {
      padding-bottom: 0;
      .pure-control-group {
        padding-bottom: 0;
      }
    }
  }
}

ul {
@@ -1044,3 +1063,30 @@ ul {
  vertical-align: middle;
}


#quick-watch-processor-type {
  color: #fff;
  ul {
    padding: 0.3rem;

    li {
      list-style: none;
      font-size: 0.8rem;
    }
  }

}

.restock-label {
  &.in-stock {
    background-color: var(--color-background-button-green);
    color: #fff;
  }
  &.not-in-stock {
    background-color: var(--color-background-button-cancel);
    color: #777;
  }
  padding: 3px;
  border-radius: 3px;
  white-space: nowrap;
}
@@ -432,6 +432,9 @@ body:before {
.button-small {
  font-size: 85%; }

.button-xsmall {
  font-size: 70%; }

.fetch-error {
  padding-top: 1em;
  font-size: 80%;
@@ -869,6 +872,17 @@ body.full-width .edit-form {
    color: var(--color-text-input-description); }
  .edit-form .pure-form-message-inline code {
    font-size: .875em; }
  .edit-form .text-filtering {
    border: 1px solid #ccc;
    padding: 1rem;
    border-radius: 5px;
    margin-bottom: 1rem; }
    .edit-form .text-filtering h3 {
      margin-top: 0; }
    .edit-form .text-filtering fieldset:last-of-type {
      padding-bottom: 0; }
      .edit-form .text-filtering fieldset:last-of-type .pure-control-group {
        padding-bottom: 0; }

ul {
  padding-left: 1em;
@@ -980,3 +994,22 @@ ul {
    display: inline-block;
    height: 0.8rem;
    vertical-align: middle; }

#quick-watch-processor-type {
  color: #fff; }
  #quick-watch-processor-type ul {
    padding: 0.3rem; }
    #quick-watch-processor-type ul li {
      list-style: none;
      font-size: 0.8rem; }

.restock-label {
  padding: 3px;
  border-radius: 3px;
  white-space: nowrap; }
  .restock-label.in-stock {
    background-color: var(--color-background-button-green);
    color: #fff; }
  .restock-label.not-in-stock {
    background-color: var(--color-background-button-cancel);
    color: #777; }
@@ -287,6 +287,7 @@ class ChangeDetectionStore:
            'method',
            'paused',
            'previous_md5',
            'processor',
            'subtractive_selectors',
            'tag',
            'text_should_not_be_present',
@@ -315,7 +316,8 @@ class ChangeDetectionStore:
        # #Re 569
        new_watch = Watch.model(datastore_path=self.datastore_path, default={
            'url': url,
            'tag': tag
            'tag': tag,
            'date_created': int(time.time())
        })

        new_uuid = new_watch['uuid']
@@ -360,11 +362,6 @@ class ChangeDetectionStore:
            f.write(screenshot)
            f.close()

        # Make a JPEG that's used in notifications (due to being a smaller size) available
        from PIL import Image
        im1 = Image.open(target_path)
        im1.convert('RGB').save(target_path.replace('.png','.jpg'), quality=int(os.getenv("NOTIFICATION_SCREENSHOT_JPG_QUALITY", 75)))

    def save_error_text(self, watch_uuid, contents):
        if not self.data['watching'].get(watch_uuid):
@@ -683,3 +680,13 @@ class ChangeDetectionStore:
            except:
                continue
        return

    # We don't know when the date_created was in the past until now, so just add an index number for now.
    def update_11(self):
        i = 0
        for uuid, watch in self.data['watching'].items():
            if not watch.get('date_created'):
                watch['date_created'] = i
            i+=1
        return
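The update_11() schema migration above backfills date_created for existing watches so the new '#' column sort has something stable to order by; the true creation time is unknown, so an incrementing index is used. A standalone sketch of the same idea, with illustrative names rather than the real ChangeDetectionStore internals:

```python
# Hedged sketch of the date_created backfill shown in update_11() above.
def backfill_date_created(watching: dict) -> None:
    i = 0
    for uuid, watch in watching.items():
        if not watch.get('date_created'):
            # No real creation timestamp is known, so an index keeps the sort order deterministic
            watch['date_created'] = i
        i += 1

# Illustrative data only
watches = {
    'aaaa-1111': {'url': 'https://example.com/a'},
    'bbbb-2222': {'url': 'https://example.com/b', 'date_created': 1680000000},
}
backfill_date_created(watches)
print(watches['aaaa-1111']['date_created'])  # 0
```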
@@ -17,14 +17,15 @@
|
||||
<li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li>
|
||||
<li><code>tgram://</code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
|
||||
<li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>)</li>
|
||||
<li>Accepts the <code>{{ '{{token}}' }}</code> placeholders listed below</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="notifications-wrapper">
|
||||
<a id="send-test-notification" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Send test notification</a>
|
||||
<a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a>
|
||||
{% if emailprefix %}
|
||||
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a>
|
||||
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email</a>
|
||||
{% endif %}
|
||||
<a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Notification debug logs</a>
|
||||
<a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a>
|
||||
</div>
|
||||
</div>
|
||||
<div id="notification-customisation" class="pure-control-group">
|
||||
@@ -103,6 +104,10 @@
|
||||
<td>The current snapshot value, useful when combined with JSON or CSS filters
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{triggered_text}}' }}</code></td>
|
||||
<td>Text that tripped the trigger from filters</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<div class="pure-form-message-inline">
|
||||
|
||||
@@ -34,8 +34,15 @@
|
||||
{% if playwright_enabled %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
||||
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<li class="tab"><a href="#restock">Restock Detection</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
@@ -51,6 +58,16 @@
|
||||
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
|
||||
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
|
||||
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
|
||||
<span class="pure-form-message-inline">
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br>
|
||||
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a>
|
||||
{% else %}
|
||||
Current mode: <strong>Re-stock detection.</strong><br>
|
||||
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a>
|
||||
{% endif %}
|
||||
</span>
|
||||
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, class="m-d") }}
|
||||
@@ -214,6 +231,7 @@ User-Agent: wonderbra 1.0") }}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
<div class="pure-control-group">
|
||||
<strong>Pro-tips:</strong><br>
|
||||
@@ -226,12 +244,6 @@ User-Agent: wonderbra 1.0") }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.check_unique_lines) }}
|
||||
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
<div class="pure-control-group">
|
||||
{% set field = render_field(form.include_filters,
|
||||
rows=5,
|
||||
@@ -269,37 +281,39 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
|
||||
footer
|
||||
nav
|
||||
.stockticker") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
|
||||
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
|
||||
</ul>
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
|
||||
/some.regex\d{2}/ for case-INsensitive regex
|
||||
") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
<li>Use the preview/show current tab to see ignores</li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
<div class="text-filtering">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>Text filtering</h3>
|
||||
Limit trigger/ignore/block/extract to;<br>
|
||||
{{ render_checkbox_field(form.filter_text_added) }}
|
||||
{{ render_checkbox_field(form.filter_text_replaced) }}
|
||||
{{ render_checkbox_field(form.filter_text_removed) }}
|
||||
<span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
|
||||
<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
|
||||
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
|
||||
</fieldset>
|
||||
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.check_unique_lines) }}
|
||||
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
|
||||
</fieldset>
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line
|
||||
/some.regex\d{2}/ for case-INsensitive regex
|
||||
") }}
|
||||
") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
|
||||
@@ -310,6 +324,21 @@ nav
|
||||
</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
|
||||
/some.regex\d{2}/ for case-INsensitive regex
|
||||
") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
<li>Use the preview/show current tab to see ignores</li>
|
||||
</ul>
|
||||
</span>
|
||||
|
||||
</fieldset>
|
||||
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
|
||||
@@ -344,8 +373,22 @@ Unavailable") }}
|
||||
</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<div class="tab-pane-inner" id="restock">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.in_stock_only) }}
|
||||
<span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
|
||||
|
||||
@@ -378,6 +421,7 @@ Unavailable") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div id="actions">
|
||||
<div class="pure-control-group">
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.jinja' import render_field %}
|
||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<div class="edit-form monospaced-textarea">
|
||||
|
||||
@@ -14,7 +15,6 @@
|
||||
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
<fieldset class="pure-group">
|
||||
<legend>
|
||||
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
|
||||
(,):
|
||||
@@ -23,7 +23,7 @@
|
||||
<br>
|
||||
URLs which do not pass validation will stay in the textarea.
|
||||
</legend>
|
||||
|
||||
{{ render_field(form.processor, class="processor") }}
|
||||
|
||||
<textarea name="urls" class="pure-input-1-2" placeholder="https://"
|
||||
style="width: 100%;
|
||||
@@ -31,15 +31,17 @@
|
||||
white-space: pre;
|
||||
overflow-wrap: normal;
|
||||
overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
|
||||
</fieldset>
|
||||
|
||||
<div id="quick-watch-processor-type">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="distill-io">
|
||||
|
||||
|
||||
<fieldset class="pure-group">
|
||||
|
||||
<legend>
|
||||
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
|
||||
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
|
||||
@@ -75,7 +77,7 @@
|
||||
]
|
||||
}
|
||||
" rows="25">{{ original_distill_json }}</textarea>
|
||||
</fieldset>
|
||||
|
||||
</div>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
|
||||
</form>
|
||||
|
||||
@@ -21,6 +21,10 @@
|
||||
{{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||
</div>
|
||||
</div>
|
||||
<div id="quick-watch-processor-type">
|
||||
{{ render_simple_field(form.processor, title="Edit first then Watch") }}
|
||||
</div>
|
||||
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
|
||||
</form>
|
||||
@@ -28,12 +32,13 @@
|
||||
<form class="pure-form" action="{{ url_for('form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<div id="checkbox-operations">
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="pause">Pause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="mute">Mute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unmute">UnMute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause">Pause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause">UnPause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute">Mute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute">UnMute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
|
||||
</div>
|
||||
<div>
|
||||
@@ -45,18 +50,18 @@
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
{% set sort_order = request.args.get('order', 'asc') == 'asc' %}
|
||||
{% set sort_attribute = request.args.get('sort', 'last_changed') %}
|
||||
{% set sort_order = sort_order or 'asc' %}
|
||||
{% set sort_attribute = sort_attribute or 'last_changed' %}
|
||||
{% set pagination_page = request.args.get('page', 0) %}
|
||||
|
||||
<div id="watch-table-wrapper">
|
||||
<table class="pure-table pure-table-striped watch-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th><input style="vertical-align: middle" type="checkbox" id="check-all"/> #</th>
|
||||
<th></th>
|
||||
{% set link_order = "desc" if sort_order else "asc" %}
|
||||
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
|
||||
{% set arrow_span = "" %}
|
||||
<th><input style="vertical-align: middle" type="checkbox" id="check-all"/> <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag)}}"># <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
|
||||
@@ -65,14 +70,13 @@
|
||||
</thead>
|
||||
<tbody>
|
||||
|
||||
{% set sorted_watches = watches|sort(attribute=sort_attribute, reverse=sort_order) %}
|
||||
{% for watch in sorted_watches %}
|
||||
{% for watch in watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc') %}
|
||||
|
||||
{# WIP for pagination, disabled for now
|
||||
{% if not ( loop.index >= 3 and loop.index <=4) %}{% continue %}{% endif %} -->
|
||||
#}
|
||||
<tr id="{{ watch.uuid }}"
|
||||
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
|
||||
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
|
||||
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
||||
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
|
||||
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
|
||||
@@ -113,12 +117,27 @@
|
||||
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
|
||||
<div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
|
||||
{% endif %}
|
||||
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
|
||||
<div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
|
||||
<div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{% endif %}
|
||||
{% if watch['track_ldjson_price_data'] == 'accepted' %}
|
||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon"/> Price</span>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% if watch['track_ldjson_price_data'] == 'accepted' %}
|
||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon"/> Price</span>
|
||||
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<span class="restock-label {{'in-stock' if watch['in_stock'] else 'not-in-stock' }}" title="detecting restock conditions">
|
||||
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
||||
{% if watch['last_checked'] %}
|
||||
{% if watch['in_stock'] %} In stock {% else %} Not in stock {% endif %}
|
||||
{% else %}
|
||||
Not yet checked
|
||||
{% endif %}
|
||||
</span>
|
||||
{% endif %}
|
||||
|
||||
{% if not active_tag %}
|
||||
<span class="watch-tag-list">{{ watch.tag}}</span>
|
||||
{% endif %}
|
||||
|
||||
2
changedetectionio/tests/restock/__init__.py
Normal file
2
changedetectionio/tests/restock/__init__.py
Normal file
@@ -0,0 +1,2 @@
"""Tests for the app."""

3
changedetectionio/tests/restock/conftest.py
Normal file
3
changedetectionio/tests/restock/conftest.py
Normal file
@@ -0,0 +1,3 @@
#!/usr/bin/python3

from .. import conftest
106
changedetectionio/tests/restock/test_restock.py
Normal file
106
changedetectionio/tests/restock/test_restock.py
Normal file
@@ -0,0 +1,106 @@
#!/usr/bin/python3
import os
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from changedetectionio.notification import (
    default_notification_body,
    default_notification_format,
    default_notification_title,
    valid_notification_formats,
)


def set_original_response():
    test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="sametext">Out of stock</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
    return None


def set_back_in_stock_response():
    test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="sametext">Available!</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
    return None

# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
def test_restock_detection(client, live_server):

    set_original_response()
    #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

    time.sleep(1)
    live_server_setup(live_server)
    #####################
    notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')


    #####################
    # Set this up for when we remove the notification from the watch, it should fallback with these details
    res = client.post(
        url_for("settings_page"),
        data={"application-notification_urls": notification_url,
              "application-notification_title": "fallback-title "+default_notification_title,
              "application-notification_body": "fallback-body "+default_notification_body,
              "application-notification_format": default_notification_format,
              "requests-time_between_check-minutes": 180,
              'application-fetch_backend': "html_webdriver"},
        follow_redirects=True
    )
    # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
    test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')


    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tag": '', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # Is it correctly show as NOT in stock?
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'not-in-stock' in res.data

    # Is it correctly shown as in stock
    set_back_in_stock_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'not-in-stock' not in res.data

    # We should have a notification
    time.sleep(2)
    assert os.path.isfile("test-datastore/notification.txt")
    os.unlink("test-datastore/notification.txt")

    # Default behaviour is to only fire notification when it goes OUT OF STOCK -> IN STOCK
    # So here there should be no file, because we go IN STOCK -> OUT OF STOCK
    set_original_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    assert not os.path.isfile("test-datastore/notification.txt")
176
changedetectionio/tests/test_add_replace_remove_filter.py
Normal file
176
changedetectionio/tests/test_add_replace_remove_filter.py
Normal file
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup
|
||||
from changedetectionio import html_tools
|
||||
|
||||
|
||||
def set_original(excluding=None, add_line=None):
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
<p>Some initial text</p>
|
||||
<p>So let's see what happens.</p>
|
||||
<p>and a new line!</p>
|
||||
<p>The golden line</p>
|
||||
<p>A BREAK TO MAKE THE TOP LINE STAY AS "REMOVED" OR IT WILL GET COUNTED AS "CHANGED INTO"</p>
|
||||
<p>Something irrelevant</p>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
if add_line:
|
||||
c=test_return_data.splitlines()
|
||||
c.insert(5, add_line)
|
||||
test_return_data = "\n".join(c)
|
||||
|
||||
if excluding:
|
||||
output = ""
|
||||
for i in test_return_data.splitlines():
|
||||
if not excluding in i:
|
||||
output += f"{i}\n"
|
||||
|
||||
test_return_data = output
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
def test_setup(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
def test_check_removed_line_contains_trigger(client, live_server):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
set_original()
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"trigger_text": 'The golden line',
|
||||
"url": test_url,
|
||||
'fetch_backend': "html_requests",
|
||||
'filter_text_removed': 'y'},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
set_original(excluding='Something irrelevant')
|
||||
|
||||
# A line thats not the trigger should not trigger anything
|
||||
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
assert b'1 watches queued for rechecking.' in res.data
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' not in res.data
|
||||
|
||||
# The trigger line is REMOVED, this should trigger
|
||||
set_original(excluding='The golden line')
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
|
||||
# Now add it back, and we should not get a trigger
|
||||
client.get(url_for("mark_all_viewed"), follow_redirects=True)
|
||||
set_original(excluding=None)
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' not in res.data
|
||||
|
||||
# Remove it again, and we should get a trigger
|
||||
set_original(excluding='The golden line')
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
|
||||
def test_check_add_line_contains_trigger(client, live_server):
|
||||
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://') + "?xxx={{ watch_url }}"
|
||||
|
||||
res = client.post(
|
||||
url_for("settings_page"),
|
||||
data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
|
||||
"application-notification_body": 'triggered text was -{{triggered_text}}-',
|
||||
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
|
||||
"application-notification_urls": test_notification_url,
|
||||
"application-minutes_between_check": 180,
|
||||
"application-fetch_backend": "html_requests"
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
set_original()
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"trigger_text": 'Oh yes please',
|
||||
"url": test_url,
|
||||
'fetch_backend': "html_requests",
|
||||
'filter_text_removed': '',
|
||||
'filter_text_added': 'y'},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
set_original(excluding='Something irrelevant')
|
||||
|
||||
# A line thats not the trigger should not trigger anything
|
||||
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
assert b'1 watches queued for rechecking.' in res.data
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' not in res.data
|
||||
|
||||
# The trigger line is ADDED, this should trigger
|
||||
set_original(add_line='<p>Oh yes please</p>')
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
with open("test-datastore/notification.txt", 'r') as f:
|
||||
response= f.read()
|
||||
assert '-Oh yes please-' in response
|
||||
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
@@ -87,7 +87,10 @@ def test_check_block_changedetection_text_NOT_present(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"text_should_not_be_present": ignore_text, "url": test_url, 'fetch_backend': "html_requests"},
        data={"text_should_not_be_present": ignore_text,
              "url": test_url,
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -129,7 +132,6 @@ def test_check_block_changedetection_text_NOT_present(client, live_server):
    set_modified_response_minus_block_text()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)

    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
@@ -59,6 +59,8 @@ def test_http_error_handler(client, live_server):
    _runner_test_http_errors(client, live_server, 404, 'Page not found')
    _runner_test_http_errors(client, live_server, 500, '(Internal server Error) received')
    _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400')
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

# Just to be sure error text is properly handled
def test_DNS_errors(client, live_server):
@@ -81,4 +83,48 @@ def test_DNS_errors(client, live_server):
    assert found_name_resolution_error
    # Should always record that we tried
    assert bytes("just now".encode('utf-8')) in res.data
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

# Re 1513
def test_low_level_errors_clear_correctly(client, live_server):
    #live_server_setup(live_server)
    # Give the endpoint time to spin up
    time.sleep(1)

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("<html><body><div id=here>Hello world</div></body></html>")

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)

    res = client.post(
        url_for("import_page"),
        data={"urls": "https://dfkjasdkfjaidjfsdajfksdajfksdjfDOESNTEXIST.com"},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(2)

    # We should see the DNS error
    res = client.get(url_for("index"))
    found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
    assert found_name_resolution_error

    # Update with what should work
    client.post(
        url_for("edit_page", uuid="first"),
        data={
            "url": test_url,
            "fetch_backend": "html_requests"},
        follow_redirects=True
    )

    # Now the error should be gone
    time.sleep(2)
    res = client.get(url_for("index"))
    found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
    assert not found_name_resolution_error

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -9,7 +9,7 @@ def test_setup(live_server):
# Unit test of the stripper
# Always we are dealing in utf-8
def test_strip_regex_text_func():
    from ..fetchers import text_json_diff as fetch_site_status
    from ..processors import text_json_diff as fetch_site_status

    test_content = """
but sometimes we want to remove the lines.

@@ -11,7 +11,8 @@ def test_setup(live_server):
# Unit test of the stripper
# Always we are dealing in utf-8
def test_strip_text_func():
    from ..fetchers import text_json_diff as fetch_site_status
    from ..processors import text_json_diff as fetch_site_status


    test_content = """
Some content
@@ -198,8 +198,8 @@ def test_check_json_without_filter(client, live_server):
    )

    # Should still see '"html": "<b>"'
    assert b'"<b>' in res.data
    assert res.data.count(b'{\n') >= 2
    assert b'"html": "<b>"' in res.data
    assert res.data.count(b'{') >= 2

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -73,16 +73,12 @@ def test_check_notification(client, live_server):
|
||||
# We write the PNG to disk, but a JPEG should appear in the notification
|
||||
# Write the last screenshot png
|
||||
testimage_png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
|
||||
# This one is created when we save the screenshot from the webdriver/playwright session (converted from PNG)
|
||||
testimage_jpg = '/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AKp//2Q=='
|
||||
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
datastore = 'test-datastore'
|
||||
with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
|
||||
f.write(base64.b64decode(testimage_png))
|
||||
with open(os.path.join(datastore, str(uuid), 'last-screenshot.jpg'), 'wb') as f:
|
||||
f.write(base64.b64decode(testimage_jpg))
|
||||
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
@@ -162,12 +158,12 @@ def test_check_notification(client, live_server):
|
||||
|
||||
# Check the attachment was added, and that it is a JPEG from the original PNG
|
||||
notification_submission_object = json.loads(notification_submission)
|
||||
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.jpg'
|
||||
# We keep PNG screenshots for now
|
||||
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
|
||||
assert len(notification_submission_object['attachments'][0]['base64'])
|
||||
assert notification_submission_object['attachments'][0]['mimetype'] == 'image/jpeg'
|
||||
assert notification_submission_object['attachments'][0]['mimetype'] == 'image/png'
|
||||
jpeg_in_attachment = base64.b64decode(notification_submission_object['attachments'][0]['base64'])
|
||||
assert b'JFIF' in jpeg_in_attachment
|
||||
assert testimage_png not in notification_submission
|
||||
|
||||
# Assert that the JPEG is readable (didn't get chewed up somewhere)
|
||||
from PIL import Image
|
||||
import io
|
||||
@@ -299,7 +295,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
client.get(
|
||||
url_for("form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
# Add a watch and trigger a HTTP POST
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
|
||||
@@ -94,7 +94,6 @@ def test_unique_lines_functionality(client, live_server):
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data


    # Now set the content which contains the new text and re-ordered existing text
    set_modified_with_trigger_text_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -13,21 +13,37 @@ class TestDiffBuilder(unittest.TestCase):

    def test_expected_diff_output(self):
        base_dir = os.path.dirname(__file__)
        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
        with open(base_dir + "/test-content/before.txt", 'r') as f:
            previous_version_file_contents = f.read()

        with open(base_dir + "/test-content/after.txt", 'r') as f:
            newest_version_file_contents = f.read()

        output = diff.render_diff(previous_version_file_contents=previous_version_file_contents,
                                  newest_version_file_contents=newest_version_file_contents)

        output = output.split("\n")


        self.assertIn('(changed) ok', output)
        self.assertIn('(into) xok', output)
        self.assertIn('(into) next-x-ok', output)
        self.assertIn('(added) and something new', output)


        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
            newest_version_file_contents = f.read()
        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents)
        output = output.split("\n")
        self.assertIn('(removed) for having learned computerese,', output)
        self.assertIn('(removed) I continue to examine bits, bytes and words', output)

        #diff_removed
        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt", include_equal=False, include_removed=True, include_added=False)
        with open(base_dir + "/test-content/before.txt", 'r') as f:
            previous_version_file_contents = f.read()

        with open(base_dir + "/test-content/after.txt", 'r') as f:
            newest_version_file_contents = f.read()
        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
        output = output.split("\n")
        self.assertIn('(changed) ok', output)
        self.assertIn('(into) xok', output)
@@ -35,7 +51,9 @@ class TestDiffBuilder(unittest.TestCase):
        self.assertNotIn('(added) and something new', output)

        #diff_removed
        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt", include_equal=False, include_removed=True, include_added=False)
        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
            newest_version_file_contents = f.read()
        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
        output = output.split("\n")
        self.assertIn('(removed) for having learned computerese,', output)
        self.assertIn('(removed) I continue to examine bits, bytes and words', output)
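The updated test reflects an API change: diff.render_diff() now takes the previous/newest snapshot *contents* as strings rather than file paths. A brief usage sketch, assuming the module import path used elsewhere in the package and only the parameters visible in this diff:

```python
# Hedged sketch of the new render_diff() call style; the sample strings are illustrative.
from changedetectionio import diff

before = "ok\nfor having learned computerese,\n"
after = "xok\nand something new\n"

# Keyword form, as exercised by the unit test above
print(diff.render_diff(previous_version_file_contents=before,
                       newest_version_file_contents=after))

# Only additions, joined with a custom separator (parameters shown in the update_worker diff below)
print(diff.render_diff(before, after, include_removed=False, line_feed_sep="<br>"))
```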
@@ -4,7 +4,8 @@ import queue
import time

from changedetectionio import content_fetcher
from .fetchers.text_json_diff import FilterNotFoundInResponse
from .processors.text_json_diff import FilterNotFoundInResponse


# A single update worker
#
@@ -68,18 +69,28 @@ class update_worker(threading.Thread):
            else:
                line_feed_sep = "\n"

            with open(watch_history[dates[-1]], 'rb') as f:
                snapshot_contents = f.read()
            # Add text that was triggered
            snapshot_contents = watch.get_history_snapshot(dates[-1])
            trigger_text = watch.get('trigger_text', [])
            triggered_text = ''

            if len(trigger_text):
                from . import html_tools
                triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
                if triggered_text:
                    triggered_text = line_feed_sep.join(triggered_text)


            n_object.update({
                'watch_url': watch['url'],
                'current_snapshot': snapshot_contents,
                'diff': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), line_feed_sep=line_feed_sep),
                'diff_added': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_removed=False, line_feed_sep=line_feed_sep),
                'diff_full': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_equal=True, line_feed_sep=line_feed_sep),
                'diff_removed': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_added=False, line_feed_sep=line_feed_sep),
                'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else None,
                'triggered_text': triggered_text,
                'uuid': watch_uuid,
                'screenshot': watch.get_screenshot_as_jpeg() if watch.get('notification_screenshot') else None,
                'current_snapshot': snapshot_contents.decode('utf-8'),
                'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
                'diff_added': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_removed=False, line_feed_sep=line_feed_sep),
                'diff_removed': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_added=False, line_feed_sep=line_feed_sep),
                'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_equal=True, line_feed_sep=line_feed_sep)
                'watch_url': watch['url'],
            })
            logging.info (">> SENDING NOTIFICATION")
            self.notification_q.put(n_object)
@@ -152,9 +163,8 @@ class update_worker(threading.Thread):
            os.unlink(full_path)

    def run(self):
        from .fetchers import text_json_diff as fetch_site_status

        update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
        from .processors import text_json_diff, restock_diff

        while not self.app.config.exit.is_set():

@@ -171,11 +181,21 @@ class update_worker(threading.Thread):
                changed_detected = False
                contents = b''
                process_changedetection_results = True
                update_obj= {}
                print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
                update_obj = {}
                print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority,
                                                                       self.datastore.data['watching'][uuid]['url']))
                now = time.time()

                try:
                    processor = self.datastore.data['watching'][uuid].get('processor','text_json_diff')

                    # @todo some way to switch by name
                    if processor == 'restock_diff':
                        update_handler = restock_diff.perform_site_check(datastore=self.datastore)
                    else:
                        # Used as a default and also by some tests
                        update_handler = text_json_diff.perform_site_check(datastore=self.datastore)

                    changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
                    # Re #342
                    # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -313,6 +333,7 @@ class update_worker(threading.Thread):

                self.cleanup_error_artifacts(uuid)

                #
                # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                if process_changedetection_results:
                    try:
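The run() change above replaces the single hard-coded fetch_site_status handler with a per-watch choice between the text_json_diff and restock_diff processors. A minimal sketch of just that selection step, using only the names visible in this diff (the real worker passes more context around it):

```python
# Hedged sketch of the per-watch processor dispatch; mirrors the if/else shown above.
from changedetectionio.processors import text_json_diff, restock_diff

def get_update_handler(watch: dict, datastore):
    processor_name = watch.get('processor', 'text_json_diff')
    if processor_name == 'restock_diff':
        # Re-stock detection mode
        return restock_diff.perform_site_check(datastore=datastore)
    # Default mode, also used by most tests
    return text_json_diff.perform_site_check(datastore=datastore)
```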
Binary file not shown.
Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 171 KiB |
@@ -68,5 +68,5 @@ pillow
# playwright is installed at Dockerfile build time because it's not available on all platforms

# Include pytest, so if theres a support issue we can ask them to run these tests on their setup
pytest ~=6.2
pytest ~=7.2
pytest-flask ~=1.2