Compare commits

..

6 Commits

Author SHA1 Message Date
dgtlmoon
ed584b38bf API Access should be limited by preference 2025-03-23 00:23:28 +01:00
dgtlmoon
46d11f3d70 Re #3045 - API Access should still work even when UI Password is enabled 2025-03-23 00:11:04 +01:00
dgtlmoon
10b2bbea83 0.49.5 2025-03-22 22:51:33 +01:00
dgtlmoon
32d110b92f Template tidyup & UI Fixes (#3044) 2025-03-22 22:48:01 +01:00
dgtlmoon
860a5f5c1a Watch history - Ensure atomic/safe history data disk writes (#3042 #3041)
Some checks are pending
Build and push containers / metadata (push) Waiting to run
Build and push containers / build-push-containers (push) Waiting to run
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Waiting to run
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Blocked by required conditions
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Blocked by required conditions
ChangeDetection.io App Test / lint-code (push) Waiting to run
ChangeDetection.io App Test / test-application-3-10 (push) Blocked by required conditions
ChangeDetection.io App Test / test-application-3-11 (push) Blocked by required conditions
ChangeDetection.io App Test / test-application-3-12 (push) Blocked by required conditions
ChangeDetection.io App Test / test-application-3-13 (push) Blocked by required conditions
2025-03-22 19:16:08 +01:00
Nico Ell
70a18ee4b5 Testing - Replace Linux only 'resource' library with cross-platform 'psutil' library (#3037)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / test-container-build (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-03-21 09:50:32 +01:00
43 changed files with 564 additions and 1278 deletions

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.49.4'
__version__ = '0.49.5'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -33,6 +33,7 @@ def sigshutdown_handler(_signo, _stack_frame):
global datastore
name = signal.Signals(_signo).name
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
datastore.sync_to_json()
logger.success('Sync JSON to disk complete.')
# This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it.
# Solution: move to gevent or other server in the future (#2014)

View File

@@ -12,10 +12,11 @@ import copy
# See docs/README.md for rebuilding the docs/apidoc information
from . import api_schema
from ..model import schema as watch_schema
from ..model import watch_base
# Build a JSON Schema atleast partially based on our Watch model
schema = api_schema.build_watch_json_schema(watch_schema)
watch_base_config = watch_base()
schema = api_schema.build_watch_json_schema(watch_base_config)
schema_create_watch = copy.deepcopy(schema)
schema_create_watch['required'] = ['url']
@@ -52,9 +53,9 @@ class Watch(Resource):
@apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
"""
from copy import deepcopy
watch = self.datastore.data['watching'].get(uuid)
watch = deepcopy(self.datastore.data['watching'].get(uuid))
if not watch:
abort(404, message=f'No watch exists with the UUID of {uuid}')
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if request.args.get('recheck'):
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
@@ -72,16 +73,13 @@ class Watch(Resource):
self.datastore.data['watching'].get(uuid).unmute()
return "OK", 200
response = dict(watch.get_data())
# Add properties that aren't included in the standard dictionary items (they are properties/attr)
response['history_n'] = watch.history_n
response['last_changed'] = watch.last_changed
response['viewed'] = watch.viewed
response['title'] = watch.get('title')
return response
# Return without history, get that via another API call
# Properties are not returned as a JSON, so add the required props manually
watch['history_n'] = watch.history_n
# attr .last_changed will check for the last written text snapshot on change
watch['last_changed'] = watch.last_changed
watch['viewed'] = watch.viewed
return watch
@auth.check_token
def delete(self, uuid):
@@ -116,17 +114,16 @@ class Watch(Resource):
@apiSuccess (200) {String} OK Was updated
@apiSuccess (500) {String} ERR Some other error
"""
if not self.datastore.data['watching'].get(uuid):
abort(404, message=f'No watch exists with the UUID of {uuid}')
watch = self.datastore.data['watching'].get(uuid)
if not watch:
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if request.json.get('proxy'):
plist = self.datastore.proxy_list
if not request.json.get('proxy') in plist:
return f"Invalid proxy choice, currently supported proxies are '{', '.join(plist)}'", 400
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
self.datastore.data['watching'][uuid].update(request.json)
self.datastore.data['watching'][uuid].save_data()
watch.update(request.json)
return "OK", 200
@@ -288,8 +285,6 @@ class CreateWatch(Resource):
list = {}
tag_limit = request.args.get('tag', '').lower()
for uuid, watch in self.datastore.data['watching'].items():
# Watch tags by name (replace the other calls?)
tags = self.datastore.get_all_tags_for_watch(uuid=uuid)

View File

@@ -11,22 +11,14 @@ def check_token(f):
datastore = args[0].datastore
config_api_token_enabled = datastore.data['settings']['application'].get('api_access_token_enabled')
if not config_api_token_enabled:
return
try:
api_key_header = request.headers['x-api-key']
except KeyError:
return make_response(
jsonify("No authorization x-api-key header."), 403
)
config_api_token = datastore.data['settings']['application'].get('api_access_token')
if api_key_header != config_api_token:
return make_response(
jsonify("Invalid access - API key invalid."), 403
)
# config_api_token_enabled - a UI option in settings if access should obey the key or not
if config_api_token_enabled:
if request.headers.get('x-api-key') != config_api_token:
return make_response(
jsonify("Invalid access - API key invalid."), 403
)
return f(*args, **kwargs)

View File

@@ -89,6 +89,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash("Maximum number of backups reached, please remove some", "error")
return redirect(url_for('backups.index'))
# Be sure we're written fresh
datastore.sync_to_json()
zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching")))
zip_thread.start()
backup_threads.append(zip_thread)

View File

@@ -63,7 +63,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# Could be some remaining, or we could be on GET
form = forms.importForm(formdata=request.form if request.method == 'POST' else None, datastore=datastore)
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
output = render_template("import.html",
form=form,
import_url_list_remaining="\n".join(remaining_urls),

View File

@@ -3,6 +3,7 @@ import time
from wtforms import ValidationError
from loguru import logger
from changedetectionio.forms import validate_url
class Importer():
@@ -150,7 +151,6 @@ class import_xlsx_wachete(Importer):
self.new_uuids = []
from openpyxl import load_workbook
from changedetectionio.forms import validate_url
try:
wb = load_workbook(data)

View File

@@ -16,26 +16,24 @@
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="tab-pane-inner" id="url-list">
<legend>
<div class="pure-control-group">
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
(,):
<br>
<code>https://example.com tag1, tag2, last tag</code>
<br>
<p><strong>Example: </strong><code>https://example.com tag1, tag2, last tag</code></p>
URLs which do not pass validation will stay in the textarea.
</legend>
</div>
{{ render_field(form.processor, class="processor") }}
<div class="pure-control-group">
<textarea name="urls" class="pure-input-1-2" placeholder="https://"
style="width: 100%;
font-family:monospace;
white-space: pre;
overflow-wrap: normal;
overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
<div id="quick-watch-processor-type">
</div>
</div>
<div id="quick-watch-processor-type"></div>
</div>
@@ -43,7 +41,7 @@
<legend>
<div class="pure-control-group">
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
<br>
@@ -51,7 +49,7 @@
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
Be sure to set your default fetcher to Chrome if required.<br>
</p>
</legend>
</div>
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;

View File

@@ -1,28 +1,25 @@
from changedetectionio.strtobool import strtobool
from flask import Blueprint, flash, redirect, url_for
from flask_login import login_required
from queue import PriorityQueue
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import queuedWatchMetaData
from changedetectionio.processors.constants import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from queue import PriorityQueue
PRICE_DATA_TRACK_ACCEPT = 'accepted'
PRICE_DATA_TRACK_REJECT = 'rejected'
def construct_blueprint(datastore, update_q: PriorityQueue):
def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue):
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
@login_required
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
def accept(uuid):
old_data = datastore.data['watching'][uuid].get_data()
datastore.data['watching'][uuid] = datastore.rehydrate_entity(default_dict=old_data, processor_override='restock_diff')
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
datastore.data['watching'][uuid].clear_watch()
# Queue the watch for updating
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return redirect(url_for("index"))
@login_required

View File

@@ -1,3 +0,0 @@
PRICE_DATA_TRACK_ACCEPT = 'accepted'
PRICE_DATA_TRACK_REJECT = 'rejected'

View File

@@ -71,12 +71,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
datastore.save_settings()
datastore.needs_write_urgent = True
flash("Password protection enabled.", 'notice')
flask_login.logout_user()
return redirect(url_for('index'))
datastore.save_settings()
datastore.needs_write_urgent = True
flash("Settings updated.")
else:
@@ -84,24 +84,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Convert to ISO 8601 format, all date/time relative events stored as UTC time
utc_time = datetime.now(ZoneInfo("UTC")).isoformat()
# Get processor plugins info
from changedetectionio.processors import get_all_plugins_info
plugins_info = get_all_plugins_info()
# Process settings including plugin toggles
if request.method == 'POST' and form.validate():
# Process the main form data
app_update = dict(deepcopy(form.data['application']))
# Don't update password with '' or False (Added by wtforms when not in submission)
if 'password' in app_update and not app_update['password']:
del (app_update['password'])
datastore.data['settings']['application'].update(app_update)
datastore.data['settings']['requests'].update(form.data['requests'])
datastore.save_settings()
flash("Settings updated.")
output = render_template("settings.html",
api_key=datastore.data['settings']['application'].get('api_access_token'),
@@ -111,7 +93,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
form=form,
hide_remove_pass=os.getenv("SALTED_PASS", False),
min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
plugins_info=plugins_info,
settings_application=datastore.data['settings']['application'],
timezone_default_config=datastore.data['settings']['application'].get('timezone'),
utc_time=utc_time,
@@ -124,6 +105,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def settings_reset_api_key():
secret = secrets.token_hex(16)
datastore.data['settings']['application']['api_access_token'] = secret
datastore.needs_write_urgent = True
flash("API Key was regenerated.")
return redirect(url_for('settings.settings_page')+'#api')

View File

@@ -9,7 +9,6 @@
const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
{% endif %}
</script>
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
@@ -26,7 +25,6 @@
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#timedate">Time &amp Date</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
<li class="tab"><a href="#plugins">Plugins</a></li>
</ul>
</div>
<div class="box-wrap inner">
@@ -298,32 +296,6 @@ nav
{{ render_field(form.requests.form.extra_browsers) }}
</div>
</div>
<div class="tab-pane-inner" id="plugins">
<div class="pure-control-group">
<h4>Registered Plugins</h4>
<p>The following plugins are currently registered in the system - <a href="https://changedetection.io/plugins">Get more plugins here</a></p>
<table class="pure-table pure-table-striped">
<thead>
<tr>
<th>Name</th>
<th>Description</th>
<th>Version</th>
</tr>
</thead>
<tbody>
{% for plugin in plugins_info %}
<tr>
<td>{{ plugin.name }}</td>
<td>{{ plugin.description }}</td>
<td>{{ plugin.version }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div id="actions">
<div class="pure-control-group">
{{ render_button(form.save_button) }}

View File

@@ -56,7 +56,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def mute(uuid):
if datastore.data['settings']['application']['tags'].get(uuid):
datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
datastore.data['settings']['application']['tags'][uuid].save_data()
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
@@ -177,8 +176,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
datastore.data['settings']['application']['tags'][uuid].update(form.data)
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
datastore.data['settings']['application']['tags'][uuid].save_data()
datastore.needs_write_urgent = True
flash("Updated")
return redirect(url_for('tags.tags_overview_page'))

View File

@@ -163,7 +163,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = True
datastore.data['watching'][uuid.strip()].save_data()
flash("{} watches paused".format(len(uuids)))
elif (op == 'unpause'):
@@ -171,7 +170,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = False
datastore.data['watching'][uuid.strip()].save_data()
flash("{} watches unpaused".format(len(uuids)))
elif (op == 'mark-viewed'):
@@ -186,7 +184,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['notification_muted'] = True
datastore.data['watching'][uuid.strip()].save_data()
flash("{} watches muted".format(len(uuids)))
elif (op == 'unmute'):
@@ -194,7 +191,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['notification_muted'] = False
datastore.data['watching'][uuid.strip()].save_data()
flash("{} watches un-muted".format(len(uuids)))
elif (op == 'recheck'):
@@ -210,7 +206,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]["last_error"] = False
datastore.data['watching'][uuid].save_data()
flash(f"{len(uuids)} watches errors cleared")
elif (op == 'clear-history'):
@@ -249,9 +244,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
flash(f"{len(uuids)} watches were tagged")
for uuid in uuids:
datastore.data['watching'][uuid.strip()].save_data()
return redirect(url_for('index'))

View File

@@ -24,6 +24,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
def edit_page(uuid):
from changedetectionio import forms
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
from changedetectionio import processors
import importlib
@@ -42,15 +43,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
switch_processor = request.args.get('switch_processor')
if switch_processor:
for p in processors.available_processors(datastore):
for p in processors.available_processors():
if p[0] == switch_processor:
datastore.data['watching'][uuid]['processor'] = switch_processor
flash(f"Switched to mode - {p[1]}.")
datastore.clear_watch_history(uuid)
redirect(url_for('ui_edit.edit_page', uuid=uuid))
default = datastore.data['watching'][uuid]
# be sure we update with a copy instead of accidently editing the live object by reference
default = deepcopy(datastore.data['watching'][uuid])
# Defaults for proxy choice
if datastore.proxy_list is not None: # When enabled
@@ -60,19 +61,31 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
default['proxy'] = ''
# proxy_override set to the json/text list of the items
# Get the appropriate form class for this processor using the pluggy system
processor_name = datastore.data['watching'][uuid].get('processor', 'text_json_diff')
form_class = processors.get_form_class_for_processor(processor_name)
if not form_class:
flash(f"Cannot load the edit form for processor/plugin '{processor_name}', plugin missing?", 'error')
# Does it use some custom form? does one exist?
processor_name = datastore.data['watching'][uuid].get('processor', '')
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
if not processor_classes:
flash(f"Cannot load the edit form for processor/plugin '{processor_classes[1]}', plugin missing?", 'error')
return redirect(url_for('index'))
parent_module = processors.get_parent_module(processor_classes[0])
try:
# Get the parent of the "processor.py" go up one, get the form (kinda spaghetti but its reusing existing code)
forms_module = importlib.import_module(f"{parent_module.__name__}.forms")
# Access the 'processor_settings_form' class from the 'forms' module
form_class = getattr(forms_module, 'processor_settings_form')
except ModuleNotFoundError as e:
# .forms didnt exist
form_class = forms.processor_text_json_diff_form
except AttributeError as e:
# .forms exists but no useful form
form_class = forms.processor_text_json_diff_form
form = form_class(formdata=request.form if request.method == 'POST' else None,
data=default,
extra_notification_tokens=default.extra_notification_token_values(),
default_system_settings=datastore.data['settings'],
datastore=datastore
default_system_settings=datastore.data['settings']
)
# For the form widget tag UUID back to "string name" for the field
@@ -114,7 +127,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
extra_update_obj['paused'] = False
extra_update_obj['time_between_check'] = form.time_between_check.data
extra_update_obj['ignore_text'] = form.ignore_text.data
# Ignore text
form_ignore_text = form.ignore_text.data
datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
# Be sure proxy value is None
if datastore.proxy_list is not None and form.data['proxy'] == '':
@@ -140,23 +156,22 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
tag_uuids.append(datastore.add_tag(name=t))
extra_update_obj['tags'] = tag_uuids
datastore.data['watching'][uuid].update(form.data)
datastore.data['watching'][uuid].update(extra_update_obj)
if not datastore.data['watching'][uuid].get('tags'):
# Force it to be a list, because form.data['tags'] will be string if nothing found
# And del(form.data['tags'] ) wont work either for some reason
datastore.data['watching'][uuid]['tags'] = []
datastore.update_watch(uuid=uuid, update_obj=form.data | extra_update_obj)
# Recast it if need be to right data Watch handler
processor_name = datastore.data['watching'][uuid].get('processor')
watch_class = processors.get_watch_model_for_processor(processor_name)
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid])
datastore.data['watching'][uuid].save_data()
flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
# But in the case something is added we should save straight away
datastore.needs_write_urgent = True
# Do not queue on edit if its not within the time range
@@ -183,7 +198,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}")
return False
#############################
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
# Queue the watch for immediate recheck, with a higher priority
@@ -222,7 +236,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Only works reliably with Playwright
template_args = {
'available_processors': processors.available_processors(datastore),
'available_processors': processors.available_processors(),
'available_timezones': sorted(available_timezones()),
'browser_steps_config': browser_step_ui_config,
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),

View File

@@ -191,7 +191,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
@login_optionally_required
def form_quick_watch_add():
from changedetectionio import forms
form = forms.quickWatchForm(request.form, datastore=datastore)
form = forms.quickWatchForm(request.form)
if not form.validate():
for widget, l in form.errors.items():

View File

@@ -4,7 +4,6 @@
import flask_login
import locale
import os
import pytz
import queue
import threading
import time
@@ -75,7 +74,6 @@ if os.getenv('FLASK_SERVER_NAME'):
# Disables caching of the templates
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
app.jinja_env.globals.update(hasattr=hasattr)
csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]
@@ -245,6 +243,9 @@ def changedetection_app(config=None, datastore_o=None):
# RSS access with token is allowed
elif request.endpoint and 'rss.feed' in request.endpoint:
return None
# API routes - use their own auth mechanism (@auth.check_token)
elif request.path.startswith('/api/'):
return None
else:
return login_manager.unauthorized()
@@ -344,7 +345,7 @@ def changedetection_app(config=None, datastore_o=None):
@login_optionally_required
def index():
global datastore
from changedetectionio.forms import quickWatchForm
from changedetectionio import forms
active_tag_req = request.args.get('tag', '').lower().strip()
active_tag_uuid = active_tag = None
@@ -370,6 +371,7 @@ def changedetection_app(config=None, datastore_o=None):
elif op == 'mute':
datastore.data['watching'][uuid].toggle_mute()
datastore.needs_write = True
return redirect(url_for('index', tag = active_tag_uuid))
# Sort by last_changed and add the uuid which is usually the key..
@@ -394,7 +396,7 @@ def changedetection_app(config=None, datastore_o=None):
else:
sorted_watches.append(watch)
form = quickWatchForm(request.form, datastore=datastore)
form = forms.quickWatchForm(request.form)
page = request.args.get(get_page_parameter(), type=int, default=1)
total_count = len(sorted_watches)

View File

@@ -23,7 +23,7 @@ from wtforms import (
from flask_wtf.file import FileField, FileAllowed
from wtforms.fields import FieldList
from wtforms.validators import ValidationError, Optional
from wtforms.validators import ValidationError
from validators.url import url as url_validator
@@ -508,14 +508,8 @@ class quickWatchForm(Form):
url = fields.URLField('URL', validators=[validateURL()])
tags = StringTagUUID('Group tag', [validators.Optional()])
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
processor = RadioField(u'Processor', default="text_json_diff")
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
# Set processor choices based on datastore if available
#datastore = kwargs.get('datastore')
self.processor.choices = self.processors.available_processors()
@@ -528,13 +522,6 @@ class commonSettingsForm(Form):
self.notification_body.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
# Set processor choices based on datastore if available
datastore = kwargs.get('datastore')
if datastore:
self.processor.choices = self.processors.available_processors(datastore)
else:
self.processor.choices = self.processors.available_processors()
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
@@ -542,26 +529,17 @@ class commonSettingsForm(Form):
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
processor = RadioField( label=u"Processor - What do you want to achieve?", default="text_json_diff")
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
class importForm(Form):
from . import processors
processor = RadioField(u'Processor', default="text_json_diff")
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
urls = TextAreaField('URLs')
xlsx_file = FileField('Upload .xlsx file', validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')])
file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
# Set processor choices based on datastore if available
datastore = kwargs.get('datastore')
if datastore:
self.processor.choices = self.processors.available_processors(datastore)
else:
self.processor.choices = self.processors.available_processors()
class SingleBrowserStep(Form):
@@ -736,12 +714,11 @@ class globalSettingsRequestForm(Form):
default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides")
def validate_extra_proxies(self, extra_validators=None):
if self.data.get('extra_proxies'):
for e in self.data['extra_proxies']:
if e.get('proxy_name') or e.get('proxy_url'):
if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip():
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
return False
for e in self.data['extra_proxies']:
if e.get('proxy_name') or e.get('proxy_url'):
if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip():
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
return False
# datastore.data['settings']['application']..
@@ -772,6 +749,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
validators=[validators.NumberRange(min=0,
message="Should contain zero or more attempts")])
class globalSettingsForm(Form):
# Define these as FormFields/"sub forms", this way it matches the JSON storage
# datastore.data['settings']['application']..

View File

@@ -53,7 +53,7 @@ class model(dict):
'shared_diff_access': False,
'webdriver_delay': None , # Extra delay in seconds before extracting text
'tags': {}, #@todo use Tag.model initialisers
'timezone': None # Default IANA timezone name
'timezone': None, # Default IANA timezone name
}
}
}

View File

@@ -1,57 +1,14 @@
import os
import json
import uuid as uuid_builder
import time
from copy import deepcopy
from loguru import logger
from changedetectionio.model import watch_base, schema
from changedetectionio.model import watch_base
class model(watch_base):
    """Tag model.

    Tags are persisted as individual JSON files under
    ``{datastore_path}/tags/{uuid}.json`` (separate from the per-watch
    data directories).  Note: the previous docstring claimed
    ``tags/{uuid}/tag.json``, which did not match what save_data() writes.
    """
    __datastore_path = None  # set per-instance in __init__ (name-mangled to _model__datastore_path)

    def __init__(self, *arg, **kw):
        super(model, self).__init__(*arg, **kw)
        self.__datastore_path = kw.get("datastore_path")

        # 'default' carries initial values for this tag; fold them in.
        self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
        if kw.get('default'):
            self.update(kw['default'])
            del kw['default']

    @property
    def watch_data_dir(self):
        # Override: tags keep their data under '{datastore_path}/tags/{uuid}'
        # instead of the normal watch data directory.
        if self.__datastore_path:
            tags_path = os.path.join(self.__datastore_path, 'tags')
            # exist_ok avoids a race if two threads create the dir at once
            os.makedirs(tags_path, exist_ok=True)
            return os.path.join(tags_path, self['uuid'])
        return None

    def save_data(self):
        """Write this tag to tags/{uuid}.json (atomically, via a .tmp file)."""
        logger.debug(f"Saving tag {self['uuid']}")
        if not self.get('uuid'):
            # Might have been called when creating the tag
            return

        tags_path = os.path.join(self.__datastore_path, 'tags')
        # makedirs(exist_ok=True) replaces the racy isdir()+mkdir() pair
        os.makedirs(tags_path, exist_ok=True)

        path = os.path.join(tags_path, self.get('uuid') + ".json")
        try:
            # Write to a sibling temp file then atomically replace, so a crash
            # mid-write can never leave a truncated/corrupt tag file behind.
            with open(path + ".tmp", 'w') as json_file:
                json.dump(self.get_data(), json_file, indent=4)
            os.replace(path + ".tmp", path)
        except Exception as e:
            logger.error(f"Error writing JSON for tag {self.get('uuid')}!! (JSON file save was skipped) : {str(e)}")

View File

@@ -38,13 +38,17 @@ class model(watch_base):
jitter_seconds = 0
def __init__(self, *arg, **kw):
# Per-instance datastore root (name-mangled to _model__datastore_path).
self.__datastore_path = kw.get('datastore_path')
# Remove it before calling the base constructor so it is not stored as watch data.
if kw.get('datastore_path'):
del kw['datastore_path']
super(model, self).__init__(*arg, **kw)
# 'default' carries initial field values for this watch; fold them in and
# drop the key so it never lingers as a real data field.
if kw.get('default'):
self.update(kw['default'])
del kw['default']
if self.get('default'):
del self['default']
# Be sure the cached timestamp is ready
# (reading self.history presumably primes the cached history index — TODO confirm)
bump = self.history
@@ -292,12 +296,11 @@ class model(watch_base):
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
return f.read()
# Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp, snapshot_id):
    """Write one text snapshot to disk and append it to history.txt.

    The snapshot is brotli-compressed when it exceeds
    SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD bytes (unless disabled via
    DISABLE_BROTLI_TEXT_SNAPSHOT).  Both the snapshot file and the
    history.txt index line are written as atomically as the platform allows.

    Fix: the legacy (non-atomic) write path was still present alongside the
    new atomic one, so each snapshot was written twice and every change was
    appended to history.txt twice.
    """
    import brotli
    import tempfile

    logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")

    self.ensure_data_dir_exists()

    threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
    skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

    # Decide on snapshot filename and encoding once; write exactly once below.
    if not skip_brotli and len(contents) > threshold:
        snapshot_fname = f"{snapshot_id}.txt.br"
        encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
    else:
        snapshot_fname = f"{snapshot_id}.txt"
        encoded_data = contents.encode('utf-8')

    dest = os.path.join(self.watch_data_dir, snapshot_fname)

    # Write snapshot file atomically if it doesn't exist.  The temp file is
    # created in the destination directory so the rename cannot cross
    # filesystems and stays atomic.
    if not os.path.exists(dest):
        with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
            tmp.write(encoded_data)
            tmp.flush()
            os.fsync(tmp.fileno())
            tmp_path = tmp.name
        os.replace(tmp_path, dest)

    # Append to history.txt atomically
    index_fname = os.path.join(self.watch_data_dir, "history.txt")
    index_line = f"{timestamp},{snapshot_fname}\n"

    # Lets try force flush here since it's usually a very small file
    # If this still fails in the future then try reading all to memory first, re-writing etc
    with open(index_fname, 'a', encoding='utf-8') as f:
        f.write(index_line)
        f.flush()
        os.fsync(f.fileno())

    # Update internal state (name-mangled private counters of this class)
    self.__newest_history_key = timestamp
    self.__history_n += 1
@@ -413,6 +427,11 @@ class model(watch_base):
def snapshot_error_screenshot_ctime(self):
return self.__get_file_ctime('last-error-screenshot.png')
@property
def watch_data_dir(self):
# The base dir of the watch data: '{datastore_path}/{uuid}', or None when
# no datastore path was supplied at construction time.
return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")

View File

@@ -1,246 +1,135 @@
import os
import uuid
from copy import deepcopy
from loguru import logger
import time
import json
from changedetectionio import strtobool
from changedetectionio.notification import default_notification_format_for_watch
schema = {
# Custom notification content
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
# Should be all None by default, so we use the system default in this case.
'body': None,
'browser_steps': [],
'browser_steps_last_error_step': None,
'check_count': 0,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'content-type': None,
'date_created': None,
'extract_text': [], # Extract text by regex after filters
'extract_title_as_title': False,
'fetch_backend': 'system', # plaintext, playwright etc
'fetch_time': 0.0,
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'filter_text_added': True,
'filter_text_removed': True,
'filter_text_replaced': True,
'follow_price_changes': True,
'has_ldjson_price_data': None,
'headers': {}, # Extra headers to send
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
'in_stock_only': True, # Only trigger change on going to instock from out-of-stock
'include_filters': [],
'last_checked': 0,
'last_error': False,
'last_modified': None,
'last_viewed': 0, # history key value of the last viewed via the [diff] link
'method': 'GET',
'notification_alert_count': 0,
'notification_body': None,
'notification_format': default_notification_format_for_watch,
'notification_muted': False,
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
'notification_title': None,
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'paused': False,
'previous_md5': False,
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
'processor': 'text_json_diff', # could be restock_diff or others from .processors
'processor_state': {}, # Extra configs for custom processors/plugins, keyed by processor name
'price_change_threshold_percent': None,
'proxy': None, # Preferred proxy connection
'remote_server_reply': None, # From 'server' reply header
'sort_text_alphabetically': False,
'subtractive_selectors': [],
'tag': '', # Old system of text name for a tag, to be removed
'tags': [], # list of UUIDs to App.Tags
'text_should_not_be_present': [], # Text that should not present
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
'time_between_check_use_default': True,
"time_schedule_limit": {
"enabled": False,
"monday": {
"enabled": True,
"start_time": "00:00",
"duration": {
"hours": "24",
"minutes": "00"
}
},
"tuesday": {
"enabled": True,
"start_time": "00:00",
"duration": {
"hours": "24",
"minutes": "00"
}
},
"wednesday": {
"enabled": True,
"start_time": "00:00",
"duration": {
"hours": "24",
"minutes": "00"
}
},
"thursday": {
"enabled": True,
"start_time": "00:00",
"duration": {
"hours": "24",
"minutes": "00"
}
},
"friday": {
"enabled": True,
"start_time": "00:00",
"duration": {
"hours": "24",
"minutes": "00"
}
},
"saturday": {
"enabled": True,
"start_time": "00:00",
"duration": {
"hours": "24",
"minutes": "00"
}
},
"sunday": {
"enabled": True,
"start_time": "00:00",
"duration": {
"hours": "24",
"minutes": "00"
}
},
},
'title': None,
'track_ldjson_price_data': None,
'trim_text_whitespace': False,
'remove_duplicate_lines': False,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',
'uuid': None,
'webdriver_delay': None,
'webdriver_js_execute_code': None, # Run before change-detection
}
class watch_base(dict):
    """Base dict-like model for a single watch.

    All watch state lives in the private ``__data`` dict (seeded from the
    module-level ``schema``), while the dict interface (``__getitem__``,
    ``update``, ``items``, ...) is overridden to proxy to it.  Any mutation
    also bumps ``__data['last_modified']``.

    Fix: ``__init__`` contained a merge artifact — a full inline copy of the
    schema was written via ``self.update({...})`` and then immediately
    discarded by ``self.__data = deepcopy(schema)``, and the superclass
    constructor / update were each invoked twice.  The dead seeding and the
    duplicate calls are removed; observable behaviour (schema defaults,
    uuid generation, 'default' key removal) is unchanged.
    """

    # Internal storage for all watch fields (replaced per-instance in __init__)
    __data = {}
    # Root path of the datastore on disk, used by save_data()/watch_data_dir
    __datastore_path = None
    # When False, save_data() is a no-op (e.g. while bulk loading)
    __save_enabled = True

    def __init__(self, *arg, **kw):
        # Deep-copy the schema so instances never share mutable defaults
        # (lists/dicts) with each other.
        self.__data = deepcopy(schema)
        self.__datastore_path = kw.pop('datastore_path', None)

        # Initialize as empty dict but maintain dict interface
        super(watch_base, self).__init__()

        # Overlay caller-provided data on top of the schema defaults
        if arg or kw:
            self.update(*arg, **kw)

        # Generate UUID if needed
        if not self.__data.get('uuid'):
            self.__data['uuid'] = str(uuid.uuid4())

        if self.__data.get('default'):
            del(self.__data['default'])

    @property
    def watch_data_dir(self):
        # The base dir of the watch data
        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None

    def enable_saving(self):
        self.__save_enabled = True

    # Dictionary interface methods to use self.__data
    def __getitem__(self, key):
        return self.__data[key]

    def __setitem__(self, key, value):
        self.__data[key] = value
        self.__data['last_modified'] = time.time()

    def __delitem__(self, key):
        del self.__data[key]

    def __contains__(self, key):
        return key in self.__data

    def __iter__(self):
        return iter(self.__data)

    def __len__(self):
        return len(self.__data)

    def get(self, key, default=None):
        return self.__data.get(key, default)

    def update(self, *args, **kwargs):
        # Mirrors dict.update(); writes go to __data and touch last_modified.
        if args:
            if len(args) > 1:
                raise TypeError("update expected at most 1 arguments, got %d" % len(args))
            other = dict(args[0])
            for key in other:
                self.__data[key] = other[key]
        for key in kwargs:
            self.__data[key] = kwargs[key]
        self.__data['last_modified'] = time.time()

    def items(self):
        return self.__data.items()

    def keys(self):
        return self.__data.keys()

    def values(self):
        return self.__data.values()

    def pop(self, key, default=None):
        # NOTE: unlike dict.pop(), a missing key returns the default (None)
        # instead of raising KeyError — kept for backward compatibility.
        return self.__data.pop(key, default)

    def popitem(self):
        return self.__data.popitem()

    def clear(self):
        self.__data.clear()
        self.__data['last_modified'] = time.time()

    def get_data(self):
        """Returns the internal data dictionary"""
        return self.__data

    def save_data(self):
        """Persist this watch as {datastore_path}/{uuid}/watch.json (atomic write)."""
        if self.__save_enabled:
            if not self.__data.get('uuid'):
                # Might have been called when creating the watch
                return

            logger.debug(f"Saving watch {self['uuid']}")
            path = os.path.join(self.__datastore_path, self.get('uuid'))
            filepath = os.path.join(str(path), "watch.json")
            if not os.path.exists(path):
                os.mkdir(path)

            try:
                import tempfile
                # Create the temp file in the destination directory so that
                # os.replace() stays on one filesystem and remains atomic
                # (a system-tempdir file can fail with EXDEV on rename).
                with tempfile.NamedTemporaryFile(mode='wb+', delete=False, dir=path) as tmp:
                    tmp.write(json.dumps(self.get_data(), indent=2).encode('utf-8'))
                    tmp.flush()
                os.replace(tmp.name, filepath)
            except Exception as e:
                logger.error(f"Error writing JSON for {self.get('uuid')}!! (JSON file save was skipped) : {str(e)}")

        # Legacy cleanup: never keep a stray 'default' key in saved data
        if self.get('default'):
            del self['default']

View File

@@ -4,12 +4,12 @@ from changedetectionio.strtobool import strtobool
from copy import deepcopy
from loguru import logger
import hashlib
import importlib
import inspect
import os
import pkgutil
import re
from .pluggy_interface import plugin_manager, hookimpl
class difference_detection_processor():
browser_steps = None
@@ -172,208 +172,83 @@ class difference_detection_processor():
return changed_detected, update_obj, ''.encode('utf-8')
def get_all_plugins_info():
def find_sub_packages(package_name):
"""
Get information about all registered processor plugins
:return: A list of dictionaries with plugin info
"""
plugins_info = []
# Collect from all registered plugins
for plugin in plugin_manager.get_plugins():
if hasattr(plugin, "get_processor_name") and hasattr(plugin, "get_processor_description"):
processor_name = plugin.get_processor_name()
description = plugin.get_processor_description()
# Get version if available
version = "N/A"
if hasattr(plugin, "get_processor_version"):
plugin_version = plugin.get_processor_version()
if plugin_version:
version = plugin_version
if processor_name and description:
plugins_info.append({
"name": processor_name,
"description": description,
"version": version
})
# Fallback if no plugins registered
if not plugins_info:
plugins_info = [
{"name": "text_json_diff", "description": "Webpage Text/HTML, JSON and PDF changes", "version": "1.0.0"},
{"name": "restock_diff", "description": "Re-stock & Price detection for single product pages", "version": "1.0.0"}
]
return plugins_info
Find all sub-packages within the given package.
def available_processors(datastore=None):
:param package_name: The name of the base package to scan for sub-packages.
:return: A list of sub-package names.
"""
package = importlib.import_module(package_name)
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
def find_processors():
"""
Find all subclasses of DifferenceDetectionProcessor in the specified package.
:param package_name: The name of the package to scan for processor modules.
:return: A list of (module, class) tuples.
"""
package_name = "changedetectionio.processors" # Name of the current package/module
processors = []
sub_packages = find_sub_packages(package_name)
for sub_package in sub_packages:
module_name = f"{package_name}.{sub_package}.processor"
try:
module = importlib.import_module(module_name)
# Iterate through all classes in the module
for name, obj in inspect.getmembers(module, inspect.isclass):
if issubclass(obj, difference_detection_processor) and obj is not difference_detection_processor:
processors.append((module, sub_package))
except (ModuleNotFoundError, ImportError) as e:
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
return processors
def get_parent_module(module):
module_name = module.__name__
if '.' not in module_name:
return None # Top-level module has no parent
parent_module_name = module_name.rsplit('.', 1)[0]
try:
return importlib.import_module(parent_module_name)
except Exception as e:
pass
return False
def get_custom_watch_obj_for_processor(processor_name):
from changedetectionio.model import Watch
watch_class = Watch.model
processor_classes = find_processors()
custom_watch_obj = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
if custom_watch_obj:
# Parent of .processor.py COULD have its own Watch implementation
parent_module = get_parent_module(custom_watch_obj[0])
if hasattr(parent_module, 'Watch'):
watch_class = parent_module.Watch
return watch_class
def available_processors():
"""
Get a list of processors by name and description for the UI elements
Filtered by enabled_plugins setting if datastore is provided
:return: A list of tuples (processor_name, description)
"""
plugins_info = get_all_plugins_info()
processor_list = []
for plugin in plugins_info:
processor_list.append((plugin["name"], plugin["description"]))
return processor_list
def get_processor_handler(processor_name, datastore, watch_uuid):
"""
Get the processor handler for the specified processor name
:return: The processor handler instance
"""
# Try each plugin in turn
for plugin in plugin_manager.get_plugins():
if hasattr(plugin, "perform_site_check"):
handler = plugin.perform_site_check(datastore=datastore, watch_uuid=watch_uuid)
if handler:
return handler
# If no plugins handled it, use the appropriate built-in processor
watch = datastore.data['watching'].get(watch_uuid)
if watch and watch.get('processor') == 'restock_diff':
from .restock_diff.processor import perform_site_check
return perform_site_check(datastore=datastore, watch_uuid=watch_uuid)
else:
# Default to text_json_diff
from .text_json_diff.processor import perform_site_check
return perform_site_check(datastore=datastore, watch_uuid=watch_uuid)
def get_form_class_for_processor(processor_name):
"""
Get the form class for the specified processor name
:return: The form class
"""
# Try each plugin in turn
for plugin in plugin_manager.get_plugins():
if hasattr(plugin, "get_form_class"):
form_class = plugin.get_form_class(processor_name=processor_name)
if form_class:
return form_class
# If no plugins provided a form class, use the appropriate built-in form
if processor_name == 'restock_diff':
try:
from .restock_diff.forms import processor_settings_form
return processor_settings_form
except ImportError:
pass
# Default to text_json_diff form
from changedetectionio import forms
return forms.processor_text_json_diff_form
def get_watch_model_for_processor(processor_name):
"""
Get the Watch model class for the specified processor name
:return: The Watch model class
:return: A list :)
"""
# Try each plugin in turn
for plugin in plugin_manager.get_plugins():
if hasattr(plugin, "get_watch_model_class"):
model_class = plugin.get_watch_model_class(processor_name=processor_name)
if model_class:
return model_class
processor_classes = find_processors()
# Default to standard Watch model
from changedetectionio.model import Watch
return Watch.model
available = []
for package, processor_class in processor_classes:
available.append((processor_class, package.name))
# Define plugin implementations for the built-in processors
class TextJsonDiffPlugin:
@hookimpl
def get_processor_name(self):
return "text_json_diff"
return available
@hookimpl
def get_processor_description(self):
from .text_json_diff.processor import name
return name
@hookimpl
def get_processor_version(self):
from changedetectionio import __version__
return __version__
@hookimpl
def get_processor_ui_tag(self):
from .text_json_diff.processor import UI_tag
return UI_tag
@hookimpl
def perform_site_check(self, datastore, watch_uuid):
watch = datastore.data['watching'].get(watch_uuid)
if watch and watch.get('processor', 'text_json_diff') == 'text_json_diff':
from .text_json_diff.processor import perform_site_check
return perform_site_check(datastore=datastore, watch_uuid=watch_uuid)
return None
@hookimpl
def get_form_class(self, processor_name):
if processor_name == 'text_json_diff':
from changedetectionio import forms
return forms.processor_text_json_diff_form
return None
@hookimpl
def get_watch_model_class(self, processor_name):
if processor_name == 'text_json_diff':
from changedetectionio.model import Watch
return Watch.model
return None
class RestockDiffPlugin:
@hookimpl
def get_processor_name(self):
return "restock_diff"
@hookimpl
def get_processor_description(self):
from .restock_diff.processor import name
return name
@hookimpl
def get_processor_version(self):
from changedetectionio import __version__
return __version__
@hookimpl
def get_processor_ui_tag(self):
from .restock_diff.processor import UI_tag
return UI_tag
@hookimpl
def perform_site_check(self, datastore, watch_uuid):
watch = datastore.data['watching'].get(watch_uuid)
if watch and watch.get('processor') == 'restock_diff':
from .restock_diff.processor import perform_site_check
return perform_site_check(datastore=datastore, watch_uuid=watch_uuid)
return None
@hookimpl
def get_form_class(self, processor_name):
if processor_name == 'restock_diff':
try:
from .restock_diff.forms import processor_settings_form
return processor_settings_form
except ImportError:
pass
return None
@hookimpl
def get_watch_model_class(self, processor_name):
if processor_name == 'restock_diff':
from . import restock_diff
return restock_diff.Watch
return None
# Register the built-in processor plugins
plugin_manager.register(TextJsonDiffPlugin())
plugin_manager.register(RestockDiffPlugin())

View File

@@ -1,5 +0,0 @@
# Common constants used across processors
# Price data tracking constants
PRICE_DATA_TRACK_ACCEPT = 'accepted'
PRICE_DATA_TRACK_REJECT = 'rejected'

View File

@@ -1,85 +0,0 @@
import pluggy
from loguru import logger
# Ensure that the namespace in HookspecMarker matches PluginManager
PLUGIN_NAMESPACE = "changedetectionio_processors"
hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
UI_tags = {}
class ProcessorSpec:
"""Hook specifications for difference detection processors."""
# NOTE: hookspec stubs are declared without 'self' and their bodies never
# run — pluggy only inspects the signatures to validate implementations.
@hookspec
def get_processor_name():
"""Return the processor name for selection in the UI."""
pass
@hookspec
def get_processor_description():
"""Return a human-readable description of the processor."""
pass
@hookspec
def get_processor_version():
"""Return the processor plugin version."""
pass
@hookspec
def get_processor_ui_tag():
"""Return the UI tag for the processor (used for categorization in UI)."""
pass
@hookspec
def perform_site_check(datastore, watch_uuid):
"""Return the processor handler class or None if not applicable.
Each plugin should check if it's the right processor for this watch
and return None if it's not.
Should return an instance of a class that implements:
- call_browser(preferred_proxy_id=None): Fetch the content
- run_changedetection(watch): Analyze for changes and return tuple of (changed_detected, update_obj, contents)
"""
pass
@hookspec
def get_form_class(processor_name):
"""Return the WTForms form class for the processor settings or None if not applicable.
Each plugin should check if it's the right processor and return None if not.
"""
pass
@hookspec
def get_watch_model_class(processor_name):
"""Return a custom Watch model class if needed or None if not applicable.
Each plugin should check if it's the right processor and return None if not.
"""
pass
# Set up Pluggy Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
# Register hookspecs
plugin_manager.add_hookspecs(ProcessorSpec)
# Initialize by loading plugins and building UI_tags dictionary
try:
# Discover installed plugins from external packages (if any)
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
logger.info(f"Loaded plugins: {plugin_manager.get_plugins()}")
# Build UI_tags dictionary from all plugins
for plugin in plugin_manager.get_plugins():
if hasattr(plugin, "get_processor_name") and hasattr(plugin, "get_processor_ui_tag"):
plugin_name = plugin.get_processor_name()
ui_tag = plugin.get_processor_ui_tag()
if plugin_name and ui_tag:
UI_tags[plugin_name] = ui_tag
logger.info(f"Found UI tag for plugin {plugin_name}: {ui_tag}")
# Broad catch is deliberate at this module-import boundary: a broken
# third-party plugin must not prevent the application from starting.
except Exception as e:
logger.critical(f"Error loading plugins: {str(e)}")

View File

@@ -1,4 +1,5 @@
from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch
from typing import Union
import re
@@ -6,7 +7,6 @@ import re
class Restock(dict):
def parse_currency(self, raw_value: str) -> Union[float, None]:
from babel.numbers import parse_decimal
# Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
standardized_value = raw_value
@@ -56,19 +56,14 @@ class Restock(dict):
super().__setitem__(key, value)
class Watch(BaseWatch):
def load_extra_vars(self):
# something from disk?
def __init__(self, *arg, **kw):
super().__init__(*arg, **kw)
# Restock Obj helps with the state of the situation
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
'follow_price_changes': True,
'in_stock_processing' : 'in_stock_only'
}
} #@todo update
def clear_watch(self):
super().clear_watch()

View File

@@ -9,7 +9,6 @@ import time
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Re-stock & Price detection for single product pages'
description = 'Detects if the product goes back to in-stock'
UI_tag = "Restock"
class UnableToExtractRestockData(Exception):
def __init__(self, status_code):
@@ -153,8 +152,7 @@ class perform_site_check(difference_detection_processor):
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}
if not 'restock_settings' in watch.keys():
raise Exception("Restock settings not found in watch.")
self.screenshot = self.fetcher.screenshot
self.xpath_data = self.fetcher.xpath_data

View File

@@ -10,14 +10,13 @@ from changedetectionio.conditions import execute_ruleset_against_all_plugins
from changedetectionio.processors import difference_detection_processor
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
from changedetectionio import html_tools, content_fetchers
from changedetectionio.processors.constants import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from loguru import logger
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Webpage Text/HTML, JSON and PDF changes'
description = 'Detects all text changes where possible'
UI_tag = "Text Diff"
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']

View File

@@ -1,22 +1,4 @@
(function ($) {
// Initialize plugin management UI when the DOM is ready
$(document).ready(function() {
// Add event handlers for plugin checkboxes
$("#plugins-table input[type='checkbox']").on('change', function() {
const isEnabled = $(this).is(':checked');
// For visual feedback, fade the row when disabled
if (isEnabled) {
$(this).closest('tr').removeClass('disabled-plugin');
} else {
$(this).closest('tr').addClass('disabled-plugin');
}
const pluginName = $(this).closest('tr').find('td:nth-child(2)').text().trim();
console.log(`Plugin ${pluginName} ${isEnabled ? 'enabled' : 'disabled'}`);
});
});
/**
* debounce
* @param {integer} milliseconds This param indicates the number of milliseconds

View File

@@ -6,7 +6,7 @@ from flask import (
from .html_tools import TRANSLATE_WHITESPACE_TABLE
from . model import App, Watch
from copy import deepcopy
from copy import deepcopy, copy
from os import path, unlink
from threading import Lock
import json
@@ -17,9 +17,9 @@ import threading
import time
import uuid as uuid_builder
from loguru import logger
from deepmerge import always_merger
from .processors import get_watch_model_for_processor
from .processors import get_custom_watch_obj_for_processor
from .processors.restock_diff import Restock
# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -31,6 +31,11 @@ dictfilt = lambda x, y: dict([ (i,x[i]) for i in x if i in set(y) ])
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
class ChangeDetectionStore:
lock = Lock()
# For general updates/writes that can wait a few seconds
needs_write = False
# For when we edit, we should write to disk
needs_write_urgent = False
__version_check = True
@@ -41,9 +46,12 @@ class ChangeDetectionStore:
self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
logger.info(f"Datastore path is '{self.json_store_path}'")
self.needs_write = False
self.start_time = time.time()
self.stop_thread = False
# Base definition for all watchers
# deepcopy part of #569 - not sure why its needed exactly
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
if path.isfile('changedetectionio/source.txt'):
with open('changedetectionio/source.txt') as f:
@@ -51,30 +59,38 @@ class ChangeDetectionStore:
# So when someone gives us a backup file to examine, we know exactly what code they were running.
self.__data['build_sha'] = f.read()
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
try:
import os
# First load global settings from the main JSON file if it exists
if os.path.isfile(self.json_store_path):
with open(self.json_store_path) as json_file:
from_disk = json.load(json_file)
# Load app_guid and settings from the main JSON file
if 'app_guid' in from_disk:
self.__data['app_guid'] = from_disk['app_guid']
if 'settings' in from_disk:
if 'headers' in from_disk['settings']:
self.__data['settings']['headers'].update(from_disk['settings']['headers'])
if 'requests' in from_disk['settings']:
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
# @todo retest with ", encoding='utf-8'"
with open(self.json_store_path) as json_file:
from_disk = json.load(json_file)
# @todo isnt there a way todo this dict.update recursively?
# Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
if 'watching' in from_disk:
self.__data['watching'].update(from_disk['watching'])
if 'app_guid' in from_disk:
self.__data['app_guid'] = from_disk['app_guid']
if 'settings' in from_disk:
if 'headers' in from_disk['settings']:
self.__data['settings']['headers'].update(from_disk['settings']['headers'])
if 'requests' in from_disk['settings']:
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
# Convert each existing watch back to the Watch.model object
for uuid, watch in self.__data['watching'].items():
self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
logger.info(f"Watching: {uuid} {watch['url']}")
# And for Tags also, should be Restock type because it has extra settings
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff')
logger.info(f"Tag: {uuid} {tag['title']}")
# First time ran, Create the datastore.
except (FileNotFoundError):
@@ -93,8 +109,6 @@ class ChangeDetectionStore:
else:
# Bump the update version by running updates
self.scan_load_watches()
self.scan_load_tags()
self.run_updates()
self.__data['version_tag'] = version_tag
@@ -126,93 +140,53 @@ class ChangeDetectionStore:
secret = secrets.token_hex(16)
self.__data['settings']['application']['api_access_token'] = secret
def scan_load_watches(self):
self.needs_write = True
# Now scan for individual watch.json files in the datastore directory
import pathlib
watch_jsons = list(pathlib.Path(self.datastore_path).rglob("*/watch.json"))
# Finally start the thread that will manage periodic data saves to JSON
save_data_thread = threading.Thread(target=self.save_datastore).start()
for watch_file in watch_jsons:
# Extract UUID from the directory name (parent directory of watch.json)
uuid = watch_file.parent.name
def rehydrate_entity(self, uuid, entity, processor_override=None):
"""Set the dict back to the dict Watch object"""
entity['uuid'] = uuid
try:
with open(watch_file, 'r') as f:
watch_data = json.load(f)
# Create a Watch object and add it to the datastore
self.__data['watching'][uuid] = self.rehydrate_entity(default_dict=watch_data)
logger.info(f"Watching: {uuid} {watch_data.get('url')}")
if processor_override:
watch_class = get_custom_watch_obj_for_processor(processor_override)
entity['processor']=processor_override
else:
watch_class = get_custom_watch_obj_for_processor(entity.get('processor'))
except Exception as e:
logger.error(f"Error loading watch from {watch_file}: {str(e)}")
continue
logger.debug(f"{len(self.__data['watching'])} watches loaded.")
if entity.get('uuid') != 'text_json_diff':
logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")
def scan_load_tags(self):
import pathlib
# Now scan for individual tag.json files in the tags directory
tags_path = os.path.join(self.datastore_path, 'tags')
if os.path.exists(tags_path):
tag_jsons = list(pathlib.Path(tags_path).rglob("*.json"))
for tag_file in tag_jsons:
# Extract UUID from the directory name (parent directory of tag.json)
try:
with open(tag_file, 'r') as f:
tag_data = json.load(f)
uuid = str(tag_file).replace('.json', '')
tag_data['uuid'] = uuid
# Create a Tag object and add it to the datastore
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
default_dict=tag_data,
processor_override='restock_diff'
)
logger.info(f"Tag: {uuid} {tag_data.get('title', 'No title found')}")
except Exception as e:
logger.error(f"Error loading tag from {tag_file}: {str(e)}")
continue
logger.debug(f"{len(self.__data['settings']['application']['tags'])} tags loaded.")
def rehydrate_entity(self, default_dict: dict, processor_override=None):
if not processor_override and default_dict.get('processor'):
processor_override = default_dict.get('processor')
if not processor_override:
processor_override = 'text_json_diff'
watch_class = get_watch_model_for_processor(processor_override)
default_dict['processor'] = processor_override
entity = watch_class(datastore_path=self.datastore_path, default=default_dict)
entity.enable_saving()
entity = watch_class(datastore_path=self.datastore_path, default=entity)
return entity
def set_last_viewed(self, uuid, timestamp):
logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
self.data['watching'][uuid].save_data()
self.needs_write = True
def remove_password(self):
self.__data['settings']['application']['password'] = False
self.save_settings()
self.needs_write = True
def update_watch(self, uuid, update_obj):
"""
Update a watch with new values using the deepmerge library.
"""
# It's possible that the watch could be deleted before update
if not uuid in self.data['watching'].keys() or update_obj is None:
if not self.__data['watching'].get(uuid):
return
# In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
for dict_key, d in self.generic_definition.items():
if isinstance(d, dict):
if update_obj is not None and dict_key in update_obj:
self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
del (update_obj[dict_key])
with self.lock:
self.__data['watching'][uuid].update(update_obj)
self.__data['watching'][uuid].save_data()
# In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
for dict_key, d in self.generic_definition.items():
if isinstance(d, dict):
if update_obj is not None and dict_key in update_obj:
self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
del (update_obj[dict_key])
self.__data['watching'][uuid].update(update_obj)
self.needs_write = True
@property
def threshold_seconds(self):
@@ -272,6 +246,8 @@ class ChangeDetectionStore:
shutil.rmtree(path)
del self.data['watching'][uuid]
self.needs_write_urgent = True
# Clone a watch by UUID
def clone(self, uuid):
url = self.data['watching'][uuid].get('url')
@@ -291,6 +267,7 @@ class ChangeDetectionStore:
# Remove a watchs data but keep the entry (URL etc)
def clear_watch_history(self, uuid):
self.__data['watching'][uuid].clear_watch()
self.needs_write_urgent = True
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
import requests
@@ -368,7 +345,7 @@ class ChangeDetectionStore:
apply_extras['tags'] = list(set(apply_extras.get('tags')))
# If the processor also has its own Watch implementation
watch_class = get_watch_model_for_processor(apply_extras.get('processor'))
watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
new_watch = watch_class(datastore_path=self.datastore_path, url=url)
new_uuid = new_watch.get('uuid')
@@ -381,11 +358,15 @@ class ChangeDetectionStore:
if not apply_extras.get('date_created'):
apply_extras['date_created'] = int(time.time())
new_watch.ensure_data_dir_exists()
new_watch.update(apply_extras)
new_watch.update(apply_extras)
new_watch.ensure_data_dir_exists()
self.__data['watching'][new_uuid] = new_watch
self.__data['watching'][new_uuid].save_data()
if write_to_disk_now:
self.sync_to_json()
logger.debug(f"Added '{url}'")
return new_uuid
@@ -399,22 +380,58 @@ class ChangeDetectionStore:
return False
def save_settings(self):
logger.info("Saving application settings...")
def sync_to_json(self):
logger.info("Saving JSON..")
try:
# Only save app settings, not the watches or tags (they're saved individually)
data = {'settings': self.__data.get('settings')}
#data = deepcopy(self.__data)
# Remove the watches from the main JSON file
if 'watching' in data:
del data['watching']
# Remove the tags from the main JSON file since they're saved individually now
# if 'settings' in data and 'application' in data['settings'] and 'tags' in data['settings']['application']:
# del data['settings']['application']['tags']
except Exception as e:
x=1
data = deepcopy(self.__data)
except RuntimeError as e:
# Try again in 15 seconds
time.sleep(15)
logger.error(f"! Data changed when writing to JSON, trying again.. {str(e)}")
self.sync_to_json()
return
else:
try:
# Re #286 - First write to a temp file, then confirm it looks OK and rename it
# This is a fairly basic strategy to deal with the case that the file is corrupted,
# system was out of memory, out of RAM etc
with open(self.json_store_path+".tmp", 'w') as json_file:
json.dump(data, json_file, indent=4)
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
self.needs_write = False
self.needs_write_urgent = False
# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
# by just running periodically in one thread, according to python, dict updates are threadsafe.
def save_datastore(self):
while True:
if self.stop_thread:
# Suppressing "Logging error in Loguru Handler #0" during CICD.
# Not a meaningful difference for a real use-case just for CICD.
# the side effect is a "Shutting down datastore thread" message
# at the end of each test.
# But still more looking better.
import sys
logger.remove()
logger.add(sys.stderr)
logger.critical("Shutting down datastore thread")
return
if self.needs_write or self.needs_write_urgent:
self.sync_to_json()
# Once per minute is enough, more and it can cause high CPU usage
# better here is to use something like self.app.config.exit.wait(1), but we cant get to 'app' from here
for i in range(120):
time.sleep(0.5)
if self.stop_thread or self.needs_write_urgent:
break
# Go through the datastore path and remove any snapshots that are not mentioned in the index
# This usually is not used, but can be handy.
@@ -568,17 +585,16 @@ class ChangeDetectionStore:
# Eventually almost everything todo with a watch will apply as a Tag
# So we use the same model as a Watch
from .model import Tag
new_tag = Tag.model(datastore_path=self.datastore_path, default={
'title': name.strip(),
'date_created': int(time.time())
})
with self.lock:
from .model import Tag
new_tag = Tag.model(datastore_path=self.datastore_path, default={
'title': name.strip(),
'date_created': int(time.time())
})
new_uuid = new_tag.get('uuid')
self.__data['settings']['application']['tags'][new_uuid] = new_tag
self.__data['settings']['application']['tags'][new_uuid].save_data()
new_uuid = new_tag.get('uuid')
self.__data['settings']['application']['tags'][new_uuid] = new_tag
return new_uuid
@@ -874,7 +890,6 @@ class ChangeDetectionStore:
# Migrate old 'in_stock' values to the new Restock
def update_17(self):
from .processors.restock_diff import Restock
for uuid, watch in self.data['watching'].items():
if 'in_stock' in watch:
watch['restock'] = Restock({'in_stock': watch.get('in_stock')})

View File

@@ -1,7 +1,3 @@
{% macro hasattr(obj, name) -%}
{{ obj is defined and name in obj.__dict__ }}
{%- endmacro %}
{% macro render_field(field) %}
<div {% if field.errors %} class="error" {% endif %}>{{ field.label }}</div>
<div {% if field.errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}

View File

@@ -1,49 +0,0 @@
{% extends 'base.html' %} {% block content %}
<div class="edit-form">
<div class="box-wrap inner">
<form
class="pure-form pure-form-stacked"
action="{{url_for('ui.clear_all_history')}}"
method="POST"
>
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
<fieldset>
<div class="pure-control-group">
This will remove version history (snapshots) for ALL watches, but keep
your list of URLs! <br />
You may like to use the <strong>BACKUP</strong> link first.<br />
</div>
<br />
<div class="pure-control-group">
<label for="confirmtext">Confirmation text</label>
<input
type="text"
id="confirmtext"
required=""
name="confirmtext"
value=""
size="10"
/>
<span class="pure-form-message-inline"
>Type in the word <strong>clear</strong> to confirm that you
understand.</span
>
</div>
<br />
<div class="pure-control-group">
<button type="submit" class="pure-button pure-button-primary">
Clear History!
</button>
</div>
<br />
<div class="pure-control-group">
<a href="{{url_for('index')}}" class="pure-button button-cancel"
>Cancel</a
>
</div>
</fieldset>
</form>
</div>
</div>
{% endblock %}

View File

@@ -1,125 +0,0 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="edit-form monospaced-textarea">
<div class="tabs collapsable">
<ul>
<li class="tab" id=""><a href="#url-list">URL List</a></li>
<li class="tab"><a href="#distill-io">Distill.io</a></li>
<li class="tab"><a href="#xlsx">.XLSX &amp; Wachete</a></li>
</ul>
</div>
<div class="box-wrap inner">
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="tab-pane-inner" id="url-list">
<legend>
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
(,):
<br>
<code>https://example.com tag1, tag2, last tag</code>
<br>
URLs which do not pass validation will stay in the textarea.
</legend>
{{ render_field(form.processor, class="processor") }}
<textarea name="urls" class="pure-input-1-2" placeholder="https://"
style="width: 100%;
font-family:monospace;
white-space: pre;
overflow-wrap: normal;
overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
<div id="quick-watch-processor-type">
</div>
</div>
<div class="tab-pane-inner" id="distill-io">
<legend>
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
<br>
<p>
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
Be sure to set your default fetcher to Chrome if required.<br>
</p>
</legend>
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
font-family:monospace;
white-space: pre;
overflow-wrap: normal;
overflow-x: scroll;" placeholder="Example Distill.io JSON export file
{
&quot;client&quot;: {
&quot;local&quot;: 1
},
&quot;data&quot;: [
{
&quot;name&quot;: &quot;Unraid | News&quot;,
&quot;uri&quot;: &quot;https://unraid.net/blog&quot;,
&quot;config&quot;: &quot;{\&quot;selections\&quot;:[{\&quot;frames\&quot;:[{\&quot;index\&quot;:0,\&quot;excludes\&quot;:[],\&quot;includes\&quot;:[{\&quot;type\&quot;:\&quot;xpath\&quot;,\&quot;expr\&quot;:\&quot;(//div[@id='App']/div[contains(@class,'flex')]/main[contains(@class,'relative')]/section[contains(@class,'relative')]/div[@class='container']/div[contains(@class,'flex')]/div[contains(@class,'w-full')])[1]\&quot;}]}],\&quot;dynamic\&quot;:true,\&quot;delay\&quot;:2}],\&quot;ignoreEmptyText\&quot;:true,\&quot;includeStyle\&quot;:false,\&quot;dataAttr\&quot;:\&quot;text\&quot;}&quot;,
&quot;tags&quot;: [],
&quot;content_type&quot;: 2,
&quot;state&quot;: 40,
&quot;schedule&quot;: &quot;{\&quot;type\&quot;:\&quot;INTERVAL\&quot;,\&quot;params\&quot;:{\&quot;interval\&quot;:4447}}&quot;,
&quot;ts&quot;: &quot;2022-03-27T15:51:15.667Z&quot;
}
]
}
" rows="25">{{ original_distill_json }}</textarea>
</div>
<div class="tab-pane-inner" id="xlsx">
<fieldset>
<div class="pure-control-group">
{{ render_field(form.xlsx_file, class="processor") }}
</div>
<div class="pure-control-group">
{{ render_field(form.file_mapping, class="processor") }}
</div>
</fieldset>
<div class="pure-control-group">
<span class="pure-form-message-inline">
Table of custom column and data types mapping for the <strong>Custom mapping</strong> File mapping type.
</span>
<table style="border: 1px solid #aaa; padding: 0.5rem; border-radius: 4px;">
<tr>
<td><strong>Column #</strong></td>
{% for n in range(4) %}
<td><input type="number" name="custom_xlsx[col_{{n}}]" style="width: 4rem;" min="1"></td>
{% endfor %}
</tr>
<tr>
<td><strong>Type</strong></td>
{% for n in range(4) %}
<td><select name="custom_xlsx[col_type_{{n}}]">
<option value="" style="color: #aaa"> -- none --</option>
<option value="url">URL</option>
<option value="title">Title</option>
<option value="include_filters">CSS/xPath filter</option>
<option value="tag">Group / Tag name(s)</option>
<option value="interval_minutes">Recheck time (minutes)</option>
</select></td>
{% endfor %}
</tr>
</table>
</div>
</div>
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
</form>
</div>
</div>
{% endblock %}

View File

@@ -1,19 +0,0 @@
{% extends 'base.html' %}
{% block content %}
<div class="edit-form">
<div class="inner">
<h4 style="margin-top: 0px;">Notification debug log</h4>
<div id="notification-error-log">
<ul style="font-size: 80%; margin:0px; padding: 0 0 0 7px">
{% for log in logs|reverse %}
<li>{{log}}</li>
{% endfor %}
</ul>
</div>
</div>
</div>
{% endblock %}

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python3
import resource
import psutil
import time
from threading import Thread
@@ -28,9 +28,10 @@ def reportlog(pytestconfig):
def track_memory(memory_usage, ):
process = psutil.Process(os.getpid())
while not memory_usage["stop"]:
max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
memory_usage["peak"] = max(memory_usage["peak"], max_rss)
current_rss = process.memory_info().rss
memory_usage["peak"] = max(memory_usage["peak"], current_rss)
time.sleep(0.01) # Adjust the sleep time as needed
@pytest.fixture(scope='function')

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup, extract_api_key_from_UI, wait_for_all_checks
from .util import live_server_setup, wait_for_all_checks
import json
import uuid
@@ -58,14 +58,14 @@ def test_setup(client, live_server, measure_memory_usage):
def test_api_simple(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
api_key = extract_api_key_from_UI(client)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Create a watch
set_original_response()
# Validate bad URL
test_url = url_for('test_endpoint', _external=True,
headers={'x-api-key': api_key}, )
test_url = url_for('test_endpoint', _external=True )
res = client.post(
url_for("createwatch"),
data=json.dumps({"url": "h://xxxxxxxxxom"}),
@@ -290,13 +290,13 @@ def test_access_denied(client, live_server, measure_memory_usage):
assert b"Settings updated." in res.data
def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
api_key = extract_api_key_from_UI(client)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Create a watch
set_original_response()
test_url = url_for('test_endpoint', _external=True,
headers={'x-api-key': api_key}, )
test_url = url_for('test_endpoint', _external=True)
# Create new
res = client.post(
@@ -371,7 +371,8 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
def test_api_import(client, live_server, measure_memory_usage):
api_key = extract_api_key_from_UI(client)
#live_server_setup(live_server)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.post(
url_for("import") + "?tag=import-test",
@@ -389,4 +390,48 @@ def test_api_import(client, live_server, measure_memory_usage):
# Should see the new tag in the tag/groups list
res = client.get(url_for('tags.tags_overview_page'))
assert b'import-test' in res.data
def test_api_conflict_UI_password(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Enable password check and diff page access bypass
res = client.post(
url_for("settings.settings_page"),
data={"application-password": "foobar", # password is now set! API should still work!
"application-api_access_token_enabled": "y",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." in res.data
# Create a watch
set_original_response()
test_url = url_for('test_endpoint', _external=True)
# Create new
res = client.post(
url_for("createwatch"),
data=json.dumps({"url": test_url, "title": "My test URL" }),
headers={'content-type': 'application/json', 'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 201
wait_for_all_checks(client)
url = url_for("createwatch")
# Get a listing, it will be the first one
res = client.get(
url,
headers={'x-api-key': api_key}
)
assert res.status_code == 200
assert len(res.json)

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI, wait_for_all_checks
from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks
def set_response_with_ldjson():
@@ -101,9 +101,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
# Accept it
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
#time.sleep(1)
res = client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
# should now be switched to restock_mode
wait_for_all_checks(client)
client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Offer should be gone
@@ -112,7 +110,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
assert b'tracking-ldjson-price-data' in res.data
# and last snapshop (via API) should be just the price
api_key = extract_api_key_from_UI(client)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.get(
url_for("watchsinglehistory", uuid=uuid, timestamp='latest'),
headers={'x-api-key': api_key},
@@ -156,7 +154,6 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_
assert b"1 Imported" in res.data
wait_for_all_checks(client)
assert len(client.application.config.get('DATASTORE').data['watching'])
for k,v in client.application.config.get('DATASTORE').data['watching'].items():
assert v.get('last_error') == False
assert v.get('has_ldjson_price_data') == has_ldjson_price_data, f"Detected LDJSON data? should be {has_ldjson_price_data}"
@@ -166,7 +163,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
def test_bad_ldjson_is_correctly_ignored(client, live_server):
def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_return_data = """
<html>

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import json
import urllib
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
@@ -43,14 +44,12 @@ def set_number_out_of_range_response(number="150"):
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_setup(live_server):
live_server_setup(live_server)
def test_conditions_with_text_and_number(client, live_server):
"""Test that both text and number conditions work together with AND logic."""
set_original_response("50")
#live_server_setup(live_server)
live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)
@@ -139,7 +138,6 @@ def test_conditions_with_text_and_number(client, live_server):
def test_condition_validate_rule_row(client, live_server):
set_original_response("50")
#live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)

View File

@@ -1,15 +0,0 @@
from flask import url_for
from changedetectionio.tests.util import live_server_setup
def test_checkplugins_registered(live_server, client):
live_server_setup(live_server)
res = client.get(
url_for("settings.settings_page")
)
assert res.status_code == 200
# Should be registered in the info table
assert b'<td>Webpage Text/HTML, JSON and PDF changes' in res.data
assert b'<td>text_json_diff' in res.data

View File

@@ -95,14 +95,12 @@ def test_itemprop_price_change(client, live_server):
test_url = url_for('test_endpoint', _external=True)
set_original_response(props_markup=instock_props[0], price="190.95")
res = client.post(
client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True
)
assert res.status_code == 200
# A change in price, should trigger a change by default
wait_for_all_checks(client)
res = client.get(url_for("index"))
@@ -112,7 +110,6 @@ def test_itemprop_price_change(client, live_server):
set_original_response(props_markup=instock_props[0], price='180.45')
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'180.45' in res.data
assert b'unviewed' in res.data
@@ -398,7 +395,7 @@ def test_data_sanity(client, live_server):
test_url = url_for('test_endpoint', _external=True)
test_url2 = url_for('test_endpoint2', _external=True)
set_original_response(props_markup=instock_props[0], price="950.95")
res = client.post(
client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True

View File

@@ -1,88 +0,0 @@
#!/usr/bin/env python3
import os
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def test_restock_settings_persistence(client, live_server):
"""Test that restock processor and settings are correctly saved and loaded after app restart"""
live_server_setup(live_server)
# Create a test page with pricing information
test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="sametext">Out of stock</div>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
# Add our URL to the import page (pointing to our test endpoint)
test_url = url_for('test_endpoint', _external=True)
# Add a new watch with the restock_diff processor
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'processor': 'restock_diff'},
follow_redirects=True
)
# Wait for initial check to complete
wait_for_all_checks(client)
# Get the UUID of the watch
uuid = extract_UUID_from_client(client)
# Set custom restock settings
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid),
data={
"url": test_url,
"tags": "",
"headers": "",
"restock_settings-price_change_min": 10,
"restock_settings-price_change_threshold_percent": 5,
'fetch_backend': "html_requests",
"processor" : 'restock_diff'
},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Verify the settings were saved in the current datastore
app_config = client.application.config.get('DATASTORE').data
watch = app_config['watching'][uuid]
assert watch.get('processor') == 'restock_diff'
assert watch['restock_settings'].get('price_change_min') == 10
assert watch['restock_settings'].get('price_change_threshold_percent') == 5
# Restart the application by calling teardown and recreating the datastore
# This simulates shutting down and restarting the app
datastore = client.application.config.get('DATASTORE')
datastore.stop_thread = True
# Create a new datastore instance that will read from the saved JSON
from changedetectionio import store
new_datastore = store.ChangeDetectionStore(datastore_path="./test-datastore", include_default_watches=False)
client.application.config['DATASTORE'] = new_datastore
# Verify the watch settings were correctly loaded after restart
app_config = client.application.config.get('DATASTORE').data
watch = app_config['watching'][uuid]
# Check that processor mode is correctly preserved
assert watch.get('processor') == 'restock_diff', "Watch processor mode should be preserved as 'restock_diff'"
# Check that the restock settings were correctly preserved
assert watch['restock_settings'].get('price_change_min') == 10, "price_change_min setting should be preserved"
assert watch['restock_settings'].get('price_change_threshold_percent') == 5, "price_change_threshold_percent setting should be preserved"

View File

@@ -95,20 +95,6 @@ def wait_for_notification_endpoint_output():
return False
# kinda funky, but works for now
def extract_api_key_from_UI(client):
import re
res = client.get(
url_for("settings.settings_page"),
)
# <span id="api-key">{{api_key}}</span>
m = re.search('<span id="api-key">(.+?)</span>', str(res.data))
api_key = m.group(1)
return api_key.strip()
# kinda funky, but works for now
def get_UUID_for_tag_name(client, name):
app_config = client.application.config.get('DATASTORE').data

View File

@@ -270,16 +270,20 @@ class update_worker(threading.Thread):
logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
try:
# Get processor handler from pluggy plugin system
from changedetectionio.processors import get_processor_handler
# Processor is what we are using for detecting the "Change"
processor_name = watch.get('processor', 'text_json_diff')
# Get the handler via the plugin system
update_handler = get_processor_handler(processor_name=processor_name,
datastore=self.datastore,
watch_uuid=uuid)
processor = watch.get('processor', 'text_json_diff')
# Init a new 'difference_detection_processor', first look in processors
processor_module_name = f"changedetectionio.processors.{processor}.processor"
try:
processor_module = importlib.import_module(processor_module_name)
except ModuleNotFoundError as e:
print(f"Processor module '{processor}' not found.")
raise e
update_handler = processor_module.perform_site_check(datastore=self.datastore,
watch_uuid=uuid
)
update_handler.call_browser()
@@ -527,13 +531,14 @@ class update_worker(threading.Thread):
try:
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# Also save the snapshot on the first time checked, "last checked" will always be updated, so we just check history length.
if changed_detected or not watch.history_n:
if hasattr(update_handler, "screenshot") and update_handler.screenshot:
if update_handler.screenshot:
watch.save_screenshot(screenshot=update_handler.screenshot)
if hasattr(update_handler, "xpath_data") and update_handler.xpath_data:
if update_handler.xpath_data:
watch.save_xpath_data(data=update_handler.xpath_data)
# Small hack so that we sleep just enough to allow 1 second between history snapshots
@@ -586,7 +591,6 @@ class update_worker(threading.Thread):
'check_count': count
})
watch.save_data()
self.current_uuid = None # Done
self.q.task_done()

View File

@@ -73,7 +73,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
# playwright is installed at Dockerfile build time because it's not available on all platforms
pyppeteer-ng==2.0.0rc6
pyppeteer-ng==2.0.0rc5
pyppeteerstealth>=0.0.4
# Include pytest, so if theres a support issue we can ask them to run these tests on their setup
@@ -110,6 +110,5 @@ tzdata
pluggy ~= 1.5
deepmerge
# Needed for testing, cross-platform for process and system monitoring
psutil==7.0.0