mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-10 10:51:11 +00:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fecd181e07 | |||
| 525e390523 | |||
| 7fe332ad95 | |||
| b65a01ec02 |
@@ -66,27 +66,27 @@ jobs:
|
||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Docker Hub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
@@ -107,7 +107,7 @@ jobs:
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -131,7 +131,7 @@ jobs:
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
|
||||
@@ -60,14 +60,14 @@ jobs:
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
|
||||
@@ -42,10 +42,10 @@ jobs:
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.4'
|
||||
__version__ = '0.54.3'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
@@ -61,22 +61,8 @@ import time
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||
try:
|
||||
import ctypes as _ctypes
|
||||
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
|
||||
@@ -177,13 +177,6 @@ class Tag(Resource):
|
||||
|
||||
new_uuid = self.datastore.add_tag(title=title)
|
||||
if new_uuid:
|
||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||
if extra:
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||
if tag:
|
||||
tag.update(extra)
|
||||
tag.commit()
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported tag", 400
|
||||
|
||||
@@ -40,6 +40,11 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||
for uuid, tag in (tags or {}).items():
|
||||
for f in Path(tag.data_dir).glob('*'):
|
||||
@@ -146,22 +151,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||
abort(404)
|
||||
|
||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||
if filename == 'latest':
|
||||
backups = find_backups()
|
||||
if not backups:
|
||||
abort(404)
|
||||
filename = backups[0]['filename']
|
||||
|
||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||
abort(404)
|
||||
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import threading
|
||||
@@ -15,16 +14,6 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
|
||||
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
||||
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
||||
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
||||
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
||||
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
||||
_UUID_RE = re.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
class RestoreForm(Form):
|
||||
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||
@@ -61,18 +50,7 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
||||
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
||||
total_uncompressed = sum(m.file_size for m in zf.infolist())
|
||||
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
|
||||
raise ValueError(
|
||||
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
|
||||
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
|
||||
)
|
||||
resolved_dest = os.path.realpath(tmpdir)
|
||||
for member in zf.infolist():
|
||||
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
|
||||
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
|
||||
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
|
||||
zf.extract(member, tmpdir)
|
||||
zf.extractall(tmpdir)
|
||||
logger.debug("Restore: zip extracted, scanning UUID directories")
|
||||
|
||||
for entry in os.scandir(tmpdir):
|
||||
@@ -80,9 +58,6 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
||||
continue
|
||||
|
||||
uuid = entry.name
|
||||
if not _UUID_RE.match(uuid):
|
||||
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
|
||||
continue
|
||||
tag_json_path = os.path.join(entry.path, 'tag.json')
|
||||
watch_json_path = os.path.join(entry.path, 'watch.json')
|
||||
|
||||
@@ -180,9 +155,7 @@ def construct_restore_blueprint(datastore):
|
||||
form = RestoreForm()
|
||||
return render_template("backup_restore.html",
|
||||
form=form,
|
||||
restore_running=any(t.is_alive() for t in restore_threads),
|
||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
||||
restore_running=any(t.is_alive() for t in restore_threads))
|
||||
|
||||
@login_optionally_required
|
||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||
@@ -200,22 +173,10 @@ def construct_restore_blueprint(datastore):
|
||||
flash(gettext("File must be a .zip backup file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Reject oversized uploads before reading the stream into memory.
|
||||
content_length = request.content_length
|
||||
if content_length and content_length > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return.
|
||||
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
|
||||
# Read into memory now — the request stream is gone once we return
|
||||
try:
|
||||
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
|
||||
if len(raw) > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
zip_bytes = io.BytesIO(raw)
|
||||
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
|
||||
pass
|
||||
zip_bytes = io.BytesIO(zip_file.read())
|
||||
zipfile.ZipFile(zip_bytes) # quick validity check before spawning
|
||||
zip_bytes.seek(0)
|
||||
except zipfile.BadZipFile:
|
||||
flash(gettext("Invalid or corrupted zip file"), "error")
|
||||
@@ -240,7 +201,6 @@ def construct_restore_blueprint(datastore):
|
||||
name="BackupRestore"
|
||||
)
|
||||
restore_thread.start()
|
||||
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
|
||||
restore_threads.append(restore_thread)
|
||||
flash(gettext("Restore started in background, check back in a few minutes."))
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
@@ -19,10 +19,6 @@
|
||||
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB · Max decompressed size: %(decomp)s MB',
|
||||
upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||
|
||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
|
||||
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def clear_all_history():
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext', '')
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||
if confirmtext == 'clear':
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
|
||||
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
@@ -75,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||
|
||||
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
@@ -28,7 +28,6 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
|
||||
@@ -81,7 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
proxy_list = datastore.proxy_list
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
@@ -93,7 +92,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=proxy_list,
|
||||
has_proxies=datastore.proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
@@ -111,16 +110,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
# Return freed template-building memory to the OS immediately.
|
||||
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if session.get('share-link'):
|
||||
del (session['share-link'])
|
||||
|
||||
|
||||
@@ -213,13 +213,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if error_texts|length > 2 else '',
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
@@ -272,7 +271,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
@@ -306,20 +305,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||
{%- set restock = watch['restock'] -%}
|
||||
{%- set price = restock.get('price') -%}
|
||||
{%- set cur = restock.get('currency','') -%}
|
||||
|
||||
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||
{{ price }} {{ cur }}<!-- as string -->
|
||||
{%- if watch['restock']['price'] is number -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
|
||||
{%- endif -%}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -148,32 +148,10 @@ class fetcher(Fetcher):
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||
# (WHATWG encoding sniffing algorithm):
|
||||
# 1. BOM - highest confidence, check before anything else
|
||||
# 2. <meta charset> in first 2kb
|
||||
# 3. chardet statistical detection - last resort
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
boms = [
|
||||
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||
if bom_encoding:
|
||||
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||
r.encoding = bom_encoding
|
||||
else:
|
||||
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||
if meta_charset_match:
|
||||
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||
r.encoding = encoding
|
||||
else:
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -218,12 +217,8 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||
return formatted_value
|
||||
|
||||
@app.template_filter('regex_search')
|
||||
def _jinja2_filter_regex_search(value, pattern):
|
||||
import re
|
||||
return re.search(pattern, str(value)) is not None
|
||||
return formatted_value
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
@@ -388,8 +383,6 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
|
||||
return ''
|
||||
|
||||
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
"""Sanitize a tag title to create a valid CSS class name.
|
||||
@@ -401,8 +394,9 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
import re
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
@@ -490,21 +484,28 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
_locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||
# 3. Map browser locale to our internal locale if needed
|
||||
return _locale_aliases.get(browser_locale, browser_locale)
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -1017,16 +1018,15 @@ def check_for_new_version():
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
session = requests.Session()
|
||||
session.verify = False
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = session.post("https://changedetection.io/check-ver.php",
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': __version__,
|
||||
'app_guid': datastore.data['app_guid'],
|
||||
'watch_count': len(datastore.data['watching'])
|
||||
})
|
||||
},
|
||||
|
||||
verify=False)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
@@ -608,12 +608,13 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
from changedetectionio.html_tools import SafeXPath3Parser
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
|
||||
@@ -23,53 +23,6 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
||||
'unparsed-text',
|
||||
'unparsed-text-lines',
|
||||
'unparsed-text-available',
|
||||
'doc',
|
||||
'doc-available',
|
||||
'environment-variable',
|
||||
'available-environment-variables',
|
||||
]
|
||||
|
||||
|
||||
def _build_safe_xpath3_parser():
|
||||
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
|
||||
|
||||
XPath 3.0 includes functions that can read arbitrary files or environment variables:
|
||||
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
|
||||
- doc / doc-available (XML fetch from URI)
|
||||
- environment-variable / available-environment-variables (env var leakage)
|
||||
|
||||
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
|
||||
so removing entries here does not affect XPath3Parser itself.
|
||||
|
||||
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
|
||||
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
|
||||
"""
|
||||
import os
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
class SafeXPath3Parser(XPath3Parser):
|
||||
pass
|
||||
|
||||
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
|
||||
if env_override is not None:
|
||||
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
|
||||
else:
|
||||
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
|
||||
|
||||
for _fn in blocked:
|
||||
SafeXPath3Parser.symbol_table.pop(_fn, None)
|
||||
|
||||
return SafeXPath3Parser
|
||||
|
||||
|
||||
# Module-level singleton — built once, reused everywhere.
|
||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
@@ -230,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
tree = None
|
||||
@@ -255,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
@@ -280,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
# Drop element references before the finally block so tree.clear() can release
|
||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||
del r
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
|
||||
@@ -43,11 +43,6 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||
_FAVICON_FILENAME_CACHE: dict = {}
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
@@ -811,8 +806,9 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate module-level favicon filename cache for this watch
|
||||
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
@@ -827,23 +823,35 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the watch data directory.
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||
deepcopy (which drops instance attrs), and concurrent request races.
|
||||
Invalidated by bump_favicon() when a new favicon is saved.
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
|
||||
Returns:
|
||||
str: Basename of the favicon file, or None if not found.
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
"""
|
||||
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
fname = os.path.basename(files[0]) if files else None
|
||||
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||
return fname
|
||||
|
||||
if not files:
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -1174,13 +1182,18 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for, has_request_context
|
||||
from flask import url_for
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
has_app_context = has_request_context()
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
|
||||
@@ -6,7 +6,6 @@ Extracted from update_worker.py to provide standalone notification functionality
|
||||
for both sync and async workers
|
||||
"""
|
||||
import datetime
|
||||
from copy import deepcopy
|
||||
|
||||
import pytz
|
||||
from loguru import logger
|
||||
@@ -353,7 +352,7 @@ class NotificationService:
|
||||
"""
|
||||
Send notification when content changes are detected
|
||||
"""
|
||||
|
||||
n_object = NotificationContextData()
|
||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
return
|
||||
@@ -370,51 +369,21 @@ class NotificationService:
|
||||
# Should be a better parent getter in the model object
|
||||
|
||||
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
|
||||
# If the watch has no notification_body for example, it will try to get from the first matching group or system setting
|
||||
|
||||
# Should be, if none in the watch, and no group tag ones found, then use system ones at the end
|
||||
#n_object['notification_urls'] = _check_cascading_vars(self.datastore, 'notification_urls', watch)
|
||||
n_object = NotificationContextData()
|
||||
# this change probably not needed?
|
||||
n_object['notification_urls'] = _check_cascading_vars(self.datastore, 'notification_urls', watch)
|
||||
n_object['notification_title'] = _check_cascading_vars(self.datastore,'notification_title', watch)
|
||||
n_object['notification_body'] = _check_cascading_vars(self.datastore,'notification_body', watch)
|
||||
n_object['notification_format'] = _check_cascading_vars(self.datastore,'notification_format', watch)
|
||||
|
||||
notification_objects = []
|
||||
if n_object.get('notification_urls'):
|
||||
notification_objects.append(n_object)
|
||||
|
||||
|
||||
# LOGIC SHOULD BE something that all tests currently pass too
|
||||
# !!! _check_cascading_vars is not really used much, only used here..
|
||||
#
|
||||
|
||||
|
||||
# If any related group/tag has a notification_url set, then we fan out horizontally and collect it as extra notifications
|
||||
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
logger.debug(f'{len(tags)} related to this watch')
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
logger.debug(f"Checking group/tag for notification URLs '{tag['title']}' Muted? '{tag.get('notification_muted')}', URLs {tag.get('notification_urls')}")
|
||||
v = tag.get('notification_urls')
|
||||
if v and not tag.get('notification_muted'):
|
||||
logger.debug("OK MAN")
|
||||
next_n_object = deepcopy(n_object)
|
||||
next_n_object['notification_urls'] = v
|
||||
next_n_object['notification_title'] = _check_cascading_vars(self.datastore, 'notification_title', watch)
|
||||
next_n_object['notification_body'] = _check_cascading_vars(self.datastore, 'notification_body', watch)
|
||||
next_n_object['notification_format'] = _check_cascading_vars(self.datastore, 'notification_format', watch)
|
||||
notification_objects.append(next_n_object)
|
||||
logger.debug(f"Adding notification from group/tag {tag['title']}")
|
||||
|
||||
|
||||
# (Individual watch) Only prepare to notify if the rules above matched
|
||||
queued = False
|
||||
if notification_objects:
|
||||
if n_object and n_object.get('notification_urls'):
|
||||
queued = True
|
||||
|
||||
count = watch.get('notification_alert_count', 0) + 1
|
||||
self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
|
||||
for n_object in notification_objects:
|
||||
self.queue_notification_for_watch(n_object=n_object, watch=watch)
|
||||
|
||||
self.queue_notification_for_watch(n_object=n_object, watch=watch)
|
||||
|
||||
return queued
|
||||
|
||||
|
||||
@@ -260,16 +260,6 @@ class difference_detection_processor():
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
|
||||
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
|
||||
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
def get_extra_watch_config(self, filename):
|
||||
|
||||
@@ -31,7 +31,6 @@ class Restock(dict):
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
# @todo locale needs to be the locale of the webpage
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
|
||||
@@ -437,18 +437,17 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
#useless
|
||||
# from ...html_tools import html_to_text
|
||||
# text = html_to_text(self.fetcher.content)
|
||||
# logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
# if not len(text):
|
||||
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
# raise ReplyWithContentButNoText(url=watch.link,
|
||||
# status_code=self.fetcher.get_last_status_code(),
|
||||
# screenshot=self.fetcher.screenshot,
|
||||
# html_content=self.fetcher.content,
|
||||
# xpath_data=self.fetcher.xpath_data
|
||||
# )
|
||||
from ...html_tools import html_to_text
|
||||
text = html_to_text(self.fetcher.content)
|
||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
if not len(text):
|
||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
raise ReplyWithContentButNoText(url=watch.link,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
html_content=self.fetcher.content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Which restock settings to compare against?
|
||||
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
|
||||
|
||||
@@ -283,7 +283,4 @@ def query_price_availability(extracted_data):
|
||||
if not result.get('availability') and 'availability' in microdata:
|
||||
result['availability'] = microdata['availability']
|
||||
|
||||
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
|
||||
# using something like babel you need to know the locale of the website and even then it can be problematic
|
||||
# we dont really do anything with the price data so far.. so just accept it the way it comes.
|
||||
return result
|
||||
|
||||
@@ -178,44 +178,23 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
|
||||
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
|
||||
Test that a tag/group can be updated with processor_config_restock_diff via the API.
|
||||
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
|
||||
# Create a tag
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({
|
||||
"title": "Restock Group",
|
||||
"overrides_watch": True,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 7777777
|
||||
}
|
||||
}),
|
||||
data=json.dumps({"title": "Restock Group"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
|
||||
assert res.status_code == 201
|
||||
tag_uuid = res.json.get('uuid')
|
||||
|
||||
# Verify processor config was saved during creation (the bug: these were discarded)
|
||||
res = client.get(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
tag_data = res.json
|
||||
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
|
||||
"processor_config_restock_diff should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
|
||||
"price_change_min should be saved on POST"
|
||||
|
||||
# Update tag with valid processor_config_restock_diff via PUT
|
||||
# Update tag with valid processor_config_restock_diff
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
|
||||
@@ -48,15 +48,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
# Check POST preview
|
||||
res = client.post(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
|
||||
# Make a change
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
||||
from flask import url_for
|
||||
import io
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
from zipfile import ZipFile
|
||||
import re
|
||||
import time
|
||||
from changedetectionio.model import Watch, Tag
|
||||
@@ -68,9 +68,6 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Check for changedetection.json (settings file)
|
||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||
|
||||
# secret.txt must never be included — it contains the Flask session key
|
||||
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
|
||||
|
||||
# Get the latest one
|
||||
res = client.get(
|
||||
url_for("backups.remove_backups"),
|
||||
@@ -199,63 +196,4 @@ def test_backup_restore(client, live_server, measure_memory_usage, datastore_pat
|
||||
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
|
||||
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
|
||||
assert isinstance(restored_tag2, Tag.model), \
|
||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||
|
||||
|
||||
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Zip Slip path traversal entries in a restore zip must be rejected."""
|
||||
import pytest
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# Build a zip with a path traversal entry that would escape the extraction dir
|
||||
malicious_zip = io.BytesIO()
|
||||
with ZipFile(malicious_zip, 'w') as zf:
|
||||
zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
|
||||
malicious_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
with pytest.raises(ValueError, match="Zip Slip"):
|
||||
import_from_zip(
|
||||
zip_stream=malicious_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
|
||||
|
||||
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""A zip whose total uncompressed size exceeds the limit must be rejected.
|
||||
|
||||
The guard reads file_size from the zip central-directory metadata — no
|
||||
actual decompression happens, so this test is fast and uses minimal RAM.
|
||||
100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
|
||||
50 KB is enough to trigger the check without creating any large files.
|
||||
"""
|
||||
import pytest
|
||||
import changedetectionio.blueprint.backups.restore as restore_mod
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
|
||||
bomb_zip = io.BytesIO()
|
||||
with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
|
||||
zf.writestr("data.txt", b"\x00" * (100 * 1024))
|
||||
bomb_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
|
||||
try:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024 # 50 KB limit for this test
|
||||
with pytest.raises(ValueError, match="decompressed size"):
|
||||
import_from_zip(
|
||||
zip_stream=bomb_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
finally:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
|
||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||
@@ -1,7 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
@@ -12,69 +11,6 @@ import os
|
||||
|
||||
|
||||
|
||||
def test_surrogate_characters_in_content_are_sanitized():
|
||||
"""Lone surrogates can appear in requests' r.text when a server returns malformed/mixed-encoding
|
||||
content. Without sanitization, encoding to UTF-8 raises UnicodeEncodeError.
|
||||
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
content_with_surrogate = '<html><body>Hello \udcad World</body></html>'
|
||||
|
||||
# Confirm the raw problem exists
|
||||
with pytest.raises(UnicodeEncodeError):
|
||||
content_with_surrogate.encode('utf-8')
|
||||
|
||||
# Our fix: sanitize after fetcher.run() in processors/base.py call_browser()
|
||||
sanitized = content_with_surrogate.encode('utf-8', errors='replace').decode('utf-8')
|
||||
assert 'Hello' in sanitized
|
||||
assert 'World' in sanitized
|
||||
assert '\udcad' not in sanitized
|
||||
|
||||
# Checksum computation (processors/base.py get_raw_document_checksum) must not crash
|
||||
hashlib.md5(sanitized.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def test_utf8_content_without_charset_header(client, live_server, datastore_path):
|
||||
"""Server returns UTF-8 content but no charset in Content-Type header.
|
||||
chardet can misdetect such pages as UTF-7 (Python 3.14 then produces surrogates).
|
||||
Our fix tries UTF-8 first before falling back to chardet.
|
||||
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
# UTF-8 encoded content with non-ASCII chars - no charset will be in the header
|
||||
html = '<html><body><p>Español</p><p>Français</p><p>日本語</p></body></html>'
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('utf-8'), mode='wb')
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||
# Should decode correctly as UTF-8, not produce mojibake (Español) or replacement chars
|
||||
assert 'Español'.encode('utf-8') in res.data
|
||||
assert 'Français'.encode('utf-8') in res.data
|
||||
assert '日本語'.encode('utf-8') in res.data
|
||||
|
||||
|
||||
def test_shiftjis_with_meta_charset(client, live_server, datastore_path):
|
||||
"""Server returns Shift-JIS content with no charset in HTTP header, but the HTML
|
||||
declares <meta charset="Shift-JIS">. We should use the meta tag, not chardet.
|
||||
Real-world case: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
japanese_text = '日本語のページ'
|
||||
html = f'<html><head><meta http-equiv="Content-Type" content="text/html;charset=Shift-JIS"></head><body><p>{japanese_text}</p></body></html>'
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('shift_jis'), mode='wb')
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||
assert japanese_text.encode('utf-8') in res.data
|
||||
|
||||
|
||||
def set_html_response(datastore_path):
|
||||
test_return_data = """
|
||||
<html><body><span class="nav_second_img_text">
|
||||
|
||||
@@ -171,7 +171,6 @@ def test_group_tag_notification(client, live_server, measure_memory_usage, datas
|
||||
delete_all_watches(client)
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
notification_url_endpoint = url_for('test_notification_endpoint', _external=True).replace('http', 'post')
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
@@ -182,50 +181,35 @@ def test_group_tag_notification(client, live_server, measure_memory_usage, datas
|
||||
|
||||
assert b"Watch added" in res.data
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
|
||||
notification_form_data = {"notification_urls": notification_url,
|
||||
"notification_title": "New GROUP TAG ChangeDetection.io Notification - {{watch_url}}",
|
||||
"notification_body": "BASE URL: {{base_url}}\n"
|
||||
"Watch URL: {{watch_url}}\n"
|
||||
"Watch UUID: {{watch_uuid}}\n"
|
||||
"Watch title: {{watch_title}}\n"
|
||||
"Watch tag: {{watch_tag}}\n"
|
||||
"Preview: {{preview_url}}\n"
|
||||
"Diff URL: {{diff_url}}\n"
|
||||
"Snapshot: {{current_snapshot}}\n"
|
||||
"Diff: {{diff}}\n"
|
||||
"Diff Added: {{diff_added}}\n"
|
||||
"Diff Removed: {{diff_removed}}\n"
|
||||
"Diff Full: {{diff_full}}\n"
|
||||
"Diff as Patch: {{diff_patch}}\n"
|
||||
":-)",
|
||||
"notification_screenshot": True,
|
||||
"notification_format": 'text',
|
||||
"title": "test-tag"}
|
||||
|
||||
group_tag_form_data = {
|
||||
"notification_title": "New GROUP TAG ChangeDetection.io Notification - {{watch_url}}",
|
||||
"notification_body": "BASE URL: {{base_url}}\n"
|
||||
"Watch URL: {{watch_url}}\n"
|
||||
"Watch UUID: {{watch_uuid}}\n"
|
||||
"Watch title: {{watch_title}}\n"
|
||||
"Watch tag: {{watch_tag}}\n"
|
||||
"Preview: {{preview_url}}\n"
|
||||
"Diff URL: {{diff_url}}\n"
|
||||
"Snapshot: {{current_snapshot}}\n"
|
||||
"Diff: {{diff}}\n"
|
||||
"Diff Added: {{diff_added}}\n"
|
||||
"Diff Removed: {{diff_removed}}\n"
|
||||
"Diff Full: {{diff_full}}\n"
|
||||
"Diff as Patch: {{diff_patch}}\n"
|
||||
":-)",
|
||||
"notification_screenshot": True,
|
||||
"notification_format": 'text',
|
||||
}
|
||||
|
||||
# Setup for test-tag
|
||||
group_tag_form_data['notification_urls'] = notification_url_endpoint+"?outputfilename=test-tag.txt"
|
||||
group_tag_form_data['title'] = 'test-tag'
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_edit_submit", uuid=get_UUID_for_tag_name(client, name="test-tag")),
|
||||
data=group_tag_form_data,
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated" in res.data
|
||||
|
||||
# Setup for other-tag, we only add notifications-urls
|
||||
group_tag_form_data['notification_urls'] = notification_url_endpoint+"?outputfilename=other-tag.txt"
|
||||
group_tag_form_data['title'] = 'other-tag'
|
||||
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_edit_submit", uuid=get_UUID_for_tag_name(client, name="other-tag")),
|
||||
data=group_tag_form_data,
|
||||
data=notification_form_data,
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated" in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
@@ -233,14 +217,12 @@ def test_group_tag_notification(client, live_server, measure_memory_usage, datas
|
||||
|
||||
time.sleep(3)
|
||||
|
||||
assert os.path.isfile(os.path.join(datastore_path, "test-tag.txt"))
|
||||
assert os.path.isfile(os.path.join(datastore_path, "other-tag.txt"))
|
||||
assert os.path.isfile(os.path.join(datastore_path, "notification.txt"))
|
||||
|
||||
# @todo assert the group name or other unique body is in other-tag.txt
|
||||
# Verify what was sent as a notification, this file should exist
|
||||
with open(os.path.join(datastore_path, "test-tag.txt"), "r") as f:
|
||||
with open(os.path.join(datastore_path, "notification.txt"), "r") as f:
|
||||
notification_submission = f.read()
|
||||
os.unlink(os.path.join(datastore_path, "test-tag.txt"))
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
|
||||
# Did we see the URL that had a change, in the notification?
|
||||
# Diff was correctly executed
|
||||
|
||||
@@ -624,76 +624,3 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
|
||||
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
||||
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
||||
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
||||
|
||||
|
||||
def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that clearing snapshot history works with translated confirmation text.
|
||||
|
||||
Issue #3865: When the app language is set to German, the clear history
|
||||
confirmation dialog shows the translated word (e.g. 'loschen') but the
|
||||
backend only accepted the English word 'clear', making it impossible
|
||||
to clear snapshots in non-English languages.
|
||||
"""
|
||||
from flask import url_for
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Add a watch so there is history to clear
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Set language to German
|
||||
res = client.get(
|
||||
url_for("set_language", locale="de"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
|
||||
# Verify the clear history page shows the German confirmation word
|
||||
res = client.get(
|
||||
url_for("ui.clear_all_history"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert "löschen".encode() in res.data, "Expected German word 'loschen' on clear history page"
|
||||
|
||||
# Submit the form with the German translated word
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={"confirmtext": "löschen"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
# Should NOT show error message
|
||||
assert b"Incorrect confirmation text" not in res.data, \
|
||||
"German confirmation word 'loschen' should be accepted (issue #3865)"
|
||||
|
||||
# Switch back to English and verify English word still works
|
||||
res = client.get(
|
||||
url_for("set_language", locale="en_US"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={"confirmtext": "clear"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert b"Incorrect confirmation text" not in res.data, \
|
||||
"English confirmation word 'clear' should still be accepted"
|
||||
|
||||
# Verify that missing/empty confirmtext does not crash the server
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200, \
|
||||
"Missing confirmtext should not crash the server"
|
||||
|
||||
@@ -467,38 +467,3 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
|
||||
assert b'155.55' in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_itemprop_as_str(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
test_return_data = f"""<html>
|
||||
<body>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
<span itemprop="offers" itemscope itemtype="http://schema.org/Offer">
|
||||
<meta content="767.55" itemprop="price"/>
|
||||
<meta content="EUR" itemprop="priceCurrency"/>
|
||||
<meta content="InStock" itemprop="availability"/>
|
||||
<meta content="https://www.123-test.dk" itemprop="url"/>
|
||||
</span>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"))
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'767.55' in res.data
|
||||
@@ -592,74 +592,3 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
|
||||
set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
|
||||
for content_type in RSS_XML_CONTENT_TYPES:
|
||||
_subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
|
||||
|
||||
|
||||
# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
|
||||
# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
|
||||
def test_xpath_blocked_functions_unit():
|
||||
"""Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed)."""
|
||||
import elementpath
|
||||
from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
|
||||
from lxml import html
|
||||
|
||||
html_content = '<html><body><p>safe content</p></body></html>'
|
||||
|
||||
dangerous_expressions = [
|
||||
"unparsed-text('file:///etc/passwd')",
|
||||
"unparsed-text-lines('file:///etc/passwd')",
|
||||
"unparsed-text-available('file:///etc/passwd')",
|
||||
"doc('file:///etc/passwd')",
|
||||
"doc-available('file:///etc/passwd')",
|
||||
"environment-variable('PATH')",
|
||||
"available-environment-variables()",
|
||||
]
|
||||
|
||||
for expr in dangerous_expressions:
|
||||
# xpath_filter() must raise, not silently return file contents
|
||||
try:
|
||||
result = xpath_filter(expr, html_content)
|
||||
assert False, f"xpath_filter should have raised for: {expr!r}, got: {result!r}"
|
||||
except elementpath.ElementPathError:
|
||||
pass # expected
|
||||
|
||||
# SafeXPath3Parser must reject the expression at parse time
|
||||
tree = html.fromstring(html_content)
|
||||
try:
|
||||
elementpath.select(tree, expr, parser=SafeXPath3Parser)
|
||||
assert False, f"SafeXPath3Parser should have raised for: {expr!r}"
|
||||
except elementpath.ElementPathError:
|
||||
pass # expected
|
||||
|
||||
# Sanity check: normal XPath still works
|
||||
result = xpath_filter('//p/text()', html_content)
|
||||
assert result == 'safe content'
|
||||
|
||||
|
||||
# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
|
||||
def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
    """Check that the watch edit form refuses dangerous XPath 3.0 filters.

    A watch on the test endpoint is created and checked once. Each dangerous
    "xpath:" include filter is then posted to the edit page of the watch
    addressed as uuid "first", and the response body must contain the
    "is not a valid XPath expression" message. All watches are deleted at the
    end.
    """
    from flask import url_for

    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    datastore = client.application.config.get('DATASTORE')
    datastore.add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    rejected_filters = (
        "xpath:unparsed-text('file:///etc/passwd')",
        "xpath:environment-variable('PATH')",
        "xpath:doc('file:///etc/passwd')",
    )

    for include_filter in rejected_filters:
        edit_url = url_for("ui.ui_edit.edit_page", uuid="first")
        # Field order is kept as-is so the encoded form body is unchanged.
        form_fields = {
            "include_filters": include_filter,
            "url": test_url,
            "tags": "",
            "headers": "",
            'fetch_backend': "html_requests",
            "time_between_check_use_default": "y",
        }
        response = client.post(edit_url, data=form_fields, follow_redirects=True)
        assert b"is not a valid XPath expression" in response.data, \
            f"Form should reject dangerous expression: {include_filter!r}"

    delete_all_watches(client)
|
||||
|
||||
@@ -343,11 +343,8 @@ def new_live_server_setup(live_server):
|
||||
@live_server.app.route('/test_notification_endpoint', methods=['POST', 'GET'])
|
||||
def test_notification_endpoint():
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
from loguru import logger
|
||||
# @todo make safe
|
||||
fname = request.args.get('outputfilename', "notification.txt")
|
||||
logger.debug(f"Writing test notification endpoint data to '{fname}' - {request.args}")
|
||||
with open(os.path.join(datastore_path, fname), "wb") as f:
|
||||
|
||||
with open(os.path.join(datastore_path, "notification.txt"), "wb") as f:
|
||||
# Debug method, dump all POST to file also, used to prove #65
|
||||
data = request.stream.read()
|
||||
if data != None:
|
||||
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -100,19 +100,6 @@ def is_safe_valid_url(test_url):
|
||||
logger.warning('URL validation failed: URL is empty or whitespace only')
|
||||
return False
|
||||
|
||||
# Per-request cache: same URL is often validated 2-3x per watchlist render (sort + display).
|
||||
# Flask's g is scoped to one request and auto-cleared on teardown, so dynamic Jinja2 URLs
|
||||
# like {{microtime()}} are always re-evaluated on the next request.
|
||||
# Falls back gracefully when called outside a request context (e.g. background workers).
|
||||
_cache_key = test_url
|
||||
try:
|
||||
from flask import g
|
||||
_cache = g.setdefault('_url_validation_cache', {})
|
||||
if _cache_key in _cache:
|
||||
return _cache[_cache_key]
|
||||
except RuntimeError:
|
||||
_cache = None # No app context
|
||||
|
||||
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
|
||||
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'
|
||||
|
||||
@@ -125,14 +112,11 @@ def is_safe_valid_url(test_url):
|
||||
test_url = r.sub('', test_url)
|
||||
|
||||
# Check the actual rendered URL in case of any Jinja markup
|
||||
# Only run jinja_render when the URL actually contains Jinja2 syntax - creating a new
|
||||
# ImmutableSandboxedEnvironment is expensive and is called once per watch per page load
|
||||
if '{%' in test_url or '{{' in test_url:
|
||||
try:
|
||||
test_url = jinja_render(test_url)
|
||||
except Exception as e:
|
||||
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
||||
return False
|
||||
try:
|
||||
test_url = jinja_render(test_url)
|
||||
except Exception as e:
|
||||
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
||||
return False
|
||||
|
||||
# Check query parameters and fragment
|
||||
if re.search(r'[<>]', test_url):
|
||||
@@ -158,6 +142,4 @@ def is_safe_valid_url(test_url):
|
||||
logger.warning(f'URL f"{test_url}" failed validation, aborting.')
|
||||
return False
|
||||
|
||||
if _cache is not None:
|
||||
_cache[_cache_key] = True
|
||||
return True
|
||||
|
||||
+4
-172
@@ -113,156 +113,8 @@ tags:
|
||||
|
||||
- name: Plugin API Extensions
|
||||
description: |
|
||||
## How Processor Plugins Extend the API
|
||||
|
||||
changedetection.io uses a **processor plugin** system to handle different types of change detection.
|
||||
Each processor lives in `changedetectionio/processors/<name>/` and may include an `api.yaml` file
|
||||
that extends the core Watch schema with processor-specific configuration fields.
|
||||
|
||||
### How it works
|
||||
|
||||
At startup, changedetection.io scans all installed processors for an `api.yaml` file. Any schemas
|
||||
and code samples defined there are deep-merged into the live API specification, making the
|
||||
processor's configuration fields valid on all watch create and update requests.
|
||||
|
||||
The live, fully-merged spec is always available at `/api/v1/full-spec` — use that URL with
|
||||
Swagger UI or Redoc to see the complete schema for your specific installation.
|
||||
|
||||
---
|
||||
|
||||
### Writing a processor `api.yaml`
|
||||
|
||||
Place an `api.yaml` in the processor plugin's own directory, alongside its `__init__.py`
|
||||
(e.g. `changedetectionio/processors/my_processor/api.yaml`). The schema name **must** follow the
|
||||
convention `processor_config_<processor_name>` (e.g. `processor_config_restock_diff`). That same
|
||||
key is used as the JSON field name when creating or updating a watch.
|
||||
|
||||
A minimal `api.yaml` for a hypothetical `my_processor`:
|
||||
|
||||
```yaml
|
||||
components:
|
||||
schemas:
|
||||
processor_config_my_processor:
|
||||
type: object
|
||||
description: Configuration for my_processor
|
||||
properties:
|
||||
some_option:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Enable some behaviour
|
||||
|
||||
paths:
|
||||
/watch:
|
||||
post:
|
||||
x-code-samples:
|
||||
- lang: curl
|
||||
label: my_processor example
|
||||
source: |
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com",
|
||||
"processor": "my_processor",
|
||||
"processor_config_my_processor": { "some_option": true }
|
||||
}'
|
||||
```
|
||||
|
||||
The `paths` section in `api.yaml` is used only for injecting additional `x-code-samples` into
|
||||
existing endpoints — you cannot define new routes via plugin.
|
||||
|
||||
---
|
||||
|
||||
### Built-in plugin: `restock_diff`
|
||||
|
||||
The `restock_diff` processor is always shipped with changedetection.io. It monitors product
|
||||
availability and price changes using structured data (JSON-LD / schema.org microdata) and
|
||||
text heuristics. It is activated by setting `"processor": "restock_diff"` on a watch.
|
||||
|
||||
It adds the `processor_config_restock_diff` block to the Watch schema with these fields:
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `in_stock_processing` | string | `in_stock_only` | `in_stock_only` — only alert Out-of-Stock→In-Stock · `all_changes` — alert any availability change · `off` — disable stock tracking |
|
||||
| `follow_price_changes` | boolean | `true` | Monitor and alert on price changes |
|
||||
| `price_change_min` | number\|null | — | Alert when price drops **below** this value |
|
||||
| `price_change_max` | number\|null | — | Alert when price rises **above** this value |
|
||||
| `price_change_threshold_percent` | number\|null | — | Minimum % change since the original price to trigger an alert |
|
||||
|
||||
#### CREATE — Add a restock/price monitor
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/product/widget",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### READ — Retrieve the monitor
|
||||
|
||||
The response JSON includes `processor_config_restock_diff` alongside all standard watch fields:
|
||||
|
||||
```bash
|
||||
curl -X GET "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY"
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"uuid": "cc0cfffa-f449-477b-83ea-0caafd1dc091",
|
||||
"url": "https://example.com/product/widget",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5,
|
||||
"price_change_min": null,
|
||||
"price_change_max": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### UPDATE — Change thresholds without recreating the monitor
|
||||
|
||||
Only fields included in the request body are updated; omitted fields are left unchanged.
|
||||
|
||||
```bash
|
||||
curl -X PUT "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": true,
|
||||
"price_change_min": 10.00,
|
||||
"price_change_max": 500.00
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### DELETE — Remove the monitor
|
||||
|
||||
```bash
|
||||
curl -X DELETE "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
For the complete schema-validated documentation including all processor fields, fetch the live spec
|
||||
and load it into Swagger UI or Redoc:
|
||||
|
||||
```
|
||||
GET /api/v1/full-spec
|
||||
```
|
||||
Retrieve the live OpenAPI specification for this instance. Unlike the static spec, this endpoint
|
||||
returns the fully merged spec including schemas for any processor plugins installed on this instance.
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
@@ -2069,35 +1921,15 @@ paths:
|
||||
|
||||
Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
|
||||
spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
|
||||
|
||||
**Use this URL** with Swagger UI or Redoc to get schema-accurate documentation for your
|
||||
specific install — it includes every `processor_config_<name>` schema block contributed by
|
||||
installed processors (e.g. `processor_config_restock_diff` from the built-in restock plugin).
|
||||
|
||||
This endpoint requires no authentication and returns YAML.
|
||||
|
||||
To load it directly in Swagger UI, paste the URL into the "Explore" box:
|
||||
```
|
||||
http://localhost:5000/api/v1/full-spec
|
||||
```
|
||||
Use this URL with Swagger UI or Redoc to get accurate documentation for your specific install.
|
||||
security: []
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
source: |
|
||||
# Fetch the live merged spec (no API key needed)
|
||||
curl -X GET "http://localhost:5000/api/v1/full-spec"
|
||||
- lang: 'Python'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
# No authentication required
|
||||
response = requests.get('http://localhost:5000/api/v1/full-spec')
|
||||
print(response.text) # Returns YAML
|
||||
responses:
|
||||
'200':
|
||||
description: |
|
||||
Merged OpenAPI specification in YAML format. Includes all processor plugin schemas
|
||||
(e.g. `processor_config_restock_diff`) not present in the static `api-spec.yaml`.
|
||||
description: Merged OpenAPI specification in YAML format
|
||||
content:
|
||||
application/yaml:
|
||||
schema:
|
||||
|
||||
+9
-353
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user