mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-11 19:31:30 +00:00
Compare commits
55 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 388b280219 | |||
| 4294b461c7 | |||
| 77116f5203 | |||
| 238d6ba72d | |||
| ede06a92bd | |||
| 9d4249c820 | |||
| b5bac1c868 | |||
| 0479aa9654 | |||
| 746e213398 | |||
| 84d97ec9cf | |||
| c8f13f5084 | |||
| d74b7d5329 | |||
| 31a760c214 | |||
| 43bba5a1b6 | |||
| 7c9eb02df4 | |||
| 0ad4090d68 | |||
| 9a10353d61 | |||
| f8236848ba | |||
| 4ba5f6a003 | |||
| 05fc885108 | |||
| f37e448411 | |||
| dadc804567 | |||
| 65517a9c74 | |||
| 17002b5b23 | |||
| c4b890f4fa | |||
| 2ab172408d | |||
| b98f55030a | |||
| 6181b09b16 | |||
| 5f9fa15a6a | |||
| 34c2c05bc5 | |||
| 0da8dfb09a | |||
| b747e06c3e | |||
| 5a4266069b | |||
| 36269717b2 | |||
| 84f2629a0c | |||
| e9d740bd49 | |||
| c18421fbe9 | |||
| f29d6a857b | |||
| fcfe089a53 | |||
| b32617d700 | |||
| 380d8a26a1 | |||
| 02c03fc32b | |||
| db3d38b3ee | |||
| ecd8af94f6 | |||
| e400e463a4 | |||
| 9d355b8f05 | |||
| da43a17541 | |||
| 904eaaaaf7 | |||
| 1e12ae404f | |||
| ec7d56f85d | |||
| 417d57e574 | |||
| 1d7d812eb0 | |||
| 524393a1fb | |||
| b09ebcbef6 | |||
| 30ac10ff24 |
@@ -66,27 +66,27 @@ jobs:
|
||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
uses: docker/setup-qemu-action@v4
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Docker Hub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v4
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v5
|
||||
uses: docker/metadata-action@v6
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
@@ -103,11 +103,19 @@ jobs:
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -128,10 +136,10 @@ jobs:
|
||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
||||
echo "Github ref: ${{ github.ref }}"
|
||||
echo "Github ref name: ${{ github.ref_name }}"
|
||||
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v5
|
||||
uses: docker/metadata-action@v6
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
@@ -142,11 +150,20 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||
type=raw,value=latest
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
org.opencontainers.image.version=${{ github.event.release.tag_name }}
|
||||
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
|
||||
@@ -60,14 +60,14 @@ jobs:
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
uses: docker/setup-qemu-action@v4
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v4
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
|
||||
@@ -52,4 +52,13 @@ jobs:
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.13'
|
||||
skip-pypuppeteer: true
|
||||
skip-pypuppeteer: true
|
||||
|
||||
|
||||
test-application-3-14:
|
||||
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.14'
|
||||
skip-pypuppeteer: false
|
||||
|
||||
@@ -42,10 +42,10 @@ jobs:
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v4
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -99,11 +99,7 @@ jobs:
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -587,6 +583,10 @@ jobs:
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
- name: Plugin get_html_head_extras hook injects into base.html
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -706,7 +706,19 @@ jobs:
|
||||
- name: Check upgrade works without error
|
||||
run: |
|
||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||
|
||||
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
make \
|
||||
patch \
|
||||
pkg-config \
|
||||
zlib1g-dev
|
||||
|
||||
# Checkout old version and create datastore
|
||||
git checkout 0.49.1
|
||||
python3 -m venv .venv
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.3'
|
||||
__version__ = '0.54.8'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
@@ -61,8 +61,22 @@ import time
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||
try:
|
||||
import ctypes as _ctypes
|
||||
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
|
||||
@@ -177,6 +177,13 @@ class Tag(Resource):
|
||||
|
||||
new_uuid = self.datastore.add_tag(title=title)
|
||||
if new_uuid:
|
||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||
if extra:
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||
if tag:
|
||||
tag.update(extra)
|
||||
tag.commit()
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported tag", 400
|
||||
|
||||
@@ -338,7 +338,7 @@ class WatchHistoryDiff(Resource):
|
||||
word_diff = True
|
||||
|
||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'true'))
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'false'))
|
||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||
include_added = strtobool(request.args.get('added', 'true'))
|
||||
@@ -349,7 +349,7 @@ class WatchHistoryDiff(Resource):
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=ignore_whitespace,
|
||||
include_equal=changes_only,
|
||||
include_equal=not changes_only,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
@@ -567,4 +567,4 @@ class CreateWatch(Resource):
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
return list, 200
|
||||
return list, 200
|
||||
|
||||
@@ -40,11 +40,6 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||
for uuid, tag in (tags or {}).items():
|
||||
for f in Path(tag.data_dir).glob('*'):
|
||||
@@ -103,8 +98,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
|
||||
backup_threads = []
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def request_backup():
|
||||
if any(thread.is_alive() for thread in backup_threads):
|
||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||
@@ -146,30 +141,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return backup_info
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||
abort(404)
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||
|
||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||
if filename == 'latest':
|
||||
backups = find_backups()
|
||||
if not backups:
|
||||
abort(404)
|
||||
filename = backups[0]['filename']
|
||||
|
||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||
abort(404)
|
||||
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/", methods=['GET'])
|
||||
@backups_blueprint.route("/create", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def create():
|
||||
backups = find_backups()
|
||||
output = render_template("backup_create.html",
|
||||
@@ -178,8 +176,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
)
|
||||
return output
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def remove_backups():
|
||||
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import threading
|
||||
@@ -14,6 +15,16 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
|
||||
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
||||
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
||||
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
||||
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
||||
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
||||
_UUID_RE = re.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
class RestoreForm(Form):
|
||||
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||
@@ -50,7 +61,18 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
||||
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
||||
zf.extractall(tmpdir)
|
||||
total_uncompressed = sum(m.file_size for m in zf.infolist())
|
||||
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
|
||||
raise ValueError(
|
||||
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
|
||||
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
|
||||
)
|
||||
resolved_dest = os.path.realpath(tmpdir)
|
||||
for member in zf.infolist():
|
||||
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
|
||||
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
|
||||
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
|
||||
zf.extract(member, tmpdir)
|
||||
logger.debug("Restore: zip extracted, scanning UUID directories")
|
||||
|
||||
for entry in os.scandir(tmpdir):
|
||||
@@ -58,6 +80,9 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
||||
continue
|
||||
|
||||
uuid = entry.name
|
||||
if not _UUID_RE.match(uuid):
|
||||
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
|
||||
continue
|
||||
tag_json_path = os.path.join(entry.path, 'tag.json')
|
||||
watch_json_path = os.path.join(entry.path, 'watch.json')
|
||||
|
||||
@@ -149,16 +174,18 @@ def construct_restore_blueprint(datastore):
|
||||
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
|
||||
restore_threads = []
|
||||
|
||||
@login_optionally_required
|
||||
@restore_blueprint.route("/restore", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def restore():
|
||||
form = RestoreForm()
|
||||
return render_template("backup_restore.html",
|
||||
form=form,
|
||||
restore_running=any(t.is_alive() for t in restore_threads))
|
||||
restore_running=any(t.is_alive() for t in restore_threads),
|
||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
||||
|
||||
@login_optionally_required
|
||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def backups_restore_start():
|
||||
if any(t.is_alive() for t in restore_threads):
|
||||
flash(gettext("A restore is already running, check back in a few minutes"), "error")
|
||||
@@ -173,10 +200,22 @@ def construct_restore_blueprint(datastore):
|
||||
flash(gettext("File must be a .zip backup file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return
|
||||
# Reject oversized uploads before reading the stream into memory.
|
||||
content_length = request.content_length
|
||||
if content_length and content_length > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return.
|
||||
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
|
||||
try:
|
||||
zip_bytes = io.BytesIO(zip_file.read())
|
||||
zipfile.ZipFile(zip_bytes) # quick validity check before spawning
|
||||
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
|
||||
if len(raw) > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
zip_bytes = io.BytesIO(raw)
|
||||
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
|
||||
pass
|
||||
zip_bytes.seek(0)
|
||||
except zipfile.BadZipFile:
|
||||
flash(gettext("Invalid or corrupted zip file"), "error")
|
||||
@@ -201,6 +240,7 @@ def construct_restore_blueprint(datastore):
|
||||
name="BackupRestore"
|
||||
)
|
||||
restore_thread.start()
|
||||
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
|
||||
restore_threads.append(restore_thread)
|
||||
flash(gettext("Restore started in background, check back in a few minutes."))
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
@@ -19,6 +19,9 @@
|
||||
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||
|
||||
@@ -102,6 +102,35 @@ def run_async_in_browser_loop(coro):
|
||||
else:
|
||||
raise RuntimeError("Browser steps event loop is not available")
|
||||
|
||||
async def _close_session_resources(session_data, label=''):
|
||||
"""Close all browser resources for a session in the correct order.
|
||||
|
||||
browserstepper.cleanup() closes page+context but not the browser itself.
|
||||
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
|
||||
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
|
||||
"""
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
await browserstepper.cleanup()
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up browserstepper{label}: {e}")
|
||||
|
||||
browser = session_data.get('browser')
|
||||
if browser:
|
||||
try:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser{label}: {e}")
|
||||
|
||||
playwright_context = session_data.get('playwright_context')
|
||||
if playwright_context:
|
||||
try:
|
||||
await playwright_context.stop()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error stopping playwright context{label}: {e}")
|
||||
|
||||
|
||||
def cleanup_expired_sessions():
|
||||
"""Remove expired browsersteps sessions and cleanup their resources"""
|
||||
global browsersteps_sessions, browsersteps_watch_to_session
|
||||
@@ -119,13 +148,10 @@ def cleanup_expired_sessions():
|
||||
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
||||
session_data = browsersteps_sessions[session_id]
|
||||
|
||||
# Cleanup playwright resources asynchronously
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -152,12 +178,10 @@ def cleanup_session_for_watch(watch_uuid):
|
||||
|
||||
session_data = browsersteps_sessions.get(session_id)
|
||||
if session_data:
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -178,64 +202,74 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
import time
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# We keep the playwright session open for many minutes
|
||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
|
||||
browsersteps_start_session = {'start_time': time.time()}
|
||||
|
||||
# Create a new async playwright instance for browser steps
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if not '?' in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
browsersteps_start_session['browser'] = browser
|
||||
browsersteps_start_session['playwright_context'] = playwright_context
|
||||
|
||||
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
|
||||
if proxy_url:
|
||||
|
||||
# Playwright needs separate username and password values
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
|
||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
|
||||
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
|
||||
watch = datastore.data['watching'][watch_uuid]
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_name = watch.get_fetch_backend or 'system'
|
||||
if fetcher_name == 'system':
|
||||
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name, None)
|
||||
|
||||
browser = None
|
||||
playwright_context = None
|
||||
|
||||
# If the fetcher has its own browser launch for the live steps UI, use it.
|
||||
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
|
||||
# or None to fall back to the default CDP path.
|
||||
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
|
||||
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
|
||||
if result is not None:
|
||||
browser, playwright_context = result
|
||||
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")
|
||||
|
||||
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
|
||||
if browser is None:
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if '?' not in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
|
||||
|
||||
browsersteps_start_session['browser'] = browser
|
||||
browsersteps_start_session['playwright_context'] = playwright_context
|
||||
|
||||
browserstepper = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browser,
|
||||
proxy=proxy,
|
||||
start_url=datastore.data['watching'][watch_uuid].link,
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
start_url=watch.link,
|
||||
headers=watch.get('headers')
|
||||
)
|
||||
|
||||
# Initialize the async connection
|
||||
await browserstepper.connect(proxy=proxy)
|
||||
|
||||
browsersteps_start_session['browserstepper'] = browserstepper
|
||||
|
||||
# For test
|
||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
|
||||
return browsersteps_start_session
|
||||
|
||||
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
import uuid
|
||||
@@ -270,8 +304,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug("Starting connection with playwright - done")
|
||||
return {'browsersteps_session_id': browsersteps_session_id}
|
||||
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def browser_steps_fetch_screenshot_image():
|
||||
from flask import (
|
||||
make_response,
|
||||
@@ -296,8 +330,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
|
||||
|
||||
# A request for an action was received
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def browsersteps_ui_update():
|
||||
import base64
|
||||
|
||||
|
||||
@@ -40,12 +40,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
contents = ''
|
||||
now = time.time()
|
||||
try:
|
||||
import asyncio
|
||||
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
|
||||
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
|
||||
asyncio.run(update_handler.call_browser(preferred_proxy_id=preferred_proxy))
|
||||
# title, size is len contents not len xfer
|
||||
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 404:
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
|
||||
@@ -154,9 +154,8 @@
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -352,7 +351,7 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
|
||||
@@ -22,10 +22,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
|
||||
|
||||
from changedetectionio import processors
|
||||
output = render_template("groups-overview.html",
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
available_tags=sorted_tags,
|
||||
form=add_form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
tag_count=tag_count,
|
||||
)
|
||||
|
||||
@@ -208,9 +210,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
template = env.from_string(template_str)
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
# Watches whose URL currently matches this tag's pattern
|
||||
matching_watches = {
|
||||
w_uuid: watch
|
||||
for w_uuid, watch in datastore.data['watching'].items()
|
||||
if default.matches_url(watch.get('url', ''))
|
||||
}
|
||||
|
||||
output = render_template("edit-tag.html",
|
||||
extra_form_content=included_content,
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
matching_watches=matching_watches,
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
**template_args
|
||||
)
|
||||
|
||||
@@ -10,6 +10,8 @@ from changedetectionio.processors.restock_diff.forms import processor_settings_f
|
||||
|
||||
class group_restock_settings_form(restock_settings_form):
|
||||
overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
|
||||
url_match_pattern = StringField('Auto-apply to watches with URLs matching',
|
||||
render_kw={"placeholder": "e.g. *://example.com/* or github.com/myorg"})
|
||||
|
||||
class SingleTag(Form):
|
||||
|
||||
|
||||
@@ -43,6 +43,20 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url_match_pattern, class="m-d") }}
|
||||
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
|
||||
</div>
|
||||
{% if matching_watches %}
|
||||
<div class="pure-control-group">
|
||||
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
|
||||
<ul class="tag-url-match-list">
|
||||
{% for w_uuid, w in matching_watches.items() %}
|
||||
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -3,6 +3,22 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<style>
|
||||
{%- for uuid, tag in available_tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
|
||||
@@ -48,7 +64,7 @@
|
||||
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
||||
</td>
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
|
||||
|
||||
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def clear_all_history():
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
confirmtext = request.form.get('confirmtext', '')
|
||||
|
||||
if confirmtext == 'clear':
|
||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
|
||||
@@ -320,7 +320,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
'capabilities': capabilities
|
||||
'capabilities': capabilities,
|
||||
'auto_applied_tags': {
|
||||
tag_uuid: tag
|
||||
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
|
||||
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
|
||||
},
|
||||
}
|
||||
|
||||
included_content = None
|
||||
|
||||
@@ -10,7 +10,8 @@ from changedetectionio import html_tools
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET'])
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
@@ -59,12 +60,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
|
||||
triggered_line_numbers = []
|
||||
ignored_line_numbers = []
|
||||
@@ -74,7 +71,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.args.get('version')
|
||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||
|
||||
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
|
||||
@@ -81,6 +81,14 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
|
||||
{% if auto_applied_tags %}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Also automatically applied by URL pattern:') }}
|
||||
{% for tag_uuid, tag in auto_applied_tags.items() %}
|
||||
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
|
||||
{% endfor %}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
@@ -28,6 +28,7 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
|
||||
@@ -81,6 +81,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
proxy_list = datastore.proxy_list
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
@@ -92,7 +93,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=datastore.proxy_list,
|
||||
has_proxies=proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
@@ -110,6 +111,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
# Return freed template-building memory to the OS immediately.
|
||||
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if session.get('share-link'):
|
||||
del (session['share-link'])
|
||||
|
||||
|
||||
@@ -213,12 +213,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'has-error' if error_texts|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
@@ -271,7 +272,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
@@ -305,12 +306,20 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||
{%- if watch['restock']['price'] is number -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
|
||||
{%- set restock = watch['restock'] -%}
|
||||
{%- set price = restock.get('price') -%}
|
||||
{%- set cur = restock.get('currency','') -%}
|
||||
|
||||
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||
{{ price }} {{ cur }}<!-- as string -->
|
||||
{%- endif -%}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -49,6 +49,9 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
|
||||
@@ -75,6 +75,9 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
|
||||
@@ -148,10 +148,32 @@ class fetcher(Fetcher):
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||
# (WHATWG encoding sniffing algorithm):
|
||||
# 1. BOM - highest confidence, check before anything else
|
||||
# 2. <meta charset> in first 2kb
|
||||
# 3. chardet statistical detection - last resort
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
boms = [
|
||||
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||
if bom_encoding:
|
||||
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||
r.encoding = bom_encoding
|
||||
else:
|
||||
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||
if meta_charset_match:
|
||||
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||
r.encoding = encoding
|
||||
else:
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
|
||||
@@ -56,6 +56,10 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
|
||||
if total_height > capture_height:
|
||||
stitched = stitched.crop((0, 0, max_width, capture_height))
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
@@ -104,15 +104,17 @@ class fetcher(Fetcher):
|
||||
|
||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
||||
from selenium.webdriver.remote.client_config import ClientConfig
|
||||
from urllib3.util import Timeout
|
||||
driver = None
|
||||
try:
|
||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
||||
remote_connection = RemoteConnection(
|
||||
self.browser_connection_url,
|
||||
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
|
||||
client_config = ClientConfig(
|
||||
remote_server_addr=self.browser_connection_url,
|
||||
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
|
||||
)
|
||||
remote_connection.set_timeout(30) # seconds
|
||||
remote_connection = RemoteConnection(client_config=client_config)
|
||||
|
||||
# Now create the driver with the RemoteConnection
|
||||
driver = RemoteWebDriver(
|
||||
command_executor=remote_connection,
|
||||
options=options
|
||||
|
||||
@@ -45,8 +45,38 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
# Regexes built from the constants above — no brittle hardcoded strings
|
||||
_EXTRACT_REMOVED_RE = re.compile(
|
||||
re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
|
||||
)
|
||||
_EXTRACT_ADDED_RE = re.compile(
|
||||
re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
|
||||
)
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
|
||||
def extract_changed_from(raw_diff: str) -> str:
|
||||
"""Extract only the removed/changed-from fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_REMOVED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def extract_changed_to(raw_diff: str) -> str:
|
||||
"""Extract only the added/changed-into fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
|
||||
"""
|
||||
Render word-level differences between two lines inline using diff-match-patch library.
|
||||
|
||||
@@ -133,14 +163,20 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if removed_tokens:
|
||||
removed_full = ''.join(removed_tokens).rstrip()
|
||||
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
else:
|
||||
result_parts.append(f'{removed_full}{trailing_removed}')
|
||||
|
||||
if added_tokens:
|
||||
if result_parts: # Add newline between removed and added
|
||||
result_parts.append('\n')
|
||||
added_full = ''.join(added_tokens).rstrip()
|
||||
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
else:
|
||||
result_parts.append(f'{added_full}{trailing_added}')
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
else:
|
||||
@@ -150,21 +186,27 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if op == 0: # Equal
|
||||
result_parts.append(text)
|
||||
elif op == 1: # Insertion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
elif op == -1: # Deletion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
|
||||
@@ -360,7 +402,7 @@ def customSequenceMatcher(
|
||||
|
||||
# Use inline word-level diff for single line replacements when word_diff is enabled
|
||||
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix)
|
||||
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
|
||||
if ignore_junk and not has_changes:
|
||||
# No real changes, skip this line
|
||||
|
||||
@@ -4,6 +4,7 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -211,15 +212,24 @@ def _is_safe_valid_url(test_url):
|
||||
from .validate_url import is_safe_valid_url
|
||||
return is_safe_valid_url(test_url)
|
||||
|
||||
@app.template_global('get_html_head_extras')
|
||||
def _get_html_head_extras():
|
||||
from .pluggy_interface import collect_html_head_extras
|
||||
return collect_html_head_extras()
|
||||
|
||||
|
||||
@app.template_filter('format_number_locale')
|
||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||
|
||||
return formatted_value
|
||||
|
||||
@app.template_filter('regex_search')
|
||||
def _jinja2_filter_regex_search(value, pattern):
|
||||
import re
|
||||
return re.search(pattern, str(value)) is not None
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||
@@ -383,6 +393,8 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
|
||||
return ''
|
||||
|
||||
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
"""Sanitize a tag title to create a valid CSS class name.
|
||||
@@ -394,9 +406,8 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
import re
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
||||
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
@@ -484,28 +495,21 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||
|
||||
def get_locale():
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||
# 3. Map browser locale to our internal locale if needed
|
||||
return _locale_aliases.get(browser_locale, browser_locale)
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -1018,15 +1022,16 @@ def check_for_new_version():
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
session = requests.Session()
|
||||
session.verify = False
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
r = session.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': __version__,
|
||||
'app_guid': datastore.data['app_guid'],
|
||||
'watch_count': len(datastore.data['watching'])
|
||||
},
|
||||
|
||||
verify=False)
|
||||
})
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
@@ -608,13 +608,12 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
from changedetectionio.html_tools import SafeXPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
@@ -668,9 +667,11 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
# `jq` requires full compilation in windows and so isn't generally available
|
||||
raise ValidationError("jq not support not found")
|
||||
|
||||
from changedetectionio.html_tools import validate_jq_expression
|
||||
input = line.replace('jq:', '')
|
||||
|
||||
try:
|
||||
validate_jq_expression(input)
|
||||
jq.compile(input)
|
||||
except (ValueError) as e:
|
||||
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
|
||||
@@ -1006,7 +1007,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField(_l('Password'))
|
||||
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"})
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
|
||||
+122
-12
@@ -4,6 +4,7 @@ from loguru import logger
|
||||
from typing import List
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||
@@ -13,6 +14,45 @@ PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
|
||||
|
||||
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
|
||||
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# jq builtins that can leak sensitive data or cause harm when user-supplied expressions are executed.
|
||||
# env/$ENV reads all process environment variables (passwords, API keys, etc.)
|
||||
# include/import can read arbitrary files from disk
|
||||
# input/inputs reads beyond the supplied JSON data
|
||||
# debug/stderr leaks data to stderr
|
||||
# halt/halt_error terminates the process (DoS)
|
||||
# Each entry pairs a compiled regex with the human-readable reason shown to the user.
_JQ_BLOCKED_PATTERNS = [
    (re.compile(regex), reason)
    for regex, reason in (
        (r'\benv\b', 'env (reads environment variables)'),
        (r'\$ENV\b', '$ENV (reads environment variables)'),
        (r'\binclude\b', 'include (reads files from disk)'),
        (r'\bimport\b', 'import (reads files from disk)'),
        (r'\binputs?\b', 'input/inputs (reads beyond provided data)'),
        (r'\bdebug\b', 'debug (leaks data to stderr)'),
        (r'\bstderr\b', 'stderr (leaks data to stderr)'),
        (r'\bhalt(?:_error)?\b', 'halt/halt_error (terminates the process)'),
        (r'\$__loc__\b', '$__loc__ (leaks file path information)'),
        (r'\bbuiltins\b', 'builtins (enumerates available functions)'),
        (r'\bmodulemeta\b', 'modulemeta (leaks module information)'),
        (r'\$JQ_BUILD_CONFIGURATION\b', '$JQ_BUILD_CONFIGURATION (leaks build information)'),
    )
]


def validate_jq_expression(expression: str) -> None:
    """Reject a user-supplied jq expression that references a blocked builtin.

    jq expressions entered by users run server-side, so builtins such as `env`
    would otherwise expose process environment variables (SALTED_PASS, proxy
    credentials, API keys, ...) in the watch output.

    The check is a plain regex scan of the expression text against
    _JQ_BLOCKED_PATTERNS; the first matching entry wins. Setting the
    JQ_ALLOW_RISKY_EXPRESSIONS environment variable to a true value disables
    the check entirely.

    Args:
        expression: The jq program text (without any "jq:" / "jqraw:" prefix).

    Raises:
        ValueError: If the expression matches one of the blocked patterns.
    """
    from changedetectionio.strtobool import strtobool

    risky_allowed = strtobool(os.getenv('JQ_ALLOW_RISKY_EXPRESSIONS', 'false'))
    if not risky_allowed:
        # First blocked pattern found in the expression, in list order (None when clean)
        description = next(
            (reason for blocked_re, reason in _JQ_BLOCKED_PATTERNS if blocked_re.search(expression)),
            None,
        )
        if description is not None:
            msg = f"jq expression uses disallowed builtin: {description}"
            logger.critical(f"Security: blocked jq expression containing '{description}' - expression: {expression!r}")
            raise ValueError(msg)
|
||||
|
||||
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||
@@ -23,6 +63,59 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
# Default names of XPath functions stripped from the parser that evaluates user-supplied
# XPath expressions (see _build_safe_xpath3_parser). These are functions that can reach
# outside the document being filtered: external resources or the process environment.
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
    'unparsed-text',            # reads an external text resource (file read)
    'unparsed-text-lines',      # same as above, returned line by line
    'unparsed-text-available',  # probes whether such a resource can be read
    'doc',                      # fetches an XML document from a URI
    'doc-available',            # probes whether such a document can be fetched
    'json-doc',                 # presumably the JSON counterpart of doc (external fetch) — verify against XPath 3.1 spec
    'json-doc-available',       # presumably the probe counterpart of json-doc
    'collection',           # XPath 2.0+: loads XML node collections from arbitrary URIs
    'uri-collection',       # XPath 3.0+: enumerates URIs from resource collections
    'transform',            # XPath 3.1: XSLT transformation (currently raises, block proactively)
    'load-xquery-module',   # XPath 3.1: loads XQuery modules (currently raises, block proactively)
    'environment-variable',             # env var leakage
    'available-environment-variables',  # env var leakage (lists variable names)
]
|
||||
|
||||
|
||||
def _build_safe_xpath3_parser():
    """Create an XPath3Parser subclass with resource/environment functions stripped out.

    XPath 3.x defines functions that reach outside the document being queried, e.g.:
      - unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
      - doc / doc-available                                            (XML fetch from URI)
      - environment-variable / available-environment-variables         (env var leakage)

    The names to strip come from the XPATH_BLOCKED_FUNCTIONS environment variable
    when it is set (comma-separated, e.g. "unparsed-text,doc,environment-variable"),
    otherwise from _DEFAULT_UNSAFE_XPATH3_FUNCTIONS. Note that a set-but-empty
    variable produces an empty list, i.e. nothing is removed.

    This relies on the subclass owning its own symbol_table (not shared with
    XPath3Parser), so popping entries here leaves XPath3Parser itself untouched.

    Returns:
        type: The SafeXPath3Parser class (not an instance).
    """
    import os
    from elementpath.xpath3 import XPath3Parser

    class SafeXPath3Parser(XPath3Parser):
        pass

    override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
    if override is None:
        names_to_remove = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
    else:
        # Split on commas, trim whitespace and drop empty fragments
        trimmed = (piece.strip() for piece in override.split(','))
        names_to_remove = [piece for piece in trimmed if piece]

    for function_name in names_to_remove:
        # pop() with a default: unknown names are silently ignored
        SafeXPath3Parser.symbol_table.pop(function_name, None)

    return SafeXPath3Parser
|
||||
|
||||
|
||||
# Module-level singleton — built once, reused everywhere.
|
||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
@@ -183,8 +276,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
tree = None
|
||||
@@ -210,7 +301,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
@@ -235,6 +326,9 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
# Drop element references before the finally block so tree.clear() can release
|
||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||
del r
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
@@ -330,12 +424,16 @@ def _parse_json(json_data, json_filter):
|
||||
raise Exception("jq not support not found")
|
||||
|
||||
if json_filter.startswith("jq:"):
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
|
||||
expr = json_filter.removeprefix("jq:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
match = jq_expression.input(json_data).all()
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
if json_filter.startswith("jqraw:"):
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
|
||||
expr = json_filter.removeprefix("jqraw:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
match = jq_expression.input(json_data).all()
|
||||
return '\n'.join(str(item) for item in match)
|
||||
|
||||
@@ -439,13 +537,25 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||
# Server may claim application/json but actually return JSONP
|
||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||
if jsonp_match:
|
||||
try:
|
||||
inner = jsonp_match.group(1).strip()
|
||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
|
||||
@@ -28,18 +28,20 @@ def get_timeago_locale(flask_locale):
|
||||
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'pt_BR': 'pt_BR', # Portuguese (Brasil)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'ja': 'ja', # Japanese
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
return locale_map.get(flask_locale, flask_locale)
|
||||
|
||||
@@ -53,7 +55,8 @@ LANGUAGE_DATA = {
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
|
||||
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
|
||||
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
|
||||
@@ -46,11 +46,26 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
def matches_url(self, url: str) -> bool:
    """Return True if this tag should be auto-applied to the given watch URL.

    Matching is case-insensitive. A pattern containing a wildcard character
    (``*``, ``?`` or ``[``) is matched against the whole URL with fnmatch-style
    globbing; any other pattern is treated as a plain substring.

    Args:
        url: The watch URL to test.

    Returns:
        bool: True on a match; False when no pattern is configured (missing,
        None or blank) or when ``url`` is empty.
    """
    import fnmatch

    # The stored value can be None (e.g. an explicit null in the saved tag data),
    # which would make a bare .strip() raise AttributeError — treat it as "no pattern".
    pattern = (self.get('url_match_pattern') or '').strip().lower()
    if not pattern or not url:
        return False

    candidate = url.lower()
    if any(c in pattern for c in ('*', '?', '[')):
        # Both sides are already lower-cased, so use fnmatchcase: it skips the
        # OS-dependent os.path.normcase() step that fnmatch.fnmatch() applies.
        return fnmatch.fnmatchcase(candidate, pattern)
    return pattern in candidate
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -43,6 +43,11 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||
_FAVICON_FILENAME_CACHE: dict = {}
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
@@ -383,6 +388,25 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
return self.get('fetch_backend')
|
||||
|
||||
@property
def fetcher_supports_screenshots(self):
    """Return True if the fetcher configured for this watch supports screenshots.

    Resolution order:
      1. Take the watch's own backend (``self.get_fetch_backend``).
      2. If it is empty or 'system', fall back to the application-wide
         ``fetch_backend`` setting from ``self._datastore`` (default 'html_requests').
      3. Map any 'extra_browser_*' backend to 'html_webdriver'.
      4. Look the name up on ``changedetectionio.content_fetchers`` and read the
         class attribute ``supports_screenshots``.

    Works for built-in and plugin fetchers alike, as long as they are exposed as
    attributes of the content_fetchers package.

    Returns:
        bool: False when the fetcher cannot be resolved or does not declare
        ``supports_screenshots``.
    """
    from changedetectionio import content_fetchers

    fetcher_name = self.get_fetch_backend  # already handles is_pdf → html_requests
    if not fetcher_name or fetcher_name == 'system':
        fetcher_name = self._datastore['settings']['application'].get('fetch_backend') or 'html_requests'

    # 'extra_browser_<name>' backends were treated as webdriver-backed by the inline
    # checks this property replaces. Such names are not expected to exist as attributes
    # of content_fetchers, so without this mapping they would always report False.
    if str(fetcher_name).startswith('extra_browser_'):
        fetcher_name = 'html_webdriver'

    fetcher_class = getattr(content_fetchers, fetcher_name, None)
    if fetcher_class is None:
        return False

    return bool(getattr(fetcher_class, 'supports_screenshots', False))
|
||||
|
||||
@property
|
||||
def is_pdf(self):
|
||||
url = str(self.get("url") or "").lower()
|
||||
@@ -806,9 +830,8 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
# Invalidate module-level favicon filename cache for this watch
|
||||
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
@@ -823,35 +846,23 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
Find any favicon.* file in the watch data directory.
|
||||
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||
deepcopy (which drops instance attrs), and concurrent request races.
|
||||
Invalidated by bump_favicon() when a new favicon is saved.
|
||||
|
||||
Returns:
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
str: Basename of the favicon file, or None if not found.
|
||||
"""
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
fname = os.path.basename(files[0]) if files else None
|
||||
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||
return fname
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -1182,18 +1193,13 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for
|
||||
from flask import url_for, has_request_context
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
has_app_context = has_request_context()
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
|
||||
@@ -88,6 +88,28 @@ class FormattableTimestamp(str):
|
||||
return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableExtract(str):
    """
    String holding just the changed fragments pulled out of a word-level diff.

    Backs the {{diff_changed_from}} and {{diff_changed_to}} notification tokens:

        {{ diff_changed_from }}  → old value(s) only, e.g. "$99.99"
        {{ diff_changed_to }}    → new value(s) only, e.g. "$109.99"

    The diff between the two snapshots is rendered with word_diff=True and then
    handed to ``extract_fn``, whose return value becomes the string content.
    When both snapshots are empty no diff is rendered and the value is ''.

    Because this is a str subclass it is natively JSON serializable.
    """
    def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
        # Nothing to compare: skip the diff import/render altogether
        if not (prev_snapshot or current_snapshot):
            return super().__new__(cls, '')

        from changedetectionio import diff as diff_module
        word_level_diff = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
        return super().__new__(cls, extract_fn(word_level_diff))
|
||||
|
||||
|
||||
class FormattableDiff(str):
|
||||
"""
|
||||
A str subclass representing a rendered diff. As a plain string it renders
|
||||
@@ -161,6 +183,8 @@ class NotificationContextData(dict):
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
@@ -244,16 +268,27 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
from changedetectionio.diff import extract_changed_from, extract_changed_to
|
||||
extract_specs = {
|
||||
'diff_changed_from': extract_changed_from,
|
||||
'diff_changed_to': extract_changed_to,
|
||||
}
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only create FormattableDiff objects for diff keys actually used in the notification text
|
||||
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
|
||||
for key in NotificationContextData().keys():
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
if not key.startswith('diff'):
|
||||
continue
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
continue
|
||||
if key in diff_specs:
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
elif key in extract_specs:
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
|
||||
|
||||
@@ -174,6 +174,64 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
def get_html_head_extras():
    """Return HTML to inject into the <head> of every page via base.html.

    Plugins can use this hook to add <script>, <style>, or <link> tags that should
    be present on all pages. Return a raw HTML string, or None to contribute nothing.
    The non-empty results of all plugins are joined with newlines by
    collect_html_head_extras().

    IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
    sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
    work correctly. This hook is called inside a request context, so url_for() is
    always available.

    For small amounts of CSS/JS, return them inline — no file-serving needed::

        from changedetectionio.pluggy_interface import hookimpl

        @hookimpl
        def get_html_head_extras(self):
            return (
                '<style>.my-module-banner { color: red; }</style>\\n'
                '<script>console.log("my_module_content loaded");</script>'
            )

    For larger assets, register your own lightweight Flask routes in the plugin
    module and point to them with url_for() so the sub-path prefix is handled
    automatically::

        from flask import url_for, Response
        from changedetectionio.pluggy_interface import hookimpl
        from changedetectionio.flask_app import app as _app

        MY_CSS = ".my-module-example { color: red; }"
        MY_JS = "console.log('my_module_content loaded');"

        @_app.route('/my_module_content/css')
        def my_module_content_css():
            return Response(MY_CSS, mimetype='text/css',
                            headers={'Cache-Control': 'max-age=3600'})

        @_app.route('/my_module_content/js')
        def my_module_content_js():
            return Response(MY_JS, mimetype='application/javascript',
                            headers={'Cache-Control': 'max-age=3600'})

        @hookimpl
        def get_html_head_extras(self):
            css = url_for('my_module_content_css')
            js = url_for('my_module_content_js')
            return (
                f'<link rel="stylesheet" href="{css}">\\n'
                f'<script src="{js}" defer></script>'
            )

    Returns:
        str or None: Raw HTML string to inject inside <head>, or None
    """
    pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -606,4 +664,20 @@ def apply_update_finalize(update_handler, watch, datastore, processing_exception
|
||||
except Exception as e:
|
||||
# Don't let plugin errors crash the worker
|
||||
logger.error(f"Error in update_finalize hook: {e}")
|
||||
logger.exception(f"update_finalize hook exception details:")
|
||||
logger.exception(f"update_finalize hook exception details:")
|
||||
|
||||
|
||||
def collect_html_head_extras():
    """Collect and combine HTML head extras from all plugins.

    Called from a Flask template global so it always runs inside a request context.
    This means url_for() works correctly in plugin implementations, including when the
    app is deployed under a sub-path via USE_X_SETTINGS / X-Forwarded-Prefix (ProxyFix
    sets SCRIPT_NAME so url_for() automatically prepends the prefix).

    Because this runs while rendering every page, a failing plugin must not take the
    whole UI down: any exception raised while calling the hook or combining its
    results is logged and an empty string is returned instead.

    Returns:
        str: Combined HTML string to inject inside <head> (non-empty plugin results
        joined with newlines), or an empty string when there is nothing to inject
        or a plugin failed.
    """
    try:
        results = plugin_manager.hook.get_html_head_extras()
        # Skip plugins that returned None / empty string
        parts = [r for r in results if r]
        return "\n".join(parts) if parts else ""
    except Exception as e:
        # Don't let plugin errors break page rendering
        logger.error(f"Error in get_html_head_extras hook: {e}")
        return ""
|
||||
@@ -260,6 +260,16 @@ class difference_detection_processor():
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
|
||||
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
|
||||
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
def get_extra_watch_config(self, filename):
|
||||
|
||||
@@ -42,10 +42,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
# Get error information for the template
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
|
||||
@@ -100,7 +100,13 @@ class guess_stream_type():
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||
self.is_plaintext = True
|
||||
else:
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
# magic will call a rss document 'xml'
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
from babel.numbers import parse_decimal
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
@@ -10,6 +11,8 @@ supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
|
||||
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
@@ -31,6 +34,7 @@ class Restock(dict):
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
# @todo locale needs to be the locale of the webpage
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
@@ -62,6 +66,17 @@ class Restock(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def get_price_from_history_str(history_str):
    """Pull the price out of a stored history snapshot string.

    Searches ``history_str`` with the module-level ``_price_re`` pattern and
    normalises the captured number through Decimal.

    Args:
        history_str: Snapshot text expected to contain the price pattern.

    Returns:
        str or None: The price as a decimal string, or None when the pattern is
        not found or the captured text is not a valid decimal.
    """
    found = _price_re.search(history_str)
    if found:
        try:
            return str(Decimal(found.group(1)))
        except InvalidOperation:
            # Captured text could not be parsed as a decimal — fall through to None
            pass
    return None
|
||||
|
||||
|
||||
class Watch(BaseWatch):
|
||||
def __init__(self, *arg, **kw):
|
||||
super().__init__(*arg, **kw)
|
||||
@@ -75,13 +90,27 @@ class Watch(BaseWatch):
|
||||
def extra_notification_token_values(self):
    """Add the restock data (plus the previous price) to the notification tokens.

    Extends the parent's token values with a ``restock`` entry taken from this
    watch, and sets ``restock['previous_price']`` to the price parsed from the
    snapshot taken before the most recent one (None when there are fewer than
    two snapshots, the snapshot is empty, or no price can be parsed from it).

    Returns:
        dict: Token values from the parent class with ``restock`` added.
    """
    values = super().extra_notification_token_values()
    # NOTE(review): this is the watch's own restock mapping when present, so the
    # 'previous_price' key written below ends up on the stored object too — confirm intended.
    values['restock'] = self.get('restock', {})

    values['restock']['previous_price'] = None
    if self.history_n >= 2:
        history = self.history
        if history and len(history) >= 2:
            # Timestamps are stored as string keys, so order them numerically, newest first
            newest_first = sorted(history, key=int, reverse=True)

            # Index 1 is the snapshot before the latest one. The original used [-1] on
            # this newest-first list, i.e. the OLDEST snapshot, which is only the previous
            # one when exactly two snapshots exist.
            # NOTE(review): assumes the latest history entry is the current snapshot at
            # the time notifications are built — confirm against the caller.
            price_str = self.get_history_snapshot(timestamp=newest_first[1])
            if price_str:
                values['restock']['previous_price'] = get_price_from_history_str(price_str)
    return values
|
||||
|
||||
def extra_notification_token_placeholder_info(self):
    """Describe the restock-specific notification placeholders.

    Appends one (token, description) tuple per restock token to the list
    provided by the parent class.

    Returns:
        The parent's placeholder list with the restock entries appended.
    """
    values = super().extra_notification_token_placeholder_info()

    restock_placeholders = (
        ('restock.price', "Price detected"),
        ('restock.in_stock', "In stock status"),
        ('restock.original_price', "Original price at first check"),
        ('restock.previous_price', "Previous price in history"),
    )
    for placeholder in restock_placeholders:
        values.append(placeholder)

    return values
|
||||
|
||||
|
||||
@@ -437,17 +437,18 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
from ...html_tools import html_to_text
|
||||
text = html_to_text(self.fetcher.content)
|
||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
if not len(text):
|
||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
raise ReplyWithContentButNoText(url=watch.link,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
html_content=self.fetcher.content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
#useless
|
||||
# from ...html_tools import html_to_text
|
||||
# text = html_to_text(self.fetcher.content)
|
||||
# logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
# if not len(text):
|
||||
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
# raise ReplyWithContentButNoText(url=watch.link,
|
||||
# status_code=self.fetcher.get_last_status_code(),
|
||||
# screenshot=self.fetcher.screenshot,
|
||||
# html_content=self.fetcher.content,
|
||||
# xpath_data=self.fetcher.xpath_data
|
||||
# )
|
||||
|
||||
# Which restock settings to compare against?
|
||||
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
|
||||
|
||||
@@ -283,4 +283,7 @@ def query_price_availability(extracted_data):
|
||||
if not result.get('availability') and 'availability' in microdata:
|
||||
result['availability'] = microdata['availability']
|
||||
|
||||
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
|
||||
# using something like babel you need to know the locale of the website and even then it can be problematic
|
||||
# we dont really do anything with the price data so far.. so just accept it the way it comes.
|
||||
return result
|
||||
|
||||
@@ -154,11 +154,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
|
||||
@@ -29,9 +29,11 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Perform the operation
|
||||
if op == 'pause':
|
||||
watch.toggle_pause()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
||||
elif op == 'mute':
|
||||
watch.toggle_mute()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
||||
elif op == 'recheck':
|
||||
# Import here to avoid circular imports
|
||||
|
||||
@@ -199,8 +199,31 @@ def handle_watch_update(socketio, **kwargs):
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
|
||||
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
|
||||
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
|
||||
The exception is caught inside run_wsgi and routed to self.server.log() — it never
|
||||
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
|
||||
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
|
||||
disconnect correctly via its own disconnect handler and timeout logic."""
|
||||
try:
|
||||
from werkzeug.serving import BaseWSGIServer
|
||||
_original_log = BaseWSGIServer.log
|
||||
|
||||
def _filtered_log(self, type, message, *args):
|
||||
if type == 'error' and 'write() before start_response' in message:
|
||||
return
|
||||
_original_log(self, type, message, *args)
|
||||
|
||||
BaseWSGIServer.log = _filtered_log
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
_suppress_werkzeug_ws_abrupt_disconnect_noise()
|
||||
|
||||
import platform
|
||||
import sys
|
||||
|
||||
|
||||
@@ -116,6 +116,14 @@ $(document).ready(function () {
|
||||
$('#realtime-conn-error').show();
|
||||
});
|
||||
|
||||
// Tell the server we're leaving cleanly so it can release the connection
|
||||
// immediately rather than waiting for a timeout.
|
||||
// Note: this only fires for voluntary closes (tab/window close, navigation away).
|
||||
// Hard kills, crashes and network drops will still timeout normally on the server.
|
||||
window.addEventListener('beforeunload', function () {
|
||||
socket.disconnect();
|
||||
});
|
||||
|
||||
socket.on('queue_size', function (data) {
|
||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||
if(queueSizePagerInfoText) {
|
||||
|
||||
@@ -980,12 +980,20 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
def get_all_tags_for_watch(self, uuid):
|
||||
"""This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet"""
|
||||
watch = self.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return {}
|
||||
|
||||
# Should return a dict of full tag info linked by UUID
|
||||
if watch:
|
||||
return dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
|
||||
# Start with manually assigned tags
|
||||
result = dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
|
||||
|
||||
return {}
|
||||
# Additionally include any tag whose url_match_pattern matches this watch's URL
|
||||
watch_url = watch.get('url', '')
|
||||
if watch_url:
|
||||
for tag_uuid, tag in self.__data['settings']['application']['tags'].items():
|
||||
if tag_uuid not in result and tag.matches_url(watch_url):
|
||||
result[tag_uuid] = tag
|
||||
|
||||
return result
|
||||
|
||||
@property
|
||||
def extra_browsers(self):
|
||||
|
||||
@@ -98,6 +98,14 @@
|
||||
<td><code>{{ '{{diff_patch}}' }}</code></td>
|
||||
<td>{{ _('The diff output - patch in unified format') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_from}}' }}</code></td>
|
||||
<td>{{ _('Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_to}}' }}</code></td>
|
||||
<td>{{ _('Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{current_snapshot}}' }}</code></td>
|
||||
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
|
||||
|
||||
@@ -45,6 +45,10 @@
|
||||
<script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script>
|
||||
{% endif %}
|
||||
{%- set _html_head_extras = get_html_head_extras() -%}
|
||||
{%- if _html_head_extras %}
|
||||
{{ _html_head_extras | safe }}
|
||||
{%- endif %}
|
||||
</head>
|
||||
|
||||
<body class="{{extra_classes}}">
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import psutil
|
||||
import time
|
||||
from threading import Thread
|
||||
import multiprocessing
|
||||
|
||||
import pytest
|
||||
import arrow
|
||||
@@ -191,6 +192,34 @@ def cleanup(datastore_path):
|
||||
if os.path.isfile(f):
|
||||
os.unlink(f)
|
||||
|
||||
def pytest_configure(config):
|
||||
"""Configure pytest environment before tests run.
|
||||
|
||||
CRITICAL: Set multiprocessing start method to 'fork' for Python 3.14+ compatibility.
|
||||
|
||||
Python 3.14 changed the default start method from 'fork' to 'forkserver' on Linux.
|
||||
The forkserver method requires all objects to be picklable, but pytest-flask's
|
||||
LiveServer uses nested functions that can't be pickled.
|
||||
|
||||
Setting 'fork' explicitly:
|
||||
- Maintains compatibility with Python 3.10-3.13 (where 'fork' was already default)
|
||||
- Fixes Python 3.14 pickling errors
|
||||
- Only affects Unix-like systems (Windows uses 'spawn' regardless)
|
||||
|
||||
See: https://github.com/python/cpython/issues/126831
|
||||
See: https://docs.python.org/3/whatsnew/3.14.html
|
||||
"""
|
||||
# Only set if not already set (respects existing configuration)
|
||||
if multiprocessing.get_start_method(allow_none=True) is None:
|
||||
try:
|
||||
# 'fork' is available on Unix-like systems (Linux, macOS)
|
||||
# On Windows, this will have no effect as 'spawn' is the only option
|
||||
multiprocessing.set_start_method('fork', force=False)
|
||||
logger.debug("Set multiprocessing start method to 'fork' for Python 3.14+ compatibility")
|
||||
except (ValueError, RuntimeError):
|
||||
# Already set, not available on this platform, or context already created
|
||||
pass
|
||||
|
||||
def pytest_addoption(parser):
|
||||
"""Add custom command-line options for pytest.
|
||||
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
"""Test that plugins can inject HTML into base.html <head> via get_html_head_extras hookimpl."""
|
||||
import pytest
|
||||
from flask import url_for, Response
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl, plugin_manager
|
||||
|
||||
_MY_JS = "console.log('my_module_content loaded');"
|
||||
_MY_CSS = ".my-module-example { color: red; }"
|
||||
|
||||
|
||||
class _HeadExtrasPlugin:
|
||||
"""Test plugin that injects tags pointing at its own Flask routes."""
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
css_url = url_for('test_plugin_my_module_content_css')
|
||||
js_url = url_for('test_plugin_my_module_content_js')
|
||||
return (
|
||||
f'<link rel="stylesheet" id="test-head-extra-css" href="{css_url}">\n'
|
||||
f'<script id="test-head-extra-js" src="{js_url}" defer></script>'
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def plugin_routes(live_server):
|
||||
"""Register plugin asset routes once per module (Flask routes can't be added twice)."""
|
||||
app = live_server.app
|
||||
|
||||
@app.route('/test-plugin/my_module_content/css')
|
||||
def test_plugin_my_module_content_css():
|
||||
return Response(_MY_CSS, mimetype='text/css',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@app.route('/test-plugin/my_module_content/js')
|
||||
def test_plugin_my_module_content_js():
|
||||
return Response(_MY_JS, mimetype='application/javascript',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def head_extras_plugin(plugin_routes):
|
||||
"""Register the hookimpl for one test then unregister it — function-scoped for clean isolation."""
|
||||
plugin = _HeadExtrasPlugin()
|
||||
plugin_manager.register(plugin, name="test_head_extras")
|
||||
yield plugin
|
||||
plugin_manager.unregister(name="test_head_extras")
|
||||
|
||||
|
||||
def test_plugin_html_injected_into_head(client, live_server, measure_memory_usage, datastore_path, head_extras_plugin):
|
||||
"""get_html_head_extras output must appear inside <head> in the rendered page."""
|
||||
res = client.get(url_for("watchlist.index"), follow_redirects=True)
|
||||
assert res.status_code == 200
|
||||
assert b'id="test-head-extra-css"' in res.data, "Plugin <link> tag missing from rendered page"
|
||||
assert b'id="test-head-extra-js"' in res.data, "Plugin <script> tag missing from rendered page"
|
||||
|
||||
head_end = res.data.find(b'</head>')
|
||||
assert head_end != -1
|
||||
for marker in (b'id="test-head-extra-css"', b'id="test-head-extra-js"'):
|
||||
pos = res.data.find(marker)
|
||||
assert pos != -1 and pos < head_end, f"{marker} must appear before </head>"
|
||||
|
||||
|
||||
def test_plugin_js_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
|
||||
"""The plugin-registered JS route must return JS with the right Content-Type."""
|
||||
res = client.get(url_for('test_plugin_my_module_content_js'))
|
||||
assert res.status_code == 200
|
||||
assert 'javascript' in res.content_type
|
||||
assert _MY_JS.encode() in res.data
|
||||
|
||||
|
||||
def test_plugin_css_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
|
||||
"""The plugin-registered CSS route must return CSS with the right Content-Type."""
|
||||
res = client.get(url_for('test_plugin_my_module_content_css'))
|
||||
assert res.status_code == 200
|
||||
assert 'css' in res.content_type
|
||||
assert _MY_CSS.encode() in res.data
|
||||
|
||||
|
||||
def test_no_extras_without_plugin(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""With no hookimpl registered the markers must not appear (isolation check)."""
|
||||
res = client.get(url_for("watchlist.index"), follow_redirects=True)
|
||||
assert b'id="test-head-extra-css"' not in res.data
|
||||
assert b'id="test-head-extra-js"' not in res.data
|
||||
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
|
||||
set_longer_modified_response, delete_all_watches
|
||||
|
||||
import logging
|
||||
|
||||
import os
|
||||
|
||||
# NOTE - RELIES ON mailserver as hostname running, see github build recipes
|
||||
smtp_test_server = 'mailserver'
|
||||
smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver')
|
||||
|
||||
ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())
|
||||
|
||||
|
||||
@@ -170,6 +170,14 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
assert b'(changed) Which is across' in res.data
|
||||
assert b'Some text thats the same' in res.data
|
||||
|
||||
# Fetch the difference between two versions (default text format)
|
||||
res = client.get(
|
||||
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+"?changesOnly=true",
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
assert b'Some text thats the same' not in res.data
|
||||
|
||||
# Test htmlcolor format
|
||||
res = client.get(
|
||||
|
||||
@@ -178,23 +178,44 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
|
||||
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that a tag/group can be updated with processor_config_restock_diff via the API.
|
||||
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
|
||||
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
# Create a tag
|
||||
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({"title": "Restock Group"}),
|
||||
data=json.dumps({
|
||||
"title": "Restock Group",
|
||||
"overrides_watch": True,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 7777777
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201
|
||||
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
|
||||
tag_uuid = res.json.get('uuid')
|
||||
|
||||
# Update tag with valid processor_config_restock_diff
|
||||
# Verify processor config was saved during creation (the bug: these were discarded)
|
||||
res = client.get(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
tag_data = res.json
|
||||
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
|
||||
"processor_config_restock_diff should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
|
||||
"price_change_min should be saved on POST"
|
||||
|
||||
# Update tag with valid processor_config_restock_diff via PUT
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
|
||||
@@ -48,6 +48,15 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
# Check POST preview
|
||||
res = client.post(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
|
||||
# Make a change
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
|
||||
@@ -413,3 +422,28 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server
|
||||
assert b'<foobar' not in res.data
|
||||
|
||||
res = delete_all_watches(client)
|
||||
|
||||
|
||||
def test_last_error_cleared_on_same_checksum(client, live_server, datastore_path):
|
||||
"""last_error should be cleared even when content is unchanged (checksumFromPreviousCheckWasTheSame path)"""
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
|
||||
|
||||
# First check - establishes baseline checksum
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Inject a stale last_error directly (simulates a prior failed check)
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': 'Some previous error'})
|
||||
assert datastore.data['watching'][uuid].get('last_error') == 'Some previous error'
|
||||
|
||||
# Second check - same content, so checksumFromPreviousCheckWasTheSame will fire
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# last_error must be cleared even though no change was detected
|
||||
assert datastore.data['watching'][uuid].get('last_error') == False
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
||||
from flask import url_for
|
||||
import io
|
||||
from zipfile import ZipFile
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
import re
|
||||
import time
|
||||
from changedetectionio.model import Watch, Tag
|
||||
@@ -68,6 +68,9 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Check for changedetection.json (settings file)
|
||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||
|
||||
# secret.txt must never be included — it contains the Flask session key
|
||||
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
|
||||
|
||||
# Get the latest one
|
||||
res = client.get(
|
||||
url_for("backups.remove_backups"),
|
||||
@@ -196,4 +199,63 @@ def test_backup_restore(client, live_server, measure_memory_usage, datastore_pat
|
||||
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
|
||||
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
|
||||
assert isinstance(restored_tag2, Tag.model), \
|
||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||
|
||||
|
||||
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Zip Slip path traversal entries in a restore zip must be rejected."""
|
||||
import pytest
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# Build a zip with a path traversal entry that would escape the extraction dir
|
||||
malicious_zip = io.BytesIO()
|
||||
with ZipFile(malicious_zip, 'w') as zf:
|
||||
zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
|
||||
malicious_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
with pytest.raises(ValueError, match="Zip Slip"):
|
||||
import_from_zip(
|
||||
zip_stream=malicious_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
|
||||
|
||||
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""A zip whose total uncompressed size exceeds the limit must be rejected.
|
||||
|
||||
The guard reads file_size from the zip central-directory metadata — no
|
||||
actual decompression happens, so this test is fast and uses minimal RAM.
|
||||
100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
|
||||
50 KB is enough to trigger the check without creating any large files.
|
||||
"""
|
||||
import pytest
|
||||
import changedetectionio.blueprint.backups.restore as restore_mod
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
|
||||
bomb_zip = io.BytesIO()
|
||||
with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
|
||||
zf.writestr("data.txt", b"\x00" * (100 * 1024))
|
||||
bomb_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
|
||||
try:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024 # 50 KB limit for this test
|
||||
with pytest.raises(ValueError, match="decompressed size"):
|
||||
import_from_zip(
|
||||
zip_stream=bomb_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
finally:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
@@ -11,6 +12,69 @@ import os
|
||||
|
||||
|
||||
|
||||
def test_surrogate_characters_in_content_are_sanitized():
|
||||
"""Lone surrogates can appear in requests' r.text when a server returns malformed/mixed-encoding
|
||||
content. Without sanitization, encoding to UTF-8 raises UnicodeEncodeError.
|
||||
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
content_with_surrogate = '<html><body>Hello \udcad World</body></html>'
|
||||
|
||||
# Confirm the raw problem exists
|
||||
with pytest.raises(UnicodeEncodeError):
|
||||
content_with_surrogate.encode('utf-8')
|
||||
|
||||
# Our fix: sanitize after fetcher.run() in processors/base.py call_browser()
|
||||
sanitized = content_with_surrogate.encode('utf-8', errors='replace').decode('utf-8')
|
||||
assert 'Hello' in sanitized
|
||||
assert 'World' in sanitized
|
||||
assert '\udcad' not in sanitized
|
||||
|
||||
# Checksum computation (processors/base.py get_raw_document_checksum) must not crash
|
||||
hashlib.md5(sanitized.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def test_utf8_content_without_charset_header(client, live_server, datastore_path):
|
||||
"""Server returns UTF-8 content but no charset in Content-Type header.
|
||||
chardet can misdetect such pages as UTF-7 (Python 3.14 then produces surrogates).
|
||||
Our fix tries UTF-8 first before falling back to chardet.
|
||||
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
# UTF-8 encoded content with non-ASCII chars - no charset will be in the header
|
||||
html = '<html><body><p>Español</p><p>Français</p><p>日本語</p></body></html>'
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('utf-8'), mode='wb')
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||
# Should decode correctly as UTF-8, not produce mojibake (Español) or replacement chars
|
||||
assert 'Español'.encode('utf-8') in res.data
|
||||
assert 'Français'.encode('utf-8') in res.data
|
||||
assert '日本語'.encode('utf-8') in res.data
|
||||
|
||||
|
||||
def test_shiftjis_with_meta_charset(client, live_server, datastore_path):
|
||||
"""Server returns Shift-JIS content with no charset in HTTP header, but the HTML
|
||||
declares <meta charset="Shift-JIS">. We should use the meta tag, not chardet.
|
||||
Real-world case: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
japanese_text = '日本語のページ'
|
||||
html = f'<html><head><meta http-equiv="Content-Type" content="text/html;charset=Shift-JIS"></head><body><p>{japanese_text}</p></body></html>'
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('shift_jis'), mode='wb')
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||
assert japanese_text.encode('utf-8') in res.data
|
||||
|
||||
|
||||
def set_html_response(datastore_path):
|
||||
test_return_data = """
|
||||
<html><body><span class="nav_second_img_text">
|
||||
|
||||
@@ -624,3 +624,76 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
|
||||
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
||||
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
||||
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
||||
|
||||
|
||||
def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that clearing snapshot history works with translated confirmation text.
|
||||
|
||||
Issue #3865: When the app language is set to German, the clear history
|
||||
confirmation dialog shows the translated word (e.g. 'loschen') but the
|
||||
backend only accepted the English word 'clear', making it impossible
|
||||
to clear snapshots in non-English languages.
|
||||
"""
|
||||
from flask import url_for
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Add a watch so there is history to clear
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Set language to German
|
||||
res = client.get(
|
||||
url_for("set_language", locale="de"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
|
||||
# Verify the clear history page shows the German confirmation word
|
||||
res = client.get(
|
||||
url_for("ui.clear_all_history"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert "löschen".encode() in res.data, "Expected German word 'loschen' on clear history page"
|
||||
|
||||
# Submit the form with the German translated word
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={"confirmtext": "löschen"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
# Should NOT show error message
|
||||
assert b"Incorrect confirmation text" not in res.data, \
|
||||
"German confirmation word 'loschen' should be accepted (issue #3865)"
|
||||
|
||||
# Switch back to English and verify English word still works
|
||||
res = client.get(
|
||||
url_for("set_language", locale="en_US"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={"confirmtext": "clear"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert b"Incorrect confirmation text" not in res.data, \
|
||||
"English confirmation word 'clear' should still be accepted"
|
||||
|
||||
# Verify that missing/empty confirmtext does not crash the server
|
||||
res = client.post(
|
||||
url_for("ui.clear_all_history"),
|
||||
data={},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200, \
|
||||
"Missing confirmtext should not crash the server"
|
||||
|
||||
@@ -16,6 +16,51 @@ except ModuleNotFoundError:
|
||||
|
||||
|
||||
|
||||
def test_jsonp_treated_as_plaintext():
|
||||
from ..processors.magic import guess_stream_type
|
||||
|
||||
# JSONP content (server wrongly claims application/json) should be detected as plaintext
|
||||
# Callback names are arbitrary identifiers, not always 'cb'
|
||||
jsonp_content = 'jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })'
|
||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_content)
|
||||
assert result.is_json is False
|
||||
assert result.is_plaintext is True
|
||||
|
||||
# Variation with dotted callback name e.g. jQuery.cb(...)
|
||||
jsonp_dotted = 'some.callback({ "version": "1.0" })'
|
||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_dotted)
|
||||
assert result.is_json is False
|
||||
assert result.is_plaintext is True
|
||||
|
||||
# Real JSON should still be detected as JSON
|
||||
json_content = '{ "version": "8.0.41", "url": "https://example.com/app.apk" }'
|
||||
result = guess_stream_type(http_content_header="application/json", content=json_content)
|
||||
assert result.is_json is True
|
||||
assert result.is_plaintext is False
|
||||
|
||||
|
||||
def test_jsonp_json_filter_extraction():
|
||||
from .. import html_tools
|
||||
|
||||
# Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
|
||||
jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'
|
||||
|
||||
# Deep nested jsonpath filter into array element
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[0].versionName")
|
||||
assert text == '"8.0.68"'
|
||||
|
||||
# Filter that selects the second array element
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[1].arch")
|
||||
assert text == '"arm32"'
|
||||
|
||||
if jq_support:
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
|
||||
assert text == '"8.0.68"'
|
||||
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
|
||||
assert text == "https://example.com/app-arm32.apk"
|
||||
|
||||
|
||||
def test_unittest_inline_html_extract():
|
||||
# So lets pretend that the JSON we want is inside some HTML
|
||||
content="""
|
||||
|
||||
@@ -350,6 +350,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
res = client.get(url_for("settings.settings_page"))
|
||||
|
||||
assert b'{{restock.original_price}}' in res.data
|
||||
assert b'{{restock.previous_price}}' in res.data
|
||||
assert b'Original price at first check' in res.data
|
||||
|
||||
#####################
|
||||
@@ -358,7 +359,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
url_for("settings.settings_page"),
|
||||
data={"application-notification_urls": notification_url,
|
||||
"application-notification_title": "title new price {{restock.price}}",
|
||||
"application-notification_body": "new price {{restock.price}}",
|
||||
"application-notification_body": "new price {{restock.price}} previous price {{restock.previous_price}} instock {{restock.in_stock}}",
|
||||
"application-notification_format": default_notification_format,
|
||||
"requests-time_between_check-minutes": 180,
|
||||
'application-fetch_backend': "html_requests"},
|
||||
@@ -372,8 +373,6 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
|
||||
assert b"Settings updated." in res.data
|
||||
|
||||
|
||||
set_original_response(props_markup=instock_props[0], price='960.45', datastore_path=datastore_path)
|
||||
# A change in price, should trigger a change by default
|
||||
set_original_response(props_markup=instock_props[0], price='1950.45', datastore_path=datastore_path)
|
||||
client.get(url_for("ui.form_watch_checknow"))
|
||||
@@ -384,6 +383,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
notification = f.read()
|
||||
assert "new price 1950.45" in notification
|
||||
assert "title new price 1950.45" in notification
|
||||
assert "previous price 960.45" in notification
|
||||
|
||||
## Now test the "SEND TEST NOTIFICATION" is working
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
@@ -467,3 +467,38 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
|
||||
assert b'155.55' in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_itemprop_as_str(client, live_server, measure_memory_usage, datastore_path):
    """A price given as a string in microdata ``<meta>`` tags must appear on the watch list.

    Serves a page whose schema.org Offer carries ``content="767.55"`` for
    ``itemprop="price"``, adds it as a ``restock_diff`` watch, triggers a recheck
    and expects ``767.55`` to be rendered on the watch overview.
    """
    # Plain (non-f) string: the markup contains nothing to interpolate.
    test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<span itemprop="offers" itemscope itemtype="http://schema.org/Offer">
<meta content="767.55" itemprop="price"/>
<meta content="EUR" itemprop="priceCurrency"/>
<meta content="InStock" itemprop="availability"/>
<meta content="https://www.123-test.dk" itemprop="url"/>
</span>
</body>
</html>
"""

    # The test endpoint serves whatever is written to this file.
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
        f.write(test_return_data)

    test_url = url_for('test_endpoint', _external=True)

    client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    client.get(url_for("ui.form_watch_checknow"))
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'767.55' in res.data

    # Remove the watch again so later tests start from an empty list,
    # matching how the preceding test in this file finishes.
    delete_all_watches(client)
|
||||
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration tests for auto-applying tags to watches by URL pattern matching.
|
||||
|
||||
Verifies:
|
||||
- A tag with url_match_pattern shows on the watch overview list (via get_all_tags_for_watch)
|
||||
- The auto-applied tag appears on the watch edit page
|
||||
- A watch whose URL does NOT match the pattern does not get the tag
|
||||
"""
|
||||
|
||||
import json
|
||||
from flask import url_for
|
||||
from .util import set_original_response, live_server_setup
|
||||
|
||||
|
||||
def test_tag_url_pattern_shows_in_overview(client, live_server, measure_memory_usage, datastore_path):
    """A tag whose url_match_pattern matches a watch URL must be listed for that watch only."""
    set_original_response(datastore_path=datastore_path)

    datastore = live_server.app.config['DATASTORE']
    api_key = datastore.data['settings']['application'].get('api_access_token')
    api_headers = {'content-type': 'application/json', 'x-api-key': api_key}

    def api_create(endpoint, payload):
        """POST a JSON payload to an API endpoint, require HTTP 201 and return the new uuid."""
        response = client.post(url_for(endpoint), data=json.dumps(payload), headers=api_headers)
        assert response.status_code == 201, response.data
        return response.json['uuid']

    # One pattern tag, one watch that matches it and one that does not.
    tag_uuid = api_create("tag", {"title": "Auto GitHub", "url_match_pattern": "*github.com*"})
    matching_watch_uuid = api_create("createwatch", {"url": "https://github.com/someuser/repo"})
    non_matching_watch_uuid = api_create("createwatch", {"url": "https://example.com/page"})

    # The tag title must be rendered somewhere on the watch overview page.
    overview = client.get(url_for("watchlist.index"))
    assert overview.status_code == 200
    page_text = overview.get_data(as_text=True)
    assert "Auto GitHub" in page_text, "Auto-matched tag title must appear in watch overview"

    # Cross-check against the datastore: only the matching watch resolves the tag.
    tags_for_match = datastore.get_all_tags_for_watch(matching_watch_uuid)
    assert tag_uuid in tags_for_match, "Pattern-matched tag must be returned for matching watch"

    tags_for_other = datastore.get_all_tags_for_watch(non_matching_watch_uuid)
    assert tag_uuid not in tags_for_other, "Pattern-matched tag must NOT appear for non-matching watch"
|
||||
|
||||
|
||||
def test_auto_applied_tag_shows_on_watch_edit(client, live_server, measure_memory_usage, datastore_path):
    """The edit page of a watch must mention a tag that was applied through its URL pattern."""
    set_original_response(datastore_path=datastore_path)

    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    json_headers = {'content-type': 'application/json', 'x-api-key': api_key}

    # Tag whose pattern will match the watch created below.
    tag_response = client.post(
        url_for("tag"),
        data=json.dumps({"title": "Auto Docs", "url_match_pattern": "*docs.example.com*"}),
        headers=json_headers,
    )
    assert tag_response.status_code == 201, tag_response.data

    watch_response = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": "https://docs.example.com/guide"}),
        headers=json_headers,
    )
    assert watch_response.status_code == 201, watch_response.data
    watch_uuid = watch_response.json['uuid']

    # Fetch the edit page of the matching watch and inspect its markup.
    edit_page = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
    assert edit_page.status_code == 200
    html = edit_page.get_data(as_text=True)
    lowered = html.lower()

    assert "Auto Docs" in html, "Auto-applied tag name must appear on watch edit page"
    # NOTE(review): the "auto" alternative is already satisfied by the tag title
    # "Auto Docs" asserted above, so this check cannot fail on its own — consider
    # tightening it to the exact wording the template uses.
    assert "automatically applied" in lowered or "auto" in lowered, \
        "Watch edit page must indicate the tag is auto-applied by pattern"
|
||||
|
||||
|
||||
def test_multiple_pattern_tags_all_applied(client, live_server, measure_memory_usage, datastore_path):
    """Every tag whose pattern matches the watch URL must be resolved, not only the first one."""
    set_original_response(datastore_path=datastore_path)

    datastore = live_server.app.config['DATASTORE']
    api_key = datastore.data['settings']['application'].get('api_access_token')
    json_headers = {'content-type': 'application/json', 'x-api-key': api_key}

    def api_create(endpoint, payload):
        """POST a JSON payload to an API endpoint, require HTTP 201 and return the new uuid."""
        response = client.post(url_for(endpoint), data=json.dumps(payload), headers=json_headers)
        assert response.status_code == 201, response.data
        return response.json['uuid']

    # Two patterns that both match the watch URL below ...
    tag_docs_uuid = api_create("tag", {"title": "Org Docs", "url_match_pattern": "*docs.*"})
    tag_python_uuid = api_create("tag", {"title": "Org Python", "url_match_pattern": "*python*"})
    # ... and a third one that does not.
    tag_rust_uuid = api_create("tag", {"title": "Org Rust", "url_match_pattern": "*rust-lang*"})

    # URL contains "docs." and "python" but not "rust-lang".
    watch_uuid = api_create("createwatch", {"url": "https://docs.python.org/3/library/fnmatch.html"})

    resolved = datastore.get_all_tags_for_watch(watch_uuid)

    assert tag_docs_uuid in resolved, "First matching tag must be included"
    assert tag_python_uuid in resolved, "Second matching tag must be included"
    assert tag_rust_uuid not in resolved, "Non-matching tag must NOT be included"
|
||||
@@ -592,3 +592,79 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
|
||||
set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
|
||||
for content_type in RSS_XML_CONTENT_TYPES:
|
||||
_subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
|
||||
|
||||
|
||||
# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
|
||||
# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
|
||||
def test_xpath_blocked_functions_unit():
    """File-, network- and environment-reading XPath functions must raise instead of evaluating.

    Runs without a live server: each expression is tried through xpath_filter()
    and directly through elementpath with SafeXPath3Parser.
    """
    import elementpath
    from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
    from lxml import html

    html_content = '<html><body><p>safe content</p></body></html>'

    dangerous_expressions = (
        "unparsed-text('file:///etc/passwd')",
        "unparsed-text-lines('file:///etc/passwd')",
        "unparsed-text-available('file:///etc/passwd')",
        "doc('file:///etc/passwd')",
        "doc-available('file:///etc/passwd')",
        "json-doc('file:///datastore/changedetection.json')",
        "collection('file:///datastore/')",
        "uri-collection('file:///datastore/')",
        "transform(map{})",
        "load-xquery-module('foo')",
        "environment-variable('PATH')",
        "available-environment-variables()",
    )

    for expr in dangerous_expressions:
        # The high-level filter must raise rather than hand back any content.
        try:
            leaked = xpath_filter(expr, html_content)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            assert False, f"xpath_filter should have raised for: {expr!r}, got: {leaked!r}"

        # The restricted parser itself must refuse the expression as well.
        tree = html.fromstring(html_content)
        try:
            elementpath.select(tree, expr, parser=SafeXPath3Parser)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            assert False, f"SafeXPath3Parser should have raised for: {expr!r}"

    # Ordinary XPath must keep working.
    result = xpath_filter('//p/text()', html_content)
    assert result == 'safe content'
|
||||
|
||||
|
||||
# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
|
||||
def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
    """Submitting a dangerous XPath function as an include filter must produce a validation error."""
    from flask import url_for

    # Create one watch and let its first check complete before editing it.
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    dangerous_expressions = (
        "xpath:unparsed-text('file:///etc/passwd')",
        "xpath:environment-variable('PATH')",
        "xpath:doc('file:///etc/passwd')",
    )

    for expr in dangerous_expressions:
        form_data = {
            "include_filters": expr,
            "url": test_url,
            "tags": "",
            "headers": "",
            'fetch_backend': "html_requests",
            "time_between_check_use_default": "y",
        }
        res = client.post(
            url_for("ui.ui_edit.edit_page", uuid="first"),
            data=form_data,
            follow_redirects=True
        )
        assert b"is not a valid XPath expression" in res.data, \
            f"Form should reject dangerous expression: {expr!r}"

    delete_all_watches(client)
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Static analysis test: verify @login_optionally_required is always applied
|
||||
AFTER (inner to) @blueprint.route(), not before it.
|
||||
|
||||
In Flask, @route() must be the outermost decorator because it registers
|
||||
whatever function it receives. If @login_optionally_required is placed
|
||||
above @route(), the raw unprotected function gets registered and auth is
|
||||
silently bypassed (GHSA-jmrh-xmgh-x9j4).
|
||||
|
||||
Correct order (route outermost, auth inner):
|
||||
@blueprint.route('/path')
|
||||
@login_optionally_required
|
||||
def view(): ...
|
||||
|
||||
Wrong order (auth never called):
|
||||
@login_optionally_required ← registered by route, then discarded
|
||||
@blueprint.route('/path')
|
||||
def view(): ...
|
||||
"""
|
||||
|
||||
import ast
|
||||
import pathlib
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = pathlib.Path(__file__).parents[3] # …/changedetection.io/
|
||||
SOURCE_ROOT = REPO_ROOT / "changedetectionio"
|
||||
|
||||
|
||||
def _is_route_decorator(node: ast.expr) -> bool:
    """Tell whether a decorator node has the shape ``@<anything>.route(...)``.

    Only a call whose callee is an attribute access named ``route`` qualifies;
    a bare ``@route(...)`` or an uncalled ``@bp.route`` does not.
    """
    if not isinstance(node, ast.Call):
        return False
    callee = node.func
    return isinstance(callee, ast.Attribute) and callee.attr == "route"
|
||||
|
||||
|
||||
def _is_auth_decorator(node: ast.expr) -> bool:
    """Return True if the decorator refers to ``login_optionally_required``.

    Besides the bare ``@login_optionally_required`` form, the qualified form
    (``@some_module.login_optionally_required``) and a called form
    (``@login_optionally_required(...)``) are recognised too, so that an
    aliased or parameterised usage cannot slip past the ordering check.
    """
    # A called decorator wraps the interesting reference in ast.Call.func.
    target = node.func if isinstance(node, ast.Call) else node

    if isinstance(target, ast.Name):
        return target.id == "login_optionally_required"
    if isinstance(target, ast.Attribute):
        return target.attr == "login_optionally_required"
    return False
|
||||
|
||||
|
||||
def collect_violations() -> list[str]:
    """Scan every ``*.py`` file under SOURCE_ROOT for wrongly ordered auth/route decorators.

    A violation is a function (sync or async) on which an auth decorator is
    listed above a route decorator.

    Returns:
        One human-readable message per offending (auth decorator, route decorator)
        pair. Files are visited in sorted path order so the report is stable
        between runs.
    """
    violations = []

    # rglob() yields in directory-enumeration order; sort for a reproducible report.
    for path in sorted(SOURCE_ROOT.rglob("*.py")):
        try:
            tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
        except SyntaxError:
            # Best-effort scan: a file that does not parse cannot be inspected, so skip it.
            continue

        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue

            # decorator_list is ordered top-to-bottom as written in the source.
            decorators = node.decorator_list
            auth_indices = [i for i, d in enumerate(decorators) if _is_auth_decorator(d)]
            route_indices = [i for i, d in enumerate(decorators) if _is_route_decorator(d)]

            # Bad order: auth decorator appears at a lower index (higher up) than a route decorator
            for auth_idx in auth_indices:
                for route_idx in route_indices:
                    if auth_idx < route_idx:
                        rel = path.relative_to(REPO_ROOT)
                        violations.append(
                            f"{rel}:{node.lineno} — `{node.name}`: "
                            f"@login_optionally_required (line {decorators[auth_idx].lineno}) "
                            f"is above @route (line {decorators[route_idx].lineno}); "
                            f"auth wrapper will never be called"
                        )

    return violations
|
||||
|
||||
|
||||
def test_auth_decorator_order():
    """Fail with a full report when any view lists the auth decorator above its route decorator."""
    violations = collect_violations()
    if not violations:
        return

    bullet_list = "\n".join(f" • {v}" for v in violations)
    pytest.fail(
        "\n\nFound routes where @login_optionally_required is placed ABOVE @blueprint.route().\n"
        "This silently disables authentication — @route() registers the raw function\n"
        "and the auth wrapper is never called.\n\n"
        "Fix: move @blueprint.route() to be the outermost (topmost) decorator.\n\n"
        + bullet_list
    )
|
||||
@@ -64,7 +64,7 @@ class TestTriggerConditions(unittest.TestCase):
|
||||
"conditions": [
|
||||
{"operator": ">=", "field": "extracted_number", "value": "10"},
|
||||
{"operator": "<=", "field": "extracted_number", "value": "5000"},
|
||||
{"operator": "in", "field": "page_text", "value": "rock"},
|
||||
{"operator": "in", "field": "page_filtered_text", "value": "rock"},
|
||||
#{"operator": "starts_with", "field": "page_text", "value": "I saw"},
|
||||
]
|
||||
}
|
||||
|
||||
@@ -15,7 +15,9 @@ from changedetectionio.diff import (
|
||||
CHANGED_PLACEMARKER_OPEN,
|
||||
CHANGED_PLACEMARKER_CLOSED,
|
||||
CHANGED_INTO_PLACEMARKER_OPEN,
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED,
|
||||
extract_changed_from,
|
||||
extract_changed_to,
|
||||
)
|
||||
|
||||
|
||||
@@ -381,5 +383,140 @@ Line 3 with tabs and spaces"""
|
||||
self.assertNotIn('[-Line 2-]', output)
|
||||
self.assertNotIn('[+Line 2+]', output)
|
||||
|
||||
def test_diff_changed_from_to_word_level(self):
    """A single changed word on a line yields just the old and the new value (price-monitoring case)."""
    old_text = "Widget costs $99.99 per month"
    new_text = "Widget costs $109.99 per month"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    for extractor, expected in ((extract_changed_from, "$99.99"), (extract_changed_to, "$109.99")):
        self.assertEqual(extractor(rendered), expected)
|
||||
|
||||
def test_diff_changed_from_to_multiple_changes(self):
    """Changed fragments from different lines are returned joined by a newline.

    The two changed lines are separated by an unchanged one; per the original
    author's note this keeps each change a 1-to-1 line replacement so the
    word-level diff is used for both.
    """
    old_text = "Price $99\nunchanged\nTax $5"
    new_text = "Price $149\nunchanged\nTax $12"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    for extractor, expected in ((extract_changed_from, "$99\n$5"), (extract_changed_to, "$149\n$12")):
        self.assertEqual(extractor(rendered), expected)
|
||||
|
||||
def test_diff_changed_from_to_pure_insert_delete(self):
    """With word_diff=False a replaced line is still reported as whole old / new line."""
    old_text = "old line"
    new_text = "new line"

    # Line-level rendering only (no inline word diff).
    rendered = diff.render_diff(old_text, new_text, word_diff=False)

    for extractor, expected in ((extract_changed_from, "old line"), (extract_changed_to, "new line")):
        self.assertEqual(extractor(rendered), expected)
|
||||
|
||||
def test_diff_changed_from_to_similar_numbers(self):
    """'$90.00' -> '$9.00' must be reported as whole values, never a fragment such as '0.00'.

    Per the original author's note, tokens are split on whitespace only, so each
    price is compared as one unit.
    """
    old_text = "for sale $90.00"
    new_text = "for sale $9.00"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    for extractor, expected in ((extract_changed_from, "$90.00"), (extract_changed_to, "$9.00")):
        self.assertEqual(extractor(rendered), expected)
|
||||
|
||||
def test_diff_changed_from_to_whole_line_replaced(self):
    """Extraction must also work when the old and new line share no token at all.

    Per the original author's note, this case is rendered with the
    CHANGED / CHANGED_INTO markers rather than REMOVED / ADDED.
    """
    old_text = "$99"
    new_text = "$109"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    for extractor, expected in ((extract_changed_from, "$99"), (extract_changed_to, "$109")):
        self.assertEqual(extractor(rendered), expected)
|
||||
|
||||
def test_diff_changed_from_to_multiple_words_same_line(self):
    """Several changed words on one line come back as newline-joined fragments.

    'quick brown fox jumps' -> 'slow brown fox hops' gives 'quick\\njumps' and
    'slow\\nhops'; the extraction is most useful when one value changes per line.
    """
    old_text = "quick brown fox jumps"
    new_text = "slow brown fox hops"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    for extractor, expected in ((extract_changed_from, "quick\njumps"), (extract_changed_to, "slow\nhops")):
        self.assertEqual(extractor(rendered), expected)
|
||||
|
||||
def test_diff_changed_from_to_no_change(self):
    """Identical before/after text yields an empty string from both extractors."""
    unchanged = "nothing changed here"

    rendered = diff.render_diff(unchanged, unchanged, word_diff=True)

    for extractor in (extract_changed_from, extract_changed_to):
        self.assertEqual(extractor(rendered), "")
|
||||
|
||||
|
||||
def test_word_diff_no_prefix_whole_line_replaced(self):
    """include_change_type_prefix=False: a fully replaced line is rendered without placemarkers (issue #3816)."""
    old_value, new_value = "73", "100"

    rendered = diff.render_diff(old_value, new_value, word_diff=True, include_change_type_prefix=False)

    self.assertNotIn('PLACEMARKER', rendered)
    # Both raw values must still be present in the output.
    for value in ('73', '100'):
        self.assertIn(value, rendered)
|
||||
|
||||
def test_word_diff_no_prefix_inline_changes(self):
    """include_change_type_prefix=False: an inline word change is rendered without placemarkers (issue #3816)."""
    old_text = "the price is 50 dollars"
    new_text = "the price is 75 dollars"

    rendered = diff.render_diff(old_text, new_text, word_diff=True, include_change_type_prefix=False)

    self.assertNotIn('PLACEMARKER', rendered)
    for value in ('50', '75'):
        self.assertIn(value, rendered)
|
||||
|
||||
def test_word_diff_with_prefix_still_wraps(self):
    """include_change_type_prefix=True must keep wrapping changed tokens in placemarkers."""
    old_value, new_value = "73", "100"

    rendered = diff.render_diff(old_value, new_value, word_diff=True, include_change_type_prefix=True)

    self.assertIn('PLACEMARKER', rendered)
|
||||
|
||||
def test_word_diff_no_prefix_exact_output(self):
    """Pin the exact rendered text so regressions in marker handling are caught.

    Without prefix, whole-line replacement: old and new value on separate lines, no markers.
    Without prefix, inline replacement: unchanged words kept, old and new token
    emitted back to back without markers (hence '5075').
    With prefix, whole-line replacement: both values fully wrapped in placemarkers.
    """
    # Whole line replaced, markers off.
    whole_line = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=False)
    self.assertEqual(whole_line, '73\n100')

    # One word replaced inside an otherwise unchanged line, markers off.
    inline = diff.render_diff(
        'the price is 50 dollars',
        'the price is 75 dollars',
        word_diff=True,
        include_change_type_prefix=False,
    )
    self.assertEqual(inline, 'the price is 5075 dollars')

    # Whole line replaced, markers on.
    wrapped = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=True)
    self.assertEqual(wrapped, '@changed_PLACEMARKER_OPEN73@changed_PLACEMARKER_CLOSED\n'
                              '@changed_into_PLACEMARKER_OPEN100@changed_into_PLACEMARKER_CLOSED')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# run from dir above changedetectionio/ dir
|
||||
# python3 -m unittest changedetectionio.tests.unit.test_tag_url_match
|
||||
|
||||
import unittest
|
||||
from changedetectionio.model.Tag import model as TagModel
|
||||
|
||||
|
||||
def make_tag(pattern):
    """Build a bare Tag holding only ``url_match_pattern``.

    The instance is created with ``__new__`` and initialised as a plain dict,
    so the model's own ``__init__`` (and any datastore wiring it does) is not run.
    """
    instance = TagModel.__new__(TagModel)
    dict.__init__(instance)
    instance['url_match_pattern'] = pattern
    return instance
|
||||
|
||||
|
||||
class TestTagUrlMatch(unittest.TestCase):
    """Behaviour of Tag.matches_url for wildcard and plain-substring patterns."""

    def test_wildcard_matches(self):
        """'*' wildcards match URLs containing the domain and reject unrelated ones."""
        tag = make_tag('*example.com*')
        for url in ('https://example.com/page', 'https://www.example.com/shop/item'):
            self.assertTrue(tag.matches_url(url))
        self.assertFalse(tag.matches_url('https://other.com/page'))

    def test_wildcard_case_insensitive(self):
        """An upper-case wildcard pattern still matches a lower-case URL."""
        tag = make_tag('*EXAMPLE.COM*')
        self.assertTrue(tag.matches_url('https://example.com/page'))

    def test_substring_match(self):
        """A pattern without wildcards matches when it occurs inside the URL."""
        tag = make_tag('github.com/myorg')
        self.assertTrue(tag.matches_url('https://github.com/myorg/repo'))
        self.assertFalse(tag.matches_url('https://github.com/otherorg/repo'))

    def test_substring_case_insensitive(self):
        """Substring matching ignores letter case."""
        tag = make_tag('GitHub.com/MyOrg')
        self.assertTrue(tag.matches_url('https://github.com/myorg/repo'))

    def test_empty_pattern_never_matches(self):
        """An empty pattern matches nothing."""
        tag = make_tag('')
        self.assertFalse(tag.matches_url('https://example.com'))

    def test_empty_url_never_matches(self):
        """An empty URL is never matched, even by a wildcard pattern."""
        tag = make_tag('*example.com*')
        self.assertFalse(tag.matches_url(''))

    def test_question_mark_wildcard(self):
        """'?' stands for exactly one character."""
        tag = make_tag('https://example.com/item-?')
        self.assertTrue(tag.matches_url('https://example.com/item-1'))
        self.assertFalse(tag.matches_url('https://example.com/item-12'))

    def test_substring_is_broad(self):
        """Plain substring matching is deliberately broad.

        'evil.com' is found anywhere in the URL string, so it also matches
        'notevil.com'. A wildcard pattern such as '*://evil.com/*' should be used
        when the exact domain matters.
        """
        tag = make_tag('evil.com')
        self.assertTrue(tag.matches_url('https://evil.com/page'))
        self.assertTrue(tag.matches_url('https://notevil.com'))  # substring match — expected

    def test_precise_domain_match_with_wildcard(self):
        """Anchoring the domain with '*://…/*' avoids the substring surprise."""
        tag = make_tag('*://evil.com/*')
        self.assertTrue(tag.matches_url('https://evil.com/page'))
        self.assertFalse(tag.matches_url('https://notevil.com/page'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -76,7 +76,9 @@ These commands read settings from `../../setup.cfg` automatically.
|
||||
- `en_US` - English (US)
|
||||
- `fr` - French (Français)
|
||||
- `it` - Italian (Italiano)
|
||||
- `ja` - Japanese (日本語)
|
||||
- `ko` - Korean (한국어)
|
||||
- `pt_BR` - Portuguese (Brasil)
|
||||
- `zh` - Chinese Simplified (中文简体)
|
||||
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
|
||||
|
||||
|
||||
Binary file not shown.
@@ -369,7 +369,7 @@ msgstr "Protokol ladění oznámení"
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/tags/templates/edit-tag.html
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "General"
|
||||
msgstr "Generál"
|
||||
msgstr "Obecné"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "Fetching"
|
||||
@@ -393,7 +393,7 @@ msgstr "RSS"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "Backups"
|
||||
msgstr "Backups"
|
||||
msgstr "Zálohy"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "Time & Date"
|
||||
@@ -409,7 +409,7 @@ msgstr "Info"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "Default recheck time for all watches, current system minimum is"
|
||||
msgstr "Výchozí čas opětovné kontroly pro všechny monitory, aktuální systémové minimum je"
|
||||
msgstr "Výchozí čas opětovné kontroly pro všechna sledování, aktuální systémové minimum je"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "more info"
|
||||
@@ -445,9 +445,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "Allow access to the watch change history page when password is enabled (Good for sharing the diff page)"
|
||||
msgstr ""
|
||||
"Povolit přístup na stránku historie změn monitoru, když je povoleno heslo (Vhodné pro sdílení stránky rozdílů)Povolit"
|
||||
" anonymní přístup na stránku historie sledování, když je povoleno heslo"
|
||||
msgstr "Povolit přístup na stránku historie změn monitoru, když je povoleno heslo (Vhodné pro sdílení stránky rozdílů)"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "When a request returns no content, or the HTML does not contain any text, is this considered a change?"
|
||||
@@ -455,7 +453,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "Choose a default proxy for all watches"
|
||||
msgstr "Vyberte výchozí proxy pro všechny monitory"
|
||||
msgstr "Vyberte výchozí proxy pro všechna sledování"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html
|
||||
msgid "Base URL used for the"
|
||||
@@ -479,7 +477,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Use the"
|
||||
msgstr "Použijte"
|
||||
msgstr "Použít"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Basic"
|
||||
@@ -505,7 +503,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "This will wait"
|
||||
msgstr "Tohle počká"
|
||||
msgstr "Toto počká"
|
||||
|
||||
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "seconds before extracting the text."
|
||||
@@ -865,7 +863,7 @@ msgstr "povoleny adresy URL pro upozornění v celém systému"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/edit-tag.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "this form will override notification settings for this watch only"
|
||||
msgstr "tento formulář přepíše nastavení oznámení pouze pro tyto monitory"
|
||||
msgstr "tento formulář přepíše nastavení oznámení pouze pro tato sledování"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/edit-tag.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "an empty Notification URL list here will still send notifications."
|
||||
@@ -882,7 +880,7 @@ msgstr "Přidejte novou značku organizace"
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Watch group / tag"
|
||||
msgstr "Skupina / Značka"
|
||||
msgstr "Sledovat skupinu / Značka"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
msgid "Groups allows you to manage filters and notifications for multiple watches under a single organisational tag."
|
||||
@@ -890,15 +888,15 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
msgid "# Watches"
|
||||
msgstr "# monitorů"
|
||||
msgstr "# Sledování"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
msgid "Tag / Label name"
|
||||
msgstr "Název štítku / štítku"
|
||||
msgstr "Tag / Název štítku"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
msgid "No website organisational tags/groups configured"
|
||||
msgstr "Žádné skupiny/značky"
|
||||
msgstr "Žádné skupiny/značky zatím nebyly nastaveny"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
@@ -908,7 +906,7 @@ msgstr "Upravit"
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Recheck"
|
||||
msgstr "Znovu zkontrolujte"
|
||||
msgstr "Znovu zkontrolovat"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
msgid "Delete Group?"
|
||||
@@ -922,7 +920,7 @@ msgstr ""
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Delete"
|
||||
msgstr "Vymazat"
|
||||
msgstr "Smazat"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
msgid "Deletes and removes tag"
|
||||
@@ -945,36 +943,36 @@ msgstr "Odpojit"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html
|
||||
msgid "Keep the tag but unlink any watches"
|
||||
msgstr "Ponechte štítek, ale odpojte všechny monitory"
|
||||
msgstr "Ponechte štítek, ale odpojte všechna sledování"
|
||||
|
||||
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "RSS Feed for this watch"
|
||||
msgstr "RSS kanál pro tyto monitory"
|
||||
msgstr "RSS kanál pro toto sledování"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
msgid "{} watches deleted"
|
||||
msgstr ""
|
||||
msgstr "{} sledování smazáno"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
msgid "{} watches paused"
|
||||
msgstr "{} monitorů pozastaveno"
|
||||
msgstr "{} sledování pozastaveno"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
msgid "{} watches unpaused"
|
||||
msgstr ""
|
||||
msgstr "{} sledování opět spuštěno"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
msgid "{} watches updated"
|
||||
msgstr ""
|
||||
msgstr "{} sledování aktualizováno"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
msgid "{} watches muted"
|
||||
msgstr "{} monitorů ztlumeno"
|
||||
msgstr "{} sledování ztlumeno"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
@@ -1013,7 +1011,7 @@ msgstr "Sledujte tuto adresu URL!"
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
msgid "Cleared snapshot history for watch {}"
|
||||
msgstr "Historie snímků vymazána pro monitor {}"
|
||||
msgstr "Historie snímků vymazána pro sledování {}"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
msgid "History clearing started in background"
|
||||
@@ -1030,7 +1028,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
msgid "Deleted."
|
||||
msgstr "Vymazat"
|
||||
msgstr "Smazáno"
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
msgid "Cloned, you are editing the new watch."
|
||||
@@ -1047,7 +1045,7 @@ msgstr ""
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
msgid "Queued {} watches for rechecking ({} already queued or running)."
|
||||
msgstr "Do fronty přidáno {} monitorů k opětovné kontrole ({} již ve frontě nebo běží)."
|
||||
msgstr "Do fronty přidáno {} sledování k opětovné kontrole ({} již ve frontě nebo běží)."
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
@@ -1056,7 +1054,7 @@ msgstr "Do fronty přidáno {} sledování k opětovné kontrole."
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
msgid "Queueing watches for rechecking in background..."
|
||||
msgstr "Přidávání monitorů do fronty pro opětovnou kontrolu na pozadí..."
|
||||
msgstr "Přidává se sledování do fronty pro opětovnou kontrolu na pozadí..."
|
||||
|
||||
#: changedetectionio/blueprint/ui/__init__.py
|
||||
#, python-brace-format
|
||||
@@ -1105,7 +1103,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/ui/edit.py
|
||||
msgid "Updated watch."
|
||||
msgstr "Smazat monitory?"
|
||||
msgstr "Sledování aktualizováno."
|
||||
|
||||
#: changedetectionio/blueprint/ui/preview.py
|
||||
msgid "Preview unavailable - No fetch/check completed or triggers not reached"
|
||||
@@ -1121,7 +1119,7 @@ msgstr "Možná budete chtít použít"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
|
||||
msgid "BACKUP"
|
||||
msgstr "BACKUP"
|
||||
msgstr "ZÁLOHA"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
|
||||
msgid "link first."
|
||||
@@ -1161,11 +1159,11 @@ msgstr "Sdílet jako obrázek"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff-offscreen-options.html
|
||||
msgid "Ignore any lines matching"
|
||||
msgstr "Ignorujte všechny odpovídající řádky"
|
||||
msgstr "Ignorovat všechny odpovídající řádky"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff-offscreen-options.html
|
||||
msgid "Ignore any lines matching excluding digits"
|
||||
msgstr "Ignorujte všechny odpovídající řádky kromě číslic"
|
||||
msgstr "Ignorovat všechny odpovídající řádky kromě číslic"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html
|
||||
msgid "From"
|
||||
@@ -1185,7 +1183,7 @@ msgstr "Řádky"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html
|
||||
msgid "Ignore Whitespace"
|
||||
msgstr "Ignorujte mezery"
|
||||
msgstr "Ignorovat mezery"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html
|
||||
msgid "Same/non-changed"
|
||||
@@ -1209,7 +1207,7 @@ msgstr "Klávesnice:"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
|
||||
msgid "Previous"
|
||||
msgstr "Náhled"
|
||||
msgstr "Předchozí"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
|
||||
msgid "Next"
|
||||
@@ -1241,7 +1239,7 @@ msgstr "Aktuální snímek obrazovky"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html
|
||||
msgid "Extract Data"
|
||||
msgstr "Extrahujte data"
|
||||
msgstr "Extrahovat data"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html
|
||||
msgid "seconds ago."
|
||||
@@ -1269,7 +1267,7 @@ msgstr "NASTAVENÍ"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html
|
||||
msgid "Goto single snapshot"
|
||||
msgstr "Přejít na jeden snímek"
|
||||
msgstr "Přejít na samotný snímek"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/diff.html
|
||||
msgid "Highlight text to share or add to ignore lists."
|
||||
@@ -1359,15 +1357,15 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Check/Scan all"
|
||||
msgstr "Znovu zkontrolujte vše"
|
||||
msgstr "Vše znovu zkontrolovat"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Choose a proxy for this watch"
|
||||
msgstr "RSS kanál pro tyto monitory"
|
||||
msgstr "Vybrat proxy pro toto sledování"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Using the current global default settings"
|
||||
msgstr "Použití aktuálního globálního výchozího nastavení"
|
||||
msgstr "Aktuálně je použito globální výchozí nastavení"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Show advanced options"
|
||||
@@ -1391,7 +1389,7 @@ msgstr "Proměnné jsou podporovány v hodnotách hlavičky požadavku"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Alert! Extra headers file found and will be added to this watch!"
|
||||
msgstr "Upozornění! Byl nalezen další soubor záhlaví a bude přidán do těchto monitorů!"
|
||||
msgstr "Upozornění! Byl nalezen další soubor záhlaví a bude přidán do tohoto sledování!"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Headers can be also read from a file in your data-directory"
|
||||
@@ -1427,7 +1425,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Visual Selector data is not ready, watch needs to be checked atleast once."
|
||||
msgstr "Data Visual Selector nejsou připravena, monitory je třeba alespoň jednou zkontrolovat."
|
||||
msgstr "Data Visual Selector nejsou připravena, sledování je třeba alespoň jednou zkontrolovat."
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid ""
|
||||
@@ -1633,11 +1631,11 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Delete Watch?"
|
||||
msgstr "Smazat monitory?"
|
||||
msgstr "Smazat sledování?"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Are you sure you want to delete the watch for:"
|
||||
msgstr "Opravdu chcete smazat monitory pro:"
|
||||
msgstr "Opravdu chcete smazat sledování pro:"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "This action cannot be undone."
|
||||
@@ -1661,15 +1659,15 @@ msgstr "Vymazat historii"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Clone & Edit"
|
||||
msgstr "Klonovat a upravovat"
|
||||
msgstr "Duplikovat a upravit"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/preview.html
|
||||
msgid "Select timestamp"
|
||||
msgstr "Vyberte časové razítko"
|
||||
msgstr "Vybrat časové razítko"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/preview.html
|
||||
msgid "Go"
|
||||
msgstr "Jít"
|
||||
msgstr "Přejít"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/preview.html
|
||||
msgid "Current erroring screenshot from most recent request"
|
||||
@@ -1715,7 +1713,7 @@ msgstr ""
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Add a new web page change detection watch"
|
||||
msgstr "Přidejte nové monitory zjišťování změn webové stránky"
|
||||
msgstr "Přidejte nové sledování zjišťování změn webové stránky"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Watch this URL!"
|
||||
@@ -1723,7 +1721,7 @@ msgstr "Monitorovat tuto URL!"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Edit first then Watch"
|
||||
msgstr "Upravit a monitorovat"
|
||||
msgstr "Nejdříve upravit, poté sledovat"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Pause"
|
||||
@@ -1747,7 +1745,7 @@ msgstr "Štítek"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Mark viewed"
|
||||
msgstr "Mark zobrazil"
|
||||
msgstr "Označit jako zhlédnuté"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Use default notification"
|
||||
@@ -1775,7 +1773,7 @@ msgstr "Vymazat/resetovat historii"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Delete Watches?"
|
||||
msgstr "Smazat monitory?"
|
||||
msgstr "Smazat sledování?"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "<p>Are you sure you want to delete the selected watches?</strong></p><p>This action cannot be undone.</p>"
|
||||
@@ -1823,7 +1821,7 @@ msgstr "importovat seznam"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Detecting restock and price"
|
||||
msgstr "Detekce zásob a ceny"
|
||||
msgstr "Kontrola zásob a ceny"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "In stock"
|
||||
@@ -1876,7 +1874,7 @@ msgstr "Nepřečtený"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Recheck all"
|
||||
msgstr "Znovu zkontrolujte vše"
|
||||
msgstr "Znovu zkontrolovat vše"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
#, python-format
|
||||
@@ -2026,7 +2024,7 @@ msgstr "neděle"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Weeks"
|
||||
msgstr "týdny"
|
||||
msgstr "Týdny"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Should contain zero or more seconds"
|
||||
@@ -2046,7 +2044,7 @@ msgstr "Minuty"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Seconds"
|
||||
msgstr "sekundy"
|
||||
msgstr "Sekundy"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Notification Body and Title is required when a Notification URL is used"
|
||||
@@ -2151,7 +2149,7 @@ msgstr "Nahrajte soubor .xlsx"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Must be .xlsx file!"
|
||||
msgstr "Musí to být soubor .xlsx!"
|
||||
msgstr "Musí být soubor .xlsx!"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "File mapping"
|
||||
@@ -2175,7 +2173,7 @@ msgstr "Interval mezi kontrolami"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Use global settings for time between check and scheduler."
|
||||
msgstr "Použijte globální nastavení pro čas mezi kontrolou a plánovačem."
|
||||
msgstr "Použít globální nastavení pro čas mezi kontrolou a plánovačem."
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "CSS/JSONPath/JQ/XPath Filters"
|
||||
@@ -2284,7 +2282,7 @@ msgstr "Připojte snímek obrazovky k oznámení (pokud je to možné)"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Match"
|
||||
msgstr "# monitory"
|
||||
msgstr "Shoda"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Match all of the following"
|
||||
@@ -2355,11 +2353,11 @@ msgstr "Výchozí proxy"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Random jitter seconds ± check"
|
||||
msgstr "Náhodné jitter sekundy ± kontrola"
|
||||
msgstr "Náhodný rozptyl kontrol ± sekund"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Number of fetch workers"
|
||||
msgstr "Počet pracovníků aportů"
|
||||
msgstr "Počet procesů kontrol"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Should be between 1 and 50"
|
||||
@@ -2367,15 +2365,15 @@ msgstr "Mělo by být mezi 1 a 50"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Requests timeout in seconds"
|
||||
msgstr "Požaduje časový limit v sekundách"
|
||||
msgstr "Časový limit vypršení kontrol v sekundách"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Should be between 1 and 999"
|
||||
msgstr "Mělo by být mezi 1 a 999"
|
||||
msgstr "Nastavit mezi 1 a 999"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Default User-Agent overrides"
|
||||
msgstr "Výchozí přepisy User-Agent"
|
||||
msgstr "Změna výchozího nastavení hodnoty User-Agent"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Both a name, and a Proxy URL is required."
|
||||
@@ -2387,11 +2385,11 @@ msgstr "Otevřete stránku „Historie“ na nové kartě"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Realtime UI Updates Enabled"
|
||||
msgstr "Aktualizace v reálném čase offline"
|
||||
msgstr "Aktualizace UI v reálném čase"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Favicons Enabled"
|
||||
msgstr "zvážit povolení"
|
||||
msgstr "Povolit favikony"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Use page <title> in watch overview list"
|
||||
@@ -2427,7 +2425,7 @@ msgstr "Heslo"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Pager size"
|
||||
msgstr "Velikost pageru"
|
||||
msgstr "Počet položek na stránku"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Should be atleast zero (disabled)"
|
||||
@@ -2459,7 +2457,7 @@ msgstr "Povolit anonymní přístup na stránku historie sledování, když je p
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Hide muted watches from RSS feed"
|
||||
msgstr "Skrýt ztlumené monitory ze zdroje RSS"
|
||||
msgstr "Skrýt ztlumená sledování pro RSS zdroje"
|
||||
|
||||
#: changedetectionio/forms.py
|
||||
msgid "Enable RSS reader mode "
|
||||
|
||||
Binary file not shown.
@@ -1617,7 +1617,7 @@ msgstr "Bereich zeichnen"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Clear selection"
|
||||
msgstr "Klare Auswahl"
|
||||
msgstr "Auswahl löschen"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "One moment, fetching screenshot and element information.."
|
||||
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -100,6 +100,19 @@ def is_safe_valid_url(test_url):
|
||||
logger.warning('URL validation failed: URL is empty or whitespace only')
|
||||
return False
|
||||
|
||||
# Per-request cache: same URL is often validated 2-3x per watchlist render (sort + display).
|
||||
# Flask's g is scoped to one request and auto-cleared on teardown, so dynamic Jinja2 URLs
|
||||
# like {{microtime()}} are always re-evaluated on the next request.
|
||||
# Falls back gracefully when called outside a request context (e.g. background workers).
|
||||
_cache_key = test_url
|
||||
try:
|
||||
from flask import g
|
||||
_cache = g.setdefault('_url_validation_cache', {})
|
||||
if _cache_key in _cache:
|
||||
return _cache[_cache_key]
|
||||
except RuntimeError:
|
||||
_cache = None # No app context
|
||||
|
||||
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
|
||||
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'
|
||||
|
||||
@@ -112,11 +125,14 @@ def is_safe_valid_url(test_url):
|
||||
test_url = r.sub('', test_url)
|
||||
|
||||
# Check the actual rendered URL in case of any Jinja markup
|
||||
try:
|
||||
test_url = jinja_render(test_url)
|
||||
except Exception as e:
|
||||
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
||||
return False
|
||||
# Only run jinja_render when the URL actually contains Jinja2 syntax - creating a new
|
||||
# ImmutableSandboxedEnvironment is expensive, and this function is called once per watch per page load
|
||||
if '{%' in test_url or '{{' in test_url:
|
||||
try:
|
||||
test_url = jinja_render(test_url)
|
||||
except Exception as e:
|
||||
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
||||
return False
|
||||
|
||||
# Check query parameters and fragment
|
||||
if re.search(r'[<>]', test_url):
|
||||
@@ -142,4 +158,6 @@ def is_safe_valid_url(test_url):
|
||||
logger.warning(f'URL f"{test_url}" failed validation, aborting.')
|
||||
return False
|
||||
|
||||
if _cache is not None:
|
||||
_cache[_cache_key] = True
|
||||
return True
|
||||
|
||||
@@ -284,6 +284,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
logger.debug(f'[{uuid}] - checksumFromPreviousCheckWasTheSame - Checksum from previous check was the same, nothing todo here.')
|
||||
# Reset the edited flag since we successfully completed the check
|
||||
watch.reset_watch_edited_flag()
|
||||
# Page was fetched successfully - clear any previous error state
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': False})
|
||||
cleanup_error_artifacts(uuid, datastore)
|
||||
|
||||
except content_fetchers_exceptions.BrowserConnectError as e:
|
||||
datastore.update_watch(uuid=uuid,
|
||||
|
||||
+1
-1
@@ -28,7 +28,7 @@ services:
|
||||
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
|
||||
#
|
||||
#
|
||||
# Alternative WebDriver/selenium URL, do not use "'s or 's! (old, deprecated, does not support screenshots very well)
|
||||
# Alternative WebDriver/selenium URL, do not use "'s or 's! (old, deprecated, does not support screenshots very well, can't handle custom headers, etc.)
|
||||
# - WEBDRIVER_URL=http://browser-selenium-chrome:4444/wd/hub
|
||||
#
|
||||
# WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
|
||||
|
||||
+179
-4
@@ -113,8 +113,156 @@ tags:
|
||||
|
||||
- name: Plugin API Extensions
|
||||
description: |
|
||||
Retrieve the live OpenAPI specification for this instance. Unlike the static spec, this endpoint
|
||||
returns the fully merged spec including schemas for any processor plugins installed on this instance.
|
||||
## How Processor Plugins Extend the API
|
||||
|
||||
changedetection.io uses a **processor plugin** system to handle different types of change detection.
|
||||
Each processor lives in `changedetectionio/processors/<name>/` and may include an `api.yaml` file
|
||||
that extends the core Watch schema with processor-specific configuration fields.
|
||||
|
||||
### How it works
|
||||
|
||||
At startup, changedetection.io scans all installed processors for an `api.yaml` file. Any schemas
|
||||
and code samples defined there are deep-merged into the live API specification, making the
|
||||
processor's configuration fields valid on all watch create and update requests.
|
||||
|
||||
The live, fully-merged spec is always available at `/api/v1/full-spec` — use that URL with
|
||||
Swagger UI or Redoc to see the complete schema for your specific installation.
|
||||
|
||||
---
|
||||
|
||||
### Writing a processor `api.yaml`
|
||||
|
||||
Place an `api.yaml` in the processor plugin's own directory, alongside its `__init__.py`
|
||||
(e.g. `changedetectionio/processors/my_processor/api.yaml`). The schema name **must** follow the
|
||||
convention `processor_config_<processor_name>` (e.g. `processor_config_restock_diff`). That same
|
||||
key is used as the JSON field name when creating or updating a watch.
|
||||
|
||||
A minimal `api.yaml` for a hypothetical `my_processor`:
|
||||
|
||||
```yaml
|
||||
components:
|
||||
schemas:
|
||||
processor_config_my_processor:
|
||||
type: object
|
||||
description: Configuration for my_processor
|
||||
properties:
|
||||
some_option:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Enable some behaviour
|
||||
|
||||
paths:
|
||||
/watch:
|
||||
post:
|
||||
x-code-samples:
|
||||
- lang: curl
|
||||
label: my_processor example
|
||||
source: |
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com",
|
||||
"processor": "my_processor",
|
||||
"processor_config_my_processor": { "some_option": true }
|
||||
}'
|
||||
```
|
||||
|
||||
The `paths` section in `api.yaml` is used only for injecting additional `x-code-samples` into
|
||||
existing endpoints — you cannot define new routes via a plugin.
|
||||
|
||||
---
|
||||
|
||||
### Built-in plugin: `restock_diff`
|
||||
|
||||
The `restock_diff` processor is always shipped with changedetection.io. It monitors product
|
||||
availability and price changes using structured data (JSON-LD / schema.org microdata) and
|
||||
text heuristics. It is activated by setting `"processor": "restock_diff"` on a watch.
|
||||
|
||||
It adds the `processor_config_restock_diff` block to the Watch schema with these fields:
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `in_stock_processing` | string | `in_stock_only` | `in_stock_only` — only alert Out-of-Stock→In-Stock · `all_changes` — alert any availability change · `off` — disable stock tracking |
|
||||
| `follow_price_changes` | boolean | `true` | Monitor and alert on price changes |
|
||||
| `price_change_min` | number\|null | — | Alert when price drops **below** this value |
|
||||
| `price_change_max` | number\|null | — | Alert when price rises **above** this value |
|
||||
| `price_change_threshold_percent` | number\|null | — | Minimum % change since the original price to trigger an alert |
|
||||
|
||||
#### CREATE — Add a restock/price monitor
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/product/widget",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### READ — Retrieve the monitor
|
||||
|
||||
The response JSON includes `processor_config_restock_diff` alongside all standard watch fields:
|
||||
|
||||
```bash
|
||||
curl -X GET "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY"
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"uuid": "cc0cfffa-f449-477b-83ea-0caafd1dc091",
|
||||
"url": "https://example.com/product/widget",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5,
|
||||
"price_change_min": null,
|
||||
"price_change_max": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### UPDATE — Change thresholds without recreating the monitor
|
||||
|
||||
Only fields included in the request body are updated; omitted fields are left unchanged.
|
||||
|
||||
```bash
|
||||
curl -X PUT "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": true,
|
||||
"price_change_min": 10.00,
|
||||
"price_change_max": 500.00
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### DELETE — Remove the monitor
|
||||
|
||||
```bash
|
||||
curl -X DELETE "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
||||
-H "x-api-key: YOUR_API_KEY"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
For the complete schema-validated documentation including all processor fields, fetch the live spec
|
||||
and load it into Swagger UI or Redoc:
|
||||
|
||||
```
|
||||
GET /api/v1/full-spec
|
||||
```
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
@@ -577,6 +725,13 @@ components:
|
||||
- true: Tag settings override watch settings
|
||||
- false: Tag settings do not override (watches use their own settings)
|
||||
- null: Not decided yet / inherit default behavior
|
||||
url_match_pattern:
|
||||
type: string
|
||||
description: |
|
||||
Automatically apply this tag to any watch whose URL matches this pattern.
|
||||
Supports fnmatch wildcards (* and ?), e.g. *://example.com/*.
|
||||
Plain strings (e.g. github.com/myorg) are matched as case-insensitive substrings.
|
||||
Leave empty to disable auto-matching.
|
||||
# Future: Aggregated statistics from all watches with this tag
|
||||
# check_count:
|
||||
# type: integer
|
||||
@@ -1921,15 +2076,35 @@ paths:
|
||||
|
||||
Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
|
||||
spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
|
||||
Use this URL with Swagger UI or Redoc to get accurate documentation for your specific install.
|
||||
|
||||
**Use this URL** with Swagger UI or Redoc to get schema-accurate documentation for your
|
||||
specific install — it includes every `processor_config_<name>` schema block contributed by
|
||||
installed processors (e.g. `processor_config_restock_diff` from the built-in restock plugin).
|
||||
|
||||
This endpoint requires no authentication and returns YAML.
|
||||
|
||||
To load it directly in Swagger UI, paste the URL into the "Explore" box:
|
||||
```
|
||||
http://localhost:5000/api/v1/full-spec
|
||||
```
|
||||
security: []
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
source: |
|
||||
# Fetch the live merged spec (no API key needed)
|
||||
curl -X GET "http://localhost:5000/api/v1/full-spec"
|
||||
- lang: 'Python'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
# No authentication required
|
||||
response = requests.get('http://localhost:5000/api/v1/full-spec')
|
||||
print(response.text) # Returns YAML
|
||||
responses:
|
||||
'200':
|
||||
description: Merged OpenAPI specification in YAML format
|
||||
description: |
|
||||
Merged OpenAPI specification in YAML format. Includes all processor plugin schemas
|
||||
(e.g. `processor_config_restock_diff`) not present in the static `api-spec.yaml`.
|
||||
content:
|
||||
application/yaml:
|
||||
schema:
|
||||
|
||||
+353
-9
File diff suppressed because one or more lines are too long
+2
-2
@@ -40,7 +40,7 @@ orjson~=3.11
|
||||
# jq not available on Windows so must be installed manually
|
||||
|
||||
# Notification library
|
||||
apprise==1.9.7
|
||||
apprise==1.9.8
|
||||
|
||||
diff_match_patch
|
||||
|
||||
@@ -98,7 +98,7 @@ pytest-flask ~=1.3
|
||||
pytest-mock ~=3.15
|
||||
|
||||
# OpenAPI validation support
|
||||
openapi-core[flask] ~= 0.22
|
||||
openapi-core[flask] ~= 0.23
|
||||
|
||||
loguru
|
||||
|
||||
|
||||
Reference in New Issue
Block a user