Compare commits

...

26 Commits

Author SHA1 Message Date
dgtlmoon e1b048f961 oops 2026-02-18 17:52:57 +01:00
dgtlmoon 9ba645d4cc Do it threaded 2026-02-18 17:39:09 +01:00
dgtlmoon e6c0d538e6 oops forgot module 2026-02-18 17:34:01 +01:00
dgtlmoon e2fffc36e4 Small tidy 2026-02-18 17:32:16 +01:00
dgtlmoon b9a2f781ac Backups was missing tags 2026-02-18 17:29:45 +01:00
dgtlmoon 76abb4ab71 WIP 2026-02-18 17:26:44 +01:00
dgtlmoon bca35f680e 0.53.4 2026-02-18 14:07:26 +01:00
dgtlmoon fafea1b5c6 Updates/migration - Re-run tag update, re-save to cleanup changedetection.json, code refactor (#3898) 2026-02-18 14:05:23 +01:00
dgtlmoon 93630e188d UI - Search modal - fixes for running in sub path 2026-02-18 10:27:12 +01:00
dgtlmoon 7e99d748b9 Puppeteer - Adding extra browser cleanup (#3897) 2026-02-18 10:18:14 +01:00
dgtlmoon 352c91c619 Puppeteer - Use a modern scroll method for screenshot stitching 2026-02-18 10:01:22 +01:00
dgtlmoon a6e55aaba9 UI - CSS - Ensure 'difference' 'preview' both wraps by word and by very long strings 2026-02-17 17:08:44 +01:00
dgtlmoon 25a17bd49d Fix: Some SPAs with long content - Stripping tags must also find matching close tag (#3895) 2026-02-17 16:57:29 +01:00
dgtlmoon 954582a581 Fix: Some SPA's also set body content to display: none which breaks text output 2026-02-17 15:38:54 +01:00
dgtlmoon d8ef86a8b5 "Error 200 no content" - Some very large SPA pages make HTML to Text fail by dumping 10Mb+ into page header, strip extras. (#3892) 2026-02-17 14:44:03 +01:00
dgtlmoon 8711d29861 UI - Filters & Triggers - Adding reminder that you can also use 'Conditions' for trigger rules 2026-02-17 02:55:18 +01:00
dgtlmoon 2343ddd88a Minor code tidy 2026-02-17 02:46:22 +01:00
dgtlmoon c6d6ef0e0c Fix time schedule off-by-one bug at exact end times for all durations and add comprehensive edge case tests Re #846 (#3890) 2026-02-17 02:38:16 +01:00
dgtlmoon 23063ad8a1 UI - More fixes for realtime updates 2026-02-17 02:37:03 +01:00
dgtlmoon 27b8a2d178 UI - Fixing realtime updates for status updates when checking (#3889) 2026-02-17 02:26:38 +01:00
dgtlmoon a53f2a784d Pluggy plugin hook for before and after a watch is processed (#3888) 2026-02-17 01:58:41 +01:00
dgtlmoon 7558ca5fda 0.53.3 2026-02-16 20:41:07 +01:00
dgtlmoon 383c3b427f API - Adding automated test for API with NGINX sub-path, Skip validation errors about server path (allows use on sub-paths/reverse proxy etc) (#3886) 2026-02-16 20:32:35 +01:00
dgtlmoon b01ba5d8a1 UI - Use version from code in version tab 2026-02-16 19:41:27 +01:00
dgtlmoon 86e5184cef 0.53.2 2026-02-16 18:52:31 +01:00
dgtlmoon 1dbf1f5db5 UI - Watch overview - Restock price, validate number before output (#3883) 2026-02-16 18:50:37 +01:00
34 changed files with 1571 additions and 136 deletions

.github/nginx-reverse-proxy-test.conf (new file, 33 lines)

@@ -0,0 +1,33 @@
server {
    listen 80;
    server_name localhost;

    # Test basic reverse proxy to changedetection.io
    location / {
        proxy_pass http://changedet-app:5000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # WebSocket support
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }

    # Test subpath deployment with X-Forwarded-Prefix
    location /changedet-sub/ {
        proxy_pass http://changedet-app:5000/;
        proxy_set_header X-Forwarded-Prefix /changedet-sub;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # WebSocket support
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }
}
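
The subpath location only works end-to-end if the application honours X-Forwarded-Prefix. As a rough sketch (not the project's actual wiring; the USE_X_SETTINGS flag in the workflow below enables the real equivalent), a Flask app behind this proxy could trust the header via Werkzeug's ProxyFix:

# Sketch only: trusting X-Forwarded-* headers behind the nginx config above.
from flask import Flask
from werkzeug.middleware.proxy_fix import ProxyFix

app = Flask(__name__)
# Trust exactly one proxy hop for each forwarded header, including the
# X-Forwarded-Prefix set by the /changedet-sub/ location block.
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)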


@@ -324,6 +324,175 @@ jobs:
        run: |
          docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'

  nginx-reverse-proxy:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v6

      - name: Download Docker image artifact
        uses: actions/download-artifact@v7
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
        run: |
          docker load -i /tmp/test-changedetectionio.tar

      - name: Spin up services
        run: |
          docker network create changedet-network
          # Start changedetection.io container with X-Forwarded headers support
          docker run --name changedet-app --hostname changedet-app --network changedet-network \
            -e USE_X_SETTINGS=true \
            -d test-changedetectionio
          sleep 3

      - name: Start nginx reverse proxy
        run: |
          # Start nginx with our test configuration
          docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \
            -v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \
            nginx:alpine
          sleep 2

      - name: Test reverse proxy - root path
        run: |
          echo "=== Testing nginx reverse proxy at root path ==="
          curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html

          # Check for changedetection.io UI elements
          if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then
            echo "✓ Found checkbox-uuid in response"
          else
            echo "ERROR: checkbox-uuid not found in response"
            cat /tmp/nginx-test-root.html
            exit 1
          fi

          # Check for watchlist content
          if grep -q -i "watch" /tmp/nginx-test-root.html; then
            echo "✓ Found watch/watchlist content in response"
          else
            echo "ERROR: watchlist content not found"
            cat /tmp/nginx-test-root.html
            exit 1
          fi
          echo "✓ Root path reverse proxy working correctly"

      - name: Test reverse proxy - subpath with X-Forwarded-Prefix
        run: |
          echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ==="
          curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html

          # Check for changedetection.io UI elements
          if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then
            echo "✓ Found checkbox-uuid in subpath response"
          else
            echo "ERROR: checkbox-uuid not found in subpath response"
            cat /tmp/nginx-test-subpath.html
            exit 1
          fi
          echo "✓ Subpath reverse proxy working correctly"

      - name: Test API through reverse proxy subpath
        run: |
          echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ==="

          # Extract API key from the changedetection.io datastore
          API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4)
          if [ -z "$API_KEY" ]; then
            echo "ERROR: Could not extract API key from datastore"
            docker exec changedet-app cat /datastore/changedetection.json
            exit 1
          fi
          echo "✓ Extracted API key: ${API_KEY:0:8}..."

          # Create a watch via API through nginx proxy subpath
          echo "Creating watch via POST to /changedet-sub/api/v1/watch"
          RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \
            -H "x-api-key: ${API_KEY}" \
            -H "Content-Type: application/json" \
            -d '{
              "url": "https://example.com/test-nginx-proxy",
              "tag": "nginx-test"
            }')
          HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
          BODY=$(echo "$RESPONSE" | head -n-1)
          if [ "$HTTP_CODE" != "201" ]; then
            echo "ERROR: Expected HTTP 201, got $HTTP_CODE"
            echo "Response: $BODY"
            exit 1
          fi
          echo "✓ Watch created successfully (HTTP 201)"

          # Extract the watch UUID from response
          WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4)
          echo "✓ Watch UUID: $WATCH_UUID"

          # Update the watch via PUT through nginx proxy subpath
          echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}"
          RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
            -H "x-api-key: ${API_KEY}" \
            -H "Content-Type: application/json" \
            -d '{
              "paused": true
            }')
          HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
          BODY=$(echo "$RESPONSE" | head -n-1)
          if [ "$HTTP_CODE" != "200" ]; then
            echo "ERROR: Expected HTTP 200, got $HTTP_CODE"
            echo "Response: $BODY"
            exit 1
          fi
          if echo "$BODY" | grep -q 'OK'; then
            echo "✓ Watch updated successfully (HTTP 200, response: OK)"
          else
            echo "ERROR: Expected response 'OK', got: $BODY"
            echo "Response: $BODY"
            exit 1
          fi

          # Verify the watch is paused via GET
          echo "Verifying watch is paused via GET"
          RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
            -H "x-api-key: ${API_KEY}")
          if echo "$RESPONSE" | grep -q '"paused": *true'; then
            echo "✓ Watch is paused as expected"
          else
            echo "ERROR: Watch paused state not confirmed"
            echo "Response: $RESPONSE"
            exit 1
          fi
          echo "✓ API tests through nginx subpath completed successfully"

      - name: Cleanup nginx test
        if: always()
        run: |
          docker logs nginx-proxy || true
          docker logs changedet-app || true
          docker stop nginx-proxy changedet-app || true
          docker rm nginx-proxy changedet-app || true

  # Proxy tests
  proxy-tests:
    runs-on: ubuntu-latest


@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.53.1'
__version__ = '0.53.4'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -610,7 +610,7 @@ def main():
@app.context_processor
def inject_template_globals():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
return dict(right_sticky="v"+__version__,
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),


@@ -97,17 +97,6 @@ class Tag(Resource):
# Delete the tag, and any tag reference
del self.datastore.data['settings']['application']['tags'][uuid]
# Delete tag.json file if it exists
import os
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
# Remove tag from all watches
for watch_uuid, watch in self.datastore.data['watching'].items():
if watch.get('tags') and uuid in watch['tags']:


@@ -103,6 +103,7 @@ def validate_openapi_request(operation_id):
if request.method.upper() != 'GET':
# Lazy import - only loaded when actually validating a request
from openapi_core.contrib.flask import FlaskOpenAPIRequest
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError
spec = get_openapi_spec()
openapi_request = FlaskOpenAPIRequest(request)
@@ -110,6 +111,16 @@ def validate_openapi_request(operation_id):
if result.errors:
error_details = []
for error in result.errors:
# Skip path/server validation errors for reverse proxy compatibility
# Flask routing already validates that endpoints exist (returns 404 if not).
# OpenAPI validation here is primarily for request body schema validation.
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
# match the OpenAPI server definitions, causing false positives.
if isinstance(error, PathError):
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
continue
error_str = str(error)
# Extract detailed schema errors from __cause__
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
for schema_error in error.__cause__.schema_errors:
@@ -117,9 +128,12 @@ def validate_openapi_request(operation_id):
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
error_details.append(f"{field}: {msg}")
else:
error_details.append(str(error))
error_details.append(error_str)
# Only raise if we have actual validation errors (not path/server issues)
if error_details:
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
except BadRequest:
# Re-raise BadRequest exceptions (validation failures)
raise


@@ -13,7 +13,7 @@ from loguru import logger
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
def create_backup(datastore_path, watches: dict):
def create_backup(datastore_path, watches: dict, tags: dict = None):
logger.debug("Creating backup...")
import zipfile
from pathlib import Path
@@ -45,6 +45,15 @@ def create_backup(datastore_path, watches: dict):
if os.path.isfile(secret_file):
zipObj.write(secret_file, arcname="secret.txt")
# Add tag data directories (each tag has its own {uuid}/tag.json)
for uuid, tag in (tags or {}).items():
for f in Path(tag.data_dir).glob('*'):
zipObj.write(f,
arcname=os.path.join(f.parts[-2], f.parts[-1]),
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8)
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")
# Add any data in the watch data directory.
for uuid, w in watches.items():
for f in Path(w.data_dir).glob('*'):
@@ -88,7 +97,10 @@ def create_backup(datastore_path, watches: dict):
def construct_blueprint(datastore: ChangeDetectionStore):
from .restore import construct_restore_blueprint
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
backup_threads = []
@login_optionally_required
@@ -96,16 +108,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def request_backup():
if any(thread.is_alive() for thread in backup_threads):
flash(gettext("A backup is already running, check back in a few minutes"), "error")
return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
flash(gettext("Maximum number of backups reached, please remove some"), "error")
return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))
# With immediate persistence, all data is already saved
zip_thread = threading.Thread(
target=create_backup,
args=(datastore.datastore_path, datastore.data.get("watching")),
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
daemon=True,
name="BackupCreator"
)
@@ -113,7 +126,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
backup_threads.append(zip_thread)
flash(gettext("Backup building in background, check back in a few minutes."))
return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))
def find_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
@@ -155,14 +168,14 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
@login_optionally_required
@backups_blueprint.route("", methods=['GET'])
def index():
@backups_blueprint.route("/", methods=['GET'])
@backups_blueprint.route("/create", methods=['GET'])
def create():
backups = find_backups()
output = render_template("overview.html",
output = render_template("backup_create.html",
available_backups=backups,
backup_running=any(thread.is_alive() for thread in backup_threads)
)
return output
@login_optionally_required
@@ -176,6 +189,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Backups were deleted."))
return redirect(url_for('backups.index'))
return redirect(url_for('backups.create'))
return backups_blueprint
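
With the new signature, create_backup() accepts the tags dict as an optional keyword argument; a minimal sketch of a direct (non-threaded) call, reusing the same datastore attributes the thread target above is given:

# Sketch: calling the updated create_backup() directly with the new tags kwarg.
create_backup(
    datastore.datastore_path,
    datastore.data.get("watching"),
    tags=datastore.data['settings']['application'].get('tags', {}),
)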


@@ -0,0 +1,208 @@
import io
import json
import os
import shutil
import tempfile
import threading
import zipfile

from flask import Blueprint, render_template, flash, url_for, redirect, request
from flask_babel import gettext, lazy_gettext as _l
from wtforms import Form, BooleanField, SubmitField
from flask_wtf.file import FileField, FileAllowed
from loguru import logger

from changedetectionio.flask_app import login_optionally_required


class RestoreForm(Form):
    zip_file = FileField(_l('Backup zip file'), validators=[
        FileAllowed(['zip'], _l('Must be a .zip backup file!'))
    ])
    include_groups = BooleanField(_l('Include groups'), default=True)
    include_groups_replace_existing = BooleanField(_l('Replace existing groups of the same UUID'), default=True)
    include_watches = BooleanField(_l('Include watches'), default=True)
    include_watches_replace_existing = BooleanField(_l('Replace existing watches of the same UUID'), default=True)
    submit = SubmitField(_l('Restore backup'))


def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
    """
    Extract and import watches and groups from a backup zip stream.

    Mirrors the store's _load_watches / _load_tags loading pattern:
    - UUID dirs with tag.json → Tag.model + tag_obj.commit()
    - UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()

    Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.
    Raises zipfile.BadZipFile if the stream is not a valid zip.
    """
    from changedetectionio.model import Tag

    restored_groups = 0
    skipped_groups = 0
    restored_watches = 0
    skipped_watches = 0

    current_tags = datastore.data['settings']['application'].get('tags', {})
    current_watches = datastore.data['watching']

    with tempfile.TemporaryDirectory() as tmpdir:
        logger.debug(f"Restore: extracting zip to {tmpdir}")
        with zipfile.ZipFile(zip_stream, 'r') as zf:
            zf.extractall(tmpdir)
        logger.debug("Restore: zip extracted, scanning UUID directories")

        for entry in os.scandir(tmpdir):
            if not entry.is_dir():
                continue
            uuid = entry.name
            tag_json_path = os.path.join(entry.path, 'tag.json')
            watch_json_path = os.path.join(entry.path, 'watch.json')

            # --- Tags (groups) ---
            if include_groups and os.path.exists(tag_json_path):
                if uuid in current_tags and not include_groups_replace:
                    logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
                    skipped_groups += 1
                    continue
                try:
                    with open(tag_json_path, 'r', encoding='utf-8') as f:
                        tag_data = json.load(f)
                except (json.JSONDecodeError, IOError) as e:
                    logger.error(f"Restore: failed to read tag.json for {uuid}: {e}")
                    continue
                title = tag_data.get('title', uuid)
                logger.debug(f"Restore: importing group '{title}' ({uuid})")
                # Mirror _load_tags: set uuid and force processor
                tag_data['uuid'] = uuid
                tag_data['processor'] = 'restock_diff'
                # Copy the UUID directory so data_dir exists for commit()
                dst_dir = os.path.join(datastore.datastore_path, uuid)
                if os.path.exists(dst_dir):
                    shutil.rmtree(dst_dir)
                shutil.copytree(entry.path, dst_dir)
                tag_obj = Tag.model(
                    datastore_path=datastore.datastore_path,
                    __datastore=datastore.data,
                    default=tag_data
                )
                current_tags[uuid] = tag_obj
                tag_obj.commit()
                restored_groups += 1
                logger.success(f"Restore: group '{title}' ({uuid}) restored")

            # --- Watches ---
            elif include_watches and os.path.exists(watch_json_path):
                if uuid in current_watches and not include_watches_replace:
                    logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
                    skipped_watches += 1
                    continue
                try:
                    with open(watch_json_path, 'r', encoding='utf-8') as f:
                        watch_data = json.load(f)
                except (json.JSONDecodeError, IOError) as e:
                    logger.error(f"Restore: failed to read watch.json for {uuid}: {e}")
                    continue
                url = watch_data.get('url', uuid)
                logger.debug(f"Restore: importing watch '{url}' ({uuid})")
                # Copy UUID directory first so data_dir and history files exist
                dst_dir = os.path.join(datastore.datastore_path, uuid)
                if os.path.exists(dst_dir):
                    shutil.rmtree(dst_dir)
                shutil.copytree(entry.path, dst_dir)
                # Mirror _load_watches / rehydrate_entity
                watch_data['uuid'] = uuid
                watch_obj = datastore.rehydrate_entity(uuid, watch_data)
                current_watches[uuid] = watch_obj
                watch_obj.commit()
                restored_watches += 1
                logger.success(f"Restore: watch '{url}' ({uuid}) restored")

    logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
                 f"watches {restored_watches} restored / {skipped_watches} skipped")

    # Persist changedetection.json (includes the updated tags dict)
    logger.debug("Restore: committing datastore settings")
    datastore.commit()

    return {
        'restored_groups': restored_groups,
        'skipped_groups': skipped_groups,
        'restored_watches': restored_watches,
        'skipped_watches': skipped_watches,
    }


def construct_restore_blueprint(datastore):
    restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
    restore_threads = []

    @login_optionally_required
    @restore_blueprint.route("/restore", methods=['GET'])
    def restore():
        form = RestoreForm()
        return render_template("backup_restore.html",
                               form=form,
                               restore_running=any(t.is_alive() for t in restore_threads))

    @login_optionally_required
    @restore_blueprint.route("/restore/start", methods=['POST'])
    def backups_restore_start():
        if any(t.is_alive() for t in restore_threads):
            flash(gettext("A restore is already running, check back in a few minutes"), "error")
            return redirect(url_for('backups.restore.restore'))

        zip_file = request.files.get('zip_file')
        if not zip_file or not zip_file.filename:
            flash(gettext("No file uploaded"), "error")
            return redirect(url_for('backups.restore.restore'))
        if not zip_file.filename.lower().endswith('.zip'):
            flash(gettext("File must be a .zip backup file"), "error")
            return redirect(url_for('backups.restore.restore'))

        # Read into memory now — the request stream is gone once we return
        try:
            zip_bytes = io.BytesIO(zip_file.read())
            zipfile.ZipFile(zip_bytes)  # quick validity check before spawning
            zip_bytes.seek(0)
        except zipfile.BadZipFile:
            flash(gettext("Invalid or corrupted zip file"), "error")
            return redirect(url_for('backups.restore.restore'))

        include_groups = request.form.get('include_groups') == 'y'
        include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
        include_watches = request.form.get('include_watches') == 'y'
        include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'

        restore_thread = threading.Thread(
            target=import_from_zip,
            kwargs={
                'zip_stream': zip_bytes,
                'datastore': datastore,
                'include_groups': include_groups,
                'include_groups_replace': include_groups_replace,
                'include_watches': include_watches,
                'include_watches_replace': include_watches_replace,
            },
            daemon=True,
            name="BackupRestore"
        )
        restore_thread.start()
        restore_threads.append(restore_thread)

        flash(gettext("Restore started in background, check back in a few minutes."))
        return redirect(url_for('backups.restore.restore'))

    return restore_blueprint
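
Because import_from_zip() accepts any seekable stream, it can also be driven outside the upload route; a hedged sketch (the backup filename is hypothetical, and datastore is assumed to be an already-constructed ChangeDetectionStore):

import io

# Sketch: restoring a backup zip from disk, outside the Flask request thread.
with open('changedetection-backup-1700000000.zip', 'rb') as f:
    counts = import_from_zip(
        zip_stream=io.BytesIO(f.read()),
        datastore=datastore,
        include_groups=True,
        include_groups_replace=True,
        include_watches=True,
        include_watches_replace=True,
    )
print(counts)  # e.g. {'restored_groups': 1, 'skipped_groups': 0, ...}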


@@ -0,0 +1,49 @@
{% extends 'base.html' %}

{% block content %}
    {% from '_helpers.html' import render_simple_field, render_field %}
    <div class="edit-form">
        <div class="tabs collapsable">
            <ul>
                <li class="tab active" id=""><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
                <li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
            </ul>
        </div>
        <div class="box-wrap inner">
            <div id="general">
                {% if backup_running %}
                    <p>
                        <span class="spinner"></span>&nbsp;<strong>{{ _('A backup is running!') }}</strong>
                    </p>
                {% endif %}
                <p>
                    {{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
                </p>
                <br>
                {% if available_backups %}
                    <ul>
                        {% for backup in available_backups %}
                            <li>
                                <a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
                            </li>
                        {% endfor %}
                    </ul>
                {% else %}
                    <p>
                        <strong>{{ _('No backups found.') }}</strong>
                    </p>
                {% endif %}
                <a class="pure-button pure-button-primary"
                   href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
                {% if available_backups %}
                    <a class="pure-button button-small button-error "
                       href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
                {% endif %}
            </div>
        </div>
    </div>
{% endblock %}


@@ -0,0 +1,58 @@
{% extends 'base.html' %}

{% block content %}
    {% from '_helpers.html' import render_field, render_checkbox_field %}
    <div class="edit-form">
        <div class="tabs collapsable">
            <ul>
                <li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
                <li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
            </ul>
        </div>
        <div class="box-wrap inner">
            <div id="general">
                {% if restore_running %}
                    <p>
                        <span class="spinner"></span>&nbsp;<strong>{{ _('A restore is running!') }}</strong>
                    </p>
                {% endif %}
                <p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
                <p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
                <form class="pure-form pure-form-stacked settings"
                      action="{{ url_for('backups.restore.backups_restore_start') }}"
                      method="POST"
                      enctype="multipart/form-data">
                    <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.include_groups) }}
                        <span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
                    </div>
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.include_groups_replace_existing) }}
                        <span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
                    </div>
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.include_watches) }}
                        <span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
                    </div>
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.include_watches_replace_existing) }}
                        <span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.zip_file) }}
                    </div>
                    <div class="pure-controls">
                        <button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
                    </div>
                </form>
            </div>
        </div>
    </div>
{% endblock %}


@@ -1,36 +0,0 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field %}
<div class="edit-form">
<div class="box-wrap inner">
<h2>{{ _('Backups') }}</h2>
{% if backup_running %}
<p>
<span class="spinner"></span>&nbsp;<strong>{{ _('A backup is running!') }}</strong>
</p>
{% endif %}
<p>
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
</p>
<br>
{% if available_backups %}
<ul>
{% for backup in available_backups %}
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
{% endfor %}
</ul>
{% else %}
<p>
<strong>{{ _('No backups found.') }}</strong>
</p>
{% endif %}
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
{% if available_backups %}
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
{% endif %}
</div>
</div>
{% endblock %}


@@ -16,6 +16,11 @@
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="tab-pane-inner" id="url-list">
<p>
{{ _('Restoring changedetection.io backups is in the') }}<a href="{{ url_for('backups.restore.restore') }}"> {{ _('backups section') }}</a>.
<br>
</p>
<div class="pure-control-group">
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
<br>
@@ -37,9 +42,6 @@
</div>
<div class="tab-pane-inner" id="distill-io">
<div class="pure-control-group">
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
@@ -49,8 +51,6 @@
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
</p>
</div>
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
font-family:monospace;
white-space: pre;
@@ -114,6 +114,7 @@
</div>
</div>
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
</form>
</div>


@@ -25,7 +25,7 @@
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
{% if plugin_tabs %}


@@ -70,17 +70,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if datastore.data['settings']['application']['tags'].get(uuid):
del datastore.data['settings']['application']['tags'][uuid]
# Delete tag.json file if it exists
import os
tag_dir = os.path.join(datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
# Remove tag from all watches in background thread to avoid blocking
def remove_tag_background(tag_uuid):
"""Background thread to remove tag from watches - discarded after completion."""
@@ -127,19 +116,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@tags_blueprint.route("/delete_all", methods=['GET'])
@login_optionally_required
def delete_all():
# Delete all tag.json files
import os
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
# Clear all tags from settings immediately
datastore.data['settings']['application']['tags'] = {}
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
# TagsDict 'del' handler will remove the dir
del datastore.data['settings']['application']['tags'][tag_uuid]
# Clear tags from all watches in background thread to avoid blocking
def clear_all_tags_background():
@@ -255,7 +236,4 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
def form_tag_delete(uuid):
return redirect(url_for('tags.tags_overview_page'))
return tags_blueprint


@@ -194,9 +194,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
tag_limit = request.args.get('tag')
now = int(time.time())
# Mark watches as viewed in background thread to avoid blocking
def mark_viewed_background():
"""Background thread to mark watches as viewed - discarded after completion."""
# Mark watches as viewed - use background thread only for large watch counts
def mark_viewed_impl():
"""Mark watches as viewed - can run synchronously or in background thread."""
marked_count = 0
try:
for watch_uuid, watch in datastore.data['watching'].items():
@@ -209,15 +209,21 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
datastore.set_last_viewed(watch_uuid, now)
marked_count += 1
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
except Exception as e:
logger.error(f"Error in background mark as viewed: {e}")
logger.error(f"Error marking as viewed: {e}")
# Start background thread and return immediately
thread = threading.Thread(target=mark_viewed_background, daemon=True)
thread.start()
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
# For larger counts, use background thread to avoid blocking the UI
watch_count = len(datastore.data['watching'])
if watch_count < 10:
# Run synchronously for small watch counts
mark_viewed_impl()
else:
# Start background thread for large watch counts
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
thread.start()
flash(gettext("Marking watches as viewed in background..."))
return redirect(url_for('watchlist.index', tag=tag_limit))
@ui_blueprint.route("/delete", methods=['GET'])


@@ -304,12 +304,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
</span>
{%- endif -%}
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
{%- if watch['restock']['price'] != None -%}
{%- if watch.get('restock') and watch['restock'].get('price') -%}
{%- if watch['restock']['price'] is number -%}
<span class="restock-label price" title="{{ _('Price') }}">
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
</span>
{%- endif -%}
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
{%- endif -%}
{%- elif not watch.has_restock_info -%}
<span class="restock-label error">{{ _('No information') }}</span>
{%- endif -%}


@@ -86,8 +86,8 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
# better than scrollTo incase they override it in the page
await page.evaluate(
"""(y) => {
document.documentElement.scrollTop = y;
document.body.scrollTop = y;
const el = document.scrollingElement;
if (el) el.scrollTop = y;
}""",
y
)
@@ -305,6 +305,8 @@ class fetcher(Fetcher):
await asyncio.wait_for(self.browser.close(), timeout=3.0)
except Exception as cleanup_error:
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
finally:
self.browser = None
raise
# Add console handler to capture console.log from favicon fetcher
@@ -532,6 +534,14 @@ class fetcher(Fetcher):
)
except asyncio.TimeoutError:
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
finally:
# Internal cleanup on any exception/timeout - call quit() immediately
# This prevents connection leaks during exception bursts
# Worker.py's quit() call becomes a redundant safety net (idempotent)
try:
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
except Exception as cleanup_error:
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")
# Plugin registration for built-in fetcher


@@ -565,6 +565,27 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
if is_rss:
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
html_content = re.sub(r'</title>', r'</h1>', html_content)
else:
# Strip bloat in one pass, SPA's often dump 10Mb+ into the <head> for styles, which is not needed
# Causing inscriptis to silently exit when more than ~10MB is found.
# All we are doing here is converting the HTML to text, no CSS layout etc
# Use backreference (\1) to ensure opening/closing tags match (prevents <style> matching </svg> in CSS data URIs)
html_content = re.sub(r'<(style|script|svg|noscript)[^>]*>.*?</\1>|<(?:link|meta)[^>]*/?>|<!--.*?-->',
'', html_content, flags=re.DOTALL | re.IGNORECASE)
# SPAs often use <body style="display:none"> to hide content until JS loads
# inscriptis respects CSS display rules, so we need to remove these hiding styles
# to extract the actual page content
body_style_pattern = r'(<body[^>]*)\s+style\s*=\s*["\']([^"\']*\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b[^"\']*)["\']'
# Check if body has hiding styles that need to be fixed
body_match = re.search(body_style_pattern, html_content, flags=re.IGNORECASE)
if body_match:
from loguru import logger
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{body_match.group(2)}')")
html_content = re.sub(body_style_pattern, r'\1', html_content, flags=re.IGNORECASE)
text_content = get_text(html_content, config=parser_config)
return text_content
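
The backreference is the key detail: a plain alternation for the closing tag would let a stray </svg> inside a CSS data URI terminate a <style> block early. A small self-contained check of the pattern used above:

import re

# Same pattern as above: \1 forces the closing tag to match the opening tag.
pattern = r'<(style|script|svg|noscript)[^>]*>.*?</\1>|<(?:link|meta)[^>]*/?>|<!--.*?-->'
html = '<style>.x{background:url("data:image/svg+xml,<svg></svg>")}</style><p>kept</p>'
print(re.sub(pattern, '', html, flags=re.DOTALL | re.IGNORECASE))
# -> '<p>kept</p>' (the </svg> inside the data URI did not close the match early)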


@@ -2,6 +2,7 @@ from os import getenv
from copy import deepcopy
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
from changedetectionio.model.Tags import TagsDict
from changedetectionio.notification import (
default_notification_body,
@@ -68,7 +69,7 @@ class model(dict):
'schema_version' : 0,
'shared_diff_access': False,
'strip_ignored_lines': False,
'tags': {}, #@todo use Tag.model initialisers
'tags': None, # Initialized in __init__ with real datastore_path
'webdriver_delay': None , # Extra delay in seconds before extracting text
'ui': {
'use_page_title_in_list': True,
@@ -80,10 +81,16 @@ class model(dict):
}
}
def __init__(self, *arg, **kw):
def __init__(self, *arg, datastore_path=None, **kw):
super(model, self).__init__(*arg, **kw)
# Capture any tags data passed in before base_config overwrites the structure
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
self.update(deepcopy(self.base_config))
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
if datastore_path is None:
raise ValueError("App.model() requires 'datastore_path' keyword argument")
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)
def parse_headers_from_text_file(filepath):


@@ -0,0 +1,39 @@
import os
import shutil
from pathlib import Path

from loguru import logger

_SENTINEL = object()


class TagsDict(dict):
    """Dict subclass that removes the corresponding tag.json file when a tag is deleted."""

    def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
        self._datastore_path = Path(datastore_path)
        super().__init__(*args, **kwargs)

    def __delitem__(self, key: str) -> None:
        super().__delitem__(key)
        tag_dir = self._datastore_path / key
        tag_json_file = tag_dir / "tag.json"
        if not os.path.exists(tag_json_file):
            logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")
            return
        try:
            shutil.rmtree(tag_dir)
            logger.info(f"Deleted tag directory for tag {key!r}")
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")

    def pop(self, key: str, default=_SENTINEL):
        """Remove and return tag, deleting its tag.json file. Raises KeyError if missing and no default given."""
        if key in self:
            value = self[key]
            del self[key]
            return value
        if default is _SENTINEL:
            raise KeyError(key)
        return default
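
A short sketch of the resulting behaviour (paths hypothetical):

# __delitem__ removes the whole {uuid}/ directory, but only when
# {uuid}/tag.json exists, guarding against deleting a watch's directory.
tags = TagsDict({'abc-123': {'title': 'News'}}, datastore_path='/datastore')
del tags['abc-123']        # removes /datastore/abc-123/ if tag.json is present
tags.pop('missing', None)  # returns None instead of raising KeyError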


@@ -129,6 +129,51 @@ class ChangeDetectionSpec:
"""
pass
@hookspec
def update_handler_alter(update_handler, watch, datastore):
"""Modify or wrap the update_handler before it processes a watch.
This hook is called after the update_handler (perform_site_check instance) is created
but before it calls call_browser() and run_changedetection(). Plugins can use this to:
- Wrap the handler to add logging/metrics
- Modify handler configuration
- Add custom preprocessing logic
Args:
update_handler: The perform_site_check instance that will process the watch
watch: The watch dict being processed
datastore: The application datastore
Returns:
object or None: Return a modified/wrapped handler, or None to keep the original.
If multiple plugins return handlers, they are chained in registration order.
"""
pass
@hookspec
def update_finalize(update_handler, watch, datastore, processing_exception):
"""Called after watch processing completes (success or failure).
This hook is called in the finally block after all processing is complete,
allowing plugins to perform cleanup, update metrics, or log final status.
The plugin can access update_handler.last_logging_insert_id if it was stored
during update_handler_alter, and use processing_exception to determine if
the processing succeeded or failed.
Args:
update_handler: The perform_site_check instance (may be None if creation failed)
watch: The watch dict that was processed (may be None if not loaded)
datastore: The application datastore
processing_exception: The exception from the main processing block, or None if successful.
This does NOT include cleanup exceptions - only exceptions from
the actual watch processing (fetch, diff, etc).
Returns:
None: This hook doesn't return a value
"""
pass
# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -499,4 +544,66 @@ def get_plugin_template_paths():
template_paths.append(templates_dir)
logger.debug(f"Added plugin template path: {templates_dir}")
return template_paths
return template_paths
def apply_update_handler_alter(update_handler, watch, datastore):
"""Apply update_handler_alter hooks from all plugins.
Allows plugins to wrap or modify the update_handler before it processes a watch.
Multiple plugins can chain modifications - each plugin receives the result from
the previous plugin.
Args:
update_handler: The perform_site_check instance to potentially modify
watch: The watch dict being processed
datastore: The application datastore
Returns:
object: The (potentially modified/wrapped) update_handler
"""
# Get all plugins that implement the update_handler_alter hook
results = plugin_manager.hook.update_handler_alter(
update_handler=update_handler,
watch=watch,
datastore=datastore
)
# Chain results - each plugin gets the result from the previous one
current_handler = update_handler
if results:
for result in results:
if result is not None:
logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
current_handler = result
return current_handler
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
"""Apply update_finalize hooks from all plugins.
Called in the finally block after watch processing completes, allowing plugins
to perform cleanup, update metrics, or log final status.
Args:
update_handler: The perform_site_check instance (may be None)
watch: The watch dict that was processed (may be None)
datastore: The application datastore
processing_exception: The exception from processing, or None if successful
Returns:
None
"""
try:
# Call all plugins that implement the update_finalize hook
plugin_manager.hook.update_finalize(
update_handler=update_handler,
watch=watch,
datastore=datastore,
processing_exception=processing_exception
)
except Exception as e:
# Don't let plugin errors crash the worker
logger.error(f"Error in update_finalize hook: {e}")
logger.exception(f"update_finalize hook exception details:")
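
A minimal plugin sketch exercising both hooks (illustrative only; the namespace string is assumed to equal PLUGIN_NAMESPACE):

import time
import pluggy

# Assumed namespace; must match the PLUGIN_NAMESPACE used by plugin_manager above.
hookimpl = pluggy.HookimplMarker("changedetectionio")

class TimingPlugin:
    @hookimpl
    def update_handler_alter(self, update_handler, watch, datastore):
        # Stash a start time on the handler; return None to keep the original handler.
        update_handler._started = time.monotonic()
        return None

    @hookimpl
    def update_finalize(self, update_handler, watch, datastore, processing_exception):
        if update_handler is not None and hasattr(update_handler, '_started'):
            status = "failed" if processing_exception else "ok"
            elapsed = time.monotonic() - update_handler._started
            print(f"watch {watch.get('uuid') if watch else '?'} {status} in {elapsed:.2f}s")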


@@ -347,6 +347,7 @@ class ContentProcessor:
def extract_text_from_html(self, html_content, stream_content_type):
"""Convert HTML to plain text."""
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
return html_tools.html_to_text(
html_content=html_content,
render_anchor_tag_content=do_anchor,


@@ -102,7 +102,9 @@
}
// Navigate to search results (always redirect to watchlist home)
window.location.href = '/?' + params.toString();
// Use base_path if available (for sub-path deployments like /enlighten-richerx)
const basePath = typeof base_path !== 'undefined' ? base_path : '';
window.location.href = basePath + '/?' + params.toString();
});
}
});


@@ -1 +1 @@
(Minified build of the diff stylesheet, rebuilt from the SCSS below; the only change is that #diff-ui pre gains overflow-wrap:anywhere.)


@@ -62,6 +62,7 @@ body.difference-page {
pre {
white-space: break-spaces;
overflow-wrap: anywhere;
}

File diff suppressed because one or more lines are too long


@@ -22,6 +22,8 @@ import uuid as uuid_builder
from loguru import logger
from blinker import signal
from ..model.Tags import TagsDict
# Try to import orjson for faster JSON serialization
try:
import orjson
@@ -121,6 +123,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if 'application' in settings_data['settings']:
self.__data['settings']['application'].update(settings_data['settings']['application'])
# Use our Tags dict with cleanup helpers etc
# @todo Same for Watches
existing_tags = settings_data.get('settings', {}).get('application', {}).get('tags') or {}
self.__data['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=self.datastore_path)
# More or less for the old format which had this data in the one url-watches.json
# cant hurt to leave it here,
if 'watching' in settings_data:
@@ -196,7 +203,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.datastore_path = datastore_path
# Initialize data structure
self.__data = App.model()
self.__data = App.model(datastore_path=datastore_path)
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
# Base definition for all watchers (deepcopy part of #569)
@@ -355,6 +362,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Deep copy settings to avoid modifying the original
settings_copy = copy.deepcopy(self.__data['settings'])
# Is saved as {uuid}/tag.json
settings_copy['application']['tags'] = {}
return {
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
'app_guid': self.__data.get('app_guid'),


@@ -669,7 +669,9 @@ class DatastoreUpdatesMixin:
def update_26(self):
self.migrate_legacy_db_format()
def update_28(self):
# Re-run tag to JSON migration
def update_29(self):
"""
Migrate tags to individual tag.json files.
@@ -682,8 +684,6 @@ class DatastoreUpdatesMixin:
- Enables independent tag versioning/backup
- Maintains backwards compatibility (tags stay in settings too)
"""
# Force save as tag.json (not watch.json) even if object is corrupted
logger.critical("=" * 80)
logger.critical("Running migration: Individual tag persistence (update_28)")
logger.critical("Creating individual tag.json files")
@@ -702,6 +702,9 @@ class DatastoreUpdatesMixin:
failed_count = 0
for uuid, tag_data in tags.items():
if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
logger.debug(f"Tag {uuid} tag.json exists, skipping")
continue
try:
tag_data.commit()
saved_count += 1
@@ -723,3 +726,7 @@ class DatastoreUpdatesMixin:
logger.info("Future tag edits will update both locations (dual storage)")
logger.critical("=" * 80)
# Write it to disk; it will be saved without ['tags'] in the JSON db because we find them from a disk glob
# (this was left out by accident in the previous update, which is why tags={} is now set in the changedetection.json save_to_disk)
self._save_settings()
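
Read across its hunks, update_29 is an idempotent re-run of the tag migration: tags that already have a {uuid}/tag.json are skipped, the rest are committed to disk, and settings are re-saved so changedetection.json is written with tags={}. A condensed sketch under those assumptions; only tag_data.commit(), the tag.json existence check, and self._save_settings() appear verbatim above:

import os

def update_29(self):
    # Sketch: the tags lookup path is assumed, not shown in the hunks above
    tags = self.data['settings']['application'].get('tags', {})
    for uuid, tag_data in tags.items():
        if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
            continue  # Already migrated; this check makes the re-run safe
        tag_data.commit()  # Persists the tag as {uuid}/tag.json
    # Re-save so the JSON db is written without ['tags'] (they are found by disk glob)
    self._save_settings()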

View File

@@ -10,6 +10,7 @@
<li>{{ _('Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
<li>{{ _('Each line is processed separately (think of each line as "OR")') }}</li>
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
<li>{{ _('You can also use') }} <a href="#conditions">{{ _('conditions') }}</a> - {{ _('"Page text" - with Contains, Starts With, Not Contains and many more') }}</li>
</ul>
</span>
</div>

View File

@@ -6,11 +6,10 @@ import io
from zipfile import ZipFile
import re
import time
from changedetectionio.model import Watch, Tag
def test_backup(client, live_server, measure_memory_usage, datastore_path):
# live_server_setup(live_server) # Setup on conftest per function
set_original_response(datastore_path=datastore_path)
@@ -32,7 +31,7 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
time.sleep(4)
res = client.get(
url_for("backups.index"),
url_for("backups.create"),
follow_redirects=True
)
# Can see the download link to the backup
@@ -80,11 +79,12 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
def test_watch_data_package_download(client, live_server, measure_memory_usage, datastore_path):
"""Test downloading a single watch's data as a zip package"""
import os
set_original_response(datastore_path=datastore_path)
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
tag_uuid = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag")
tag_uuid2 = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag number two")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
@@ -113,4 +113,87 @@ def test_watch_data_package_download(client, live_server, measure_memory_usage,
# Should contain history/snapshot files
uuid4hex_txt = re.compile(f'^{re.escape(uuid)}/.*\\.txt', re.I)
txt_files = list(filter(uuid4hex_txt.match, files))
assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
def test_backup_restore(client, live_server, measure_memory_usage, datastore_path):
"""Test that a full backup zip can be restored — watches and tags survive a round-trip."""
set_original_response(datastore_path=datastore_path)
datastore = live_server.app.config['DATASTORE']
watch_url = url_for('test_endpoint', _external=True)
# Set up: one watch and two tags
uuid = datastore.add_watch(url=watch_url)
tag_uuid = datastore.add_tag(title="Tasty backup tag")
tag_uuid2 = datastore.add_tag(title="Tasty backup tag number two")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Create a full backup
client.get(url_for("backups.request_backup"), follow_redirects=True)
time.sleep(4)
# Download the latest backup zip
res = client.get(url_for("backups.download_backup", filename="latest"), follow_redirects=True)
assert res.content_type == "application/zip"
zip_data = res.data
# Confirm the zip contains both watch.json and tag.json entries
backup = ZipFile(io.BytesIO(zip_data))
names = backup.namelist()
assert f"{uuid}/watch.json" in names, f"watch.json missing from backup: {names}"
assert f"{tag_uuid}/tag.json" in names, f"tag.json for tag 1 missing from backup: {names}"
assert f"{tag_uuid2}/tag.json" in names, f"tag.json for tag 2 missing from backup: {names}"
# --- Wipe everything ---
datastore.delete('all')
client.get(url_for("tags.delete_all"), follow_redirects=True)
assert uuid not in datastore.data['watching'], "Watch should be gone after delete"
assert tag_uuid not in datastore.data['settings']['application']['tags'], "Tag 1 should be gone after delete"
assert tag_uuid2 not in datastore.data['settings']['application']['tags'], "Tag 2 should be gone after delete"
# --- Restore from the backup zip ---
res = client.post(
url_for("backups.restore.backups_restore_start"),
data={
'zip_file': (io.BytesIO(zip_data), 'backup.zip'),
'include_groups': 'y',
'include_groups_replace_existing': 'y',
'include_watches': 'y',
'include_watches_replace_existing': 'y',
},
content_type='multipart/form-data',
follow_redirects=True
)
assert res.status_code == 200
# Wait for the thread to finish
time.sleep(2)
# --- Watch checks ---
restored_watch = datastore.data['watching'].get(uuid)
assert restored_watch is not None, f"Watch {uuid} not found after restore"
assert restored_watch['url'] == watch_url, "Restored watch URL does not match"
assert isinstance(restored_watch, Watch.model), \
f"Watch not properly rehydrated, got {type(restored_watch)}"
assert restored_watch.history_n >= 1, \
f"Restored watch should have at least 1 history entry, got {restored_watch.history_n}"
# --- Tag checks ---
restored_tags = datastore.data['settings']['application']['tags']
restored_tag = restored_tags.get(tag_uuid)
assert restored_tag is not None, f"Tag {tag_uuid} not found after restore"
assert restored_tag['title'] == "Tasty backup tag", "Restored tag 1 title does not match"
assert isinstance(restored_tag, Tag.model), \
f"Tag 1 not properly rehydrated, got {type(restored_tag)}"
restored_tag2 = restored_tags.get(tag_uuid2)
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
assert isinstance(restored_tag2, Tag.model), \
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"

View File

@@ -40,6 +40,7 @@ def test_favicon(client, live_server, measure_memory_usage, datastore_path):
res = client.get(url_for('static_content', group='..', filename='__init__.py'))
assert res.status_code != 200
res = client.get(url_for('static_content', group='.', filename='../__init__.py'))
assert res.status_code != 200

View File

@@ -199,6 +199,259 @@ class TestHtmlToText(unittest.TestCase):
print(f"✓ Basic thread-safety test passed: {len(results)} threads, no errors")
def test_large_html_with_bloated_head(self):
"""
Test that html_to_text can handle large HTML documents with massive <head> bloat.
SPAs often dump 10MB+ of styles, scripts, and other bloat into the <head> section.
This can cause inscriptis to silently exit when processing very large documents.
The fix strips <style>, <script>, <svg>, <noscript>, <link>, <meta>, and HTML comments
before processing, allowing extraction of actual body content.
"""
# Generate massive style block (~5MB)
large_style = '<style>' + '.class{color:red;}\n' * 200000 + '</style>\n'
# Generate massive script block (~5MB)
large_script = '<script>' + 'console.log("bloat");\n' * 200000 + '</script>\n'
# Generate lots of SVG bloat (~3MB)
svg_bloat = '<svg><path d="M0,0 L100,100"/></svg>\n' * 50000
# Generate meta/link tags (~2MB)
meta_bloat = '<meta name="description" content="bloat"/>\n' * 50000
link_bloat = '<link rel="stylesheet" href="bloat.css"/>\n' * 50000
# Generate HTML comments (~1MB)
comment_bloat = '<!-- This is bloat -->\n' * 50000
# Generate noscript bloat
noscript_bloat = '<noscript>Enable JavaScript</noscript>\n' * 10000
# Build the large HTML document
html = f'''<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
{large_style}
{large_script}
{svg_bloat}
{meta_bloat}
{link_bloat}
{comment_bloat}
{noscript_bloat}
</head>
<body>
<h1>Important Heading</h1>
<p>This is the actual content that should be extracted.</p>
<div>
<p>First paragraph with meaningful text.</p>
<p>Second paragraph with more content.</p>
</div>
<footer>Footer text</footer>
</body>
</html>
'''
# Verify the HTML is actually large (should be ~20MB+)
html_size_mb = len(html) / (1024 * 1024)
assert html_size_mb > 15, f"HTML should be >15MB, got {html_size_mb:.2f}MB"
print(f" Testing {html_size_mb:.2f}MB HTML document with bloated head...")
# This should not crash or silently exit
text = html_to_text(html)
# Verify we got actual text output (not empty/None)
assert text is not None, "html_to_text returned None"
assert len(text) > 0, "html_to_text returned empty string"
# Verify the actual body content was extracted
assert 'Important Heading' in text, "Failed to extract heading"
assert 'actual content that should be extracted' in text, "Failed to extract paragraph"
assert 'First paragraph with meaningful text' in text, "Failed to extract first paragraph"
assert 'Second paragraph with more content' in text, "Failed to extract second paragraph"
assert 'Footer text' in text, "Failed to extract footer"
# Verify bloat was stripped (output should be tiny compared to input)
text_size_kb = len(text) / 1024
assert text_size_kb < 1, f"Output too large ({text_size_kb:.2f}KB), bloat not stripped"
# Verify no CSS, script content, or SVG leaked through
assert 'color:red' not in text, "Style content leaked into text output"
assert 'console.log' not in text, "Script content leaked into text output"
assert '<path' not in text, "SVG content leaked into text output"
assert 'bloat.css' not in text, "Link href leaked into text output"
print(f" ✓ Successfully processed {html_size_mb:.2f}MB HTML -> {text_size_kb:.2f}KB text")
def test_body_display_none_spa_pattern(self):
"""
Test that html_to_text can extract content from pages with display:none body.
SPAs (Single Page Applications) often use <body style="display:none"> to hide content
until JavaScript loads and renders the page. inscriptis respects CSS display rules,
so without preprocessing, it would skip all content and return only newlines.
The fix strips display:none and visibility:hidden styles from the body tag before
processing, allowing text extraction from client-side rendered applications.
"""
# Test case 1: Basic display:none
html1 = '''<!DOCTYPE html>
<html lang="en">
<head><title>What's New Fluxguard</title></head>
<body style="display:none">
<h1>Important Heading</h1>
<p>This is actual content that should be extracted.</p>
<div>
<p>First paragraph with meaningful text.</p>
<p>Second paragraph with more content.</p>
</div>
</body>
</html>'''
text1 = html_to_text(html1)
# Before fix: would return ~33 newlines, len(text) ~= 33
# After fix: should extract actual content, len(text) > 100
assert len(text1) > 100, f"Expected substantial text output, got {len(text1)} chars"
assert 'Important Heading' in text1, "Failed to extract heading from display:none body"
assert 'actual content' in text1, "Failed to extract paragraph from display:none body"
assert 'First paragraph' in text1, "Failed to extract nested content"
# Should not be mostly newlines
newline_ratio = text1.count('\n') / len(text1)
assert newline_ratio < 0.5, f"Output is mostly newlines ({newline_ratio:.2%}), content not extracted"
# Test case 2: visibility:hidden (another hiding pattern)
html2 = '<html><body style="visibility:hidden"><h1>Hidden Content</h1><p>Test paragraph.</p></body></html>'
text2 = html_to_text(html2)
assert 'Hidden Content' in text2, "Failed to extract content from visibility:hidden body"
assert 'Test paragraph' in text2, "Failed to extract paragraph from visibility:hidden body"
# Test case 3: Mixed styles (display:none with other CSS)
html3 = '<html><body style="color: red; display:none; font-size: 12px"><p>Mixed style content</p></body></html>'
text3 = html_to_text(html3)
assert 'Mixed style content' in text3, "Failed to extract content from body with mixed styles"
# Test case 4: Case insensitivity (DISPLAY:NONE uppercase)
html4 = '<html><body style="DISPLAY:NONE"><p>Uppercase style</p></body></html>'
text4 = html_to_text(html4)
assert 'Uppercase style' in text4, "Failed to handle uppercase DISPLAY:NONE"
# Test case 5: Space variations (display: none vs display:none)
html5 = '<html><body style="display: none"><p>With spaces</p></body></html>'
text5 = html_to_text(html5)
assert 'With spaces' in text5, "Failed to handle 'display: none' with space"
# Test case 6: Body with other attributes (class, id)
html6 = '<html><body class="foo" style="display:none" id="bar"><p>With attributes</p></body></html>'
text6 = html_to_text(html6)
assert 'With attributes' in text6, "Failed to extract from body with multiple attributes"
# Test case 7: Should NOT affect opacity:0 (which doesn't hide from inscriptis)
html7 = '<html><body style="opacity:0"><p>Transparent content</p></body></html>'
text7 = html_to_text(html7)
# Opacity doesn't affect inscriptis text extraction, content should be there
assert 'Transparent content' in text7, "Incorrectly stripped opacity:0 style"
print(" ✓ All display:none body tag tests passed")
def test_style_tag_with_svg_data_uri(self):
"""
Test that style tags containing SVG data URIs are properly stripped.
Some WordPress and modern sites embed SVG as data URIs in CSS, which contains
<svg> and </svg> tags within the style content. The regex must use backreferences
to ensure <style> matches </style> (not </svg> inside the CSS).
This was causing errors where the regex would match <style> and stop at the first
</svg> it encountered inside a CSS data URI, breaking the HTML structure.
"""
# Real-world example from WordPress wp-block-image styles
html = '''<!DOCTYPE html>
<html>
<head>
<style id='wp-block-image-inline-css'>
.wp-block-image>a,.wp-block-image>figure>a{display:inline-block}.wp-block-image img{box-sizing:border-box;height:auto;max-width:100%;vertical-align:bottom}@supports ((-webkit-mask-image:none) or (mask-image:none)) or (-webkit-mask-image:none){.wp-block-image.is-style-circle-mask img{border-radius:0;-webkit-mask-image:url('data:image/svg+xml;utf8,<svg viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="50"/></svg>');mask-image:url('data:image/svg+xml;utf8,<svg viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="50"/></svg>');mask-mode:alpha}}
</style>
</head>
<body>
<h1>Test Heading</h1>
<p>This is the actual content that should be extracted.</p>
<div class="wp-block-image">
<img src="test.jpg" alt="Test image">
</div>
</body>
</html>'''
# This should not crash and should extract the body content
text = html_to_text(html)
# Verify the actual body content was extracted
assert text is not None, "html_to_text returned None"
assert len(text) > 0, "html_to_text returned empty string"
assert 'Test Heading' in text, "Failed to extract heading"
assert 'actual content that should be extracted' in text, "Failed to extract paragraph"
# Verify CSS content was stripped (including the SVG data URI)
assert '.wp-block-image' not in text, "CSS class selector leaked into text"
assert 'mask-image' not in text, "CSS property leaked into text"
assert 'data:image/svg+xml' not in text, "SVG data URI leaked into text"
assert 'viewBox' not in text, "SVG attributes leaked into text"
# Verify no broken HTML structure
assert '<style' not in text, "Unclosed style tag in output"
assert '</svg>' not in text, "SVG closing tag leaked into text"
print(" ✓ Style tag with SVG data URI test passed")
def test_style_tag_closes_correctly(self):
"""
Test that each tag type (style, script, svg) closes with the correct closing tag.
Before the fix, the regex used (?:style|script|svg|noscript) for both opening and
closing tags, which meant <style> could incorrectly match </svg> as its closing tag.
With backreferences, <style> must close with </style>, <svg> with </svg>, etc.
"""
# Test nested tags where incorrect matching would break
html = '''<!DOCTYPE html>
<html>
<head>
<style>
body { background: url('data:image/svg+xml,<svg><rect/></svg>'); }
</style>
<script>
const svg = '<svg><path d="M0,0"/></svg>';
</script>
</head>
<body>
<h1>Content</h1>
<svg><circle cx="50" cy="50" r="40"/></svg>
<p>After SVG</p>
</body>
</html>'''
text = html_to_text(html)
# Should extract body content
assert 'Content' in text, "Failed to extract heading"
assert 'After SVG' in text, "Failed to extract content after SVG"
# Should strip all style/script/svg content
assert 'background:' not in text, "Style content leaked"
assert 'const svg' not in text, "Script content leaked"
assert '<circle' not in text, "SVG element leaked"
assert 'data:image/svg+xml' not in text, "Data URI leaked"
print(" ✓ Tag closing validation test passed")
if __name__ == '__main__':
# Can run this file directly for quick testing
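
Taken together, these tests pin down two preprocessing steps: strip head bloat with each close tag matched to its own open tag via a backreference (so <style> can never be terminated by a </svg> inside a CSS data URI), and remove display:none / visibility:hidden from the body's inline style. A standalone sketch of both techniques; the exact patterns in changedetectionio's html_tools may differ:

import re

# Backreference \1 forces <style> to close with </style>, <svg> with </svg>, etc.
BLOAT_RE = re.compile(r'<(style|script|svg|noscript)\b[^>]*>.*?</\1\s*>',
                      re.IGNORECASE | re.DOTALL)
# Void tags and comments have no matching close tag
VOID_RE = re.compile(r'<(?:link|meta)\b[^>]*>|<!--.*?-->', re.IGNORECASE | re.DOTALL)
# Strip only the hiding declarations from the body tag's style attribute
BODY_HIDE_RE = re.compile(
    r'(<body\b[^>]*style\s*=\s*["\'][^"\']*?)'
    r'(?:display\s*:\s*none|visibility\s*:\s*hidden)\s*;?',
    re.IGNORECASE)

def preprocess(html: str) -> str:
    html = BLOAT_RE.sub('', html)
    html = VOID_RE.sub('', html)
    return BODY_HIDE_RE.sub(r'\1', html)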

View File

@@ -8,6 +8,7 @@ python3 -m pytest changedetectionio/tests/unit/test_time_handler.py -v
"""
import unittest
import unittest.mock
import arrow
from changedetectionio import time_handler
@@ -240,6 +241,211 @@ class TestAmIInsideTime(unittest.TestCase):
# Result depends on current time
self.assertIsInstance(result, bool)
def test_24_hour_schedule_from_midnight(self):
"""Test 24-hour schedule starting at midnight covers entire day."""
timezone_str = 'UTC'
# Test at a specific time: Monday 00:00
test_time = arrow.get('2024-01-01 00:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd') # Monday
# Mock current time for testing
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="00:00",
timezone_str=timezone_str,
duration=1440 # 24 hours
)
self.assertTrue(result, "Should be active at start of 24-hour schedule")
def test_24_hour_schedule_at_end_of_day(self):
"""Test 24-hour schedule is active at 23:59:59."""
timezone_str = 'UTC'
# Test at Monday 23:59:59
test_time = arrow.get('2024-01-01 23:59:59', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd') # Monday
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="00:00",
timezone_str=timezone_str,
duration=1440 # 24 hours
)
self.assertTrue(result, "Should be active at end of 24-hour schedule")
def test_24_hour_schedule_at_midnight_transition(self):
"""Test 24-hour schedule at exactly midnight transition."""
timezone_str = 'UTC'
# Test at Tuesday 00:00:00 (end of Monday's 24-hour schedule)
test_time = arrow.get('2024-01-02 00:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd') # Monday
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="00:00",
timezone_str=timezone_str,
duration=1440 # 24 hours
)
self.assertTrue(result, "Should include exactly midnight at end of 24-hour schedule")
def test_schedule_crosses_midnight_before_midnight(self):
"""Test schedule crossing midnight - before midnight."""
timezone_str = 'UTC'
# Monday 23:30
test_time = arrow.get('2024-01-01 23:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd') # Monday
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="23:00",
timezone_str=timezone_str,
duration=120 # 2 hours (until 01:00 next day)
)
self.assertTrue(result, "Should be active before midnight in cross-midnight schedule")
def test_schedule_crosses_midnight_after_midnight(self):
"""Test schedule crossing midnight - after midnight."""
timezone_str = 'UTC'
# Tuesday 00:30
test_time = arrow.get('2024-01-02 00:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd') # Monday
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="23:00",
timezone_str=timezone_str,
duration=120 # 2 hours (until 01:00 Tuesday)
)
self.assertTrue(result, "Should be active after midnight in cross-midnight schedule")
def test_schedule_crosses_midnight_at_exact_end(self):
"""Test schedule crossing midnight at exact end time."""
timezone_str = 'UTC'
# Tuesday 01:00 (exact end of Monday 23:00 + 120 minutes)
test_time = arrow.get('2024-01-02 01:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd') # Monday
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="23:00",
timezone_str=timezone_str,
duration=120 # 2 hours
)
self.assertTrue(result, "Should include exact end time of schedule")
def test_duration_60_minutes(self):
"""Test that duration of 60 minutes works correctly."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=60 # Exactly 60 minutes
)
self.assertTrue(result, "60-minute duration should work")
def test_duration_at_exact_end_minute(self):
"""Test at exact end of 60-minute window."""
timezone_str = 'UTC'
# Exactly 13:00 (end of 12:00 + 60 minutes)
test_time = arrow.get('2024-01-01 13:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=60
)
self.assertTrue(result, "Should include exact end minute")
def test_one_second_after_schedule_ends(self):
"""Test one second after schedule should end."""
timezone_str = 'UTC'
# 13:00:01 (one second after 12:00 + 60 minutes)
test_time = arrow.get('2024-01-01 13:00:01', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=60
)
self.assertFalse(result, "Should be False one second after schedule ends")
def test_multi_day_schedule(self):
"""Test schedule longer than 24 hours (48 hours)."""
timezone_str = 'UTC'
# Tuesday 12:00 (36 hours after Monday 00:00)
test_time = arrow.get('2024-01-02 12:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
monday = test_time.shift(days=-1).format('dddd')
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=monday,
time_str="00:00",
timezone_str=timezone_str,
duration=2880 # 48 hours
)
self.assertTrue(result, "Should support multi-day schedules")
def test_schedule_one_minute_duration(self):
"""Test very short 1-minute schedule."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:00:30', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=1 # Just 1 minute
)
self.assertTrue(result, "1-minute schedule should work")
def test_schedule_at_exact_start_time(self):
"""Test at exact start time (00:00:00.000000)."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:00:00.000000', 'YYYY-MM-DD HH:mm:ss.SSSSSS').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=30
)
self.assertTrue(result, "Should include exact start time")
def test_schedule_one_microsecond_before_start(self):
"""Test one microsecond before schedule starts."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 11:59:59.999999', 'YYYY-MM-DD HH:mm:ss.SSSSSS').replace(tzinfo=timezone_str)
day_of_week = test_time.format('dddd')
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.am_i_inside_time(
day_of_week=day_of_week,
time_str="12:00",
timezone_str=timezone_str,
duration=30
)
self.assertFalse(result, "Should not include time before start")
class TestIsWithinSchedule(unittest.TestCase):
"""Tests for the is_within_schedule function."""
@@ -405,6 +611,175 @@ class TestIsWithinSchedule(unittest.TestCase):
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should handle timezone with whitespace")
def test_schedule_with_60_minutes(self):
"""Test schedule with duration of 0 hours and 60 minutes."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
current_hour = now.format('HH:00')
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': current_hour,
'duration': {'hours': 0, 'minutes': 60} # 60 minutes
}
}
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should accept 60 minutes as valid duration")
def test_schedule_with_24_hours(self):
"""Test schedule with duration of 24 hours and 0 minutes."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
start_hour = now.format('HH:00')
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': start_hour,
'duration': {'hours': 24, 'minutes': 0} # Full 24 hours
}
}
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should accept 24 hours as valid duration")
def test_schedule_with_90_minutes(self):
"""Test schedule with duration of 0 hours and 90 minutes."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
current_hour = now.format('HH:00')
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': current_hour,
'duration': {'hours': 0, 'minutes': 90} # 90 minutes = 1.5 hours
}
}
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should accept 90 minutes as valid duration")
def test_schedule_24_hours_from_midnight(self):
"""Test 24-hour schedule from midnight using is_within_schedule."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 12:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
current_day = test_time.format('dddd').lower() # monday
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': '00:00',
'duration': {'hours': 24, 'minutes': 0}
}
}
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "24-hour schedule from midnight should cover entire day")
def test_schedule_24_hours_at_end_of_day(self):
"""Test 24-hour schedule at 23:59 using is_within_schedule."""
timezone_str = 'UTC'
test_time = arrow.get('2024-01-01 23:59:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
current_day = test_time.format('dddd').lower()
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': '00:00',
'duration': {'hours': 24, 'minutes': 0}
}
}
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should be active at 23:59 in 24-hour schedule")
def test_schedule_crosses_midnight_with_is_within_schedule(self):
"""Test schedule crossing midnight using is_within_schedule."""
timezone_str = 'UTC'
# Tuesday 00:30
test_time = arrow.get('2024-01-02 00:30:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo=timezone_str)
# Get Monday as that's when the schedule started
monday = test_time.shift(days=-1).format('dddd').lower()
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
'monday': {
'enabled': True,
'start_time': '23:00',
'duration': {'hours': 2, 'minutes': 0} # Until 01:00 Tuesday
},
'tuesday': {
'enabled': False,
'start_time': '09:00',
'duration': {'hours': 8, 'minutes': 0}
}
}
with unittest.mock.patch('arrow.now', return_value=test_time):
result = time_handler.is_within_schedule(time_schedule_limit)
# Note: This checks Tuesday's schedule, not Monday's overlap
# So it should be False because Tuesday is disabled
self.assertFalse(result, "Should check current day (Tuesday), which is disabled")
def test_schedule_with_mixed_hours_minutes(self):
"""Test schedule with both hours and minutes (23 hours 60 minutes = 24 hours)."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
current_hour = now.format('HH:00')
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': current_hour,
'duration': {'hours': 23, 'minutes': 60} # = 1440 minutes = 24 hours
}
}
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should handle 23 hours + 60 minutes = 24 hours")
def test_schedule_48_hours(self):
"""Test schedule with 48-hour duration."""
timezone_str = 'UTC'
now = arrow.now(timezone_str)
current_day = now.format('dddd').lower()
start_hour = now.format('HH:00')
time_schedule_limit = {
'enabled': True,
'timezone': timezone_str,
current_day: {
'enabled': True,
'start_time': start_hour,
'duration': {'hours': 48, 'minutes': 0} # 2 full days
}
}
result = time_handler.is_within_schedule(time_schedule_limit)
self.assertTrue(result, "Should support 48-hour (multi-day) schedules")
class TestWeekdayEnum(unittest.TestCase):
"""Tests for the Weekday enum."""

View File

@@ -62,19 +62,19 @@ def am_i_inside_time(
# Calculate start and end times for the overlap from the previous day
start_datetime_tz = start_datetime_tz.shift(days=-1)
end_datetime_tz = start_datetime_tz.shift(minutes=duration)
if start_datetime_tz <= now_tz < end_datetime_tz:
if start_datetime_tz <= now_tz <= end_datetime_tz:
return True
# Handle current day's range
if target_weekday == current_weekday:
end_datetime_tz = start_datetime_tz.shift(minutes=duration)
if start_datetime_tz <= now_tz < end_datetime_tz:
if start_datetime_tz <= now_tz <= end_datetime_tz:
return True
# Handle next day's overlap
if target_weekday == (current_weekday + 1) % 7:
end_datetime_tz = start_datetime_tz.shift(minutes=duration)
if now_tz < start_datetime_tz and now_tz.shift(days=1) < end_datetime_tz:
if now_tz < start_datetime_tz and now_tz.shift(days=1) <= end_datetime_tz:
return True
return False
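
The comparisons above change from strict to inclusive at the window end, matching the new tests that expect True at exactly 13:00:00 and False at 13:00:01. A quick usage sketch in the same mocked-clock style as those tests:

import unittest.mock
import arrow
from changedetectionio import time_handler

# 2024-01-01 is a Monday; 12:00 + 60 minutes ends exactly at 13:00
at_end = arrow.get('2024-01-01 13:00:00', 'YYYY-MM-DD HH:mm:ss').replace(tzinfo='UTC')

with unittest.mock.patch('arrow.now', return_value=at_end):
    # The exact end minute now counts as inside the schedule window
    assert time_handler.am_i_inside_time(day_of_week='Monday', time_str='12:00',
                                         timezone_str='UTC', duration=60)

with unittest.mock.patch('arrow.now', return_value=at_end.shift(seconds=1)):
    # One second past the end of the window is outside again
    assert not time_handler.am_i_inside_time(day_of_week='Monday', time_str='12:00',
                                             timezone_str='UTC', duration=60)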

View File

@@ -4,11 +4,10 @@ import changedetectionio.content_fetchers.exceptions as content_fetchers_excepti
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools
from changedetectionio import worker_pool
from changedetectionio.flask_app import watch_check_update
from changedetectionio.queuedWatchMetaData import PrioritizedItem
from changedetectionio.pluggy_interface import apply_update_handler_alter, apply_update_finalize
import asyncio
import importlib
import os
import sys
import time
@@ -56,6 +55,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
while not app.config.exit.is_set():
update_handler = None
watch = None
processing_exception = None # Reset at start of each iteration to prevent state bleeding
try:
# Efficient blocking via run_in_executor (no polling overhead!)
@@ -119,7 +119,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
# to prevent race condition with wait_for_all_checks()
fetch_start_time = round(time.time())
try:
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
changed_detected = False
@@ -136,6 +136,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
try:
# Retrieve signal by name to ensure thread-safe access across worker threads
watch_check_update = signal('watch_check_update')
watch_check_update.send(watch_uuid=uuid)
# Processor is what we are using for detecting the "Change"
@@ -154,6 +156,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid)
# Allow plugins to modify/wrap the update_handler
update_handler = apply_update_handler_alter(update_handler, watch, datastore)
update_signal = signal('watch_small_status_comment')
update_signal.send(watch_uuid=uuid, status="Fetching page..")
@@ -473,8 +478,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.exception(f"Worker {worker_id} full exception details:")
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
# Store favicon if necessary
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
@@ -498,6 +501,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
gc.collect()
except Exception as e:
# Store the processing exception for plugin finalization hook
processing_exception = e
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
logger.exception(f"Worker {worker_id} full exception details:")
@@ -509,6 +514,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
finally:
# Always cleanup - this runs whether there was an exception or not
if uuid:
# Capture references for plugin finalize hook BEFORE cleanup
# (cleanup may delete these variables, but plugins need the original references)
finalize_handler = update_handler # Capture now, before cleanup deletes it
finalize_watch = watch # Capture now, before any modifications
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
try:
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
@@ -518,12 +528,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.exception(f"Worker {worker_id} full exception details:")
try:
# Release UUID from processing (thread-safe)
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
# Send completion signal
if watch:
watch_check_update.send(watch_uuid=watch['uuid'])
# Clean up all memory references BEFORE garbage collection
if update_handler:
@@ -547,7 +551,37 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
logger.exception(f"Worker {worker_id} full exception details:")
del(uuid)
# Call plugin finalization hook after all cleanup is done
# Use captured references from before cleanup
try:
apply_update_finalize(
update_handler=finalize_handler,
watch=finalize_watch,
datastore=datastore,
processing_exception=processing_exception
)
except Exception as finalize_error:
logger.error(f"Worker {worker_id} error in finalize hook: {finalize_error}")
logger.exception(f"Worker {worker_id} full exception details:")
finally:
# Clean up captured references to allow immediate garbage collection
del finalize_handler
del finalize_watch
# Release UUID from processing AFTER all cleanup and hooks complete (thread-safe)
# This ensures wait_for_all_checks() waits for finalize hooks to complete
try:
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
except Exception as release_error:
logger.error(f"Worker {worker_id} error releasing UUID: {release_error}")
logger.exception(f"Worker {worker_id} full exception details:")
finally:
# Send completion signal - retrieve by name to ensure thread-safe access
if watch:
watch_check_update = signal('watch_check_update')
watch_check_update.send(watch_uuid=watch['uuid'])
del (uuid)
# Brief pause before continuing to avoid tight error loops (only on error)
if 'e' in locals():