Compare commits

..

1 Commits

Author SHA1 Message Date
dgtlmoon 3ca9607df1 Adding Ukranian translations, rebuilding translations. 2026-02-28 21:24:51 +01:00
129 changed files with 920 additions and 23159 deletions
+9 -26
View File
@@ -66,27 +66,27 @@ jobs:
echo ${{ github.ref }} > changedetectionio/tag.txt
- name: Set up QEMU
uses: docker/setup-qemu-action@v4
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Login to GitHub Container Registry
uses: docker/login-action@v4
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Docker Hub Container Registry
uses: docker/login-action@v4
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v4
uses: docker/setup-buildx-action@v3
with:
install: true
version: latest
@@ -95,7 +95,7 @@ jobs:
# master branch -> :dev container tag
- name: Docker meta :dev
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
uses: docker/metadata-action@v6
uses: docker/metadata-action@v5
id: meta_dev
with:
images: |
@@ -103,19 +103,11 @@ jobs:
ghcr.io/${{ github.repository }}
tags: |
type=raw,value=dev
labels: |
org.opencontainers.image.created=${{ github.event.release.published_at }}
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
org.opencontainers.image.documentation=https://changedetection.io
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
org.opencontainers.image.title=changedetection.io
org.opencontainers.image.url=https://changedetection.io
- name: Build and push :dev
id: docker_build
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
with:
context: ./
file: ./Dockerfile
@@ -136,10 +128,10 @@ jobs:
echo "Release tag: ${{ github.event.release.tag_name }}"
echo "Github ref: ${{ github.ref }}"
echo "Github ref name: ${{ github.ref_name }}"
- name: Docker meta :tag
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/metadata-action@v6
uses: docker/metadata-action@v5
id: meta
with:
images: |
@@ -150,20 +142,11 @@ jobs:
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
type=raw,value=latest
labels: |
org.opencontainers.image.created=${{ github.event.release.published_at }}
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
org.opencontainers.image.documentation=https://changedetection.io
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
org.opencontainers.image.title=changedetection.io
org.opencontainers.image.url=https://changedetection.io
org.opencontainers.image.version=${{ github.event.release.tag_name }}
- name: Build and push :tag
id: docker_build_tag_release
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
with:
context: ./
file: ./Dockerfile
+3 -3
View File
@@ -60,14 +60,14 @@ jobs:
# Just test that the build works, some libraries won't compile on ARM/rPi etc
- name: Set up QEMU
uses: docker/setup-qemu-action@v4
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v4
uses: docker/setup-buildx-action@v3
with:
install: true
version: latest
@@ -75,7 +75,7 @@ jobs:
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
id: docker_build
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
# https://github.com/docker/build-push-action#customizing
with:
context: ./
+1 -10
View File
@@ -52,13 +52,4 @@ jobs:
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.13'
skip-pypuppeteer: true
test-application-3-14:
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.14'
skip-pypuppeteer: false
skip-pypuppeteer: true
@@ -42,10 +42,10 @@ jobs:
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4
uses: docker/setup-buildx-action@v3
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
with:
context: ./
file: ./Dockerfile
@@ -99,7 +99,11 @@ jobs:
- name: Run Unit Tests
run: |
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
# Basic pytest tests with ancillary services
basic-tests:
@@ -583,10 +587,6 @@ jobs:
run: |
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
- name: Plugin get_html_head_extras hook injects into base.html
run: |
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
# Container startup tests
container-tests:
runs-on: ubuntu-latest
@@ -706,19 +706,7 @@ jobs:
- name: Check upgrade works without error
run: |
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
g++ \
gcc \
libc-dev \
libffi-dev \
libjpeg-dev \
libssl-dev \
libxslt-dev \
make \
patch \
pkg-config \
zlib1g-dev
# Checkout old version and create datastore
git checkout 0.49.1
python3 -m venv .venv
+1 -2
View File
@@ -1,6 +1,5 @@
[python: **.py]
keywords = _ _l gettext
keywords = _:1,_l:1,gettext:1
[jinja2: **/templates/**.html]
encoding = utf-8
keywords = _ _l gettext
+1 -15
View File
@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.54.10'
__version__ = '0.54.2'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -61,22 +61,8 @@ import time
# ==============================================================================
import multiprocessing
import os
import sys
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
if 'MALLOC_ARENA_MAX' not in os.environ:
os.environ['MALLOC_ARENA_MAX'] = '2'
try:
import ctypes as _ctypes
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
except Exception:
pass
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
if 'pytest' not in sys.modules:
-7
View File
@@ -177,13 +177,6 @@ class Tag(Resource):
new_uuid = self.datastore.add_tag(title=title)
if new_uuid:
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
extra = {k: v for k, v in json_data.items() if k != 'title'}
if extra:
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
if tag:
tag.update(extra)
tag.commit()
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported tag", 400
+3 -3
View File
@@ -338,7 +338,7 @@ class WatchHistoryDiff(Resource):
word_diff = True
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
changes_only = strtobool(request.args.get('changesOnly', 'false'))
changes_only = strtobool(request.args.get('changesOnly', 'true'))
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
include_removed = strtobool(request.args.get('removed', 'true'))
include_added = strtobool(request.args.get('added', 'true'))
@@ -349,7 +349,7 @@ class WatchHistoryDiff(Resource):
previous_version_file_contents=from_version_file_contents,
newest_version_file_contents=to_version_file_contents,
ignore_junk=ignore_whitespace,
include_equal=not changes_only,
include_equal=changes_only,
include_removed=include_removed,
include_added=include_added,
include_replaced=include_replaced,
@@ -567,4 +567,4 @@ class CreateWatch(Resource):
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
return list, 200
return list, 200
+14 -12
View File
@@ -40,6 +40,11 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
zipObj.write(url_watches_json, arcname="url-watches.json")
logger.debug("Added url-watches.json to backup")
# Add the flask app secret (if it exists)
secret_file = os.path.join(datastore_path, "secret.txt")
if os.path.isfile(secret_file):
zipObj.write(secret_file, arcname="secret.txt")
# Add tag data directories (each tag has its own {uuid}/tag.json)
for uuid, tag in (tags or {}).items():
for f in Path(tag.data_dir).glob('*'):
@@ -98,8 +103,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
backup_threads = []
@backups_blueprint.route("/request-backup", methods=['GET'])
@login_optionally_required
@backups_blueprint.route("/request-backup", methods=['GET'])
def request_backup():
if any(thread.is_alive() for thread in backup_threads):
flash(gettext("A backup is already running, check back in a few minutes"), "error")
@@ -141,33 +146,30 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return backup_info
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
@login_optionally_required
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
def download_backup(filename):
import re
filename = filename.strip()
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
abort(404)
# Resolve 'latest' before any validation so checks run against the real filename.
if filename == 'latest':
backups = find_backups()
if not backups:
abort(404)
filename = backups[0]['filename']
if not re.match(r"^" + backup_filename_regex + "$", filename):
abort(400) # Bad Request if the filename doesn't match the pattern
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
abort(404)
logger.debug(f"Backup download request for '{full_path}'")
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
@login_optionally_required
@backups_blueprint.route("/", methods=['GET'])
@backups_blueprint.route("/create", methods=['GET'])
@login_optionally_required
def create():
backups = find_backups()
output = render_template("backup_create.html",
@@ -176,8 +178,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
)
return output
@backups_blueprint.route("/remove-backups", methods=['GET'])
@login_optionally_required
@backups_blueprint.route("/remove-backups", methods=['GET'])
def remove_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
+7 -47
View File
@@ -1,7 +1,6 @@
import io
import json
import os
import re
import shutil
import tempfile
import threading
@@ -15,16 +14,6 @@ from loguru import logger
from changedetectionio.flask_app import login_optionally_required
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
_UUID_RE = re.compile(
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
re.IGNORECASE,
)
class RestoreForm(Form):
zip_file = FileField(_l('Backup zip file'), validators=[
@@ -61,18 +50,7 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
with tempfile.TemporaryDirectory() as tmpdir:
logger.debug(f"Restore: extracting zip to {tmpdir}")
with zipfile.ZipFile(zip_stream, 'r') as zf:
total_uncompressed = sum(m.file_size for m in zf.infolist())
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
raise ValueError(
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
)
resolved_dest = os.path.realpath(tmpdir)
for member in zf.infolist():
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
zf.extract(member, tmpdir)
zf.extractall(tmpdir)
logger.debug("Restore: zip extracted, scanning UUID directories")
for entry in os.scandir(tmpdir):
@@ -80,9 +58,6 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
continue
uuid = entry.name
if not _UUID_RE.match(uuid):
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
continue
tag_json_path = os.path.join(entry.path, 'tag.json')
watch_json_path = os.path.join(entry.path, 'watch.json')
@@ -174,18 +149,16 @@ def construct_restore_blueprint(datastore):
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
restore_threads = []
@restore_blueprint.route("/restore", methods=['GET'])
@login_optionally_required
@restore_blueprint.route("/restore", methods=['GET'])
def restore():
form = RestoreForm()
return render_template("backup_restore.html",
form=form,
restore_running=any(t.is_alive() for t in restore_threads),
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
restore_running=any(t.is_alive() for t in restore_threads))
@restore_blueprint.route("/restore/start", methods=['POST'])
@login_optionally_required
@restore_blueprint.route("/restore/start", methods=['POST'])
def backups_restore_start():
if any(t.is_alive() for t in restore_threads):
flash(gettext("A restore is already running, check back in a few minutes"), "error")
@@ -200,22 +173,10 @@ def construct_restore_blueprint(datastore):
flash(gettext("File must be a .zip backup file"), "error")
return redirect(url_for('backups.restore.restore'))
# Reject oversized uploads before reading the stream into memory.
content_length = request.content_length
if content_length and content_length > _MAX_UPLOAD_BYTES:
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
return redirect(url_for('backups.restore.restore'))
# Read into memory now — the request stream is gone once we return.
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
# Read into memory now — the request stream is gone once we return
try:
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
if len(raw) > _MAX_UPLOAD_BYTES:
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
return redirect(url_for('backups.restore.restore'))
zip_bytes = io.BytesIO(raw)
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
pass
zip_bytes = io.BytesIO(zip_file.read())
zipfile.ZipFile(zip_bytes) # quick validity check before spawning
zip_bytes.seek(0)
except zipfile.BadZipFile:
flash(gettext("Invalid or corrupted zip file"), "error")
@@ -240,7 +201,6 @@ def construct_restore_blueprint(datastore):
name="BackupRestore"
)
restore_thread.start()
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
restore_threads.append(restore_thread)
flash(gettext("Restore started in background, check back in a few minutes."))
return redirect(url_for('backups.restore.restore'))
@@ -19,9 +19,6 @@
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
<p class="pure-form-message">
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
</p>
<form class="pure-form pure-form-stacked settings"
action="{{ url_for('backups.restore.backups_restore_start') }}"
@@ -102,35 +102,6 @@ def run_async_in_browser_loop(coro):
else:
raise RuntimeError("Browser steps event loop is not available")
async def _close_session_resources(session_data, label=''):
"""Close all browser resources for a session in the correct order.
browserstepper.cleanup() closes page+context but not the browser itself.
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
"""
browserstepper = session_data.get('browserstepper')
if browserstepper:
try:
await browserstepper.cleanup()
except Exception as e:
logger.error(f"Error cleaning up browserstepper{label}: {e}")
browser = session_data.get('browser')
if browser:
try:
await asyncio.wait_for(browser.close(), timeout=5.0)
except Exception as e:
logger.warning(f"Error closing browser{label}: {e}")
playwright_context = session_data.get('playwright_context')
if playwright_context:
try:
await playwright_context.stop()
except Exception as e:
logger.warning(f"Error stopping playwright context{label}: {e}")
def cleanup_expired_sessions():
"""Remove expired browsersteps sessions and cleanup their resources"""
global browsersteps_sessions, browsersteps_watch_to_session
@@ -148,10 +119,13 @@ def cleanup_expired_sessions():
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
session_data = browsersteps_sessions[session_id]
try:
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
except Exception as e:
logger.error(f"Error cleaning up session {session_id}: {e}")
# Cleanup playwright resources asynchronously
browserstepper = session_data.get('browserstepper')
if browserstepper:
try:
run_async_in_browser_loop(browserstepper.cleanup())
except Exception as e:
logger.error(f"Error cleaning up session {session_id}: {e}")
# Remove from sessions dict
del browsersteps_sessions[session_id]
@@ -178,10 +152,12 @@ def cleanup_session_for_watch(watch_uuid):
session_data = browsersteps_sessions.get(session_id)
if session_data:
try:
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
except Exception as e:
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
browserstepper = session_data.get('browserstepper')
if browserstepper:
try:
run_async_in_browser_loop(browserstepper.cleanup())
except Exception as e:
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
# Remove from sessions dict
del browsersteps_sessions[session_id]
@@ -202,74 +178,64 @@ def construct_blueprint(datastore: ChangeDetectionStore):
import time
from playwright.async_api import async_playwright
# We keep the playwright session open for many minutes
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
keepalive_ms = ((keepalive_seconds + 3) * 1000)
browsersteps_start_session = {'start_time': time.time()}
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
proxy = None
if proxy_id:
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
if proxy_url:
from urllib.parse import urlparse
parsed = urlparse(proxy_url)
proxy = {'server': proxy_url}
if parsed.username:
proxy['username'] = parsed.username
if parsed.password:
proxy['password'] = parsed.password
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
# Create a new async playwright instance for browser steps
playwright_instance = async_playwright()
playwright_context = await playwright_instance.start()
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
watch = datastore.data['watching'][watch_uuid]
from changedetectionio import content_fetchers
fetcher_name = watch.get_fetch_backend or 'system'
if fetcher_name == 'system':
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
fetcher_class = getattr(content_fetchers, fetcher_name, None)
browser = None
playwright_context = None
# If the fetcher has its own browser launch for the live steps UI, use it.
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
# or None to fall back to the default CDP path.
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
if result is not None:
browser, playwright_context = result
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
if browser is None:
playwright_instance = async_playwright()
playwright_context = await playwright_instance.start()
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
a = "?" if '?' not in base_url else '&'
base_url += a + f"timeout={keepalive_ms}"
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
keepalive_ms = ((keepalive_seconds + 3) * 1000)
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
a = "?" if not '?' in base_url else '&'
base_url += a + f"timeout={keepalive_ms}"
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
browsersteps_start_session['browser'] = browser
browsersteps_start_session['playwright_context'] = playwright_context
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
proxy = None
if proxy_id:
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
if proxy_url:
# Playwright needs separate username and password values
from urllib.parse import urlparse
parsed = urlparse(proxy_url)
proxy = {'server': proxy_url}
if parsed.username:
proxy['username'] = parsed.username
if parsed.password:
proxy['password'] = parsed.password
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
browserstepper = browser_steps.browsersteps_live_ui(
playwright_browser=browser,
proxy=proxy,
start_url=watch.link,
headers=watch.get('headers')
start_url=datastore.data['watching'][watch_uuid].link,
headers=datastore.data['watching'][watch_uuid].get('headers')
)
# Initialize the async connection
await browserstepper.connect(proxy=proxy)
browsersteps_start_session['browserstepper'] = browserstepper
# For test
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
return browsersteps_start_session
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
@login_optionally_required
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
def browsersteps_start_session():
# A new session was requested, return sessionID
import uuid
@@ -304,8 +270,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.debug("Starting connection with playwright - done")
return {'browsersteps_session_id': browsersteps_session_id}
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
@login_optionally_required
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
def browser_steps_fetch_screenshot_image():
from flask import (
make_response,
@@ -330,8 +296,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
# A request for an action was received
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
@login_optionally_required
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
def browsersteps_ui_update():
import base64
@@ -40,13 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
contents = ''
now = time.time()
try:
import asyncio
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid
)
asyncio.run(update_handler.call_browser(preferred_proxy_id=preferred_proxy))
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
# title, size is len contents not len xfer
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 404:
@@ -160,7 +160,8 @@ class import_xlsx_wachete(Importer):
flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
return
for row_id, row in enumerate(wb.active.iter_rows(min_row=2), start=2):
row_id = 2
for row in wb.active.iter_rows(min_row=row_id):
try:
extras = {}
data = {}
@@ -211,6 +212,8 @@ class import_xlsx_wachete(Importer):
except Exception as e:
logger.error(e)
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_id), 'error')
else:
row_id += 1
flash(gettext("{} imported from Wachete .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
@@ -238,10 +241,10 @@ class import_xlsx_custom(Importer):
# @todo cehck atleast 2 rows, same in other method
from changedetectionio.forms import validate_url
row_i = 0
row_i = 1
try:
for row_i, row in enumerate(wb.active.iter_rows(), start=1):
for row in wb.active.iter_rows():
url = None
tags = None
extras = {}
@@ -292,5 +295,7 @@ class import_xlsx_custom(Importer):
except Exception as e:
logger.error(e)
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error')
else:
row_i += 1
flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
@@ -9,7 +9,6 @@
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
</ul>
</div>
+1 -1
View File
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
datastore: The ChangeDetectionStore instance
"""
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
def rss_tag_feed(tag_uuid):
from flask import make_response, request, url_for
@@ -154,8 +154,9 @@
</span>
</div>
<div class="pure-control-group">
<br>
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
<br>
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
</div>
</div>
@@ -351,7 +352,7 @@ nav
</div>
</div>
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
<div class="pure-control-group" id="extra-proxies-setting">
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
@@ -22,14 +22,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
from changedetectionio import processors
output = render_template("groups-overview.html",
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
available_tags=sorted_tags,
form=add_form,
generate_tag_colors=processors.generate_processor_badge_colors,
tag_count=tag_count,
wcag_text_color=processors.wcag_text_color,
)
return output
@@ -211,17 +208,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
template = env.from_string(template_str)
included_content = template.render(**template_args)
# Watches whose URL currently matches this tag's pattern
matching_watches = {
w_uuid: watch
for w_uuid, watch in datastore.data['watching'].items()
if default.matches_url(watch.get('url', ''))
}
output = render_template("edit-tag.html",
extra_form_content=included_content,
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
matching_watches=matching_watches,
settings_application=datastore.data['settings']['application'],
**template_args
)
+7 -7
View File
@@ -5,17 +5,17 @@ from wtforms import (
validators,
)
from wtforms.fields.simple import BooleanField
from flask_babel import lazy_gettext as _l
from changedetectionio.processors.restock_diff.forms import processor_settings_form as restock_settings_form
class group_restock_settings_form(restock_settings_form):
overrides_watch = BooleanField(_l('Activate for individual watches in this tag/group?'), default=False)
url_match_pattern = StringField(_l('Auto-apply to watches with URLs matching'),
render_kw={"placeholder": _l("e.g. *://example.com/* or github.com/myorg")})
tag_colour = StringField(_l('Tag colour'), default='')
overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
class SingleTag(Form):
name = StringField(_l('Tag name'), [validators.InputRequired()], render_kw={"placeholder": _l("Name")})
save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
name = StringField('Tag name', [validators.InputRequired()], render_kw={"placeholder": "Name"})
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -17,7 +17,6 @@
</script>
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
@@ -44,46 +43,6 @@
<div class="pure-control-group">
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
</div>
<div class="pure-control-group">
{{ render_field(form.url_match_pattern, class="m-d") }}
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
</div>
{% if matching_watches %}
<div class="pure-control-group">
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
<ul class="tag-url-match-list">
{% for w_uuid, w in matching_watches.items() %}
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
{% endfor %}
</ul>
</div>
{% endif %}
<div class="pure-control-group">
<label>{{ _('Tag colour') }}</label>
<div style="display:flex; align-items:center; gap:0.75em;">
<input type="checkbox" id="use_custom_colour"
{% if data.get('tag_colour') %}checked{% endif %}>
<label for="use_custom_colour" style="margin:0">{{ _('Custom colour') }}</label>
<input type="color" id="tag_colour_picker"
value="{{ data.get('tag_colour') or '#4f8ef7' }}"
{% if not data.get('tag_colour') %}disabled{% endif %}>
<input type="hidden" name="tag_colour" id="tag_colour_hidden"
value="{{ data.get('tag_colour', '') }}">
</div>
<span class="pure-form-message-inline">{{ _('Leave unchecked to use the auto-generated colour based on the tag name.') }}</span>
</div>
<script>
(function () {
var cb = document.getElementById('use_custom_colour');
var picker = document.getElementById('tag_colour_picker');
var hidden = document.getElementById('tag_colour_hidden');
picker.addEventListener('input', function () { hidden.value = this.value; });
cb.addEventListener('change', function () {
picker.disabled = !this.checked;
hidden.value = this.checked ? picker.value : '';
});
})();
</script>
</fieldset>
</div>
@@ -3,26 +3,6 @@
{% from '_helpers.html' import render_simple_field, render_field %}
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
<style>
{%- for uuid, tag in available_tags -%}
{%- if tag and tag.title -%}
{%- set class_name = tag.title|sanitize_tag_class -%}
{%- if tag.get('tag_colour') -%}
.watch-tag-list.tag-{{ class_name }} { background-color: {{ tag.tag_colour }}; color: {{ wcag_text_color(tag.tag_colour) }}; }
{%- else -%}
{%- set colors = generate_tag_colors(tag.title) -%}
.watch-tag-list.tag-{{ class_name }} {
background-color: {{ colors['light']['bg'] }};
color: {{ colors['light']['color'] }};
}
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
background-color: {{ colors['dark']['bg'] }};
color: {{ colors['dark']['color'] }};
}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
</style>
<div class="box">
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
@@ -65,10 +45,10 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{% for uuid, tag in available_tags %}
<tr id="{{ uuid }}" class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}">
<td class="watch-controls">
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('Mute notifications') }}" title="{{ _('Mute notifications') }}" class="icon icon-mute" ></a>
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
</td>
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
<td>
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
+2 -2
View File
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
@login_optionally_required
def clear_all_history():
if request.method == 'POST':
confirmtext = request.form.get('confirmtext', '')
confirmtext = request.form.get('confirmtext')
if confirmtext.strip().lower() == gettext('clear').strip().lower():
if confirmtext == 'clear':
# Run in background thread to avoid blocking
def clear_history_background():
# Capture UUIDs first to avoid race conditions
+4 -9
View File
@@ -142,7 +142,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
for p in datastore.extra_browsers:
form.fetch_backend.choices.append(p)
form.fetch_backend.choices.append(("system", gettext('System settings default')))
form.fetch_backend.choices.append(("system", 'System settings default'))
# form.browser_steps[0] can be assumed that we 'goto url' first
@@ -150,7 +150,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
del form.proxy
else:
form.proxy.choices = [('', gettext('Default'))]
form.proxy.choices = [('', 'Default')]
for p in datastore.proxy_list:
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
@@ -301,7 +301,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'extra_classes': ' '.join(c),
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
'extra_processor_config': form.extra_tab_content(),
'extra_title': f" - {gettext('Edit')} - {watch.label}",
'extra_title': f" - Edit - {watch.label}",
'form': form,
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
@@ -320,12 +320,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'watch': watch,
'capabilities': capabilities,
'auto_applied_tags': {
tag_uuid: tag
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
},
'capabilities': capabilities
}
included_content = None
+8 -7
View File
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
def construct_blueprint(datastore: ChangeDetectionStore):
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET'])
@login_optionally_required
def preview_page(uuid):
"""
@@ -60,8 +59,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
versions = []
timestamp = None
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = watch.fetcher_supports_screenshots
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
triggered_line_numbers = []
ignored_line_numbers = []
@@ -71,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
else:
# So prepare the latest preview or not
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
preferred_version = request.args.get('version')
versions = list(watch.history.keys())
timestamp = versions[-1]
if preferred_version and preferred_version in versions:
@@ -107,7 +108,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
current_diff_url=watch['url'],
current_version=timestamp,
extra_stylesheets=extra_stylesheets,
extra_title=f" - {gettext('Diff')} - {watch.label} @ {timestamp}",
extra_title=f" - Diff - {watch.label} @ {timestamp}",
highlight_ignored_line_numbers=ignored_line_numbers,
highlight_triggered_line_numbers=triggered_line_numbers,
highlight_blocked_line_numbers=blocked_line_numbers,
@@ -81,14 +81,6 @@
<div class="pure-control-group">
{{ render_field(form.tags) }}
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
{% if auto_applied_tags %}
<span class="pure-form-message-inline">
{{ _('Also automatically applied by URL pattern:') }}
{% for tag_uuid, tag in auto_applied_tags.items() %}
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
{% endfor %}
</span>
{% endif %}
</div>
<div class="pure-control-group inline-radio">
{{ render_field(form.processor) }}
@@ -17,7 +17,7 @@
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
{% if versions|length >= 2 %}
<div id="diff-form" style="text-align: center;">
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
<form class="pure-form " action="" method="POST">
<fieldset>
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
name="from_version"
@@ -28,7 +28,6 @@
</option>
{% endfor %}
</select>
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
</fieldset>
@@ -81,7 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
proxy_list = datastore.proxy_list
output = render_template(
"watch-overview.html",
active_tag=active_tag,
@@ -92,9 +91,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
extra_classes='has-queue' if not update_q.empty() else '',
form=form,
generate_tag_colors=processors.generate_processor_badge_colors,
wcag_text_color=processors.wcag_text_color,
guid=datastore.data['app_guid'],
has_proxies=proxy_list,
has_proxies=datastore.proxy_list,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()),
pagination=pagination,
@@ -112,16 +110,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
watches=sorted_watches
)
# Return freed template-building memory to the OS immediately.
# render_template allocates ~20MB of intermediate strings that are freed on return,
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
if session.get('share-link'):
del (session['share-link'])
@@ -71,13 +71,6 @@ document.addEventListener('DOMContentLoaded', function() {
{%- for uuid, tag in tags -%}
{%- if tag and tag.title -%}
{%- set class_name = tag.title|sanitize_tag_class -%}
{%- if tag.get('tag_colour') -%}
.button-tag.tag-{{ class_name }},
.watch-tag-list.tag-{{ class_name }} {
background-color: {{ tag.tag_colour }};
color: {{ wcag_text_color(tag.tag_colour) }};
}
{%- else -%}
{%- set colors = generate_tag_colors(tag.title) -%}
.button-tag.tag-{{ class_name }} {
background-color: {{ colors['light']['bg'] }};
@@ -99,7 +92,6 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
color: {{ colors['dark']['color'] }};
}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
</style>
<div class="box" id="form-quick-watch-add">
@@ -221,13 +213,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{%- set checking_now = is_checking_now(watch) -%}
{%- set history_n = watch.history_n -%}
{%- set favicon = watch.get_favicon_filename() -%}
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
{%- set row_classes = [
loop.cycle('pure-table-odd', 'pure-table-even'),
'processor-' ~ watch['processor'],
'has-error' if error_texts|length > 2 else '',
'has-error' if watch.compile_error_texts()|length > 2 else '',
'paused' if watch.paused is defined and watch.paused != False else '',
'unviewed' if watch.has_unviewed else '',
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
@@ -245,10 +236,10 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
<td class="inline checkbox-uuid" ><div><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
<td class="inline watch-controls">
<div>
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="{{ _('Pause checks') }}" title="{{ _('Pause checks') }}" class="icon icon-pause" ></a>
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="{{ _('UnPause checks') }}" title="{{ _('UnPause checks') }}" class="icon icon-unpause" ></a>
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('Mute notification') }}" title="{{ _('Mute notification') }}" class="icon icon-mute" ></a>
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ _('UnMute notification') }}" title="{{ _('UnMute notification') }}" class="icon icon-mute" ></a>
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
</div>
</td>
@@ -280,7 +271,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
</span>
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
{%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
@@ -292,7 +283,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{%- endfor -%}
</div>
<div class="status-icons">
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="{{ _('Create a link to share watch config with others') }}" ></a>
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
{{ effective_fetcher|fetcher_status_icons }}
@@ -314,20 +305,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{%- endif -%}
{%- if watch.get('restock') and watch['restock'].get('price') -%}
{%- set restock = watch['restock'] -%}
{%- set price = restock.get('price') -%}
{%- set cur = restock.get('currency','') -%}
{%- if price is not none and (price|string)|regex_search('\d') -%}
<span class="restock-label price" title="{{ _('Price') }}">
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
{{ price }} {{ cur }}<!-- as string -->
{%- if watch['restock']['price'] is number -%}
<span class="restock-label price" title="{{ _('Price') }}">
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
</span>
{%- else -%} <!-- watch['restock']['price']' is not a number, cant output it -->
{%- endif -%}
</span>
{%- endif -%}
{%- elif not watch.has_restock_info -%}
<span class="restock-label error">{{ _('No information') }}</span>
{%- endif -%}
@@ -335,13 +318,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
</td>
{%- endif -%}
{#last_checked becomes fetch-start-time#}
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" data-label="{{ _('Last Checked') }}">
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
<div class="spinner-wrapper" style="display:none;" >
<span class="spinner"></span><span class="status-text">&nbsp;{{ _('Checking now') }}</span>
</div>
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
</td>
<td class="last-changed" data-timestamp="{{ watch.last_changed }}" data-label="{{ _('Last Changed') }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
{{watch.last_changed|format_timestamp_timeago}}
{%- else -%}
{{ _('Not yet') }}
+9 -10
View File
@@ -1,5 +1,4 @@
from json_logic.builtins import BUILTINS
from flask_babel import lazy_gettext as _l
from .exceptions import EmptyConditionRuleRowNotUsable
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
@@ -7,19 +6,19 @@ from . import default_plugin
from loguru import logger
# List of all supported JSON Logic operators
operator_choices = [
(None, _l("Choose one - Operator")),
(">", _l("Greater Than")),
("<", _l("Less Than")),
(">=", _l("Greater Than or Equal To")),
("<=", _l("Less Than or Equal To")),
("==", _l("Equals")),
("!=", _l("Not Equals")),
("in", _l("Contains")),
(None, "Choose one - Operator"),
(">", "Greater Than"),
("<", "Less Than"),
(">=", "Greater Than or Equal To"),
("<=", "Less Than or Equal To"),
("==", "Equals"),
("!=", "Not Equals"),
("in", "Contains"),
]
# Fields available in the rules
field_choices = [
(None, _l("Choose one - Field")),
(None, "Choose one - Field"),
]
# The data we will feed the JSON Rules to see if it passes the test/conditions or not
+9 -10
View File
@@ -3,7 +3,6 @@ import re
import pluggy
from price_parser import Price
from loguru import logger
from flask_babel import lazy_gettext as _l
hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
@@ -48,22 +47,22 @@ def register_operators():
@hookimpl
def register_operator_choices():
return [
("!in", _l("Does NOT Contain")),
("starts_with", _l("Text Starts With")),
("ends_with", _l("Text Ends With")),
("length_min", _l("Length minimum")),
("length_max", _l("Length maximum")),
("contains_regex", _l("Text Matches Regex")),
("!contains_regex", _l("Text Does NOT Match Regex")),
("!in", "Does NOT Contain"),
("starts_with", "Text Starts With"),
("ends_with", "Text Ends With"),
("length_min", "Length minimum"),
("length_max", "Length maximum"),
("contains_regex", "Text Matches Regex"),
("!contains_regex", "Text Does NOT Match Regex"),
]
@hookimpl
def register_field_choices():
return [
("extracted_number", _l("Extracted number after 'Filters & Triggers'")),
("extracted_number", "Extracted number after 'Filters & Triggers'"),
# ("meta_description", "Meta Description"),
# ("meta_keywords", "Meta Keywords"),
("page_filtered_text", _l("Page text after 'Filters & Triggers'")),
("page_filtered_text", "Page text after 'Filters & Triggers'"),
#("page_title", "Page <title>"), # actual page title <title>
]
+6 -7
View File
@@ -1,7 +1,6 @@
# Condition Rule Form (for each rule row)
from wtforms import Form, SelectField, StringField, validators
from wtforms import validators
from flask_babel import lazy_gettext as _l
class ConditionFormRow(Form):
@@ -9,18 +8,18 @@ class ConditionFormRow(Form):
from changedetectionio.conditions import plugin_manager
from changedetectionio.conditions import operator_choices, field_choices
field = SelectField(
_l("Field"),
"Field",
choices=field_choices,
validators=[validators.Optional()]
)
operator = SelectField(
_l("Operator"),
"Operator",
choices=operator_choices,
validators=[validators.Optional()]
)
value = StringField(_l("Value"), validators=[validators.Optional()], render_kw={"placeholder": _l("A value")})
value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})
def validate(self, extra_validators=None):
# First, run the default validators
@@ -31,15 +30,15 @@ class ConditionFormRow(Form):
# If any of the operator/field/value is set, then they must be all set
if any(value not in ("", False, "None", None) for value in [self.operator.data, self.field.data, self.value.data]):
if not self.operator.data or self.operator.data == 'None':
self.operator.errors.append(_l("Operator is required."))
self.operator.errors.append("Operator is required.")
return False
if not self.field.data or self.field.data == 'None':
self.field.errors.append(_l("Field is required."))
self.field.errors.append("Field is required.")
return False
if not self.value.data:
self.value.errors.append(_l("Value is required."))
self.value.errors.append("Value is required.")
return False
return True # Only return True if all conditions pass
@@ -4,7 +4,6 @@ Provides metrics for measuring text similarity between snapshots.
"""
import pluggy
from loguru import logger
from flask_babel import gettext as _, lazy_gettext as _l
LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000
@@ -54,8 +53,8 @@ def register_operator_choices():
@conditions_hookimpl
def register_field_choices():
return [
("levenshtein_ratio", _l("Levenshtein - Text similarity ratio")),
("levenshtein_distance", _l("Levenshtein - Text change distance")),
("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
("levenshtein_distance", "Levenshtein - Text change distance"),
]
@conditions_hookimpl
@@ -78,7 +77,7 @@ def ui_edit_stats_extras(watch):
"""Add Levenshtein stats to the UI using the global plugin system"""
"""Generate the HTML for Levenshtein stats - shared by both plugin systems"""
if len(watch.history.keys()) < 2:
return f"<p>{_('Not enough history to calculate Levenshtein metrics')}</p>"
return "<p>Not enough history to calculate Levenshtein metrics</p>"
# Protection against the algorithm getting stuck on huge documents
@@ -88,37 +87,37 @@ def ui_edit_stats_extras(watch):
for idx in (-1, -2)
if len(k) >= abs(idx)
):
return f"<p>{_('Snapshot too large for edit statistics, skipping.')}</p>"
return "<p>Snapshot too large for edit statistics, skipping.</p>"
try:
lev_data = levenshtein_ratio_recent_history(watch)
if not lev_data or not isinstance(lev_data, dict):
return f"<p>{_('Unable to calculate Levenshtein metrics')}</p>"
return "<p>Unable to calculate Levenshtein metrics</p>"
html = f"""
<div class="levenshtein-stats">
<h4>{_('Levenshtein Text Similarity Details')}</h4>
<h4>Levenshtein Text Similarity Details</h4>
<table class="pure-table">
<tbody>
<tr>
<td>{_('Raw distance (edits needed)')}</td>
<td>Raw distance (edits needed)</td>
<td>{lev_data['distance']}</td>
</tr>
<tr>
<td>{_('Similarity ratio')}</td>
<td>Similarity ratio</td>
<td>{lev_data['ratio']:.4f}</td>
</tr>
<tr>
<td>{_('Percent similar')}</td>
<td>Percent similar</td>
<td>{lev_data['percent_similar']}%</td>
</tr>
</tbody>
</table>
<p style="font-size: 80%;">{_('Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.')}</p>
<p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
</div>
"""
return html
except Exception as e:
logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
return f"<p>{_('Error calculating Levenshtein metrics')}</p>"
return "<p>Error calculating Levenshtein metrics</p>"
@@ -4,7 +4,6 @@ Provides word count metrics for snapshot content.
"""
import pluggy
from loguru import logger
from flask_babel import gettext as _, lazy_gettext as _l
# Support both plugin systems
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
@@ -41,7 +40,7 @@ def register_operator_choices():
def register_field_choices():
# Add a field that will be available in conditions
return [
("word_count", _l("Word count of content")),
("word_count", "Word count of content"),
]
@conditions_hookimpl
@@ -62,16 +61,16 @@ def _generate_stats_html(watch):
html = f"""
<div class="word-count-stats">
<h4>{_('Content Analysis')}</h4>
<h4>Content Analysis</h4>
<table class="pure-table">
<tbody>
<tr>
<td>{_('Word count (latest snapshot)')}</td>
<td>Word count (latest snapshot)</td>
<td>{word_count}</td>
</tr>
</tbody>
</table>
<p style="font-size: 80%;">{_('Word count is a simple measure of content length, calculated by splitting text on whitespace.')}</p>
<p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
</div>
"""
return html
@@ -49,9 +49,6 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
if page_height > page.viewport_size['height']:
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
viewport_start = time.time()
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
# Set viewport to a larger size to capture more content at once
@@ -75,9 +75,6 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
if page_height > page.viewport['height']:
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
viewport_start = time.time()
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
viewport_time = time.time() - viewport_start
+5 -28
View File
@@ -1,4 +1,3 @@
from flask_babel import lazy_gettext as _l
from loguru import logger
from urllib.parse import urljoin, urlparse
import hashlib
@@ -14,7 +13,7 @@ from changedetectionio.validate_url import is_private_hostname
# "html_requests" is listed as the default fetcher in store.py!
class fetcher(Fetcher):
fetcher_description = _l("Basic fast Plaintext/HTTP Client")
fetcher_description = "Basic fast Plaintext/HTTP Client"
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
@@ -149,32 +148,10 @@ class fetcher(Fetcher):
# Default to UTF-8 for XML if no encoding found
r.encoding = 'utf-8'
else:
# No charset in HTTP header - sniff encoding in priority order matching browsers
# (WHATWG encoding sniffing algorithm):
# 1. BOM - highest confidence, check before anything else
# 2. <meta charset> in first 2kb
# 3. chardet statistical detection - last resort
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
boms = [
(b'\xef\xbb\xbf', 'utf-8-sig'),
(b'\xff\xfe', 'utf-16-le'),
(b'\xfe\xff', 'utf-16-be'),
]
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
if bom_encoding:
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
r.encoding = bom_encoding
else:
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
if meta_charset_match:
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
r.encoding = encoding
else:
encoding = chardet.detect(r.content)['encoding']
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
if encoding:
r.encoding = encoding
# For other content types, use chardet
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
self.headers = r.headers
@@ -38,39 +38,26 @@
if (a.size !== b.size) {
return b.size - a.size;
}
// Second priority: apple-touch-icon over regular icon
const isAppleA = /apple-touch-icon/.test(a.rel);
const isAppleB = /apple-touch-icon/.test(b.rel);
if (isAppleA && !isAppleB) return -1;
if (!isAppleA && isAppleB) return 1;
// Third priority: icons with no size attribute (fallback icons) last
const hasNoSizeA = !a.hasSizes;
const hasNoSizeB = !b.hasSizes;
if (hasNoSizeA && !hasNoSizeB) return 1;
if (!hasNoSizeA && hasNoSizeB) return -1;
return 0;
});
const timeoutMs = 2000;
// 1 MB — matches the server-side limit in bump_favicon()
const MAX_BYTES = 1 * 1024 * 1024;
for (const icon of icons) {
try {
// Inline data URI — no network fetch needed, data is already here
if (icon.href.startsWith('data:')) {
const match = icon.href.match(/^data:([^;]+);base64,([A-Za-z0-9+/=]+)$/);
if (!match) continue;
const mime_type = match[1];
const base64 = match[2];
// Rough size check: base64 is ~4/3 the binary size
if (base64.length * 0.75 > MAX_BYTES) continue;
return { url: icon.href, mime_type, base64 };
}
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), timeoutMs);
@@ -87,15 +74,12 @@
const blob = await resp.blob();
if (blob.size > MAX_BYTES) continue;
// Convert blob to base64
const reader = new FileReader();
return await new Promise(resolve => {
reader.onloadend = () => {
resolve({
url: icon.href,
mime_type: blob.type,
base64: reader.result.split(",")[1]
});
};
@@ -114,3 +98,4 @@
// Auto-execute and return result for page.evaluate()
return await window.getFaviconAsBlob();
})();
@@ -56,10 +56,6 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
im.close()
del images
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
if total_height > capture_height:
stitched = stitched.crop((0, 0, max_width, capture_height))
# Draw caption only if page was trimmed
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
@@ -104,17 +104,15 @@ class fetcher(Fetcher):
from selenium.webdriver.remote.remote_connection import RemoteConnection
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
from selenium.webdriver.remote.client_config import ClientConfig
from urllib3.util import Timeout
driver = None
try:
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
client_config = ClientConfig(
remote_server_addr=self.browser_connection_url,
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
remote_connection = RemoteConnection(
self.browser_connection_url,
)
remote_connection = RemoteConnection(client_config=client_config)
remote_connection.set_timeout(30) # seconds
# Now create the driver with the RemoteConnection
driver = RemoteWebDriver(
command_executor=remote_connection,
options=options
+18 -60
View File
@@ -45,38 +45,8 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
# Compiled regex patterns for performance
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
# Regexes built from the constants above — no brittle hardcoded strings
_EXTRACT_REMOVED_RE = re.compile(
re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
+ r'|' +
re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
)
_EXTRACT_ADDED_RE = re.compile(
re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
+ r'|' +
re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
)
def extract_changed_from(raw_diff: str) -> str:
    """Return only the "old" side of each marked change in a raw diff string.

    Every match of ``_EXTRACT_REMOVED_RE`` (removed / changed-from placemarker
    pairs) contributes the text captured between its markers. Handy for the
    ``{{diff_changed_from}}`` token, which wants just the previous value
    (e.g. the old price) instead of the whole surrounding line.

    Args:
        raw_diff: Diff text that still contains the placemarker strings.

    Returns:
        The captured fragments joined with newlines; an empty string when
        nothing matches.
    """
    fragments = []
    for match in _EXTRACT_REMOVED_RE.finditer(raw_diff):
        # Use the first capture group that actually took part in the match.
        captured = [group for group in match.groups() if group is not None]
        fragments.append(captured[0] if captured else '')
    return '\n'.join(fragments)
def extract_changed_to(raw_diff: str) -> str:
    """Return only the "new" side of each marked change in a raw diff string.

    Every match of ``_EXTRACT_ADDED_RE`` (added / changed-into placemarker
    pairs) contributes the text captured between its markers. Handy for the
    ``{{diff_changed_to}}`` token, which wants just the new value (e.g. the
    new price) instead of the whole surrounding line.

    Args:
        raw_diff: Diff text that still contains the placemarker strings.

    Returns:
        The captured fragments joined with newlines; an empty string when
        nothing matches.
    """
    fragments = []
    for match in _EXTRACT_ADDED_RE.finditer(raw_diff):
        # Use the first capture group that actually took part in the match.
        captured = [group for group in match.groups() if group is not None]
        fragments.append(captured[0] if captured else '')
    return '\n'.join(fragments)
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
"""
Render word-level differences between two lines inline using diff-match-patch library.
@@ -163,20 +133,14 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
if removed_tokens:
removed_full = ''.join(removed_tokens).rstrip()
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
if include_change_type_prefix:
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
else:
result_parts.append(f'{removed_full}{trailing_removed}')
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
if added_tokens:
if result_parts: # Add newline between removed and added
result_parts.append('\n')
added_full = ''.join(added_tokens).rstrip()
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
if include_change_type_prefix:
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
else:
result_parts.append(f'{added_full}{trailing_added}')
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
return ''.join(result_parts), has_changes
else:
@@ -186,27 +150,21 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
if op == 0: # Equal
result_parts.append(text)
elif op == 1: # Insertion
if not include_change_type_prefix:
result_parts.append(text)
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
else:
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
result_parts.append(trailing)
elif op == -1: # Deletion
if not include_change_type_prefix:
result_parts.append(text)
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
else:
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
result_parts.append(trailing)
return ''.join(result_parts), has_changes
@@ -402,7 +360,7 @@ def customSequenceMatcher(
# Use inline word-level diff for single line replacements when word_diff is enabled
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix)
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
if ignore_junk and not has_changes:
# No real changes, skip this line
@@ -457,8 +415,8 @@ def render_diff(
Returns:
str: Rendered difference
"""
newest_lines = [line.rstrip() for line in (newest_version_file_contents or '').splitlines()]
previous_lines = [line.rstrip() for line in (previous_version_file_contents or '').splitlines()]
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
now = time.time()
logger.debug(
f"diff options: "
+23 -28
View File
@@ -4,7 +4,6 @@ import flask_login
import locale
import os
import queue
import re
import sys
import threading
import time
@@ -212,23 +211,14 @@ def _is_safe_valid_url(test_url):
from .validate_url import is_safe_valid_url
return is_safe_valid_url(test_url)
@app.template_global('get_html_head_extras')
def _get_html_head_extras():
    """Template global: return whatever ``collect_html_head_extras()`` yields.

    The import is done at call time, inside the function, rather than at
    module load.
    """
    from .pluggy_interface import collect_html_head_extras as _collect
    extras = _collect()
    return extras
@app.template_filter('format_number_locale')
def _jinja2_filter_format_number_locale(value: float) -> str:
"Formats for example 4000.10 to the local locale default of 4,000.10"
# Format the number with two decimal places (locale format string will return 6 decimal)
formatted_value = locale.format_string("%.2f", value, grouping=True)
return formatted_value
@app.template_filter('regex_search')
def _jinja2_filter_regex_search(value, pattern):
import re
return re.search(pattern, str(value)) is not None
return formatted_value
@app.template_global('is_checking_now')
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
@@ -393,8 +383,6 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
return ''
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
@app.template_filter('sanitize_tag_class')
def _jinja2_filter_sanitize_tag_class(tag_title):
"""Sanitize a tag title to create a valid CSS class name.
@@ -406,8 +394,9 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
Returns:
str: A sanitized string suitable for use as a CSS class name
"""
import re
# Remove all non-alphanumeric characters and convert to lowercase
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
# Ensure it starts with a letter (CSS requirement)
if sanitized and not sanitized[0].isalpha():
sanitized = 'tag' + sanitized
@@ -495,21 +484,28 @@ def changedetection_app(config=None, datastore_o=None):
available_languages = get_available_languages()
language_codes = get_language_codes()
_locale_aliases = {
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
}
_locale_match_list = language_codes + list(_locale_aliases.keys())
def get_locale():
# Locale aliases: map browser language codes to translation directory names
# This handles cases where browsers send standard codes (e.g., zh-TW)
# but our translations use more specific codes (e.g., zh_Hant_TW)
locale_aliases = {
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
}
# 1. Try to get locale from session (user explicitly selected)
if 'locale' in session:
return session['locale']
# 2. Fall back to Accept-Language header
browser_locale = request.accept_languages.best_match(_locale_match_list)
# 3. Map browser locale to our internal locale if needed
return _locale_aliases.get(browser_locale, browser_locale)
# Get the best match from browser's Accept-Language header
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
# 3. Check if we need to map the browser locale to our internal locale
if browser_locale in locale_aliases:
return locale_aliases[browser_locale]
return browser_locale
# Initialize Babel with locale selector
babel = Babel(app, locale_selector=get_locale)
@@ -1022,16 +1018,15 @@ def check_for_new_version():
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
session = requests.Session()
session.verify = False
while not app.config.exit.is_set():
try:
r = session.post("https://changedetection.io/check-ver.php",
r = requests.post("https://changedetection.io/check-ver.php",
data={'version': __version__,
'app_guid': datastore.data['app_guid'],
'watch_count': len(datastore.data['watching'])
})
},
verify=False)
except:
pass
+12 -14
View File
@@ -608,12 +608,13 @@ class ValidateCSSJSONXPATHInput(object):
raise ValidationError("XPath not permitted in this field!")
from lxml import etree, html
import elementpath
from changedetectionio.html_tools import SafeXPath3Parser
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
tree = html.fromstring("<html></html>")
line = line.replace('xpath:', '')
try:
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
elementpath.select(tree, line.strip(), parser=XPath3Parser)
except elementpath.ElementPathError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
@@ -667,11 +668,9 @@ class ValidateCSSJSONXPATHInput(object):
# `jq` requires full compilation in windows and so isn't generally available
raise ValidationError("jq not support not found")
from changedetectionio.html_tools import validate_jq_expression
input = line.replace('jq:', '')
try:
validate_jq_expression(input)
jq.compile(input)
except (ValueError) as e:
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
@@ -725,7 +724,7 @@ class ValidateStartsWithRegex(object):
raise ValidationError(self.message or _l("Invalid value."))
class quickWatchForm(Form):
url = StringField(_l('URL'), validators=[validateURL()])
url = fields.URLField(_l('URL'), validators=[validateURL()])
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
@@ -771,16 +770,16 @@ class SingleBrowserStep(Form):
operation = SelectField(_l('Operation'), [validators.Optional()], choices=browser_step_ui_config.keys())
# maybe better to set some <script>var..
selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": _l("CSS or xPath selector")})
optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": _l("Value")})
selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": "CSS or xPath selector"})
optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": "Value"})
# @todo move to JS? ajax fetch new field?
# remove_button = SubmitField(_l('-'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
# add_button = SubmitField(_l('+'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
class processor_text_json_diff_form(commonSettingsForm):
url = StringField(_l('Web Page URL'), validators=[validateURL()])
tags = StringTagUUID(_l('Group Tag'), [validators.Optional()], default='')
url = fields.URLField('Web Page URL', validators=[validateURL()])
tags = StringTagUUID('Group Tag', [validators.Optional()], default='')
time_between_check = EnhancedFormField(
TimeBetweenCheckForm,
@@ -798,7 +797,6 @@ class processor_text_json_diff_form(commonSettingsForm):
subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
extract_lines_containing = StringListField(_l('Extract lines containing'), [validators.Optional()])
extract_text = StringListField(_l('Extract text'), [ValidateListRegex()])
title = StringField(_l('Title'), default='')
@@ -918,7 +916,7 @@ class processor_text_json_diff_form(commonSettingsForm):
class SingleExtraProxy(Form):
# maybe better to set some <script>var..
proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": _l("Name")})
proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
proxy_url = StringField(_l('Proxy URL'), [
validators.Optional(),
ValidateStartsWithRegex(
@@ -930,7 +928,7 @@ class SingleExtraProxy(Form):
], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
class SingleExtraBrowser(Form):
browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": _l("Name")})
browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
browser_connection_url = StringField(_l('Browser connection URL'), [
validators.Optional(),
ValidateStartsWithRegex(
@@ -999,7 +997,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
# Screenshot comparison settings
min_change_percentage = FloatField(
_l('Screenshot: Minimum Change Percentage'),
'Screenshot: Minimum Change Percentage',
validators=[
validators.Optional(),
validators.NumberRange(min=0.0, max=100.0, message=_l('Must be between 0 and 100'))
@@ -1008,7 +1006,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
)
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"})
password = SaltyPasswordField(_l('Password'))
pager_size = IntegerField(_l('Pager size'),
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
+12 -122
View File
@@ -4,7 +4,6 @@ from loguru import logger
from typing import List
import html
import json
import os
import re
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
@@ -14,45 +13,6 @@ PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
# jq builtins that can leak sensitive data or cause harm when user-supplied expressions are executed.
# env/$ENV reads all process environment variables (passwords, API keys, etc.)
# include/import can read arbitrary files from disk
# input/inputs reads beyond the supplied JSON data
# debug/stderr leaks data to stderr
# halt/halt_error terminates the process (DoS)
_JQ_BLOCKED_PATTERNS = [
(re.compile(r'\benv\b'), 'env (reads environment variables)'),
(re.compile(r'\$ENV\b'), '$ENV (reads environment variables)'),
(re.compile(r'\binclude\b'), 'include (reads files from disk)'),
(re.compile(r'\bimport\b'), 'import (reads files from disk)'),
(re.compile(r'\binputs?\b'), 'input/inputs (reads beyond provided data)'),
(re.compile(r'\bdebug\b'), 'debug (leaks data to stderr)'),
(re.compile(r'\bstderr\b'), 'stderr (leaks data to stderr)'),
(re.compile(r'\bhalt(?:_error)?\b'), 'halt/halt_error (terminates the process)'),
(re.compile(r'\$__loc__\b'), '$__loc__ (leaks file path information)'),
(re.compile(r'\bbuiltins\b'), 'builtins (enumerates available functions)'),
(re.compile(r'\bmodulemeta\b'), 'modulemeta (leaks module information)'),
(re.compile(r'\$JQ_BUILD_CONFIGURATION\b'), '$JQ_BUILD_CONFIGURATION (leaks build information)'),
]
def validate_jq_expression(expression: str) -> None:
    """Refuse jq expressions that match any entry in ``_JQ_BLOCKED_PATTERNS``.

    jq expressions supplied by users run server-side, so builtins such as
    ``env`` could otherwise expose process environment variables
    (SALTED_PASS, proxy credentials, API keys, ...) as watch output.

    The check is skipped entirely when the ``JQ_ALLOW_RISKY_EXPRESSIONS``
    environment variable evaluates as true via ``strtobool``.

    Args:
        expression: The jq program text, without any ``jq:`` prefix.

    Raises:
        ValueError: When the expression matches a blocked pattern. The
            message names the first pattern that matched.
    """
    from changedetectionio.strtobool import strtobool

    risky_allowed = strtobool(os.getenv('JQ_ALLOW_RISKY_EXPRESSIONS', 'false'))
    if not risky_allowed:
        for pattern, description in _JQ_BLOCKED_PATTERNS:
            if not pattern.search(expression):
                continue
            # First hit wins: log loudly, then reject the expression.
            msg = f"jq expression uses disallowed builtin: {description}"
            logger.critical(f"Security: blocked jq expression containing '{description}' - expression: {expression!r}")
            raise ValueError(msg)
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
# 'price' , 'lowPrice', 'highPrice' are usually under here
@@ -63,59 +23,6 @@ class JSONNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
'unparsed-text',
'unparsed-text-lines',
'unparsed-text-available',
'doc',
'doc-available',
'json-doc',
'json-doc-available',
'collection', # XPath 2.0+: loads XML node collections from arbitrary URIs
'uri-collection', # XPath 3.0+: enumerates URIs from resource collections
'transform', # XPath 3.1: XSLT transformation (currently raises, block proactively)
'load-xquery-module', # XPath 3.1: loads XQuery modules (currently raises, block proactively)
'environment-variable',
'available-environment-variables',
]
def _build_safe_xpath3_parser():
    """Create an ``XPath3Parser`` subclass with selected functions removed.

    XPath 3.0 ships functions able to read files or environment variables:
      - unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
      - doc / doc-available (XML fetch from URI)
      - environment-variable / available-environment-variables (env var leakage)

    Each blocked name is popped from the subclass's ``symbol_table``.
    NOTE(review): this relies on elementpath giving every parser subclass its
    own ``symbol_table`` copy, so ``XPath3Parser`` itself stays untouched -
    confirm against the installed elementpath version.

    The blocked list can be replaced through the ``XPATH_BLOCKED_FUNCTIONS``
    environment variable (comma-separated, e.g.
    "unparsed-text,doc,environment-variable"). When the variable is unset,
    ``_DEFAULT_UNSAFE_XPATH3_FUNCTIONS`` is used.

    Returns:
        The newly created ``SafeXPath3Parser`` class.
    """
    import os
    from elementpath.xpath3 import XPath3Parser

    class SafeXPath3Parser(XPath3Parser):
        pass

    configured = os.getenv('XPATH_BLOCKED_FUNCTIONS')
    if configured is None:
        blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
    else:
        # Split on commas, trim whitespace, drop empty entries.
        trimmed = (part.strip() for part in configured.split(','))
        blocked = [name for name in trimmed if name]

    for _fn in blocked:
        # pop() with a default: names missing from the table are ignored.
        SafeXPath3Parser.symbol_table.pop(_fn, None)

    return SafeXPath3Parser
# Module-level singleton — built once, reused everywhere.
SafeXPath3Parser = _build_safe_xpath3_parser()
# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "(?i)foobar" type configuration
@lru_cache(maxsize=100)
@@ -276,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
"""
from lxml import etree, html
import elementpath
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
parser = etree.HTMLParser()
tree = None
@@ -301,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
# This allows //title to match elements in the default namespace
namespaces[''] = tree.nsmap[None]
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
@@ -326,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
else:
html_block += elementpath_tostring(element)
# Drop element references before the finally block so tree.clear() can release
# the libxml2 document immediately (elements pin the C-level doc via refcount).
del r
return html_block
finally:
# Explicitly clear the tree to free memory
@@ -424,16 +330,12 @@ def _parse_json(json_data, json_filter):
raise Exception("jq not support not found")
if json_filter.startswith("jq:"):
expr = json_filter.removeprefix("jq:")
validate_jq_expression(expr)
jq_expression = jq.compile(expr)
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match)
if json_filter.startswith("jqraw:"):
expr = json_filter.removeprefix("jqraw:")
validate_jq_expression(expr)
jq_expression = jq.compile(expr)
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
match = jq_expression.input(json_data).all()
return '\n'.join(str(item) for item in match)
@@ -537,25 +439,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
else:
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
# Server may claim application/json but actually return JSONP
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
if jsonp_match:
try:
inner = jsonp_match.group(1).strip()
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
if not stripped_text_from_html:
# Probably something else, go fish inside for it
try:
stripped_text_from_html = extract_json_blob_from_html(content=content,
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
json_filter=json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
# Probably something else, go fish inside for it
try:
stripped_text_from_html = extract_json_blob_from_html(content=content,
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
json_filter=json_filter )
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
if not stripped_text_from_html:
# Re 265 - Just return an empty string when filter not found
+11 -14
View File
@@ -28,20 +28,18 @@ def get_timeago_locale(flask_locale):
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
"""
locale_map = {
'zh': 'zh_CN', # Chinese Simplified
'zh': 'zh_CN', # Chinese Simplified
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
'pt': 'pt_PT', # Portuguese (Portugal)
'pt_BR': 'pt_BR', # Portuguese (Brasil)
'sv': 'sv_SE', # Swedish
'no': 'nb_NO', # Norwegian Bokmål
'hi': 'in_HI', # Hindi
'cs': 'en', # Czech not supported by timeago, fallback to English
'ja': 'ja', # Japanese
'uk': 'uk', # Ukrainian
'en_GB': 'en', # British English - timeago uses 'en'
'en_US': 'en', # American English - timeago uses 'en'
'pt': 'pt_PT', # Portuguese (Portugal)
'sv': 'sv_SE', # Swedish
'no': 'nb_NO', # Norwegian Bokmål
'hi': 'in_HI', # Hindi
'cs': 'en', # Czech not supported by timeago, fallback to English
'uk': 'uk', # Ukrainian
'en_GB': 'en', # British English - timeago uses 'en'
'en_US': 'en', # American English - timeago uses 'en'
}
return locale_map.get(flask_locale, flask_locale)
@@ -55,8 +53,7 @@ LANGUAGE_DATA = {
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
-15
View File
@@ -46,26 +46,11 @@ class model(EntityPersistenceMixin, watch_base):
super(model, self).__init__(*arg, **kw)
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
if kw.get('default'):
self.update(kw['default'])
del kw['default']
def matches_url(self, url: str) -> bool:
    """Return True if this tag should be auto-applied to the given watch URL.

    Matching is case-insensitive. A pattern containing any of the wildcard
    characters ``*``, ``?`` or ``[`` is treated as an fnmatch-style glob over
    the whole URL; any other pattern is a plain substring test.

    Args:
        url: The watch URL to test.

    Returns:
        bool: True on a match. False when no pattern is configured (missing,
        ``None``, or whitespace-only) or when ``url`` is empty.
    """
    import fnmatch

    # The key may exist but hold None, so coerce before calling .strip().
    pattern = (self.get('url_match_pattern') or '').strip()
    if not pattern or not url:
        return False

    needle = pattern.lower()
    haystack = url.lower()
    if any(c in pattern for c in ('*', '?', '[')):
        # Both sides are already lower-cased; fnmatchcase skips the
        # OS-dependent normalisation that fnmatch.fnmatch would add.
        return fnmatch.fnmatchcase(haystack, needle)
    return needle in haystack
# _save_to_disk() method provided by EntityPersistenceMixin
# commit() and _get_commit_data() methods inherited from watch_base
# Tag uses default _get_commit_data() (includes all keys)
+53 -90
View File
@@ -43,11 +43,6 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
# Module-level favicon filename cache: data_dir → basename (or None)
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
_FAVICON_FILENAME_CACHE: dict = {}
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -388,25 +383,6 @@ class model(EntityPersistenceMixin, watch_base):
return self.get('fetch_backend')
@property
def fetcher_supports_screenshots(self):
    """Tell whether the fetcher configured for this watch can take screenshots.

    The backend name comes from ``self.get_fetch_backend``. When it is empty
    or ``'system'``, the application-wide ``fetch_backend`` setting from
    ``self._datastore`` is used instead (default ``'html_requests'``). The
    name is then looked up as an attribute of
    ``changedetectionio.content_fetchers`` and that object's
    ``supports_screenshots`` attribute is read.

    Returns:
        bool: False when no fetcher of that name exists or it lacks a truthy
        ``supports_screenshots``; True otherwise.
    """
    from changedetectionio import content_fetchers

    backend = self.get_fetch_backend
    use_system_default = not backend or backend == 'system'
    if use_system_default:
        backend = self._datastore['settings']['application'].get('fetch_backend', 'html_requests')

    fetcher_cls = getattr(content_fetchers, backend, None)
    if fetcher_cls is not None:
        return bool(getattr(fetcher_cls, 'supports_screenshots', False))
    return False
@property
def is_pdf(self):
url = str(self.get("url") or "").lower()
@@ -798,50 +774,24 @@ class model(EntityPersistenceMixin, watch_base):
# Also in the case that the file didnt exist
return True
def bump_favicon(self, url, favicon_base_64: str, mime_type: str = None) -> None:
def bump_favicon(self, url, favicon_base_64: str) -> None:
from urllib.parse import urlparse
import base64
import binascii
import re
decoded = None
MAX_FAVICON_BYTES = 1 * 1024 * 1024 # 1 MB
MIME_TO_EXT = {
'image/png': 'png',
'image/x-icon': 'ico',
'image/vnd.microsoft.icon': 'ico',
'image/jpeg': 'jpg',
'image/gif': 'gif',
'image/svg+xml': 'svg',
'image/webp': 'webp',
'image/bmp': 'bmp',
}
extension = None
# If the caller already resolved the MIME type (e.g. from blob.type or a data URI),
# use that directly — it's more reliable than guessing from a URL path.
if mime_type:
extension = MIME_TO_EXT.get(mime_type.lower().split(';')[0].strip(), None)
# Fall back to extracting extension from URL path, unless it's a data URI.
if not extension and url and not url.startswith('data:'):
if url:
try:
parsed = urlparse(url)
filename = os.path.basename(parsed.path)
(_base, ext) = filename.lower().strip().rsplit('.', 1)
extension = ext
(base, extension) = filename.lower().strip().rsplit('.', 1)
except ValueError:
logger.warning(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}', defaulting to ico")
# Handle data URIs: extract MIME type from the URI itself when not already known
if not extension and url and url.startswith('data:'):
m = re.match(r'^data:([^;]+);base64,', url)
if m:
extension = MIME_TO_EXT.get(m.group(1).lower(), None)
if not extension:
extension = 'ico'
logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
return None
else:
# Assume favicon.ico
base = "favicon"
extension = "ico"
fname = os.path.join(self.data_dir, f"favicon.{extension}")
@@ -850,50 +800,58 @@ class model(EntityPersistenceMixin, watch_base):
decoded = base64.b64decode(favicon_base_64, validate=True)
except (binascii.Error, ValueError) as e:
logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
return None
else:
if decoded:
try:
with open(fname, 'wb') as f:
f.write(decoded)
if len(decoded) > MAX_FAVICON_BYTES:
logger.warning(f"UUID: {self.get('uuid')} Favicon too large ({len(decoded)} bytes), skipping")
return None
# Invalidate favicon filename cache
if hasattr(self, '_favicon_filename_cache'):
delattr(self, '_favicon_filename_cache')
try:
with open(fname, 'wb') as f:
f.write(decoded)
# A signal that could trigger the socket server to update the browser also
watch_check_update = signal('watch_favicon_bump')
if watch_check_update:
watch_check_update.send(watch_uuid=self.get('uuid'))
# Invalidate module-level favicon filename cache for this watch
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
# A signal that could trigger the socket server to update the browser also
watch_check_update = signal('watch_favicon_bump')
if watch_check_update:
watch_check_update.send(watch_uuid=self.get('uuid'))
except Exception as e:
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
return None
except Exception as e:
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
# @todo - Store some checksum and only write when its different
logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
def get_favicon_filename(self) -> str | None:
"""
Find any favicon.* file in the watch data directory.
Find any favicon.* file in the current working directory
and return the contents of the newest one.
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
deepcopy (which drops instance attrs), and concurrent request races.
Invalidated by bump_favicon() when a new favicon is saved.
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
Returns:
str: Basename of the favicon file, or None if not found.
str: Basename of the newest favicon file, or None if not found.
"""
if self.data_dir in _FAVICON_FILENAME_CACHE:
return _FAVICON_FILENAME_CACHE[self.data_dir]
# Check cache first (prevents 26M+ allocations from repeated glob operations)
cache_key = '_favicon_filename_cache'
if hasattr(self, cache_key):
return getattr(self, cache_key)
import glob
# Search for all favicon.* files
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
fname = os.path.basename(files[0]) if files else None
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
return fname
if not files:
result = None
else:
# Find the newest by modification time
newest_file = max(files, key=os.path.getmtime)
result = os.path.basename(newest_file)
# Cache the result
setattr(self, cache_key, result)
return result
def get_screenshot_as_thumbnail(self, max_age=3200):
"""Return path to a square thumbnail of the most recent screenshot.
@@ -1224,13 +1182,18 @@ class model(EntityPersistenceMixin, watch_base):
def compile_error_texts(self, has_proxies=None):
"""Compile error texts for this watch.
Accepts has_proxies parameter to ensure it works even outside app context"""
from flask import url_for, has_request_context
from flask import url_for
from markupsafe import Markup
output = [] # Initialize as list since we're using append
last_error = self.get('last_error','')
has_app_context = has_request_context()
try:
url_for('settings.settings_page')
except Exception as e:
has_app_context = False
else:
has_app_context = True
# has app+request context, we can use url_for()
if has_app_context:
-2
View File
@@ -186,7 +186,6 @@ class watch_base(dict):
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'content-type': None,
'date_created': None,
'extract_lines_containing': [], # Keep only lines containing these substrings (plain text, case-insensitive)
'extract_text': [], # Extract text by regex after filters
'fetch_backend': 'system', # plaintext, playwright etc
'fetch_time': 0.0,
@@ -338,7 +337,6 @@ class watch_base(dict):
# These are set by processors/workers and should not trigger edited flag
additional_system_fields = {
'last_check_status', # Set by processors
'last_filter_config_hash', # Set by text_json_diff processor, internal skip-cache
'restock', # Set by restock processor
'last_viewed', # Set by mark_all_viewed endpoint
}
+1 -4
View File
@@ -259,12 +259,9 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
or url.startswith('https://discord.com/api'))\
and 'html' in requested_output_format:
# Discord doesn't render HTML — convert markup to plain text equivalents.
# &nbsp; is injected upstream to preserve double-spaces for HTML email clients;
# Discord displays it as the literal string "&nbsp;" so strip it here.
# Discord doesn't support HTML, replace <br> with newlines
n_body = n_body.strip().replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n')
n_body = n_body.replace('&nbsp;', ' ')
n_body = newline_re.sub('\n', n_body)
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it
+8 -44
View File
@@ -88,29 +88,6 @@ class FormattableTimestamp(str):
return self._dt.isoformat()
class FormattableExtract(str):
    """
    String type carrying only the changed fragments pulled out of a diff.

    Backs the {{diff_changed_from}} and {{diff_changed_to}} notification tokens:

        {{ diff_changed_from }}   old value(s) only, e.g. "$99.99"
        {{ diff_changed_to }}     new value(s) only, e.g. "$109.99"

    When several fragments changed they are joined with newlines.
    Because this subclasses str, instances serialize to JSON like any string.
    """

    def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
        # Nothing to compare: skip the diff import/render entirely and
        # produce an empty string (extract_fn is never called in this case).
        if not (prev_snapshot or current_snapshot):
            return super().__new__(cls, '')

        from changedetectionio import diff as diff_module

        # word_diff=True is required — the placemarkers the extraction
        # regexes look for only exist in word-diff output.
        rendered = diff_module.render_diff(prev_snapshot or '', current_snapshot or '', word_diff=True)
        return super().__new__(cls, extract_fn(rendered))
class FormattableDiff(str):
"""
A str subclass representing a rendered diff. As a plain string it renders
@@ -184,8 +161,6 @@ class NotificationContextData(dict):
'diff_patch': FormattableDiff('', '', patch_format=True),
'diff_removed': FormattableDiff('', '', include_added=False),
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
'diff_url': None,
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
'notification_timestamp': time.time(),
@@ -269,27 +244,16 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
}
from changedetectionio.diff import extract_changed_from, extract_changed_to
extract_specs = {
'diff_changed_from': extract_changed_from,
'diff_changed_to': extract_changed_to,
}
ret = {}
rendered_count = 0
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
# Only create FormattableDiff objects for diff keys actually used in the notification text
for key in NotificationContextData().keys():
if not key.startswith('diff'):
continue
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
continue
if key in diff_specs:
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
rendered_count += 1
elif key in extract_specs:
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
rendered_count += 1
if key.startswith('diff') and key in diff_specs:
# Check if this placeholder is actually used in the notification text
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
if re.search(pattern, notification_scan_text, re.IGNORECASE):
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
rendered_count += 1
if rendered_count:
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
@@ -497,7 +461,7 @@ Thanks - Your omniscient changedetection.io installation.
n_object = NotificationContextData({
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
'notification_body': body,
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
'notification_format': self._check_cascading_vars('notification_format', watch),
})
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
+1 -75
View File
@@ -174,64 +174,6 @@ class ChangeDetectionSpec:
"""
pass
@hookspec
def get_html_head_extras():
    """Return HTML to inject into the <head> of every page via base.html.

    Plugins can use this to add <script>, <style>, or <link> tags that should
    be present on all pages. Return a raw HTML string or None.

    IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
    sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
    work correctly. This hook is called inside a request context so url_for() is
    always available.

    For small amounts of CSS/JS, return them inline - no file-serving needed::

        from changedetectionio.pluggy_interface import hookimpl

        @hookimpl
        def get_html_head_extras(self):
            return (
                '<style>.my-module-banner { color: red; }</style>\\n'
                '<script>console.log("my_module_content loaded");</script>'
            )

    For larger assets, register your own lightweight Flask routes in the plugin
    module and point to them with url_for() so the sub-path prefix is handled
    automatically::

        from flask import url_for, Response
        from changedetectionio.pluggy_interface import hookimpl
        from changedetectionio.flask_app import app as _app

        MY_CSS = ".my-module-example { color: red; }"
        MY_JS = "console.log('my_module_content loaded');"

        @_app.route('/my_module_content/css')
        def my_module_content_css():
            return Response(MY_CSS, mimetype='text/css',
                            headers={'Cache-Control': 'max-age=3600'})

        @_app.route('/my_module_content/js')
        def my_module_content_js():
            return Response(MY_JS, mimetype='application/javascript',
                            headers={'Cache-Control': 'max-age=3600'})

        @hookimpl
        def get_html_head_extras(self):
            css = url_for('my_module_content_css')
            js = url_for('my_module_content_js')
            return (
                f'<link rel="stylesheet" href="{css}">\\n'
                f'<script src="{js}" defer></script>'
            )

    Returns:
        str or None: Raw HTML string to inject inside <head>, or None
    """
    pass
# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -664,20 +606,4 @@ def apply_update_finalize(update_handler, watch, datastore, processing_exception
except Exception as e:
# Don't let plugin errors crash the worker
logger.error(f"Error in update_finalize hook: {e}")
logger.exception(f"update_finalize hook exception details:")
def collect_html_head_extras():
    """Gather the <head> snippets contributed by every plugin into one string.

    Calls the get_html_head_extras hook on the plugin manager, discards falsy
    results (None, empty strings) and joins what is left with newlines.

    Intended to be called from a Flask template global, i.e. inside a request
    context, so that url_for() used by plugin implementations resolves
    correctly — including sub-path deployments via USE_X_SETTINGS /
    X-Forwarded-Prefix, where ProxyFix sets SCRIPT_NAME and url_for() prepends
    the prefix automatically.

    Returns:
        str: Combined HTML to inject inside <head>, or an empty string when no
        plugin contributed anything.
    """
    snippets = []
    for snippet in plugin_manager.hook.get_html_head_extras():
        if snippet:
            snippets.append(snippet)
    # Joining an empty list already yields "", so no special case is needed.
    return "\n".join(snippets)
logger.exception(f"update_finalize hook exception details:")
-12
View File
@@ -341,18 +341,6 @@ def get_processor_descriptions():
return descriptions
def wcag_text_color(hex_bg: str) -> str:
    """Return '#000000' or '#ffffff', whichever contrasts best against hex_bg.

    Args:
        hex_bg: Background colour as a hex string, with or without a leading
            '#'. Both the 6-digit form ('#1a2b3c') and the 3-digit shorthand
            ('#abc', expanded to 'aabbcc') are understood.

    Returns:
        '#000000' for light backgrounds, '#ffffff' for dark ones. Anything that
        cannot be parsed as a hex colour (wrong length, non-hex characters,
        empty/None) falls back to '#000000' instead of raising.
    """
    digits = (hex_bg or '').strip().lstrip('#')

    # Expand CSS shorthand (#rgb -> #rrggbb) so e.g. '#000' is treated as black
    # rather than being rejected by the length check below.
    if len(digits) == 3:
        digits = ''.join(ch * 2 for ch in digits)

    # Reject malformed input up front; int(..., 16) would otherwise raise
    # ValueError on non-hex characters.
    if len(digits) != 6 or any(ch not in '0123456789abcdefABCDEF' for ch in digits):
        return '#000000'

    r, g, b = (int(digits[i:i + 2], 16) / 255 for i in (0, 2, 4))

    def lin(c):
        # sRGB channel -> linear-light value (WCAG relative luminance formula)
        return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4

    luminance = 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b)

    # 0.179 is (approximately) the luminance at which the contrast ratio
    # against black equals the contrast ratio against white.
    return '#000000' if luminance > 0.179 else '#ffffff'
def generate_processor_badge_colors(processor_name):
"""
Generate consistent colors for a processor badge based on its name.
+1 -10
View File
@@ -97,6 +97,7 @@ class difference_detection_processor():
logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
self.last_raw_content_checksum = None
async def validate_iana_url(self):
"""Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes
@@ -259,16 +260,6 @@ class difference_detection_processor():
# @todo .quit here could go on close object, so we can run JS if change-detected
await self.fetcher.quit(watch=self.watch)
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
if self.fetcher.content and isinstance(self.fetcher.content, str):
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
# After init, call run_changedetection() which will do the actual change-detection
def get_extra_watch_config(self, filename):
+5 -2
View File
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
# Get error information for the template
screenshot_url = watch.get_screenshot()
is_html_webdriver = watch.fetcher_supports_screenshots
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
@@ -61,7 +64,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
screenshot=screenshot_url,
is_html_webdriver=is_html_webdriver,
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
extra_title=f" - {watch.label} - {gettext('Extract Data')}",
extra_title=f" - {watch.label} - Extract Data",
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
pure_menu_fixed=False
)
+1 -7
View File
@@ -100,13 +100,7 @@ class guess_stream_type():
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
self.is_rss = True
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
# A JSONP response starts with an identifier followed by '(' - not valid JSON
if re.match(r'^\w[\w.]*\s*\(', test_content):
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
self.is_plaintext = True
else:
self.is_json = True
self.is_json = True
elif 'pdf' in magic_content_header:
self.is_pdf = True
# magic will call a rss document 'xml'
@@ -1,7 +1,6 @@
from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch
from decimal import Decimal, InvalidOperation
from typing import Union
import re
@@ -11,8 +10,6 @@ supports_browser_steps = True
supports_text_filters_and_triggers = True
supports_text_filters_and_triggers_elements = True
supports_request_type = True
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
class Restock(dict):
@@ -34,7 +31,6 @@ class Restock(dict):
if standardized_value:
# Convert to float
# @todo locale needs to be the locale of the webpage
return float(parse_decimal(standardized_value, locale='en'))
return None
@@ -66,17 +62,6 @@ class Restock(dict):
super().__setitem__(key, value)
def get_price_from_history_str(history_str):
    """Pull the price out of a stored history snapshot string.

    Searches history_str with the module-level _price_re pattern and
    normalises the captured number through Decimal.

    Returns:
        str | None: The captured price as a string, or None when the pattern
        does not match or the captured text is not a valid Decimal.
    """
    match = _price_re.search(history_str)
    if match is None:
        return None

    try:
        price = Decimal(match.group(1))
    except InvalidOperation:
        return None
    return str(price)
class Watch(BaseWatch):
def __init__(self, *arg, **kw):
super().__init__(*arg, **kw)
@@ -90,27 +75,13 @@ class Watch(BaseWatch):
def extra_notification_token_values(self):
    """Extend the base notification tokens with this watch's restock data.

    Adds a 'restock' entry (the watch's stored restock value, or an empty dict
    when absent) and fills in restock['previous_price'] from a history
    snapshot when at least two history entries exist; otherwise it stays None.
    """
    values = super().extra_notification_token_values()
    values['restock'] = self.get('restock', {})
    values['restock']['previous_price'] = None

    if self.history_n >= 2:
        history = self.history
        if history and len(history) >= 2:
            # Timestamps are (for now) stored as string keys, so compare them
            # numerically rather than lexically.
            # NOTE(review): this selects the numerically smallest timestamp,
            # i.e. the oldest snapshot, not the one immediately before the
            # latest — confirm that is the intended "previous" price.
            oldest_key = min(history, key=int)
            price_str = self.get_history_snapshot(timestamp=oldest_key)
            if price_str:
                values['restock']['previous_price'] = get_price_from_history_str(price_str)

    return values
def extra_notification_token_placeholder_info(self):
    """Append the restock-specific tokens to the base placeholder help list.

    Each entry is a (token, description) tuple added after whatever the parent
    class already returned.
    """
    values = super().extra_notification_token_placeholder_info()

    restock_tokens = (
        ('restock.price', "Price detected"),
        ('restock.in_stock', "In stock status"),
        ('restock.original_price', "Original price at first check"),
        ('restock.previous_price', "Previous price in history"),
    )
    for token, description in restock_tokens:
        values.append((token, description))

    return values
@@ -437,18 +437,17 @@ class perform_site_check(difference_detection_processor):
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
#useless
# from ...html_tools import html_to_text
# text = html_to_text(self.fetcher.content)
# logger.debug(f"Length of text after conversion: {len(text)}")
# if not len(text):
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
# raise ReplyWithContentButNoText(url=watch.link,
# status_code=self.fetcher.get_last_status_code(),
# screenshot=self.fetcher.screenshot,
# html_content=self.fetcher.content,
# xpath_data=self.fetcher.xpath_data
# )
from ...html_tools import html_to_text
text = html_to_text(self.fetcher.content)
logger.debug(f"Length of text after conversion: {len(text)}")
if not len(text):
from ...content_fetchers.exceptions import ReplyWithContentButNoText
raise ReplyWithContentButNoText(url=watch.link,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
html_content=self.fetcher.content,
xpath_data=self.fetcher.xpath_data
)
# Which restock settings to compare against?
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
@@ -283,7 +283,4 @@ def query_price_availability(extracted_data):
if not result.get('availability') and 'availability' in microdata:
result['availability'] = microdata['availability']
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
# using something like babel you need to know the locale of the website and even then it can be problematic
# we dont really do anything with the price data so far.. so just accept it the way it comes.
return result
@@ -3,11 +3,11 @@
{% block content %}
<div class="tabs">
<ul>
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">{{ _('Error Text') }}</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">{{ _('Error Screenshot') }}</a></li> {% endif %}
<li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">{{ _('Text') }}</a></li>
<li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">{{ _('Screenshot') }}</a></li>
<li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">{{ _('Extract Data') }}</a></li>
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">Error Text</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">Error Screenshot</a></li> {% endif %}
<li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">Text</a></li>
<li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">Screenshot</a></li>
<li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">Extract Data</a></li>
</ul>
</div>
@@ -17,23 +17,23 @@
<form id="extract-data-form" class="pure-form pure-form-stacked edit-form" action="{{ url_for('ui.ui_diff.diff_history_page_extract_POST', uuid=uuid) }}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<p>{{ _('This tool will extract text data from all of the watch history.') }}</p>
<p>This tool will extract text data from all of the watch history.</p>
<div class="pure-control-group">
{{ render_field(extract_form.extract_regex) }}
<span class="pure-form-message-inline">
{{ _('A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.')|safe }}<br>
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
<p>
{{ _('For example, to extract only the numbers from text') }} &dash;<br>
<strong>{{ _('Raw text') }}</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
<strong>{{ _('RegEx to extract:') }}</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
For example, to extract only the numbers from text &dash;<br>
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
</p>
<p>
<a href="https://RegExr.com/">{{ _('Be sure to test your RegEx here.') }}</a>
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
</p>
<p>
{{ _('Each RegEx group bracket') }} <code>()</code> {{ _('will be in its own column, the first column value is always the date.') }}
Each RegEx group bracket <code>()</code> will be in its own column, the first column value is always the date.
</p>
</span>
</div>
@@ -7,7 +7,6 @@ a side-by-side or unified diff view with syntax highlighting and change markers.
import os
import time
from flask_babel import gettext
from loguru import logger
from changedetectionio import diff, strtobool
@@ -155,7 +154,11 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
screenshot_url = watch.get_screenshot()
is_html_webdriver = watch.fetcher_supports_screenshots
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
@@ -208,7 +211,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
diff_prefs=diff_prefs,
extra_classes='difference-page',
extra_stylesheets=extra_stylesheets,
extra_title=f" - {watch.label} - {gettext('History')}",
extra_title=f" - {watch.label} - History",
extract_form=extract_form,
from_version=str(from_version),
is_html_webdriver=is_html_webdriver,
@@ -85,10 +85,6 @@ class FilterConfig:
self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
return self._subtractive_selectors_cache
@property
def extract_lines_containing(self):
return self._get_merged_rules('extract_lines_containing')
@property
def extract_text(self):
return self._get_merged_rules('extract_text')
@@ -105,30 +101,6 @@ class FilterConfig:
def text_should_not_be_present(self):
return self._get_merged_rules('text_should_not_be_present')
def get_filter_config_hash(self):
    """
    Return a stable MD5 hex digest of the effective filter configuration.

    The skip-logic in run_changedetection() compares this value between runs,
    so a change to global settings, tag overrides or watch filters invalidates
    the raw-content-unchanged shortcut automatically — without needing
    clear_all_last_checksums() calls at every settings mutation site.
    """
    app_settings = self.datastore.data['settings']['application']

    # Rule lists are sorted so that reordering entries does not change the hash.
    rule_names = (
        'extract_lines_containing',
        'extract_text',
        'ignore_text',
        'include_filters',
        'subtractive_selectors',
        'text_should_not_be_present',
        'trigger_text',
    )
    config = {rule: sorted(getattr(self, rule)) for rule in rule_names}

    # Global processing flags that the rule lists above do not capture
    config['ignore_whitespace'] = app_settings.get('ignore_whitespace', False)
    config['strip_ignored_lines'] = app_settings.get('strip_ignored_lines', False)

    serialized = json.dumps(config, sort_keys=True)
    return hashlib.md5(serialized.encode()).hexdigest()
@property
def has_include_filters(self):
return bool(self.include_filters) and bool(self.include_filters[0].strip())
@@ -163,17 +135,6 @@ class ContentTransformer:
text = text.replace("\n\n", "\n")
return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
@staticmethod
def extract_lines_containing(text, substrings):
"""Keep only lines that contain at least one of the given substrings (case-insensitive)."""
needles = [s.lower() for s in substrings if s.strip()]
if not needles:
return text
return '\n'.join(
line for line in text.splitlines()
if any(needle in line.lower() for needle in needles)
)
@staticmethod
def extract_by_regex(text, regex_patterns):
"""Extract text matching regex patterns."""
@@ -416,26 +377,19 @@ class perform_site_check(difference_detection_processor):
raise Exception("Watch no longer exists.")
current_raw_document_checksum = self.get_raw_document_checksum()
# Build filter config up front so we can hash it for the skip check.
filter_config = FilterConfig(watch, self.datastore)
current_filter_config_hash = filter_config.get_filter_config_hash()
# Skip only when ALL of these hold:
# 1. raw HTML is unchanged
# 2. watch config was not edited (was_edited covers per-watch field changes)
# 3. effective filter config is unchanged (covers global/tag setting changes that
# bypass was_edited — e.g. global_ignore_text, global_subtractive_selectors)
# last_filter_config_hash being False means first run or upgrade: don't skip.
# Skip processing only if BOTH conditions are true:
# 1. HTML content unchanged (checksum matches last saved checksum)
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
# The was_edited flag handles all watch configuration changes, so we don't need
# separate checks for trigger_text or other processing rules.
if (not force_reprocess and
not watch.was_edited and
self.last_raw_content_checksum and
self.last_raw_content_checksum == current_raw_document_checksum and
watch.get('last_filter_config_hash') and
watch.get('last_filter_config_hash') == current_filter_config_hash):
self.last_raw_content_checksum == current_raw_document_checksum):
raise checksumFromPreviousCheckWasTheSame()
# Initialize remaining components
# Initialize components
filter_config = FilterConfig(watch, self.datastore)
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
transformer = ContentTransformer()
rule_engine = RuleEngine()
@@ -456,7 +410,6 @@ class perform_site_check(difference_detection_processor):
# Save the raw content checksum to file (processor implementation detail, not watch config)
self.update_last_raw_content_checksum(current_raw_document_checksum)
update_obj['last_filter_config_hash'] = current_filter_config_hash
# === CONTENT PREPROCESSING ===
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
@@ -550,10 +503,6 @@ class perform_site_check(difference_detection_processor):
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# === LINE FILTER (plain-text substring) ===
if filter_config.extract_lines_containing:
stripped_text = transformer.extract_lines_containing(stripped_text, filter_config.extract_lines_containing)
# === REGEX EXTRACTION ===
if filter_config.extract_text:
extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
@@ -587,8 +536,8 @@ class perform_site_check(difference_detection_processor):
# === BLOCKING RULES EVALUATION ===
blocked = False
# Check trigger_text - use text_for_checksuming so ignore_text can suppress trigger_text
if rule_engine.evaluate_trigger_text(text_for_checksuming, filter_config.trigger_text):
# Check trigger_text
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
blocked = True
# Check text_should_not_be_present
-2
View File
@@ -29,11 +29,9 @@ def register_watch_operation_handlers(socketio, datastore):
# Perform the operation
if op == 'pause':
watch.toggle_pause()
watch.commit()
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
elif op == 'mute':
watch.toggle_mute()
watch.commit()
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
elif op == 'recheck':
# Import here to avoid circular imports
@@ -199,31 +199,8 @@ def handle_watch_update(socketio, **kwargs):
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
    """Silence the 'write() before start_response' error werkzeug logs when a
    browser drops a WebSocket connection mid-handshake (e.g. closing a tab).

    Replaces BaseWSGIServer.log with a wrapper that drops that one error
    message and forwards everything else to the original method.

    Why patch log(): the exception is caught inside run_wsgi and routed to
    self.server.log() — it never propagates out, so wrapping run_wsgi would
    not help. The error is cosmetic; Socket.IO already handles the disconnect
    through its own disconnect handler and timeout logic.

    Best-effort: any failure while patching (for example werkzeug not being
    importable) is deliberately ignored.
    """
    try:
        from werkzeug.serving import BaseWSGIServer

        upstream_log = BaseWSGIServer.log

        def _filtered_log(self, type, message, *args):
            is_ws_disconnect_noise = type == 'error' and 'write() before start_response' in message
            if not is_ws_disconnect_noise:
                upstream_log(self, type, message, *args)

        BaseWSGIServer.log = _filtered_log
    except Exception:
        pass
def init_socketio(app, datastore):
"""Initialize SocketIO with the main Flask app"""
_suppress_werkzeug_ws_abrupt_disconnect_noise()
import platform
import sys
@@ -1,20 +1,5 @@
// Show the Discord/HTML warning only when a Discord notification URL is
// combined with an HTML notification format ('html' or 'htmlcolor').
function checkDiscordHtmlWarning() {
    var notificationUrls = $('textarea.notification-urls').val() || '';
    var selectedFormat = $('select.notification-format').val() || '';

    var targetsDiscord = /discord:\/\/|https:\/\/discord(?:app)?\.com\/api/i.test(notificationUrls);
    var sendsHtml = ['html', 'htmlcolor'].indexOf(selectedFormat) !== -1;

    // .toggle(true) shows the element, .toggle(false) hides it
    $('#discord-html-format-warning').toggle(targetsDiscord && sendsHtml);
}
$(document).ready(function () {
$('textarea.notification-urls, select.notification-format').on('change input', checkDiscordHtmlWarning);
checkDiscordHtmlWarning();
$('#add-email-helper').click(function (e) {
e.preventDefault();
email = prompt("Destination email");
-8
View File
@@ -116,14 +116,6 @@ $(document).ready(function () {
$('#realtime-conn-error').show();
});
// Tell the server we're leaving cleanly so it can release the connection
// immediately rather than waiting for a timeout.
// Note: this only fires for voluntary closes (tab/window close, navigation away).
// Hard kills, crashes and network drops will still timeout normally on the server.
window.addEventListener('beforeunload', function () {
socket.disconnect();
});
socket.on('queue_size', function (data) {
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
if(queueSizePagerInfoText) {
@@ -47,12 +47,12 @@ $grid-gap: 0.5rem;
.last-checked::before {
color: var(--color-text);
content: attr(data-label) " ";
content: "Last Checked ";
}
.last-changed::before {
color: var(--color-text);
content: attr(data-label) " ";
content: "Last Changed ";
}
/* Force table to not be like tables anymore */
File diff suppressed because one or more lines are too long
+4 -12
View File
@@ -980,20 +980,12 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
def get_all_tags_for_watch(self, uuid):
"""This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet"""
watch = self.data['watching'].get(uuid)
if not watch:
return {}
# Start with manually assigned tags
result = dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
# Should return a dict of full tag info linked by UUID
if watch:
return dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
# Additionally include any tag whose url_match_pattern matches this watch's URL
watch_url = watch.get('url', '')
if watch_url:
for tag_uuid, tag in self.__data['settings']['application']['tags'].items():
if tag_uuid not in result and tag.matches_url(watch_url):
result[tag_uuid] = tag
return result
return {}
@property
def extra_browsers(self):
@@ -98,14 +98,6 @@
<td><code>{{ '{{diff_patch}}' }}</code></td>
<td>{{ _('The diff output - patch in unified format') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_changed_from}}' }}</code></td>
<td>{{ _('Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_changed_to}}' }}</code></td>
<td>{{ _('Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
</tr>
<tr>
<td><code>{{ '{{current_snapshot}}' }}</code></td>
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
@@ -195,10 +187,6 @@
<div class="">
{{ render_field(form.notification_format , class="notification-format") }}
<span class="pure-form-message-inline">{{ _('Format for all notifications') }}</span>
<div id="discord-html-format-warning" class="inline-warning" style="display: none; margin-top: 6px;">
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Note') }}" title="{{ _('Note') }}">
{{ _('Discord does not render HTML — switch to') }} <strong>{{ _('Plain Text') }}</strong> {{ _('format to avoid') }} <code>&amp;nbsp;</code> {{ _('and other HTML entities appearing literally in your notifications.') }}
</div>
</div>
</div>
{% endmacro %}
+2 -6
View File
@@ -45,10 +45,6 @@
<script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script>
{% endif %}
{%- set _html_head_extras = get_html_head_extras() -%}
{%- if _html_head_extras %}
{{ _html_head_extras | safe }}
{%- endif %}
</head>
<body class="{{extra_classes}}">
@@ -69,7 +65,7 @@
{% else %}
{% if new_version_available and not(has_password and not current_user.is_authenticated) %}
<span id="new-version-text" class="pure-menu-heading">
<a href="https://changedetection.io">{{ _('A new version is available') }}</a>
<a href="https://changedetection.io">A new version is available</a>
</span>
{% endif %}
{% endif %}
@@ -235,7 +231,7 @@
{% if session['share-link'] %}
<ul class="messages with-share-link">
<li class="message">
{{ _('Share this link:') }}
Share this link:
<span id="share-link">{{ session['share-link'] }}</span>
<img style="height: 1em; display: inline-block" src="{{url_for('static_content', group='images', filename='copy.svg')}}" >
</li>
@@ -10,7 +10,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath 1 &amp; 2, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">{{ _('Show advanced help and tips') }}</span><br>
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
<ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
@@ -47,9 +47,9 @@ nav
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> {{ _('Remove HTML element(s) by CSS and XPath selectors before text conversion.') }} </li>
<li> {{ _("Don't paste HTML here, use only CSS and XPath selectors") }} </li>
<li> {{ _('Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML.') }} </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
@@ -49,21 +49,6 @@ Unavailable") }}
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_lines_containing, rows=5, placeholder="celsius
temperature
price") }}
<span class="pure-form-message-inline">
<ul>
<li>{{ _('Keep only lines that contain any of these words or phrases (plain text, case-insensitive)') }}</li>
<li>{{ _('One entry per line — any line in the page text that contains a match is kept') }}</li>
<li>{{ _('Simpler alternative to regex — use this when you just want lines about a specific topic') }}</li>
<li>{{ _('Example: enter') }} <code>celsius</code> {{ _('to keep only lines mentioning temperature readings') }}</li>
</ul>
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
-29
View File
@@ -2,7 +2,6 @@
import psutil
import time
from threading import Thread
import multiprocessing
import pytest
import arrow
@@ -192,34 +191,6 @@ def cleanup(datastore_path):
if os.path.isfile(f):
os.unlink(f)
def pytest_configure(config):
    """Pin the multiprocessing start method to 'fork' before any test runs.

    Python 3.14 switched the Linux default from 'fork' to 'forkserver', which
    needs every object to be picklable; pytest-flask's LiveServer relies on
    nested functions that cannot be pickled. Selecting 'fork' explicitly keeps
    Python 3.10-3.13 behaving as before and avoids the 3.14 pickling errors.

    Nothing is changed when a start method has already been chosen, and a
    ValueError/RuntimeError from set_start_method (already set, unavailable on
    this platform, or context already created) is ignored.

    See: https://github.com/python/cpython/issues/126831
    See: https://docs.python.org/3/whatsnew/3.14.html
    """
    # Respect a start method somebody else configured.
    if multiprocessing.get_start_method(allow_none=True) is not None:
        return

    try:
        multiprocessing.set_start_method('fork', force=False)
        logger.debug("Set multiprocessing start method to 'fork' for Python 3.14+ compatibility")
    except (ValueError, RuntimeError):
        pass
def pytest_addoption(parser):
"""Add custom command-line options for pytest.
@@ -1,83 +0,0 @@
"""Test that plugins can inject HTML into base.html <head> via get_html_head_extras hookimpl."""
import pytest
from flask import url_for, Response
from changedetectionio.pluggy_interface import hookimpl, plugin_manager
_MY_JS = "console.log('my_module_content loaded');"
_MY_CSS = ".my-module-example { color: red; }"
class _HeadExtrasPlugin:
    """Plugin used by the tests: contributes a <link> and a <script> tag whose URLs are Flask routes."""

    @hookimpl
    def get_html_head_extras(self):
        """Return the stylesheet tag and the script tag, separated by a newline."""
        stylesheet_href = url_for('test_plugin_my_module_content_css')
        script_src = url_for('test_plugin_my_module_content_js')

        link_tag = f'<link rel="stylesheet" id="test-head-extra-css" href="{stylesheet_href}">\n'
        script_tag = f'<script id="test-head-extra-js" src="{script_src}" defer></script>'
        return link_tag + script_tag
@pytest.fixture(scope='module')
def plugin_routes(live_server):
    """Module-scoped fixture adding the CSS and JS asset routes to the live server's app.

    Module scope is used because a Flask route cannot be registered twice.
    """
    flask_app = live_server.app

    @flask_app.route('/test-plugin/my_module_content/css')
    def test_plugin_my_module_content_css():
        # Serves the module-level CSS snippet with a one hour cache header.
        return Response(
            _MY_CSS,
            mimetype='text/css',
            headers={'Cache-Control': 'max-age=3600'},
        )

    @flask_app.route('/test-plugin/my_module_content/js')
    def test_plugin_my_module_content_js():
        # Serves the module-level JS snippet with a one hour cache header.
        return Response(
            _MY_JS,
            mimetype='application/javascript',
            headers={'Cache-Control': 'max-age=3600'},
        )
@pytest.fixture
def head_extras_plugin(plugin_routes):
    """Function-scoped fixture: registers the hookimpl plugin, yields it, then unregisters it."""
    registered_name = "test_head_extras"
    extras_plugin = _HeadExtrasPlugin()

    plugin_manager.register(extras_plugin, name=registered_name)
    yield extras_plugin
    plugin_manager.unregister(name=registered_name)
def test_plugin_html_injected_into_head(client, live_server, measure_memory_usage, datastore_path, head_extras_plugin):
    """Both tags returned by get_html_head_extras must be rendered before </head>."""
    response = client.get(url_for("watchlist.index"), follow_redirects=True)
    assert response.status_code == 200

    page = response.data
    assert b'id="test-head-extra-css"' in page, "Plugin <link> tag missing from rendered page"
    assert b'id="test-head-extra-js"' in page, "Plugin <script> tag missing from rendered page"

    closing_head_at = page.find(b'</head>')
    assert closing_head_at != -1

    # Each marker has to be located ahead of the closing head tag.
    for marker in (b'id="test-head-extra-css"', b'id="test-head-extra-js"'):
        marker_at = page.find(marker)
        assert 0 <= marker_at < closing_head_at, f"{marker} must appear before </head>"
def test_plugin_js_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
    """The JS route added by the plugin_routes fixture answers 200 with a javascript content type and the JS body."""
    js_endpoint = url_for('test_plugin_my_module_content_js')
    response = client.get(js_endpoint)

    assert response.status_code == 200
    assert 'javascript' in response.content_type
    assert _MY_JS.encode() in response.data
def test_plugin_css_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
    """The CSS route added by the plugin_routes fixture answers 200 with a css content type and the CSS body."""
    css_endpoint = url_for('test_plugin_my_module_content_css')
    response = client.get(css_endpoint)

    assert response.status_code == 200
    assert 'css' in response.content_type
    assert _MY_CSS.encode() in response.data
def test_no_extras_without_plugin(client, live_server, measure_memory_usage, datastore_path):
    """Isolation check: when the plugin fixture is not requested, neither marker is rendered."""
    page = client.get(url_for("watchlist.index"), follow_redirects=True).data

    assert b'id="test-head-extra-css"' not in page
    assert b'id="test-head-extra-js"' not in page
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
set_longer_modified_response, delete_all_watches
import logging
import os
# NOTE - RELIES ON mailserver as hostname running, see github build recipes
smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver')
smtp_test_server = 'mailserver'
ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())
-14
View File
@@ -170,14 +170,6 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
headers={'x-api-key': api_key},
)
assert b'(changed) Which is across' in res.data
assert b'Some text thats the same' in res.data
# Fetch the difference between two versions (default text format)
res = client.get(
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+"?changesOnly=true",
headers={'x-api-key': api_key},
)
assert b'Some text thats the same' not in res.data
# Test htmlcolor format
res = client.get(
@@ -374,9 +366,6 @@ def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path
watch['last_changed'] = 454444444444
watch['date_created'] = 454444444444
# Exercise the new extract_lines_containing field
watch['extract_lines_containing'] = ['celsius', 'temperature']
# HTTP PUT ( UPDATE an existing watch )
res = client.put(
url_for("watch", uuid=uuid),
@@ -400,9 +389,6 @@ def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path
assert date_created != 454444444444
assert date_created != "454444444444"
assert res.json.get('extract_lines_containing') == ['celsius', 'temperature'], \
"extract_lines_containing should be persisted and returned via API"
def test_access_denied(client, live_server, measure_memory_usage, datastore_path):
# `config_api_token_enabled` Should be On by default
+5 -26
View File
@@ -178,44 +178,23 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
"""
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
Test that a tag/group can be updated with processor_config_restock_diff via the API.
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
"""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
set_original_response(datastore_path=datastore_path)
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
# Create a tag
res = client.post(
url_for("tag"),
data=json.dumps({
"title": "Restock Group",
"overrides_watch": True,
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": True,
"price_change_min": 7777777
}
}),
data=json.dumps({"title": "Restock Group"}),
headers={'content-type': 'application/json', 'x-api-key': api_key}
)
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
assert res.status_code == 201
tag_uuid = res.json.get('uuid')
# Verify processor config was saved during creation (the bug: these were discarded)
res = client.get(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
tag_data = res.json
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
"processor_config_restock_diff should be saved on POST"
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
"price_change_min should be saved on POST"
# Update tag with valid processor_config_restock_diff via PUT
# Update tag with valid processor_config_restock_diff
res = client.put(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
-34
View File
@@ -48,15 +48,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Check this class does not appear (that we didnt see the actual source)
assert b'foobar-detection' not in res.data
# Check POST preview
res = client.post(
url_for("ui.ui_preview.preview_page", uuid="first"),
follow_redirects=True
)
# Check this class does not appear (that we didnt see the actual source)
assert b'foobar-detection' not in res.data
# Make a change
set_modified_response(datastore_path=datastore_path)
@@ -422,28 +413,3 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server
assert b'&lt;foobar' not in res.data
res = delete_all_watches(client)
def test_last_error_cleared_on_same_checksum(client, live_server, datastore_path):
    """A stale last_error must be reset by a re-check even when the content is unchanged
    (the checksumFromPreviousCheckWasTheSame path)."""
    set_original_response(datastore_path=datastore_path)

    datastore = client.application.config.get('DATASTORE')
    uuid = datastore.add_watch(url=url_for('test_endpoint', _external=True))

    # Initial check: records the baseline checksum.
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Plant an error on the watch, as if an earlier check had failed.
    datastore.update_watch(uuid=uuid, update_obj={'last_error': 'Some previous error'})
    assert datastore.data['watching'][uuid].get('last_error') == 'Some previous error'

    # Re-check with identical content.
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # The planted error has to be gone although nothing changed.
    assert datastore.data['watching'][uuid].get('last_error') == False

    delete_all_watches(client)
+2 -64
View File
@@ -3,7 +3,7 @@
from .util import set_original_response, live_server_setup, wait_for_all_checks
from flask import url_for
import io
from zipfile import ZipFile, ZIP_DEFLATED
from zipfile import ZipFile
import re
import time
from changedetectionio.model import Watch, Tag
@@ -68,9 +68,6 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
# Check for changedetection.json (settings file)
assert 'changedetection.json' in l, "changedetection.json should be in backup"
# secret.txt must never be included — it contains the Flask session key
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
# Get the latest one
res = client.get(
url_for("backups.remove_backups"),
@@ -199,63 +196,4 @@ def test_backup_restore(client, live_server, measure_memory_usage, datastore_pat
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
assert isinstance(restored_tag2, Tag.model), \
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
    """Restoring an archive that holds a "../" entry must raise ValueError matching "Zip Slip"."""
    import pytest
    from changedetectionio.blueprint.backups.restore import import_from_zip

    # In-memory archive with a single entry that points outside the extraction directory.
    traversal_archive = io.BytesIO()
    with ZipFile(traversal_archive, 'w') as archive:
        archive.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
    traversal_archive.seek(0)

    datastore = live_server.app.config['DATASTORE']
    restore_options = {
        'include_groups': True,
        'include_groups_replace': True,
        'include_watches': True,
        'include_watches_replace': True,
    }

    with pytest.raises(ValueError, match="Zip Slip"):
        import_from_zip(zip_stream=traversal_archive, datastore=datastore, **restore_options)
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
    """Restoring an archive whose total uncompressed size is over the limit must raise ValueError.

    The guard works from the file_size recorded in the zip central directory,
    so nothing is actually decompressed and the test stays fast and light on
    RAM. 100 KB of zeros deflates to roughly 100 bytes; lowering the module
    limit to 50 KB for the duration of the test is enough to trip the check
    without creating large files.
    """
    import pytest
    import changedetectionio.blueprint.backups.restore as restore_mod
    from changedetectionio.blueprint.backups.restore import import_from_zip

    # Tiny on disk, 100 KB according to its metadata.
    oversized_archive = io.BytesIO()
    with ZipFile(oversized_archive, 'w', compression=ZIP_DEFLATED) as archive:
        archive.writestr("data.txt", b"\x00" * (100 * 1024))
    oversized_archive.seek(0)

    datastore = live_server.app.config['DATASTORE']
    restore_options = {
        'include_groups': True,
        'include_groups_replace': True,
        'include_watches': True,
        'include_watches_replace': True,
    }

    saved_limit = restore_mod._MAX_DECOMPRESSED_BYTES
    try:
        restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024  # 50 KB limit for this test
        with pytest.raises(ValueError, match="decompressed size"):
            import_from_zip(zip_stream=oversized_archive, datastore=datastore, **restore_options)
    finally:
        # Always put the real limit back for the tests that follow.
        restore_mod._MAX_DECOMPRESSED_BYTES = saved_limit
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
-64
View File
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
# coding=utf-8
import hashlib
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
@@ -12,69 +11,6 @@ import os
def test_surrogate_characters_in_content_are_sanitized():
    """A lone surrogate in fetched text must be removable by an encode/decode round trip.

    Lone surrogates can show up in requests' r.text when a server returns
    malformed or mixed-encoding content; encoding such text to UTF-8 raises
    UnicodeEncodeError unless it is sanitized first.
    See: https://github.com/dgtlmoon/changedetection.io/issues/3952
    """
    raw_html = '<html><body>Hello \udcad World</body></html>'

    # The unsanitized text really cannot be encoded.
    with pytest.raises(UnicodeEncodeError):
        raw_html.encode('utf-8')

    # Same round trip as the sanitization applied after fetcher.run() in
    # processors/base.py call_browser().
    cleaned = raw_html.encode('utf-8', errors='replace').decode('utf-8')

    for expected_word in ('Hello', 'World'):
        assert expected_word in cleaned
    assert '\udcad' not in cleaned

    # Hashing the cleaned text (as get_raw_document_checksum does) must not raise.
    hashlib.md5(cleaned.encode('utf-8')).hexdigest()
def test_utf8_content_without_charset_header(client, live_server, datastore_path):
    """UTF-8 content served without a charset in Content-Type must still be decoded as UTF-8.

    chardet can misdetect such pages as UTF-7 (Python 3.14 then yields
    surrogates), so UTF-8 is attempted first before falling back to chardet.
    See: https://github.com/dgtlmoon/changedetection.io/issues/3952
    """
    from .util import write_test_file_and_sync

    # Non-ASCII text written as UTF-8 bytes; the response header carries no charset.
    html = '<html><body><p>Español</p><p>Français</p><p>日本語</p></body></html>'
    endpoint_file = os.path.join(datastore_path, "endpoint-content.txt")
    write_test_file_and_sync(endpoint_file, html.encode('utf-8'), mode='wb')

    test_url = url_for('test_endpoint', content_type="text/html", _external=True)
    client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)

    # Each phrase must come back intact - no mojibake and no replacement characters.
    for phrase in ('Español', 'Français', '日本語'):
        assert phrase.encode('utf-8') in res.data
def test_shiftjis_with_meta_charset(client, live_server, datastore_path):
    """Shift-JIS content with no charset in the HTTP header, but declared in a <meta> tag,
    must be decoded using the meta declaration rather than chardet.

    Real-world case: https://github.com/dgtlmoon/changedetection.io/issues/3952
    """
    from .util import write_test_file_and_sync

    japanese_text = '日本語のページ'
    html = f'<html><head><meta http-equiv="Content-Type" content="text/html;charset=Shift-JIS"></head><body><p>{japanese_text}</p></body></html>'
    endpoint_file = os.path.join(datastore_path, "endpoint-content.txt")
    write_test_file_and_sync(endpoint_file, html.encode('shift_jis'), mode='wb')

    test_url = url_for('test_endpoint', content_type="text/html", _external=True)
    client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    preview = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
    assert japanese_text.encode('utf-8') in preview.data
def set_html_response(datastore_path):
test_return_data = """
<html><body><span class="nav_second_img_text">
@@ -220,342 +220,3 @@ def test_regex_error_handling(client, live_server, measure_memory_usage, datasto
assert b'is not a valid regular expression.' in res.data
delete_all_watches(client)
def test_extract_lines_containing(client, live_server, measure_memory_usage, datastore_path):
    """With 'extract_lines_containing' set to 'celsius', the preview keeps only lines containing it."""
    page_html = """<html>
<body>
<p>Current temperature: 21 celsius</p>
<p>Humidity: 55%</p>
<p>Wind speed: 10 km/h</p>
<p>Feels like: 19 celsius</p>
<p>UV index: 3</p>
</body>
</html>
"""
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as endpoint_file:
        endpoint_file.write(page_html)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Save the watch with the line filter applied.
    edit_form = {
        'extract_lines_containing': 'celsius',
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
        "time_between_check_use_default": "y"
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=edit_form,
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    # Matching lines survive ...
    assert b'celsius' in res.data
    # ... everything else is dropped.
    for excluded in (b'Humidity', b'Wind speed', b'UV index'):
        assert excluded not in res.data

    delete_all_watches(client)
def test_extract_lines_containing_case_insensitive(client, live_server, measure_memory_usage, datastore_path):
    """A lowercase 'price' entry must match 'PRICE', 'Price' and 'price' lines alike."""
    page_html = """<html>
<body>
<p>PRICE: $99.99</p>
<p>Price drops to $79.99</p>
<p>Stock: Available</p>
<p>price history shows decline</p>
</body>
</html>
"""
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as endpoint_file:
        endpoint_file.write(page_html)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    edit_form = {
        'extract_lines_containing': 'price',
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
        "time_between_check_use_default": "y"
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=edit_form,
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    # The three price lines, each written in a different case, are all kept.
    for kept in (b'$99.99', b'$79.99', b'price history'):
        assert kept in res.data
    # The line without the word is removed.
    assert b'Stock' not in res.data

    delete_all_watches(client)
def test_extract_lines_containing_multiple_terms(client, live_server, measure_memory_usage, datastore_path):
    """Several 'extract_lines_containing' entries combine as OR: a line is kept when any entry matches."""
    page_html = """<html>
<body>
<p>Temperature: 21 celsius</p>
<p>Humidity: 55%</p>
<p>Wind speed: 10 km/h</p>
<p>Rain chance: 20%</p>
</body>
</html>
"""
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as endpoint_file:
        endpoint_file.write(page_html)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Two entries, one per line, submitted the way a textarea would send them.
    edit_form = {
        'extract_lines_containing': 'celsius\r\nhumidity',
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
        "time_between_check_use_default": "y"
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=edit_form,
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    assert b'celsius' in res.data
    assert b'Humidity' in res.data
    # Lines matching neither entry are gone.
    for excluded in (b'Wind speed', b'Rain chance'):
        assert excluded not in res.data

    delete_all_watches(client)
def test_extract_lines_containing_with_ignore_text(client, live_server, measure_memory_usage, datastore_path):
    """
    extract_lines_containing narrows the text to matching lines, and ignore_text then keeps
    selected lines from triggering change detection (they stay visible but do not affect
    the checksum).

    Both filters are saved BEFORE the first check, so the filtered+ignored checksum is the
    baseline from the start and there is no race between a forced recheck and the next
    content write.
    """
    endpoint_path = os.path.join(datastore_path, "endpoint-content.txt")

    def _serve(html):
        # Replace the content returned by the test endpoint.
        with open(endpoint_path, "w") as endpoint_file:
            endpoint_file.write(html)

    _serve("""<html><body>
<p>Temperature: 21 celsius</p>
<p>Feels like: 19 celsius</p>
<p>Humidity: 55%</p>
</body></html>""")

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={'paused': True})

    # The watch starts paused and is unpaused by this save, which also applies the filters.
    # (Applying them after an unfiltered first check is timing-sensitive: the forced recheck
    # that updates previous_md5 would have to finish before the next content write, which
    # fails intermittently on slower systems / Python 3.14.)
    edit_form = {
        'extract_lines_containing': 'celsius',
        'ignore_text': 'Feels like',
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
        "time_between_check_use_default": "y"
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
        data=edit_form,
        follow_redirects=True
    )
    assert b"unpaused" in res.data

    # First check: sets the filtered+ignored baseline.
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Sanity check: only celsius lines are shown in the preview.
    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)
    assert b'celsius' in res.data
    assert b'Humidity' not in res.data

    # Only the ignored "Feels like" line differs - no change may be reported.
    _serve("""<html><body>
<p>Temperature: 21 celsius</p>
<p>Feels like: 17 celsius</p>
<p>Humidity: 55%</p>
</body></html>""")

    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' not in res.data, "Changing an ignored line should not trigger a change notification"

    client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
    time.sleep(1)

    # Now the non-ignored celsius line differs - a change must be reported.
    _serve("""<html><body>
<p>Temperature: 30 celsius</p>
<p>Feels like: 17 celsius</p>
<p>Humidity: 55%</p>
</body></html>""")

    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data, "Changing a non-ignored line should trigger a change notification"

    delete_all_watches(client)
def test_extract_lines_containing_with_extract_text_regex(client, live_server, measure_memory_usage, datastore_path):
    """
    Pipeline-ordering check: extract_lines_containing narrows the text to the
    relevant lines first, and only afterwards does the extract_text regex pull
    specific tokens out of the lines that remain.
    """
    page_html = """<html><body>
<p>Widget price: $49.99 each</p>
<p>Gadget price: $129.00 each</p>
<p>Latest news: price index up 2%</p>
<p>Stock count: 150 units</p>
<p>Shipping cost: $5.99</p>
</body></html>"""
    endpoint_file = os.path.join(datastore_path, "endpoint-content.txt")
    with open(endpoint_file, "w") as fh:
        fh.write(page_html)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    edit_form = {
        # Stage 1: keep only lines mentioning "price" (drops "Stock count" and "Shipping cost")
        'extract_lines_containing': 'price',
        # Stage 2: from the surviving lines, extract only the dollar amounts
        'extract_text': r'/\$[\d.]+/',
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
        "time_between_check_use_default": "y",
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)
    preview = res.data

    # Dollar amounts taken from the "price" lines must be present
    for amount in (b'$49.99', b'$129.00'):
        assert amount in preview

    # "price index up 2%" carries no dollar amount, so nothing comes from that line;
    # the "Shipping cost" line was already removed before the regex ran
    assert b'$5.99' not in preview

    # The raw line text is replaced by the regex match, so it must not appear
    for raw_text in (b'Widget', b'Stock count'):
        assert raw_text not in preview

    delete_all_watches(client)
def test_extract_lines_containing_with_include_filters_css(client, live_server, measure_memory_usage, datastore_path):
    """
    Pipeline-ordering check: the CSS include_filters selection reduces the HTML
    first, then extract_lines_containing filters lines inside that already
    reduced text.
    """
    page_html = """<html><body>
<div class="weather">
<p>Temperature: 21 celsius</p>
<p>Humidity: 60%</p>
<p>Wind: 15 km/h</p>
</div>
<div class="news">
<p>Local forecast: warm celsius weather ahead</p>
<p>Markets closed early</p>
</div>
</body></html>"""
    endpoint_file = os.path.join(datastore_path, "endpoint-content.txt")
    with open(endpoint_file, "w") as fh:
        fh.write(page_html)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    edit_form = {
        # CSS filter: restrict processing to the weather div
        'include_filters': 'div.weather',
        # Afterwards keep only the lines mentioning celsius
        'extract_lines_containing': 'celsius',
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
        "time_between_check_use_default": "y",
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)
    preview = res.data

    # The celsius line from the weather div is the only one expected to pass both filters
    assert b'celsius' in preview

    excluded = (
        # Remaining weather lines are dropped by extract_lines_containing
        b'Humidity',
        b'Wind',
        # The news div is removed by the CSS filter even though it mentions "celsius"
        b'Markets',
        b'forecast',
    )
    for fragment in excluded:
        assert fragment not in preview

    delete_all_watches(client)
-73
View File
@@ -624,76 +624,3 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
assert "".encode() in res.data, "Expected Korean '' for Minutes"
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
    """
    Clearing snapshot history must work with the translated confirmation word.

    Issue #3865: with the app language set to German, the clear-history
    confirmation dialog shows the translated word ('löschen'), but the backend
    only accepted the English word 'clear', which made it impossible to clear
    snapshots in non-English languages.
    """
    from flask import url_for

    test_url = url_for('test_endpoint', _external=True)

    # Add a watch so there is history to clear
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Set language to German
    res = client.get(
        url_for("set_language", locale="de"),
        follow_redirects=True
    )
    assert res.status_code == 200

    # The clear-history page must show the German confirmation word
    res = client.get(
        url_for("ui.clear_all_history"),
        follow_redirects=True
    )
    assert res.status_code == 200
    assert "löschen".encode() in res.data, "Expected German word 'löschen' on clear history page"

    # Submit the form with the German translated word
    res = client.post(
        url_for("ui.clear_all_history"),
        data={"confirmtext": "löschen"},
        follow_redirects=True
    )
    assert res.status_code == 200
    # The backend must not report a confirmation mismatch
    assert b"Incorrect confirmation text" not in res.data, \
        "German confirmation word 'löschen' should be accepted (issue #3865)"

    # Switch back to English and verify the English word still works
    res = client.get(
        url_for("set_language", locale="en_US"),
        follow_redirects=True
    )
    res = client.post(
        url_for("ui.clear_all_history"),
        data={"confirmtext": "clear"},
        follow_redirects=True
    )
    assert res.status_code == 200
    assert b"Incorrect confirmation text" not in res.data, \
        "English confirmation word 'clear' should still be accepted"

    # A missing/empty confirmtext must not crash the server
    res = client.post(
        url_for("ui.clear_all_history"),
        data={},
        follow_redirects=True
    )
    assert res.status_code == 200, \
        "Missing confirmtext should not crash the server"
-82
View File
@@ -214,85 +214,3 @@ def test_import_watchete_xlsx(client, live_server, measure_memory_usage, datasto
assert watch.get('fetch_backend') == 'system' # uses default if blank
delete_all_watches(client)
def test_import_wachete_xlsx_row_counter(client, live_server, measure_memory_usage, datastore_path):
    """The Wachete XLSX import must keep counting rows after a row fails.

    Regression: row_id was only incremented in the try/else (on success), so
    after any failure the counter froze and every later error cited the stale
    number. With the enumerate() fix, row 5 must say "row 5", not "row 3".
    """
    import openpyxl

    workbook = openpyxl.Workbook()
    sheet = workbook.active

    sheet_rows = [
        # Row 1: header
        ['Name', 'Id', 'Url', 'Interval (min)', 'XPath', 'Dynamic Wachet', 'Portal Wachet', 'Folder'],
        # Row 2: valid
        ['Site A', '001', 'https://example.com/a', 60, None, None, None, None],
        # Row 3: bad URL, must be reported as row 3
        ['Site B', '002', 'not-a-valid-url', 60, None, None, None, None],
        # Row 4: valid
        ['Site C', '003', 'https://example.com/c', 60, None, None, None, None],
        # Row 5: bad URL, must be reported as row 5 and not the stale "row 3"
        ['Site D', '004', 'also-not-valid', 60, None, None, None, None],
    ]
    for row in sheet_rows:
        sheet.append(row)

    # Serialise the workbook in memory and rewind so the upload reads from the start
    xlsx_bytes = io.BytesIO()
    workbook.save(xlsx_bytes)
    xlsx_bytes.seek(0)

    upload = {'file_mapping': 'wachete', 'xlsx_file': (xlsx_bytes, 'test.xlsx')}
    res = client.post(
        url_for("imports.import_page"),
        data=upload,
        follow_redirects=True,
    )

    expected_messages = (
        b'2 imported from Wachete .xlsx',
        b'Error processing row number 3',
        b'Error processing row number 5',
    )
    for message in expected_messages:
        assert message in res.data

    delete_all_watches(client)
def test_import_custom_xlsx_row_counter(client, live_server, measure_memory_usage, datastore_path):
    """The custom XLSX import must report the actual failing row, not always row 1.

    Regression: row_i was incremented in the else clause of the *outer*
    try/except (which fired only once, after the whole loop), so every
    URL-validation error inside the loop reported "row 1". With enumerate()
    the third row must say "row 3", not "row 1".
    """
    import openpyxl

    workbook = openpyxl.Workbook()
    sheet = workbook.active

    sheet_rows = [
        # Row 1: bad URL, must be reported as row 1
        ['not-valid-url-row1'],
        # Row 2: valid
        ['https://example.com/b'],
        # Row 3: bad URL, must be reported as row 3 and not "row 1"
        ['not-valid-url-row3'],
        # Row 4: valid
        ['https://example.com/d'],
    ]
    for row in sheet_rows:
        sheet.append(row)

    # Serialise the workbook in memory and rewind so the upload reads from the start
    xlsx_bytes = io.BytesIO()
    workbook.save(xlsx_bytes)
    xlsx_bytes.seek(0)

    upload = {
        'file_mapping': 'custom',
        'custom_xlsx[col_0]': '1',
        'custom_xlsx[col_type_0]': 'url',
        'xlsx_file': (xlsx_bytes, 'test.xlsx'),
    }
    res = client.post(
        url_for("imports.import_page"),
        data=upload,
        follow_redirects=True,
    )

    expected_messages = (
        b'2 imported from custom .xlsx',
        b'Error processing row number 1',
        b'Error processing row number 3',
    )
    for message in expected_messages:
        assert message in res.data

    delete_all_watches(client)
@@ -16,51 +16,6 @@ except ModuleNotFoundError:
def test_jsonp_treated_as_plaintext():
    """JSONP served as application/json is classified as plaintext; real JSON stays JSON."""
    from ..processors.magic import guess_stream_type

    # Each case: (content, expected is_json, expected is_plaintext)
    cases = [
        # JSONP content (server wrongly claims application/json) should be detected as plaintext.
        # Callback names are arbitrary identifiers, not always 'cb'
        ('jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })', False, True),
        # Variation with a dotted callback name, e.g. jQuery.cb(...)
        ('some.callback({ "version": "1.0" })', False, True),
        # Real JSON should still be detected as JSON
        ('{ "version": "8.0.41", "url": "https://example.com/app.apk" }', True, False),
    ]

    for content, expect_json, expect_plaintext in cases:
        result = guess_stream_type(http_content_header="application/json", content=content)
        assert result.is_json is expect_json
        assert result.is_plaintext is expect_plaintext
def test_jsonp_json_filter_extraction():
    """json:/jq:/jqraw: filters must reach into the JSON payload wrapped by a JSONP callback."""
    from .. import html_tools

    # Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
    jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'

    jsonpath_cases = (
        # Deep nested jsonpath filter into the first array element
        ("json:$.platforms.android.variants[0].versionName", '"8.0.68"'),
        # Filter that selects the second array element
        ("json:$.platforms.android.variants[1].arch", '"arm32"'),
    )
    for json_filter, expected in jsonpath_cases:
        text = html_tools.extract_json_as_string(jsonp_content, json_filter)
        assert text == expected

    # jq-based filters are only exercised when the module-level jq_support flag is set
    if not jq_support:
        return

    text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
    assert text == '"8.0.68"'

    text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
    assert text == "https://example.com/app-arm32.apk"
def test_unittest_inline_html_extract():
# So lets pretend that the JSON we want is inside some HTML
content="""
@@ -350,7 +350,6 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
res = client.get(url_for("settings.settings_page"))
assert b'{{restock.original_price}}' in res.data
assert b'{{restock.previous_price}}' in res.data
assert b'Original price at first check' in res.data
#####################
@@ -359,7 +358,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
url_for("settings.settings_page"),
data={"application-notification_urls": notification_url,
"application-notification_title": "title new price {{restock.price}}",
"application-notification_body": "new price {{restock.price}} previous price {{restock.previous_price}} instock {{restock.in_stock}}",
"application-notification_body": "new price {{restock.price}}",
"application-notification_format": default_notification_format,
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
@@ -373,6 +372,8 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
assert b"Settings updated." in res.data
set_original_response(props_markup=instock_props[0], price='960.45', datastore_path=datastore_path)
# A change in price, should trigger a change by default
set_original_response(props_markup=instock_props[0], price='1950.45', datastore_path=datastore_path)
client.get(url_for("ui.form_watch_checknow"))
@@ -383,7 +384,6 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
notification = f.read()
assert "new price 1950.45" in notification
assert "title new price 1950.45" in notification
assert "previous price 960.45" in notification
## Now test the "SEND TEST NOTIFICATION" is working
os.unlink(os.path.join(datastore_path, "notification.txt"))
@@ -467,38 +467,3 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
assert b'155.55' in res.data
delete_all_watches(client)
def test_itemprop_as_str(client, live_server, measure_memory_usage, datastore_path):
    """The restock processor must pick up a price given as a string in itemprop <meta> tags.

    Serves a page whose schema.org Offer carries content="767.55" for
    itemprop="price", adds it as a restock_diff watch, and asserts that the
    price is shown on the watch list.
    """
    # Plain string: the markup has no placeholders, so no f-string is needed
    test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<span itemprop="offers" itemscope itemtype="http://schema.org/Offer">
<meta content="767.55" itemprop="price"/>
<meta content="EUR" itemprop="priceCurrency"/>
<meta content="InStock" itemprop="availability"/>
<meta content="https://www.123-test.dk" itemprop="url"/>
</span>
</body>
</html>
"""
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
        f.write(test_return_data)

    test_url = url_for('test_endpoint', _external=True)
    client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )
    client.get(url_for("ui.form_watch_checknow"))
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'767.55' in res.data

    # Remove the watch again, as the other tests in this file do
    delete_all_watches(client)
+14 -100
View File
@@ -50,85 +50,6 @@ def test_favicon(client, live_server, measure_memory_usage, datastore_path):
res = client.get(url_for('static_content', group='js', filename='../styles/styles.css'))
assert res.status_code != 200
def test_favicon_inline_data_uri(client, live_server, measure_memory_usage, datastore_path):
    """
    bump_favicon() must cope with a data URI passed as the url parameter.

    Previously this logged "Cant work out file extension from 'data:image/png;base64,...'"
    and bailed. The mime_type from the data URI should decide the file extension.
    """
    import base64
    import os

    # 1x1 transparent PNG (minimal valid PNG bytes)
    PNG_BYTES = (
        b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
        b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01'
        b'\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82'
    )
    png_b64 = base64.b64encode(PNG_BYTES).decode()
    data_uri = "data:image/png;base64," + png_b64

    uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]

    # Must neither raise nor bail; the favicon is expected to be stored with a .png extension
    watch.bump_favicon(url=data_uri, favicon_base_64=png_b64, mime_type='image/png')

    favicon_fname = watch.get_favicon_filename()
    assert favicon_fname is not None, "Favicon should have been saved"
    assert favicon_fname.endswith('.png'), f"Expected .png extension, got: {favicon_fname}"

    # The stored file must be exactly as large as the decoded PNG payload
    stored_path = os.path.join(watch.data_dir, favicon_fname)
    assert os.path.getsize(stored_path) == len(PNG_BYTES)

    # The static route must serve the same bytes back
    res = client.get(url_for('static_content', group='favicon', filename=uuid))
    assert res.status_code == 200
    assert res.data == PNG_BYTES
def test_favicon_mime_type_overrides_url_extension(client, live_server, measure_memory_usage, datastore_path):
    """
    The mime_type parameter takes precedence over the URL path extension.

    A URL ending in .ico combined with mime_type='image/png' should be saved as .png.
    """
    import base64

    # 1x1 transparent PNG (minimal valid PNG bytes)
    PNG_BYTES = (
        b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
        b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01'
        b'\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82'
    )
    png_b64 = base64.b64encode(PNG_BYTES).decode()

    uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]

    # URL says .ico, mime type says PNG: the mime type is expected to win
    watch.bump_favicon(url='https://example.com/favicon.ico', favicon_base_64=png_b64, mime_type='image/png')

    favicon_fname = watch.get_favicon_filename()
    assert favicon_fname is not None
    assert favicon_fname.endswith('.png'), f"mime_type should override URL extension, got: {favicon_fname}"
def test_favicon_oversized_rejected(client, live_server, measure_memory_usage, datastore_path):
    """Favicons larger than 1 MB must be silently dropped."""
    import base64

    # One byte over the 1 MB limit
    oversized = b'\x00' * (1 * 1024 * 1024 + 1)
    oversized_b64 = base64.b64encode(oversized).decode()

    uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]

    result = watch.bump_favicon(url='https://example.com/big.png', favicon_base_64=oversized_b64, mime_type='image/png')

    assert result is None, "bump_favicon should return None for oversized favicon"
    assert watch.get_favicon_filename() is None, "No favicon file should have been written"
def test_bad_access(client, live_server, measure_memory_usage, datastore_path):
res = client.post(
@@ -663,16 +584,13 @@ def test_static_directory_traversal(client, live_server, measure_memory_usage, d
def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memory_usage, datastore_path):
"""
SSRF protection: IANA-reserved/private IP addresses are blocked at fetch-time, not add-time.
Watches targeting private/reserved IPs can be *added* freely; the block happens when the
fetcher actually tries to reach the URL (via validate_iana_url() in call_browser()).
SSRF protection: IANA-reserved/private IP addresses must be blocked by default.
Covers:
1. is_private_hostname() correctly classifies all reserved ranges
2. is_safe_valid_url() ALLOWS private-IP URLs at add-time (IANA check moved to fetch-time)
3. ALLOW_IANA_RESTRICTED_ADDRESSES has no effect on add-time; it only controls fetch-time
4. UI form accepts private-IP URLs at add-time without error
2. is_safe_valid_url() rejects private-IP URLs at add-time (env var off)
3. is_safe_valid_url() allows private-IP URLs when ALLOW_IANA_RESTRICTED_ADDRESSES=true
4. UI form rejects private-IP URLs and shows the standard error message
5. Requests fetcher blocks fetch-time DNS rebinding (fresh check on every fetch)
6. Requests fetcher blocks redirects that lead to a private IP (open-redirect bypass)
@@ -705,10 +623,9 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
assert not is_private_hostname(host), f"{host} should be identified as public"
# ------------------------------------------------------------------
# 2. is_safe_valid_url() ALLOWS private-IP URLs at add-time
# IANA check is no longer done here — it moved to fetch-time validate_iana_url()
# 2. is_safe_valid_url() blocks private-IP URLs (env var off)
# ------------------------------------------------------------------
private_ip_urls = [
blocked_urls = [
'http://127.0.0.1/',
'http://10.0.0.1/',
'http://172.16.0.1/',
@@ -719,24 +636,21 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
'http://[fc00::1]/',
'http://[fe80::1]/',
]
for url in private_ip_urls:
assert is_safe_valid_url(url), f"{url} should be allowed by is_safe_valid_url (IANA check is at fetch-time)"
for url in blocked_urls:
assert not is_safe_valid_url(url), f"{url} should be blocked by is_safe_valid_url"
# ------------------------------------------------------------------
# 3. ALLOW_IANA_RESTRICTED_ADDRESSES does not affect add-time validation
# It only controls fetch-time blocking inside validate_iana_url()
# 3. ALLOW_IANA_RESTRICTED_ADDRESSES=true bypasses the block
# ------------------------------------------------------------------
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'true')
assert is_safe_valid_url('http://127.0.0.1/'), \
"Private IP should be allowed at add-time regardless of ALLOW_IANA_RESTRICTED_ADDRESSES"
"Private IP should be allowed when ALLOW_IANA_RESTRICTED_ADDRESSES=true"
# Restore the block for the remaining assertions
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')
assert is_safe_valid_url('http://127.0.0.1/'), \
"Private IP should be allowed at add-time regardless of ALLOW_IANA_RESTRICTED_ADDRESSES"
# ------------------------------------------------------------------
# 4. UI form accepts private-IP URLs at add-time
# The watch is created; the SSRF block fires later at fetch-time
# 4. UI form rejects private-IP URLs
# ------------------------------------------------------------------
for url in ['http://127.0.0.1/', 'http://169.254.169.254/latest/meta-data/']:
res = client.post(
@@ -744,8 +658,8 @@ def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memor
data={'url': url, 'tags': ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted or invalid URL format' not in res.data, \
f"UI should accept {url} at add-time (SSRF is blocked at fetch-time)"
assert b'Watch protocol is not permitted or invalid URL format' in res.data, \
f"UI should reject {url}"
# ------------------------------------------------------------------
# 5. Fetch-time DNS-rebinding check in the requests fetcher
@@ -1,144 +0,0 @@
#!/usr/bin/env python3
"""
Integration tests for auto-applying tags to watches by URL pattern matching.
Verifies:
- A tag with url_match_pattern shows on the watch overview list (via get_all_tags_for_watch)
- The auto-applied tag appears on the watch edit page
- A watch whose URL does NOT match the pattern does not get the tag
"""
import json
from flask import url_for
from .util import set_original_response, live_server_setup
def test_tag_url_pattern_shows_in_overview(client, live_server, measure_memory_usage, datastore_path):
    """A tag whose url_match_pattern matches a watch URL must show up in the watch overview."""
    set_original_response(datastore_path=datastore_path)
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

    def _post_json(endpoint, payload):
        # Authenticated JSON POST against the named API endpoint
        return client.post(
            url_for(endpoint),
            data=json.dumps(payload),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )

    # Create a tag carrying a URL match pattern
    res = _post_json("tag", {"title": "Auto GitHub", "url_match_pattern": "*github.com*"})
    assert res.status_code == 201, res.data
    tag_uuid = res.json['uuid']

    # A watch whose URL matches the pattern
    res = _post_json("createwatch", {"url": "https://github.com/someuser/repo"})
    assert res.status_code == 201, res.data
    matching_watch_uuid = res.json['uuid']

    # A watch whose URL does NOT match
    res = _post_json("createwatch", {"url": "https://example.com/page"})
    assert res.status_code == 201, res.data
    non_matching_watch_uuid = res.json['uuid']

    # Watch overview: the tag label must be rendered for the matching watch
    res = client.get(url_for("watchlist.index"))
    assert res.status_code == 200
    html = res.get_data(as_text=True)

    # The tag title should appear somewhere on the page (rendered per-watch via get_all_tags_for_watch)
    assert "Auto GitHub" in html, "Auto-matched tag title must appear in watch overview"

    # Cross-check against the datastore: only the matching watch resolves the pattern tag
    datastore = live_server.app.config['DATASTORE']
    assert tag_uuid in datastore.get_all_tags_for_watch(matching_watch_uuid), \
        "Pattern-matched tag must be returned for matching watch"
    assert tag_uuid not in datastore.get_all_tags_for_watch(non_matching_watch_uuid), \
        "Pattern-matched tag must NOT appear for non-matching watch"
def test_auto_applied_tag_shows_on_watch_edit(client, live_server, measure_memory_usage, datastore_path):
    """The watch edit page must list tags that were auto-applied through a URL pattern."""
    set_original_response(datastore_path=datastore_path)
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

    def _post_json(endpoint, payload):
        # Authenticated JSON POST against the named API endpoint
        return client.post(
            url_for(endpoint),
            data=json.dumps(payload),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )

    res = _post_json("tag", {"title": "Auto Docs", "url_match_pattern": "*docs.example.com*"})
    assert res.status_code == 201, res.data

    res = _post_json("createwatch", {"url": "https://docs.example.com/guide"})
    assert res.status_code == 201, res.data
    watch_uuid = res.json['uuid']

    # The edit page of the matching watch must mention the auto-applied tag
    res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
    assert res.status_code == 200
    html = res.get_data(as_text=True)
    assert "Auto Docs" in html, "Auto-applied tag name must appear on watch edit page"

    # NOTE(review): the "auto" alternative is already satisfied by the tag title
    # "Auto Docs" asserted above, so this check looks unable to fail on its own.
    # Consider asserting the exact marker text the template renders.
    lowered = html.lower()
    assert "automatically applied" in lowered or "auto" in lowered, \
        "Watch edit page must indicate the tag is auto-applied by pattern"
def test_multiple_pattern_tags_all_applied(client, live_server, measure_memory_usage, datastore_path):
    """A watch that matches several tag patterns must receive every one of them, not just the first."""
    set_original_response(datastore_path=datastore_path)
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

    def _post_json(endpoint, payload):
        # Authenticated JSON POST; every creation in this test is expected to answer 201
        response = client.post(
            url_for(endpoint),
            data=json.dumps(payload),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )
        assert response.status_code == 201, response.data
        return response.json['uuid']

    # Two tags with different patterns that both match the same URL
    tag_docs_uuid = _post_json("tag", {"title": "Org Docs", "url_match_pattern": "*docs.*"})
    tag_python_uuid = _post_json("tag", {"title": "Org Python", "url_match_pattern": "*python*"})

    # A third tag whose pattern does NOT match
    tag_rust_uuid = _post_json("tag", {"title": "Org Rust", "url_match_pattern": "*rust-lang*"})

    # The watch URL matches the "docs" and "python" patterns but not "rust"
    watch_uuid = _post_json("createwatch", {"url": "https://docs.python.org/3/library/fnmatch.html"})

    datastore = live_server.app.config['DATASTORE']
    resolved = datastore.get_all_tags_for_watch(watch_uuid)

    assert tag_docs_uuid in resolved, "First matching tag must be included"
    assert tag_python_uuid in resolved, "Second matching tag must be included"
    assert tag_rust_uuid not in resolved, "Non-matching tag must NOT be included"
-14
View File
@@ -70,10 +70,6 @@ def test_trigger_functionality(client, live_server, measure_memory_usage, datast
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# And set the trigger text as 'ignore text', it should then not trigger
live_server.app.config['DATASTORE'].data['settings']['application']['global_ignore_text'] = [trigger_text]
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
@@ -126,16 +122,6 @@ def test_trigger_functionality(client, live_server, measure_memory_usage, datast
# Now set the content which contains the trigger text
set_modified_with_trigger_text_response(datastore_path=datastore_path)
# There is a "ignore text" set of the change that should be also the trigger, it should not trigger
# because the ignore text should be stripped from the response, therefor, the trigger should not fire
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
live_server.app.config['DATASTORE'].data['settings']['application']['global_ignore_text'] = []
# check that the trigger fired once we stopped ignore it
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
@@ -592,79 +592,3 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
for content_type in RSS_XML_CONTENT_TYPES:
_subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
def test_xpath_blocked_functions_unit():
    """Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed)."""
    import elementpath
    from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
    from lxml import html

    html_content = '<html><body><p>safe content</p></body></html>'

    dangerous_expressions = [
        "unparsed-text('file:///etc/passwd')",
        "unparsed-text-lines('file:///etc/passwd')",
        "unparsed-text-available('file:///etc/passwd')",
        "doc('file:///etc/passwd')",
        "doc-available('file:///etc/passwd')",
        "json-doc('file:///datastore/changedetection.json')",
        "collection('file:///datastore/')",
        "uri-collection('file:///datastore/')",
        "transform(map{})",
        "load-xquery-module('foo')",
        "environment-variable('PATH')",
        "available-environment-variables()",
    ]

    for expr in dangerous_expressions:
        # xpath_filter() must raise instead of silently returning file contents
        try:
            result = xpath_filter(expr, html_content)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            assert False, f"xpath_filter should have raised for: {expr!r}, got: {result!r}"

        # Selecting with SafeXPath3Parser directly must be rejected as well
        tree = html.fromstring(html_content)
        try:
            elementpath.select(tree, expr, parser=SafeXPath3Parser)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            assert False, f"SafeXPath3Parser should have raised for: {expr!r}"

    # Sanity check: an ordinary XPath expression still works
    assert xpath_filter('//p/text()', html_content) == 'safe content'
# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
    """Edit-form validation must reject dangerous XPath 3.0 functions before they are stored."""
    from flask import url_for

    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    dangerous_expressions = [
        "xpath:unparsed-text('file:///etc/passwd')",
        "xpath:environment-variable('PATH')",
        "xpath:doc('file:///etc/passwd')",
    ]

    for expr in dangerous_expressions:
        # Submit the edit form with the dangerous expression as the include filter
        form_data = {
            "include_filters": expr,
            "url": test_url,
            "tags": "",
            "headers": "",
            'fetch_backend': "html_requests",
            "time_between_check_use_default": "y",
        }
        res = client.post(
            url_for("ui.ui_edit.edit_page", uuid="first"),
            data=form_data,
            follow_redirects=True,
        )
        assert b"is not a valid XPath expression" in res.data, \
            f"Form should reject dangerous expression: {expr!r}"

    delete_all_watches(client)
@@ -1,85 +0,0 @@
"""
Static analysis test: verify @login_optionally_required is always applied
AFTER (inner to) @blueprint.route(), not before it.
In Flask, @route() must be the outermost decorator because it registers
whatever function it receives. If @login_optionally_required is placed
above @route(), the raw unprotected function gets registered and auth is
silently bypassed (GHSA-jmrh-xmgh-x9j4).
Correct order (route outermost, auth inner):
@blueprint.route('/path')
@login_optionally_required
def view(): ...
Wrong order (auth never called):
@login_optionally_required   # wraps only after route() has registered the raw view; the wrapper is then discarded
@blueprint.route('/path')
def view(): ...
"""
import ast
import pathlib
import pytest
REPO_ROOT = pathlib.Path(__file__).parents[3] # …/changedetection.io/
SOURCE_ROOT = REPO_ROOT / "changedetectionio"
def _is_route_decorator(node: ast.expr) -> bool:
"""Return True if the decorator looks like @something.route(...)."""
return (
isinstance(node, ast.Call)
and isinstance(node.func, ast.Attribute)
and node.func.attr == "route"
)
def _is_auth_decorator(node: ast.expr) -> bool:
"""Return True if the decorator is @login_optionally_required."""
return isinstance(node, ast.Name) and node.id == "login_optionally_required"
def collect_violations() -> list[str]:
    """Scan every ``*.py`` file under SOURCE_ROOT for mis-ordered auth/route decorators.

    Returns:
        One human-readable message per (auth decorator, route decorator) pair
        on the same function where the auth decorator is listed before
        (i.e. above) the route decorator. Files that fail to parse with a
        SyntaxError are skipped.
    """
    found = []
    for source_file in SOURCE_ROOT.rglob("*.py"):
        try:
            module = ast.parse(source_file.read_text(encoding="utf-8"), filename=str(source_file))
        except SyntaxError:
            continue

        for func in ast.walk(module):
            if not isinstance(func, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue

            decos = func.decorator_list
            auth_at = [pos for pos, deco in enumerate(decos) if _is_auth_decorator(deco)]
            route_at = [pos for pos, deco in enumerate(decos) if _is_route_decorator(deco)]

            # decorator_list is ordered top-to-bottom, so a smaller index means
            # the decorator sits higher up in the source.
            misordered = [(a, r) for a in auth_at for r in route_at if a < r]
            for a, r in misordered:
                rel = source_file.relative_to(REPO_ROOT)
                found.append(
                    f"{rel}:{func.lineno} — `{func.name}`: "
                    f"@login_optionally_required (line {decos[a].lineno}) "
                    f"is above @route (line {decos[r].lineno}); "
                    f"auth wrapper will never be called"
                )
    return found
def test_auth_decorator_order():
    """Fail with a full report when any view has the auth decorator above its route decorator."""
    violations = collect_violations()
    if not violations:
        return

    explanation = (
        "\n\nFound routes where @login_optionally_required is placed ABOVE @blueprint.route().\n"
        "This silently disables authentication — @route() registers the raw function\n"
        "and the auth wrapper is never called.\n\n"
        "Fix: move @blueprint.route() to be the outermost (topmost) decorator.\n\n"
    )
    # One violation per line, appended after the explanation.
    pytest.fail(explanation + "\n".join(violations))
@@ -64,7 +64,7 @@ class TestTriggerConditions(unittest.TestCase):
"conditions": [
{"operator": ">=", "field": "extracted_number", "value": "10"},
{"operator": "<=", "field": "extracted_number", "value": "5000"},
{"operator": "in", "field": "page_filtered_text", "value": "rock"},
{"operator": "in", "field": "page_text", "value": "rock"},
#{"operator": "starts_with", "field": "page_text", "value": "I saw"},
]
}
@@ -15,9 +15,7 @@ from changedetectionio.diff import (
CHANGED_PLACEMARKER_OPEN,
CHANGED_PLACEMARKER_CLOSED,
CHANGED_INTO_PLACEMARKER_OPEN,
CHANGED_INTO_PLACEMARKER_CLOSED,
extract_changed_from,
extract_changed_to,
CHANGED_INTO_PLACEMARKER_CLOSED
)
@@ -383,140 +381,5 @@ Line 3 with tabs and spaces"""
self.assertNotIn('[-Line 2-]', output)
self.assertNotIn('[+Line 2+]', output)
def test_diff_changed_from_to_word_level(self):
    """Main scenario (e.g. price monitoring): only the old and new value are extracted from a changed line."""
    old_text = "Widget costs $99.99 per month"
    new_text = "Widget costs $109.99 per month"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99.99")
    self.assertEqual(extract_changed_to(rendered), "$109.99")
def test_diff_changed_from_to_multiple_changes(self):
    """Changed fragments from different lines come back joined by a newline.

    The unchanged middle line keeps each change a 1-to-1 line replacement, so
    the word-level diff is applied per line instead of the multi-line block
    fallback.
    """
    old_text = "\n".join(["Price $99", "unchanged", "Tax $5"])
    new_text = "\n".join(["Price $149", "unchanged", "Tax $12"])

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99\n$5")
    self.assertEqual(extract_changed_to(rendered), "$149\n$12")
def test_diff_changed_from_to_pure_insert_delete(self):
    """Line-level changes (word diff switched off) are extracted as whole lines."""
    old_text, new_text = "old line", "new line"

    # Disabling word_diff makes render_diff emit line-level CHANGED markers.
    rendered = diff.render_diff(old_text, new_text, word_diff=False)

    self.assertEqual(extract_changed_from(rendered), "old line")
    self.assertEqual(extract_changed_to(rendered), "new line")
def test_diff_changed_from_to_similar_numbers(self):
    """'$90.00' -> '$9.00' has to yield the full tokens, never a fragment such as '0.00'.

    This relies on the word diff treating each whitespace-separated token as
    atomic, so the characters inside '$90.00' and '$9.00' are never compared.
    """
    old_text = "for sale $90.00"
    new_text = "for sale $9.00"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$90.00")
    self.assertEqual(extract_changed_to(rendered), "$9.00")
def test_diff_changed_from_to_whole_line_replaced(self):
    """Extraction also works when no token is shared between the old and new line.

    In that situation the renderer marks the line with CHANGED / CHANGED_INTO
    instead of REMOVED / ADDED; the extractors must handle both marker kinds.
    """
    old_text, new_text = "$99", "$109"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99")
    self.assertEqual(extract_changed_to(rendered), "$109")
def test_diff_changed_from_to_multiple_words_same_line(self):
    """Several changed words on one line are each reported, joined by a newline.

    'quick brown fox jumps' -> 'slow brown fox hops' produces 'quick\\njumps'
    and 'slow\\nhops'. The extracted values are most useful when only a single
    value changes per line.
    """
    old_text = "quick brown fox jumps"
    new_text = "slow brown fox hops"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "quick\njumps")
    self.assertEqual(extract_changed_to(rendered), "slow\nhops")
def test_diff_changed_from_to_no_change(self):
    """Identical input on both sides gives empty 'from' and 'to' extractions."""
    same_text = "nothing changed here"

    rendered = diff.render_diff(same_text, same_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "")
    self.assertEqual(extract_changed_to(rendered), "")
def test_word_diff_no_prefix_whole_line_replaced(self):
    """With include_change_type_prefix=False a whole-line word diff carries no placemarkers (issue #3816)."""
    old_text, new_text = "73", "100"

    rendered = diff.render_diff(old_text, new_text, word_diff=True, include_change_type_prefix=False)

    self.assertNotIn('PLACEMARKER', rendered)
    # Both raw values must still be present in the output.
    self.assertIn('73', rendered)
    self.assertIn('100', rendered)
def test_word_diff_no_prefix_inline_changes(self):
    """With include_change_type_prefix=False an inline word diff carries no placemarkers (issue #3816)."""
    old_text = "the price is 50 dollars"
    new_text = "the price is 75 dollars"

    rendered = diff.render_diff(old_text, new_text, word_diff=True, include_change_type_prefix=False)

    self.assertNotIn('PLACEMARKER', rendered)
    self.assertIn('50', rendered)
    self.assertIn('75', rendered)
def test_word_diff_with_prefix_still_wraps(self):
    """With include_change_type_prefix=True the changed tokens are still wrapped in placemarkers."""
    old_text, new_text = "73", "100"

    rendered = diff.render_diff(old_text, new_text, word_diff=True, include_change_type_prefix=True)

    self.assertIn('PLACEMARKER', rendered)
def test_word_diff_no_prefix_exact_output(self):
    """Regression pin: exact render_diff output with and without the change-type prefix.

    Three cases are checked:
      * whole-line replacement, no prefix: old and new value on separate lines;
      * inline replacement, no prefix: unchanged tokens are kept and the old
        and new token end up concatenated in place, with no markers;
      * whole-line replacement, with prefix: both values fully wrapped.
    """
    # Case 1: whole line replaced, markers disabled.
    whole_line = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=False)
    self.assertEqual(whole_line, '73\n100')

    # Case 2: a single token replaced inside otherwise unchanged text.
    inline = diff.render_diff(
        'the price is 50 dollars',
        'the price is 75 dollars',
        word_diff=True,
        include_change_type_prefix=False,
    )
    self.assertEqual(inline, 'the price is 5075 dollars')

    # Case 3: same whole-line input with markers enabled.
    wrapped = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=True)
    expected_wrapped = (
        '@changed_PLACEMARKER_OPEN73@changed_PLACEMARKER_CLOSED\n'
        '@changed_into_PLACEMARKER_OPEN100@changed_into_PLACEMARKER_CLOSED'
    )
    self.assertEqual(wrapped, expected_wrapped)
if __name__ == '__main__':
unittest.main()
@@ -1,68 +0,0 @@
"""
Unit test for send_step_failure_notification regression.
Before the fix, line 499 called self._check_cascading_vars('notification_format', watch)
which raises AttributeError because _check_cascading_vars is a module-level function,
not a method of NotificationService.
"""
import queue
from unittest.mock import MagicMock
def _make_datastore(watch_uuid, notification_url):
"""Minimal datastore mock that NotificationService and _check_cascading_vars need."""
watch = MagicMock()
watch.get = lambda key, default=None: {
'uuid': watch_uuid,
'url': 'https://example.com',
'notification_urls': [notification_url],
'notification_format': '',
'notification_muted': False,
}.get(key, default)
watch.__getitem__ = lambda self, key: watch.get(key)
datastore = MagicMock()
datastore.data = {
'watching': {watch_uuid: watch},
'settings': {
'application': {
'notification_urls': [],
'notification_format': 'text',
'filter_failure_notification_threshold_attempts': 3,
}
}
}
datastore.get_all_tags_for_watch.return_value = {}
return datastore, watch
def test_send_step_failure_notification_does_not_raise():
    """Calling send_step_failure_notification must complete without an AttributeError.

    Regression guard: the method used to reach ``_check_cascading_vars``
    through ``self.`` although it is a module-level function, which raised
    AttributeError: 'NotificationService' object has no attribute '_check_cascading_vars'
    """
    from changedetectionio.notification_service import NotificationService

    uuid = 'test-uuid-1234'
    datastore, _ = _make_datastore(uuid, 'post://localhost/test')
    service = NotificationService(datastore=datastore, notification_q=queue.Queue())

    # The call itself is the assertion: any exception fails the test.
    service.send_step_failure_notification(watch_uuid=uuid, step_n=0)
def test_send_step_failure_notification_queues_item():
    """With a notification URL configured, a step failure puts one titled item on the queue."""
    from changedetectionio.notification_service import NotificationService

    uuid = 'test-uuid-5678'
    pending = queue.Queue()
    datastore, _ = _make_datastore(uuid, 'post://localhost/test')
    service = NotificationService(datastore=datastore, notification_q=pending)

    service.send_step_failure_notification(watch_uuid=uuid, step_n=1)

    assert not pending.empty(), "Expected a notification to be queued"
    queued = pending.get_nowait()
    assert 'notification_title' in queued
    # step_n=1 is expected to be reported as "position 2" in the title.
    assert 'position 2' in queued['notification_title']
@@ -1,68 +0,0 @@
#!/usr/bin/env python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_tag_url_match
import unittest
from changedetectionio.model.Tag import model as TagModel
def make_tag(pattern):
    """Create a bare Tag holding only ``url_match_pattern``.

    The instance is allocated with ``__new__`` and initialised as a plain
    dict, so the model's own ``__init__`` is not run.
    """
    instance = TagModel.__new__(TagModel)
    dict.__init__(instance)
    instance['url_match_pattern'] = pattern
    return instance
class TestTagUrlMatch(unittest.TestCase):
    """Checks Tag.matches_url() for wildcard patterns, plain substrings and empty input."""

    def test_wildcard_matches(self):
        rule = make_tag('*example.com*')
        for matching in ('https://example.com/page', 'https://www.example.com/shop/item'):
            self.assertTrue(rule.matches_url(matching))
        self.assertFalse(rule.matches_url('https://other.com/page'))

    def test_wildcard_case_insensitive(self):
        rule = make_tag('*EXAMPLE.COM*')
        self.assertTrue(rule.matches_url('https://example.com/page'))

    def test_substring_match(self):
        rule = make_tag('github.com/myorg')
        self.assertTrue(rule.matches_url('https://github.com/myorg/repo'))
        self.assertFalse(rule.matches_url('https://github.com/otherorg/repo'))

    def test_substring_case_insensitive(self):
        rule = make_tag('GitHub.com/MyOrg')
        self.assertTrue(rule.matches_url('https://github.com/myorg/repo'))

    def test_empty_pattern_never_matches(self):
        rule = make_tag('')
        self.assertFalse(rule.matches_url('https://example.com'))

    def test_empty_url_never_matches(self):
        rule = make_tag('*example.com*')
        self.assertFalse(rule.matches_url(''))

    def test_question_mark_wildcard(self):
        # '?' stands for exactly one character, so a two-character suffix must not match.
        rule = make_tag('https://example.com/item-?')
        self.assertTrue(rule.matches_url('https://example.com/item-1'))
        self.assertFalse(rule.matches_url('https://example.com/item-12'))

    def test_substring_is_broad(self):
        """A plain substring pattern is deliberately loose.

        'evil.com' matches wherever it occurs in the URL, 'notevil.com'
        included. A wildcard pattern such as '*://evil.com/*' is the way to
        match one exact domain.
        """
        rule = make_tag('evil.com')
        self.assertTrue(rule.matches_url('https://evil.com/page'))
        # Expected: plain substring matching also hits look-alike hosts.
        self.assertTrue(rule.matches_url('https://notevil.com'))

    def test_precise_domain_match_with_wildcard(self):
        """A wildcard pattern anchored on the scheme separator matches only the exact domain."""
        rule = make_tag('*://evil.com/*')
        self.assertTrue(rule.matches_url('https://evil.com/page'))
        self.assertFalse(rule.matches_url('https://notevil.com/page'))
if __name__ == '__main__':
unittest.main()
-2
View File
@@ -76,9 +76,7 @@ These commands read settings from `../../setup.cfg` automatically.
- `en_US` - English (US)
- `fr` - French (Français)
- `it` - Italian (Italiano)
- `ja` - Japanese (日本語)
- `ko` - Korean (한국어)
- `pt_BR` - Portuguese (Brasil)
- `zh` - Chinese Simplified (中文简体)
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
File diff suppressed because it is too large Load Diff
@@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2026-04-19 00:17+0900\n"
"POT-Creation-Date: 2026-02-23 03:54+0100\n"
"PO-Revision-Date: 2026-01-14 03:57+0100\n"
"Last-Translator: \n"
"Language: de\n"
@@ -16,7 +16,7 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.18.0\n"
"Generated-By: Babel 2.16.0\n"
#: changedetectionio/blueprint/backups/__init__.py
msgid "A backup is already running, check back in a few minutes"
@@ -74,11 +74,6 @@ msgstr ""
msgid "File must be a .zip backup file"
msgstr ""
#: changedetectionio/blueprint/backups/restore.py
#, python-format
msgid "Backup file is too large (max %(mb)s MB)"
msgstr ""
#: changedetectionio/blueprint/backups/restore.py
msgid "Invalid or corrupted zip file"
msgstr ""
@@ -135,11 +130,6 @@ msgstr ""
msgid "Note: This does not override the main application settings, only watches and groups."
msgstr ""
#: changedetectionio/blueprint/backups/templates/backup_restore.html
#, python-format
msgid "Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB"
msgstr ""
#: changedetectionio/blueprint/backups/templates/backup_restore.html
msgid "Include all groups found in backup?"
msgstr ""
@@ -214,10 +204,6 @@ msgstr "Distill.io"
msgid ".XLSX & Wachete"
msgstr ".XLSX & Wachete"
#: changedetectionio/blueprint/imports/templates/import.html
msgid "Backup Restore"
msgstr ""
#: changedetectionio/blueprint/imports/templates/import.html
msgid "Restoring changedetection.io backups is in the"
msgstr ""
@@ -575,15 +561,15 @@ msgstr ""
msgid "all of the ways that the browser is detected"
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Connect using Bright Data proxies, find out more here."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/diff.html
#: changedetectionio/blueprint/ui/templates/edit.html changedetectionio/templates/_common_fields.html
msgid "Tip:"
msgstr "Tipp:"
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/edit.html
msgid "Connect using Bright Data and Oxylabs Proxies, find out more here."
msgstr "Verbinden Sie sich über Bright Data und Oxylabs Proxies. Weitere Informationen finden Sie hier."
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected."
msgstr ""
@@ -604,15 +590,15 @@ msgstr ""
msgid "Changing this could affect the content of your existing watches, possibly trigger alerts etc."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Remove HTML element(s) by CSS and XPath selectors before text conversion."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Don't paste HTML here, use only CSS and XPath selectors"
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML."
msgstr ""
@@ -777,7 +763,7 @@ msgid "Tip"
msgstr "Tipp"
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "\"Residential\" and \"Mobile\" proxy type can be more successful than \"Data Center\" for blocked websites."
msgid "\"Residential\" and \"Mobile\" proxy type can be more successfull than \"Data Center\" for blocked websites."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html
@@ -843,48 +829,10 @@ msgstr "Tag nicht gefunden"
msgid "Updated"
msgstr "Aktualisiert"
#: changedetectionio/blueprint/tags/form.py
msgid "Activate for individual watches in this tag/group?"
msgstr ""
#: changedetectionio/blueprint/tags/form.py
msgid "Auto-apply to watches with URLs matching"
msgstr ""
#: changedetectionio/blueprint/tags/form.py
msgid "e.g. *://example.com/* or github.com/myorg"
msgstr ""
#: changedetectionio/blueprint/tags/form.py changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Tag colour"
msgstr ""
#: changedetectionio/blueprint/tags/form.py
msgid "Tag name"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html changedetectionio/blueprint/ui/templates/edit.html
msgid "Filters & Triggers"
msgstr "Filter und Trigger"
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid ""
"Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or "
"plain substring: <code>github.com/myorg</code>"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Currently matching watches"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Custom colour"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Leave unchecked to use the auto-generated colour based on the tag name."
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "These settings are"
msgstr "Diese Einstellungen sind"
@@ -964,11 +912,7 @@ msgstr "Tag-/Labelname"
msgid "No website organisational tags/groups configured"
msgstr "Keine Gruppen/Labels konfiguriert"
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/templates/menu.html
msgid "Mute notifications"
msgstr "Benachrichtigungen stummschalten"
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/blueprint/ui/edit.py
#: changedetectionio/blueprint/tags/templates/groups-overview.html
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Edit"
msgstr "Bearbeiten"
@@ -1087,10 +1031,6 @@ msgstr "Überwachung nicht gefunden"
msgid "Cleared snapshot history for watch {}"
msgstr "Snapshot-Verlauf für Beobachtung {} gelöscht"
#: changedetectionio/blueprint/ui/__init__.py changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "clear"
msgstr "löschen"
#: changedetectionio/blueprint/ui/__init__.py
msgid "History clearing started in background"
msgstr ""
@@ -1175,14 +1115,6 @@ msgstr ""
msgid "Could not load '{}' processor, processor plugin might be missing."
msgstr ""
#: changedetectionio/blueprint/ui/edit.py
msgid "System settings default"
msgstr ""
#: changedetectionio/blueprint/ui/edit.py
msgid "Default"
msgstr ""
#: changedetectionio/blueprint/ui/edit.py
msgid "Updated watch - unpaused!"
msgstr "Aktualisierte Überwachung fortgesetzt!"
@@ -1195,10 +1127,6 @@ msgstr "Überwachung aktualisiert."
msgid "Preview unavailable - No fetch/check completed or triggers not reached"
msgstr "Vorschau nicht verfügbar Kein Abruf/keine Überprüfung abgeschlossen oder Trigger nicht erreicht"
#: changedetectionio/blueprint/ui/preview.py
msgid "Diff"
msgstr ""
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "This will remove version history (snapshots) for ALL watches, but keep your list of URLs!"
msgstr ""
@@ -1225,6 +1153,10 @@ msgstr "Bestätigungstext"
msgid "Type in the word"
msgstr "Geben Sie das Wort ein"
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "clear"
msgstr "löschen"
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "to confirm that you understand."
msgstr "um zu bestätigen, dass Sie es verstanden haben."
@@ -1310,17 +1242,14 @@ msgid "Jump"
msgstr "Springen"
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
#: changedetectionio/processors/templates/extract.html
msgid "Error Text"
msgstr "Fehlertext"
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
#: changedetectionio/processors/templates/extract.html
msgid "Error Screenshot"
msgstr "Fehler-Screenshot"
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
#: changedetectionio/processors/templates/extract.html
msgid "Text"
msgstr "Text"
@@ -1328,8 +1257,7 @@ msgstr "Text"
msgid "Current screenshot"
msgstr "Aktueller Screenshot"
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/processors/extract.py
#: changedetectionio/processors/templates/extract.html
#: changedetectionio/blueprint/ui/templates/diff.html
msgid "Extract Data"
msgstr "Daten extrahieren"
@@ -1421,10 +1349,6 @@ msgstr "Hilfe und Beispiele finden Sie hier"
msgid "Organisational tag/group name used in the main listing page"
msgstr "Gruppen-/Label-NameGruppen-/Label-NameOrganisations-Tag/Gruppenname, der auf der Haupteintragsseite verwendet wird"
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Also automatically applied by URL pattern:"
msgstr ""
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Automatically uses the page title if found, you can also use your own title/description here"
msgstr ""
@@ -1457,10 +1381,6 @@ msgstr ""
"Die Methode erfordert eine Netzwerkverbindung zu einem laufenden WebDriver+Chrome-Server, der durch die "
"Umgebungsvariable „WEBDRIVER_URL“ festgelegt wird."
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Connect using Bright Data and Oxylabs Proxies, find out more here."
msgstr "Verbinden Sie sich über Bright Data und Oxylabs Proxies. Weitere Informationen finden Sie hier."
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Check/Scan all"
msgstr "Überprüfen Sie alles noch einmal"
@@ -1697,7 +1617,7 @@ msgstr "Bereich zeichnen"
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Clear selection"
msgstr "Auswahl löschen"
msgstr "Klare Auswahl"
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "One moment, fetching screenshot and element information.."
@@ -1943,26 +1863,6 @@ msgstr "Es sind keine Website-Überwachungen konfiguriert. Bitte fügen Sie im F
msgid "import a list"
msgstr "eine Liste importieren"
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Pause checks"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "UnPause checks"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Mute notification"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "UnMute notification"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Create a link to share watch config with others"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Detecting restock and price"
msgstr "Erkennen von Lagerbeständen und Preisen"
@@ -1983,24 +1883,15 @@ msgstr "Preis"
msgid "No information"
msgstr "Keine Informationen"
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Last Checked"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html changedetectionio/templates/base.html
msgid "Checking now"
msgstr "Jetzt prüfen"
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Last Changed"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Queued"
msgstr "Wartend"
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
#: changedetectionio/processors/text_json_diff/difference.py
msgid "History"
msgstr "Verlauf"
@@ -2034,168 +1925,6 @@ msgstr "Überprüfen Sie alles noch einmal"
msgid "in '%(title)s'"
msgstr "in '%(title)s'"
#: changedetectionio/conditions/__init__.py
msgid "Choose one - Operator"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Greater Than"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Less Than"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Greater Than or Equal To"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Less Than or Equal To"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Equals"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Not Equals"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Contains"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Choose one - Field"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Does NOT Contain"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Starts With"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Ends With"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Length minimum"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Length maximum"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Matches Regex"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Does NOT Match Regex"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Extracted number after 'Filters & Triggers'"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Page text after 'Filters & Triggers'"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Field"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Operator"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "A value"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Operator is required."
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Field is required."
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Value is required."
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Levenshtein - Text similarity ratio"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Levenshtein - Text change distance"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Not enough history to calculate Levenshtein metrics"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Snapshot too large for edit statistics, skipping."
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Unable to calculate Levenshtein metrics"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Levenshtein Text Similarity Details"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Raw distance (edits needed)"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Similarity ratio"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Percent similar"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid ""
"Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one "
"into the other."
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Error calculating Levenshtein metrics"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Word count of content"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Content Analysis"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Word count (latest snapshot)"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Word count is a simple measure of content length, calculated by splitting text on whitespace."
msgstr ""
#: changedetectionio/content_fetchers/requests.py
msgid "Basic fast Plaintext/HTTP Client"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html changedetectionio/flask_app.py
#: changedetectionio/realtime/socket_server.py
msgid "Not yet"
@@ -2480,26 +2209,10 @@ msgstr "UI-Optionen"
msgid "Selector"
msgstr "Auswahlmodus:"
#: changedetectionio/forms.py
msgid "CSS or xPath selector"
msgstr ""
#: changedetectionio/forms.py
msgid "value"
msgstr "Wert"
#: changedetectionio/conditions/form.py changedetectionio/forms.py
msgid "Value"
msgstr ""
#: changedetectionio/forms.py
msgid "Web Page URL"
msgstr ""
#: changedetectionio/forms.py
msgid "Group Tag"
msgstr ""
#: changedetectionio/forms.py
msgid "Time Between Check"
msgstr "Prüfintervall"
@@ -2516,10 +2229,6 @@ msgstr "CSS/xPath-Filter"
msgid "Remove elements"
msgstr "Elemente entfernen"
#: changedetectionio/forms.py
msgid "Extract lines containing"
msgstr ""
#: changedetectionio/forms.py
msgid "Extract text"
msgstr "Daten extrahieren"
@@ -2588,8 +2297,7 @@ msgstr "Blockieren Sie die Änderungserkennung, während der Text übereinstimmt
msgid "Execute JavaScript before change detection"
msgstr "Führen Sie JavaScript vor der Änderungserkennung aus"
#: changedetectionio/blueprint/tags/form.py changedetectionio/blueprint/tags/templates/groups-overview.html
#: changedetectionio/forms.py
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/forms.py
msgid "Save"
msgstr "Speichern"
@@ -2658,7 +2366,7 @@ msgstr "Ungültige Vorlagensyntax: %(error)s"
msgid "Invalid template syntax in \"%(header)s\" header: %(error)s"
msgstr "Ungültige Vorlagensyntax im Header „%(header)s“: %(error)s"
#: changedetectionio/blueprint/tags/form.py changedetectionio/forms.py
#: changedetectionio/forms.py
msgid "Name"
msgstr "Name"
@@ -2754,10 +2462,6 @@ msgstr "Text ignorieren"
msgid "Ignore whitespace"
msgstr "Leerzeichen ignorieren"
#: changedetectionio/forms.py
msgid "Screenshot: Minimum Change Percentage"
msgstr ""
#: changedetectionio/forms.py changedetectionio/processors/image_ssim_diff/forms.py
msgid "Must be between 0 and 100"
msgstr "Muss zwischen 0 und 100 liegen"
@@ -2959,42 +2663,6 @@ msgstr "Wiederauffüllung und Preiserkennung für Seiten mit einem EINZELNEN Pro
msgid "Detects if the product goes back to in-stock"
msgstr "Erkennt, ob das Produkt wieder auf Lager ist"
#: changedetectionio/processors/templates/extract.html
msgid "Screenshot"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "This tool will extract text data from all of the watch history."
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract."
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "For example, to extract only the numbers from text"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "Raw text"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "RegEx to extract:"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "Be sure to test your RegEx here."
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "Each RegEx group bracket"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "will be in its own column, the first column value is always the date."
msgstr ""
#: changedetectionio/processors/text_json_diff/processor.py
msgid "Webpage Text/HTML, JSON and PDF changes"
msgstr "Änderungen an Webseitentext/HTML, JSON und PDF"
@@ -3061,11 +2729,6 @@ msgstr "Die Überwachungsgruppe / Tag"
msgid "The URL of the preview page generated by changedetection.io."
msgstr ""
#: changedetectionio/templates/_common_fields.html
#, python-format
msgid "Date/time of the change, accepts format=, change_datetime(format='%A')', default is '%Y-%m-%d %H:%M:%S %Z'"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "The URL of the diff output for the watch."
msgstr ""
@@ -3074,14 +2737,6 @@ msgstr ""
msgid "The diff output - only changes, additions, and removals"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "All diff variants accept"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "args, e.g."
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "The diff output - only changes, additions, and removals —"
msgstr ""
@@ -3118,18 +2773,6 @@ msgstr ""
msgid "The diff output - patch in unified format"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid ""
"Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per "
"line; multiple changed fragments are joined by newline."
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid ""
"Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; "
"multiple changed fragments are joined by newline."
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "The current snapshot text contents value, useful when combined with JSON or CSS filters"
msgstr ""
@@ -3174,7 +2817,7 @@ msgstr ""
msgid "Use"
msgstr "Verwenden"
#: changedetectionio/templates/_common_fields.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/templates/_common_fields.html
msgid "Show advanced help and tips"
msgstr "Erweiterte Hilfe und Tipps anzeigen"
@@ -3278,26 +2921,6 @@ msgstr ""
msgid "Format for all notifications"
msgstr "Format für alle Benachrichtigungen"
#: changedetectionio/templates/_common_fields.html
msgid "Note"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "Discord does not render HTML — switch to"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "Plain Text"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "format to avoid"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "and other HTML entities appearing literally in your notifications."
msgstr ""
#: changedetectionio/templates/_helpers.html
msgid "Entry"
msgstr ""
@@ -3414,18 +3037,10 @@ msgstr "Es wird keine Änderungserkennung stattfinden, da dieser Text existiert.
msgid "Blocked text"
msgstr "Blockierter Text"
#: changedetectionio/templates/base.html
msgid "A new version is available"
msgstr ""
#: changedetectionio/templates/base.html
msgid "Search, or Use Alt+S Key"
msgstr "Suchen oder Alt+S-Taste verwenden"
#: changedetectionio/templates/base.html
msgid "Share this link:"
msgstr ""
#: changedetectionio/templates/base.html
msgid "Real-time updates offline"
msgstr "Echtzeit-Updates offline"
@@ -3506,26 +3121,6 @@ msgstr ""
msgid "All lines here must not exist (think of each line as \"OR\")"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Keep only lines that contain any of these words or phrases (plain text, case-insensitive)"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "One entry per line — any line in the page text that contains a match is kept"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Simpler alternative to regex — use this when you just want lines about a specific topic"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Example: enter"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "to keep only lines mentioning temperature readings"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Extracts text in the final output (line by line) after other filters using regular expressions or string match:"
msgstr ""
@@ -3598,6 +3193,10 @@ msgstr ""
msgid "Unmute notifications"
msgstr "Benachrichtigungen entstummen"
#: changedetectionio/templates/menu.html
msgid "Mute notifications"
msgstr "Benachrichtigungen stummschalten"
#: changedetectionio/templates/menu.html
msgid "Notifications are muted - click to unmute"
msgstr "Benachrichtigungen sind stummgeschaltet - klicken zum Entstummen"
@@ -3786,6 +3385,3 @@ msgstr "Haupteinstellungen"
#~ msgid "Marking watches as viewed in background..."
#~ msgstr ""
#~ msgid "\"Residential\" and \"Mobile\" proxy type can be more successfull than \"Data Center\" for blocked websites."
#~ msgstr ""
@@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: changedetection.io\n"
"Report-Msgid-Bugs-To: https://github.com/dgtlmoon/changedetection.io\n"
"POT-Creation-Date: 2026-04-19 00:17+0900\n"
"POT-Creation-Date: 2026-02-23 03:54+0100\n"
"PO-Revision-Date: 2026-01-12 16:33+0100\n"
"Last-Translator: British English Translation Team\n"
"Language: en_GB\n"
@@ -16,7 +16,7 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.18.0\n"
"Generated-By: Babel 2.16.0\n"
#: changedetectionio/blueprint/backups/__init__.py
msgid "A backup is already running, check back in a few minutes"
@@ -74,11 +74,6 @@ msgstr ""
msgid "File must be a .zip backup file"
msgstr ""
#: changedetectionio/blueprint/backups/restore.py
#, python-format
msgid "Backup file is too large (max %(mb)s MB)"
msgstr ""
#: changedetectionio/blueprint/backups/restore.py
msgid "Invalid or corrupted zip file"
msgstr ""
@@ -133,11 +128,6 @@ msgstr ""
msgid "Note: This does not override the main application settings, only watches and groups."
msgstr ""
#: changedetectionio/blueprint/backups/templates/backup_restore.html
#, python-format
msgid "Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB"
msgstr ""
#: changedetectionio/blueprint/backups/templates/backup_restore.html
msgid "Include all groups found in backup?"
msgstr ""
@@ -212,10 +202,6 @@ msgstr ""
msgid ".XLSX & Wachete"
msgstr ""
#: changedetectionio/blueprint/imports/templates/import.html
msgid "Backup Restore"
msgstr ""
#: changedetectionio/blueprint/imports/templates/import.html
msgid "Restoring changedetection.io backups is in the"
msgstr ""
@@ -561,15 +547,15 @@ msgstr ""
msgid "all of the ways that the browser is detected"
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Connect using Bright Data proxies, find out more here."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/diff.html
#: changedetectionio/blueprint/ui/templates/edit.html changedetectionio/templates/_common_fields.html
msgid "Tip:"
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/blueprint/ui/templates/edit.html
msgid "Connect using Bright Data and Oxylabs Proxies, find out more here."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected."
msgstr ""
@@ -590,15 +576,15 @@ msgstr ""
msgid "Changing this could affect the content of your existing watches, possibly trigger alerts etc."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Remove HTML element(s) by CSS and XPath selectors before text conversion."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Don't paste HTML here, use only CSS and XPath selectors"
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML."
msgstr ""
@@ -763,7 +749,7 @@ msgid "Tip"
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html
msgid "\"Residential\" and \"Mobile\" proxy type can be more successful than \"Data Center\" for blocked websites."
msgid "\"Residential\" and \"Mobile\" proxy type can be more successfull than \"Data Center\" for blocked websites."
msgstr ""
#: changedetectionio/blueprint/settings/templates/settings.html
@@ -829,48 +815,10 @@ msgstr ""
msgid "Updated"
msgstr ""
#: changedetectionio/blueprint/tags/form.py
msgid "Activate for individual watches in this tag/group?"
msgstr ""
#: changedetectionio/blueprint/tags/form.py
msgid "Auto-apply to watches with URLs matching"
msgstr ""
#: changedetectionio/blueprint/tags/form.py
msgid "e.g. *://example.com/* or github.com/myorg"
msgstr ""
#: changedetectionio/blueprint/tags/form.py changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Tag colour"
msgstr ""
#: changedetectionio/blueprint/tags/form.py
msgid "Tag name"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html changedetectionio/blueprint/ui/templates/edit.html
msgid "Filters & Triggers"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid ""
"Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or "
"plain substring: <code>github.com/myorg</code>"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Currently matching watches"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Custom colour"
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "Leave unchecked to use the auto-generated colour based on the tag name."
msgstr ""
#: changedetectionio/blueprint/tags/templates/edit-tag.html
msgid "These settings are"
msgstr ""
@@ -948,11 +896,7 @@ msgstr ""
msgid "No website organisational tags/groups configured"
msgstr ""
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/templates/menu.html
msgid "Mute notifications"
msgstr ""
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/blueprint/ui/edit.py
#: changedetectionio/blueprint/tags/templates/groups-overview.html
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Edit"
msgstr ""
@@ -1067,10 +1011,6 @@ msgstr ""
msgid "Cleared snapshot history for watch {}"
msgstr ""
#: changedetectionio/blueprint/ui/__init__.py changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "clear"
msgstr ""
#: changedetectionio/blueprint/ui/__init__.py
msgid "History clearing started in background"
msgstr ""
@@ -1155,14 +1095,6 @@ msgstr ""
msgid "Could not load '{}' processor, processor plugin might be missing."
msgstr ""
#: changedetectionio/blueprint/ui/edit.py
msgid "System settings default"
msgstr ""
#: changedetectionio/blueprint/ui/edit.py
msgid "Default"
msgstr ""
#: changedetectionio/blueprint/ui/edit.py
msgid "Updated watch - unpaused!"
msgstr ""
@@ -1175,10 +1107,6 @@ msgstr ""
msgid "Preview unavailable - No fetch/check completed or triggers not reached"
msgstr ""
#: changedetectionio/blueprint/ui/preview.py
msgid "Diff"
msgstr ""
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "This will remove version history (snapshots) for ALL watches, but keep your list of URLs!"
msgstr ""
@@ -1203,6 +1131,10 @@ msgstr ""
msgid "Type in the word"
msgstr ""
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "clear"
msgstr ""
#: changedetectionio/blueprint/ui/templates/clear_all_history.html
msgid "to confirm that you understand."
msgstr ""
@@ -1288,17 +1220,14 @@ msgid "Jump"
msgstr ""
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
#: changedetectionio/processors/templates/extract.html
msgid "Error Text"
msgstr ""
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
#: changedetectionio/processors/templates/extract.html
msgid "Error Screenshot"
msgstr ""
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/blueprint/ui/templates/preview.html
#: changedetectionio/processors/templates/extract.html
msgid "Text"
msgstr ""
@@ -1306,8 +1235,7 @@ msgstr ""
msgid "Current screenshot"
msgstr ""
#: changedetectionio/blueprint/ui/templates/diff.html changedetectionio/processors/extract.py
#: changedetectionio/processors/templates/extract.html
#: changedetectionio/blueprint/ui/templates/diff.html
msgid "Extract Data"
msgstr ""
@@ -1399,10 +1327,6 @@ msgstr ""
msgid "Organisational tag/group name used in the main listing page"
msgstr ""
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Also automatically applied by URL pattern:"
msgstr ""
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Automatically uses the page title if found, you can also use your own title/description here"
msgstr ""
@@ -1429,10 +1353,6 @@ msgstr ""
msgid "method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'."
msgstr ""
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Connect using Bright Data and Oxylabs Proxies, find out more here."
msgstr ""
#: changedetectionio/blueprint/ui/templates/edit.html
msgid "Check/Scan all"
msgstr ""
@@ -1897,26 +1817,6 @@ msgstr ""
msgid "import a list"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Pause checks"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "UnPause checks"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Mute notification"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "UnMute notification"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Create a link to share watch config with others"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Detecting restock and price"
msgstr ""
@@ -1937,24 +1837,15 @@ msgstr ""
msgid "No information"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Last Checked"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html changedetectionio/templates/base.html
msgid "Checking now"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Last Changed"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
msgid "Queued"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
#: changedetectionio/processors/text_json_diff/difference.py
msgid "History"
msgstr ""
@@ -1988,168 +1879,6 @@ msgstr ""
msgid "in '%(title)s'"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Choose one - Operator"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Greater Than"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Less Than"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Greater Than or Equal To"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Less Than or Equal To"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Equals"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Not Equals"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Contains"
msgstr ""
#: changedetectionio/conditions/__init__.py
msgid "Choose one - Field"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Does NOT Contain"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Starts With"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Ends With"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Length minimum"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Length maximum"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Matches Regex"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Text Does NOT Match Regex"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Extracted number after 'Filters & Triggers'"
msgstr ""
#: changedetectionio/conditions/default_plugin.py
msgid "Page text after 'Filters & Triggers'"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Field"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Operator"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "A value"
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Operator is required."
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Field is required."
msgstr ""
#: changedetectionio/conditions/form.py
msgid "Value is required."
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Levenshtein - Text similarity ratio"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Levenshtein - Text change distance"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Not enough history to calculate Levenshtein metrics"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Snapshot too large for edit statistics, skipping."
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Unable to calculate Levenshtein metrics"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Levenshtein Text Similarity Details"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Raw distance (edits needed)"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Similarity ratio"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Percent similar"
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid ""
"Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one "
"into the other."
msgstr ""
#: changedetectionio/conditions/plugins/levenshtein_plugin.py
msgid "Error calculating Levenshtein metrics"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Word count of content"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Content Analysis"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Word count (latest snapshot)"
msgstr ""
#: changedetectionio/conditions/plugins/wordcount_plugin.py
msgid "Word count is a simple measure of content length, calculated by splitting text on whitespace."
msgstr ""
#: changedetectionio/content_fetchers/requests.py
msgid "Basic fast Plaintext/HTTP Client"
msgstr ""
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html changedetectionio/flask_app.py
#: changedetectionio/realtime/socket_server.py
msgid "Not yet"
@@ -2432,26 +2161,10 @@ msgstr ""
msgid "Selector"
msgstr ""
#: changedetectionio/forms.py
msgid "CSS or xPath selector"
msgstr ""
#: changedetectionio/forms.py
msgid "value"
msgstr ""
#: changedetectionio/conditions/form.py changedetectionio/forms.py
msgid "Value"
msgstr ""
#: changedetectionio/forms.py
msgid "Web Page URL"
msgstr ""
#: changedetectionio/forms.py
msgid "Group Tag"
msgstr ""
#: changedetectionio/forms.py
msgid "Time Between Check"
msgstr ""
@@ -2468,10 +2181,6 @@ msgstr ""
msgid "Remove elements"
msgstr ""
#: changedetectionio/forms.py
msgid "Extract lines containing"
msgstr ""
#: changedetectionio/forms.py
msgid "Extract text"
msgstr ""
@@ -2540,8 +2249,7 @@ msgstr ""
msgid "Execute JavaScript before change detection"
msgstr ""
#: changedetectionio/blueprint/tags/form.py changedetectionio/blueprint/tags/templates/groups-overview.html
#: changedetectionio/forms.py
#: changedetectionio/blueprint/tags/templates/groups-overview.html changedetectionio/forms.py
msgid "Save"
msgstr ""
@@ -2609,7 +2317,7 @@ msgstr ""
msgid "Invalid template syntax in \"%(header)s\" header: %(error)s"
msgstr ""
#: changedetectionio/blueprint/tags/form.py changedetectionio/forms.py
#: changedetectionio/forms.py
msgid "Name"
msgstr ""
@@ -2705,10 +2413,6 @@ msgstr ""
msgid "Ignore whitespace"
msgstr ""
#: changedetectionio/forms.py
msgid "Screenshot: Minimum Change Percentage"
msgstr ""
#: changedetectionio/forms.py changedetectionio/processors/image_ssim_diff/forms.py
msgid "Must be between 0 and 100"
msgstr ""
@@ -2908,42 +2612,6 @@ msgstr ""
msgid "Detects if the product goes back to in-stock"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "Screenshot"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "This tool will extract text data from all of the watch history."
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract."
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "For example, to extract only the numbers from text"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "Raw text"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "RegEx to extract:"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "Be sure to test your RegEx here."
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "Each RegEx group bracket"
msgstr ""
#: changedetectionio/processors/templates/extract.html
msgid "will be in its own column, the first column value is always the date."
msgstr ""
#: changedetectionio/processors/text_json_diff/processor.py
msgid "Webpage Text/HTML, JSON and PDF changes"
msgstr ""
@@ -3010,11 +2678,6 @@ msgstr ""
msgid "The URL of the preview page generated by changedetection.io."
msgstr ""
#: changedetectionio/templates/_common_fields.html
#, python-format
msgid "Date/time of the change, accepts format=, change_datetime(format='%A')', default is '%Y-%m-%d %H:%M:%S %Z'"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "The URL of the diff output for the watch."
msgstr ""
@@ -3023,14 +2686,6 @@ msgstr ""
msgid "The diff output - only changes, additions, and removals"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "All diff variants accept"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "args, e.g."
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "The diff output - only changes, additions, and removals —"
msgstr ""
@@ -3067,18 +2722,6 @@ msgstr ""
msgid "The diff output - patch in unified format"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid ""
"Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per "
"line; multiple changed fragments are joined by newline."
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid ""
"Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; "
"multiple changed fragments are joined by newline."
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "The current snapshot text contents value, useful when combined with JSON or CSS filters"
msgstr ""
@@ -3123,7 +2766,7 @@ msgstr ""
msgid "Use"
msgstr ""
#: changedetectionio/templates/_common_fields.html changedetectionio/templates/edit/include_subtract.html
#: changedetectionio/templates/_common_fields.html
msgid "Show advanced help and tips"
msgstr ""
@@ -3227,26 +2870,6 @@ msgstr ""
msgid "Format for all notifications"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "Note"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "Discord does not render HTML — switch to"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "Plain Text"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "format to avoid"
msgstr ""
#: changedetectionio/templates/_common_fields.html
msgid "and other HTML entities appearing literally in your notifications."
msgstr ""
#: changedetectionio/templates/_helpers.html
msgid "Entry"
msgstr ""
@@ -3363,18 +2986,10 @@ msgstr ""
msgid "Blocked text"
msgstr ""
#: changedetectionio/templates/base.html
msgid "A new version is available"
msgstr ""
#: changedetectionio/templates/base.html
msgid "Search, or Use Alt+S Key"
msgstr ""
#: changedetectionio/templates/base.html
msgid "Share this link:"
msgstr ""
#: changedetectionio/templates/base.html
msgid "Real-time updates offline"
msgstr ""
@@ -3453,26 +3068,6 @@ msgstr ""
msgid "All lines here must not exist (think of each line as \"OR\")"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Keep only lines that contain any of these words or phrases (plain text, case-insensitive)"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "One entry per line — any line in the page text that contains a match is kept"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Simpler alternative to regex — use this when you just want lines about a specific topic"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Example: enter"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "to keep only lines mentioning temperature readings"
msgstr ""
#: changedetectionio/templates/edit/text-options.html
msgid "Extracts text in the final output (line by line) after other filters using regular expressions or string match:"
msgstr ""
@@ -3545,6 +3140,10 @@ msgstr ""
msgid "Unmute notifications"
msgstr ""
#: changedetectionio/templates/menu.html
msgid "Mute notifications"
msgstr ""
#: changedetectionio/templates/menu.html
msgid "Notifications are muted - click to unmute"
msgstr ""
@@ -3616,6 +3215,3 @@ msgstr ""
#~ msgid "Marking watches as viewed in background..."
#~ msgstr ""
#~ msgid "\"Residential\" and \"Mobile\" proxy type can be more successfull than \"Data Center\" for blocked websites."
#~ msgstr ""

Some files were not shown because too many files have changed in this diff Show More