Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon 259e44940c WIP
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled
2026-03-08 14:17:28 +01:00
dgtlmoon 1dbd25cdb4 Simplify change 2026-03-08 14:03:57 +01:00
dgtlmoon bb2c9855ee Various memory and CPU improvements 2026-03-08 14:00:22 +01:00
184 changed files with 1230 additions and 20141 deletions
+1 -18
View File
@@ -103,14 +103,6 @@ jobs:
ghcr.io/${{ github.repository }} ghcr.io/${{ github.repository }}
tags: | tags: |
type=raw,value=dev type=raw,value=dev
labels: |
org.opencontainers.image.created=${{ github.event.release.published_at }}
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
org.opencontainers.image.documentation=https://changedetection.io
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
org.opencontainers.image.title=changedetection.io
org.opencontainers.image.url=https://changedetection.io
- name: Build and push :dev - name: Build and push :dev
id: docker_build id: docker_build
@@ -136,7 +128,7 @@ jobs:
echo "Release tag: ${{ github.event.release.tag_name }}" echo "Release tag: ${{ github.event.release.tag_name }}"
echo "Github ref: ${{ github.ref }}" echo "Github ref: ${{ github.ref }}"
echo "Github ref name: ${{ github.ref_name }}" echo "Github ref name: ${{ github.ref_name }}"
- name: Docker meta :tag - name: Docker meta :tag
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/metadata-action@v6 uses: docker/metadata-action@v6
@@ -150,15 +142,6 @@ jobs:
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }} type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }} type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
type=raw,value=latest type=raw,value=latest
labels: |
org.opencontainers.image.created=${{ github.event.release.published_at }}
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
org.opencontainers.image.documentation=https://changedetection.io
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
org.opencontainers.image.title=changedetection.io
org.opencontainers.image.url=https://changedetection.io
org.opencontainers.image.version=${{ github.event.release.tag_name }}
- name: Build and push :tag - name: Build and push :tag
id: docker_build_tag_release id: docker_build_tag_release
@@ -99,7 +99,11 @@ jobs:
- name: Run Unit Tests - name: Run Unit Tests
run: | run: |
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
# Basic pytest tests with ancillary services # Basic pytest tests with ancillary services
basic-tests: basic-tests:
@@ -288,8 +292,8 @@ jobs:
- name: Specific tests in built container for Selenium - name: Specific tests in built container for Selenium
run: | run: |
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
# SMTP tests # SMTP tests
smtp-tests: smtp-tests:
@@ -583,10 +587,6 @@ jobs:
run: | run: |
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor' docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
- name: Plugin get_html_head_extras hook injects into base.html
run: |
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
# Container startup tests # Container startup tests
container-tests: container-tests:
runs-on: ubuntu-latest runs-on: ubuntu-latest
+1 -2
View File
@@ -1,6 +1,5 @@
[python: **.py] [python: **.py]
keywords = _ _l gettext keywords = _:1,_l:1,gettext:1
[jinja2: **/templates/**.html] [jinja2: **/templates/**.html]
encoding = utf-8 encoding = utf-8
keywords = _ _l gettext
+2 -1
View File
@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki # Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1. # Semver means never use .01, or 00. Should be .1.
__version__ = '0.54.9' __version__ = '0.54.4'
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
@@ -10,6 +10,7 @@ from json.decoder import JSONDecodeError
from loguru import logger from loguru import logger
import getopt import getopt
import logging import logging
import os
import platform import platform
import signal import signal
import threading import threading
+2 -9
View File
@@ -154,10 +154,11 @@ class Import(Resource):
if extras['processor'] not in available: if extras['processor'] not in available:
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400 return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
# Validate fetch_backend if provided (legacy API compat — still accepted, stored as-is) # Validate fetch_backend if provided
if 'fetch_backend' in extras: if 'fetch_backend' in extras:
from changedetectionio.content_fetchers import available_fetchers from changedetectionio.content_fetchers import available_fetchers
available = [f[0] for f in available_fetchers()] available = [f[0] for f in available_fetchers()]
# Also allow 'system' and extra_browser_* patterns
is_valid = ( is_valid = (
extras['fetch_backend'] == 'system' or extras['fetch_backend'] == 'system' or
extras['fetch_backend'] in available or extras['fetch_backend'] in available or
@@ -166,14 +167,6 @@ class Import(Resource):
if not is_valid: if not is_valid:
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400 return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
# Validate browser_profile if provided
if 'browser_profile' in extras:
from changedetectionio.model.browser_profile import get_builtin_profiles, RESERVED_MACHINE_NAMES
store_profiles = self.datastore.data['settings']['application'].get('browser_profiles', {})
known = set(get_builtin_profiles().keys()) | set(store_profiles.keys()) | {'system', None}
if extras['browser_profile'] not in known:
return f"Invalid browser_profile '{extras['browser_profile']}'. Available: {', '.join(str(k) for k in known)}", 400
# Validate notification_urls if provided # Validate notification_urls if provided
if 'notification_urls' in extras: if 'notification_urls' in extras:
from wtforms import ValidationError from wtforms import ValidationError
-10
View File
@@ -85,9 +85,6 @@ class Tag(Resource):
# Create clean tag dict without Watch-specific fields # Create clean tag dict without Watch-specific fields
clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields} clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
# fetch_backend is a legacy field superseded by browser_profile — omit from API response
clean_tag.pop('fetch_backend', None)
return clean_tag return clean_tag
@auth.check_token @auth.check_token
@@ -180,13 +177,6 @@ class Tag(Resource):
new_uuid = self.datastore.add_tag(title=title) new_uuid = self.datastore.add_tag(title=title)
if new_uuid: if new_uuid:
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
extra = {k: v for k, v in json_data.items() if k != 'title'}
if extra:
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
if tag:
tag.update(extra)
tag.commit()
return {'uuid': new_uuid}, 201 return {'uuid': new_uuid}, 201
else: else:
return "Invalid or unsupported tag", 400 return "Invalid or unsupported tag", 400
+3 -6
View File
@@ -105,9 +105,6 @@ class Watch(Resource):
watch['viewed'] = watch_obj.viewed watch['viewed'] = watch_obj.viewed
watch['link'] = watch_obj.link, watch['link'] = watch_obj.link,
# fetch_backend is a legacy field superseded by browser_profile — omit from API response
watch.pop('fetch_backend', None)
return watch return watch
@auth.check_token @auth.check_token
@@ -341,7 +338,7 @@ class WatchHistoryDiff(Resource):
word_diff = True word_diff = True
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG # Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
changes_only = strtobool(request.args.get('changesOnly', 'false')) changes_only = strtobool(request.args.get('changesOnly', 'true'))
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false')) ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
include_removed = strtobool(request.args.get('removed', 'true')) include_removed = strtobool(request.args.get('removed', 'true'))
include_added = strtobool(request.args.get('added', 'true')) include_added = strtobool(request.args.get('added', 'true'))
@@ -352,7 +349,7 @@ class WatchHistoryDiff(Resource):
previous_version_file_contents=from_version_file_contents, previous_version_file_contents=from_version_file_contents,
newest_version_file_contents=to_version_file_contents, newest_version_file_contents=to_version_file_contents,
ignore_junk=ignore_whitespace, ignore_junk=ignore_whitespace,
include_equal=not changes_only, include_equal=changes_only,
include_removed=include_removed, include_removed=include_removed,
include_added=include_added, include_added=include_added,
include_replaced=include_replaced, include_replaced=include_replaced,
@@ -570,4 +567,4 @@ class CreateWatch(Resource):
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202 return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
return list, 200 return list, 200
@@ -98,8 +98,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore)) backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
backup_threads = [] backup_threads = []
@backups_blueprint.route("/request-backup", methods=['GET'])
@login_optionally_required @login_optionally_required
@backups_blueprint.route("/request-backup", methods=['GET'])
def request_backup(): def request_backup():
if any(thread.is_alive() for thread in backup_threads): if any(thread.is_alive() for thread in backup_threads):
flash(gettext("A backup is already running, check back in a few minutes"), "error") flash(gettext("A backup is already running, check back in a few minutes"), "error")
@@ -141,8 +141,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return backup_info return backup_info
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
@login_optionally_required @login_optionally_required
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
def download_backup(filename): def download_backup(filename):
import re import re
filename = filename.strip() filename = filename.strip()
@@ -165,9 +165,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.debug(f"Backup download request for '{full_path}'") logger.debug(f"Backup download request for '{full_path}'")
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True) return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
@login_optionally_required
@backups_blueprint.route("/", methods=['GET']) @backups_blueprint.route("/", methods=['GET'])
@backups_blueprint.route("/create", methods=['GET']) @backups_blueprint.route("/create", methods=['GET'])
@login_optionally_required
def create(): def create():
backups = find_backups() backups = find_backups()
output = render_template("backup_create.html", output = render_template("backup_create.html",
@@ -176,8 +176,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
) )
return output return output
@backups_blueprint.route("/remove-backups", methods=['GET'])
@login_optionally_required @login_optionally_required
@backups_blueprint.route("/remove-backups", methods=['GET'])
def remove_backups(): def remove_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*")) backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
@@ -174,8 +174,8 @@ def construct_restore_blueprint(datastore):
restore_blueprint = Blueprint('restore', __name__, template_folder="templates") restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
restore_threads = [] restore_threads = []
@restore_blueprint.route("/restore", methods=['GET'])
@login_optionally_required @login_optionally_required
@restore_blueprint.route("/restore", methods=['GET'])
def restore(): def restore():
form = RestoreForm() form = RestoreForm()
return render_template("backup_restore.html", return render_template("backup_restore.html",
@@ -184,8 +184,8 @@ def construct_restore_blueprint(datastore):
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024), max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024)) max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
@restore_blueprint.route("/restore/start", methods=['POST'])
@login_optionally_required @login_optionally_required
@restore_blueprint.route("/restore/start", methods=['POST'])
def backups_restore_start(): def backups_restore_start():
if any(t.is_alive() for t in restore_threads): if any(t.is_alive() for t in restore_threads):
flash(gettext("A restore is already running, check back in a few minutes"), "error") flash(gettext("A restore is already running, check back in a few minutes"), "error")
@@ -20,7 +20,8 @@
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p> <p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p> <p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
<p class="pure-form-message"> <p class="pure-form-message">
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }} {{ _('Max upload size: %(upload)s MB &nbsp;·&nbsp; Max decompressed size: %(decomp)s MB',
upload=max_upload_mb, decomp=max_decompressed_mb) }}
</p> </p>
<form class="pure-form pure-form-stacked settings" <form class="pure-form pure-form-stacked settings"
@@ -102,35 +102,6 @@ def run_async_in_browser_loop(coro):
else: else:
raise RuntimeError("Browser steps event loop is not available") raise RuntimeError("Browser steps event loop is not available")
async def _close_session_resources(session_data, label=''):
"""Close all browser resources for a session in the correct order.
browserstepper.cleanup() closes page+context but not the browser itself.
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
"""
browserstepper = session_data.get('browserstepper')
if browserstepper:
try:
await browserstepper.cleanup()
except Exception as e:
logger.error(f"Error cleaning up browserstepper{label}: {e}")
browser = session_data.get('browser')
if browser:
try:
await asyncio.wait_for(browser.close(), timeout=5.0)
except Exception as e:
logger.warning(f"Error closing browser{label}: {e}")
playwright_context = session_data.get('playwright_context')
if playwright_context:
try:
await playwright_context.stop()
except Exception as e:
logger.warning(f"Error stopping playwright context{label}: {e}")
def cleanup_expired_sessions(): def cleanup_expired_sessions():
"""Remove expired browsersteps sessions and cleanup their resources""" """Remove expired browsersteps sessions and cleanup their resources"""
global browsersteps_sessions, browsersteps_watch_to_session global browsersteps_sessions, browsersteps_watch_to_session
@@ -148,10 +119,13 @@ def cleanup_expired_sessions():
logger.debug(f"Cleaning up expired browsersteps session {session_id}") logger.debug(f"Cleaning up expired browsersteps session {session_id}")
session_data = browsersteps_sessions[session_id] session_data = browsersteps_sessions[session_id]
try: # Cleanup playwright resources asynchronously
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}")) browserstepper = session_data.get('browserstepper')
except Exception as e: if browserstepper:
logger.error(f"Error cleaning up session {session_id}: {e}") try:
run_async_in_browser_loop(browserstepper.cleanup())
except Exception as e:
logger.error(f"Error cleaning up session {session_id}: {e}")
# Remove from sessions dict # Remove from sessions dict
del browsersteps_sessions[session_id] del browsersteps_sessions[session_id]
@@ -178,10 +152,12 @@ def cleanup_session_for_watch(watch_uuid):
session_data = browsersteps_sessions.get(session_id) session_data = browsersteps_sessions.get(session_id)
if session_data: if session_data:
try: browserstepper = session_data.get('browserstepper')
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}")) if browserstepper:
except Exception as e: try:
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}") run_async_in_browser_loop(browserstepper.cleanup())
except Exception as e:
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
# Remove from sessions dict # Remove from sessions dict
del browsersteps_sessions[session_id] del browsersteps_sessions[session_id]
@@ -202,69 +178,64 @@ def construct_blueprint(datastore: ChangeDetectionStore):
import time import time
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
# We keep the playwright session open for many minutes
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60 keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
keepalive_ms = ((keepalive_seconds + 3) * 1000)
browsersteps_start_session = {'start_time': time.time()} browsersteps_start_session = {'start_time': time.time()}
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers # Create a new async playwright instance for browser steps
proxy_url = datastore.get_proxy_url_for_watch(uuid=watch_uuid) playwright_instance = async_playwright()
proxy = None playwright_context = await playwright_instance.start()
if proxy_url:
from urllib.parse import urlparse
parsed = urlparse(proxy_url)
proxy = {'server': proxy_url}
if parsed.username:
proxy['username'] = parsed.username
if parsed.password:
proxy['password'] = parsed.password
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
# Resolve the fetcher class for this watch so we can ask it to launch its own browser keepalive_ms = ((keepalive_seconds + 3) * 1000)
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP) base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
watch = datastore.data['watching'][watch_uuid] a = "?" if not '?' in base_url else '&'
from changedetectionio import content_fetchers base_url += a + f"timeout={keepalive_ms}"
fetcher_class = content_fetchers.get_fetcher(watch.effective_browser_profile.fetch_backend)
browser = None
playwright_context = None
# If the fetcher has its own browser launch for the live steps UI, use it.
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
# or None to fall back to the default CDP path.
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
if result is not None:
browser, playwright_context = result
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_class.__name__}'")
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
if browser is None:
playwright_instance = async_playwright()
playwright_context = await playwright_instance.start()
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
a = "?" if '?' not in base_url else '&'
base_url += a + f"timeout={keepalive_ms}"
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
browsersteps_start_session['browser'] = browser browsersteps_start_session['browser'] = browser
browsersteps_start_session['playwright_context'] = playwright_context browsersteps_start_session['playwright_context'] = playwright_context
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
proxy = None
if proxy_id:
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
if proxy_url:
# Playwright needs separate username and password values
from urllib.parse import urlparse
parsed = urlparse(proxy_url)
proxy = {'server': proxy_url}
if parsed.username:
proxy['username'] = parsed.username
if parsed.password:
proxy['password'] = parsed.password
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
browserstepper = browser_steps.browsersteps_live_ui( browserstepper = browser_steps.browsersteps_live_ui(
playwright_browser=browser, playwright_browser=browser,
proxy=proxy, proxy=proxy,
start_url=watch.link, start_url=datastore.data['watching'][watch_uuid].link,
headers=watch.get('headers') headers=datastore.data['watching'][watch_uuid].get('headers')
) )
# Initialize the async connection
await browserstepper.connect(proxy=proxy) await browserstepper.connect(proxy=proxy)
browsersteps_start_session['browserstepper'] = browserstepper browsersteps_start_session['browserstepper'] = browserstepper
# For test
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
return browsersteps_start_session return browsersteps_start_session
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
@login_optionally_required @login_optionally_required
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
def browsersteps_start_session(): def browsersteps_start_session():
# A new session was requested, return sessionID # A new session was requested, return sessionID
import uuid import uuid
@@ -299,8 +270,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.debug("Starting connection with playwright - done") logger.debug("Starting connection with playwright - done")
return {'browsersteps_session_id': browsersteps_session_id} return {'browsersteps_session_id': browsersteps_session_id}
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
@login_optionally_required @login_optionally_required
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
def browser_steps_fetch_screenshot_image(): def browser_steps_fetch_screenshot_image():
from flask import ( from flask import (
make_response, make_response,
@@ -325,8 +296,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401) return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
# A request for an action was received # A request for an action was received
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
@login_optionally_required @login_optionally_required
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
def browsersteps_ui_update(): def browsersteps_ui_update():
import base64 import base64
@@ -40,14 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
contents = '' contents = ''
now = time.time() now = time.time()
try: try:
import asyncio
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor") processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
update_handler = processor_module.perform_site_check(datastore=datastore, update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid watch_uuid=uuid
) )
update_handler.preferred_proxy_override = preferred_proxy update_handler.call_browser(preferred_proxy_id=preferred_proxy)
asyncio.run(update_handler.call_browser())
# title, size is len contents not len xfer # title, size is len contents not len xfer
except content_fetcher_exceptions.Non200ErrorCodeReceived as e: except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 404: if e.status_code == 404:
@@ -160,7 +160,8 @@ class import_xlsx_wachete(Importer):
flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error') flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
return return
for row_id, row in enumerate(wb.active.iter_rows(min_row=2), start=2): row_id = 2
for row in wb.active.iter_rows(min_row=row_id):
try: try:
extras = {} extras = {}
data = {} data = {}
@@ -174,9 +175,9 @@ class import_xlsx_wachete(Importer):
dynamic_wachet = str(data.get('dynamic wachet', '')).strip().lower() # Convert bool to str to cover all cases dynamic_wachet = str(data.get('dynamic wachet', '')).strip().lower() # Convert bool to str to cover all cases
# libreoffice and others can have it as =FALSE() =TRUE(), or bool(true) # libreoffice and others can have it as =FALSE() =TRUE(), or bool(true)
if 'true' in dynamic_wachet or dynamic_wachet == '1': if 'true' in dynamic_wachet or dynamic_wachet == '1':
extras['browser_profile'] = 'browser_chromeplaywright' extras['fetch_backend'] = 'html_webdriver'
elif 'false' in dynamic_wachet or dynamic_wachet == '0': elif 'false' in dynamic_wachet or dynamic_wachet == '0':
extras['browser_profile'] = 'direct_http_requests' extras['fetch_backend'] = 'html_requests'
if data.get('xpath'): if data.get('xpath'):
# @todo split by || ? # @todo split by || ?
@@ -211,6 +212,8 @@ class import_xlsx_wachete(Importer):
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_id), 'error') flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_id), 'error')
else:
row_id += 1
flash(gettext("{} imported from Wachete .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now)) flash(gettext("{} imported from Wachete .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
@@ -238,10 +241,10 @@ class import_xlsx_custom(Importer):
# @todo cehck atleast 2 rows, same in other method # @todo cehck atleast 2 rows, same in other method
from changedetectionio.forms import validate_url from changedetectionio.forms import validate_url
row_i = 0 row_i = 1
try: try:
for row_i, row in enumerate(wb.active.iter_rows(), start=1): for row in wb.active.iter_rows():
url = None url = None
tags = None tags = None
extras = {} extras = {}
@@ -292,5 +295,7 @@ class import_xlsx_custom(Importer):
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error') flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error')
else:
row_i += 1
flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now)) flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
@@ -9,7 +9,6 @@
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li> <li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li> <li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li> <li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
</ul> </ul>
</div> </div>
@@ -15,9 +15,6 @@ from changedetectionio.auth_decorator import login_optionally_required
def construct_blueprint(datastore: ChangeDetectionStore): def construct_blueprint(datastore: ChangeDetectionStore):
settings_blueprint = Blueprint('settings', __name__, template_folder="templates") settings_blueprint = Blueprint('settings', __name__, template_folder="templates")
from changedetectionio.blueprint.settings.browser_profile import construct_blueprint as construct_browser_profile_blueprint
settings_blueprint.register_blueprint(construct_browser_profile_blueprint(datastore), url_prefix='/browsers')
@settings_blueprint.route("", methods=['GET', "POST"]) @settings_blueprint.route("", methods=['GET', "POST"])
@login_optionally_required @login_optionally_required
def settings_page(): def settings_page():
@@ -1,200 +0,0 @@
import flask_login
from flask import Blueprint, render_template, request, redirect, url_for, flash
from flask_babel import gettext
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the ``settings_browsers`` blueprint used to manage browser profiles.

    Every route closes over ``datastore``. User-created profiles are read from
    ``datastore.data['settings']['application']['browser_profiles']`` (keyed by
    machine name) and the machine name of the system default profile is kept in
    ``datastore.data['settings']['application']['browser_profile']``.

    :param datastore: application datastore the routes read from and commit to.
    :return: the configured ``Blueprint`` instance.
    """
    settings_browser_profile_blueprint = Blueprint(
        'settings_browsers',
        __name__,
        template_folder="templates"
    )

    def _back_to_index():
        """Redirect to the profile list page, where every handler ends up."""
        return redirect(url_for('settings.settings_browsers.index'))

    def _repoint_profile_references(old_machine_name, new_machine_name):
        """Re-point watches, tags and the system default from one profile to another.

        Passing ``None`` as ``new_machine_name`` clears the reference (used on delete).
        The caller is responsible for calling ``datastore.commit()`` afterwards.
        """
        for watch in datastore.data['watching'].values():
            if watch.get('browser_profile') == old_machine_name:
                watch['browser_profile'] = new_machine_name

        tags = datastore.data.get('settings', {}).get('application', {}).get('tags', {})
        for tag in tags.values():
            if tag.get('browser_profile') == old_machine_name:
                tag['browser_profile'] = new_machine_name

        app_settings = datastore.data['settings']['application']
        if app_settings.get('browser_profile') == old_machine_name:
            app_settings['browser_profile'] = new_machine_name

    def _render_index(browser_profile_form=None, editing_machine_name=None):
        """Render the profile list together with the add/edit form.

        :param browser_profile_form: pre-populated form to show; a blank one is created when omitted.
        :param editing_machine_name: machine name of the profile being edited, or ``None`` when adding.
        """
        from changedetectionio import forms
        from changedetectionio import content_fetchers as cf
        from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES

        # Only browser-capable fetchers are valid profile types
        fetcher_choices = cf.available_browser_fetchers()

        if browser_profile_form is None:
            browser_profile_form = forms.BrowserProfileForm()
        # Choices are assigned for a passed-in form too (the edit route supplies one without them).
        browser_profile_form.fetch_backend.choices = fetcher_choices

        fetcher_supports_screenshots = {name: True for name, _ in fetcher_choices}
        fetcher_requires_connection_url = {name: True for name, cls in cf.FETCHERS.items()
                                           if getattr(cls, 'requires_connection_url', False)}

        # Table shows default built-in profiles first, then user-created profiles
        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
        user_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
        for machine_name, raw in store_profiles.items():
            try:
                user_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
            except Exception:
                # Best effort: a stored profile that no longer validates is left out of the
                # table instead of breaking the whole page.
                pass

        current_default = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'

        return render_template(
            "browser_profiles.html",
            browser_profiles=user_profiles,
            browser_profile_form=browser_profile_form,
            reserved_browser_profile_names=RESERVED_MACHINE_NAMES,
            fetcher_choices=fetcher_choices,
            fetcher_supports_screenshots=fetcher_supports_screenshots,
            fetcher_requires_connection_url=fetcher_requires_connection_url,
            current_default_profile=current_default,
            editing_machine_name=editing_machine_name,
        )

    @settings_browser_profile_blueprint.route("", methods=['GET'])
    @login_optionally_required
    def index():
        """Show the profile list with an empty 'add profile' form."""
        return _render_index()

    @settings_browser_profile_blueprint.route("/<string:machine_name>/edit", methods=['GET'])
    @login_optionally_required
    def edit(machine_name):
        """Show the profile list with the form pre-filled from ``machine_name``."""
        from changedetectionio import forms
        from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES

        if machine_name in RESERVED_MACHINE_NAMES:
            flash(gettext("Built-in browser profiles cannot be edited."), 'error')
            return _back_to_index()

        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
        raw = store_profiles.get(machine_name)
        if raw is None:
            flash(gettext("Browser profile not found."), 'error')
            return _back_to_index()

        try:
            profile = BrowserProfile(**raw) if isinstance(raw, dict) else raw
        except Exception as e:
            # Stored data that no longer validates is reported the same way save() reports
            # it, rather than surfacing as an unhandled server error.
            flash(gettext("Browser profile validation error: {}").format(str(e)), 'error')
            return _back_to_index()

        form = forms.BrowserProfileForm(data=profile.model_dump())
        return _render_index(browser_profile_form=form, editing_machine_name=machine_name)

    @settings_browser_profile_blueprint.route("/save", methods=['POST'])
    @login_optionally_required
    def save():
        """Create a profile, or update/rename the one named by ``original_machine_name``."""
        from changedetectionio import forms
        from changedetectionio import content_fetchers as cf
        from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES

        # NOTE(review): the form rendered by _render_index offers cf.available_browser_fetchers(),
        # while validation here accepts everything from cf.available_fetchers() - confirm intended.
        fetcher_choices = [(name, desc) for name, desc in cf.available_fetchers()]
        browser_profile_form = forms.BrowserProfileForm(formdata=request.form)
        browser_profile_form.fetch_backend.choices = fetcher_choices

        if not browser_profile_form.validate():
            flash(gettext("Browser profile error: {}").format(
                '; '.join(str(e) for errs in browser_profile_form.errors.values() for e in errs)
            ), 'error')
            return _back_to_index()

        name = browser_profile_form.name.data.strip()
        machine_name = BrowserProfile.machine_name_from_str(name)

        if machine_name in RESERVED_MACHINE_NAMES:
            flash(gettext("Cannot use reserved profile name '{}'. Please choose a different name.").format(name), 'error')
            return _back_to_index()

        original_machine_name = request.form.get('original_machine_name', '').strip()
        store_profiles = datastore.data['settings']['application'].setdefault('browser_profiles', {})

        # A new name (or a rename target) must not collide with another stored profile.
        if machine_name != original_machine_name and machine_name in store_profiles:
            flash(gettext("A browser profile named '{}' already exists.").format(name), 'error')
            return _back_to_index()

        profile_data = {
            'name': name,
            'fetch_backend': browser_profile_form.fetch_backend.data,
            'browser_connection_url': browser_profile_form.browser_connection_url.data or None,
            'viewport_width': browser_profile_form.viewport_width.data or 1280,
            'viewport_height': browser_profile_form.viewport_height.data or 1000,
            'block_images': bool(browser_profile_form.block_images.data),
            'block_fonts': bool(browser_profile_form.block_fonts.data),
            'ignore_https_errors': bool(browser_profile_form.ignore_https_errors.data),
            'user_agent': browser_profile_form.user_agent.data or None,
            'locale': browser_profile_form.locale.data or None,
            'custom_headers': browser_profile_form.custom_headers.data or '',
            'is_builtin': False,
        }

        # Construct the model purely to validate; the plain dict is what gets stored.
        try:
            BrowserProfile(**profile_data)
        except Exception as e:
            flash(gettext("Browser profile validation error: {}").format(str(e)), 'error')
            return _back_to_index()

        # Handle rename: remove old key, cascade-update watches, tags and the system default
        # (previously the system default kept pointing at the removed machine name).
        if original_machine_name and original_machine_name != machine_name and original_machine_name in store_profiles:
            del store_profiles[original_machine_name]
            _repoint_profile_references(original_machine_name, machine_name)

        store_profiles[machine_name] = profile_data
        datastore.commit()

        flash(gettext("Browser profile '{}' saved.").format(name), 'notice')
        return _back_to_index()

    # NOTE(review): this is a state-changing GET; the list template links to it directly,
    # so the method is kept as-is here.
    @settings_browser_profile_blueprint.route("/<string:machine_name>/delete", methods=['GET'])
    @login_optionally_required
    def delete(machine_name):
        """Delete a user-created profile and clear every reference to it."""
        from changedetectionio.model.browser_profile import RESERVED_MACHINE_NAMES

        if machine_name in RESERVED_MACHINE_NAMES:
            flash(gettext("Built-in browser profiles cannot be deleted."), 'error')
            return _back_to_index()

        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
        if machine_name not in store_profiles:
            flash(gettext("Browser profile not found."), 'error')
            return _back_to_index()

        raw = store_profiles[machine_name]
        profile_name = raw.get('name', machine_name) if isinstance(raw, dict) else machine_name

        # Watches, tags and the system default that used this profile are reset to None.
        _repoint_profile_references(machine_name, None)

        del store_profiles[machine_name]
        datastore.commit()

        flash(gettext("Browser profile '{}' deleted.").format(profile_name), 'notice')
        return _back_to_index()

    @settings_browser_profile_blueprint.route("/set-default", methods=['POST'])
    @login_optionally_required
    def set_default():
        """Store the posted ``machine_name`` as the system default profile."""
        machine_name = request.form.get('machine_name', '').strip()
        if not machine_name:
            flash(gettext("No profile specified."), 'error')
            return _back_to_index()

        from changedetectionio.model.browser_profile import get_profile
        store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
        if get_profile(machine_name, store_profiles) is None:
            flash(gettext("Unknown browser profile '{}'.").format(machine_name), 'error')
            return _back_to_index()

        datastore.data['settings']['application']['browser_profile'] = machine_name
        datastore.commit()

        flash(gettext("Default browser profile set to '{}'.").format(machine_name), 'notice')
        return _back_to_index()

    return settings_browser_profile_blueprint
@@ -1,163 +0,0 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
<div class="edit-form">
<div class="box-wrap inner">
<h2>{{ _('Browser Profiles') }}</h2>
<p>{{ _('Create named profiles to configure browser settings — viewport size, connection URL, image/font blocking, and more. Each profile is based on an available browser type.') }}</p>
<form id="set-default-form" action="{{ url_for('settings.settings_browsers.set_default') }}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<input type="hidden" name="machine_name" id="default-machine-name" value="">
</form>
{% if browser_profiles %}
<table class="pure-table pure-table-striped" style="width:100%; margin-bottom:1.5em;">
<thead>
<tr>
<th style="width:2.5em; text-align:center;" title="{{ _('System default') }}">{{ _('Default') }}</th>
<th>{{ _('Name') }}</th>
<th>{{ _('Type') }}</th>
<th style="width:3em; text-align:center;"></th>
<th>{{ _('Viewport') }}</th>
<th>{{ _('Options') }}</th>
<th></th>
</tr>
</thead>
<tbody>
{% for machine_name, profile in browser_profiles.items() %}
<tr>
<td style="text-align:center;">
<input type="radio"
name="default_profile"
value="{{ machine_name }}"
title="{{ _('Set as system default') }}"
{% if machine_name == current_default_profile %}checked{% endif %}
onchange="setDefaultProfile('{{ machine_name }}')">
</td>
<td>{{ profile.name }}</td>
<td><code>{{ profile.fetch_backend }}</code></td>
<td style="text-align:center;">{{ profile.get_fetcher_class_name()|fetcher_status_icons }}</td>
<td>{{ profile.viewport_width }}×{{ profile.viewport_height }}</td>
<td style="font-size:0.8em; line-height:1.6;">
{% if profile.block_images %}{{ _('No images') }}<br>{% endif %}
{% if profile.block_fonts %}{{ _('No fonts') }}<br>{% endif %}
{% if profile.ignore_https_errors %}{{ _('Ignore TLS') }}<br>{% endif %}
{% if profile.browser_connection_url %}<span title="{{ profile.browser_connection_url }}">{{ _('Custom URL') }}</span>{% endif %}
</td>
<td style="white-space:nowrap;">
{% if not profile.is_builtin %}
<a href="{{ url_for('settings.settings_browsers.edit', machine_name=machine_name) }}"
class="pure-button button-small">{{ _('Edit') }}</a>
<a href="{{ url_for('settings.settings_browsers.delete', machine_name=machine_name) }}"
class="pure-button button-small button-error"
onclick="return confirm('{{ _('Delete this browser profile?') }}')">{{ _('Delete') }}</a>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<p style="color:#888; font-style:italic;">{{ _('No browser profiles configured yet. Add one below.') }}</p>
{% endif %}
<div class="border-fieldset">
<h3 id="profile-form-heading">{{ _('Edit browser profile') if editing_machine_name else _('Add new browser profile') }}</h3>
{% if not editing_machine_name %}
<p style="font-size:0.9em; color:#666;">{{ _('Choose a browser type, give it a name, and configure its settings. You can create multiple profiles from the same type with different connection URLs or options.') }}</p>
{% endif %}
<form class="pure-form pure-form-stacked"
id="browser-profile-form"
action="{{ url_for('settings.settings_browsers.save') }}"
method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<input type="hidden" name="original_machine_name" id="original_machine_name" value="{{ editing_machine_name or '' }}">
<fieldset>
<div class="pure-control-group">
{{ render_field(browser_profile_form.name) }}
</div>
<div class="pure-control-group inline-radio">
{{ render_field(browser_profile_form.fetch_backend, id="profile-fetch-backend") }}
</div>
<div class="pure-control-group browser-only-field cdp-only-field">
{{ render_field(browser_profile_form.browser_connection_url) }}
<span class="pure-form-message-inline">{{ _('Optional — override the system CDP/WebSocket URL for this profile only (e.g.') }} <code>ws://my-chrome:3000</code>).</span>
</div>
<div class="pure-control-group browser-only-field" style="display:flex; gap:1em; flex-wrap:wrap;">
<div>{{ render_field(browser_profile_form.viewport_width) }}</div>
<div>{{ render_field(browser_profile_form.viewport_height) }}</div>
</div>
<div class="pure-control-group browser-only-field">
{{ render_checkbox_field(browser_profile_form.block_images) }}
<span class="pure-form-message-inline">{{ _('Block image downloads — speeds up loads on image-heavy pages.') }}</span>
</div>
<div class="pure-control-group browser-only-field">
{{ render_checkbox_field(browser_profile_form.block_fonts) }}
<span class="pure-form-message-inline">{{ _('Block web font downloads.') }}</span>
</div>
<div class="pure-control-group browser-only-field">
{{ render_checkbox_field(browser_profile_form.ignore_https_errors) }}
<span class="pure-form-message-inline">{{ _('Ignore TLS/HTTPS certificate errors (useful for self-signed certs on staging sites).') }}</span>
</div>
<div class="pure-control-group browser-only-field">
{{ render_field(browser_profile_form.user_agent) }}
<span class="pure-form-message-inline">{{ _("Leave blank to use the fetcher's default User-Agent.") }}</span>
</div>
<div class="pure-control-group browser-only-field">
{{ render_field(browser_profile_form.locale) }}
<span class="pure-form-message-inline">{{ _('Sets Accept-Language and navigator.language (e.g. en-US, de-DE).') }}</span>
</div>
<div class="pure-control-group">
{{ render_field(browser_profile_form.custom_headers) }}
<span class="pure-form-message-inline">{{ _('Extra HTTP headers for all requests using this profile (one per line, Key: Value). Applied before per-watch headers.') }}</span>
</div>
<div class="pure-control-group">
<button type="submit" class="pure-button pure-button-primary" id="profile-submit-btn">{{ _('Save profile') }}</button>
{% if editing_machine_name %}
<a href="{{ url_for('settings.settings_browsers.index') }}" class="pure-button button-cancel">{{ _('Cancel') }}</a>
{% endif %}
<a href="{{ url_for('settings.settings_page') }}" class="pure-button button-cancel">{{ _('Back to Settings') }}</a>
</div>
</fieldset>
</form>
</div>
</div>
</div>
<script>
// Copy the chosen machine name into the hidden "set default" form and submit it.
function setDefaultProfile(machineName) {
    const hiddenField = document.getElementById('default-machine-name');
    hiddenField.value = machineName;

    const defaultForm = document.getElementById('set-default-form');
    defaultForm.submit();
}
const fetcherSupportsBrowser = {{ fetcher_supports_screenshots | tojson }};
const fetcherRequiresConnectionUrl = {{ fetcher_requires_connection_url | tojson }};
// Show or hide the profile form fields according to the currently selected fetch backend.
function updateBrowserFieldVisibility() {
    const selectedBackend = document.getElementById('profile-fetch-backend').value;
    const showBrowserFields = Boolean(fetcherSupportsBrowser[selectedBackend]);
    const showCdpFields = Boolean(fetcherRequiresConnectionUrl[selectedBackend]);

    const setVisible = function (selector, visible) {
        for (const node of document.querySelectorAll(selector)) {
            node.style.display = visible ? '' : 'none';
        }
    };

    // The CDP pass runs last, so an element carrying both classes ends up
    // governed by the connection-URL requirement.
    setVisible('.browser-only-field', showBrowserFields);
    setVisible('.cdp-only-field', showCdpFields);
}
document.addEventListener('DOMContentLoaded', function() {
const sel = document.getElementById('profile-fetch-backend');
if (sel) {
sel.addEventListener('change', updateBrowserFieldVisibility);
updateBrowserFieldVisibility();
}
});
{% if editing_machine_name %}
document.addEventListener('DOMContentLoaded', function() {
document.getElementById('browser-profile-form').scrollIntoView({behavior: 'smooth'});
});
{% endif %}
</script>
{% endblock %}
@@ -28,7 +28,6 @@
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li> <li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li> <li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li> <li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
<li class="tab"><a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Browsers') }}</a></li>
{% if plugin_tabs %} {% if plugin_tabs %}
{% for tab in plugin_tabs %} {% for tab in plugin_tabs %}
<li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li> <li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
@@ -116,7 +115,14 @@
</div> </div>
<div class="tab-pane-inner" id="fetching"> <div class="tab-pane-inner" id="fetching">
<fieldset class="pure-group" id="webdriver-override-options"> <div class="pure-control-group inline-radio">
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
<span class="pure-form-message-inline">
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched sites don\'t need Javascript to render.') }}</p>
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var') }} 'WEBDRIVER_URL'. </p>
</span>
</div>
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
<div class="pure-form-message-inline"> <div class="pure-form-message-inline">
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong> <strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
<br> <br>
@@ -140,9 +146,17 @@
{{ render_field(form.requests.form.timeout) }} {{ render_field(form.requests.form.timeout) }}
<span class="pure-form-message-inline">{{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}</span><br> <span class="pure-form-message-inline">{{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}</span><br>
</div> </div>
<div class="pure-control-group inline-radio">
{{ render_field(form.requests.form.default_ua) }}
<span class="pure-form-message-inline">
{{ _('Applied to all requests.') }}<br><br>
{{ _('Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it\'s important to consider') }} <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">{{ _('all of the ways that the browser is detected') }}</a>.
</span>
</div>
<div class="pure-control-group"> <div class="pure-control-group">
<br> <br>
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a> {{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
</div> </div>
</div> </div>
@@ -338,7 +352,7 @@ nav
</div> </div>
</div> </div>
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p> <p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
<div class="pure-control-group" id="extra-proxies-setting"> <div class="pure-control-group" id="extra-proxies-setting">
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }} {{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
@@ -22,14 +22,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags']) tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
from changedetectionio import processors
output = render_template("groups-overview.html", output = render_template("groups-overview.html",
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'), app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
available_tags=sorted_tags, available_tags=sorted_tags,
form=add_form, form=add_form,
generate_tag_colors=processors.generate_processor_badge_colors,
tag_count=tag_count, tag_count=tag_count,
wcag_text_color=processors.wcag_text_color,
) )
return output return output
@@ -211,17 +208,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
template = env.from_string(template_str) template = env.from_string(template_str)
included_content = template.render(**template_args) included_content = template.render(**template_args)
# Watches whose URL currently matches this tag's pattern
matching_watches = {
w_uuid: watch
for w_uuid, watch in datastore.data['watching'].items()
if default.matches_url(watch.get('url', ''))
}
output = render_template("edit-tag.html", output = render_template("edit-tag.html",
extra_form_content=included_content, extra_form_content=included_content,
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None, extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
matching_watches=matching_watches,
settings_application=datastore.data['settings']['application'], settings_application=datastore.data['settings']['application'],
**template_args **template_args
) )
+4 -3
View File
@@ -10,11 +10,12 @@ from changedetectionio.processors.restock_diff.forms import processor_settings_f
class group_restock_settings_form(restock_settings_form): class group_restock_settings_form(restock_settings_form):
overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False) overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
url_match_pattern = StringField('Auto-apply to watches with URLs matching',
render_kw={"placeholder": "e.g. *://example.com/* or github.com/myorg"})
tag_colour = StringField('Tag colour', default='')
class SingleTag(Form): class SingleTag(Form):
name = StringField('Tag name', [validators.InputRequired()], render_kw={"placeholder": "Name"}) name = StringField('Tag name', [validators.InputRequired()], render_kw={"placeholder": "Name"})
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -43,46 +43,6 @@
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }} {{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
</div> </div>
<div class="pure-control-group">
{{ render_field(form.url_match_pattern, class="m-d") }}
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
</div>
{% if matching_watches %}
<div class="pure-control-group">
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
<ul class="tag-url-match-list">
{% for w_uuid, w in matching_watches.items() %}
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
{% endfor %}
</ul>
</div>
{% endif %}
<div class="pure-control-group">
<label>{{ _('Tag colour') }}</label>
<div style="display:flex; align-items:center; gap:0.75em;">
<input type="checkbox" id="use_custom_colour"
{% if data.get('tag_colour') %}checked{% endif %}>
<label for="use_custom_colour" style="margin:0">{{ _('Custom colour') }}</label>
<input type="color" id="tag_colour_picker"
value="{{ data.get('tag_colour') or '#4f8ef7' }}"
{% if not data.get('tag_colour') %}disabled{% endif %}>
<input type="hidden" name="tag_colour" id="tag_colour_hidden"
value="{{ data.get('tag_colour', '') }}">
</div>
<span class="pure-form-message-inline">{{ _('Leave unchecked to use the auto-generated colour based on the tag name.') }}</span>
</div>
<script>
(function () {
    // Keeps the hidden tag_colour input in step with the checkbox and colour picker:
    // an empty value is submitted when the custom colour is switched off.
    var customToggle = document.getElementById('use_custom_colour');
    var colourInput = document.getElementById('tag_colour_picker');
    var submittedValue = document.getElementById('tag_colour_hidden');

    function onPickerInput() {
        submittedValue.value = colourInput.value;
    }

    function onToggleChange() {
        var enabled = customToggle.checked;
        colourInput.disabled = !enabled;
        if (enabled) {
            submittedValue.value = colourInput.value;
        } else {
            submittedValue.value = '';
        }
    }

    colourInput.addEventListener('input', onPickerInput);
    customToggle.addEventListener('change', onToggleChange);
})();
</script>
</fieldset> </fieldset>
</div> </div>
@@ -3,26 +3,6 @@
{% from '_helpers.html' import render_simple_field, render_field %} {% from '_helpers.html' import render_simple_field, render_field %}
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script> <script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
<style>
{%- for uuid, tag in available_tags -%}
{%- if tag and tag.title -%}
{%- set class_name = tag.title|sanitize_tag_class -%}
{%- if tag.get('tag_colour') -%}
.watch-tag-list.tag-{{ class_name }} { background-color: {{ tag.tag_colour }}; color: {{ wcag_text_color(tag.tag_colour) }}; }
{%- else -%}
{%- set colors = generate_tag_colors(tag.title) -%}
.watch-tag-list.tag-{{ class_name }} {
background-color: {{ colors['light']['bg'] }};
color: {{ colors['light']['color'] }};
}
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
background-color: {{ colors['dark']['bg'] }};
color: {{ colors['dark']['color'] }};
}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
</style>
<div class="box"> <div class="box">
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form"> <form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
@@ -68,7 +48,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a> <a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
</td> </td>
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td> <td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td> <td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
<td> <td>
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a> <a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a> <a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
+5 -42
View File
@@ -67,10 +67,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
default['proxy'] = '' default['proxy'] = ''
# proxy_override set to the json/text list of the items # proxy_override set to the json/text list of the items
# browser_profile: None means "use system default" — map to 'system' so the radio pre-selects correctly
if not default.get('browser_profile'):
default['browser_profile'] = 'system'
# Does it use some custom form? does one exist? # Does it use some custom form? does one exist?
processor_name = datastore.data['watching'][uuid].get('processor', '') processor_name = datastore.data['watching'][uuid].get('processor', '')
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None) processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
@@ -143,37 +139,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
except Exception as e: except Exception as e:
logger.warning(f"Failed to load processor config: {e}") logger.warning(f"Failed to load processor config: {e}")
from changedetectionio.model.browser_profile import BrowserProfile for p in datastore.extra_browsers:
from changedetectionio import content_fetchers as cf form.fetch_backend.choices.append(p)
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
# Resolve the name of the system-level default profile for the label form.fetch_backend.choices.append(("system", 'System settings default'))
from changedetectionio.model.browser_profile import get_profile
_system_default_machine_name = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'
_all_store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
_default_profile = get_profile(_system_default_machine_name, _all_store_profiles)
if _default_profile:
_system_label = gettext('System settings default') + ' \u2013 ' + _default_profile.name
else:
_system_label = gettext('System settings default')
# Choices: system default + always-present defaults (requests) + user-created profiles
form.browser_profile.choices = [('system', _system_label)] + [
(p.get_machine_name(), p.name)
for p in cf.DEFAULT_BROWSER_PROFILES.values()
] + [
(machine_name, raw.get('name', machine_name) if isinstance(raw, dict) else getattr(raw, 'name', machine_name))
for machine_name, raw in store_profiles.items()
]
# Build a map of machine_name → fetcher class name for the JS visibility system
all_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
for machine_name, raw in store_profiles.items():
try:
all_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
except Exception:
pass
browser_profile_fetchers = {mn: p.get_fetcher_class_name() for mn, p in all_profiles.items()}
# form.browser_steps[0] can be assumed that we 'goto url' first # form.browser_steps[0] can be assumed that we 'goto url' first
@@ -241,7 +210,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Recast it if need be to right data Watch handler # Recast it if need be to right data Watch handler
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor')) watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore, default=datastore.data['watching'][uuid]) datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid])
# Save the watch immediately # Save the watch immediately
datastore.data['watching'][uuid].commit() datastore.data['watching'][uuid].commit()
@@ -327,7 +296,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
template_args = { template_args = {
'available_processors': processors.available_processors(), 'available_processors': processors.available_processors(),
'available_timezones': sorted(available_timezones()), 'available_timezones': sorted(available_timezones()),
'browser_profile_fetchers': browser_profile_fetchers,
'browser_steps_config': browser_step_ui_config, 'browser_steps_config': browser_step_ui_config,
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
'extra_classes': ' '.join(c), 'extra_classes': ' '.join(c),
@@ -352,12 +320,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'using_global_webdriver_wait': not default['webdriver_delay'], 'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid, 'uuid': uuid,
'watch': watch, 'watch': watch,
'capabilities': capabilities, 'capabilities': capabilities
'auto_applied_tags': {
tag_uuid: tag
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
},
} }
included_content = None included_content = None
+8 -7
View File
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
def construct_blueprint(datastore: ChangeDetectionStore): def construct_blueprint(datastore: ChangeDetectionStore):
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates") preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET'])
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
@login_optionally_required @login_optionally_required
def preview_page(uuid): def preview_page(uuid):
""" """
@@ -60,8 +59,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
versions = [] versions = []
timestamp = None timestamp = None
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
fetcher_supports_screenshots = watch.fetcher_supports_screenshots
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
triggered_line_numbers = [] triggered_line_numbers = []
ignored_line_numbers = [] ignored_line_numbers = []
@@ -71,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error") flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
else: else:
# So prepare the latest preview or not # So prepare the latest preview or not
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version') preferred_version = request.args.get('version')
versions = list(watch.history.keys()) versions = list(watch.history.keys())
timestamp = versions[-1] timestamp = versions[-1]
if preferred_version and preferred_version in versions: if preferred_version and preferred_version in versions:
@@ -112,7 +113,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
highlight_triggered_line_numbers=triggered_line_numbers, highlight_triggered_line_numbers=triggered_line_numbers,
highlight_blocked_line_numbers=blocked_line_numbers, highlight_blocked_line_numbers=blocked_line_numbers,
history_n=watch.history_n, history_n=watch.history_n,
fetcher_supports_screenshots=fetcher_supports_screenshots, is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'], last_error=watch['last_error'],
last_error_screenshot=watch.get_error_snapshot(), last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(), last_error_text=watch.get_error_text(),
@@ -143,7 +143,7 @@
<div class="tip"> <div class="tip">
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }} {{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
</div> </div>
{% if fetcher_supports_screenshots %} {% if is_html_webdriver %}
{% if screenshot %} {% if screenshot %}
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div> <div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
<img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}" > <img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}" >
@@ -27,8 +27,7 @@
const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}"; const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}";
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}"; const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
const default_system_fetch_backend = {{ (browser_profile_fetchers.get(settings_application.get('browser_profile') or 'direct_http_requests', 'requests')) | tojson }}; const default_system_fetch_backend="{{ settings_application['fetch_backend'] }}";
const browserProfileFetcherMap = {{ browser_profile_fetchers | tojson }};
</script> </script>
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
@@ -82,14 +81,6 @@
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.tags) }} {{ render_field(form.tags) }}
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span> <span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
{% if auto_applied_tags %}
<span class="pure-form-message-inline">
{{ _('Also automatically applied by URL pattern:') }}
{% for tag_uuid, tag in auto_applied_tags.items() %}
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
{% endfor %}
</span>
{% endif %}
</div> </div>
<div class="pure-control-group inline-radio"> <div class="pure-control-group inline-radio">
{{ render_field(form.processor) }} {{ render_field(form.processor) }}
@@ -140,19 +131,11 @@
{% if capabilities.supports_request_type %} {% if capabilities.supports_request_type %}
<div class="tab-pane-inner" id="request"> <div class="tab-pane-inner" id="request">
<div class="pure-control-group inline-radio"> <div class="pure-control-group inline-radio">
<div><label for="browser_profile">{{ form.browser_profile.label.text }}</label></div> {{ render_field(form.fetch_backend, class="fetch-backend") }}
<div><ul class="fetch-backend" id="browser_profile">
{%- for subfield in form.browser_profile %}
<li>
{{ subfield() }}
{{ browser_profile_fetchers.get(subfield.data, '')|fetcher_status_icons }}
<label for="{{ subfield.id }}">{{ subfield.label.text }}</label>
</li>
{%- endfor %}
</ul></div>
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<p>{{ _('Choose how this watch fetches its target URL. \'System settings default\' inherits the global setting.') }}</p> <p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched site doesn\'t need Javascript to render.') }}</p>
<p>{{ _('Manage browser profiles in') }} <a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Settings → Browsers') }}</a>.</p> <p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.') }} </p>
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
</span> </span>
</div> </div>
{% if form.proxy %} {% if form.proxy %}
@@ -166,7 +149,7 @@
{% endif %} {% endif %}
<!-- webdriver always --> <!-- webdriver always -->
<fieldset data-visible-for="fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser" style="display: none;"> <fieldset data-visible-for="fetch_backend=html_webdriver" style="display: none;">
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.webdriver_delay) }} {{ render_field(form.webdriver_delay) }}
<div class="pure-form-message-inline"> <div class="pure-form-message-inline">
@@ -189,8 +172,8 @@
</div> </div>
</div> </div>
</fieldset> </fieldset>
<!-- requests always --> <!-- html requests always -->
<fieldset data-visible-for="fetch_backend=requests"> <fieldset data-visible-for="fetch_backend=html_requests">
<div class="pure-control-group"> <div class="pure-control-group">
<a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a> <a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
</div> </div>
@@ -227,7 +210,7 @@ Math: {{ 1 + 1 }}") }}
({{ _('Not supported by Selenium browser') }}) ({{ _('Not supported by Selenium browser') }})
</div> </div>
</div> </div>
<fieldset data-visible-for="fetch_backend=requests fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser" > <fieldset data-visible-for="fetch_backend=html_requests fetch_backend=html_webdriver" >
<div class="pure-control-group inline-radio advanced-options" style="display: none;"> <div class="pure-control-group inline-radio advanced-options" style="display: none;">
{{ render_checkbox_field(form.ignore_status_codes) }} {{ render_checkbox_field(form.ignore_status_codes) }}
</div> </div>
@@ -17,7 +17,7 @@
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script> <script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
{% if versions|length >= 2 %} {% if versions|length >= 2 %}
<div id="diff-form" style="text-align: center;"> <div id="diff-form" style="text-align: center;">
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST"> <form class="pure-form " action="" method="POST">
<fieldset> <fieldset>
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version" <label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
name="from_version" name="from_version"
@@ -28,7 +28,6 @@
</option> </option>
{% endfor %} {% endfor %}
</select> </select>
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button> <button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
</fieldset> </fieldset>
@@ -92,7 +92,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
extra_classes='has-queue' if not update_q.empty() else '', extra_classes='has-queue' if not update_q.empty() else '',
form=form, form=form,
generate_tag_colors=processors.generate_processor_badge_colors, generate_tag_colors=processors.generate_processor_badge_colors,
wcag_text_color=processors.wcag_text_color,
guid=datastore.data['app_guid'], guid=datastore.data['app_guid'],
has_proxies=proxy_list, has_proxies=proxy_list,
hosted_sticky=os.getenv("SALTED_PASS", False) == False, hosted_sticky=os.getenv("SALTED_PASS", False) == False,
@@ -106,7 +105,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
search_q=request.args.get('q', '').strip(), search_q=request.args.get('q', '').strip(),
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'), sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
system_default_fetcher=datastore.data['settings']['application'].get('browser_profile'), system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=sorted_tags, tags=sorted_tags,
unread_changes_count=datastore.unread_changes_count, unread_changes_count=datastore.unread_changes_count,
watches=sorted_watches watches=sorted_watches
@@ -71,13 +71,6 @@ document.addEventListener('DOMContentLoaded', function() {
{%- for uuid, tag in tags -%} {%- for uuid, tag in tags -%}
{%- if tag and tag.title -%} {%- if tag and tag.title -%}
{%- set class_name = tag.title|sanitize_tag_class -%} {%- set class_name = tag.title|sanitize_tag_class -%}
{%- if tag.get('tag_colour') -%}
.button-tag.tag-{{ class_name }},
.watch-tag-list.tag-{{ class_name }} {
background-color: {{ tag.tag_colour }};
color: {{ wcag_text_color(tag.tag_colour) }};
}
{%- else -%}
{%- set colors = generate_tag_colors(tag.title) -%} {%- set colors = generate_tag_colors(tag.title) -%}
.button-tag.tag-{{ class_name }} { .button-tag.tag-{{ class_name }} {
background-color: {{ colors['light']['bg'] }}; background-color: {{ colors['light']['bg'] }};
@@ -99,7 +92,6 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
color: {{ colors['dark']['color'] }}; color: {{ colors['dark']['color'] }};
} }
{%- endif -%} {%- endif -%}
{%- endif -%}
{%- endfor -%} {%- endfor -%}
</style> </style>
<div class="box" id="form-quick-watch-add"> <div class="box" id="form-quick-watch-add">
@@ -293,7 +285,10 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
</div> </div>
<div class="status-icons"> <div class="status-icons">
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a> <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
{{ watch.effective_browser_profile.get_fetcher_class_name()|fetcher_status_icons }} {%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
{{ effective_fetcher|fetcher_status_icons }}
{%- endif -%}
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%} {%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%} {%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
+60 -43
View File
@@ -1,4 +1,5 @@
import sys import sys
from changedetectionio.strtobool import strtobool
from loguru import logger from loguru import logger
from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
import os import os
@@ -24,71 +25,87 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
# Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe # Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000)) SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))
# available_fetchers() will scan this implementation looking for anything starting with html_
# this information is used in the form selections
from changedetectionio.content_fetchers.requests import fetcher as html_requests
import importlib.resources import importlib.resources
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8') XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8') INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8') FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')
# Registry: clean fetcher name → fetcher class (e.g. 'requests', 'playwright', 'cloakbrowser')
FETCHERS: dict = {}
def register_fetcher(name: str, cls) -> None:
    """Store ``cls`` in the ``FETCHERS`` registry under ``name``.

    ``name`` is the fetcher's clean name (no ``html_`` prefix).  Registering
    a name that is already present replaces the earlier entry.
    """
    FETCHERS.update({name: cls})
def get_fetcher(name: str):
    """Look up a fetcher class in the ``FETCHERS`` registry by its clean name.

    Returns the registered class, or ``None`` when nothing is registered
    under ``name``.
    """
    try:
        return FETCHERS[name]
    except KeyError:
        return None
def available_fetchers(): def available_fetchers():
"""Return list of (name, description) for all registered fetchers.""" # See the if statement at the bottom of this file for how we switch between playwright and webdriver
return [(name, cls.fetcher_description) for name, cls in FETCHERS.items() import inspect
if hasattr(cls, 'fetcher_description')] p = []
# Get built-in fetchers (but skip plugin fetchers that were added via setattr)
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
if inspect.isclass(obj):
# @todo html_ is maybe better as fetcher_ or something
# In this case, make sure to edit the default one in store.py and fetch_site_status.py
if name.startswith('html_'):
# Skip plugin fetchers that were already registered
if name not in _plugin_fetchers:
t = tuple([name, obj.fetcher_description])
p.append(t)
# Get plugin fetchers from cache (already loaded at module init)
for name, fetcher_class in _plugin_fetchers.items():
if hasattr(fetcher_class, 'fetcher_description'):
t = tuple([name, fetcher_class.fetcher_description])
p.append(t)
else:
logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
return p
def available_browser_fetchers(): def get_plugin_fetchers():
"""Return list of (name, description) for fetchers that support screenshots (browser-type fetchers).""" """Load and return all plugin fetchers from the centralized plugin manager."""
return [(name, cls.fetcher_description) for name, cls in FETCHERS.items() from changedetectionio.pluggy_interface import plugin_manager
if cls.supports_screenshots]
fetchers = {}
def _load_fetchers():
"""Load all fetchers (built-ins + plugins) into the FETCHERS registry."""
from changedetectionio.pluggy_interface import plugin_manager, register_builtin_fetchers
# Built-ins must be registered first
register_builtin_fetchers()
# Then external plugins
try: try:
# Call the register_content_fetcher hook from all registered plugins
results = plugin_manager.hook.register_content_fetcher() results = plugin_manager.hook.register_content_fetcher()
for result in results: for result in results:
if result: if result:
name, fetcher_class = result name, fetcher_class = result
register_fetcher(name, fetcher_class) fetchers[name] = fetcher_class
logger.info(f"Registered fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', '?')}") # Register in current module so hasattr() checks work
setattr(sys.modules[__name__], name, fetcher_class)
logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")
except Exception as e: except Exception as e:
logger.error(f"Error loading plugin fetchers: {e}") logger.error(f"Error loading plugin fetchers: {e}")
return fetchers
# Default browser profiles always shown in the browser profiles table (keyed by machine name) # Initialize plugins at module load time
DEFAULT_BROWSER_PROFILES: dict = {} _plugin_fetchers = get_plugin_fetchers()
def _register_default_browser_profiles(): # Decide which is the 'real' HTML webdriver, this is more a system wide config
"""Register browser profiles that are always present in the profiles table.""" # rather than site-specific.
from changedetectionio.model.browser_profile import BUILTIN_REQUESTS use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
DEFAULT_BROWSER_PROFILES[BUILTIN_REQUESTS.get_machine_name()] = BUILTIN_REQUESTS if use_playwright_as_chrome_fetcher:
# @note - For now, browser steps always uses playwright
if not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
logger.debug('Using Playwright library as fetcher')
from .playwright import fetcher as html_webdriver
else:
logger.debug('Using direct Python Puppeteer library as fetcher')
from .puppeteer import fetcher as html_webdriver
else:
logger.debug("Falling back to selenium as fetcher")
from .webdriver_selenium import fetcher as html_webdriver
# Populate the registry at module load time # Register built-in fetchers as plugins after all imports are complete
_load_fetchers() from changedetectionio.pluggy_interface import register_builtin_fetchers
register_builtin_fetchers()
_register_default_browser_profiles()
+18 -32
View File
@@ -70,41 +70,37 @@ class Fetcher():
supports_screenshots = False # Can capture page screenshots supports_screenshots = False # Can capture page screenshots
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
# Icon shown in the watch list when this fetcher is the effective fetcher.
# Set to a dict with 'filename', 'alt', 'title' keys (image served from static/images/).
# None means no icon is shown (e.g. plain HTTP requests fetcher).
status_icon = None
# Screenshot element locking - prevents layout shifts during screenshot capture # Screenshot element locking - prevents layout shifts during screenshot capture
# Only needed for visual comparison (image_ssim_diff processor) # Only needed for visual comparison (image_ssim_diff processor)
# Locks element dimensions in the first viewport to prevent headers/ads from resizing # Locks element dimensions in the first viewport to prevent headers/ads from resizing
lock_viewport_elements = False # Default: disabled for performance lock_viewport_elements = False # Default: disabled for performance
# BrowserProfile-derived settings — applied by browser fetchers, ignored by html_requests
viewport_width: int = 1280
viewport_height: int = 1000
block_images: bool = False
block_fonts: bool = False
profile_user_agent: str = None # Profile-level UA; lower priority than request_headers User-Agent
ignore_https_errors: bool = False
locale: str = None
service_workers: str = 'allow'
extra_delay: int = 0
def __init__(self, **kwargs): def __init__(self, **kwargs):
if kwargs and 'screenshot_format' in kwargs: if kwargs and 'screenshot_format' in kwargs:
self.screenshot_format = kwargs.get('screenshot_format') self.screenshot_format = kwargs.get('screenshot_format')
# Allow lock_viewport_elements to be set via kwargs
if kwargs and 'lock_viewport_elements' in kwargs: if kwargs and 'lock_viewport_elements' in kwargs:
self.lock_viewport_elements = kwargs.get('lock_viewport_elements') self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
# BrowserProfile fields — store whatever was passed, subclasses use them
for field in ('viewport_width', 'viewport_height', 'block_images', 'block_fonts',
'profile_user_agent', 'ignore_https_errors', 'locale',
'service_workers', 'extra_delay'):
if field in kwargs:
setattr(self, field, kwargs[field])
@classmethod
def get_status_icon_data(cls):
    """Describe the status icon shown for this fetcher in the watch overview.

    The base implementation provides no icon.  Subclasses may override this
    and return a dictionary of the form::

        {
            'filename': 'icon-name.svg',   # Icon filename
            'alt': 'Alt text',             # Alt attribute
            'title': 'Tooltip text',       # Title attribute
            'style': 'height: 1em;'        # Optional inline CSS
        }

    Returns:
        dict or None: the icon description, or ``None`` for "no icon".
    """
    no_icon = None
    return no_icon
def clear_content(self): def clear_content(self):
""" """
@@ -202,16 +198,6 @@ class Fetcher():
# Stop processing here # Stop processing here
raise BrowserStepsStepException(step_n=step_n, original_e=e) raise BrowserStepsStepException(step_n=step_n, original_e=e)
def disk_cleanup_after_fetch(self):
    """Delete temporary files left on disk by a fetch.

    This base implementation intentionally does nothing.  Browser-based
    fetchers are expected to override it to remove browser-step screenshots
    and other ephemeral files they create.  The processor calls it after
    ``quit()``, whether the fetch succeeded or failed.
    """
    return None
# It's always good to reset these # It's always good to reset these
def delete_browser_steps_screenshots(self): def delete_browser_steps_screenshots(self):
import glob import glob
@@ -49,9 +49,6 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
if page_height > page.viewport_size['height']: if page_height > page.viewport_size['height']:
if page_height < step_size: if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
viewport_start = time.time() viewport_start = time.time()
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size") logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
# Set viewport to a larger size to capture more content at once # Set viewport to a larger size to capture more content at once
@@ -1,41 +0,0 @@
"""
Playwright CDP fetcher connects to a remote browser via Chrome DevTools Protocol.
browser_connection_url must be supplied via the resolved BrowserProfile
(set by preconfigure_browser_profiles_based_on_env at startup or edited in the UI).
"""
from loguru import logger
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that attaches to a remote browser over CDP.

    The connection URL is taken from ``custom_browser_connection_url``; when
    it is missing the problem is logged at critical level and the URL is
    left as ``None``.
    """

    fetcher_description = "Playwright Chrome (CDP/Remote)"
    requires_connection_url = True

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        """Initialise the base fetcher, then record the CDP endpoint to use."""
        super().__init__(
            proxy_override=proxy_override,
            custom_browser_connection_url=custom_browser_connection_url,
            **kwargs,
        )

        if not custom_browser_connection_url:
            # No endpoint was supplied: report it loudly, keep the URL unset.
            logger.critical("Playwright CDP fetcher has no browser_connection_url — browser profile was not configured. "
                            "Set PLAYWRIGHT_DRIVER_URL or configure a browser profile in Settings.")
            self.browser_connection_url = None
        else:
            self.browser_connection_is_custom = True
            self.browser_connection_url = custom_browser_connection_url

        # CDP always connects to Chromium
        self.browser_type = 'chromium'

    async def _connect_browser(self, p):
        """Connect to the configured endpoint via ``connect_over_cdp`` and return the browser."""
        engine = getattr(p, self.browser_type)
        browser = await engine.connect_over_cdp(self.browser_connection_url, timeout=60_000)
        return browser
class PlaywrightCDPPlugin:
    """Plugin object that exposes the CDP fetcher through the content-fetcher hook."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher_class)`` pair registering this fetcher."""
        registration = ('playwright_cdp', fetcher)
        return registration
cdp_plugin = PlaywrightCDPPlugin()
@@ -1,403 +0,0 @@
"""
Playwright-based content fetchers.
Submodules:
cdp      - connect to a remote browser via Chrome DevTools Protocol (CDP/WebSocket)
chrome   - launch a local Chromium browser
firefox  - launch a local Firefox browser
webkit   - launch a local WebKit (Safari-engine) browser
"""
import asyncio
import gc
import json
import os
import re
from urllib.parse import urlparse
from loguru import logger
from changedetectionio.content_fetchers import (
SCREENSHOT_MAX_HEIGHT_DEFAULT,
SCREENSHOT_MAX_TOTAL_HEIGHT,
SCREENSHOT_SIZE_STITCH_THRESHOLD,
FAVICON_FETCHER_JS,
INSTOCK_DATA_JS,
XPATH_ELEMENT_JS,
visualselector_xpath_selectors,
)
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import (
BrowserStepsStepException,
EmptyReply,
Non200ErrorCodeReceived,
PageUnloadable,
ScreenshotUnavailable,
)
def _read_res_script(filename):
    """Return the text of a JavaScript helper stored in the sibling ``res`` directory."""
    script_path = os.path.join(os.path.dirname(__file__), '..', 'res', filename)
    with open(script_path, 'r') as handle:
        return handle.read()


def _stitch_chunks_in_subprocess(screenshot_chunks, page_height):
    """Stitch screenshot chunks into one image using a spawned worker process.

    The worker is sent the number of chunks first, then each chunk as its own
    message, and replies with the stitched image bytes.
    """
    from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
    import multiprocessing
    import struct

    spawn_ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = spawn_ctx.Pipe()
    worker = spawn_ctx.Process(
        target=stitch_images_worker_raw_bytes,
        args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT),
    )
    worker.start()

    parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
    for piece in screenshot_chunks:
        parent_conn.send_bytes(piece)
    stitched = parent_conn.recv_bytes()

    worker.join()
    parent_conn.close()
    child_conn.close()
    return stitched


async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
    """Screenshot a page in viewport-sized chunks and return a single image.

    The viewport is temporarily enlarged (up to ``SCREENSHOT_SIZE_STITCH_THRESHOLD``)
    when the page is taller than it, chunks are captured while scrolling down
    until ``SCREENSHOT_MAX_TOTAL_HEIGHT`` is reached, and the original viewport
    is restored afterwards.  More than one chunk is stitched in a separate
    process; a single chunk is returned as-is.

    Args:
        page: the page object to capture (Playwright async page API).
        screenshot_format: ``'JPEG'`` or ``'PNG'`` (case-insensitive); a falsy
            value falls back to JPEG.
        watch_uuid: optional identifier, used only as a log prefix.
        lock_viewport_elements: when true and the page is taller than the
            viewport, run the element-locking script before resizing and the
            unlocking script after capture.

    Returns:
        The screenshot image data.
    """
    import time

    started_at = time.time()
    watch_info = f"[{watch_uuid}] " if watch_uuid else ""

    setup_start = time.time()
    page_height = await page.evaluate("document.documentElement.scrollHeight")
    page_width = await page.evaluate("document.documentElement.scrollWidth")
    original_viewport = page.viewport_size
    dimensions_time = time.time() - setup_start
    logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")

    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD
    screenshot_chunks = []
    y = 0
    elements_locked = False

    # Freeze element sizes first so the viewport resize below cannot reflow them.
    if lock_viewport_elements and page_height > page.viewport_size['height']:
        lock_start = time.time()
        await page.evaluate(_read_res_script('lock-elements-sizing.js'))
        elements_locked = True
        logger.debug(f"{watch_info}Viewport element locking enabled (took {time.time() - lock_start:.2f}s)")

    if page_height > page.viewport_size['height']:
        # A page shorter than the chunk size only needs a viewport as tall as itself.
        if page_height < step_size:
            step_size = page_height
        await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})

    capture_start = time.time()
    chunk_times = []
    screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
    if screenshot_type == 'png':
        screenshot_quality = 100
    else:
        screenshot_quality = int(os.getenv("SCREENSHOT_QUALITY", 72))

    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
        if y > 0:
            await page.evaluate(f"window.scrollTo(0, {y})")
        await _safe_request_gc(page)

        shot_options = dict(type=screenshot_type, full_page=False)
        # The quality option is only passed for JPEG captures.
        if screenshot_type == 'jpeg':
            shot_options['quality'] = screenshot_quality

        chunk_start = time.time()
        screenshot_chunks.append(await page.screenshot(**shot_options))
        chunk_time = time.time() - chunk_start
        chunk_times.append(chunk_time)
        logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
        y += step_size

    # Put the viewport back the way we found it.
    await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})

    if elements_locked:
        await page.evaluate(_read_res_script('unlock-elements-sizing.js'))

    capture_time = time.time() - capture_start

    if len(screenshot_chunks) <= 1:
        total_time = time.time() - started_at
        logger.debug(
            f"{watch_info}Screenshot complete - Page height: {page_height}px | "
            f"Setup: {total_time - capture_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
        return screenshot_chunks[0]

    stitch_start = time.time()
    screenshot = _stitch_chunks_in_subprocess(screenshot_chunks, page_height)
    stitch_time = time.time() - stitch_start

    total_time = time.time() - started_at
    setup_time = total_time - capture_time - stitch_time
    logger.debug(
        f"{watch_info}Screenshot complete - Page height: {page_height}px | "
        f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
    return screenshot
async def _safe_request_gc(page):
    """Best-effort request for the browser behind *page* to run its GC.

    The call is Chromium-specific; any failure (including on Firefox/WebKit)
    is swallowed so callers can invoke this unconditionally.
    """
    try:
        pending_gc = page.request_gc()
        await pending_gc
    except Exception:
        # Deliberately ignored: GC is an optimisation, never a requirement.
        return None
    return None
class PlaywrightBaseFetcher(Fetcher):
    """
    Shared base for all Playwright fetchers.

    Subclasses implement ``_connect_browser(playwright_instance)`` to return a
    connected-or-launched browser object. Everything else — context creation,
    page interaction, screenshot capture, browser-steps execution — lives here.
    """

    # Suffixes looked up as ``playwright_proxy_<suffix>`` environment variables.
    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
    proxy = None

    # Capability flags
    supports_browser_steps = True
    supports_screenshots = True
    supports_xpath_element_data = True

    status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        """Build the proxy configuration for this fetcher instance.

        :param proxy_override: proxy server URL; when set it replaces any
            proxy assembled from the ``playwright_proxy_*`` environment.
        :param custom_browser_connection_url: stored for subclasses that
            connect to a remote browser (e.g. CDP); others ignore it.
        :param kwargs: forwarded unchanged to the parent ``Fetcher``.
        """
        super().__init__(**kwargs)
        # Subclasses may use this (e.g. CDP); others ignore it
        self._custom_browser_connection_url = custom_browser_connection_url

        proxy_args = {}
        for k in self.playwright_proxy_settings_mappings:
            v = os.getenv('playwright_proxy_' + k, False)
            if v:
                # Tolerate values accidentally wrapped in double quotes.
                proxy_args[k] = v.strip('"')
        if proxy_args:
            self.proxy = proxy_args

        if proxy_override:
            self.proxy = {'server': proxy_override}

        if self.proxy:
            # Credentials embedded in the server URL (user:pass@host) are
            # copied into the explicit username/password fields.
            parsed = urlparse(self.proxy.get('server', ''))
            if parsed.username:
                self.proxy['username'] = parsed.username
                self.proxy['password'] = parsed.password

    def disk_cleanup_after_fetch(self):
        """Delete browser-step screenshots written during this fetch."""
        self.delete_browser_steps_screenshots()

    async def _connect_browser(self, playwright_instance):
        """Return an open browser object. Must be overridden by each subclass."""
        raise NotImplementedError(f"{type(self).__name__} must implement _connect_browser()")

    @staticmethod
    async def _close_quietly(target, name, url):
        """Close *target* with a 5 second cap, logging instead of raising.

        :param target: object exposing an awaitable ``close()``, or ``None``.
        :param name: label used in the log message ('page', 'context', ...).
        :param url: URL being fetched, included in the log message.
        """
        if target is None:
            return
        try:
            await asyncio.wait_for(target.close(), timeout=5.0)
        except asyncio.TimeoutError:
            logger.warning(f"Timed out closing {name} for {url}")
        except Exception as e:
            logger.warning(f"Error closing {name} for {url}: {e}")

    async def screenshot_step(self, step_n=''):
        """Capture a full-page screenshot for browser step *step_n*.

        The image is written to ``step_<n>.jpeg`` only when
        ``browser_steps_screenshot_path`` is set.
        """
        super().screenshot_step(step_n=step_n)
        watch_uuid = getattr(self, 'watch_uuid', None)
        screenshot = await capture_full_page_async(
            page=self.page,
            screenshot_format=self.screenshot_format,
            watch_uuid=watch_uuid,
            lock_viewport_elements=self.lock_viewport_elements,
        )
        await _safe_request_gc(self.page)

        if self.browser_steps_screenshot_path is not None:
            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
            logger.debug(f"Saving step screenshot to {destination}")
            with open(destination, 'wb') as f:
                f.write(screenshot)

        # Drop the (potentially large) image bytes promptly.
        del screenshot
        gc.collect()

    async def save_step_html(self, step_n):
        """Save the current page HTML for browser step *step_n*.

        Mirrors ``screenshot_step``: the file ``step_<n>.html`` is written only
        when ``browser_steps_screenshot_path`` is set, instead of failing on
        ``os.path.join(None, ...)``.
        """
        super().save_step_html(step_n=step_n)
        content = await self.page.content()
        await _safe_request_gc(self.page)

        if self.browser_steps_screenshot_path is not None:
            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
            logger.debug(f"Saving step HTML to {destination}")
            with open(destination, 'w', encoding='utf-8') as f:
                f.write(content)

        del content
        gc.collect()

    async def run(self,
                  fetch_favicon=True,
                  current_include_filters=None,
                  empty_pages_are_a_change=False,
                  ignore_status_codes=False,
                  is_binary=False,
                  request_body=None,
                  request_headers=None,
                  request_method=None,
                  screenshot_format=None,
                  timeout=None,
                  url=None,
                  watch_uuid=None,
                  ):
        """Fetch *url* in a Playwright browser and store the results on ``self``.

        On success this sets ``headers``, ``status_code``, ``xpath_data``,
        ``instock_data``, ``content`` and ``screenshot`` (and ``favicon_blob``
        when *fetch_favicon* is true and the favicon could be read).

        The page, context and browser are closed on every exit path, each with
        a 5 second cap; close failures are logged rather than raised.

        :param fetch_favicon: evaluate the favicon fetcher script in the page.
        :param current_include_filters: exposed to the page as the JS global
            ``include_filters`` before element data is scraped.
        :param empty_pages_are_a_change: when false, an empty page raises
            ``EmptyReply``.
        :param ignore_status_codes: when false, a non-200 status raises
            ``Non200ErrorCodeReceived``.
        :param request_headers: extra HTTP headers for the browser context.
        :param url: address to load.
        :param watch_uuid: stored on ``self`` and passed to screenshot capture.
        :param is_binary, request_body, request_method, screenshot_format,
            timeout: accepted for signature compatibility; not read here
            (``self.screenshot_format`` is what the capture uses).
        :raises EmptyReply: no response object, or an empty page.
        :raises PageUnloadable: custom JS failed or the status was unreadable.
        :raises Non200ErrorCodeReceived: non-200 status (carries screenshot/HTML).
        :raises ScreenshotUnavailable: the final screenshot could not be taken.
        """
        from playwright.async_api import async_playwright
        import playwright._impl._errors
        import time

        self.delete_browser_steps_screenshots()
        self.watch_uuid = watch_uuid
        response = None

        async with async_playwright() as p:
            browser = await self._connect_browser(p)
            context = None
            # Reset so the cleanup below only ever closes a page opened by this run.
            self.page = None
            try:
                ua = manage_user_agent(headers=request_headers) or self.profile_user_agent or None
                context_kwargs = dict(
                    accept_downloads=False,
                    bypass_csp=True,
                    extra_http_headers=request_headers,
                    ignore_https_errors=self.ignore_https_errors,
                    proxy=self.proxy,
                    service_workers=self.service_workers,
                    user_agent=ua,
                    viewport={'width': self.viewport_width, 'height': self.viewport_height},
                )
                if self.locale:
                    context_kwargs['locale'] = self.locale
                context = await browser.new_context(**context_kwargs)

                if self.block_images:
                    await context.route(
                        re.compile(r'\.(png|jpe?g|gif|svg|ico|webp|avif|bmp)(\?.*)?$', re.IGNORECASE),
                        lambda route: route.abort()
                    )
                if self.block_fonts:
                    await context.route(
                        re.compile(r'\.(woff2?|ttf|otf|eot)(\?.*)?$', re.IGNORECASE),
                        lambda route: route.abort()
                    )

                self.page = await context.new_page()
                self.page.on("console", lambda msg: logger.debug(f"Playwright console: {url} {msg.type}: {msg.text}"))

                from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
                browsersteps_interface = steppable_browser_interface(start_url=url)
                browsersteps_interface.page = self.page

                response = await browsersteps_interface.action_goto_url(value=url)
                if response is None:
                    raise EmptyReply(url=url, status_code=None)

                # all_headers() may or may not be awaitable depending on the response object.
                try:
                    self.headers = await response.all_headers()
                except TypeError:
                    self.headers = response.all_headers()

                try:
                    if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                        await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
                except playwright._impl._errors.TimeoutError as e:
                    # A timeout here is tolerated: keep the browser open and
                    # carry on with whatever the page managed to render.
                    logger.debug(f"Timeout running custom JS for {url}, continuing: {e}")
                except Exception as e:
                    raise PageUnloadable(url=url, status_code=None, message=str(e))

                extra_wait = self.extra_delay + self.render_extract_delay
                await self.page.wait_for_timeout(extra_wait * 1000)

                try:
                    self.status_code = response.status
                except Exception as e:
                    raise PageUnloadable(url=url, status_code=None, message=str(e))

                if fetch_favicon:
                    try:
                        self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
                        await _safe_request_gc(self.page)
                    except Exception as e:
                        logger.error(f"Error fetching favicon: {e}")

                if self.status_code != 200 and not ignore_status_codes:
                    screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
                    try:
                        page_html = await self.page.content()
                    except Exception as e:
                        logger.warning(f"Got non-200 status {self.status_code} but failed to fetch page content: {e}")
                        page_html = None
                    raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot, page_html=page_html)

                if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
                    raise EmptyReply(url=url, status_code=response.status)

                try:
                    if self.browser_steps:
                        await self.iterate_browser_steps(start_url=url)

                    await self.page.wait_for_timeout(extra_wait * 1000)
                    now = time.time()

                    if current_include_filters is not None:
                        await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
                    else:
                        await self.page.evaluate("var include_filters=''")
                    await _safe_request_gc(self.page)

                    MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
                    self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
                        "visualselector_xpath_selectors": visualselector_xpath_selectors,
                        "max_height": MAX_TOTAL_HEIGHT
                    })
                    await _safe_request_gc(self.page)

                    self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
                    await _safe_request_gc(self.page)

                    self.content = await self.page.content()
                    await _safe_request_gc(self.page)
                    logger.debug(f"Scrape xPath element data done in {time.time() - now:.2f}s")

                    self.screenshot = await capture_full_page_async(
                        page=self.page,
                        screenshot_format=self.screenshot_format,
                        watch_uuid=watch_uuid,
                        lock_viewport_elements=self.lock_viewport_elements,
                    )
                    await _safe_request_gc(self.page)
                    gc.collect()
                except ScreenshotUnavailable:
                    # Re-raise with the URL and status attached for the caller.
                    raise ScreenshotUnavailable(url=url, status_code=self.status_code)
            finally:
                # Single cleanup point for every exit path, innermost object first.
                await self._close_quietly(self.page, 'page', url)
                await self._close_quietly(context, 'context', url)
                await self._close_quietly(browser, 'browser', url)
                self.page = None
                context = None
                browser = None
                gc.collect()
@@ -1,27 +0,0 @@
"""
Playwright Chrome fetcher — launches a local Chromium browser directly.
No external browser container is required. Playwright must be installed
with Chromium browsers: ``playwright install chromium``.
"""
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that launches a local headless Chromium."""

    fetcher_description = "Playwright Chrome (local)"

    async def _connect_browser(self, p):
        """Launch headless Chromium via *p*, passing ``self.proxy`` when set."""
        options = dict(headless=True)
        if self.proxy:
            options.update(proxy=self.proxy)
        launched = await p.chromium.launch(**options)
        return launched
class PlaywrightChromePlugin:
    """Plugin object exposing the local Chromium fetcher through ``hookimpl``."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher class)`` pair for this fetcher."""
        registration = ('playwright_chrome', fetcher)
        return registration


# Module-level plugin instance.
chrome_plugin = PlaywrightChromePlugin()
@@ -1,33 +0,0 @@
"""
Playwright Firefox fetcher — launches a local Firefox browser directly.
No external browser container is required. Playwright must be installed
with Firefox browsers: ``playwright install firefox``.
Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
on Firefox — this is handled transparently by ``_safe_request_gc()`` in
the base package.
"""
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that launches a local headless Firefox."""

    fetcher_description = "Playwright Firefox (local)"
    status_icon = {'filename': 'firefox-icon.svg', 'alt': 'Using Firefox', 'title': 'Using Firefox'}

    async def _connect_browser(self, p):
        """Launch headless Firefox via *p*, passing ``self.proxy`` when set."""
        options = dict(headless=True)
        if self.proxy:
            options.update(proxy=self.proxy)
        launched = await p.firefox.launch(**options)
        return launched
class PlaywrightFirefoxPlugin:
    """Plugin object exposing the local Firefox fetcher through ``hookimpl``."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher class)`` pair for this fetcher."""
        registration = ('playwright_firefox', fetcher)
        return registration


# Module-level plugin instance.
firefox_plugin = PlaywrightFirefoxPlugin()
@@ -1,30 +0,0 @@
"""
Playwright WebKit fetcher — launches a local WebKit (Safari-engine) browser.
No external browser container is required. Playwright must be installed
with WebKit browsers: ``playwright install webkit``.
Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
on WebKit — handled transparently by ``_safe_request_gc()`` in the base package.
"""
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that launches a local headless WebKit."""

    fetcher_description = "Playwright WebKit/Safari (local)"

    async def _connect_browser(self, p):
        """Launch headless WebKit via *p*, passing ``self.proxy`` when set."""
        options = dict(headless=True)
        if self.proxy:
            options.update(proxy=self.proxy)
        launched = await p.webkit.launch(**options)
        return launched
class PlaywrightWebKitPlugin:
    """Plugin object exposing the local WebKit fetcher through ``hookimpl``."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher class)`` pair for this fetcher."""
        registration = ('playwright_webkit', fetcher)
        return registration


# Module-level plugin instance.
webkit_plugin = PlaywrightWebKitPlugin()
+20 -23
View File
@@ -7,7 +7,6 @@ from urllib.parse import urlparse
from loguru import logger from loguru import logger
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \ SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
@@ -76,9 +75,6 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
if page_height > page.viewport['height']: if page_height > page.viewport['height']:
if page_height < step_size: if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
viewport_start = time.time() viewport_start = time.time()
await page.setViewport({'width': page.viewport['width'], 'height': step_size}) await page.setViewport({'width': page.viewport['width'], 'height': step_size})
viewport_time = time.time() - viewport_start viewport_time = time.time() - viewport_start
@@ -170,8 +166,11 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
class fetcher(Fetcher): class fetcher(Fetcher):
fetcher_description = "Puppeteer Chromium" fetcher_description = "Puppeteer/direct {}/Javascript".format(
requires_connection_url = True os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
)
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
browser = None browser = None
browser_type = '' browser_type = ''
@@ -183,10 +182,14 @@ class fetcher(Fetcher):
supports_screenshots = True supports_screenshots = True
supports_xpath_element_data = True supports_xpath_element_data = True
status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'} @classmethod
def get_status_icon_data(cls):
def disk_cleanup_after_fetch(self): """Return Chrome browser icon data for Puppeteer fetcher."""
self.delete_browser_steps_screenshots() return {
'filename': 'google-chrome-icon.png',
'alt': 'Using a Chrome browser',
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs): def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
@@ -195,10 +198,9 @@ class fetcher(Fetcher):
self.browser_connection_is_custom = True self.browser_connection_is_custom = True
self.browser_connection_url = custom_browser_connection_url self.browser_connection_url = custom_browser_connection_url
else: else:
from loguru import logger # Fallback to fetching from system
logger.critical("Puppeteer fetcher has no browser_connection_url — browser profile was not configured. " # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
"Set PLAYWRIGHT_DRIVER_URL or configure a browser profile in Settings.") self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')
self.browser_connection_url = None
# allow per-watch proxy selection override # allow per-watch proxy selection override
# @todo check global too? # @todo check global too?
@@ -268,7 +270,7 @@ class fetcher(Fetcher):
import re import re
self.delete_browser_steps_screenshots() self.delete_browser_steps_screenshots()
n = self.extra_delay + self.render_extract_delay n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 12)) + self.render_extract_delay
extra_wait = min(n, 15) extra_wait = min(n, 15)
logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.") logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
@@ -445,12 +447,8 @@ class fetcher(Fetcher):
if self.status_code != 200 and not ignore_status_codes: if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements) screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
try:
page_html = await self.page.content raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
except Exception as e:
logger.warning(f"Got non-200 status {self.status_code} but failed to fetch page content: {e}")
page_html = None
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot, page_html=page_html)
content = await self.page.content content = await self.page.content
@@ -550,10 +548,9 @@ class fetcher(Fetcher):
class PuppeteerFetcherPlugin: class PuppeteerFetcherPlugin:
"""Plugin class that registers the Puppeteer fetcher as a built-in plugin.""" """Plugin class that registers the Puppeteer fetcher as a built-in plugin."""
@hookimpl
def register_content_fetcher(self): def register_content_fetcher(self):
"""Register the Puppeteer fetcher""" """Register the Puppeteer fetcher"""
return ('puppeteer', fetcher) return ('html_webdriver', fetcher)
# Create module-level instance for plugin registration # Create module-level instance for plugin registration
@@ -8,7 +8,6 @@ import asyncio
from changedetectionio import strtobool from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.validate_url import is_private_hostname from changedetectionio.validate_url import is_private_hostname
@@ -259,10 +258,9 @@ class fetcher(Fetcher):
class RequestsFetcherPlugin: class RequestsFetcherPlugin:
"""Plugin class that registers the requests fetcher as a built-in plugin.""" """Plugin class that registers the requests fetcher as a built-in plugin."""
@hookimpl
def register_content_fetcher(self): def register_content_fetcher(self):
"""Register the requests fetcher""" """Register the requests fetcher"""
return ('requests', fetcher) return ('html_requests', fetcher)
# Create module-level instance for plugin registration # Create module-level instance for plugin registration
@@ -38,39 +38,26 @@
if (a.size !== b.size) { if (a.size !== b.size) {
return b.size - a.size; return b.size - a.size;
} }
// Second priority: apple-touch-icon over regular icon // Second priority: apple-touch-icon over regular icon
const isAppleA = /apple-touch-icon/.test(a.rel); const isAppleA = /apple-touch-icon/.test(a.rel);
const isAppleB = /apple-touch-icon/.test(b.rel); const isAppleB = /apple-touch-icon/.test(b.rel);
if (isAppleA && !isAppleB) return -1; if (isAppleA && !isAppleB) return -1;
if (!isAppleA && isAppleB) return 1; if (!isAppleA && isAppleB) return 1;
// Third priority: icons with no size attribute (fallback icons) last // Third priority: icons with no size attribute (fallback icons) last
const hasNoSizeA = !a.hasSizes; const hasNoSizeA = !a.hasSizes;
const hasNoSizeB = !b.hasSizes; const hasNoSizeB = !b.hasSizes;
if (hasNoSizeA && !hasNoSizeB) return 1; if (hasNoSizeA && !hasNoSizeB) return 1;
if (!hasNoSizeA && hasNoSizeB) return -1; if (!hasNoSizeA && hasNoSizeB) return -1;
return 0; return 0;
}); });
const timeoutMs = 2000; const timeoutMs = 2000;
// 1 MB — matches the server-side limit in bump_favicon()
const MAX_BYTES = 1 * 1024 * 1024;
for (const icon of icons) { for (const icon of icons) {
try { try {
// Inline data URI — no network fetch needed, data is already here
if (icon.href.startsWith('data:')) {
const match = icon.href.match(/^data:([^;]+);base64,([A-Za-z0-9+/=]+)$/);
if (!match) continue;
const mime_type = match[1];
const base64 = match[2];
// Rough size check: base64 is ~4/3 the binary size
if (base64.length * 0.75 > MAX_BYTES) continue;
return { url: icon.href, mime_type, base64 };
}
const controller = new AbortController(); const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), timeoutMs); const timeout = setTimeout(() => controller.abort(), timeoutMs);
@@ -87,15 +74,12 @@
const blob = await resp.blob(); const blob = await resp.blob();
if (blob.size > MAX_BYTES) continue;
// Convert blob to base64 // Convert blob to base64
const reader = new FileReader(); const reader = new FileReader();
return await new Promise(resolve => { return await new Promise(resolve => {
reader.onloadend = () => { reader.onloadend = () => {
resolve({ resolve({
url: icon.href, url: icon.href,
mime_type: blob.type,
base64: reader.result.split(",")[1] base64: reader.result.split(",")[1]
}); });
}; };
@@ -114,3 +98,4 @@
// Auto-execute and return result for page.evaluate() // Auto-execute and return result for page.evaluate()
return await window.getFaviconAsBlob(); return await window.getFaviconAsBlob();
})(); })();
@@ -56,10 +56,6 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
im.close() im.close()
del images del images
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
if total_height > capture_height:
stitched = stitched.crop((0, 0, max_width, capture_height))
# Draw caption only if page was trimmed # Draw caption only if page was trimmed
if original_page_height > capture_height: if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched) draw = ImageDraw.Draw(stitched)
@@ -3,13 +3,13 @@ import time
from loguru import logger from loguru import logger
from changedetectionio.content_fetchers.base import Fetcher from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.content_fetchers.exceptions import Non200ErrorCodeReceived
from changedetectionio.pluggy_interface import hookimpl
class fetcher(Fetcher): class fetcher(Fetcher):
fetcher_description = "Selenium WebDriver Chrome" if os.getenv("WEBDRIVER_URL"):
requires_connection_url = True fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\""
else:
fetcher_description = "WebDriver Chrome/Javascript"
proxy = None proxy = None
proxy_url = None proxy_url = None
@@ -19,21 +19,26 @@ class fetcher(Fetcher):
supports_screenshots = True supports_screenshots = True
supports_xpath_element_data = True supports_xpath_element_data = True
status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'} @classmethod
def get_status_icon_data(cls):
"""Return Chrome browser icon data for WebDriver fetcher."""
return {
'filename': 'google-chrome-icon.png',
'alt': 'Using a Chrome browser',
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs): def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
from urllib.parse import urlparse from urllib.parse import urlparse
from selenium.webdriver.common.proxy import Proxy from selenium.webdriver.common.proxy import Proxy
if custom_browser_connection_url: # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
if not custom_browser_connection_url:
self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
else:
self.browser_connection_is_custom = True self.browser_connection_is_custom = True
self.browser_connection_url = custom_browser_connection_url self.browser_connection_url = custom_browser_connection_url
else:
from loguru import logger
logger.critical("Selenium WebDriver fetcher has no browser_connection_url — browser profile was not configured. "
"Set WEBDRIVER_URL or configure a browser profile in Settings.")
self.browser_connection_url = None
##### PROXY SETUP ##### ##### PROXY SETUP #####
@@ -99,17 +104,15 @@ class fetcher(Fetcher):
from selenium.webdriver.remote.remote_connection import RemoteConnection from selenium.webdriver.remote.remote_connection import RemoteConnection
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
from selenium.webdriver.remote.client_config import ClientConfig
from urllib3.util import Timeout
driver = None driver = None
try: try:
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90)) # Create the RemoteConnection and set timeout (e.g., 30 seconds)
client_config = ClientConfig( remote_connection = RemoteConnection(
remote_server_addr=self.browser_connection_url, self.browser_connection_url,
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
) )
remote_connection = RemoteConnection(client_config=client_config) remote_connection.set_timeout(30) # seconds
# Now create the driver with the RemoteConnection
driver = RemoteWebDriver( driver = RemoteWebDriver(
command_executor=remote_connection, command_executor=remote_connection,
options=options options=options
@@ -127,28 +130,22 @@ class fetcher(Fetcher):
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""): if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
driver.set_window_size(1280, 1024) driver.set_window_size(1280, 1024)
driver.implicitly_wait(self.extra_delay) driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
if self.webdriver_js_execute_code is not None: if self.webdriver_js_execute_code is not None:
driver.execute_script(self.webdriver_js_execute_code) driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
driver.implicitly_wait(self.extra_delay) driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
# @todo - how to check this? is it possible?
self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
# raise EmptyReply(url=url, status_code=r.status_code)
# @todo - dom wait loaded? # @todo - dom wait loaded?
import time import time
time.sleep(self.extra_delay + self.render_extract_delay) time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = driver.page_source self.content = driver.page_source
# Use Navigation Timing API to get the real HTTP status code (Chrome 102+)
# Read after the sleep so the page is fully settled
try:
nav_status = driver.execute_script(
"return window.performance.getEntriesByType('navigation')[0]?.responseStatus"
)
# Guard against 0 (file://, blocked requests) which should not raise Non200
self.status_code = int(nav_status) if nav_status and int(nav_status) > 0 else 200
except Exception:
self.status_code = 200
self.headers = {} self.headers = {}
# Selenium always captures as PNG, convert to JPEG if needed # Selenium always captures as PNG, convert to JPEG if needed
@@ -178,10 +175,6 @@ class fetcher(Fetcher):
img.close() img.close()
else: else:
self.screenshot = screenshot_png self.screenshot = screenshot_png
if self.status_code != 200 and not ignore_status_codes:
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=self.screenshot, page_html=self.content)
except Exception as e: except Exception as e:
driver.quit() driver.quit()
raise e raise e
@@ -197,10 +190,9 @@ class fetcher(Fetcher):
class WebDriverSeleniumFetcherPlugin: class WebDriverSeleniumFetcherPlugin:
"""Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin.""" """Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin."""
@hookimpl
def register_content_fetcher(self): def register_content_fetcher(self):
"""Register the WebDriver Selenium fetcher""" """Register the WebDriver Selenium fetcher"""
return ('selenium', fetcher) return ('html_webdriver', fetcher)
# Create module-level instance for plugin registration # Create module-level instance for plugin registration
+16 -58
View File
@@ -45,38 +45,8 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
# Compiled regex patterns for performance # Compiled regex patterns for performance
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+') WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
# Extraction patterns are assembled from the placemarker constants (escaped),
# so no marker text is hardcoded here. Each pattern is two alternatives with
# one non-greedy capture group apiece: group 1 for the first marker pair,
# group 2 for the second.
_EXTRACT_REMOVED_RE = re.compile('|'.join((
    re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED),
    re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED),
)))
_EXTRACT_ADDED_RE = re.compile('|'.join((
    re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED),
    re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED),
)))
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
def extract_changed_from(raw_diff: str) -> str:
    """Extract only the removed/changed-from fragments from a raw diff string.

    Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
    not the full surrounding line. Multiple fragments joined with newlines.

    :param raw_diff: diff text still containing the placemarker tokens.
    :return: the captured fragments joined with newlines; an empty string when
        nothing matches. A marker pair with nothing between it contributes an
        empty fragment.
    """
    fragments = []
    for match in _EXTRACT_REMOVED_RE.finditer(raw_diff):
        removed, changed = match.groups()
        # Only one alternative participates in a match. Compare against None
        # rather than using truthiness: an empty first group would otherwise
        # fall through to the non-participating group (None) and make
        # str.join() raise TypeError.
        fragments.append(changed if removed is None else removed)
    return '\n'.join(fragments)
def extract_changed_to(raw_diff: str) -> str:
    """Extract only the added/changed-into fragments from a raw diff string.

    Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
    not the full surrounding line. Multiple fragments joined with newlines.

    :param raw_diff: diff text still containing the placemarker tokens.
    :return: the captured fragments joined with newlines; an empty string when
        nothing matches. A marker pair with nothing between it contributes an
        empty fragment.
    """
    fragments = []
    for match in _EXTRACT_ADDED_RE.finditer(raw_diff):
        added, changed_into = match.groups()
        # Only one alternative participates in a match. Compare against None
        # rather than using truthiness: an empty first group would otherwise
        # fall through to the non-participating group (None) and make
        # str.join() raise TypeError.
        fragments.append(changed_into if added is None else added)
    return '\n'.join(fragments)
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
""" """
Render word-level differences between two lines inline using diff-match-patch library. Render word-level differences between two lines inline using diff-match-patch library.
@@ -163,20 +133,14 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
if removed_tokens: if removed_tokens:
removed_full = ''.join(removed_tokens).rstrip() removed_full = ''.join(removed_tokens).rstrip()
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else '' trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
if include_change_type_prefix: result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
else:
result_parts.append(f'{removed_full}{trailing_removed}')
if added_tokens: if added_tokens:
if result_parts: # Add newline between removed and added if result_parts: # Add newline between removed and added
result_parts.append('\n') result_parts.append('\n')
added_full = ''.join(added_tokens).rstrip() added_full = ''.join(added_tokens).rstrip()
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else '' trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
if include_change_type_prefix: result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
else:
result_parts.append(f'{added_full}{trailing_added}')
return ''.join(result_parts), has_changes return ''.join(result_parts), has_changes
else: else:
@@ -186,27 +150,21 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
if op == 0: # Equal if op == 0: # Equal
result_parts.append(text) result_parts.append(text)
elif op == 1: # Insertion elif op == 1: # Insertion
if not include_change_type_prefix: # Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
result_parts.append(text) content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
else: else:
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip) result_parts.append(trailing)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
elif op == -1: # Deletion elif op == -1: # Deletion
if not include_change_type_prefix: # Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
result_parts.append(text) content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
else: else:
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip) result_parts.append(trailing)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
return ''.join(result_parts), has_changes return ''.join(result_parts), has_changes
@@ -402,7 +360,7 @@ def customSequenceMatcher(
# Use inline word-level diff for single line replacements when word_diff is enabled # Use inline word-level diff for single line replacements when word_diff is enabled
if word_diff and len(before_lines) == 1 and len(after_lines) == 1: if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix) inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled) # Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
if ignore_junk and not has_changes: if ignore_junk and not has_changes:
# No real changes, skip this line # No real changes, skip this line
+34 -23
View File
@@ -212,11 +212,6 @@ def _is_safe_valid_url(test_url):
from .validate_url import is_safe_valid_url from .validate_url import is_safe_valid_url
return is_safe_valid_url(test_url) return is_safe_valid_url(test_url)
@app.template_global('get_html_head_extras')
def _get_html_head_extras():
from .pluggy_interface import collect_html_head_extras
return collect_html_head_extras()
@app.template_filter('format_number_locale') @app.template_filter('format_number_locale')
def _jinja2_filter_format_number_locale(value: float) -> str: def _jinja2_filter_format_number_locale(value: float) -> str:
@@ -346,36 +341,52 @@ def _jinja2_filter_format_duration(seconds):
@app.template_filter('fetcher_status_icons') @app.template_filter('fetcher_status_icons')
def _jinja2_filter_fetcher_status_icons(fetcher_name): def _jinja2_filter_fetcher_status_icons(fetcher_name):
"""Return status icon HTML for a fetcher, or empty string if none. """Get status icon HTML for a given fetcher.
Built-in fetchers declare their icon via the ``status_icon`` class attribute This filter checks both built-in fetchers and plugin fetchers for status icons.
on their ``Fetcher`` subclass. Plugin fetchers may still use the pluggy
``collect_fetcher_status_icons`` hook as a fallback. Args:
fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')
Returns:
str: HTML string containing status icon elements
""" """
from changedetectionio import content_fetchers from changedetectionio import content_fetchers
from changedetectionio.pluggy_interface import collect_fetcher_status_icons
from markupsafe import Markup from markupsafe import Markup
from flask import url_for from flask import url_for
icon_data = None icon_data = None
fetcher_class = content_fetchers.get_fetcher(fetcher_name) # First check if it's a plugin fetcher (plugins have priority)
if fetcher_class is not None: plugin_icon_data = collect_fetcher_status_icons(fetcher_name)
icon_data = getattr(fetcher_class, 'status_icon', None) if plugin_icon_data:
if not icon_data and callable(getattr(fetcher_class, 'get_status_icon_data', None)): icon_data = plugin_icon_data
# Check if it's a built-in fetcher
elif hasattr(content_fetchers, fetcher_name):
fetcher_class = getattr(content_fetchers, fetcher_name)
if hasattr(fetcher_class, 'get_status_icon_data'):
icon_data = fetcher_class.get_status_icon_data() icon_data = fetcher_class.get_status_icon_data()
# Fallback: pluggy hook for plugins that implement fetcher_status_icon # Build HTML from icon data
if not icon_data: if icon_data and isinstance(icon_data, dict):
from changedetectionio.pluggy_interface import collect_fetcher_status_icons # Use 'group' from icon_data if specified, otherwise default to 'images'
icon_data = collect_fetcher_status_icons(fetcher_name) group = icon_data.get('group', 'images')
if not icon_data: # Try to use url_for, but fall back to manual URL building if endpoint not registered yet
return '' try:
icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
except:
# Fallback: build URL manually respecting APPLICATION_ROOT
from flask import request
app_root = request.script_root if hasattr(request, 'script_root') else ''
icon_url = f"{app_root}/static/{group}/{icon_data['filename']}"
group = icon_data.get('group', 'images') style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
icon_url = url_for('static_content', group=group, filename=icon_data['filename']) html = f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>'
style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else '' return Markup(html)
return Markup(f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>')
return ''
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]') _RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
+11 -67
View File
@@ -667,11 +667,9 @@ class ValidateCSSJSONXPATHInput(object):
# `jq` requires full compilation in windows and so isn't generally available # `jq` requires full compilation in windows and so isn't generally available
raise ValidationError("jq not support not found") raise ValidationError("jq not support not found")
from changedetectionio.html_tools import validate_jq_expression
input = line.replace('jq:', '') input = line.replace('jq:', '')
try: try:
validate_jq_expression(input)
jq.compile(input) jq.compile(input)
except (ValueError) as e: except (ValueError) as e:
message = field.gettext('\'%s\' is not a valid jq expression. (%s)') message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
@@ -725,7 +723,7 @@ class ValidateStartsWithRegex(object):
raise ValidationError(self.message or _l("Invalid value.")) raise ValidationError(self.message or _l("Invalid value."))
class quickWatchForm(Form): class quickWatchForm(Form):
url = StringField(_l('URL'), validators=[validateURL()]) url = fields.URLField(_l('URL'), validators=[validateURL()])
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()]) tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"}) watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor) processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
@@ -742,6 +740,7 @@ class commonSettingsForm(Form):
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
fetch_backend = RadioField(_l('Fetch Method'), choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
notification_body = TextAreaField(_l('Notification Body'), default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()]) notification_body = TextAreaField(_l('Notification Body'), default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items())) notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
@@ -778,8 +777,7 @@ class SingleBrowserStep(Form):
class processor_text_json_diff_form(commonSettingsForm): class processor_text_json_diff_form(commonSettingsForm):
browser_profile = RadioField(_l('Browser / Fetch method'), choices=[]) # populated at runtime in edit.py url = fields.URLField('Web Page URL', validators=[validateURL()])
url = StringField('Web Page URL', validators=[validateURL()])
tags = StringTagUUID('Group Tag', [validators.Optional()], default='') tags = StringTagUUID('Group Tag', [validators.Optional()], default='')
time_between_check = EnhancedFormField( time_between_check = EnhancedFormField(
@@ -798,7 +796,6 @@ class processor_text_json_diff_form(commonSettingsForm):
subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)]) subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
extract_lines_containing = StringListField(_l('Extract lines containing'), [validators.Optional()])
extract_text = StringListField(_l('Extract text'), [ValidateListRegex()]) extract_text = StringListField(_l('Extract text'), [ValidateListRegex()])
title = StringField(_l('Title'), default='') title = StringField(_l('Title'), default='')
@@ -941,66 +938,10 @@ class SingleExtraBrowser(Form):
ValidateSimpleURL() ValidateSimpleURL()
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) ], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
class DefaultUAInputForm(Form):
class BrowserProfileForm(Form): html_requests = StringField(_l('Plaintext requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
"""Create or edit a named BrowserProfile stored in settings.application.browser_profiles.""" if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
html_webdriver = StringField(_l('Chrome requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
name = StringField(
_l('Profile name'),
[validators.DataRequired(), validators.Length(max=100)],
render_kw={"placeholder": _l("e.g. Mobile Chrome, Bright Data CDP"), "maxlength": "100"}
)
fetch_backend = SelectField(
_l('Fetch method'),
choices=[], # populated at runtime from available_fetchers()
)
browser_connection_url = StringField(
_l('Browser connection URL'),
[
validators.Optional(),
ValidateStartsWithRegex(
regex=r'^(wss?|ws|http|https)://',
flags=re.IGNORECASE,
message=_l('Browser connection URL must start with ws://, wss://, http://, https://')
),
ValidateSimpleURL(),
],
render_kw={"placeholder": "ws://my-chrome:3000", "size": 50}
)
viewport_width = IntegerField(
_l('Viewport width (px)'),
[validators.Optional(), validators.NumberRange(min=100, max=7680)],
default=1280,
render_kw={"style": "width:5em;"}
)
viewport_height = IntegerField(
_l('Viewport height (px)'),
[validators.Optional(), validators.NumberRange(min=100, max=4320)],
default=1000,
render_kw={"style": "width:5em;"}
)
block_images = BooleanField(_l('Block images (faster loads)'), default=False)
block_fonts = BooleanField(_l('Block web fonts'), default=False)
ignore_https_errors = BooleanField(_l('Ignore HTTPS/TLS errors'), default=False)
user_agent = StringField(
_l('User-Agent override'),
[validators.Optional(), validators.Length(max=500)],
render_kw={"placeholder": _l("Leave blank to use fetcher default"), "size": 60}
)
locale = StringField(
_l('Locale'),
[validators.Optional(), validators.Length(max=20)],
render_kw={"placeholder": "en-US, de-DE, fr-FR …", "size": 15}
)
custom_headers = TextAreaField(
_l('Custom headers'),
[validators.Optional()],
render_kw={
"placeholder": "Header-Name: value\nAnother-Header: value",
"rows": 4, "cols": 60,
"style": "font-family:monospace;"
}
)
# datastore.data['settings']['requests'].. # datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form): class globalSettingsRequestForm(Form):
@@ -1024,6 +965,8 @@ class globalSettingsRequestForm(Form):
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
default_ua = FormField(DefaultUAInputForm, label=_l("Default User-Agent overrides"))
def validate_extra_proxies(self, extra_validators=None): def validate_extra_proxies(self, extra_validators=None):
for e in self.data['extra_proxies']: for e in self.data['extra_proxies']:
if e.get('proxy_name') or e.get('proxy_url'): if e.get('proxy_name') or e.get('proxy_url'):
@@ -1046,6 +989,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')} render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
) )
empty_pages_are_a_change = BooleanField(_l('Treat empty pages as a change?'), default=False) empty_pages_are_a_change = BooleanField(_l('Treat empty pages as a change?'), default=False)
fetch_backend = RadioField(_l('Fetch Method'), default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField(_l('Ignore Text'), [ValidateListRegex()]) global_ignore_text = StringListField(_l('Ignore Text'), [ValidateListRegex()])
global_subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)]) global_subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
ignore_whitespace = BooleanField(_l('Ignore whitespace')) ignore_whitespace = BooleanField(_l('Ignore whitespace'))
@@ -1061,7 +1005,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
render_kw={"placeholder": "0.1", "style": "width: 8em;"} render_kw={"placeholder": "0.1", "style": "width: 8em;"}
) )
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"}) password = SaltyPasswordField(_l('Password'))
pager_size = IntegerField(_l('Pager size'), pager_size = IntegerField(_l('Pager size'),
render_kw={"style": "width: 5em;"}, render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, validators=[validators.NumberRange(min=0,
+9 -71
View File
@@ -4,7 +4,6 @@ from loguru import logger
from typing import List from typing import List
import html import html
import json import json
import os
import re import re
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
@@ -14,45 +13,6 @@ PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S) TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I) META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
# jq builtins that can leak sensitive data or cause harm when user-supplied expressions are executed.
# env/$ENV reads all process environment variables (passwords, API keys, etc.)
# include/import can read arbitrary files from disk
# input/inputs reads beyond the supplied JSON data
# debug/stderr leaks data to stderr
# halt/halt_error terminates the process (DoS)
_JQ_BLOCKED_PATTERNS = [
(re.compile(r'\benv\b'), 'env (reads environment variables)'),
(re.compile(r'\$ENV\b'), '$ENV (reads environment variables)'),
(re.compile(r'\binclude\b'), 'include (reads files from disk)'),
(re.compile(r'\bimport\b'), 'import (reads files from disk)'),
(re.compile(r'\binputs?\b'), 'input/inputs (reads beyond provided data)'),
(re.compile(r'\bdebug\b'), 'debug (leaks data to stderr)'),
(re.compile(r'\bstderr\b'), 'stderr (leaks data to stderr)'),
(re.compile(r'\bhalt(?:_error)?\b'), 'halt/halt_error (terminates the process)'),
(re.compile(r'\$__loc__\b'), '$__loc__ (leaks file path information)'),
(re.compile(r'\bbuiltins\b'), 'builtins (enumerates available functions)'),
(re.compile(r'\bmodulemeta\b'), 'modulemeta (leaks module information)'),
(re.compile(r'\$JQ_BUILD_CONFIGURATION\b'), '$JQ_BUILD_CONFIGURATION (leaks build information)'),
]
def validate_jq_expression(expression: str) -> None:
"""Raise ValueError if the jq expression uses any dangerous builtin.
User-supplied jq expressions are executed server-side. Without this check,
builtins like `env` expose every process environment variable (SALTED_PASS,
proxy credentials, API keys, etc.) as watch output.
"""
from changedetectionio.strtobool import strtobool
if strtobool(os.getenv('JQ_ALLOW_RISKY_EXPRESSIONS', 'false')):
return
for pattern, description in _JQ_BLOCKED_PATTERNS:
if pattern.search(expression):
msg = f"jq expression uses disallowed builtin: {description}"
logger.critical(f"Security: blocked jq expression containing '{description}' - expression: {expression!r}")
raise ValueError(msg)
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I) META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
# 'price' , 'lowPrice', 'highPrice' are usually under here # 'price' , 'lowPrice', 'highPrice' are usually under here
@@ -70,12 +30,6 @@ _DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
'unparsed-text-available', 'unparsed-text-available',
'doc', 'doc',
'doc-available', 'doc-available',
'json-doc',
'json-doc-available',
'collection', # XPath 2.0+: loads XML node collections from arbitrary URIs
'uri-collection', # XPath 3.0+: enumerates URIs from resource collections
'transform', # XPath 3.1: XSLT transformation (currently raises, block proactively)
'load-xquery-module', # XPath 3.1: loads XQuery modules (currently raises, block proactively)
'environment-variable', 'environment-variable',
'available-environment-variables', 'available-environment-variables',
] ]
@@ -424,16 +378,12 @@ def _parse_json(json_data, json_filter):
raise Exception("jq not support not found") raise Exception("jq not support not found")
if json_filter.startswith("jq:"): if json_filter.startswith("jq:"):
expr = json_filter.removeprefix("jq:") jq_expression = jq.compile(json_filter.removeprefix("jq:"))
validate_jq_expression(expr)
jq_expression = jq.compile(expr)
match = jq_expression.input(json_data).all() match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match) return _get_stripped_text_from_json_match(match)
if json_filter.startswith("jqraw:"): if json_filter.startswith("jqraw:"):
expr = json_filter.removeprefix("jqraw:") jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
validate_jq_expression(expr)
jq_expression = jq.compile(expr)
match = jq_expression.input(json_data).all() match = jq_expression.input(json_data).all()
return '\n'.join(str(item) for item in match) return '\n'.join(str(item) for item in match)
@@ -537,25 +487,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})") logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
else: else:
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...}) # Probably something else, go fish inside for it
# Server may claim application/json but actually return JSONP try:
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL) stripped_text_from_html = extract_json_blob_from_html(content=content,
if jsonp_match: ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
try: json_filter=json_filter )
inner = jsonp_match.group(1).strip() except json.JSONDecodeError as e:
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'") logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
if not stripped_text_from_html:
# Probably something else, go fish inside for it
try:
stripped_text_from_html = extract_json_blob_from_html(content=content,
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
json_filter=json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
if not stripped_text_from_html: if not stripped_text_from_html:
# Re 265 - Just return an empty string when filter not found # Re 265 - Just return an empty string when filter not found
+11 -14
View File
@@ -28,20 +28,18 @@ def get_timeago_locale(flask_locale):
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT') str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
""" """
locale_map = { locale_map = {
'zh': 'zh_CN', # Chinese Simplified 'zh': 'zh_CN', # Chinese Simplified
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646. # timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW) 'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW 'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
'pt': 'pt_PT', # Portuguese (Portugal) 'pt': 'pt_PT', # Portuguese (Portugal)
'pt_BR': 'pt_BR', # Portuguese (Brasil) 'sv': 'sv_SE', # Swedish
'sv': 'sv_SE', # Swedish 'no': 'nb_NO', # Norwegian Bokmål
'no': 'nb_NO', # Norwegian Bokmål 'hi': 'in_HI', # Hindi
'hi': 'in_HI', # Hindi 'cs': 'en', # Czech not supported by timeago, fallback to English
'cs': 'en', # Czech not supported by timeago, fallback to English 'uk': 'uk', # Ukrainian
'ja': 'ja', # Japanese 'en_GB': 'en', # British English - timeago uses 'en'
'uk': 'uk', # Ukrainian 'en_US': 'en', # American English - timeago uses 'en'
'en_GB': 'en', # British English - timeago uses 'en'
'en_US': 'en', # American English - timeago uses 'en'
} }
return locale_map.get(flask_locale, flask_locale) return locale_map.get(flask_locale, flask_locale)
@@ -55,8 +53,7 @@ LANGUAGE_DATA = {
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'}, 'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'}, 'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
'es': {'flag': 'fi fi-es fis', 'name': 'Español'}, 'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'}, 'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'}, 'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'}, 'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'}, 'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
+6 -3
View File
@@ -12,6 +12,7 @@ from changedetectionio.notification import (
# Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification # Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6 _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
@@ -30,6 +31,10 @@ class model(dict):
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, 'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds 'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections 'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections
'default_ua': {
'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
'html_webdriver': None,
}
}, },
'application': { 'application': {
# Custom notification content # Custom notification content
@@ -38,9 +43,7 @@ class model(dict):
'api_access_token_enabled': True, 'api_access_token_enabled': True,
'base_url' : None, 'base_url' : None,
'empty_pages_are_a_change': False, 'empty_pages_are_a_change': False,
'browser_profile': None, # machine-name of the system-default BrowserProfile 'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
'browser_profiles': {}, # user-defined profiles keyed by machine name
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "requests"),
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, 'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum 'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
'global_subtractive_selectors': [], 'global_subtractive_selectors': [],
-15
View File
@@ -46,26 +46,11 @@ class model(EntityPersistenceMixin, watch_base):
super(model, self).__init__(*arg, **kw) super(model, self).__init__(*arg, **kw)
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch') self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
if kw.get('default'): if kw.get('default'):
self.update(kw['default']) self.update(kw['default'])
del kw['default'] del kw['default']
def matches_url(self, url: str) -> bool:
"""Return True if this tag should be auto-applied to the given watch URL.
Wildcard patterns (*,?,[ ) use fnmatch; anything else is a case-insensitive
substring match. Returns False if no pattern is configured.
"""
import fnmatch
pattern = self.get('url_match_pattern', '').strip()
if not pattern or not url:
return False
if any(c in pattern for c in ('*', '?', '[')):
return fnmatch.fnmatch(url.lower(), pattern.lower())
return pattern.lower() in url.lower()
# _save_to_disk() method provided by EntityPersistenceMixin # _save_to_disk() method provided by EntityPersistenceMixin
# commit() and _get_commit_data() methods inherited from watch_base # commit() and _get_commit_data() methods inherited from watch_base
# Tag uses default _get_commit_data() (includes all keys) # Tag uses default _get_commit_data() (includes all keys)
+53 -84
View File
@@ -353,40 +353,40 @@ class model(EntityPersistenceMixin, watch_base):
def is_source_type_url(self): def is_source_type_url(self):
return self.get('url', '').startswith('source:') return self.get('url', '').startswith('source:')
@property
def effective_browser_profile(self):
"""Resolve the effective BrowserProfile for this watch.
Walks the chain: watch tag (overrides_watch=True) global settings built-in fallback.
Never raises. Returns a BrowserProfile instance.
"""
from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS
if not self._datastore:
return BUILTIN_REQUESTS
try:
return resolve_browser_profile(self, self._datastore)
except Exception:
return BUILTIN_REQUESTS
@property @property
def get_fetch_backend(self): def get_fetch_backend(self):
"""Legacy property — prefer effective_browser_profile.fetch_backend for new code.
Returns the raw fetch_backend stored on this watch (or 'requests' for PDFs).
Does NOT walk the tag/global resolution chain.
""" """
if self.is_pdf: Get the fetch backend for this watch with special case handling.
return 'requests'
return self.get('fetch_backend')
@property CHAIN RESOLUTION OPPORTUNITY:
def fetcher_supports_screenshots(self): Currently returns watch.fetch_backend directly, but doesn't implement
"""Return True if the resolved fetcher for this watch supports screenshots.""" Watch Tag Global resolution chain. With Pydantic:
from changedetectionio import content_fetchers
fetcher_class = content_fetchers.get_fetcher(self.effective_browser_profile.fetch_backend) @computed_field
if fetcher_class is None: def resolved_fetch_backend(self) -> str:
return False # Special case: PDFs always use html_requests
return bool(getattr(fetcher_class, 'supports_screenshots', False)) if self.is_pdf:
return 'html_requests'
# Watch override
if self.fetch_backend and self.fetch_backend != 'system':
return self.fetch_backend
# Tag override (first tag with overrides_watch=True wins)
for tag_uuid in self.tags:
tag = self._datastore.get_tag(tag_uuid)
if tag.overrides_watch and tag.fetch_backend:
return tag.fetch_backend
# Global default
return self._datastore.settings.fetch_backend
"""
# Maybe also if is_image etc?
# This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
if self.is_pdf:
return 'html_requests'
return self.get('fetch_backend')
@property @property
def is_pdf(self): def is_pdf(self):
@@ -779,50 +779,24 @@ class model(EntityPersistenceMixin, watch_base):
# Also in the case that the file didnt exist # Also in the case that the file didnt exist
return True return True
def bump_favicon(self, url, favicon_base_64: str, mime_type: str = None) -> None: def bump_favicon(self, url, favicon_base_64: str) -> None:
from urllib.parse import urlparse from urllib.parse import urlparse
import base64 import base64
import binascii import binascii
import re decoded = None
MAX_FAVICON_BYTES = 1 * 1024 * 1024 # 1 MB if url:
MIME_TO_EXT = {
'image/png': 'png',
'image/x-icon': 'ico',
'image/vnd.microsoft.icon': 'ico',
'image/jpeg': 'jpg',
'image/gif': 'gif',
'image/svg+xml': 'svg',
'image/webp': 'webp',
'image/bmp': 'bmp',
}
extension = None
# If the caller already resolved the MIME type (e.g. from blob.type or a data URI),
# use that directly — it's more reliable than guessing from a URL path.
if mime_type:
extension = MIME_TO_EXT.get(mime_type.lower().split(';')[0].strip(), None)
# Fall back to extracting extension from URL path, unless it's a data URI.
if not extension and url and not url.startswith('data:'):
try: try:
parsed = urlparse(url) parsed = urlparse(url)
filename = os.path.basename(parsed.path) filename = os.path.basename(parsed.path)
(_base, ext) = filename.lower().strip().rsplit('.', 1) (base, extension) = filename.lower().strip().rsplit('.', 1)
extension = ext
except ValueError: except ValueError:
logger.warning(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}', defaulting to ico") logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
return None
# Handle data URIs: extract MIME type from the URI itself when not already known else:
if not extension and url and url.startswith('data:'): # Assume favicon.ico
m = re.match(r'^data:([^;]+);base64,', url) base = "favicon"
if m: extension = "ico"
extension = MIME_TO_EXT.get(m.group(1).lower(), None)
if not extension:
extension = 'ico'
fname = os.path.join(self.data_dir, f"favicon.{extension}") fname = os.path.join(self.data_dir, f"favicon.{extension}")
@@ -831,27 +805,22 @@ class model(EntityPersistenceMixin, watch_base):
decoded = base64.b64decode(favicon_base_64, validate=True) decoded = base64.b64decode(favicon_base_64, validate=True)
except (binascii.Error, ValueError) as e: except (binascii.Error, ValueError) as e:
logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}") logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
return None else:
if decoded:
try:
with open(fname, 'wb') as f:
f.write(decoded)
if len(decoded) > MAX_FAVICON_BYTES: # Invalidate module-level favicon filename cache for this watch
logger.warning(f"UUID: {self.get('uuid')} Favicon too large ({len(decoded)} bytes), skipping") _FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
return None
try: # A signal that could trigger the socket server to update the browser also
with open(fname, 'wb') as f: watch_check_update = signal('watch_favicon_bump')
f.write(decoded) if watch_check_update:
watch_check_update.send(watch_uuid=self.get('uuid'))
# Invalidate module-level favicon filename cache for this watch except Exception as e:
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None) logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
# A signal that could trigger the socket server to update the browser also
watch_check_update = signal('watch_favicon_bump')
if watch_check_update:
watch_check_update.send(watch_uuid=self.get('uuid'))
except Exception as e:
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
return None
# @todo - Store some checksum and only write when its different # @todo - Store some checksum and only write when its different
logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}") logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
+1 -6
View File
@@ -186,9 +186,7 @@ class watch_base(dict):
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'content-type': None, 'content-type': None,
'date_created': None, 'date_created': None,
'extract_lines_containing': [], # Keep only lines containing these substrings (plain text, case-insensitive)
'extract_text': [], # Extract text by regex after filters 'extract_text': [], # Extract text by regex after filters
'browser_profile': 'system', # machine-name key of a BrowserProfile; 'system' → resolve via chain
'fetch_backend': 'system', # plaintext, playwright etc 'fetch_backend': 'system', # plaintext, playwright etc
'fetch_time': 0.0, 'fetch_time': 0.0,
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
@@ -339,7 +337,6 @@ class watch_base(dict):
# These are set by processors/workers and should not trigger edited flag # These are set by processors/workers and should not trigger edited flag
additional_system_fields = { additional_system_fields = {
'last_check_status', # Set by processors 'last_check_status', # Set by processors
'last_filter_config_hash', # Set by text_json_diff processor, internal skip-cache
'restock', # Set by restock processor 'restock', # Set by restock processor
'last_viewed', # Set by mark_all_viewed endpoint 'last_viewed', # Set by mark_all_viewed endpoint
} }
@@ -592,9 +589,7 @@ class watch_base(dict):
return None return None
try: try:
# _datastore is a ChangeDetectionStore (has .data) or a plain dict (unit tests) value = self._datastore['settings']
store_data = self._datastore.data if hasattr(self._datastore, 'data') else self._datastore
value = store_data['settings']
for key in path: for key in path:
value = value[key] value = value[key]
return value return value
-380
View File
@@ -1,380 +0,0 @@
"""
BrowserProfile — a named, reusable browser/fetcher configuration.
Storage key
-----------
Profiles are stored in ``settings.application.browser_profiles`` as a plain dict
keyed by *machine name* a lowercase, underscore-separated slug derived from the
human-readable ``name`` field:
    'My Blocking Chrome'          → 'my_blocking_chrome'
    'Custom CDP — Mobile (375px)' → 'custom_cdp_mobile_375px'
Using the machine name as the key means that deleting a profile and recreating
it with the same name restores the original key, so all watches that referenced
it continue to work without any manual re-linking.
Resolution chain
----------------
``resolve_browser_profile(watch, datastore)`` walks:
    watch.browser_profile → first tag with overrides_watch=True
    → settings.application.browser_profile → built-in fallback
It never raises. Stale / missing machine-name references are logged and the
resolver falls through to the next level.
Built-in profiles
-----------------
``BUILTIN_REQUESTS`` and ``BUILTIN_BROWSER`` are always available and cannot be
deleted from the UI (``is_builtin=True``). Their machine names are stored in
``RESERVED_MACHINE_NAMES`` to block user profiles from shadowing them.
Migration
---------
``store/updates.py::update_31`` converts the legacy ``fetch_backend`` field on
watches, tags and global settings into ``browser_profile`` machine-name
references. After that migration no legacy paths are needed here.
"""
from __future__ import annotations
import os
import re
from typing import Optional
from loguru import logger
from pydantic import BaseModel, field_validator
# Default User-Agent for the built-in plaintext requests profile.
# Read once, at import time, from the DEFAULT_SETTINGS_HEADERS_USERAGENT
# environment variable; the literal below is used when that variable is unset.
_DEFAULT_REQUESTS_UA = os.getenv(
    "DEFAULT_SETTINGS_HEADERS_USERAGENT",
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Maximum length of a profile's human-readable name; also the truncation
# length applied to machine names derived from it.
NAME_MAX_LEN = 100
# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
class BrowserProfile(BaseModel):
    """
    A named, reusable configuration for how a watch fetches its target URL.

    The *machine name* (see ``get_machine_name()``) is the stable storage key.
    Updating ``name`` changes the machine name; any watch that referenced the
    old machine name will then fall back through the resolution chain until it
    is explicitly re-pointed. To replace a profile without breaking watches,
    delete it and recreate it with the *same* name.
    """

    name: str
    """Human-readable label shown in the UI. Max ``NAME_MAX_LEN`` characters."""

    fetch_backend: str = 'requests'
    """
    Which fetch engine to use. This is the *clean* fetcher name without the
    ``html_`` module prefix (e.g. ``'requests'``, ``'webdriver'``,
    ``'playwright'``, ``'puppeteer'``, ``'cloakbrowser'``).

    The module-level ``html_`` prefix (``html_requests``, ``html_webdriver``,
    ...) is an implementation detail of ``content_fetchers/``. Use
    ``get_fetcher_class_name()`` to obtain the name to pass to the fetcher
    lookup.

    Must be non-empty and contain only ``[a-z0-9_]`` characters.
    """

    is_builtin: bool = False
    """Built-in profiles are always present and cannot be deleted from the UI."""

    # ------------------------------------------------------------------
    # Browser-specific settings (silently ignored by html_requests)
    # ------------------------------------------------------------------
    browser_connection_url: Optional[str] = None
    """
    Custom CDP / WebSocket endpoint, e.g. ``ws://my-chrome:3000``.
    Overrides the system-wide ``PLAYWRIGHT_DRIVER_URL`` for this profile.
    Only meaningful for ``html_webdriver`` profiles.
    """

    viewport_width: int = 1280
    """
    Browser viewport width in pixels.
    Common presets: 375 (iPhone), 768 (tablet), 1280 (desktop).
    """

    viewport_height: int = 1000
    """
    Browser viewport height in pixels.
    Common presets: 812 (iPhone), 1024 (tablet), 1000 (desktop).
    """

    block_images: bool = False
    """
    Block all image requests. Typically cuts page-load time by 40-70 % on
    image-heavy sites with no impact on text-based change detection.
    """

    block_fonts: bool = False
    """Block web-font requests. Modest speed gain; rarely affects detection."""

    user_agent: Optional[str] = None
    """
    Override the browser User-Agent string.
    ``None`` keeps the fetcher's built-in default, which already strips
    obvious headless markers such as ``HeadlessChrome``.
    """

    ignore_https_errors: bool = False
    """
    Proceed even when the server's TLS certificate is invalid or self-signed.
    Useful for staging / development environments.
    """

    locale: Optional[str] = None
    """
    Browser locale (e.g. ``en-US``, ``de-DE``).
    Sets the ``Accept-Language`` header and ``navigator.language``.
    Some sites serve different prices or copy based on locale.
    """

    custom_headers: str = ''
    """
    Extra HTTP headers sent with every request using this profile, in ``Key: Value`` format
    (one per line, ``#`` lines are ignored). Applied before per-watch headers so
    individual watches can override them.
    """

    service_workers: str = 'allow'
    """
    Whether to allow Service Workers in the browser context.
    Playwright accepts ``'allow'`` or ``'block'``.
    Block to avoid large Service Worker data transfers (e.g. YouTube).
    """

    extra_delay: int = 0
    """
    Extra seconds to wait after page load before extracting content
    (on top of the per-watch ``render_extract_delay``).
    Sourced from ``WEBDRIVER_DELAY_BEFORE_CONTENT_READY`` at startup.
    """

    model_config = {"frozen": False}

    # ------------------------------------------------------------------
    # Validators
    # ------------------------------------------------------------------
    @field_validator('fetch_backend')
    @classmethod
    def _validate_fetch_backend(cls, v: str) -> str:
        """Strip and validate ``fetch_backend``.

        Raises:
            ValueError: if the value is empty, contains characters outside
                ``[a-z0-9_]``, or carries the ``html_`` module prefix.
        """
        v = v.strip()
        if not v:
            raise ValueError('fetch_backend cannot be empty')
        if not re.fullmatch(r'[a-z0-9_]+', v):
            raise ValueError(
                f"fetch_backend must contain only lowercase letters, digits and underscores, got {v!r}"
            )
        if v.startswith('html_'):
            raise ValueError(
                f"fetch_backend should be the clean fetcher name without the 'html_' prefix "
                f"(e.g. 'requests', 'webdriver', 'playwright'). Got {v!r}. "
                f"Use get_fetcher_class_name() to obtain the full module attribute name."
            )
        return v

    @field_validator('name')
    @classmethod
    def _validate_name(cls, v: str) -> str:
        """Strip ``name`` and enforce non-empty / ``NAME_MAX_LEN`` limits.

        Raises:
            ValueError: if the stripped name is empty or too long.
        """
        v = v.strip()
        if not v:
            raise ValueError('Name cannot be empty')
        if len(v) > NAME_MAX_LEN:
            raise ValueError(f'Name must be {NAME_MAX_LEN} characters or less')
        return v

    # ------------------------------------------------------------------
    # Machine-name helpers
    # ------------------------------------------------------------------
    @staticmethod
    def machine_name_from_str(name: str) -> str:
        """
        Convert a human name to a machine-safe storage key.

        Transformation rules (applied in order):

        1. Strip surrounding whitespace; lower-case.
        2. Replace runs of whitespace or hyphens with a single ``_``.
        3. Drop every character that is not ``[a-z0-9_]``.
        4. Collapse consecutive underscores.
        5. Strip leading / trailing underscores.
        6. Truncate to ``NAME_MAX_LEN`` characters, then drop any trailing
           underscore the cut exposed.

        The result may be an empty string when *name* contains no
        ``[a-z0-9]`` characters at all.

        Examples::

            'My Blocking Browser Chrome'  → 'my_blocking_browser_chrome'
            'Custom CDP — Mobile (375px)' → 'custom_cdp_mobile_375px'
            ' Weird --- Name '            → 'weird_name'
        """
        s = name.strip().lower()
        s = re.sub(r'[\s\-]+', '_', s)      # whitespace / hyphens → underscore
        s = re.sub(r'[^a-z0-9_]', '', s)    # drop everything else
        s = re.sub(r'_+', '_', s)           # collapse repeated underscores
        s = s.strip('_')                    # drop leading / trailing underscores
        # Truncating can cut right after a separator and leave a dangling '_',
        # which would violate rule 5, so strip once more after the cut.
        return s[:NAME_MAX_LEN].rstrip('_')

    def get_machine_name(self) -> str:
        """Return the machine-safe storage key derived from this profile's ``name``."""
        return self.machine_name_from_str(self.name)

    def get_fetcher_class_name(self) -> str:
        """Return the clean fetcher name for this profile (same as ``fetch_backend``).

        Use with ``content_fetchers.get_fetcher()``::

            from changedetectionio import content_fetchers
            fetcher_cls = content_fetchers.get_fetcher(profile.get_fetcher_class_name())
        """
        return self.fetch_backend
# ---------------------------------------------------------------------------
# Built-in profiles (always present, cannot be deleted)
# ---------------------------------------------------------------------------
# Each built-in is flagged is_builtin=True. Only the requests profile pins a
# User-Agent (the env-overridable default); the browser profiles leave
# user_agent at the model default.
BUILTIN_REQUESTS = BrowserProfile(
    name='Direct HTTP (requests)',
    fetch_backend='requests',
    is_builtin=True,
    user_agent=_DEFAULT_REQUESTS_UA,
)
BUILTIN_PLAYWRIGHT = BrowserProfile(
    name='Browser (Chrome/Playwright)',
    fetch_backend='playwright_cdp',
    is_builtin=True,
)
BUILTIN_SELENIUM = BrowserProfile(
    name='Browser (Chrome/Selenium)',
    fetch_backend='selenium',
    is_builtin=True,
)
BUILTIN_PUPPETEER = BrowserProfile(
    name='Browser (Chrome/Puppeteer)',
    fetch_backend='puppeteer',
    is_builtin=True,
)
# Backwards-compatible alias — code that imported BUILTIN_BROWSER keeps working.
# It is the same object as BUILTIN_PLAYWRIGHT, not a copy.
BUILTIN_BROWSER = BUILTIN_PLAYWRIGHT
# Keyed by machine name (derived from each profile's ``name``) for O(1) lookup.
_BUILTINS: dict[str, BrowserProfile] = {
    b.get_machine_name(): b
    for b in (BUILTIN_REQUESTS, BUILTIN_PLAYWRIGHT, BUILTIN_SELENIUM, BUILTIN_PUPPETEER)
}
# Machine names that cannot be used by user-created profiles.
# This module only publishes the set; enforcement is presumably done by the
# code that creates profiles — TODO confirm.
RESERVED_MACHINE_NAMES: frozenset[str] = frozenset(_BUILTINS.keys())
def get_default_browser_builtin() -> BrowserProfile:
    """Return the last-resort profile used when the chain resolves nothing.

    ``preconfigure_browser_profiles_based_on_env()`` sets
    ``settings.application.browser_profile`` explicitly at startup, so this
    fallback is only reached for watches with stale / missing machine-name
    references. The safe default is always direct HTTP requests.
    """
    fallback = BUILTIN_REQUESTS
    return fallback
# ---------------------------------------------------------------------------
# Lookup helpers
# ---------------------------------------------------------------------------
def get_builtin_profiles() -> dict[str, BrowserProfile]:
    """Return a new dict of the built-in profiles, keyed by machine name.

    The dict is a shallow copy: mutating it does not affect the module-level
    table, but the ``BrowserProfile`` values are the shared instances.
    """
    return {machine_name: profile for machine_name, profile in _BUILTINS.items()}
def get_profile(machine_name: str, store_profiles: dict) -> Optional[BrowserProfile]:
    """
    Look up a ``BrowserProfile`` by machine name.

    Stored profiles win over the module-level built-ins, so env-configured
    built-ins (written by ``preconfigure_browser_profiles_based_on_env``) take
    priority over the bare defaults.

    Args:
        machine_name: Storage key of the profile to find.
        store_profiles: Mapping of machine name to either a ``BrowserProfile``
            instance or the raw keyword data used to build one.

    Returns:
        The stored profile when present and loadable; otherwise the built-in
        with that machine name; otherwise ``None``. Stored data that fails to
        deserialize is logged as a warning and treated as absent.
    """
    stored = store_profiles.get(machine_name)

    if isinstance(stored, BrowserProfile):
        return stored

    if stored is not None:
        try:
            return BrowserProfile(**stored)
        except Exception as exc:
            # Deliberately broad: any bad stored payload must degrade to the
            # built-in lookup below rather than break the caller.
            logger.warning(f"BrowserProfile '{machine_name}': failed to deserialize — {exc}")

    return _BUILTINS.get(machine_name)
# ---------------------------------------------------------------------------
# Resolution
# ---------------------------------------------------------------------------
def resolve_browser_profile(watch, datastore) -> BrowserProfile:
    """
    Resolve the effective ``BrowserProfile`` for *watch*.

    Resolution chain
    ~~~~~~~~~~~~~~~~
    1. ``watch['browser_profile']`` — explicit machine name set on the watch.
    2. Each tag with ``overrides_watch=True`` that has ``browser_profile`` set,
       in the order returned by ``datastore.get_all_tags_for_watch()``.
    3. ``settings.application['browser_profile']`` — system-wide default.
    4. Built-in fallback from ``get_default_browser_builtin()``.

    A level is skipped when its value is ``None``, ``''``, ``'system'`` or
    ``'default'``. A level whose machine name cannot be loaded (stale or
    missing reference) is logged with ``logger.warning`` and the resolver
    moves on to the NEXT level; previously it jumped straight to the built-in
    fallback, ignoring tag and global settings.

    Args:
        watch: Watch dict / model object (needs ``.get()``).
        datastore: App datastore exposing ``data['settings']['application']``
            and ``get_all_tags_for_watch(uuid=...)``.

    Returns:
        The first profile that resolves, or the built-in fallback.
    """
    unset_values = (None, '', 'system', 'default')
    store_profiles: dict = datastore.data['settings']['application'].get('browser_profiles', {})

    def _candidates():
        # Lazily yield (level, machine_name) so tags are only fetched when the
        # watch-level value did not resolve.
        yield 'watch', watch.get('browser_profile')
        tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
        if tags:
            for tag in tags.values():
                if tag.get('overrides_watch'):
                    yield 'tag', tag.get('browser_profile')
        yield 'settings', datastore.data['settings']['application'].get('browser_profile')

    for level, machine_name in _candidates():
        if machine_name in unset_values:
            continue
        profile = get_profile(machine_name, store_profiles)
        if profile is not None:
            return profile
        logger.warning(
            f"Watch {watch.get('uuid')!r}: browser_profile {machine_name!r} "
            f"(from {level}) not found, falling back through the chain"
        )

    return get_default_browser_builtin()
-63
View File
@@ -1,63 +0,0 @@
"""
Unified Watch → Tag → Global settings cascade resolver.
All settings resolution follows the same priority order:
1. Watch-level setting (if set and not a sentinel "use parent" value)
2. First tag with overrides_watch=True that has the field set
3. Global application settings
4. Caller-supplied default
This replaces the previously scattered manual resolution loops found in
notification_service.py, processors/base.py, and the restock processor.
"""
def resolve_setting(watch, datastore, field_name, *,
                    sentinel_values=None,
                    default=None,
                    require_tag_override=True):
    """
    Resolve a single setting value by walking the Watch → Tag → Global chain.

    Args:
        watch: Watch dict / model object (needs ``.get()``).
        datastore: App datastore (must have ``get_all_tags_for_watch()`` and
            ``data['settings']['application']``).
        field_name: The setting key to look up at each level.
        sentinel_values: Iterable of values that mean "not configured here,
            keep looking". For example ``{'system'}`` for fetch_backend.
        default: Value returned when nothing is found in the chain.
        require_tag_override: If True (default), only tags where
            ``overrides_watch`` is truthy contribute to the cascade. Set to
            False when every tag that carries the field should be considered.

    Returns:
        The first value in the chain that is not ``None``, not ``''`` and not
        a sentinel, or *default*. Other falsy values (``0``, ``False``,
        empty containers) count as "set" and are returned as-is.
    """
    # A tuple, not a set: tuple membership compares by equality, so unhashable
    # setting values (lists, dicts) can be tested. With a set, ``value in
    # sentinels`` raised TypeError for them — even when no sentinels were given.
    sentinels = tuple(sentinel_values) if sentinel_values else ()

    def _is_unset(value):
        return value is None or value == '' or value in sentinels

    # 1. Watch level
    value = watch.get(field_name)
    if not _is_unset(value):
        return value

    # 2. Tag level
    tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
    if tags:
        for tag in tags.values():
            if require_tag_override and not tag.get('overrides_watch'):
                continue
            value = tag.get(field_name)
            if not _is_unset(value):
                return value

    # 3. Global application settings
    value = datastore.data['settings']['application'].get(field_name)
    if not _is_unset(value):
        return value

    return default
+1 -4
View File
@@ -259,12 +259,9 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
or url.startswith('https://discord.com/api'))\ or url.startswith('https://discord.com/api'))\
and 'html' in requested_output_format: and 'html' in requested_output_format:
# Discord doesn't render HTML — convert markup to plain text equivalents. # Discord doesn't support HTML, replace <br> with newlines
# &nbsp; is injected upstream to preserve double-spaces for HTML email clients;
# Discord displays it as the literal string "&nbsp;" so strip it here.
n_body = n_body.strip().replace('<br>', '\n') n_body = n_body.strip().replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n') n_body = n_body.replace('</br>', '\n')
n_body = n_body.replace('&nbsp;', ' ')
n_body = newline_re.sub('\n', n_body) n_body = newline_re.sub('\n', n_body)
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it # Don't replace placeholders or truncate here - let the custom Discord plugin handle it
@@ -1,3 +0,0 @@
from .registry import registry, NotificationProfileType, AppriseProfileType
__all__ = ['registry', 'NotificationProfileType', 'AppriseProfileType']
@@ -1,73 +0,0 @@
"""
Per-profile notification log.
Each profile gets its own log file at:
{datastore_path}/notification-logs/{profile_uuid}.log
Entries are stored as JSON-lines (one JSON object per line).
The file is capped at MAX_ENTRIES lines (oldest pruned first).
"""
import json
import os
from datetime import datetime, timezone
# Maximum number of entries kept per profile log; older entries are pruned.
MAX_ENTRIES = 100
# Sub-directory of the datastore path that holds the per-profile log files.
_LOG_DIR = 'notification-logs'


def _log_file(datastore_path: str, profile_uuid: str) -> str:
    """Return the path of the log file for *profile_uuid* under *datastore_path*."""
    return os.path.join(datastore_path, _LOG_DIR, f'{profile_uuid}.log')


def _read_raw_lines(path: str) -> list:
    """Return the non-blank lines of *path* (without line endings), oldest first.

    Iterates the file object so that lines are split on real newlines only.
    ``str.splitlines()`` must NOT be used here: it also splits on U+2028,
    U+2029 and U+0085, which ``json.dumps(..., ensure_ascii=False)`` writes
    unescaped, so an entry containing one would be torn into two invalid lines.

    Returns an empty list when the file does not exist.
    """
    try:
        with open(path, 'r', encoding='utf-8') as fh:
            return [line.rstrip('\r\n') for line in fh if line.strip()]
    except FileNotFoundError:
        return []


def write_profile_log(datastore_path: str, profile_uuid: str, *,
                      watch_url: str = '',
                      watch_uuid: str = '',
                      status: str,  # 'ok' | 'error' | 'test'
                      message: str = ''):
    """Append one log entry for a profile and prune the file to MAX_ENTRIES.

    Args:
        datastore_path: Root datastore directory; the log directory is created
            beneath it when missing.
        profile_uuid: Profile identifier, used as the log file's base name.
        watch_url: URL of the watch; stored truncated to 200 characters.
            ``None`` is stored as ``''``.
        watch_uuid: UUID of the watch, stored as given.
        status: Entry status, e.g. ``'ok'``, ``'error'`` or ``'test'``.
        message: Free-form text; stored truncated to 500 characters. ``None``
            is stored as ``''`` and non-string values are converted with
            ``str()`` instead of raising on the slice.
    """
    log_dir = os.path.join(datastore_path, _LOG_DIR)
    os.makedirs(log_dir, exist_ok=True)

    entry = json.dumps({
        'ts': datetime.now(tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
        'watch_url': str(watch_url or '')[:200],
        'watch_uuid': watch_uuid,
        'status': status,
        'message': str(message or '')[:500],
    }, ensure_ascii=False)

    path = _log_file(datastore_path, profile_uuid)
    # Read-modify-write: keep only the newest MAX_ENTRIES lines.
    lines = _read_raw_lines(path)
    lines.append(entry)
    lines = lines[-MAX_ENTRIES:]
    with open(path, 'w', encoding='utf-8') as fh:
        fh.write('\n'.join(lines) + '\n')


def read_profile_log(datastore_path: str, profile_uuid: str) -> list:
    """Return the profile's log entries as a list of dicts, newest first.

    Lines that are not valid JSON are skipped. A missing log file yields ``[]``.
    """
    entries = []
    for line in reversed(_read_raw_lines(_log_file(datastore_path, profile_uuid))):
        try:
            entries.append(json.loads(line))
        except (json.JSONDecodeError, ValueError):
            # Best-effort reader: one corrupt line must not hide the rest.
            continue
    return entries


def has_log(datastore_path: str, profile_uuid: str) -> bool:
    """Return True when a log file exists for *profile_uuid*."""
    return os.path.exists(_log_file(datastore_path, profile_uuid))
@@ -1,111 +0,0 @@
"""
Notification Profile Type plugin registry.
NotificationProfileType is the abstract base — the only contract is send().
Plugins are free to use any delivery mechanism (Apprise, direct HTTP, SDK, etc.).
Built-in: AppriseProfileType (raw Apprise URL list).
Third-party plugins register additional types:
from changedetectionio.notification_profiles.registry import registry, NotificationProfileType
@registry.register
class MyProfileType(NotificationProfileType):
type_id = "mytype"
display_name = "My Service"
icon = "bell"
template = "my_plugin/notification_profiles/types/mytype.html"
def send(self, config: dict, n_object: dict, datastore) -> bool:
requests.post(config['webhook_url'], json={"text": n_object['notification_body']})
return True
"""
from abc import ABC, abstractmethod
class NotificationProfileType(ABC):
    """Abstract base for notification profile type plugins.

    Subclasses set the class attributes below and implement ``send()``;
    ``validate()`` and ``get_url_hint()`` are optional hooks with no-op
    defaults.
    """

    # Identifier the type is stored under when registered.
    type_id: str = NotImplemented
    # Human-readable name paired with type_id in the registry's choices.
    display_name: str = NotImplemented
    # feather icon name
    icon: str = "bell"
    # Jinja2 partial rendered in the profile edit form
    template: str = NotImplemented

    @abstractmethod
    def send(self, config: dict, n_object: dict, datastore) -> bool:
        """
        Deliver the notification.

        Args:
            config: The profile's config dict (type-specific fields).
            n_object: Fully-rendered NotificationContextData (title, body, format, etc.).
            datastore: App datastore for any extra lookups.

        Returns True on success, False on failure (do not raise — log instead).
        """

    def validate(self, config: dict) -> None:
        """Raise ValueError with a user-readable message on invalid config."""
        return None

    def get_url_hint(self, config: dict) -> str:
        """Short display string shown in the selector chip tooltip / dropdown row."""
        hint = ''
        return hint
class AppriseProfileType(NotificationProfileType):
    """Delivers notifications via Apprise using a raw URL list."""

    type_id = "apprise"
    display_name = "Apprise"
    icon = "bell"
    template = "notification_profiles/types/apprise.html"

    def get_apprise_urls(self, config: dict) -> list:
        """Return the profile's ``notification_urls`` list, or ``[]`` when unset/empty."""
        return config.get('notification_urls') or []

    def send(self, config: dict, n_object, datastore) -> bool:
        """Send *n_object* to the profile's Apprise URLs.

        Title, body and format from *config* take precedence over the values
        already on *n_object*; empty config values fall back to *n_object*.

        Returns:
            False when the profile has no URLs, True after handing the
            notification to ``process_notification``.
        """
        from changedetectionio.notification.handler import process_notification
        from changedetectionio.notification_service import NotificationContextData

        urls = self.get_apprise_urls(config)
        if not urls:
            return False

        if not isinstance(n_object, NotificationContextData):
            n_object = NotificationContextData(n_object)

        # NOTE(review): when the caller passes a NotificationContextData it is
        # modified in place, so a title/body/format set by one profile would
        # carry over if the same object is reused for the next profile —
        # confirm callers pass a fresh object per profile.
        n_object['notification_urls'] = urls
        n_object['notification_title'] = config.get('notification_title') or n_object.get('notification_title')
        n_object['notification_body'] = config.get('notification_body') or n_object.get('notification_body')
        n_object['notification_format'] = config.get('notification_format') or n_object.get('notification_format')

        # NOTE(review): exceptions from process_notification propagate, while
        # the base-class contract says "return False, do not raise" — confirm
        # which behaviour callers rely on before changing it.
        process_notification(n_object, datastore)
        return True

    def get_url_hint(self, config: dict) -> str:
        """Return the first configured URL, shortened to 60 characters plus an ellipsis."""
        urls = self.get_apprise_urls(config)
        if not urls:
            return ''
        first_url = urls[0]
        # Mark truncation visibly; the original appended an empty string, which
        # made the length check a no-op.
        return (first_url[:60] + '…') if len(first_url) > 60 else first_url
class _Registry:
def __init__(self):
self._types: dict = {}
def register(self, cls):
"""Register a NotificationProfileType subclass. Usable as a decorator."""
instance = cls()
self._types[instance.type_id] = instance
return cls
def get(self, type_id: str) -> NotificationProfileType:
return self._types.get(type_id, self._types.get('apprise'))
def all(self) -> list:
return list(self._types.values())
def choices(self) -> list:
return [(t.type_id, t.display_name) for t in self._types.values()]
# Module-level registry instance; the built-in Apprise type is registered at
# import time, so the 'apprise' fallback used by _Registry.get() is available.
registry = _Registry()
registry.register(AppriseProfileType)
@@ -1,49 +0,0 @@
"""
Resolve the full set of NotificationProfile objects that should fire for a given watch.
Merges profile UUIDs from: Watch → Tags → System (union, deduplicated).
Mute cascade is checked separately via resolve_setting() before calling this.
"""
from loguru import logger
def resolve_notification_profiles(watch, datastore) -> list:
    """
    Return the ``(profile_dict, NotificationProfileType)`` tuples to fire for *watch*.

    Profile UUIDs are collected from the watch, then from each of its tags,
    then from the application settings. A UUID that resolves to a profile is
    emitted once, at the first level where it appears. A UUID with no matching
    profile data is logged as a warning and skipped.
    """
    from changedetectionio.notification_profiles.registry import registry

    profile_data = datastore.data['settings']['application'].get('notification_profile_data', {})

    def _uuid_lists():
        # Yield one UUID list per level, in priority order. Lazy, so the tag
        # and settings lookups happen only after the previous level is done.
        # 1. Watch-level
        yield watch.get('notification_profiles', [])
        # 2. Tag/group level
        watch_tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
        if watch_tags:
            for tag in watch_tags.values():
                yield tag.get('notification_profiles', [])
        # 3. System level
        yield datastore.data['settings']['application'].get('notification_profiles', [])

    already_added = set()
    resolved = []
    for uuid_list in _uuid_lists():
        for profile_uuid in (uuid_list or []):
            if profile_uuid in already_added:
                continue
            profile = profile_data.get(profile_uuid)
            if not profile:
                logger.warning(f"Notification profile UUID {profile_uuid!r} not found, skipping")
                continue
            already_added.add(profile_uuid)
            handler = registry.get(profile.get('type', 'apprise'))
            resolved.append((profile, handler))

    return resolved
+8 -43
View File
@@ -88,28 +88,6 @@ class FormattableTimestamp(str):
return self._dt.isoformat() return self._dt.isoformat()
class FormattableExtract(str):
    """
    A str subclass holding only the extracted changed fragments of a diff.

    Used for the {{diff_changed_from}} and {{diff_changed_to}} tokens:

        {{ diff_changed_from }} → old value(s) only, e.g. "$99.99"
        {{ diff_changed_to }}   → new value(s) only, e.g. "$109.99"

    Multiple changed fragments are joined with newlines.
    Being a str subclass means it is natively JSON serializable.
    """

    def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
        # With no snapshot on either side there is nothing to diff, so the
        # diff module is not imported and extract_fn is not called.
        if not (prev_snapshot or current_snapshot):
            return super().__new__(cls, '')

        from changedetectionio import diff as diff_module
        rendered = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
        return super().__new__(cls, extract_fn(rendered))
class FormattableDiff(str): class FormattableDiff(str):
""" """
A str subclass representing a rendered diff. As a plain string it renders A str subclass representing a rendered diff. As a plain string it renders
@@ -183,8 +161,6 @@ class NotificationContextData(dict):
'diff_patch': FormattableDiff('', '', patch_format=True), 'diff_patch': FormattableDiff('', '', patch_format=True),
'diff_removed': FormattableDiff('', '', include_added=False), 'diff_removed': FormattableDiff('', '', include_added=False),
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False), 'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
'diff_url': None, 'diff_url': None,
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen 'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
'notification_timestamp': time.time(), 'notification_timestamp': time.time(),
@@ -268,27 +244,16 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False}, 'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
} }
from changedetectionio.diff import extract_changed_from, extract_changed_to
extract_specs = {
'diff_changed_from': extract_changed_from,
'diff_changed_to': extract_changed_to,
}
ret = {} ret = {}
rendered_count = 0 rendered_count = 0
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text # Only create FormattableDiff objects for diff keys actually used in the notification text
for key in NotificationContextData().keys(): for key in NotificationContextData().keys():
if not key.startswith('diff'): if key.startswith('diff') and key in diff_specs:
continue # Check if this placeholder is actually used in the notification text
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])" pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
if not re.search(pattern, notification_scan_text, re.IGNORECASE): if re.search(pattern, notification_scan_text, re.IGNORECASE):
continue ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
if key in diff_specs: rendered_count += 1
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
rendered_count += 1
elif key in extract_specs:
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
rendered_count += 1
if rendered_count: if rendered_count:
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s") logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
@@ -496,7 +461,7 @@ Thanks - Your omniscient changedetection.io installation.
n_object = NotificationContextData({ n_object = NotificationContextData({
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run", 'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
'notification_body': body, 'notification_body': body,
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch), 'notification_format': self._check_cascading_vars('notification_format', watch),
}) })
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html') n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
+47 -101
View File
@@ -174,64 +174,6 @@ class ChangeDetectionSpec:
""" """
pass pass
@hookspec
def get_html_head_extras():
"""Return HTML to inject into the <head> of every page via base.html.
Plugins can use this to add <script>, <style>, or <link> tags that should
be present on all pages. Return a raw HTML string or None.
IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
work correctly. This hook is called inside a request context so url_for() is
always available.
For small amounts of CSS/JS, return them inline no file-serving needed::
from changedetectionio.pluggy_interface import hookimpl
@hookimpl
def get_html_head_extras(self):
return (
'<style>.my-module-banner { color: red; }</style>\\n'
'<script>console.log("my_module_content loaded");</script>'
)
For larger assets, register your own lightweight Flask routes in the plugin
module and point to them with url_for() so the sub-path prefix is handled
automatically::
from flask import url_for, Response
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.flask_app import app as _app
MY_CSS = ".my-module-example { color: red; }"
MY_JS = "console.log('my_module_content loaded');"
@_app.route('/my_module_content/css')
def my_module_content_css():
return Response(MY_CSS, mimetype='text/css',
headers={'Cache-Control': 'max-age=3600'})
@_app.route('/my_module_content/js')
def my_module_content_js():
return Response(MY_JS, mimetype='application/javascript',
headers={'Cache-Control': 'max-age=3600'})
@hookimpl
def get_html_head_extras(self):
css = url_for('my_module_content_css')
js = url_for('my_module_content_js')
return (
f'<link rel="stylesheet" href="{css}">\\n'
f'<script src="{js}" defer></script>'
)
Returns:
str or None: Raw HTML string to inject inside <head>, or None
"""
pass
# Set up Plugin Manager # Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -295,23 +237,14 @@ def register_builtin_fetchers():
This is called from content_fetchers/__init__.py after all fetchers are imported This is called from content_fetchers/__init__.py after all fetchers are imported
to avoid circular import issues. to avoid circular import issues.
""" """
from changedetectionio.content_fetchers import requests, puppeteer, webdriver_selenium from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium
from changedetectionio.content_fetchers.playwright import CDP, chrome, firefox, webkit
# Register each built-in fetcher plugin
if hasattr(requests, 'requests_plugin'): if hasattr(requests, 'requests_plugin'):
plugin_manager.register(requests.requests_plugin, 'builtin_requests') plugin_manager.register(requests.requests_plugin, 'builtin_requests')
if hasattr(CDP, 'cdp_plugin'): if hasattr(playwright, 'playwright_plugin'):
plugin_manager.register(CDP.cdp_plugin, 'builtin_playwright_cdp') plugin_manager.register(playwright.playwright_plugin, 'builtin_playwright')
if hasattr(chrome, 'chrome_plugin'):
plugin_manager.register(chrome.chrome_plugin, 'builtin_playwright_chrome')
if hasattr(firefox, 'firefox_plugin'):
plugin_manager.register(firefox.firefox_plugin, 'builtin_playwright_firefox')
if hasattr(webkit, 'webkit_plugin'):
plugin_manager.register(webkit.webkit_plugin, 'builtin_playwright_webkit')
if hasattr(puppeteer, 'puppeteer_plugin'): if hasattr(puppeteer, 'puppeteer_plugin'):
plugin_manager.register(puppeteer.puppeteer_plugin, 'builtin_puppeteer') plugin_manager.register(puppeteer.puppeteer_plugin, 'builtin_puppeteer')
@@ -427,28 +360,57 @@ def get_active_plugins():
def get_fetcher_capabilities(watch, datastore): def get_fetcher_capabilities(watch, datastore):
"""Get capability flags for a watch's resolved fetcher. """Get capability flags for a watch's fetcher.
Uses the BrowserProfile resolution chain (watch tag global built-in) Args:
to determine the actual fetcher class, then reads its capability flags. watch: The watch object/dict
datastore: The datastore to resolve 'system' fetcher
Returns: Returns:
dict: {'supports_browser_steps': bool, 'supports_screenshots': bool, dict: Dictionary with capability flags:
'supports_xpath_element_data': bool} {
'supports_browser_steps': bool,
'supports_screenshots': bool,
'supports_xpath_element_data': bool
}
""" """
from changedetectionio.model.browser_profile import resolve_browser_profile # Get the fetcher name from watch
fetcher_name = watch.get('fetch_backend', 'system')
# Resolve 'system' to actual fetcher
if fetcher_name == 'system':
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
# Get the fetcher class
from changedetectionio import content_fetchers from changedetectionio import content_fetchers
profile = resolve_browser_profile(watch, datastore) # Try to get from built-in fetchers first
fetcher_class = content_fetchers.get_fetcher(profile.fetch_backend) if hasattr(content_fetchers, fetcher_name):
fetcher_class = getattr(content_fetchers, fetcher_name)
return {
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
}
if fetcher_class is None: # Try to get from plugin-provided fetchers
return {'supports_browser_steps': False, 'supports_screenshots': False, 'supports_xpath_element_data': False} # Query all plugins for registered fetchers
plugin_fetchers = plugin_manager.hook.register_content_fetcher()
for fetcher_registration in plugin_fetchers:
if fetcher_registration:
name, fetcher_class = fetcher_registration
if name == fetcher_name:
return {
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
}
# Default: no capabilities
return { return {
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False), 'supports_browser_steps': False,
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False), 'supports_screenshots': False,
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False), 'supports_xpath_element_data': False
} }
@@ -644,20 +606,4 @@ def apply_update_finalize(update_handler, watch, datastore, processing_exception
except Exception as e: except Exception as e:
# Don't let plugin errors crash the worker # Don't let plugin errors crash the worker
logger.error(f"Error in update_finalize hook: {e}") logger.error(f"Error in update_finalize hook: {e}")
logger.exception(f"update_finalize hook exception details:") logger.exception(f"update_finalize hook exception details:")
def collect_html_head_extras():
"""Collect and combine HTML head extras from all plugins.
Called from a Flask template global so it always runs inside a request context.
This means url_for() works correctly in plugin implementations, including when the
app is deployed under a sub-path via USE_X_SETTINGS / X-Forwarded-Prefix (ProxyFix
sets SCRIPT_NAME so url_for() automatically prepends the prefix).
Returns:
str: Combined HTML string to inject inside <head>, or empty string
"""
results = plugin_manager.hook.get_html_head_extras()
parts = [r for r in results if r]
return "\n".join(parts) if parts else ""
-12
View File
@@ -341,18 +341,6 @@ def get_processor_descriptions():
return descriptions return descriptions
def wcag_text_color(hex_bg: str) -> str:
"""Return #000000 or #ffffff for maximum WCAG contrast against hex_bg."""
hex_bg = hex_bg.lstrip('#')
if len(hex_bg) != 6:
return '#000000'
r, g, b = (int(hex_bg[i:i+2], 16) / 255 for i in (0, 2, 4))
def lin(c):
return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4
L = 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b)
return '#000000' if L > 0.179 else '#ffffff'
def generate_processor_badge_colors(processor_name): def generate_processor_badge_colors(processor_name):
""" """
Generate consistent colors for a processor badge based on its name. Generate consistent colors for a processor badge based on its name.
+63 -57
View File
@@ -23,7 +23,6 @@ class difference_detection_processor():
watch = None watch = None
xpath_data = None xpath_data = None
preferred_proxy = None preferred_proxy = None
preferred_proxy_override = None # Set externally to force a specific proxy (e.g. proxy checker)
screenshot_format = SCREENSHOT_FORMAT_JPEG screenshot_format = SCREENSHOT_FORMAT_JPEG
last_raw_content_checksum = None last_raw_content_checksum = None
@@ -37,8 +36,6 @@ class difference_detection_processor():
# 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict() # 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
# 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data # 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid)) self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
if self.watch is None:
raise KeyError(f"Watch UUID {watch_uuid} not found in datastore (deleted before processing?)")
# Generic fetcher that should be extended (requests, playwright etc) # Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher() self.fetcher = Fetcher()
@@ -100,6 +97,7 @@ class difference_detection_processor():
logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}") logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
self.last_raw_content_checksum = None self.last_raw_content_checksum = None
async def validate_iana_url(self): async def validate_iana_url(self):
"""Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop. """Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes
@@ -117,65 +115,82 @@ class difference_detection_processor():
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow." f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
) )
async def call_browser(self): async def call_browser(self, preferred_proxy_id=None):
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS
url = self.watch.link url = self.watch.link
# Protect against file:, file:/, file:// access # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
if re.search(r'^file:', url.strip(), re.IGNORECASE): if re.search(r'^file:', url.strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')): if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
raise Exception("file:// type access is denied for security reasons.") raise Exception(
"file:// type access is denied for security reasons."
)
await self.validate_iana_url() await self.validate_iana_url()
# Resolve the full browser profile for this watch (watch → tag → global → built-in) # Requests, playwright, other browser via wss:// etc, fetch_extra_something
profile = resolve_browser_profile(self.watch, self.datastore) prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
# PDFs always use the requests fetcher — browsers render them in an embedded viewer # Proxy ID "key"
preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(
uuid=self.watch.get('uuid'))
# Pluggable content self.fetcher
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
# In the case that the preferred fetcher was a browser config with custom connection URL..
# @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
custom_browser_connection_url = None
if prefer_fetch_backend.startswith('extra_browser_'):
(t, key) = prefer_fetch_backend.split('extra_browser_')
connection = list(
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
if connection:
prefer_fetch_backend = 'html_webdriver'
custom_browser_connection_url = connection[0].get('browser_connection_url')
# PDF should be html_requests because playwright will serve it up (so far) in a embedded page
# @todo https://github.com/dgtlmoon/changedetection.io/issues/2019 # @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
# @todo needs test to or a fix
if self.watch.is_pdf: if self.watch.is_pdf:
profile = BUILTIN_REQUESTS prefer_fetch_backend = "html_requests"
# Resolve proxy for the target URL fetch. # Grab the right kind of 'fetcher', (playwright, requests, etc)
# Note: browser_connection_url is the WebSocket endpoint to reach the remote browser,
# which is separate from the proxy used by the browser to fetch target pages.
proxy_url = self.datastore.get_proxy_url_for_watch(self.watch.get('uuid'), override_id=self.preferred_proxy_override)
if proxy_url:
logger.debug(f"Proxy '{proxy_url}' for {url}")
logger.debug(f"BrowserProfile '{profile.get_machine_name()}' (fetcher={profile.fetch_backend}) for watch {self.watch['uuid']}")
# Select the fetcher class
from changedetectionio import content_fetchers from changedetectionio import content_fetchers
fetcher_class_name = profile.get_fetcher_class_name() if hasattr(content_fetchers, prefer_fetch_backend):
# @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
# This is never supported in selenium anyway
logger.warning(
"Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
fetcher_obj = playwright_fetcher
else:
fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
else:
# What it referenced doesnt exist, Just use a default
fetcher_obj = getattr(content_fetchers, "html_requests")
fetcher_obj = content_fetchers.get_fetcher(fetcher_class_name) proxy_url = None
if fetcher_obj is None: if preferred_proxy_id:
logger.warning(f"Fetcher '{fetcher_class_name}' not found, falling back to requests") # Custom browser endpoints should NOT have a proxy added
fetcher_obj = content_fetchers.get_fetcher('requests') if not prefer_fetch_backend.startswith('extra_browser_'):
elif self.watch.has_browser_steps and not getattr(fetcher_obj, 'supports_browser_steps', False): proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
# Browser steps require Playwright — override if the resolved fetcher doesn't support them logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
logger.warning(f"Fetcher '{fetcher_class_name}' does not support browser steps, overriding to Playwright") else:
fetcher_obj = content_fetchers.get_fetcher('playwright') logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
self.fetcher = fetcher_obj( logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
proxy_override=proxy_url,
custom_browser_connection_url=profile.browser_connection_url, # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
screenshot_format=self.screenshot_format, # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
# BrowserProfile fields — browser fetchers use these; html_requests ignores them self.fetcher = fetcher_obj(proxy_override=proxy_url,
viewport_width=profile.viewport_width, custom_browser_connection_url=custom_browser_connection_url,
viewport_height=profile.viewport_height, screenshot_format=self.screenshot_format
block_images=profile.block_images, )
block_fonts=profile.block_fonts,
profile_user_agent=profile.user_agent,
ignore_https_errors=profile.ignore_https_errors,
locale=profile.locale,
service_workers=profile.service_workers,
extra_delay=profile.extra_delay,
)
if self.watch.has_browser_steps: if self.watch.has_browser_steps:
self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', [])) self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
@@ -185,17 +200,9 @@ class difference_detection_processor():
from changedetectionio.jinja2_custom import render as jinja_render from changedetectionio.jinja2_custom import render as jinja_render
request_headers = CaseInsensitiveDict() request_headers = CaseInsensitiveDict()
# Browser profile: UA override (lowest priority — watch headers override this) ua = self.datastore.data['settings']['requests'].get('default_ua')
if profile.user_agent: if ua and ua.get(prefer_fetch_backend):
request_headers['User-Agent'] = profile.user_agent request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
# Browser profile: custom headers (override profile UA, but watch headers override these)
if profile.custom_headers:
for line in profile.custom_headers.splitlines():
line = line.strip()
if not line.startswith('#') and ':' in line:
k, v = line.split(':', 1)
request_headers[k.strip()] = v.strip()
request_headers.update(self.watch.get('headers', {})) request_headers.update(self.watch.get('headers', {}))
request_headers.update(self.datastore.get_all_base_headers()) request_headers.update(self.datastore.get_all_base_headers())
@@ -252,7 +259,6 @@ class difference_detection_processor():
# @todo .quit here could go on close object, so we can run JS if change-detected # @todo .quit here could go on close object, so we can run JS if change-detected
await self.fetcher.quit(watch=self.watch) await self.fetcher.quit(watch=self.watch)
self.fetcher.disk_cleanup_after_fetch()
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding # Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
# content that gets decoded into surrogate characters (e.g. \udcad). Without this, # content that gets decoded into surrogate characters (e.g. \udcad). Without this,
+5 -2
View File
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
# Get error information for the template # Get error information for the template
screenshot_url = watch.get_screenshot() screenshot_url = watch.get_screenshot()
fetcher_supports_screenshots = watch.fetcher_supports_screenshots system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False): if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
@@ -59,7 +62,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
last_error_screenshot=watch.get_error_snapshot(), last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(), last_error_text=watch.get_error_text(),
screenshot=screenshot_url, screenshot=screenshot_url,
fetcher_supports_screenshots=fetcher_supports_screenshots, is_html_webdriver=is_html_webdriver,
password_enabled_and_share_is_off=password_enabled_and_share_is_off, password_enabled_and_share_is_off=password_enabled_and_share_is_off,
extra_title=f" - {watch.label} - Extract Data", extra_title=f" - {watch.label} - Extract Data",
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')], extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
+1 -7
View File
@@ -100,13 +100,7 @@ class guess_stream_type():
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES): if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
self.is_rss = True self.is_rss = True
elif any(s in http_content_header for s in JSON_CONTENT_TYPES): elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...})) self.is_json = True
# A JSONP response starts with an identifier followed by '(' - not valid JSON
if re.match(r'^\w[\w.]*\s*\(', test_content):
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
self.is_plaintext = True
else:
self.is_json = True
elif 'pdf' in magic_content_header: elif 'pdf' in magic_content_header:
self.is_pdf = True self.is_pdf = True
# magic will call a rss document 'xml' # magic will call a rss document 'xml'
@@ -1,7 +1,6 @@
from babel.numbers import parse_decimal from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch from changedetectionio.model.Watch import model as BaseWatch
from decimal import Decimal, InvalidOperation
from typing import Union from typing import Union
import re import re
@@ -11,8 +10,6 @@ supports_browser_steps = True
supports_text_filters_and_triggers = True supports_text_filters_and_triggers = True
supports_text_filters_and_triggers_elements = True supports_text_filters_and_triggers_elements = True
supports_request_type = True supports_request_type = True
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
class Restock(dict): class Restock(dict):
@@ -66,17 +63,6 @@ class Restock(dict):
super().__setitem__(key, value) super().__setitem__(key, value)
def get_price_from_history_str(history_str):
m = _price_re.search(history_str)
if not m:
return None
try:
return str(Decimal(m.group(1)))
except InvalidOperation:
return None
class Watch(BaseWatch): class Watch(BaseWatch):
def __init__(self, *arg, **kw): def __init__(self, *arg, **kw):
super().__init__(*arg, **kw) super().__init__(*arg, **kw)
@@ -90,27 +76,13 @@ class Watch(BaseWatch):
def extra_notification_token_values(self): def extra_notification_token_values(self):
values = super().extra_notification_token_values() values = super().extra_notification_token_values()
values['restock'] = self.get('restock', {}) values['restock'] = self.get('restock', {})
values['restock']['previous_price'] = None
if self.history_n >= 2:
history = self.history
if history and len(history) >=2:
"""Unfortunately for now timestamp is stored as string key"""
sorted_keys = sorted(list(history), key=lambda x: int(x))
sorted_keys.reverse()
price_str = self.get_history_snapshot(timestamp=sorted_keys[-1])
if price_str:
values['restock']['previous_price'] = get_price_from_history_str(price_str)
return values return values
def extra_notification_token_placeholder_info(self): def extra_notification_token_placeholder_info(self):
values = super().extra_notification_token_placeholder_info() values = super().extra_notification_token_placeholder_info()
values.append(('restock.price', "Price detected")) values.append(('restock.price', "Price detected"))
values.append(('restock.in_stock', "In stock status"))
values.append(('restock.original_price', "Original price at first check")) values.append(('restock.original_price', "Original price at first check"))
values.append(('restock.previous_price', "Previous price in history"))
return values return values
@@ -489,9 +489,19 @@ class perform_site_check(difference_detection_processor):
# @TODO !!! some setting like "Use as fallback" or "always use", "t # @TODO !!! some setting like "Use as fallback" or "always use", "t
if not (has_price and has_availability) or True: if not (has_price and has_availability) or True:
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
# Use the actual resolved fetcher name from the fetcher instance fetcher_name = watch.get('fetch_backend', 'html_requests')
fetcher_name = self.watch.effective_browser_profile.fetch_backend
logger.debug(f"Resolved effective fetcher: {fetcher_name}") # Resolve 'system' to the actual fetcher being used
# This allows plugins to work even when watch uses "system settings default"
if fetcher_name == 'system':
# Get the actual fetcher that was used (from self.fetcher)
# Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
actual_fetcher = type(self.fetcher).__name__
if 'html_requests' in actual_fetcher.lower():
fetcher_name = 'html_requests'
elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
fetcher_name = 'html_webdriver'
logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")
# Try plugin override - plugins can decide if they support this fetcher # Try plugin override - plugins can decide if they support this fetcher
if fetcher_name: if fetcher_name:
@@ -154,7 +154,11 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
screenshot_url = watch.get_screenshot() screenshot_url = watch.get_screenshot()
fetcher_supports_screenshots = watch.fetcher_supports_screenshots system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False): if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
@@ -210,7 +214,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
extra_title=f" - {watch.label} - History", extra_title=f" - {watch.label} - History",
extract_form=extract_form, extract_form=extract_form,
from_version=str(from_version), from_version=str(from_version),
fetcher_supports_screenshots=fetcher_supports_screenshots, is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'], last_error=watch['last_error'],
last_error_screenshot=watch.get_error_snapshot(), last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(), last_error_text=watch.get_error_text(),
@@ -85,10 +85,6 @@ class FilterConfig:
self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors] self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
return self._subtractive_selectors_cache return self._subtractive_selectors_cache
@property
def extract_lines_containing(self):
return self._get_merged_rules('extract_lines_containing')
@property @property
def extract_text(self): def extract_text(self):
return self._get_merged_rules('extract_text') return self._get_merged_rules('extract_text')
@@ -105,30 +101,6 @@ class FilterConfig:
def text_should_not_be_present(self): def text_should_not_be_present(self):
return self._get_merged_rules('text_should_not_be_present') return self._get_merged_rules('text_should_not_be_present')
def get_filter_config_hash(self):
"""
Stable hash of the effective filter configuration.
Used by the skip-logic in run_changedetection() so that any change to
global settings, tag overrides, or watch filters automatically invalidates
the raw-content-unchanged shortcut without needing scattered
clear_all_last_checksums() calls at every settings mutation site.
"""
app = self.datastore.data['settings']['application']
config = {
'extract_lines_containing': sorted(self.extract_lines_containing),
'extract_text': sorted(self.extract_text),
'ignore_text': sorted(self.ignore_text),
'include_filters': sorted(self.include_filters),
'subtractive_selectors': sorted(self.subtractive_selectors),
'text_should_not_be_present': sorted(self.text_should_not_be_present),
'trigger_text': sorted(self.trigger_text),
# Global processing flags not captured by the filter lists above
'ignore_whitespace': app.get('ignore_whitespace', False),
'strip_ignored_lines': app.get('strip_ignored_lines', False),
}
return hashlib.md5(json.dumps(config, sort_keys=True).encode()).hexdigest()
@property @property
def has_include_filters(self): def has_include_filters(self):
return bool(self.include_filters) and bool(self.include_filters[0].strip()) return bool(self.include_filters) and bool(self.include_filters[0].strip())
@@ -163,17 +135,6 @@ class ContentTransformer:
text = text.replace("\n\n", "\n") text = text.replace("\n\n", "\n")
return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower())) return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
@staticmethod
def extract_lines_containing(text, substrings):
"""Keep only lines that contain at least one of the given substrings (case-insensitive)."""
needles = [s.lower() for s in substrings if s.strip()]
if not needles:
return text
return '\n'.join(
line for line in text.splitlines()
if any(needle in line.lower() for needle in needles)
)
@staticmethod @staticmethod
def extract_by_regex(text, regex_patterns): def extract_by_regex(text, regex_patterns):
"""Extract text matching regex patterns.""" """Extract text matching regex patterns."""
@@ -416,26 +377,19 @@ class perform_site_check(difference_detection_processor):
raise Exception("Watch no longer exists.") raise Exception("Watch no longer exists.")
current_raw_document_checksum = self.get_raw_document_checksum() current_raw_document_checksum = self.get_raw_document_checksum()
# Skip processing only if BOTH conditions are true:
# Build filter config up front so we can hash it for the skip check. # 1. HTML content unchanged (checksum matches last saved checksum)
filter_config = FilterConfig(watch, self.datastore) # 2. Watch configuration was not edited (including trigger_text, filters, etc.)
current_filter_config_hash = filter_config.get_filter_config_hash() # The was_edited flag handles all watch configuration changes, so we don't need
# separate checks for trigger_text or other processing rules.
# Skip only when ALL of these hold:
# 1. raw HTML is unchanged
# 2. watch config was not edited (was_edited covers per-watch field changes)
# 3. effective filter config is unchanged (covers global/tag setting changes that
# bypass was_edited — e.g. global_ignore_text, global_subtractive_selectors)
# last_filter_config_hash being False means first run or upgrade: don't skip.
if (not force_reprocess and if (not force_reprocess and
not watch.was_edited and not watch.was_edited and
self.last_raw_content_checksum and self.last_raw_content_checksum and
self.last_raw_content_checksum == current_raw_document_checksum and self.last_raw_content_checksum == current_raw_document_checksum):
watch.get('last_filter_config_hash') and
watch.get('last_filter_config_hash') == current_filter_config_hash):
raise checksumFromPreviousCheckWasTheSame() raise checksumFromPreviousCheckWasTheSame()
# Initialize remaining components # Initialize components
filter_config = FilterConfig(watch, self.datastore)
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore) content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
transformer = ContentTransformer() transformer = ContentTransformer()
rule_engine = RuleEngine() rule_engine = RuleEngine()
@@ -456,7 +410,6 @@ class perform_site_check(difference_detection_processor):
# Save the raw content checksum to file (processor implementation detail, not watch config) # Save the raw content checksum to file (processor implementation detail, not watch config)
self.update_last_raw_content_checksum(current_raw_document_checksum) self.update_last_raw_content_checksum(current_raw_document_checksum)
update_obj['last_filter_config_hash'] = current_filter_config_hash
# === CONTENT PREPROCESSING === # === CONTENT PREPROCESSING ===
# Avoid creating unnecessary intermediate string copies by reassigning only when needed # Avoid creating unnecessary intermediate string copies by reassigning only when needed
@@ -550,10 +503,6 @@ class perform_site_check(difference_detection_processor):
update_obj["last_check_status"] = self.fetcher.get_last_status_code() update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# === LINE FILTER (plain-text substring) ===
if filter_config.extract_lines_containing:
stripped_text = transformer.extract_lines_containing(stripped_text, filter_config.extract_lines_containing)
# === REGEX EXTRACTION === # === REGEX EXTRACTION ===
if filter_config.extract_text: if filter_config.extract_text:
extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text) extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
@@ -587,8 +536,8 @@ class perform_site_check(difference_detection_processor):
# === BLOCKING RULES EVALUATION === # === BLOCKING RULES EVALUATION ===
blocked = False blocked = False
# Check trigger_text - use text_for_checksuming so ignore_text can suppress trigger_text # Check trigger_text
if rule_engine.evaluate_trigger_text(text_for_checksuming, filter_config.trigger_text): if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
blocked = True blocked = True
# Check text_should_not_be_present # Check text_should_not_be_present
-2
View File
@@ -29,11 +29,9 @@ def register_watch_operation_handlers(socketio, datastore):
# Perform the operation # Perform the operation
if op == 'pause': if op == 'pause':
watch.toggle_pause() watch.toggle_pause()
watch.commit()
logger.info(f"Socket.IO: Toggled pause for watch {uuid}") logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
elif op == 'mute': elif op == 'mute':
watch.toggle_mute() watch.toggle_mute()
watch.commit()
logger.info(f"Socket.IO: Toggled mute for watch {uuid}") logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
elif op == 'recheck': elif op == 'recheck':
# Import here to avoid circular imports # Import here to avoid circular imports
@@ -199,31 +199,8 @@ def handle_watch_update(socketio, **kwargs):
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}") logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
The exception is caught inside run_wsgi and routed to self.server.log() it never
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
disconnect correctly via its own disconnect handler and timeout logic."""
try:
from werkzeug.serving import BaseWSGIServer
_original_log = BaseWSGIServer.log
def _filtered_log(self, type, message, *args):
if type == 'error' and 'write() before start_response' in message:
return
_original_log(self, type, message, *args)
BaseWSGIServer.log = _filtered_log
except Exception:
pass
def init_socketio(app, datastore): def init_socketio(app, datastore):
"""Initialize SocketIO with the main Flask app""" """Initialize SocketIO with the main Flask app"""
_suppress_werkzeug_ws_abrupt_disconnect_noise()
import platform import platform
import sys import sys
File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 10 KiB

@@ -1,20 +1,5 @@
function checkDiscordHtmlWarning() {
var urls = $('textarea.notification-urls').val() || '';
var format = $('select.notification-format').val() || '';
var isDiscord = /discord:\/\/|https:\/\/discord(?:app)?\.com\/api/i.test(urls);
var isHtml = format === 'html' || format === 'htmlcolor';
if (isDiscord && isHtml) {
$('#discord-html-format-warning').show();
} else {
$('#discord-html-format-warning').hide();
}
}
$(document).ready(function () { $(document).ready(function () {
$('textarea.notification-urls, select.notification-format').on('change input', checkDiscordHtmlWarning);
checkDiscordHtmlWarning();
$('#add-email-helper').click(function (e) { $('#add-email-helper').click(function (e) {
e.preventDefault(); e.preventDefault();
email = prompt("Destination email"); email = prompt("Destination email");
-8
View File
@@ -116,14 +116,6 @@ $(document).ready(function () {
$('#realtime-conn-error').show(); $('#realtime-conn-error').show();
}); });
// Tell the server we're leaving cleanly so it can release the connection
// immediately rather than waiting for a timeout.
// Note: this only fires for voluntary closes (tab/window close, navigation away).
// Hard kills, crashes and network drops will still timeout normally on the server.
window.addEventListener('beforeunload', function () {
socket.disconnect();
});
socket.on('queue_size', function (data) { socket.on('queue_size', function (data) {
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`); console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
if(queueSizePagerInfoText) { if(queueSizePagerInfoText) {
+1 -1
View File
@@ -4,7 +4,7 @@ $(document).ready(function(){
}); });
var checkUserVal = function(){ var checkUserVal = function(){
if($('#fetch_backend input:checked').val()=='requests') { if($('#fetch_backend input:checked').val()=='html_requests') {
$('#request-override').show(); $('#request-override').show();
$('#webdriver-stepper').hide(); $('#webdriver-stepper').hide();
} else { } else {
+6 -25
View File
@@ -3,40 +3,21 @@ $(document).ready(function () {
// Lazy Hide/Show elements mechanism // Lazy Hide/Show elements mechanism
$('[data-visible-for]').hide(); $('[data-visible-for]').hide();
function show_related_elem(e) { function show_related_elem(e) {
var name = $(e).attr('name'); var n = $(e).attr('name') + "=" + $(e).val();
var val = $(e).val(); if (n === 'fetch_backend=system') {
var n = name + "=" + val;
// Resolve browser_profile select → underlying fetch_backend class name
// browserProfileFetcherMap is injected by the page as {machine_name: 'playwright', ...}
if (name && name.endsWith('browser_profile') && typeof browserProfileFetcherMap !== 'undefined') {
var fetcherClass = val === 'system'
? (typeof default_system_fetch_backend !== 'undefined' ? default_system_fetch_backend : null)
: browserProfileFetcherMap[val];
if (fetcherClass) {
n = 'fetch_backend=' + fetcherClass;
}
} else if (n === 'fetch_backend=system') {
n = "fetch_backend=" + default_system_fetch_backend; n = "fetch_backend=" + default_system_fetch_backend;
} }
$(`[data-visible-for~="${n}"]`).show(); $(`[data-visible-for~="${n}"]`).show();
} }
$(':radio').on('keyup keypress blur change click', function (e) {
$('select, :radio').on('change', function (e) {
$(`[data-visible-for]`).hide();
$('.advanced-options').hide();
show_related_elem(this);
});
// Retain original click/keyup handling for radio buttons
$(':radio').on('keyup keypress blur click', function (e) {
$(`[data-visible-for]`).hide(); $(`[data-visible-for]`).hide();
$('.advanced-options').hide(); $('.advanced-options').hide();
show_related_elem(this); show_related_elem(this);
}); });
$(':radio:checked, select').each(function (e) { $(':radio:checked').each(function (e) {
show_related_elem(this); show_related_elem(this);
}); })
// Show advanced // Show advanced
@@ -45,4 +26,4 @@ $(document).ready(function () {
$(this).toggle(); $(this).toggle();
}) })
}); });
}); });
+14 -162
View File
@@ -143,7 +143,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.__data['settings']['application']['tags'][uuid] = Tag.model( self.__data['settings']['application']['tags'][uuid] = Tag.model(
datastore_path=self.datastore_path, datastore_path=self.datastore_path,
__datastore=self, __datastore=self.__data,
default=tag default=tag
) )
logger.info(f"Tag: {uuid} {tag['title']}") logger.info(f"Tag: {uuid} {tag['title']}")
@@ -207,7 +207,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json") self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
# Base definition for all watchers (deepcopy part of #569) # Base definition for all watchers (deepcopy part of #569)
self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, __datastore=self, default={})) self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, __datastore=self.__data, default={}))
# Load build SHA if available (Docker deployments) # Load build SHA if available (Docker deployments)
if path.isfile('changedetectionio/source.txt'): if path.isfile('changedetectionio/source.txt'):
@@ -245,10 +245,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Maybe they copied a bunch of watch subdirs across too # Maybe they copied a bunch of watch subdirs across too
self._load_state() self._load_state()
# Apply env-var browser config after state is fully loaded so we can safely
# read existing settings without risk of being overwritten.
self.preconfigure_browser_profiles_based_on_env()
def init_fresh_install(self, include_default_watches, version_tag): def init_fresh_install(self, include_default_watches, version_tag):
# Generate app_guid FIRST (required for all operations) # Generate app_guid FIRST (required for all operations)
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ: if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
@@ -272,11 +268,13 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if include_default_watches: if include_default_watches:
self.add_watch( self.add_watch(
url='https://news.ycombinator.com/', url='https://news.ycombinator.com/',
tag='Tech news' tag='Tech news',
extras={'fetch_backend': 'html_requests'}
) )
self.add_watch( self.add_watch(
url='https://changedetection.io/CHANGELOG.txt', url='https://changedetection.io/CHANGELOG.txt',
tag='changedetection.io' tag='changedetection.io',
extras={'fetch_backend': 'html_requests'}
) )
# Create changedetection.json immediately # Create changedetection.json immediately
@@ -333,64 +331,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if entity.get('processor') != 'text_json_diff': if entity.get('processor') != 'text_json_diff':
logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}") logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")
entity = watch_class(datastore_path=self.datastore_path, __datastore=self, default=entity) entity = watch_class(datastore_path=self.datastore_path, __datastore=self.__data, default=entity)
return entity return entity
def preconfigure_browser_profiles_based_on_env(self):
"""Instantiate browser profiles from environment variables and store them.
Always runs at the end of reload_state() covers fresh installs,
existing datastores, and server restarts. Env vars always win so that
changing PLAYWRIGHT_DRIVER_URL and restarting is reflected immediately.
Creates BrowserProfile instances from env vars and stores them in
``settings.application.browser_profiles`` under their machine names,
then sets ``settings.application.browser_profile`` to that profile as
the system-wide default.
"""
from changedetectionio.model import browser_profile as bp
from changedetectionio.strtobool import strtobool
store_profiles = self.__data['settings']['application'].setdefault('browser_profiles', {})
service_workers = os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow')
extra_delay = int(os.getenv('WEBDRIVER_DELAY_BEFORE_CONTENT_READY', 0))
configured_profile = None
playwright_url = os.getenv('PLAYWRIGHT_DRIVER_URL')
if playwright_url:
playwright_url = playwright_url.strip('"')
builtin = bp.BUILTIN_PUPPETEER if strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) else bp.BUILTIN_PLAYWRIGHT
profile = bp.BrowserProfile(
name=builtin.name,
fetch_backend=builtin.fetch_backend,
browser_connection_url=playwright_url,
service_workers=service_workers,
extra_delay=extra_delay,
is_builtin=True,
)
logger.debug(f"Configuring browser profile '{profile.get_machine_name()}' from env")
store_profiles[profile.get_machine_name()] = profile.model_dump()
configured_profile = profile
webdriver_url = os.getenv('WEBDRIVER_URL')
if webdriver_url:
profile = bp.BrowserProfile(
name=bp.BUILTIN_SELENIUM.name,
fetch_backend=bp.BUILTIN_SELENIUM.fetch_backend,
browser_connection_url=webdriver_url.strip('"'),
extra_delay=extra_delay,
is_builtin=True,
)
logger.debug(f"Configuring browser profile '{profile.get_machine_name()}' from env")
store_profiles[profile.get_machine_name()] = profile.model_dump()
if not configured_profile:
configured_profile = profile
if configured_profile:
logger.debug(f"Setting system default browser profile to '{configured_profile.get_machine_name()}'")
self.__data['settings']['application']['browser_profile'] = configured_profile.get_machine_name()
# ============================================================================ # ============================================================================
# FileSavingDataStore Abstract Method Implementations # FileSavingDataStore Abstract Method Implementations
# ============================================================================ # ============================================================================
@@ -422,14 +365,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Is saved as {uuid}/tag.json # Is saved as {uuid}/tag.json
settings_copy['application']['tags'] = {} settings_copy['application']['tags'] = {}
# Serialize BrowserProfile Pydantic instances to plain dicts for JSON storage
raw_profiles = settings_copy['application'].get('browser_profiles', {})
from changedetectionio.model.browser_profile import BrowserProfile
settings_copy['application']['browser_profiles'] = {
k: v.model_dump() if isinstance(v, BrowserProfile) else v
for k, v in raw_profiles.items()
}
return { return {
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json', 'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
'app_guid': self.__data.get('app_guid'), 'app_guid': self.__data.get('app_guid'),
@@ -486,7 +421,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
return Tag.model( return Tag.model(
datastore_path=self.datastore_path, datastore_path=self.datastore_path,
__datastore=self, __datastore=self.__data,
default=entity_dict default=entity_dict
) )
@@ -832,7 +767,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# If the processor also has its own Watch implementation # If the processor also has its own Watch implementation
watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor')) watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
new_watch = watch_class(datastore_path=self.datastore_path, __datastore=self, url=url) new_watch = watch_class(datastore_path=self.datastore_path, __datastore=self.__data, url=url)
new_uuid = new_watch.get('uuid') new_uuid = new_watch.get('uuid')
@@ -917,16 +852,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
return proxy_list if len(proxy_list) else None return proxy_list if len(proxy_list) else None
def get_proxy_url_for_watch(self, uuid, override_id=None):
"""
Returns the resolved proxy URL string for a watch, or None.
override_id forces a specific proxy (e.g. proxy checker bypass).
"""
proxy_id = override_id or self.get_preferred_proxy_for_watch(uuid)
if proxy_id:
return self.proxy_list.get(proxy_id, {}).get('url')
return None
def get_preferred_proxy_for_watch(self, uuid): def get_preferred_proxy_for_watch(self, uuid):
""" """
Returns the preferred proxy by ID key Returns the preferred proxy by ID key
@@ -960,71 +885,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
return None return None
# ------------------------------------------------------------------
# BrowserProfile helpers
# ------------------------------------------------------------------
def get_browser_profile(self, machine_name: str):
"""Return a BrowserProfile by machine name, or None if not found.
Built-in profiles (direct_http_requests, browser_chromeplaywright) are
always available and checked first.
"""
from changedetectionio.model.browser_profile import get_profile
store_profiles = self.data['settings']['application'].get('browser_profiles', {})
return get_profile(machine_name, store_profiles)
def delete_browser_profile(self, machine_name: str):
"""Delete a user-defined BrowserProfile by machine name.
Rules enforced:
- Built-in profiles cannot be deleted.
- The profile cannot be the current system default
(settings.application.browser_profile); caller must change the
default first.
- Any watch or tag that referenced this profile is reset to None
(falls back through the chain on next fetch).
Returns the number of watches/tags that were reset.
"""
from changedetectionio.model.browser_profile import RESERVED_MACHINE_NAMES
if machine_name in RESERVED_MACHINE_NAMES:
raise ValueError(f"Built-in profile '{machine_name}' cannot be deleted")
system_default = self.data['settings']['application'].get('browser_profile')
if system_default == machine_name:
raise ValueError(
f"Profile '{machine_name}' is the system default. "
f"Change the system default before deleting it."
)
store_profiles = self.data['settings']['application'].get('browser_profiles', {})
if machine_name not in store_profiles:
return 0
del store_profiles[machine_name]
reset_count = 0
# Reset watches that reference this profile
for uuid, watch in self.data['watching'].items():
if watch.get('browser_profile') == machine_name:
watch['browser_profile'] = None
watch.commit()
reset_count += 1
# Reset tags that reference this profile
for tag_uuid, tag in self.data['settings']['application'].get('tags', {}).items():
if tag.get('browser_profile') == machine_name:
tag['browser_profile'] = None
tag.commit()
reset_count += 1
self._save_settings()
logger.info(f"Deleted BrowserProfile '{machine_name}', reset {reset_count} watches/tags")
return reset_count
@property @property
def has_extra_headers_file(self): def has_extra_headers_file(self):
filepath = os.path.join(self.datastore_path, 'headers.txt') filepath = os.path.join(self.datastore_path, 'headers.txt')
@@ -1102,7 +962,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
from ..model import Tag from ..model import Tag
new_tag = Tag.model( new_tag = Tag.model(
datastore_path=self.datastore_path, datastore_path=self.datastore_path,
__datastore=self, __datastore=self.__data,
default={ default={
'title': title.strip(), 'title': title.strip(),
'date_created': int(time.time()) 'date_created': int(time.time())
@@ -1120,20 +980,12 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
def get_all_tags_for_watch(self, uuid): def get_all_tags_for_watch(self, uuid):
"""This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet""" """This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet"""
watch = self.data['watching'].get(uuid) watch = self.data['watching'].get(uuid)
if not watch:
return {}
# Start with manually assigned tags # Should return a dict of full tag info linked by UUID
result = dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', [])) if watch:
return dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
# Additionally include any tag whose url_match_pattern matches this watch's URL return {}
watch_url = watch.get('url', '')
if watch_url:
for tag_uuid, tag in self.__data['settings']['application']['tags'].items():
if tag_uuid not in result and tag.matches_url(watch_url):
result[tag_uuid] = tag
return result
@property @property
def extra_browsers(self): def extra_browsers(self):
-139
View File
@@ -15,7 +15,6 @@ import tarfile
import time import time
from loguru import logger from loguru import logger
from copy import deepcopy from copy import deepcopy
from typing import Optional
# Try to import orjson for faster JSON serialization # Try to import orjson for faster JSON serialization
@@ -731,144 +730,6 @@ class DatastoreUpdatesMixin:
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk) # (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
self._save_settings() self._save_settings()
def update_31(self):
"""
Migrate legacy ``fetch_backend`` strings to the new ``browser_profile``
machine-name system.
What this migration does
------------------------
1. ``settings.requests.extra_browsers`` entries are converted into
``BrowserProfile`` objects and stored in
``settings.application.browser_profiles`` keyed by machine name.
2. ``settings.application.fetch_backend`` (the system-wide default) is
translated to a machine name and written to
``settings.application.browser_profile``.
3. Every watch that has an explicit ``fetch_backend`` (not ``'system'``)
gets a corresponding ``browser_profile`` machine name set, then
``fetch_backend`` is reset to ``'system'``.
4. The same translation is applied to tags with ``overrides_watch=True``
that carry an explicit ``fetch_backend``.
Legacy mapping
~~~~~~~~~~~~~~
* ``'html_requests'`` built-in ``'direct_http_requests'``
* ``'html_webdriver'`` built-in ``'browser_chromeplaywright'``
* ``'extra_browser_<name>'`` machine name of the migrated custom profile
* ``'system'`` / missing ``None`` (continue to use chain resolution)
Safe to re-run: skips watches / tags that already have ``browser_profile``
set, and skips extra_browser entries that have already been migrated.
"""
from ..model.browser_profile import (
BrowserProfile,
BUILTIN_REQUESTS,
BUILTIN_BROWSER,
)
app_settings = self.data['settings']['application']
# ------------------------------------------------------------------
# 1. Migrate extra_browsers → browser_profiles
# ------------------------------------------------------------------
extra_browsers = self.data['settings']['requests'].get('extra_browsers', [])
browser_profiles: dict = app_settings.setdefault('browser_profiles', {})
extra_browser_name_to_machine: dict[str, str] = {}
for entry in extra_browsers:
browser_name = entry.get('browser_name', '').strip()
connection_url = entry.get('browser_connection_url', '').strip()
if not browser_name:
continue
profile = BrowserProfile(
name=browser_name,
fetch_backend='playwright_cdp',
browser_connection_url=connection_url or None,
)
machine_name = profile.get_machine_name()
if machine_name not in browser_profiles:
browser_profiles[machine_name] = profile.model_dump()
logger.info(f"update_31: migrated extra_browser '{browser_name}' → profile '{machine_name}'")
extra_browser_name_to_machine[browser_name] = machine_name
# ------------------------------------------------------------------
# Helper: translate a fetch_backend string to a machine name
# ------------------------------------------------------------------
builtin_requests_name = BUILTIN_REQUESTS.get_machine_name()
builtin_browser_name = BUILTIN_BROWSER.get_machine_name()
def _to_machine_name(fetch_backend: str) -> Optional[str]:
if not fetch_backend or fetch_backend in ('system', 'default', ''):
return None
if fetch_backend.startswith('extra_browser_'):
key = fetch_backend[len('extra_browser_'):]
return extra_browser_name_to_machine.get(key)
# Strip legacy html_ prefix then query the fetcher registry
from changedetectionio import content_fetchers as cf
clean = fetch_backend[5:] if fetch_backend.startswith('html_') else fetch_backend
fetcher_cls = cf.get_fetcher(clean)
if fetcher_cls is None:
logger.warning(f"update_31: unknown fetch_backend value {fetch_backend!r}, skipping")
return None
if fetcher_cls.supports_screenshots:
return builtin_browser_name
return builtin_requests_name
# ------------------------------------------------------------------
# 2. Migrate system-wide default
# ------------------------------------------------------------------
system_fetch_backend = app_settings.get('fetch_backend', 'requests')
if not app_settings.get('browser_profile'):
machine = _to_machine_name(system_fetch_backend)
app_settings['browser_profile'] = machine
logger.info(
f"update_31: system fetch_backend '{system_fetch_backend}' → browser_profile '{machine}'"
)
# ------------------------------------------------------------------
# 3. Migrate watches
# ------------------------------------------------------------------
for uuid, watch in self.data['watching'].items():
if watch.get('browser_profile'):
continue # already migrated
fetch_backend = watch.get('fetch_backend', 'system')
machine = _to_machine_name(fetch_backend)
watch['browser_profile'] = machine
watch['fetch_backend'] = 'system' # clear legacy value
watch.commit()
if machine:
logger.info(
f"update_31: watch {uuid} fetch_backend '{fetch_backend}' → browser_profile '{machine}'"
)
# ------------------------------------------------------------------
# 4. Migrate tags
# ------------------------------------------------------------------
for tag_uuid, tag in app_settings.get('tags', {}).items():
if tag.get('browser_profile'):
continue # already migrated
fetch_backend = tag.get('fetch_backend', 'system')
machine = _to_machine_name(fetch_backend)
if machine:
tag['browser_profile'] = machine
tag['fetch_backend'] = 'system'
tag.commit()
logger.info(
f"update_31: tag {tag_uuid} fetch_backend '{fetch_backend}' → browser_profile '{machine}'"
)
self._save_settings()
logger.success("update_31: fetch_backend → browser_profile migration complete")
def update_30(self): def update_30(self):
"""Migrate restock_settings out of watch.json into restock_diff.json processor config file. """Migrate restock_settings out of watch.json into restock_diff.json processor config file.
@@ -98,14 +98,6 @@
<td><code>{{ '{{diff_patch}}' }}</code></td> <td><code>{{ '{{diff_patch}}' }}</code></td>
<td>{{ _('The diff output - patch in unified format') }}</td> <td>{{ _('The diff output - patch in unified format') }}</td>
</tr> </tr>
<tr>
<td><code>{{ '{{diff_changed_from}}' }}</code></td>
<td>{{ _('Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_changed_to}}' }}</code></td>
<td>{{ _('Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
</tr>
<tr> <tr>
<td><code>{{ '{{current_snapshot}}' }}</code></td> <td><code>{{ '{{current_snapshot}}' }}</code></td>
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }} <td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
@@ -195,10 +187,6 @@
<div class=""> <div class="">
{{ render_field(form.notification_format , class="notification-format") }} {{ render_field(form.notification_format , class="notification-format") }}
<span class="pure-form-message-inline">{{ _('Format for all notifications') }}</span> <span class="pure-form-message-inline">{{ _('Format for all notifications') }}</span>
<div id="discord-html-format-warning" class="inline-warning" style="display: none; margin-top: 6px;">
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Note') }}" title="{{ _('Note') }}">
{{ _('Discord does not render HTML — switch to') }} <strong>{{ _('Plain Text') }}</strong> {{ _('format to avoid') }} <code>&amp;nbsp;</code> {{ _('and other HTML entities appearing literally in your notifications.') }}
</div>
</div> </div>
</div> </div>
{% endmacro %} {% endmacro %}
@@ -1,208 +0,0 @@
{#
Notification Profile Selector widget.
Usage:
{% from '_notification_profiles_selector.html' import render_notification_profile_selector %}
{{ render_notification_profile_selector(
own_profiles=watch.get('notification_profiles', []),
inherited_profiles=inherited_notification_profiles,
all_profile_data=settings_application.get('notification_profile_data', {}),
registry=registry
) }}
own_profiles — list of UUIDs directly linked to this watch/group
inherited_profiles — list of (uuid, origin_label) tuples from parent groups/system
all_profile_data — dict of uuid→profile from settings.application.notification_profile_data
registry — notification_profiles.registry instance
#}
{% macro render_notification_profile_selector(own_profiles, inherited_profiles, all_profile_data, registry) %}
<div class="notification-profile-selector" id="notification-profile-selector">
{# Hidden inputs — one per selected UUID, submitted with the form #}
<div id="np-hidden-inputs">
{% for uid in own_profiles %}
<input type="hidden" name="notification_profiles" value="{{ uid }}">
{% endfor %}
</div>
<div class="np-chips" id="np-chips">
{# Own profiles — solid chips, removable #}
{% for uid in own_profiles %}
{% set profile = all_profile_data.get(uid) %}
{% if profile %}
{% set handler = registry.get(profile.get('type', 'apprise')) %}
<span class="np-chip np-chip-own" data-uuid="{{ uid }}"
title="{{ handler.get_url_hint(profile.get('config', {})) }}">
<i data-feather="{{ handler.icon }}" class="np-chip-icon"></i>
<span class="np-chip-name">{{ profile.get('name', uid) }}</span>
<span class="np-chip-remove" data-uuid="{{ uid }}" title="{{ _('Remove') }}">×</span>
</span>
{% endif %}
{% endfor %}
{# Inherited profiles — dimmed, read-only, show origin #}
{% for uid, origin_label in (inherited_profiles or []) %}
{% if uid not in own_profiles %}
{% set profile = all_profile_data.get(uid) %}
{% if profile %}
{% set handler = registry.get(profile.get('type', 'apprise')) %}
<span class="np-chip np-chip-inherited"
title="{{ _('Inherited from') }}: {{ origin_label }} — {{ handler.get_url_hint(profile.get('config', {})) }}">
<i data-feather="{{ handler.icon }}" class="np-chip-icon"></i>
<span class="np-chip-name">{{ profile.get('name', uid) }}</span>
<i data-feather="lock" class="np-chip-lock"></i>
</span>
{% endif %}
{% endif %}
{% endfor %}
{# Add button + dropdown #}
<div class="np-add-wrapper" id="np-add-wrapper">
<button type="button" class="np-add-btn pure-button button-xsmall" id="np-add-btn">
<i data-feather="plus"></i> {{ _('Add profile') }}
</button>
<div class="np-dropdown" id="np-dropdown" style="display:none;">
<input type="text" class="np-search" id="np-search" placeholder="{{ _('Search profiles…') }}" autocomplete="off">
<div class="np-options" id="np-options">
{% set has_options = [] %}
{% for uid, profile in all_profile_data.items() %}
{% if uid not in own_profiles %}
{% set handler = registry.get(profile.get('type', 'apprise')) %}
{% set hint = handler.get_url_hint(profile.get('config', {})) %}
<div class="np-option" data-uuid="{{ uid }}"
data-name="{{ profile.get('name', '') }}"
data-icon="{{ handler.icon }}"
data-hint="{{ hint }}">
<i data-feather="{{ handler.icon }}" class="np-option-icon"></i>
<span class="np-option-text">
<strong class="np-option-name">{{ profile.get('name', uid) }}</strong>
{% if hint %}<small class="np-option-hint">{{ hint }}</small>{% endif %}
</span>
</div>
{% if has_options.append(1) %}{% endif %}
{% endif %}
{% endfor %}
{% if not has_options %}
<div class="np-option np-no-results" style="pointer-events:none; color: var(--color-grey-600);">
{{ _('No other profiles available') }}
</div>
{% endif %}
<div class="np-no-match" style="display:none; padding: 8px 12px; color: var(--color-grey-600); font-size: 0.85em;">
{{ _('No profiles match') }}
</div>
</div>
<a href="{{ url_for('notification_profiles.edit') }}" class="np-create-new">
<i data-feather="plus-circle"></i> {{ _('Create new profile') }}
</a>
</div>
</div>
</div>{# .np-chips #}
{% if not own_profiles and not inherited_profiles %}
<p class="pure-form-message-inline" style="margin: 4px 0 0 0; color: var(--color-grey-600);">
{{ _('No notification profiles linked. Notifications will not be sent for this watch.') }}
</p>
{% endif %}
</div>{# .notification-profile-selector #}
<script>
(function() {
var selector = document.getElementById('notification-profile-selector');
if (!selector) return;
var addBtn = selector.querySelector('#np-add-btn');
var dropdown = selector.querySelector('#np-dropdown');
var search = selector.querySelector('#np-search');
var chips = selector.querySelector('#np-chips');
var hiddenWrap = selector.querySelector('#np-hidden-inputs');
var noMatch = selector.querySelector('.np-no-match');
// Toggle dropdown
addBtn.addEventListener('click', function(e) {
e.stopPropagation();
var open = dropdown.style.display !== 'none';
dropdown.style.display = open ? 'none' : 'block';
if (!open) { search.value = ''; filterOptions(''); search.focus(); }
});
// Close on outside click
document.addEventListener('click', function(e) {
if (!selector.contains(e.target)) dropdown.style.display = 'none';
});
// Search filter
search.addEventListener('input', function() { filterOptions(this.value.toLowerCase()); });
/**
 * Show only the dropdown options whose name or hint contains the query.
 *
 * Options whose uuid already has a hidden input in the form stay hidden, so
 * reopening the dropdown or typing in the search box cannot bring back a
 * profile that was just added and allow it to be linked twice.
 *
 * @param {string} q Lower-cased search text; an empty string matches everything.
 */
function filterOptions(q) {
    // Collect the uuids that are currently linked (one hidden input per linked profile).
    var linked = [];
    if (hiddenWrap) {
        hiddenWrap.querySelectorAll('input').forEach(function(inp) {
            linked.push(inp.value);
        });
    }

    var opts = selector.querySelectorAll('.np-option:not(.np-no-results)');
    var visible = 0;
    opts.forEach(function(opt) {
        // Already linked: never offer it again, whatever the query is.
        if (linked.indexOf(opt.dataset.uuid) !== -1) {
            opt.style.display = 'none';
            return;
        }
        var name = (opt.dataset.name || '').toLowerCase();
        var hint = (opt.dataset.hint || '').toLowerCase();
        var match = !q || name.indexOf(q) !== -1 || hint.indexOf(q) !== -1;
        opt.style.display = match ? '' : 'none';
        if (match) visible++;
    });

    // The "no profiles match" row is only shown for a non-empty query with zero hits.
    noMatch.style.display = (visible === 0 && q) ? 'block' : 'none';
}
// Add profile
// Clicking a dropdown option links that profile: a hidden form input carries the
// uuid on submit, and a removable chip is shown in front of the add button.
selector.querySelectorAll('.np-option:not(.np-no-results)').forEach(function(opt) {
    opt.addEventListener('click', function() {
        var uuid = this.dataset.uuid;
        var name = this.dataset.name;
        var icon = this.dataset.icon;
        var hint = this.dataset.hint;

        // Hidden input submitted with the form
        var inp = document.createElement('input');
        inp.type = 'hidden'; inp.name = 'notification_profiles'; inp.value = uuid;
        hiddenWrap.appendChild(inp);

        // Chip (inserted before the add-wrapper)
        var chip = document.createElement('span');
        chip.className = 'np-chip np-chip-own';
        chip.dataset.uuid = uuid;
        chip.title = hint || '';
        // dataset values are already entity-decoded, so every value interpolated
        // into markup is escaped again -- attribute values included, not just the name.
        chip.innerHTML = '<i data-feather="' + escHtml(icon) + '" class="np-chip-icon"></i>'
            + '<span class="np-chip-name">' + escHtml(name) + '</span>'
            + '<span class="np-chip-remove" data-uuid="' + escHtml(uuid) + '" title="{{ _("Remove") }}">×</span>';
        chips.insertBefore(chip, selector.querySelector('#np-add-wrapper'));
        chip.querySelector('.np-chip-remove').addEventListener('click', removeChip);

        // Hide this option in the dropdown and close it
        this.style.display = 'none';
        dropdown.style.display = 'none';

        // Turn the new <i data-feather> placeholder into an icon when feather is loaded
        if (window.feather) feather.replace();
    });
});
// Remove chip
// Chips that exist when the script runs get the same remove handler as
// chips created later by the option click handler.
selector.querySelectorAll('.np-chip-remove').forEach(function(removeBtn) {
    removeBtn.addEventListener('click', removeChip);
});

/**
 * Click handler for a chip's remove control (`this` is the clicked control).
 * Drops the chip and its hidden form input, then makes the matching dropdown
 * option visible again if one exists.
 */
function removeChip() {
    var id = this.dataset.uuid;
    var byUuid = '[data-uuid="' + id + '"]';

    var ownChip = selector.querySelector('.np-chip-own' + byUuid);
    if (ownChip) {
        ownChip.remove();
    }

    var hiddenInput = hiddenWrap.querySelector('input[value="' + id + '"]');
    if (hiddenInput) {
        hiddenInput.remove();
    }

    // Re-show in dropdown
    var option = selector.querySelector('.np-option' + byUuid);
    if (option) {
        option.style.display = '';
    }
}
/**
 * Escape a value for safe interpolation into HTML text or a quoted attribute.
 *
 * Escapes &, <, >, " and ' (the last one so the result is also safe inside
 * single-quoted attributes). null/undefined become an empty string and other
 * non-string values are converted with String() instead of throwing.
 *
 * @param {*} s Value to escape.
 * @returns {string} The escaped string.
 */
function escHtml(s) {
    if (s === null || s === undefined) return '';
    return String(s)
        .replace(/&/g, '&amp;')   // must run first so the entities below are not double-escaped
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
if (window.feather) feather.replace();
})();
</script>
{% endmacro %}
-4
View File
@@ -45,10 +45,6 @@
<script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script> <script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script>
{% endif %} {% endif %}
{%- set _html_head_extras = get_html_head_extras() -%}
{%- if _html_head_extras %}
{{ _html_head_extras | safe }}
{%- endif %}
</head> </head>
<body class="{{extra_classes}}"> <body class="{{extra_classes}}">
@@ -10,7 +10,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br> <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %} {% endif %}
<span class="pure-form-message-inline">One CSS, xPath 1 &amp; 2, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br> <span class="pure-form-message-inline">One CSS, xPath 1 &amp; 2, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">{{ _('Show advanced help and tips') }}</span><br> <span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
<ul id="advanced-help-selectors" style="display: none;"> <ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
@@ -47,9 +47,9 @@ nav
//*[contains(text(), 'Advertisement')]") }} //*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li> {{ _('Remove HTML element(s) by CSS and XPath selectors before text conversion.') }} </li> <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> {{ _("Don't paste HTML here, use only CSS and XPath selectors") }} </li> <li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> {{ _('Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML.') }} </li> <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul> </ul>
</span> </span>
</fieldset> </fieldset>
@@ -49,21 +49,6 @@ Unavailable") }}
</span> </span>
</div> </div>
</fieldset> </fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_lines_containing, rows=5, placeholder="celsius
temperature
price") }}
<span class="pure-form-message-inline">
<ul>
<li>{{ _('Keep only lines that contain any of these words or phrases (plain text, case-insensitive)') }}</li>
<li>{{ _('One entry per line — any line in the page text that contains a match is kept') }}</li>
<li>{{ _('Simpler alternative to regex — use this when you just want lines about a specific topic') }}</li>
<li>{{ _('Example: enter') }} <code>celsius</code> {{ _('to keep only lines mentioning temperature readings') }}</li>
</ul>
</span>
</div>
</fieldset>
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/ {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
@@ -4,53 +4,29 @@ import os
from flask import url_for from flask import url_for
from ..util import live_server_setup, wait_for_all_checks from ..util import live_server_setup, wait_for_all_checks
CUSTOM_PROFILE_NAME = 'Custom Browser URL'
CUSTOM_PROFILE_MACHINE_NAME = 'custom_browser_url'
CUSTOM_BROWSER_WS = 'ws://sockpuppetbrowser-custom-url:3000'
def create_custom_browser_profile(client):
"""Create a browser profile that uses the custom sockpuppet container."""
res = client.post(
url_for("settings.settings_browsers.save"),
data={
"name": CUSTOM_PROFILE_NAME,
"fetch_backend": "playwright_cdp",
"browser_connection_url": CUSTOM_BROWSER_WS,
"viewport_width": 1280,
"viewport_height": 1000,
"block_images": "",
"block_fonts": "",
"ignore_https_errors": "",
"user_agent": "",
"locale": "",
"original_machine_name": "",
},
follow_redirects=True
)
assert b"saved." in res.data, f"Expected profile save confirmation, got: {res.data[:500]}"
def do_test(client, live_server, make_test_use_extra_browser=False): def do_test(client, live_server, make_test_use_extra_browser=False):
# Grep for this string in the logs?
test_url = "https://changedetection.io/ci-test.html?non-custom-default=true"
# "non-custom-default" should not appear in the custom browser connection
custom_browser_name = 'custom browser URL'
# needs to be set and something like 'ws://127.0.0.1:3000' # needs to be set and something like 'ws://127.0.0.1:3000'
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
test_url = "https://changedetection.io/ci-test.html?non-custom-default=true" #####################
# preconfigure_browser_profiles_based_on_env() already set the correct system default
res = client.post( res = client.post(
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={ data={"application-empty_pages_are_a_change": "",
"application-empty_pages_are_a_change": "", "requests-time_between_check-minutes": 180,
"requests-time_between_check-minutes": 180, 'application-fetch_backend': "html_webdriver",
}, 'requests-extra_browsers-0-browser_connection_url': 'ws://sockpuppetbrowser-custom-url:3000',
'requests-extra_browsers-0-browser_name': custom_browser_name
},
follow_redirects=True follow_redirects=True
) )
assert b"Settings updated." in res.data
# Create the custom browser profile assert b"Settings updated." in res.data
create_custom_browser_profile(client)
# Add our URL to the import page # Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
@@ -59,24 +35,23 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
if make_test_use_extra_browser: if make_test_use_extra_browser:
# The custom profile name should appear in the edit page under "Request" tab # So the name should appear in the edit page under "Request" > "Fetch Method"
res = client.get( res = client.get(
url_for("ui.ui_edit.edit_page", uuid="first"), url_for("ui.ui_edit.edit_page", uuid="first"),
follow_redirects=True follow_redirects=True
) )
assert CUSTOM_PROFILE_NAME.encode() in res.data, \ assert b'custom browser URL' in res.data
f"Expected '{CUSTOM_PROFILE_NAME}' in edit page fetch method choices"
res = client.post( res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"), url_for("ui.ui_edit.edit_page", uuid="first"),
data={ data={
# 'run_custom_browser_url_tests.sh' will grep for this string in the custom container logs # 'run_customer_browser_url_tests.sh' will search for this string to know if we hit the right browser container or not
"url": "https://changedetection.io/ci-test.html?custom-browser-search-string=1", "url": "https://changedetection.io/ci-test.html?custom-browser-search-string=1",
"tags": "", "tags": "",
"headers": "", "headers": "",
"browser_profile": CUSTOM_PROFILE_MACHINE_NAME, 'fetch_backend': f"extra_browser_{custom_browser_name}",
"webdriver_js_execute_code": "", 'webdriver_js_execute_code': '',
"time_between_check_use_default": "y" "time_between_check_use_default": "y"
}, },
follow_redirects=True follow_redirects=True
) )
@@ -99,10 +74,12 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
# Requires playwright to be installed # Requires playwright to be installed
def test_request_via_custom_browser_url(client, live_server, measure_memory_usage, datastore_path): def test_request_via_custom_browser_url(client, live_server, measure_memory_usage, datastore_path):
# live_server_setup(live_server) # Setup on conftest per function
# We do this so we can grep the logs of the custom container and see if the request actually went through that container # We do this so we can grep the logs of the custom container and see if the request actually went through that container
do_test(client, live_server, make_test_use_extra_browser=True) do_test(client, live_server, make_test_use_extra_browser=True)
def test_request_not_via_custom_browser_url(client, live_server, measure_memory_usage, datastore_path): def test_request_not_via_custom_browser_url(client, live_server, measure_memory_usage, datastore_path):
# live_server_setup(live_server) # Setup on conftest per function
# We do this so we can grep the logs of the custom container and see if the request actually went through that container # We do this so we can grep the logs of the custom container and see if the request actually went through that container
do_test(client, live_server, make_test_use_extra_browser=False) do_test(client, live_server, make_test_use_extra_browser=False)
@@ -12,13 +12,12 @@ def test_fetch_webdriver_content(client, live_server, measure_memory_usage, data
# live_server_setup(live_server) # Setup on conftest per function # live_server_setup(live_server) # Setup on conftest per function
##################### #####################
# preconfigure_browser_profiles_based_on_env() already set the correct system default
# (playwright or puppeteer depending on FAST_PUPPETEER_CHROME_FETCHER) — no need to override it.
res = client.post( res = client.post(
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={ data={
"application-empty_pages_are_a_change": "", "application-empty_pages_are_a_change": "",
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_webdriver",
'application-ui-favicons_enabled': "y", 'application-ui-favicons_enabled': "y",
}, },
follow_redirects=True follow_redirects=True
@@ -25,6 +25,7 @@ def test_execute_custom_js(client, live_server, measure_memory_usage, datastore_
data={ data={
"url": test_url, "url": test_url,
"tags": "", "tags": "",
'fetch_backend': "html_webdriver",
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();', 'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();',
'headers': "testheader: yes\buser-agent: MyCustomAgent", 'headers': "testheader: yes\buser-agent: MyCustomAgent",
"time_between_check_use_default": "y", "time_between_check_use_default": "y",
@@ -1,83 +0,0 @@
"""Test that plugins can inject HTML into base.html <head> via get_html_head_extras hookimpl."""
import pytest
from flask import url_for, Response
from changedetectionio.pluggy_interface import hookimpl, plugin_manager
_MY_JS = "console.log('my_module_content loaded');"
_MY_CSS = ".my-module-example { color: red; }"
class _HeadExtrasPlugin:
    """Plugin used only by these tests: its head extras reference two Flask routes by endpoint name."""

    @hookimpl
    def get_html_head_extras(self):
        """Return a stylesheet <link> and a deferred <script> tag, separated by a newline."""
        stylesheet_href = url_for('test_plugin_my_module_content_css')
        script_src = url_for('test_plugin_my_module_content_js')
        tags = [
            f'<link rel="stylesheet" id="test-head-extra-css" href="{stylesheet_href}">',
            f'<script id="test-head-extra-js" src="{script_src}" defer></script>',
        ]
        return '\n'.join(tags)
@pytest.fixture(scope='module')
def plugin_routes(live_server):
    """Attach the plugin's CSS and JS asset routes to the live server's Flask app.

    Module-scoped so the routes are registered once (Flask routes can't be added twice).
    """
    flask_app = live_server.app

    def _asset(body, mimetype):
        # Both assets are served with the same one-hour cache header.
        return Response(body, mimetype=mimetype,
                        headers={'Cache-Control': 'max-age=3600'})

    def test_plugin_my_module_content_css():
        return _asset(_MY_CSS, 'text/css')

    def test_plugin_my_module_content_js():
        return _asset(_MY_JS, 'application/javascript')

    # With no explicit endpoint the view function's name is used, which is the
    # name the tests and the plugin pass to url_for().
    flask_app.add_url_rule('/test-plugin/my_module_content/css',
                           view_func=test_plugin_my_module_content_css)
    flask_app.add_url_rule('/test-plugin/my_module_content/js',
                           view_func=test_plugin_my_module_content_js)
@pytest.fixture
def head_extras_plugin(plugin_routes):
    """Register the test plugin for a single test and unregister it afterwards.

    Function-scoped so tests that do not request it run without the hookimpl.
    """
    registered_as = "test_head_extras"
    instance = _HeadExtrasPlugin()
    plugin_manager.register(instance, name=registered_as)
    yield instance
    plugin_manager.unregister(name=registered_as)
def test_plugin_html_injected_into_head(client, live_server, measure_memory_usage, datastore_path, head_extras_plugin):
    """Both plugin tags must be present in the watchlist page and sit before </head>."""
    response = client.get(url_for("watchlist.index"), follow_redirects=True)
    assert response.status_code == 200

    page = response.data
    assert b'id="test-head-extra-css"' in page, "Plugin <link> tag missing from rendered page"
    assert b'id="test-head-extra-js"' in page, "Plugin <script> tag missing from rendered page"

    head_end = page.find(b'</head>')
    assert head_end != -1

    # Being present is not enough: each tag has to be located inside the <head> section.
    for needle in (b'id="test-head-extra-css"', b'id="test-head-extra-js"'):
        offset = page.find(needle)
        assert -1 < offset < head_end, f"{needle} must appear before </head>"
def test_plugin_js_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
    """The JS asset route answers 200 with a javascript content type and the expected body."""
    js_endpoint = url_for('test_plugin_my_module_content_js')
    response = client.get(js_endpoint)

    assert response.status_code == 200
    assert 'javascript' in response.content_type
    assert _MY_JS.encode() in response.data
def test_plugin_css_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
    """The CSS asset route answers 200 with a css content type and the expected body."""
    css_endpoint = url_for('test_plugin_my_module_content_css')
    response = client.get(css_endpoint)

    assert response.status_code == 200
    assert 'css' in response.content_type
    assert _MY_CSS.encode() in response.data
def test_no_extras_without_plugin(client, live_server, measure_memory_usage, datastore_path):
    """Isolation check: without the plugin fixture neither marker is rendered."""
    page = client.get(url_for("watchlist.index"), follow_redirects=True).data
    for marker in (b'id="test-head-extra-css"', b'id="test-head-extra-js"'):
        assert marker not in page
@@ -22,7 +22,7 @@ def test_preferred_proxy(client, live_server, measure_memory_usage, datastore_pa
url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1), url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
data={ data={
"include_filters": "", "include_filters": "",
"browser_profile": "system", "fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"headers": "", "headers": "",
"proxy": "proxy-two", "proxy": "proxy-two",
"tags": "", "tags": "",
@@ -22,6 +22,7 @@ def test_noproxy_option(client, live_server, measure_memory_usage, datastore_pat
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y", "application-ignore_whitespace": "y",
"application-fetch_backend": "html_requests",
"requests-extra_proxies-0-proxy_name": "custom-one-proxy", "requests-extra_proxies-0-proxy_name": "custom-one-proxy",
"requests-extra_proxies-0-proxy_url": "http://test:awesome@squid-one:3128", "requests-extra_proxies-0-proxy_url": "http://test:awesome@squid-one:3128",
"requests-extra_proxies-1-proxy_name": "custom-two-proxy", "requests-extra_proxies-1-proxy_name": "custom-two-proxy",
@@ -56,6 +57,7 @@ def test_noproxy_option(client, live_server, measure_memory_usage, datastore_pat
url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
data={ data={
"include_filters": "", "include_filters": "",
"fetch_backend": "html_requests",
"headers": "", "headers": "",
"proxy": "no-proxy", "proxy": "no-proxy",
"tags": "", "tags": "",
@@ -21,6 +21,7 @@ def test_proxy_noconnect_custom(client, live_server, measure_memory_usage, datas
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y", "application-ignore_whitespace": "y",
"application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy", "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
# test:awesome is set in tests/proxy_list/squid-passwords.txt # test:awesome is set in tests/proxy_list/squid-passwords.txt
"requests-extra_proxies-0-proxy_url": "http://127.0.0.1:3128", "requests-extra_proxies-0-proxy_url": "http://127.0.0.1:3128",
@@ -41,7 +42,7 @@ def test_proxy_noconnect_custom(client, live_server, measure_memory_usage, datas
options = { options = {
"url": test_url, "url": test_url,
"browser_profile": "system", "fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests",
"proxy": "ui-0custom-test-proxy", "proxy": "ui-0custom-test-proxy",
"time_between_check_use_default": "y", "time_between_check_use_default": "y",
} }
@@ -15,6 +15,7 @@ def test_select_custom(client, live_server, measure_memory_usage, datastore_path
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y", "application-ignore_whitespace": "y",
"application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy", "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
# test:awesome is set in tests/proxy_list/squid-passwords.txt # test:awesome is set in tests/proxy_list/squid-passwords.txt
"requests-extra_proxies-0-proxy_url": "http://test:awesome@squid-custom:3128", "requests-extra_proxies-0-proxy_url": "http://test:awesome@squid-custom:3128",
@@ -58,6 +59,7 @@ def test_custom_proxy_validation(client, live_server, measure_memory_usage, data
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y", "application-ignore_whitespace": "y",
"application-fetch_backend": 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy", "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
"requests-extra_proxies-0-proxy_url": "xxxxhtt/333??p://test:awesome@squid-custom:3128", "requests-extra_proxies-0-proxy_url": "xxxxhtt/333??p://test:awesome@squid-custom:3128",
}, },
@@ -73,6 +75,7 @@ def test_custom_proxy_validation(client, live_server, measure_memory_usage, data
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y", "application-ignore_whitespace": "y",
"application-fetch_backend": 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy", "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
"requests-extra_proxies-0-proxy_url": "https://", "requests-extra_proxies-0-proxy_url": "https://",
}, },
@@ -29,6 +29,7 @@ def test_socks5(client, live_server, measure_memory_usage, datastore_path):
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y", "application-ignore_whitespace": "y",
"application-fetch_backend": "html_requests",
# set in .github/workflows/test-only.yml # set in .github/workflows/test-only.yml
"requests-extra_proxies-0-proxy_url": "socks5://proxy_user123:proxy_pass123@socks5proxy:1080", "requests-extra_proxies-0-proxy_url": "socks5://proxy_user123:proxy_pass123@socks5proxy:1080",
"requests-extra_proxies-0-proxy_name": "socks5proxy", "requests-extra_proxies-0-proxy_name": "socks5proxy",
@@ -60,7 +61,7 @@ def test_socks5(client, live_server, measure_memory_usage, datastore_path):
url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1), url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
data={ data={
"include_filters": "", "include_filters": "",
"browser_profile": "system", "fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"headers": "", "headers": "",
"proxy": "ui-0socks5proxy", "proxy": "ui-0socks5proxy",
"tags": "", "tags": "",
@@ -48,7 +48,7 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage,
url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1), url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
data={ data={
"include_filters": "", "include_filters": "",
"browser_profile": "system", "fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"headers": "", "headers": "",
"proxy": "socks5proxy", "proxy": "socks5proxy",
"tags": "", "tags": "",
@@ -60,14 +60,15 @@ def test_restock_detection(client, live_server, measure_memory_usage, datastore_
##################### #####################
# preconfigure_browser_profiles_based_on_env() already set the correct system default # Set this up for when we remove the notification from the watch, it should fallback with these details
res = client.post( res = client.post(
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={"application-notification_urls": notification_url, data={"application-notification_urls": notification_url,
"application-notification_title": "fallback-title "+default_notification_title, "application-notification_title": "fallback-title "+default_notification_title,
"application-notification_body": "fallback-body "+default_notification_body, "application-notification_body": "fallback-body "+default_notification_body,
"application-notification_format": default_notification_format, "application-notification_format": default_notification_format,
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_webdriver"},
follow_redirects=True follow_redirects=True
) )
# Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
set_longer_modified_response, delete_all_watches set_longer_modified_response, delete_all_watches
import logging import logging
import os
# NOTE - RELIES ON mailserver as hostname running, see github build recipes # NOTE - RELIES ON mailserver as hostname running, see github build recipes
smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver') smtp_test_server = 'mailserver'
ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys()) ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())
@@ -56,7 +56,8 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
"application-notification_title": "fallback-title " + default_notification_title, "application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": "some text\nfallback-body<br> " + default_notification_body, "application-notification_body": "some text\nfallback-body<br> " + default_notification_body,
"application-notification_format": 'html', "application-notification_format": 'html',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
@@ -125,7 +126,8 @@ def test_check_notification_plaintext_format(client, live_server, measure_memory
"application-notification_title": "fallback-title {{watch_title}} {{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }} " + default_notification_title, "application-notification_title": "fallback-title {{watch_title}} {{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }} " + default_notification_title,
"application-notification_body": f"some text\n" + default_notification_body + f"\nMore output test\n{ALL_MARKUP_TOKENS}", "application-notification_body": f"some text\n" + default_notification_body + f"\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_format": 'text', "application-notification_format": 'text',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -186,7 +188,8 @@ def test_check_notification_html_color_format(client, live_server, measure_memor
"application-notification_title": "fallback-title {{watch_title}} - diff_added_lines_test : '{{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }}' " + default_notification_title, "application-notification_title": "fallback-title {{watch_title}} - diff_added_lines_test : '{{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }}' " + default_notification_title,
"application-notification_body": f"some text\n{default_notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", "application-notification_body": f"some text\n{default_notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_format": 'htmlcolor', "application-notification_format": 'htmlcolor',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -270,7 +273,8 @@ def test_check_notification_markdown_format(client, live_server, measure_memory_
"application-notification_title": "fallback-title diff_added_lines_test : '{{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }}' " + default_notification_title, "application-notification_title": "fallback-title diff_added_lines_test : '{{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }}' " + default_notification_title,
"application-notification_body": "*header*\n\nsome text\n" + default_notification_body, "application-notification_body": "*header*\n\nsome text\n" + default_notification_body,
"application-notification_format": 'markdown', "application-notification_format": 'markdown',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -365,7 +369,8 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
"application-notification_title": "fallback-title " + default_notification_title, "application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": notification_body, "application-notification_body": notification_body,
"application-notification_format": 'text', "application-notification_format": 'text',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
@@ -415,7 +420,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
data={ data={
"url": test_url, "url": test_url,
"notification_format": 'html', "notification_format": 'html',
'browser_profile': "direct_http_requests", 'fetch_backend': "html_requests",
"time_between_check_use_default": "y"}, "time_between_check_use_default": "y"},
follow_redirects=True follow_redirects=True
) )
@@ -475,7 +480,8 @@ def test_check_plaintext_document_plaintext_notification_smtp(client, live_serve
"application-notification_title": "fallback-title " + default_notification_title, "application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_format": 'text', "application-notification_format": 'text',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
@@ -527,7 +533,8 @@ def test_check_plaintext_document_html_notifications(client, live_server, measur
"application-notification_title": "fallback-title " + default_notification_title, "application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_format": 'html', "application-notification_format": 'html',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
@@ -606,7 +613,8 @@ def test_check_plaintext_document_html_color_notifications(client, live_server,
"application-notification_title": "fallback-title " + default_notification_title, "application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_format": 'htmlcolor', "application-notification_format": 'htmlcolor',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -678,7 +686,8 @@ def test_check_html_document_plaintext_notification(client, live_server, measure
"application-notification_title": "fallback-title " + default_notification_title, "application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_format": 'text', "application-notification_format": 'text',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -731,7 +740,8 @@ def test_check_html_notification_with_apprise_format_is_html(client, live_server
"application-notification_title": "fallback-title " + default_notification_title, "application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": "some text\nfallback-body<br> " + default_notification_body, "application-notification_body": "some text\nfallback-body<br> " + default_notification_body,
"application-notification_format": 'html', "application-notification_format": 'html',
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
+10 -6
View File
@@ -32,7 +32,8 @@ def test_check_access_control(app, client, live_server, measure_memory_usage, da
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={"application-password": "foobar", data={"application-password": "foobar",
"application-shared_diff_access": "True", "application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -90,7 +91,8 @@ def test_check_access_control(app, client, live_server, measure_memory_usage, da
res = c.post( res = c.post(
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={ data={
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -125,16 +127,16 @@ def test_check_access_control(app, client, live_server, measure_memory_usage, da
assert b"IMPORT" in res.data assert b"IMPORT" in res.data
assert b"LOG OUT" in res.data assert b"LOG OUT" in res.data
assert b"time_between_check-minutes" in res.data assert b"time_between_check-minutes" in res.data
assert b"fetch_backend" in res.data
################################################## ##################################################
# Remove password button, and check that it worked # Remove password button, and check that it worked
################################################## ##################################################
# preconfigure_browser_profiles_based_on_env() already set the correct system default
res = c.post( res = c.post(
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-fetch_backend": "html_webdriver",
"application-removepassword_button": "Remove password" "application-removepassword_button": "Remove password"
}, },
follow_redirects=True, follow_redirects=True,
@@ -148,7 +150,8 @@ def test_check_access_control(app, client, live_server, measure_memory_usage, da
res = c.post( res = c.post(
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={"application-password": "", data={"application-password": "",
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -161,7 +164,8 @@ def test_check_access_control(app, client, live_server, measure_memory_usage, da
data={"application-password": "foobar", data={"application-password": "foobar",
# Should be disabled # Should be disabled
"application-shared_diff_access": "", "application-shared_diff_access": "",
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -60,6 +60,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
url_for("ui.ui_edit.edit_page", uuid="first"), url_for("ui.ui_edit.edit_page", uuid="first"),
data={"trigger_text": 'The golden line', data={"trigger_text": 'The golden line',
"url": test_url, "url": test_url,
'fetch_backend': "html_requests",
'filter_text_removed': 'y', 'filter_text_removed': 'y',
"time_between_check_use_default": "y"}, "time_between_check_use_default": "y"},
follow_redirects=True follow_redirects=True
@@ -126,7 +127,8 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_urls": test_notification_url, "application-notification_urls": test_notification_url,
"application-notification_format": 'text', "application-notification_format": 'text',
"application-minutes_between_check": 180 "application-minutes_between_check": 180,
"application-fetch_backend": "html_requests"
}, },
follow_redirects=True follow_redirects=True
) )
@@ -147,6 +149,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
data={"trigger_text": 'Oh yes please', data={"trigger_text": 'Oh yes please',
"url": test_url, "url": test_url,
'processor': 'text_json_diff', 'processor': 'text_json_diff',
'fetch_backend': "html_requests",
'filter_text_removed': '', 'filter_text_removed': '',
'filter_text_added': 'y', 'filter_text_added': 'y',
"time_between_check_use_default": "y"}, "time_between_check_use_default": "y"},
+4 -15
View File
@@ -170,14 +170,6 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
headers={'x-api-key': api_key}, headers={'x-api-key': api_key},
) )
assert b'(changed) Which is across' in res.data assert b'(changed) Which is across' in res.data
assert b'Some text thats the same' in res.data
# Fetch the difference between two versions (default text format)
res = client.get(
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+"?changesOnly=true",
headers={'x-api-key': api_key},
)
assert b'Some text thats the same' not in res.data
# Test htmlcolor format # Test htmlcolor format
res = client.get( res = client.get(
@@ -374,9 +366,6 @@ def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path
watch['last_changed'] = 454444444444 watch['last_changed'] = 454444444444
watch['date_created'] = 454444444444 watch['date_created'] = 454444444444
# Exercise the new extract_lines_containing field
watch['extract_lines_containing'] = ['celsius', 'temperature']
# HTTP PUT ( UPDATE an existing watch ) # HTTP PUT ( UPDATE an existing watch )
res = client.put( res = client.put(
url_for("watch", uuid=uuid), url_for("watch", uuid=uuid),
@@ -400,9 +389,6 @@ def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path
assert date_created != 454444444444 assert date_created != 454444444444
assert date_created != "454444444444" assert date_created != "454444444444"
assert res.json.get('extract_lines_containing') == ['celsius', 'temperature'], \
"extract_lines_containing should be persisted and returned via API"
def test_access_denied(client, live_server, measure_memory_usage, datastore_path): def test_access_denied(client, live_server, measure_memory_usage, datastore_path):
# `config_api_token_enabled` Should be On by default # `config_api_token_enabled` Should be On by default
@@ -422,6 +408,7 @@ def test_access_denied(client, live_server, measure_memory_usage, datastore_path
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-fetch_backend": "html_requests",
"application-api_access_token_enabled": "" "application-api_access_token_enabled": ""
}, },
follow_redirects=True follow_redirects=True
@@ -441,6 +428,7 @@ def test_access_denied(client, live_server, measure_memory_usage, datastore_path
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-fetch_backend": "html_requests",
"application-api_access_token_enabled": "y" "application-api_access_token_enabled": "y"
}, },
follow_redirects=True follow_redirects=True
@@ -911,7 +899,8 @@ def test_api_conflict_UI_password(client, live_server, measure_memory_usage, dat
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={"application-password": "foobar", # password is now set! API should still work! data={"application-password": "foobar", # password is now set! API should still work!
"application-api_access_token_enabled": "y", "application-api_access_token_enabled": "y",
"requests-time_between_check-minutes": 180}, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -177,6 +177,7 @@ def test_openapi_validation_get_requests_bypass_validation(client, live_server,
url_for("settings.settings_page"), url_for("settings.settings_page"),
data={ data={
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
"application-fetch_backend": "html_requests",
"application-api_access_token_enabled": "" "application-api_access_token_enabled": ""
}, },
follow_redirects=True follow_redirects=True
+5 -26
View File
@@ -178,44 +178,23 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path): def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
""" """
Test that a tag/group can be created and updated with processor_config_restock_diff via the API. Test that a tag/group can be updated with processor_config_restock_diff via the API.
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags. Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
""" """
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
set_original_response(datastore_path=datastore_path) set_original_response(datastore_path=datastore_path)
# Create a tag with processor_config_restock_diff in a single POST (issue #3966) # Create a tag
res = client.post( res = client.post(
url_for("tag"), url_for("tag"),
data=json.dumps({ data=json.dumps({"title": "Restock Group"}),
"title": "Restock Group",
"overrides_watch": True,
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": True,
"price_change_min": 7777777
}
}),
headers={'content-type': 'application/json', 'x-api-key': api_key} headers={'content-type': 'application/json', 'x-api-key': api_key}
) )
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}" assert res.status_code == 201
tag_uuid = res.json.get('uuid') tag_uuid = res.json.get('uuid')
# Verify processor config was saved during creation (the bug: these were discarded) # Update tag with valid processor_config_restock_diff
res = client.get(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
tag_data = res.json
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
"processor_config_restock_diff should be saved on POST"
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
"price_change_min should be saved on POST"
# Update tag with valid processor_config_restock_diff via PUT
res = client.put( res = client.put(
url_for("tag", uuid=tag_uuid), url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'}, headers={'x-api-key': api_key, 'content-type': 'application/json'},
+1 -1
View File
@@ -19,7 +19,7 @@ def test_basic_auth(client, live_server, measure_memory_usage, datastore_path):
# Check form validation # Check form validation
res = client.post( res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"), url_for("ui.ui_edit.edit_page", uuid="first"),
data={"include_filters": "", "url": test_url, "tags": "", "headers": "", "time_between_check_use_default": "y"}, data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
follow_redirects=True follow_redirects=True
) )
assert b"Updated watch." in res.data assert b"Updated watch." in res.data

Some files were not shown because too many files have changed in this diff Show More