Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
3acf9fa60d BUmp docs 2025-08-25 14:39:00 +02:00
dgtlmoon
001d294654 Validate API calls against our YAML 2025-08-25 14:34:51 +02:00
dgtlmoon
994d17fc7a Improvements to schema and adding YAML validation 2025-08-25 13:41:59 +02:00
140 changed files with 1738 additions and 5009 deletions

View File

@@ -33,6 +33,7 @@ venv/
# Test and development files
test-datastore/
tests/
docs/
*.md
!README.md

View File

@@ -4,11 +4,11 @@ updates:
directory: /
schedule:
interval: "weekly"
"caronc/apprise":
versioning-strategy: "increase"
schedule:
interval: "daily"
groups:
all:
patterns:
- "*"
- package-ecosystem: pip
directory: /
schedule:
interval: "weekly"

View File

@@ -34,7 +34,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v4
uses: github/codeql-action/autobuild@v3
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
uses: github/codeql-action/analyze@v3

View File

@@ -41,7 +41,7 @@ jobs:
steps:
- uses: actions/checkout@v5
- name: Set up Python 3.11
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: 3.11
@@ -95,7 +95,7 @@ jobs:
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
cache-from: type=gha
cache-to: type=gha,mode=max
@@ -133,7 +133,7 @@ jobs:
file: ./Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
cache-from: type=gha
cache-to: type=gha,mode=max
# Looks like this was disabled

View File

@@ -9,7 +9,7 @@ jobs:
steps:
- uses: actions/checkout@v5
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install pypa/build
@@ -39,7 +39,7 @@ jobs:
name: python-package-distributions
path: dist/
- name: Set up Python 3.11
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Test that the basic pip built package runs without error

View File

@@ -38,6 +38,8 @@ jobs:
dockerfile: ./Dockerfile
- platform: linux/arm/v8
dockerfile: ./Dockerfile
- platform: linux/arm64/v8
dockerfile: ./Dockerfile
# Alpine Dockerfile platforms (musl via alpine check)
- platform: linux/amd64
dockerfile: ./.github/test/Dockerfile-alpine
@@ -46,7 +48,7 @@ jobs:
steps:
- uses: actions/checkout@v5
- name: Set up Python 3.11
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: 3.11
@@ -74,5 +76,5 @@ jobs:
file: ${{ matrix.dockerfile }}
platforms: ${{ matrix.platform }}
cache-from: type=gha
cache-to: type=gha,mode=min
cache-to: type=gha,mode=max

View File

@@ -24,7 +24,7 @@ jobs:
# Mainly just for link/flake8
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -54,10 +54,7 @@ jobs:
- name: Spin up ancillary SMTP+Echo message test server
run: |
# Debug SMTP server/echo message back server, telnet 11080 to it should immediately bounce back the most recent message that tried to send (then you can see if cdio tried to send, the format, etc)
# 11025 is the SMTP port for testing
# apprise example would be 'mailto://changedetection@localhost:11025/?to=fff@home.com (it will also echo to STDOUT)
# telnet localhost 11080
# Debug SMTP server/echo message back server
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
docker ps
@@ -256,30 +253,6 @@ jobs:
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
- name: Extract and display memory test report
if: always()
run: |
# Extract test-memory.log from the container
echo "Extracting test-memory.log from container..."
docker cp test-cdio-basic-tests:/app/changedetectionio/test-memory.log output-logs/test-memory-${{ env.PYTHON_VERSION }}.log || echo "test-memory.log not found in container"
# Display the memory log contents for immediate visibility in workflow output
echo "=== Top 10 Highest Peak Memory Tests ==="
if [ -f output-logs/test-memory-${{ env.PYTHON_VERSION }}.log ]; then
# Sort by peak memory value (extract number before MB and sort numerically, reverse order)
grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log | \
sed 's/.*Peak memory: //' | \
paste -d'|' - <(grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log) | \
sort -t'|' -k1 -nr | \
cut -d'|' -f2 | \
head -10
echo ""
echo "=== Full Memory Test Report ==="
cat output-logs/test-memory-${{ env.PYTHON_VERSION }}.log
else
echo "No memory log available"
fi
- name: Store everything including test-datastore
if: always()
uses: actions/upload-artifact@v4

View File

@@ -5,6 +5,7 @@ ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
# See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN apt-get update && apt-get install -y --no-install-recommends \
g++ \
@@ -16,7 +17,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libxslt-dev \
make \
patch \
pkg-config \
zlib1g-dev
RUN mkdir /install
@@ -26,14 +26,6 @@ COPY requirements.txt /requirements.txt
# Use cache mounts and multiple wheel sources for faster ARM builds
ENV PIP_CACHE_DIR=/tmp/pip-cache
# Help Rust find OpenSSL for cryptography package compilation on ARM
ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig"
ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1
ENV OPENSSL_DIR="/usr"
ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
# Additional environment variables for cryptography Rust build
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN --mount=type=cache,target=/tmp/pip-cache \
pip install \
--extra-index-url https://www.piwheels.org/simple \
@@ -84,11 +76,6 @@ EXPOSE 5000
# The actual flask app module
COPY changedetectionio /app/changedetectionio
# Also for OpenAPI validation wrapper - needs the YML
RUN [ ! -d "/app/docs" ] && mkdir /app/docs
COPY docs/api-spec.yaml /app/docs/api-spec.yaml
# Starting wrapper
COPY changedetection.py /app/changedetection.py

View File

@@ -1,9 +1,7 @@
recursive-include changedetectionio/api *
include docs/api-spec.yaml
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/conditions *
recursive-include changedetectionio/content_fetchers *
recursive-include changedetectionio/jinja2_custom *
recursive-include changedetectionio/conditions *
recursive-include changedetectionio/model *
recursive-include changedetectionio/notification *
recursive-include changedetectionio/processors *
@@ -11,7 +9,6 @@ recursive-include changedetectionio/realtime *
recursive-include changedetectionio/static *
recursive-include changedetectionio/templates *
recursive-include changedetectionio/tests *
recursive-include changedetectionio/widgets *
prune changedetectionio/static/package-lock.json
prune changedetectionio/static/styles/node_modules
prune changedetectionio/static/styles/package-lock.json

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.50.24'
__version__ = '0.50.10'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -14,39 +14,6 @@ import copy
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
def validate_time_between_check_required(json_data):
"""
Validate that at least one time interval is specified when not using default settings.
Returns None if valid, or error message string if invalid.
Defaults to using global settings if time_between_check_use_default is not provided.
"""
# Default to using global settings if not specified
use_default = json_data.get('time_between_check_use_default', True)
# If using default settings, no validation needed
if use_default:
return None
# If not using defaults, check if time_between_check exists and has at least one non-zero value
time_check = json_data.get('time_between_check')
if not time_check:
# No time_between_check provided and not using defaults - this is an error
return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
# time_between_check exists, check if it has at least one non-zero value
if any([
(time_check.get('weeks') or 0) > 0,
(time_check.get('days') or 0) > 0,
(time_check.get('hours') or 0) > 0,
(time_check.get('minutes') or 0) > 0,
(time_check.get('seconds') or 0) > 0
]):
return None
# time_between_check exists but all values are 0 or empty - this is an error
return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
class Watch(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency
@@ -88,8 +55,6 @@ class Watch(Resource):
# attr .last_changed will check for the last written text snapshot on change
watch['last_changed'] = watch.last_changed
watch['viewed'] = watch.viewed
watch['link'] = watch.link,
return watch
@auth.check_token
@@ -116,11 +81,6 @@ class Watch(Resource):
if not request.json.get('proxy') in plist:
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
# Validate time_between_check when not using defaults
validation_error = validate_time_between_check_required(request.json)
if validation_error:
return validation_error, 400
watch.update(request.json)
return "OK", 200
@@ -236,11 +196,6 @@ class CreateWatch(Resource):
if not json_data.get('proxy') in plist:
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
# Validate time_between_check when not using defaults
validation_error = validate_time_between_check_required(json_data)
if validation_error:
return validation_error, 400
extras = copy.deepcopy(json_data)
# Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
@@ -275,8 +230,6 @@ class CreateWatch(Resource):
'last_changed': watch.last_changed,
'last_checked': watch['last_checked'],
'last_error': watch['last_error'],
'link': watch.link,
'page_title': watch['page_title'],
'title': watch['title'],
'url': watch['url'],
'viewed': watch.viewed

View File

@@ -1,7 +1,9 @@
import copy
import yaml
import functools
from flask import request, abort
from loguru import logger
from openapi_core import OpenAPI
from openapi_core.contrib.flask import FlaskOpenAPIRequest
from . import api_schema
from ..model import watch_base
@@ -11,7 +13,6 @@ schema = api_schema.build_watch_json_schema(watch_base_config)
schema_create_watch = copy.deepcopy(schema)
schema_create_watch['required'] = ['url']
del schema_create_watch['properties']['last_viewed']
schema_update_watch = copy.deepcopy(schema)
schema_update_watch['additionalProperties'] = False
@@ -29,17 +30,17 @@ schema_create_notification_urls['required'] = ['notification_urls']
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
schema_delete_notification_urls['required'] = ['notification_urls']
@functools.cache
def get_openapi_spec():
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
import os
import yaml # Lazy import - only loaded when API validation is actually used
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
# Load OpenAPI spec for validation
_openapi_spec = None
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
with open(spec_path, 'r') as f:
spec_dict = yaml.safe_load(f)
_openapi_spec = OpenAPI.from_dict(spec_dict)
def get_openapi_spec():
global _openapi_spec
if _openapi_spec is None:
import os
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
with open(spec_path, 'r') as f:
spec_dict = yaml.safe_load(f)
_openapi_spec = OpenAPI.from_dict(spec_dict)
return _openapi_spec
def validate_openapi_request(operation_id):
@@ -48,28 +49,16 @@ def validate_openapi_request(operation_id):
@functools.wraps(f)
def wrapper(*args, **kwargs):
try:
# Skip OpenAPI validation for GET requests since they don't have request bodies
if request.method.upper() != 'GET':
# Lazy import - only loaded when actually validating a request
from openapi_core.contrib.flask import FlaskOpenAPIRequest
spec = get_openapi_spec()
openapi_request = FlaskOpenAPIRequest(request)
result = spec.unmarshal_request(openapi_request)
if result.errors:
from werkzeug.exceptions import BadRequest
error_details = []
for error in result.errors:
error_details.append(str(error))
raise BadRequest(f"OpenAPI validation failed: {error_details}")
except BadRequest:
# Re-raise BadRequest exceptions (validation failures)
raise
spec = get_openapi_spec()
openapi_request = FlaskOpenAPIRequest(request)
result = spec.unmarshal_request(openapi_request, operation_id)
if result.errors:
abort(400, message=f"OpenAPI validation failed: {result.errors}")
return f(*args, **kwargs)
except Exception as e:
# If OpenAPI spec loading fails, log but don't break existing functionality
logger.critical(f"OpenAPI validation warning for {operation_id}: {e}")
abort(500)
return f(*args, **kwargs)
# If OpenAPI validation fails, log but don't break existing functionality
print(f"OpenAPI validation warning for {operation_id}: {e}")
return f(*args, **kwargs)
return wrapper
return decorator
@@ -79,4 +68,3 @@ from .Tags import Tags, Tag
from .Import import Import
from .SystemInfo import SystemInfo
from .Notifications import Notifications

View File

@@ -78,13 +78,6 @@ def build_watch_json_schema(d):
]:
schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})
for v in ['last_viewed']:
schema['properties'][v] = {
"type": "integer",
"description": "Unix timestamp in seconds of the last time the watch was viewed.",
"minimum": 0
}
# None or Boolean
schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})
@@ -119,12 +112,6 @@ def build_watch_json_schema(d):
schema['properties']['time_between_check'] = build_time_between_check_json_schema()
schema['properties']['time_between_check_use_default'] = {
"type": "boolean",
"default": True,
"description": "Whether to use global settings for time between checks - defaults to true if not set"
}
schema['properties']['browser_steps'] = {
"anyOf": [
{

View File

@@ -310,6 +310,15 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
continue
if process_changedetection_results:
# Extract title if needed
if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
if not watch['title'] or not len(watch['title']):
try:
update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}")
except Exception as e:
logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")
try:
datastore.update_watch(uuid=uuid, update_obj=update_obj)
@@ -334,10 +343,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))
# Explicitly delete large content variables to free memory IMMEDIATELY after saving
# These are no longer needed after being saved to history
del contents
# Send notifications on second+ check
if watch.history_n >= 2:
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
@@ -352,14 +357,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
# Always record attempt count
count = watch.get('check_count', 0) + 1
# Always record page title (used in notifications, and can change even when the content is the same)
try:
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
except Exception as e:
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
# Record server header
try:
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
@@ -376,12 +373,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
'check_count': count})
# NOW clear fetcher content - after all processing is complete
# This is the last point where we need the fetcher data
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
update_handler.fetcher.clear_content()
logger.debug(f"Cleared fetcher content for UUID {uuid}")
except Exception as e:
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
@@ -402,28 +393,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
watch_check_update.send(watch_uuid=watch['uuid'])
# Explicitly clean up update_handler and all its references
if update_handler:
# Clear fetcher content using the proper method
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
update_handler.fetcher.clear_content()
# Clear processor references
if hasattr(update_handler, 'content_processor'):
update_handler.content_processor = None
update_handler = None
# Clear local contents variable if it still exists
if 'contents' in locals():
del contents
# Note: We don't set watch = None here because:
# 1. watch is just a local reference to datastore.data['watching'][uuid]
# 2. Setting it to None doesn't affect the datastore
# 3. GC can't collect the object anyway (still referenced by datastore)
# 4. It would just cause confusion
update_handler = None
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
except Exception as cleanup_error:
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")

View File

@@ -6,7 +6,7 @@ from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render

View File

@@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def long_task(uuid, preferred_proxy):
import time
from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
status = {'status': '', 'length': 0, 'text': ''}

View File

@@ -1,5 +1,5 @@
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
from changedetectionio.store import ChangeDetectionStore
from feedgen.feed import FeedGenerator
from flask import Blueprint, make_response, request, url_for, redirect
@@ -108,13 +108,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
fe.link(link=diff_link)
# Same logic as watch-overview.html
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
watch_label = watch.label
else:
watch_label = watch.get('url')
# @todo watch should be a getter - watch.get('title') (internally if URL else..)
fe.title(title=watch_label)
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
fe.title(title=watch_title)
try:
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
@@ -130,7 +127,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# @todo User could decide if <link> goes to the diff page, or to the watch link
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)
# Out of range chars could also break feedgen
if scan_invalid_chars_in_rss(content):

View File

@@ -119,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
hide_remove_pass=os.getenv("SALTED_PASS", False),
min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
settings_application=datastore.data['settings']['application'],
timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
timezone_default_config=datastore.data['settings']['application'].get('timezone'),
utc_time=utc_time,
)

View File

@@ -1,7 +1,7 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
{% from '_common_fields.html' import render_common_settings_form %}
<script>
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
@@ -72,23 +72,33 @@
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.pager_size) }}
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_content_format) }}
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
</div>
<div class="grey-form-border">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_content_format) }}
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
</div>
{% if form.requests.proxy %}
<div class="pure-control-group inline-radio">
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
<span class="pure-form-message-inline">
Choose a default proxy for all watches
</span>
</div>
{% endif %}
</fieldset>
</div>
@@ -131,10 +141,6 @@
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.timeout) }}
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
</div>
<div class="pure-control-group inline-radio">
{{ render_field(form.requests.form.default_ua) }}
<span class="pure-form-message-inline">
@@ -193,12 +199,6 @@ nav
</ul>
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
<span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
</span>
</fieldset>
</div>
<div class="tab-pane-inner" id="api">
@@ -238,7 +238,7 @@ nav
<p><strong>UTC Time &amp Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
<p><strong>Local Time &amp Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
<p>
{{ render_field(form.application.form.scheduler_timezone_default) }}
{{ render_field(form.application.form.timezone) }}
<datalist id="timezones" style="display: none;">
{% for tz_name in available_timezones %}
<option value="{{ tz_name }}">{{ tz_name }}</option>
@@ -260,13 +260,6 @@ nav
{{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
<span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.pager_size) }}
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
</div>
</div>
<div class="tab-pane-inner" id="proxies">
@@ -316,33 +309,23 @@ nav
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
<div class="pure-control-group" id="extra-proxies-setting">
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
{{ render_field(form.requests.form.extra_proxies) }}
<span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
<span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
{% if form.requests.proxy %}
<div>
<br>
<div class="inline-radio">
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
<span class="pure-form-message-inline">Choose a default proxy for all watches</span>
</div>
</div>
{% endif %}
</div>
<div class="pure-control-group" id="extra-browsers-setting">
<p>
<span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br>
<span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span>
</p>
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
{{ render_field(form.requests.form.extra_browsers) }}
</div>
</div>
<div id="actions">
<div class="pure-control-group">
{{ render_button(form.save_button) }}
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a>
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a>
<a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
</div>
</div>
</form>

View File

@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_ternary_field %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{% from '_common_fields.html' import render_common_settings_form %}
<script>
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}";
@@ -64,7 +64,7 @@
<div class="tab-pane-inner" id="notifications">
<fieldset>
<div class="pure-control-group inline-radio">
{{ render_ternary_field(form.notification_muted, BooleanField=True) }}
{{ render_checkbox_field(form.notification_muted) }}
</div>
{% if 1 %}
<div class="pure-control-group inline-radio">

View File

@@ -187,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
tz_name = time_schedule_limit.get('timezone')
if not tz_name:
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
if time_schedule_limit and time_schedule_limit.get('enabled'):
try:
@@ -242,7 +242,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'available_timezones': sorted(available_timezones()),
'browser_steps_config': browser_step_ui_config,
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
'extra_processor_config': form.extra_tab_content(),
'extra_title': f" - Edit - {watch.label}",
@@ -257,7 +256,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'),
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'watch': watch,

View File

@@ -2,7 +2,6 @@ from flask import Blueprint, request, make_response
import random
from loguru import logger
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
@@ -20,7 +19,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
import apprise
from changedetectionio.notification.handler import process_notification
from changedetectionio.notification.apprise_plugin.assets import apprise_asset
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler
@@ -63,20 +61,16 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return 'Error: No Notification URLs set/found'
for n_url in notification_urls:
# We are ONLY validating the apprise:// part here, convert all tags to something so as not to break apprise URLs
generic_notification_context_data = NotificationContextData()
generic_notification_context_data.set_random_for_validation()
n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip()
if len(n_url.strip()):
if not apobj.add(n_url):
return f'Error: {n_url} is not a valid AppRise URL.'
try:
# use the same as when it is triggered, but then override it with the form test values
n_object = NotificationContextData({
n_object = {
'watch_url': request.form.get('window_url', "https://changedetection.io"),
'notification_urls': notification_urls
})
}
# Only use if present, if not set in n_object it should use the default system value
if 'notification_format' in request.form and request.form['notification_format'].strip():

View File

@@ -44,16 +44,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
with_errors = request.args.get('with_errors') == "1"
unread_only = request.args.get('unread') == "1"
errored_count = 0
search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
for uuid, watch in datastore.data['watching'].items():
if with_errors and not watch.get('last_error'):
continue
if unread_only and (watch.viewed or watch.last_changed == 0) :
continue
if active_tag_uuid and not active_tag_uuid in watch['tags']:
continue
if watch.get('last_error'):
@@ -87,6 +83,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
form=form,
guid=datastore.data['app_guid'],
has_proxies=datastore.proxy_list,
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()),
pagination=pagination,
@@ -96,7 +93,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=sorted_tags,
unread_changes_count=datastore.unread_changes_count,
watches=sorted_watches
)

View File

@@ -82,11 +82,8 @@ document.addEventListener('DOMContentLoaded', function() {
{%- set cols_required = cols_required + 1 -%}
{%- endif -%}
{%- set ui_settings = datastore.data['settings']['application']['ui'] -%}
{%- set wrapper_classes = [
'has-unread-changes' if unread_changes_count else '',
'has-error' if errored_count else '',
] -%}
<div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}">
<div id="watch-table-wrapper">
{%- set table_classes = [
'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled',
] -%}
@@ -121,8 +118,7 @@ document.addEventListener('DOMContentLoaded', function() {
{%- set checking_now = is_checking_now(watch) -%}
{%- set history_n = watch.history_n -%}
{%- set favicon = watch.get_favicon_filename() -%}
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
{# Mirror in changedetectionio/static/js/realtime.js for the frontend #}
{%- set row_classes = [
loop.cycle('pure-table-odd', 'pure-table-even'),
'processor-' ~ watch['processor'],
@@ -137,8 +133,7 @@ document.addEventListener('DOMContentLoaded', function() {
'checking-now' if checking_now else '',
'notification_muted' if watch.notification_muted else '',
'single-history' if history_n == 1 else '',
'multiple-history' if history_n >= 2 else '',
'use-html-title' if system_use_url_watchlist else 'no-html-title',
'multiple-history' if history_n >= 2 else '',
] -%}
<tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}">
<td class="inline checkbox-uuid" ><div><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
@@ -160,12 +155,7 @@ document.addEventListener('DOMContentLoaded', function() {
{% endif %}
<div>
<span class="watch-title">
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
{{ watch.label }}
{% else %}
{{ watch.get('title') or watch.link }}
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}&nbsp;<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
</span>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
{%- if watch['processor'] == 'text_json_diff' -%}
@@ -244,10 +234,10 @@ document.addEventListener('DOMContentLoaded', function() {
</tbody>
</table>
<ul id="post-list-buttons">
<li id="post-list-with-errors" style="display: none;" >
<li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" >
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
</li>
<li id="post-list-mark-views" style="display: none;" >
<li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" >
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
</li>
{%- if active_tag_uuid -%}
@@ -255,9 +245,6 @@ document.addEventListener('DOMContentLoaded', function() {
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
</li>
{%- endif -%}
<li id="post-list-unread" style="display: none;" >
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
</li>
<li>
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
all {% if active_tag_uuid %} in '{{active_tag.title}}'{%endif%}</a>

View File

@@ -64,19 +64,6 @@ class Fetcher():
# Time ONTOP of the system defined env minimum time
render_extract_delay = 0
def clear_content(self):
"""
Explicitly clear all content from memory to free up heap space.
Call this after content has been saved to disk.
"""
self.content = None
if hasattr(self, 'raw_content'):
self.raw_content = None
self.screenshot = None
self.xpath_data = None
# Keep headers and status_code as they're small
logger.trace("Fetcher content cleared from memory")
@abstractmethod
def get_error(self):
return self.error
@@ -141,7 +128,7 @@ class Fetcher():
async def iterate_browser_steps(self, start_url=None):
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
from playwright._impl._errors import TimeoutError, Error
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
step_n = 0
if self.browser_steps is not None and len(self.browser_steps):

View File

@@ -51,7 +51,6 @@ class fetcher(Fetcher):
session = requests.Session()
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
from requests_file import FileAdapter
session.mount('file://', FileAdapter())

View File

@@ -47,7 +47,6 @@ async () => {
'nicht lieferbar',
'nicht verfügbar',
'nicht vorrätig',
'nicht mehr lieferbar',
'nicht zur verfügung',
'nie znaleziono produktów',
'niet beschikbaar',

View File

@@ -795,7 +795,7 @@ def ticker_thread_check_time_launch_checks():
else:
time_schedule_limit = watch.get('time_schedule_limit')
logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
if time_schedule_limit and time_schedule_limit.get('enabled'):
try:

View File

@@ -5,7 +5,6 @@ from wtforms.widgets.core import TimeInput
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
from changedetectionio.conditions.form import ConditionFormRow
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.strtobool import strtobool
from wtforms import (
@@ -24,14 +23,11 @@ from wtforms import (
)
from flask_wtf.file import FileField, FileAllowed
from wtforms.fields import FieldList
from wtforms.utils import unset_value
from wtforms.validators import ValidationError
from validators.url import url as url_validator
from changedetectionio.widgets import TernaryNoneBooleanField
# default
# each select <option data-enabled="enabled-0-0"
@@ -58,8 +54,6 @@ valid_method = {
default_method = 'GET'
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.'
class StringListField(StringField):
widget = widgets.TextArea()
@@ -216,35 +210,6 @@ class ScheduleLimitForm(Form):
self.sunday.form.enabled.label.text = "Sunday"
def validate_time_between_check_has_values(form):
"""
Custom validation function for TimeBetweenCheckForm.
Returns True if at least one time interval field has a value > 0.
"""
res = any([
form.weeks.data and int(form.weeks.data) > 0,
form.days.data and int(form.days.data) > 0,
form.hours.data and int(form.hours.data) > 0,
form.minutes.data and int(form.minutes.data) > 0,
form.seconds.data and int(form.seconds.data) > 0
])
return res
class RequiredTimeInterval(object):
"""
WTForms validator that ensures at least one time interval field has a value > 0.
Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]).
"""
def __init__(self, message=None):
self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
def __call__(self, form, field):
if not validate_time_between_check_has_values(field.form):
raise ValidationError(self.message)
class TimeBetweenCheckForm(Form):
weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
@@ -253,123 +218,6 @@ class TimeBetweenCheckForm(Form):
seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
# @todo add total seconds minimum validatior = minimum_seconds_recheck_time
def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
self.require_at_least_one = kwargs.get('require_at_least_one', False)
self.require_at_least_one_message = kwargs.get('require_at_least_one_message', REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT)
def validate(self, **kwargs):
"""Custom validation that can optionally require at least one time interval."""
# Run normal field validation first
if not super().validate(**kwargs):
return False
# Apply optional "at least one" validation
if self.require_at_least_one:
if not validate_time_between_check_has_values(self):
# Add error to the form's general errors (not field-specific)
if not hasattr(self, '_formdata_errors'):
self._formdata_errors = []
self._formdata_errors.append(self.require_at_least_one_message)
return False
return True
class EnhancedFormField(FormField):
"""
An enhanced FormField that supports conditional validation with top-level error messages.
Adds a 'top_errors' property for validation errors at the FormField level.
"""
def __init__(self, form_class, label=None, validators=None, separator="-",
conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs):
"""
Initialize EnhancedFormField with optional conditional validation.
:param conditional_field: Name of the field this FormField depends on (e.g. 'time_between_check_use_default')
:param conditional_message: Error message to show when validation fails
:param conditional_test_function: Custom function to test if FormField has valid values.
Should take self.form as parameter and return True if valid.
"""
super().__init__(form_class, label, validators, separator, **kwargs)
self.top_errors = []
self.conditional_field = conditional_field
self.conditional_message = conditional_message or "At least one field must have a value when not using defaults."
self.conditional_test_function = conditional_test_function
def validate(self, form, extra_validators=()):
"""
Custom validation that supports conditional logic and stores top-level errors.
"""
self.top_errors = []
# First run the normal FormField validation
base_valid = super().validate(form, extra_validators)
# Apply conditional validation if configured
if self.conditional_field and hasattr(form, self.conditional_field):
conditional_field_obj = getattr(form, self.conditional_field)
# If the conditional field is False/unchecked, check if this FormField has any values
if not conditional_field_obj.data:
# Use custom test function if provided, otherwise use generic fallback
if self.conditional_test_function:
has_any_value = self.conditional_test_function(self.form)
else:
# Generic fallback - check if any field has truthy data
has_any_value = any(field.data for field in self.form if hasattr(field, 'data') and field.data)
if not has_any_value:
self.top_errors.append(self.conditional_message)
base_valid = False
return base_valid
class RequiredFormField(FormField):
"""
A FormField that passes require_at_least_one=True to TimeBetweenCheckForm.
Use this when you want the sub-form to always require at least one value.
"""
def __init__(self, form_class, label=None, validators=None, separator="-", **kwargs):
super().__init__(form_class, label, validators, separator, **kwargs)
def process(self, formdata, data=unset_value, extra_filters=None):
if extra_filters:
raise TypeError(
"FormField cannot take filters, as the encapsulated"
"data is not mutable."
)
if data is unset_value:
try:
data = self.default()
except TypeError:
data = self.default
self._obj = data
self.object_data = data
prefix = self.name + self.separator
# Pass require_at_least_one=True to the sub-form
if isinstance(data, dict):
self.form = self.form_class(formdata=formdata, prefix=prefix, require_at_least_one=True, **data)
else:
self.form = self.form_class(formdata=formdata, obj=data, prefix=prefix, require_at_least_one=True)
@property
def errors(self):
"""Include sub-form validation errors"""
form_errors = self.form.errors
# Add any general form errors to a special 'form' key
if hasattr(self.form, '_formdata_errors') and self.form._formdata_errors:
form_errors = dict(form_errors) # Make a copy
form_errors['form'] = self.form._formdata_errors
return form_errors
# Separated by key:value
class StringDictKeyValue(StringField):
widget = widgets.TextArea()
@@ -470,16 +318,11 @@ class ValidateAppRiseServers(object):
import apprise
from .notification.apprise_plugin.assets import apprise_asset
from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler # noqa: F401
from changedetectionio.jinja2_custom import render as jinja_render
apobj = apprise.Apprise(asset=apprise_asset)
for server_url in field.data:
generic_notification_context_data = NotificationContextData()
# Make sure something is atleast in all those regular token fields
generic_notification_context_data.set_random_for_validation()
url = jinja_render(template_str=server_url.strip(), **generic_notification_context_data).strip()
url = server_url.strip()
if url.startswith("#"):
continue
@@ -493,8 +336,9 @@ class ValidateJinja2Template(object):
"""
def __call__(self, form, field):
from changedetectionio import notification
from changedetectionio.jinja2_custom import create_jinja_env
from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
from jinja2.sandbox import ImmutableSandboxedEnvironment
from jinja2.meta import find_undeclared_variables
import jinja2.exceptions
@@ -502,11 +346,9 @@ class ValidateJinja2Template(object):
joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}"
try:
# Use the shared helper to create a properly configured environment
jinja2_env = create_jinja_env(loader=BaseLoader)
# Add notification tokens for validation
jinja2_env.globals.update(NotificationContextData())
jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader)
jinja2_env.globals.update(notification.valid_tokens)
# Extra validation tokens provided on the form_class(... extra_tokens={}) setup
if hasattr(field, 'extra_notification_tokens'):
jinja2_env.globals.update(field.extra_notification_tokens)
@@ -518,7 +360,6 @@ class ValidateJinja2Template(object):
except jinja2.exceptions.SecurityError as e:
raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
# Check for undeclared variables
ast = jinja2_env.parse(joined_data)
undefined = ", ".join(find_undeclared_variables(ast))
if undefined:
@@ -686,51 +527,6 @@ class ValidateCSSJSONXPATHInput(object):
except:
raise ValidationError("A system-error occurred when validating your jq expression")
class ValidateSimpleURL:
"""Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc."""
def __init__(self, message=None):
self.message = message or "Invalid URL."
def __call__(self, form, field):
data = (field.data or "").strip()
if not data:
return # empty is OK — pair with validators.Optional()
from urllib.parse import urlparse
parsed = urlparse(data)
if not parsed.scheme or not parsed.netloc:
raise ValidationError(self.message)
class ValidateStartsWithRegex(object):
def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True):
# compile with given flags (well pass re.IGNORECASE below)
self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex
self.message = message
self.allow_empty = allow_empty
self.split_lines = split_lines
def __call__(self, form, field):
data = field.data
if not data:
return
# normalize into list of lines
if isinstance(data, str) and self.split_lines:
lines = data.splitlines()
elif isinstance(data, (list, tuple)):
lines = data
else:
lines = [data]
for line in lines:
stripped = line.strip()
if not stripped:
if self.allow_empty:
continue
raise ValidationError(self.message or "Empty value not allowed.")
if not self.pattern.match(stripped):
raise ValidationError(self.message or "Invalid value.")
class quickWatchForm(Form):
from . import processors
@@ -752,13 +548,14 @@ class commonSettingsForm(Form):
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
@@ -785,16 +582,11 @@ class processor_text_json_diff_form(commonSettingsForm):
url = fields.URLField('URL', validators=[validateURL()])
tags = StringTagUUID('Group tag', [validators.Optional()], default='')
time_between_check = EnhancedFormField(
TimeBetweenCheckForm,
conditional_field='time_between_check_use_default',
conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
conditional_test_function=validate_time_between_check_has_values
)
time_between_check = FormField(TimeBetweenCheckForm)
time_schedule_limit = FormField(ScheduleLimitForm)
time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False)
time_between_check_use_default = BooleanField('Use global settings for time between check', default=False)
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
@@ -812,7 +604,6 @@ class processor_text_json_diff_form(commonSettingsForm):
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
filter_text_added = BooleanField('Added lines', default=True)
@@ -825,18 +616,18 @@ class processor_text_json_diff_form(commonSettingsForm):
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
proxy = RadioField('Proxy')
# filter_failure_notification_send @todo make ternary
filter_failure_notification_send = BooleanField(
'Send a notification when the filter can no longer be found on the page', default=False)
notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On")
notification_muted = BooleanField('Notifications Muted / Off', default=False)
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None)
def extra_tab_content(self):
return None
@@ -848,7 +639,7 @@ class processor_text_json_diff_form(commonSettingsForm):
if not super().validate():
return False
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
result = True
# Fail form validation when a body is set for a GET
@@ -911,36 +702,23 @@ class processor_text_json_diff_form(commonSettingsForm):
):
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
if kwargs and kwargs.get('default_system_settings'):
default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default')
default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone')
if default_tz:
self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz
class SingleExtraProxy(Form):
# maybe better to set some <script>var..
proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
proxy_url = StringField('Proxy URL', [
validators.Optional(),
ValidateStartsWithRegex(
regex=r'^(https?|socks5)://', # ✅ main pattern
flags=re.IGNORECASE, # ✅ makes it case-insensitive
message='Proxy URLs must start with http://, https:// or socks5://',
),
ValidateSimpleURL()
], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
# @todo do the validation here instead
class SingleExtraBrowser(Form):
browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
browser_connection_url = StringField('Browser connection URL', [
validators.Optional(),
ValidateStartsWithRegex(
regex=r'^(wss?|ws)://',
flags=re.IGNORECASE,
message='Browser URLs must start with wss:// or ws://'
),
ValidateSimpleURL()
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
# @todo do the validation here instead
class DefaultUAInputForm(Form):
html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
@@ -949,9 +727,9 @@ class DefaultUAInputForm(Form):
# datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form):
time_between_check = RequiredFormField(TimeBetweenCheckForm)
time_between_check = FormField(TimeBetweenCheckForm)
time_schedule_limit = FormField(ScheduleLimitForm)
proxy = RadioField('Default proxy')
proxy = RadioField('Proxy')
jitter_seconds = IntegerField('Random jitter seconds ± check',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
@@ -960,12 +738,7 @@ class globalSettingsRequestForm(Form):
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=1, max=50,
message="Should be between 1 and 50")])
timeout = IntegerField('Requests timeout in seconds',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=1, max=999,
message="Should be between 1 and 999")])
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
@@ -982,7 +755,6 @@ class globalSettingsApplicationUIForm(Form):
open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
@@ -1007,14 +779,9 @@ class globalSettingsApplicationForm(commonSettingsForm):
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
strip_ignored_lines = BooleanField('Strip ignored lines')
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
validators=[validators.Optional()])
rss_reader_mode = BooleanField('RSS reader mode ', default=False,
validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
@@ -1034,7 +801,7 @@ class globalSettingsForm(Form):
requests = FormField(globalSettingsRequestForm)
application = FormField(globalSettingsApplicationForm)
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
class extractDataForm(Form):

View File

@@ -1,6 +1,6 @@
from loguru import logger
from lxml import etree
from typing import List
import html
import json
import re
@@ -9,11 +9,6 @@ TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
# 'price' , 'lowPrice', 'highPrice' are usually under here
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -57,17 +52,13 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
return html_block
def subtractive_css_selector(css_selector, content):
def subtractive_css_selector(css_selector, html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(content, "html.parser")
soup = BeautifulSoup(html_content, "html.parser")
# So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
elements_to_remove = soup.select(css_selector)
if not elements_to_remove:
# Better to return the original that rebuild with BeautifulSoup
return content
# Then, remove them in a separate loop
for item in elements_to_remove:
item.decompose()
@@ -75,7 +66,6 @@ def subtractive_css_selector(css_selector, content):
return str(soup)
def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
from lxml import etree
# Parse the HTML content using lxml
html_tree = etree.HTML(html_content)
@@ -87,10 +77,6 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
# Collect elements for each selector
elements_to_remove.extend(html_tree.xpath(selector))
# If no elements were found, return the original HTML content
if not elements_to_remove:
return html_content
# Then, remove them in a separate loop
for element in elements_to_remove:
if element.getparent() is not None: # Ensure the element has a parent before removing
@@ -108,7 +94,7 @@ def element_removal(selectors: List[str], html_content):
xpath_selectors = []
for selector in selectors:
if selector.strip().startswith(('xpath:', 'xpath1:', '//')):
if selector.startswith(('xpath:', 'xpath1:', '//')):
# Handle XPath selectors separately
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
xpath_selectors.append(xpath_selector)
@@ -303,92 +289,70 @@ def _get_stripped_text_from_json_match(match):
return stripped_text_from_html
def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter):
from bs4 import BeautifulSoup
stripped_text_from_html = ''
# Foreach <script json></script> blob.. just return the first that matches json_filter
# As a last resort, try to parse the whole <body>
soup = BeautifulSoup(content, 'html.parser')
if ensure_is_ldjson_info_type:
bs_result = soup.find_all('script', {"type": "application/ld+json"})
else:
bs_result = soup.find_all('script')
bs_result += soup.find_all('body')
bs_jsons = []
for result in bs_result:
# result.text is how bs4 magically strips JSON from the body
content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else ''
# Skip empty tags, and things that dont even look like JSON
if not result.text or not (content_start[0] == '{' or content_start[0] == '['):
continue
try:
json_data = json.loads(result.text)
bs_jsons.append(json_data)
except json.JSONDecodeError:
# Skip objects which cannot be parsed
continue
if not bs_jsons:
raise JSONNotFound("No parsable JSON found in this document")
for json_data in bs_jsons:
stripped_text_from_html = _parse_json(json_data, json_filter)
if ensure_is_ldjson_info_type:
# Could sometimes be list, string or something else random
if isinstance(json_data, dict):
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
# 1833 - could be either str or dict, should not be anything else
t = json_data.get('@type')
if t and stripped_text_from_html:
if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
break
# The non-standard part, some have a list
elif isinstance(t, list):
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
break
elif stripped_text_from_html:
break
return stripped_text_from_html
# content - json
# json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
from bs4 import BeautifulSoup
stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
try:
# .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
except json.JSONDecodeError as e:
logger.warning(str(e))
# Looks like clean JSON, dont bother extracting from HTML
# Foreach <script json></script> blob.. just return the first that matches json_filter
# As a last resort, try to parse the whole <body>
soup = BeautifulSoup(content, 'html.parser')
content_start = content.lstrip("\ufeff").strip()[:100]
if ensure_is_ldjson_info_type:
bs_result = soup.find_all('script', {"type": "application/ld+json"})
else:
bs_result = soup.find_all('script')
bs_result += soup.find_all('body')
if content_start[0] == '{' or content_start[0] == '[':
try:
# .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
else:
# Probably something else, go fish inside for it
try:
stripped_text_from_html = extract_json_blob_from_html(content=content,
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
json_filter=json_filter )
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
bs_jsons = []
for result in bs_result:
# Skip empty tags, and things that dont even look like JSON
if not result.text or '{' not in result.text:
continue
try:
json_data = json.loads(result.text)
bs_jsons.append(json_data)
except json.JSONDecodeError:
# Skip objects which cannot be parsed
continue
if not bs_jsons:
raise JSONNotFound("No parsable JSON found in this document")
for json_data in bs_jsons:
stripped_text_from_html = _parse_json(json_data, json_filter)
if ensure_is_ldjson_info_type:
# Could sometimes be list, string or something else random
if isinstance(json_data, dict):
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
# 1833 - could be either str or dict, should not be anything else
t = json_data.get('@type')
if t and stripped_text_from_html:
if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
break
# The non-standard part, some have a list
elif isinstance(t, list):
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
break
elif stripped_text_from_html:
break
if not stripped_text_from_html:
# Re 265 - Just return an empty string when filter not found
@@ -546,43 +510,3 @@ def get_triggered_text(content, trigger_text):
i += 1
return triggered_text
def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None:
try:
# Only decode/process the prefix we need for title extraction
match data:
case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")):
prefix = data[:scan_chars * 2].decode("utf-16", errors="replace")
case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")):
prefix = data[:scan_chars * 4].decode("utf-32", errors="replace")
case bytes():
try:
prefix = data[:scan_chars].decode("utf-8")
except UnicodeDecodeError:
try:
head = data[:sniff_bytes].decode("ascii", errors="ignore")
if m := (META_CS.search(head) or META_CT.search(head)):
enc = m.group(1).lower()
else:
enc = "cp1252"
prefix = data[:scan_chars * 2].decode(enc, errors="replace")
except Exception as e:
logger.error(f"Title extraction encoding detection failed: {e}")
return None
case str():
prefix = data[:scan_chars] if len(data) > scan_chars else data
case _:
logger.error(f"Title extraction received unsupported data type: {type(data)}")
return None
# Search only in the prefix
if m := TITLE_RE.search(prefix):
title = html.unescape(" ".join(m.group(1).split())).strip()
# Some safe limit
return title[:2000]
return None
except Exception as e:
logger.error(f"Title extraction failed: {e}")
return None

View File

@@ -1,20 +0,0 @@
"""
Jinja2 custom extensions and safe rendering utilities.
"""
from .extensions.TimeExtension import TimeExtension
from .safe_jinja import (
render,
render_fully_escaped,
create_jinja_env,
JINJA2_MAX_RETURN_PAYLOAD_SIZE,
DEFAULT_JINJA2_EXTENSIONS,
)
__all__ = [
'TimeExtension',
'render',
'render_fully_escaped',
'create_jinja_env',
'JINJA2_MAX_RETURN_PAYLOAD_SIZE',
'DEFAULT_JINJA2_EXTENSIONS',
]

View File

@@ -1,221 +0,0 @@
"""
Jinja2 TimeExtension - Custom date/time handling for templates.
This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware
date/time formatting with support for time offsets.
Why This Extension Exists:
The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot
directly call Python functions - they need extensions or filters to expose functionality.
This TimeExtension serves as a Jinja2-to-Arrow bridge that:
1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags
through extensions. You cannot use arrow.now() directly in a template.
2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags:
{% now 'UTC' %}
{% now 'UTC' + 'hours=2' %}
{% now 'Europe/London', '%Y-%m-%d' %}
3. Adds convenience features on top of Arrow:
- Default timezone from environment variable (TZ) or config
- Default datetime format configuration
- Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30)
- Empty string timezone support to use configured defaults
4. Maintains security - Works within Jinja2's sandboxed environment so users
cannot access arbitrary Python code or objects.
Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that
provides user-friendly template syntax while maintaining security.
Basic Usage:
{% now 'UTC' %}
# Output: Wed, 09 Dec 2015 23:33:01
Custom Format:
{% now 'UTC', '%Y-%m-%d %H:%M:%S' %}
# Output: 2015-12-09 23:33:01
Timezone Support:
{% now 'America/New_York' %}
{% now 'Europe/London' %}
{% now '' %} # Uses default timezone from environment.default_timezone
Time Offsets (Addition):
{% now 'UTC' + 'hours=2' %}
{% now 'UTC' + 'hours=2,minutes=30' %}
{% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %}
Time Offsets (Subtraction):
{% now 'UTC' - 'minutes=11' %}
{% now 'UTC' - 'days=2,minutes=33,seconds=1' %}
Time Offsets with Custom Format:
{% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %}
# Output: 2015-12-10 01:33:01
Weekday Support (for finding next/previous weekday):
{% now 'UTC' + 'weekday=0' %} # Next Monday (0=Monday, 6=Sunday)
{% now 'UTC' + 'weekday=4' %} # Next Friday
Configuration:
- Default timezone: Set via TZ environment variable or override environment.default_timezone
- Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format)
Environment Customization:
from changedetectionio.jinja2_custom import create_jinja_env
jinja2_env = create_jinja_env()
jinja2_env.default_timezone = 'America/New_York' # Override default timezone
jinja2_env.datetime_format = '%Y-%m-%d %H:%M' # Override default format
Supported Offset Parameters:
- years, months, weeks, days
- hours, minutes, seconds, microseconds
- weekday (0=Monday through 6=Sunday, must be integer)
Note:
This extension uses the Arrow library for timezone-aware datetime handling.
All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York').
"""
import arrow
from jinja2 import nodes
from jinja2.ext import Extension
import os
class TimeExtension(Extension):
"""
Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering.
This extension adds two attributes to the Jinja2 environment:
- datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S')
- default_timezone: Default timezone for rendering (default: TZ env var or 'UTC')
Both can be overridden after environment creation by setting the attributes directly.
"""
tags = {'now'}
def __init__(self, environment):
"""Jinja2 Extension constructor."""
super().__init__(environment)
environment.extend(
datetime_format='%a, %d %b %Y %H:%M:%S',
default_timezone=os.getenv('TZ', 'UTC').strip()
)
def _datetime(self, timezone, operator, offset, datetime_format):
"""
Get current datetime with time offset applied.
Args:
timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
operator: '+' for addition or '-' for subtraction
offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30')
datetime_format: strftime format string or None to use environment default
Returns:
Formatted datetime string with offset applied
Example:
_datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S')
# Returns current time + 2.5 hours
"""
# Use default timezone if none specified
if not timezone or timezone == '':
timezone = self.environment.default_timezone
d = arrow.now(timezone)
# parse shift params from offset and include operator
shift_params = {}
for param in offset.split(','):
interval, value = param.split('=')
shift_params[interval.strip()] = float(operator + value.strip())
# Fix weekday parameter can not be float
if 'weekday' in shift_params:
shift_params['weekday'] = int(shift_params['weekday'])
d = d.shift(**shift_params)
if datetime_format is None:
datetime_format = self.environment.datetime_format
return d.strftime(datetime_format)
def _now(self, timezone, datetime_format):
"""
Get current datetime without any offset.
Args:
timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
datetime_format: strftime format string or None to use environment default
Returns:
Formatted datetime string for current time
Example:
_now('America/New_York', '%Y-%m-%d %H:%M:%S')
# Returns current time in New York timezone
"""
# Use default timezone if none specified
if not timezone or timezone == '':
timezone = self.environment.default_timezone
if datetime_format is None:
datetime_format = self.environment.datetime_format
return arrow.now(timezone).strftime(datetime_format)
def parse(self, parser):
"""
Parse the {% now %} tag and generate appropriate AST nodes.
This method is called by Jinja2 when it encounters a {% now %} tag.
It parses the tag syntax and determines whether to call _now() or _datetime()
based on whether offset operations (+ or -) are present.
Supported syntax:
{% now 'timezone' %} -> calls _now()
{% now 'timezone', 'format' %} -> calls _now()
{% now 'timezone' + 'offset' %} -> calls _datetime()
{% now 'timezone' + 'offset', 'format' %} -> calls _datetime()
{% now 'timezone' - 'offset', 'format' %} -> calls _datetime()
Args:
parser: Jinja2 parser instance
Returns:
nodes.Output: AST output node containing the formatted datetime string
"""
lineno = next(parser.stream).lineno
node = parser.parse_expression()
if parser.stream.skip_if('comma'):
datetime_format = parser.parse_expression()
else:
datetime_format = nodes.Const(None)
if isinstance(node, nodes.Add):
call_method = self.call_method(
'_datetime',
[node.left, nodes.Const('+'), node.right, datetime_format],
lineno=lineno,
)
elif isinstance(node, nodes.Sub):
call_method = self.call_method(
'_datetime',
[node.left, nodes.Const('-'), node.right, datetime_format],
lineno=lineno,
)
else:
call_method = self.call_method(
'_now',
[node, datetime_format],
lineno=lineno,
)
return nodes.Output([call_method], lineno=lineno)

View File

@@ -1,55 +0,0 @@
"""
Safe Jinja2 render with max payload sizes
See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
"""
import jinja2.sandbox
import typing as t
import os
from .extensions.TimeExtension import TimeExtension
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
# Default extensions - can be overridden in create_jinja_env()
DEFAULT_JINJA2_EXTENSIONS = [TimeExtension]
def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
"""
Create a sandboxed Jinja2 environment with our custom extensions and default timezone.
Args:
extensions: List of extension classes to use (defaults to DEFAULT_JINJA2_EXTENSIONS)
**kwargs: Additional arguments to pass to ImmutableSandboxedEnvironment
Returns:
Configured Jinja2 environment
"""
if extensions is None:
extensions = DEFAULT_JINJA2_EXTENSIONS
jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(
extensions=extensions,
**kwargs
)
# Get default timezone from environment variable
default_timezone = os.getenv('TZ', 'UTC').strip()
jinja2_env.default_timezone = default_timezone
return jinja2_env
# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
# (Which also limits available functions that could be called)
def render(template_str, **args: t.Any) -> str:
jinja2_env = create_jinja_env()
output = jinja2_env.from_string(template_str).render(args)
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
def render_fully_escaped(content):
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
template = env.from_string("{{ some_html|e }}")
return template.render(some_html=content)

View File

@@ -39,12 +39,12 @@ class model(dict):
'api_access_token_enabled': True,
'base_url' : None,
'empty_pages_are_a_change': False,
'extract_title_as_title': False,
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
'global_subtractive_selectors': [],
'ignore_whitespace': True,
'ignore_status_codes': False, #@todo implement, as ternary.
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'notification_title': default_notification_title,
@@ -55,15 +55,12 @@ class model(dict):
'rss_access_token': None,
'rss_content_format': RSS_FORMAT_TYPES[0][0],
'rss_hide_muted_watches': True,
'rss_reader_mode': False,
'scheduler_timezone_default': None, # Default IANA timezone name
'schema_version' : 0,
'shared_diff_access': False,
'strip_ignored_lines': False,
'tags': {}, #@todo use Tag.model initialisers
'webdriver_delay': None , # Extra delay in seconds before extracting text
'tags': {}, #@todo use Tag.model initialisers
'timezone': None, # Default IANA timezone name
'ui': {
'use_page_title_in_list': True,
'open_diff_in_new_tab': True,
'socket_io_enabled': True,
'favicons_enabled': True

View File

@@ -1,14 +1,14 @@
from blinker import signal
from changedetectionio.strtobool import strtobool
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
from . import watch_base
import os
import re
from pathlib import Path
from loguru import logger
from .. import jinja2_custom as safe_jinja
from .. import safe_jinja
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
# Allowable protocols, protects against javascript: etc
@@ -169,8 +169,8 @@ class model(watch_base):
@property
def label(self):
# Used for sorting, display, etc
return self.get('title') or self.get('page_title') or self.link
# Used for sorting
return self.get('title') if self.get('title') else self.get('url')
@property
def last_changed(self):

View File

@@ -24,6 +24,7 @@ class watch_base(dict):
'content-type': None,
'date_created': None,
'extract_text': [], # Extract text by regex after filters
'extract_title_as_title': False,
'fetch_backend': 'system', # plaintext, playwright etc
'fetch_time': 0.0,
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
@@ -34,7 +35,6 @@ class watch_base(dict):
'has_ldjson_price_data': None,
'headers': {}, # Extra headers to send
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
'ignore_status_codes': None,
'in_stock_only': True, # Only trigger change on going to instock from out-of-stock
'include_filters': [],
'last_checked': 0,
@@ -49,7 +49,6 @@ class watch_base(dict):
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
'notification_title': None,
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'page_title': None, # <title> from the page
'paused': False,
'previous_md5': False,
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
@@ -58,7 +57,6 @@ class watch_base(dict):
'proxy': None, # Preferred proxy connection
'remote_server_reply': None, # From 'server' reply header
'sort_text_alphabetically': False,
'strip_ignored_lines': None,
'subtractive_selectors': [],
'tag': '', # Old system of text name for a tag, to be removed
'tags': [], # list of UUIDs to App.Tags
@@ -124,13 +122,12 @@ class watch_base(dict):
}
},
},
'title': None, # An arbitrary field that overrides 'page_title'
'title': None,
'track_ldjson_price_data': None,
'trim_text_whitespace': False,
'remove_duplicate_lines': False,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',
'use_page_title_in_list': None, # None = use system settings
'uuid': str(uuid.uuid4()),
'webdriver_delay': None,
'webdriver_js_execute_code': None, # Run before change-detection

View File

@@ -16,3 +16,20 @@ valid_notification_formats = {
default_notification_format_for_watch: default_notification_format_for_watch
}
valid_tokens = {
'base_url': '',
'current_snapshot': '',
'diff': '',
'diff_added': '',
'diff_full': '',
'diff_patch': '',
'diff_removed': '',
'diff_url': '',
'preview_url': '',
'triggered_text': '',
'watch_tag': '',
'watch_title': '',
'watch_url': '',
'watch_uuid': '',
}

View File

@@ -1,88 +1,18 @@
import time
import apprise
from apprise import NotifyFormat
from loguru import logger
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
from ..notification_service import NotificationContextData
def markup_text_links_to_html(body):
"""
Convert plaintext to HTML with clickable links.
Uses Jinja2's escape and Markup for XSS safety.
"""
from linkify_it import LinkifyIt
from markupsafe import Markup, escape
linkify = LinkifyIt()
# Match URLs in the ORIGINAL text (before escaping)
matches = linkify.match(body)
if not matches:
# No URLs, just escape everything
return Markup(escape(body))
result = []
last_index = 0
# Process each URL match
for match in matches:
# Add escaped text before the URL
if match.index > last_index:
text_part = body[last_index:match.index]
result.append(escape(text_part))
# Add the link with escaped URL (both in href and display)
url = match.url
result.append(Markup(f'<a href="{escape(url)}">{escape(url)}</a>'))
last_index = match.last_index
# Add remaining escaped text
if last_index < len(body):
result.append(escape(body[last_index:]))
# Join all parts
return str(Markup(''.join(str(part) for part in result)))
def notification_format_align_with_apprise(n_format : str):
"""
Correctly align changedetection's formats with apprise's formats
Probably these are the same - but good to be sure.
:param n_format:
:return:
"""
if n_format.lower().startswith('html'):
# Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
n_format = NotifyFormat.HTML
elif n_format.lower().startswith('markdown'):
# probably the same but just to be safe
n_format = NotifyFormat.MARKDOWN
elif n_format.lower().startswith('text'):
# probably the same but just to be safe
n_format = NotifyFormat.TEXT
else:
n_format = NotifyFormat.TEXT
# Must be str for apprise notify body_format
return str(n_format)
def process_notification(n_object: NotificationContextData, datastore):
from changedetectionio.jinja2_custom import render as jinja_render
def process_notification(n_object, datastore):
from changedetectionio.safe_jinja import render as jinja_render
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
# be sure its registered
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
if not isinstance(n_object, NotificationContextData):
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
now = time.time()
if n_object.get('notification_timestamp'):
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
# Insert variables into the notification content
notification_parameters = create_notification_parameters(n_object, datastore)
@@ -94,9 +24,7 @@ def process_notification(n_object: NotificationContextData, datastore):
# If we arrived with 'System default' then look it up
if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch:
# Initially text or whatever
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]).lower()
n_format = notification_format_align_with_apprise(n_format=n_format)
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
@@ -119,11 +47,7 @@ def process_notification(n_object: NotificationContextData, datastore):
# Get the notification body from datastore
n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)
if n_object.get('markup_text_to_html'):
n_body = markup_text_links_to_html(body=n_body)
if n_format == str(NotifyFormat.HTML):
if n_object.get('notification_format', '').startswith('HTML'):
n_body = n_body.replace("\n", '<br>')
n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)
@@ -217,15 +141,17 @@ def process_notification(n_object: NotificationContextData, datastore):
# Notification title + body content parameters get created here.
# ( Where we prepare the tokens in the notification to be replaced with actual values )
def create_notification_parameters(n_object: NotificationContextData, datastore):
if not isinstance(n_object, NotificationContextData):
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
def create_notification_parameters(n_object, datastore):
from copy import deepcopy
from . import valid_tokens
watch = datastore.data['watching'].get(n_object['uuid'])
if watch:
watch_title = datastore.data['watching'][n_object['uuid']].label
# in the case we send a test notification from the main settings, there is no UUID.
uuid = n_object['uuid'] if 'uuid' in n_object else ''
if uuid:
watch_title = datastore.data['watching'][uuid].get('title', '')
tag_list = []
tags = datastore.get_all_tags_for_watch(n_object['uuid'])
tags = datastore.get_all_tags_for_watch(uuid)
if tags:
for tag_uuid, tag in tags.items():
tag_list.append(tag.get('title'))
@@ -240,10 +166,14 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
watch_url = n_object['watch_url']
diff_url = "{}/diff/{}".format(base_url, n_object['uuid'])
preview_url = "{}/preview/{}".format(base_url, n_object['uuid'])
diff_url = "{}/diff/{}".format(base_url, uuid)
preview_url = "{}/preview/{}".format(base_url, uuid)
n_object.update(
# Not sure deepcopy is needed here, but why not
tokens = deepcopy(valid_tokens)
# Valid_tokens also used as a field validator
tokens.update(
{
'base_url': base_url,
'diff_url': diff_url,
@@ -251,10 +181,13 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
'watch_tag': watch_tag if watch_tag is not None else '',
'watch_title': watch_title if watch_title is not None else '',
'watch_url': watch_url,
'watch_uuid': n_object['uuid'],
'watch_uuid': uuid,
})
if watch:
n_object.update(datastore.data['watching'].get(n_object['uuid']).extra_notification_token_values())
# n_object will contain diff, diff_added etc etc
tokens.update(n_object)
return n_object
if uuid:
tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values())
return tokens

View File

@@ -6,51 +6,9 @@ Extracted from update_worker.py to provide standalone notification functionality
for both sync and async workers
"""
from loguru import logger
import time
from loguru import logger
from changedetectionio.notification import default_notification_format
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
class NotificationContextData(dict):
def __init__(self, initial_data=None, **kwargs):
super().__init__({
'current_snapshot': None,
'diff': None,
'diff_added': None,
'diff_full': None,
'diff_patch': None,
'diff_removed': None,
'notification_timestamp': time.time(),
'screenshot': None,
'triggered_text': None,
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
'watch_url': 'https://WATCH-PLACE-HOLDER/',
'base_url': None,
'diff_url': None,
'preview_url': None,
'watch_tag': None,
'watch_title': None,
'markup_text_to_html': False, # If automatic conversion of plaintext to HTML should happen
})
# Apply any initial data passed in
self.update({'watch_uuid': self.get('uuid')})
if initial_data:
self.update(initial_data)
# Apply any keyword arguments
if kwargs:
self.update(kwargs)
def set_random_for_validation(self):
import random, string
"""Randomly fills all dict keys with random strings (for validation/testing)."""
for key in self.keys():
if key in ['uuid', 'time', 'watch_uuid']:
continue
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
self[key] = rand_str
class NotificationService:
"""
@@ -62,16 +20,13 @@ class NotificationService:
self.datastore = datastore
self.notification_q = notification_q
def queue_notification_for_watch(self, n_object: NotificationContextData, watch):
def queue_notification_for_watch(self, n_object, watch):
"""
Queue a notification for a watch with full diff rendering and template variables
"""
from changedetectionio import diff
from changedetectionio.notification import default_notification_format_for_watch
if not isinstance(n_object, NotificationContextData):
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
dates = []
trigger_text = ''
@@ -124,15 +79,15 @@ class NotificationService:
n_object.update({
'current_snapshot': snapshot_contents,
'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
'notification_timestamp': now,
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'triggered_text': triggered_text,
'uuid': watch.get('uuid') if watch else None,
'watch_url': watch.get('url') if watch else None,
'watch_uuid': watch.get('uuid') if watch else None,
})
if watch:
@@ -185,7 +140,7 @@ class NotificationService:
"""
Send notification when content changes are detected
"""
n_object = NotificationContextData()
n_object = {}
watch = self.datastore.data['watching'].get(watch_uuid)
if not watch:
return
@@ -228,26 +183,11 @@ class NotificationService:
if not watch:
return
n_format = self.datastore.data['settings']['application'].get('notification_format', default_notification_format)
filter_list = ", ".join(watch['include_filters'])
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_to_html' is not needed
body = f"""Hello,
Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts.
It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab )
Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}
Thanks - Your omniscient changedetection.io installation.
"""
n_object = NotificationContextData({
'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
'notification_body': body,
'notification_format': n_format,
'markup_text_to_html': n_format.lower().startswith('html')
})
n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
", ".join(watch['include_filters']),
threshold),
'notification_format': 'text'}
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']
@@ -275,28 +215,12 @@ Thanks - Your omniscient changedetection.io installation.
if not watch:
return
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
n_format = self.datastore.data['settings']['application'].get('notification_format', default_notification_format).lower()
step = step_n + 1
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_to_html' is not needed
# {{{{ }}}} because this will be Jinja2 {{ }} tokens
body = f"""Hello,
Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout?
The element may have moved and needs editing, or does it need a delay added?
Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}
Thanks - Your omniscient changedetection.io installation.
"""
n_object = NotificationContextData({
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
'notification_body': body,
'notification_format': n_format,
'markup_text_to_html': n_format.lower().startswith('html')
})
n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
"did not appear on the page after {} attempts, did the page change layout? "
"Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n"
"Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold),
'notification_format': 'text'}
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']

View File

@@ -102,7 +102,7 @@ class difference_detection_processor():
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
request_headers = CaseInsensitiveDict()
ua = self.datastore.data['settings']['requests'].get('default_ua')

View File

@@ -1,133 +0,0 @@
"""
Content Type Detection and Stream Classification
This module provides intelligent content-type detection for changedetection.io.
It addresses the common problem where HTTP Content-Type headers are missing, incorrect,
or too generic, which would otherwise cause the wrong processor to be used.
The guess_stream_type class combines:
1. HTTP Content-Type headers (when available and reliable)
2. Python-magic library for MIME detection (analyzing actual file content)
3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.)
This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF,
plain text, CSV, YAML, and XML formats - even when servers provide misleading headers.
Used by: processors/text_json_diff/processor.py and other content processors
"""
# When to apply the 'cdata to real HTML' hack
RSS_XML_CONTENT_TYPES = [
"application/rss+xml",
"application/rdf+xml",
"application/atom+xml",
"text/rss+xml", # rare, non-standard
"application/x-rss+xml", # legacy (older feed software)
"application/x-atom+xml", # legacy (older Atom)
]
# JSON Content-types
JSON_CONTENT_TYPES = [
"application/activity+json",
"application/feed+json",
"application/json",
"application/ld+json",
"application/vnd.api+json",
]
# Generic XML Content-types (non-RSS/Atom)
XML_CONTENT_TYPES = [
"text/xml",
"application/xml",
]
HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']
from loguru import logger
class guess_stream_type():
is_pdf = False
is_json = False
is_html = False
is_plaintext = False
is_rss = False
is_csv = False
is_xml = False # Generic XML, not RSS/Atom
is_yaml = False
def __init__(self, http_content_header, content):
import re
magic_content_header = http_content_header
test_content = content[:200].lower().strip()
# Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
test_content_normalized = re.sub(r'<\s+', '<', test_content)
# Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic)
magic_result = None
try:
import puremagic
# puremagic needs bytes, so encode if we have a string
content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200]
# puremagic returns a list of PureMagic objects with confidence scores
detections = puremagic.magic_string(content_bytes)
if detections:
# Get the highest confidence detection
mime = detections[0].mime_type
logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
if mime and "/" in mime:
magic_result = mime
# Ignore generic/fallback mime types
if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library")
# Trust puremagic for non-text types immediately
elif mime not in ['text/html', 'text/plain']:
magic_content_header = mime
except Exception as e:
logger.error(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
# Content-based detection (most reliable for text formats)
# Check for HTML patterns first - if found, override magic's text/plain
has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)
# Always trust headers first
if 'text/plain' in http_content_header:
self.is_plaintext = True
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
self.is_rss = True
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
self.is_json = True
elif 'pdf' in magic_content_header:
self.is_pdf = True
elif has_html_patterns or http_content_header == 'text/html':
self.is_html = True
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
self.is_json = True
# magic will call a rss document 'xml'
# Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
# This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
self.is_rss = True
elif any(s in http_content_header for s in XML_CONTENT_TYPES):
# Only mark as generic XML if not already detected as RSS
if not self.is_rss:
self.is_xml = True
elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
# Generic XML that's not RSS/Atom (RSS/Atom checked above)
self.is_xml = True
elif '%pdf-1' in test_content:
self.is_pdf = True
elif http_content_header.startswith('text/'):
self.is_plaintext = True
# Only trust magic for 'text' if no other patterns matched
elif 'text' in magic_content_header:
self.is_plaintext = True
# If magic says text/plain and we found no HTML patterns, trust it
elif magic_result == 'text/plain':
self.is_plaintext = True
logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")

View File

@@ -13,17 +13,12 @@ from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from loguru import logger
from changedetectionio.processors.magic import guess_stream_type
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Webpage Text/HTML, JSON and PDF changes'
description = 'Detects all text changes where possible'
JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']
# Assume it's this type if the server says nothing on content-type
DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html'
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
class FilterNotFoundInResponse(ValueError):
def __init__(self, msg, screenshot=None, xpath_data=None):
@@ -37,560 +32,356 @@ class PDFToHTMLToolNotFound(ValueError):
ValueError.__init__(self, msg)
class FilterConfig:
"""Consolidates all filter and rule configurations from watch, tags, and global settings."""
def __init__(self, watch, datastore):
self.watch = watch
self.datastore = datastore
self.watch_uuid = watch.get('uuid')
# Cache computed properties to avoid repeated list operations
self._include_filters_cache = None
self._subtractive_selectors_cache = None
def _get_merged_rules(self, attr, include_global=False):
"""Merge rules from watch, tags, and optionally global settings."""
watch_rules = self.watch.get(attr, [])
tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr)
rules = list(dict.fromkeys(watch_rules + tag_rules))
if include_global:
global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', [])
rules = list(dict.fromkeys(rules + global_rules))
return rules
@property
def include_filters(self):
if self._include_filters_cache is None:
filters = self._get_merged_rules('include_filters')
# Inject LD+JSON price tracker rule if enabled
if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
self._include_filters_cache = filters
return self._include_filters_cache
@property
def subtractive_selectors(self):
if self._subtractive_selectors_cache is None:
watch_selectors = self.watch.get("subtractive_selectors", [])
tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors')
global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
return self._subtractive_selectors_cache
@property
def extract_text(self):
return self._get_merged_rules('extract_text')
@property
def ignore_text(self):
return self._get_merged_rules('ignore_text', include_global=True)
@property
def trigger_text(self):
return self._get_merged_rules('trigger_text')
@property
def text_should_not_be_present(self):
return self._get_merged_rules('text_should_not_be_present')
@property
def has_include_filters(self):
return bool(self.include_filters) and bool(self.include_filters[0].strip())
@property
def has_include_json_filters(self):
return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES)
@property
def has_subtractive_selectors(self):
return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip())
class ContentTransformer:
"""Handles text transformations like trimming, sorting, and deduplication."""
@staticmethod
def trim_whitespace(text):
"""Remove leading/trailing whitespace from each line."""
# Use generator expression to avoid building intermediate list
return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines())
@staticmethod
def remove_duplicate_lines(text):
"""Remove duplicate lines while preserving order."""
return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines()))
@staticmethod
def sort_alphabetically(text):
"""Sort lines alphabetically (case-insensitive)."""
# Remove double line feeds before sorting
text = text.replace("\n\n", "\n")
return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
@staticmethod
def extract_by_regex(text, regex_patterns):
"""Extract text matching regex patterns."""
# Use list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior)
regex_matched_output = []
for s_re in regex_patterns:
# Check if it's perl-style regex /.../
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
result = re.findall(regex, text)
for match in result:
if type(match) is tuple:
regex_matched_output.extend(match)
regex_matched_output.append('\n')
else:
regex_matched_output.append(match)
regex_matched_output.append('\n')
else:
# Plain text search (case-insensitive)
r = re.compile(re.escape(s_re), re.IGNORECASE)
res = r.findall(text)
if res:
for match in res:
regex_matched_output.append(match)
regex_matched_output.append('\n')
return ''.join(regex_matched_output) if regex_matched_output else ''
class RuleEngine:
"""Evaluates blocking rules (triggers, conditions, text_should_not_be_present)."""
@staticmethod
def evaluate_trigger_text(content, trigger_patterns):
"""
Check if trigger text is present. If trigger_text is configured,
content is blocked UNLESS the trigger is found.
Returns True if blocked, False if allowed.
"""
if not trigger_patterns:
return False
# Assume blocked if trigger_text is configured
result = html_tools.strip_ignore_text(
content=str(content),
wordlist=trigger_patterns,
mode="line numbers"
)
# Unblock if trigger was found
return not bool(result)
@staticmethod
def evaluate_text_should_not_be_present(content, patterns):
"""
Check if forbidden text is present. If found, block the change.
Returns True if blocked, False if allowed.
"""
if not patterns:
return False
result = html_tools.strip_ignore_text(
content=str(content),
wordlist=patterns,
mode="line numbers"
)
# Block if forbidden text was found
return bool(result)
@staticmethod
def evaluate_conditions(watch, datastore, content):
"""
Evaluate custom conditions ruleset.
Returns True if blocked, False if allowed.
"""
if not watch.get('conditions') or not watch.get('conditions_match_logic'):
return False
conditions_result = execute_ruleset_against_all_plugins(
current_watch_uuid=watch.get('uuid'),
application_datastruct=datastore.data,
ephemeral_data={'text': content}
)
# Block if conditions not met
return not conditions_result.get('result')
class ContentProcessor:
"""Handles content preprocessing, filtering, and extraction."""
def __init__(self, fetcher, watch, filter_config, datastore):
self.fetcher = fetcher
self.watch = watch
self.filter_config = filter_config
self.datastore = datastore
def preprocess_rss(self, content):
"""
Convert CDATA/comments in RSS to usable text.
Supports two RSS processing modes:
- 'default': Inline CDATA replacement (original behavior)
- 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
"""
from changedetectionio import rss_tools
rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
if rss_mode:
# Format RSS items nicely with CDATA content unmarked and converted to text
return rss_tools.format_rss_items(content)
else:
# Default: Original inline CDATA replacement
return cdata_in_document_to_text(html_content=content)
def preprocess_pdf(self, raw_content):
"""Convert PDF to HTML using external tool."""
from shutil import which
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
if not which(tool):
raise PDFToHTMLToolNotFound(
f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
)
import subprocess
proc = subprocess.Popen(
[tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
stdout=subprocess.PIPE,
stdin=subprocess.PIPE
)
proc.stdin.write(raw_content)
proc.stdin.close()
html_content = proc.stdout.read().decode('utf-8')
proc.wait(timeout=60)
# Add metadata for change detection
metadata = (
f"<p>Added by changedetection.io: Document checksum - "
f"{hashlib.md5(raw_content).hexdigest().upper()} "
f"Original file size - {len(raw_content)} bytes</p>"
)
return html_content.replace('</body>', metadata + '</body>')
def preprocess_json(self, raw_content):
"""Format and sort JSON content."""
# Then we re-format it, else it does have filters (later on) which will reformat it anyway
content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")
# Sort JSON to avoid false alerts from reordering
try:
content = json.dumps(json.loads(content), sort_keys=True, indent=4)
except Exception:
# Might be malformed JSON, continue anyway
pass
return content
def apply_include_filters(self, content, stream_content_type):
"""Apply CSS, XPath, or JSON filters to extract specific content."""
filtered_content = ""
for filter_rule in self.filter_config.include_filters:
# XPath filters
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
filtered_content += html_tools.xpath_filter(
xpath_filter=filter_rule.replace('xpath:', ''),
html_content=content,
append_pretty_line_formatting=not self.watch.is_source_type_url,
is_rss=stream_content_type.is_rss
)
# XPath1 filters (first match only)
elif filter_rule.startswith('xpath1:'):
filtered_content += html_tools.xpath1_filter(
xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=content,
append_pretty_line_formatting=not self.watch.is_source_type_url,
is_rss=stream_content_type.is_rss
)
# JSON filters
elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
filtered_content += html_tools.extract_json_as_string(
content=content,
json_filter=filter_rule
)
# CSS selectors, default fallback
else:
filtered_content += html_tools.include_filters(
include_filters=filter_rule,
html_content=content,
append_pretty_line_formatting=not self.watch.is_source_type_url
)
# Raise error if filter returned nothing
if not filtered_content.strip():
raise FilterNotFoundInResponse(
msg=self.filter_config.include_filters,
screenshot=self.fetcher.screenshot,
xpath_data=self.fetcher.xpath_data
)
return filtered_content
def apply_subtractive_selectors(self, content):
"""Remove elements matching subtractive selectors."""
return html_tools.element_removal(self.filter_config.subtractive_selectors, content)
def extract_text_from_html(self, html_content, stream_content_type):
"""Convert HTML to plain text."""
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
return html_tools.html_to_text(
html_content=html_content,
render_anchor_tag_content=do_anchor,
is_rss=stream_content_type.is_rss
)
class ChecksumCalculator:
"""Calculates checksums with various options."""
@staticmethod
def calculate(text, ignore_whitespace=False):
"""Calculate MD5 checksum of text content."""
if ignore_whitespace:
text = text.translate(TRANSLATE_WHITESPACE_TABLE)
return hashlib.md5(text.encode('utf-8')).hexdigest()
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch):
changed_detected = False
html_content = ""
screenshot = False # as bytes
stripped_text_from_html = ""
if not watch:
raise Exception("Watch no longer exists.")
# Initialize components
filter_config = FilterConfig(watch, self.datastore)
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
transformer = ContentTransformer()
rule_engine = RuleEngine()
# Get content type and stream info
ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content)
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False}
url = watch.link
self.screenshot = self.fetcher.screenshot
self.xpath_data = self.fetcher.xpath_data
# Track the content type and checksum before filters
update_obj['content_type'] = ctype_header
# Track the content type
update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower()
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
# === CONTENT PREPROCESSING ===
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
content = self.fetcher.content
# Fetching complete, now filters
# RSS preprocessing
if stream_content_type.is_rss:
content = content_processor.preprocess_rss(content)
if self.datastore.data["settings"]["application"].get("rss_reader_mode"):
# Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc)
stream_content_type.is_rss = False
stream_content_type.is_html = True
self.fetcher.content = content
# @note: I feel like the following should be in a more obvious chain system
# - Check filter text
# - Is the checksum different?
# - Do we convert to JSON?
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
# return content().textfilter().jsonextract().checksumcompare() ?
# PDF preprocessing
if watch.is_pdf or stream_content_type.is_pdf:
content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content)
stream_content_type.is_html = True
is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower()
is_html = not is_json
is_rss = False
# JSON - Always reformat it nicely for consistency.
ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
# Go into RSS preprocess for converting CDATA/comment to usable text
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
if '<rss' in self.fetcher.content[:100].lower():
self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
is_rss = True
if stream_content_type.is_json:
if not filter_config.has_include_json_filters:
content = content_processor.preprocess_json(raw_content=content)
#else, otherwise it gets sorted/formatted in the filter stage anyway
# HTML obfuscation workarounds
if stream_content_type.is_html:
content = html_tools.workarounds_for_obfuscations(content)
# Check for LD+JSON price data (for HTML content)
if stream_content_type.is_html:
update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content)
# === FILTER APPLICATION ===
# Start with content reference, avoid copy until modification
html_content = content
# Apply include filters (CSS, XPath, JSON)
# Except for plaintext (incase they tried to confuse the system, it will HTML escape
#if not stream_content_type.is_plaintext:
if filter_config.has_include_filters:
html_content = content_processor.apply_include_filters(content, stream_content_type)
# Apply subtractive selectors
if filter_config.has_subtractive_selectors:
html_content = content_processor.apply_subtractive_selectors(html_content)
# === TEXT EXTRACTION ===
# source: support, basically treat it as plaintext
if watch.is_source_type_url:
# For source URLs, keep raw content
stripped_text = html_content
elif stream_content_type.is_plaintext:
# For plaintext, keep as-is without HTML-to-text conversion
stripped_text = html_content
else:
# Extract text from HTML/RSS content (not generic XML)
if stream_content_type.is_html or stream_content_type.is_rss:
stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type)
is_html = False
is_json = False
inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10]
if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
from shutil import which
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
if not which(tool):
raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool))
import subprocess
proc = subprocess.Popen(
[tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
stdout=subprocess.PIPE,
stdin=subprocess.PIPE)
proc.stdin.write(self.fetcher.raw_content)
proc.stdin.close()
self.fetcher.content = proc.stdout.read().decode('utf-8')
proc.wait(timeout=60)
# Add a little metadata so we know if the file changes (like if an image changes, but the text is the same
# @todo may cause problems with non-UTF8?
metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format(
hashlib.md5(self.fetcher.raw_content).hexdigest().upper(),
len(self.fetcher.content))
self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>')
# Better would be if Watch.model could access the global data also
# and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
# https://realpython.com/inherit-python-dict/ instead of doing it procedurely
include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
# 1845 - remove duplicated filters in both group and watch include filter
include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
*watch.get("subtractive_selectors", []),
*self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
]
# Inject a virtual LD+JSON price tracker rule
if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())
if is_json and not has_filter_rule:
include_filters_rule.append("json:$")
has_filter_rule = True
if is_json:
# Sort the JSON so we dont get false alerts when the content is just re-ordered
try:
self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True)
except Exception as e:
# Might have just been a snippet, or otherwise bad JSON, continue
pass
if has_filter_rule:
for filter in include_filters_rule:
if any(prefix in filter for prefix in json_filter_prefixes):
stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
is_html = False
if is_html or watch.is_source_type_url:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
html_content = self.fetcher.content
# If not JSON, and if it's not text/plain..
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
# Don't run get_text or xpath/css filters on plaintext
stripped_text_from_html = html_content
else:
stripped_text = html_content
# Does it have some ld+json price data? used for easier monitoring
update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content)
# Then we assume HTML
if has_filter_rule:
html_content = ""
for filter_rule in include_filters_rule:
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
elif filter_rule.startswith('xpath1:'):
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
else:
html_content += html_tools.include_filters(include_filters=filter_rule,
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url)
if not html_content.strip():
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
if has_subtractive_selectors:
html_content = html_tools.element_removal(subtractive_selectors, html_content)
if watch.is_source_type_url:
stripped_text_from_html = html_content
else:
# extract text
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
render_anchor_tag_content=do_anchor,
is_rss=is_rss) # 1874 activate the <title workaround hack
# === TEXT TRANSFORMATIONS ===
if watch.get('trim_text_whitespace'):
stripped_text = transformer.trim_whitespace(stripped_text)
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
# Save text before ignore filters (for diff calculation)
text_content_before_ignored_filter = stripped_text
# Re #340 - return the content before the 'ignore text' was applied
# Also used to calculate/show what was removed
text_content_before_ignored_filter = stripped_text_from_html
# @todo whitespace coming from missing rtrim()?
# stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
# Rewrite's the processing text based on only what diff result they want to see
# === DIFF FILTERING ===
# If user wants specific diff types (added/removed/replaced only)
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter)
if stripped_text is None:
# No differences found, but content exists
c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')
# Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
from changedetectionio import diff
# needs to not include (added) etc or it may get used twice
# Replace the processed text with the preferred result
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
newest_version_file_contents=stripped_text_from_html,
include_equal=False, # not the same lines
include_added=watch.get('filter_text_added', True),
include_removed=watch.get('filter_text_removed', True),
include_replaced=watch.get('filter_text_replaced', True),
line_feed_sep="\n",
include_change_type_prefix=False)
# === EMPTY PAGE CHECK ===
watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
if not rendered_diff and stripped_text_from_html:
# We had some content, but no differences were found
# Store our new file as the MD5 so it will trigger in the future
c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
else:
stripped_text_from_html = rendered_diff
# Treat pages with no renderable text content as a change? No by default
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
raise content_fetchers.exceptions.ReplyWithContentButNoText(
url=url,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
has_filters=filter_config.has_include_filters,
html_content=html_content,
xpath_data=self.fetcher.xpath_data
)
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
has_filters=has_filter_rule,
html_content=html_content,
xpath_data=self.fetcher.xpath_data
)
# We rely on the actual text in the html output.. many sites have random script vars etc,
# in the future we'll implement other mechanisms.
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# === REGEX EXTRACTION ===
if filter_config.extract_text:
extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
stripped_text = extracted
# 615 Extract text by regex
extract_text = watch.get('extract_text', [])
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
if len(extract_text) > 0:
regex_matched_output = []
for s_re in extract_text:
# incase they specified something in '/.../x'
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
result = re.findall(regex, stripped_text_from_html)
for l in result:
if type(l) is tuple:
# @todo - some formatter option default (between groups)
regex_matched_output += list(l) + ['\n']
else:
# @todo - some formatter option default (between each ungrouped result)
regex_matched_output += [l] + ['\n']
else:
# Doesnt look like regex, just hunt for plaintext and return that which matches
# `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
r = re.compile(re.escape(s_re), re.IGNORECASE)
res = r.findall(stripped_text_from_html)
if res:
for match in res:
regex_matched_output += [match] + ['\n']
##########################################################
stripped_text_from_html = ''
if regex_matched_output:
# @todo some formatter for presentation?
stripped_text_from_html = ''.join(regex_matched_output)
# === MORE TEXT TRANSFORMATIONS ===
if watch.get('remove_duplicate_lines'):
stripped_text = transformer.remove_duplicate_lines(stripped_text)
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
if watch.get('sort_text_alphabetically'):
stripped_text = transformer.sort_alphabetically(stripped_text)
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
# === CHECKSUM CALCULATION ===
text_for_checksuming = stripped_text
### CALCULATE MD5
# If there's text to ignore
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
# Apply ignore_text for checksum calculation
if filter_config.ignore_text:
text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text)
text_for_checksuming = stripped_text_from_html
if text_to_ignore:
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
# Optionally remove ignored lines from output
strip_ignored_lines = watch.get('strip_ignored_lines')
if strip_ignored_lines is None:
strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines')
if strip_ignored_lines:
stripped_text = text_for_checksuming
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
else:
fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
# Calculate checksum
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace)
# === BLOCKING RULES EVALUATION ===
############ Blocking rules, after checksum #################
blocked = False
# Check trigger_text
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
trigger_text = watch.get('trigger_text', [])
trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
if len(trigger_text):
# Assume blocked
blocked = True
# Filter and trigger works the same, so reuse it
# It should return the line numbers that match
# Unblock flow if the trigger was found (some text remained after stripped what didnt match)
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=trigger_text,
mode="line numbers")
# Unblock if the trigger was found
if result:
blocked = False
# Check text_should_not_be_present
if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present):
blocked = True
text_should_not_be_present = watch.get('text_should_not_be_present', [])
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
if len(text_should_not_be_present):
# If anything matched, then we should block a change from happening
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=text_should_not_be_present,
mode="line numbers")
if result:
blocked = True
# Check custom conditions
if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text):
blocked = True
# And check if 'conditions' will let this pass through
if watch.get('conditions') and watch.get('conditions_match_logic'):
conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
application_datastruct=self.datastore.data,
ephemeral_data={
'text': stripped_text_from_html
}
)
# === CHANGE DETECTION ===
if not conditions_result.get('result'):
# Conditions say "Condition not met" so we block it.
blocked = True
# Looks like something changed, but did it match all the rules?
if blocked:
changed_detected = False
else:
# Compare checksums
# The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5:
changed_detected = True
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
# On first run, initialize previous_md5
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
# === UNIQUE LINES CHECK ===
if changed_detected and watch.get('check_unique_lines', False):
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
lines=stripped_text.splitlines(),
ignore_whitespace=ignore_whitespace
)
if changed_detected:
if watch.get('check_unique_lines', False):
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
if not has_unique_lines:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
changed_detected = False
else:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
lines=stripped_text_from_html.splitlines(),
ignore_whitespace=ignore_whitespace
)
# Note: Explicit cleanup is only needed here because text_json_diff handles
# large strings (100KB-300KB for RSS/HTML). The other processors work with
# small strings and don't need this.
#
# Python would clean these up automatically, but explicit `del` frees memory
# immediately rather than waiting for function return, reducing peak memory usage.
del content
if 'html_content' in locals() and html_content is not stripped_text:
del html_content
if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text:
del text_content_before_ignored_filter
if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text:
del text_for_checksuming
# One or more lines? unsure?
if not has_unique_lines:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
changed_detected = False
else:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
return changed_detected, update_obj, stripped_text
def _apply_diff_filtering(self, watch, stripped_text, text_before_filter):
"""Apply user's diff filtering preferences (show only added/removed/replaced lines)."""
from changedetectionio import diff
rendered_diff = diff.render_diff(
previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
newest_version_file_contents=stripped_text,
include_equal=False,
include_added=watch.get('filter_text_added', True),
include_removed=watch.get('filter_text_removed', True),
include_replaced=watch.get('filter_text_replaced', True),
line_feed_sep="\n",
include_change_type_prefix=False
)
watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8'))
if not rendered_diff and stripped_text:
# No differences found
return None
return rendered_diff
# stripped_text_from_html - Everything after filters and NO 'ignored' content
return changed_detected, update_obj, stripped_text_from_html

View File

@@ -243,15 +243,14 @@ def handle_watch_update(socketio, **kwargs):
general_stats = {
'count_errors': errored_count,
'unread_changes_count': datastore.unread_changes_count
'has_unviewed': datastore.has_unviewed
}
# Debug what's being emitted
# logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}")
# Emit to all clients (no 'broadcast' parameter needed - it's the default behavior)
socketio.emit("watch_update", {'watch': watch_data})
socketio.emit("general_stats_update", general_stats)
socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats})
# Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")

View File

@@ -1,130 +0,0 @@
"""
RSS/Atom feed processing tools for changedetection.io
"""
from loguru import logger
import re
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
"""
Process CDATA sections in HTML/XML content - inline replacement.
Args:
html_content: The HTML/XML content to process
render_anchor_tag_content: Whether to render anchor tag content
Returns:
Processed HTML/XML content with CDATA sections replaced inline
"""
from xml.sax.saxutils import escape as xml_escape
from .html_tools import html_to_text
pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
def repl(m):
text = m.group(1)
return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip()
return re.sub(pattern, repl, html_content)
def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
"""
Format RSS/Atom feed items in a readable text format using feedparser.
Converts RSS <item> or Atom <entry> elements to formatted text with:
- <title> → <h1>Title</h1>
- <link> → Link: [url]
- <guid> → Guid: [id]
- <pubDate> → PubDate: [date]
- <description> or <content> → Raw HTML content (CDATA and entities automatically handled)
Args:
rss_content: The RSS/Atom feed content
render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility)
Returns:
Formatted HTML content ready for html_to_text conversion
"""
try:
import feedparser
from xml.sax.saxutils import escape as xml_escape
# Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc.
feed = feedparser.parse(rss_content)
formatted_items = []
# Determine feed type for appropriate labels when fields are missing
# feedparser sets feed.version to things like 'rss20', 'atom10', etc.
is_atom = feed.version and 'atom' in feed.version
for entry in feed.entries:
item_parts = []
# Title - feedparser handles CDATA and entity unescaping automatically
if hasattr(entry, 'title') and entry.title:
item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>')
# Link
if hasattr(entry, 'link') and entry.link:
item_parts.append(f'Link: {xml_escape(entry.link)}<br>')
# GUID/ID
if hasattr(entry, 'id') and entry.id:
item_parts.append(f'Guid: {xml_escape(entry.id)}<br>')
# Date - feedparser normalizes all date field names to 'published'
if hasattr(entry, 'published') and entry.published:
item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>')
# Description/Content - feedparser handles CDATA and entity unescaping automatically
# Only add "Summary:" label for Atom <summary> tags
content = None
add_label = False
if hasattr(entry, 'content') and entry.content:
# Atom <content> - no label, just content
content = entry.content[0].value if entry.content[0].value else None
elif hasattr(entry, 'summary'):
# Could be RSS <description> or Atom <summary>
# feedparser maps both to entry.summary
content = entry.summary if entry.summary else None
# Only add "Summary:" label for Atom feeds (which use <summary> tag)
if is_atom:
add_label = True
# Add content with or without label
if content:
if add_label:
item_parts.append(f'Summary:<br>{content}')
else:
item_parts.append(content)
else:
# No content - just show <none>
item_parts.append('&lt;none&gt;')
# Join all parts of this item
if item_parts:
formatted_items.append('\n'.join(item_parts))
# Wrap each item in a div with classes (first, last, item-N)
items_html = []
total_items = len(formatted_items)
for idx, item in enumerate(formatted_items):
classes = ['rss-item']
if idx == 0:
classes.append('first')
if idx == total_items - 1:
classes.append('last')
classes.append(f'item-{idx + 1}')
class_str = ' '.join(classes)
items_html.append(f'<div class="{class_str}">{item}</div>')
return '<html><body>\n'+"\n<br><br>".join(items_html)+'\n</body></html>'
except Exception as e:
logger.warning(f"Error formatting RSS items: {str(e)}")
# Fall back to original content
return rss_content

View File

@@ -15,7 +15,7 @@ find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 --tb=long $test_name
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
done
echo "RUNNING WITH BASE_URL SET"
@@ -23,20 +23,20 @@ echo "RUNNING WITH BASE_URL SET"
# Now re-run some tests with BASE_URL enabled
# Re #65 - Ability to include a link back to the installation, in the notification.
export BASE_URL="https://really-unique-domain.io"
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 tests/test_notification.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
# Re-run with HIDE_REFERER set - could affect login
export HIDE_REFERER=True
pytest -vv -s --maxfail=1 tests/test_access_control.py
pytest tests/test_access_control.py
# Re-run a few tests that will trigger brotli based storage
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
pytest -vv -s --maxfail=1 tests/test_access_control.py
pytest tests/test_access_control.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
pytest -vv -s --maxfail=1 tests/test_backend.py
pytest -vv -s --maxfail=1 tests/test_rss.py
pytest -vv -s --maxfail=1 tests/test_unique_lines.py
pytest tests/test_backend.py
pytest tests/test_rss.py
pytest tests/test_unique_lines.py
# Try high concurrency
FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l

View File

@@ -9,7 +9,7 @@ set -x
# SOCKS5 related - start simple Socks5 proxy server
# SOCKSTEST=xyz should show in the logs of this service to confirm it fetched
docker run --network changedet-network -d --hostname socks5proxy --rm --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth serjs/go-socks5-proxy
echo "---------------------------------- SOCKS5 -------------------"
# SOCKS5 related - test from proxies.json

View File

@@ -0,0 +1,24 @@
"""
Safe Jinja2 render with max payload sizes
See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
"""
import jinja2.sandbox
import typing as t
import os
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
# (Which also limits available functions that could be called)
def render(template_str, **args: t.Any) -> str:
jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
output = jinja2_env.from_string(template_str).render(args)
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
def render_fully_escaped(content):
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
template = env.from_string("{{ some_html|e }}")
return template.render(some_html=content)

View File

@@ -29,7 +29,7 @@ $(document).ready(function () {
$(this).text(new Date($(this).data("utc")).toLocaleString());
})
const timezoneInput = $('#application-scheduler_timezone_default');
const timezoneInput = $('#application-timezone');
if(timezoneInput.length) {
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
if (!timezoneInput.val().trim()) {

View File

@@ -117,16 +117,15 @@ $(document).ready(function () {
}
})
socket.on('general_stats_update', function (general_stats) {
// Tabs at bottom of list
$('#watch-table-wrapper').toggleClass("has-unread-changes", general_stats.unread_changes_count !==0)
$('#watch-table-wrapper').toggleClass("has-error", general_stats.count_errors !== 0)
$('#post-list-with-errors a').text(`With errors (${ new Intl.NumberFormat(navigator.language).format(general_stats.count_errors) })`);
$('#unread-tab-counter').text(new Intl.NumberFormat(navigator.language).format(general_stats.unread_changes_count));
});
socket.on('watch_update', function (data) {
const watch = data.watch;
const general_stats = data.general_stats;
// Log the entire watch object for debugging
console.log('!!! WATCH UPDATE EVENT RECEIVED !!!');
console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`);
console.log('Watch data:', watch);
console.log('General stats:', general_stats);
// Updating watch table rows
const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]');
@@ -151,6 +150,12 @@ $(document).ready(function () {
console.log('Updated UI for watch:', watch.uuid);
}
// Tabs at bottom of list
$('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed);
$('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0)
$('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`);
$('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid));
});

View File

@@ -51,7 +51,6 @@ $(document).ready(function () {
$('#notification_body').val('');
$('#notification_format').val('System default');
$('#notification_urls').val('');
$('#notification_muted_none').prop('checked', true); // in the case of a ternary field
e.preventDefault();
});
$("#notification-token-toggle").click(function (e) {

View File

@@ -17,6 +17,15 @@ body.checking-now {
position: fixed;
}
#post-list-buttons {
#post-list-with-errors.has-error {
display: inline-block !important;
}
#post-list-mark-views.has-unviewed {
display: inline-block !important;
}
}

View File

@@ -127,44 +127,5 @@
display: inline-block !important;
}
}
}
#watch-table-wrapper {
/* general styling */
#post-list-buttons {
text-align: right;
padding: 0px;
margin: 0px;
li {
display: inline-block;
}
a {
border-top-left-radius: initial;
border-top-right-radius: initial;
border-bottom-left-radius: 5px;
border-bottom-right-radius: 5px;
}
}
/* post list dynamically on/off stuff */
&.has-error {
#post-list-buttons {
#post-list-with-errors {
display: inline-block !important;
}
}
}
&.has-unread-changes {
#post-list-buttons {
#post-list-unread, #post-list-mark-views, #post-list-unread {
display: inline-block !important;
}
}
}
}

View File

@@ -1,114 +0,0 @@
// Ternary radio button group component
.ternary-radio-group {
display: flex;
gap: 0;
border: 1px solid var(--color-grey-750);
border-radius: 4px;
overflow: hidden;
width: fit-content;
background: var(--color-background);
.ternary-radio-option {
position: relative;
cursor: pointer;
margin: 0;
display: flex;
align-items: center;
input[type="radio"] {
position: absolute;
opacity: 0;
width: 0;
height: 0;
}
.ternary-radio-label {
padding: 8px 16px;
background: var(--color-grey-900);
border: none;
border-right: 1px solid var(--color-grey-750);
font-size: 13px;
font-weight: 500;
color: var(--color-text);
transition: all 0.2s ease;
cursor: pointer;
display: block;
text-align: center;
}
&:last-child .ternary-radio-label {
border-right: none;
}
input:checked + .ternary-radio-label {
background: var(--color-link);
color: var(--color-text-button);
font-weight: 600;
&.ternary-default {
background: var(--color-grey-600);
color: var(--color-text-button);
}
&:hover {
background: #1a7bc4;
&.ternary-default {
background: var(--color-grey-500);
}
}
}
&:hover .ternary-radio-label {
background: var(--color-grey-800);
}
}
@media (max-width: 480px) {
width: 100%;
.ternary-radio-label {
flex: 1;
min-width: auto;
}
}
}
// Standard radio button styling
input[type="radio"].pure-radio:checked + label,
input[type="radio"].pure-radio:checked {
background: var(--color-link);
color: var(--color-text-button);
}
html[data-darkmode="true"] {
.ternary-radio-group {
.ternary-radio-option {
.ternary-radio-label {
background: var(--color-grey-350);
}
&:hover .ternary-radio-label {
background: var(--color-grey-400);
}
input:checked + .ternary-radio-label {
background: var(--color-link);
color: var(--color-text-button);
&.ternary-default {
background: var(--color-grey-600);
}
&:hover {
background: #1a7bc4;
&.ternary-default {
background: var(--color-grey-500);
}
}
}
}
}
}

View File

@@ -20,7 +20,7 @@
@use "parts/lister_extra";
@use "parts/socket";
@use "parts/visualselector";
@use "parts/widgets";
body {
color: var(--color-text);
@@ -203,6 +203,24 @@ code {
}
#post-list-buttons {
text-align: right;
padding: 0px;
margin: 0px;
li {
display: inline-block;
}
a {
border-top-left-radius: initial;
border-top-right-radius: initial;
border-bottom-left-radius: 5px;
border-bottom-right-radius: 5px;
}
}
body:after {
content: "";
background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%);
@@ -344,7 +362,7 @@ label {
}
}
.grey-form-border {
#notification-customisation {
border: 1px solid var(--color-border-notification);
padding: 0.5rem;
border-radius: 5px;
@@ -1112,12 +1130,11 @@ ul {
}
#realtime-conn-error {
position: fixed;
position: absolute;
bottom: 0;
left: 0;
left: 30px;
background: var(--color-warning);
padding: 10px;
font-size: 0.8rem;
color: #fff;
opacity: 0.8;
}

File diff suppressed because one or more lines are too long

View File

@@ -202,13 +202,14 @@ class ChangeDetectionStore:
return seconds
@property
def unread_changes_count(self):
unread_changes_count = 0
def has_unviewed(self):
if not self.__data.get('watching'):
return None
for uuid, watch in self.__data['watching'].items():
if watch.history_n >= 2 and watch.viewed == False:
unread_changes_count += 1
return unread_changes_count
return True
return False
@property
def data(self):
@@ -261,6 +262,11 @@ class ChangeDetectionStore:
extras = deepcopy(self.data['watching'][uuid])
new_uuid = self.add_watch(url=url, extras=extras)
watch = self.data['watching'][new_uuid]
if self.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
# Because it will be recalculated on the next fetch
self.data['watching'][new_uuid]['title'] = None
return new_uuid
def url_exists(self, url):
@@ -302,6 +308,7 @@ class ChangeDetectionStore:
'browser_steps',
'css_filter',
'extract_text',
'extract_title_as_title',
'headers',
'ignore_text',
'include_filters',
@@ -316,7 +323,6 @@ class ChangeDetectionStore:
'title',
'trigger_text',
'url',
'use_page_title_in_list',
'webdriver_js_execute_code',
]:
if res.get(k):
@@ -967,20 +973,6 @@ class ChangeDetectionStore:
f_d.write(zlib.compress(f_j.read()))
os.unlink(json_path)
def update_20(self):
for uuid, watch in self.data['watching'].items():
if self.data['watching'][uuid].get('extract_title_as_title'):
self.data['watching'][uuid]['use_page_title_in_list'] = self.data['watching'][uuid].get('extract_title_as_title')
del self.data['watching'][uuid]['extract_title_as_title']
if self.data['settings']['application'].get('extract_title_as_title'):
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
def update_21(self):
self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone')
del self.data['settings']['application']['timezone']
def add_notification_url(self, notification_url):
logger.debug(f">>> Adding new notification_url - '{notification_url}'")

View File

@@ -33,7 +33,7 @@
<div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div>
</div>
</div>
<div class="pure-control-group grey-form-border">
<div id="notification-customisation" class="pure-control-group">
<div class="pure-control-group">
{{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }}
<span class="pure-form-message-inline">Title for all notifications</span>
@@ -70,7 +70,7 @@
</tr>
<tr>
<td><code>{{ '{{watch_title}}' }}</code></td>
<td>The page title of the watch, uses &lt;title&gt; if not set, falls back to URL</td>
<td>The title of the watch.</td>
</tr>
<tr>
<td><code>{{ '{{watch_tag}}' }}</code></td>

View File

@@ -1,47 +1,14 @@
{% macro render_field(field) %}
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field.label }}</div>
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
{% if field.top_errors %}
top
<ul class="errors top-errors">
{% for error in field.top_errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
{% if field.errors %}
<ul class=errors>
{% if field.errors is mapping and 'form' in field.errors %}
{# and subfield form errors, such as used in RequiredFormField() for TimeBetweenCheckForm sub form #}
{% set errors = field.errors['form'] %}
{% for error in errors %}
<li>{{ error }}</li>
{% endfor %}
{% elif field.type == 'FieldList' %}
{# Handle FieldList of FormFields - errors is a list of dicts, one per entry #}
{% for idx, entry_errors in field.errors|enumerate %}
{% if entry_errors is mapping and entry_errors %}
{# Only show entries that have actual errors #}
<li><strong>Entry {{ idx + 1 }}:</strong>
<ul>
{% for field_name, messages in entry_errors.items() %}
{% for message in messages %}
<li>{{ field_name }}: {{ message }}</li>
{% endfor %}
{% endfor %}
</ul>
</li>
{% endif %}
{% endfor %}
{% else %}
{# regular list of errors with this field #}
{% for error in field.errors %}
<li>{{ error }}</li>
{% endfor %}
{% endif %}
</ul>
{% endif %}
</div>
<div {% if field.errors %} class="error" {% endif %}>{{ field.label }}</div>
<div {% if field.errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
{% if field.errors %}
<ul class=errors>
{% for error in field.errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
</div>
{% endmacro %}
{% macro render_checkbox_field(field) %}
@@ -57,23 +24,6 @@
</div>
{% endmacro %}
{% macro render_ternary_field(field, BooleanField=false) %}
{% if BooleanField %}
{% set _ = field.__setattr__('boolean_mode', true) %}
{% endif %}
<div class="ternary-field {% if field.errors %} error {% endif %}">
<div class="ternary-field-label">{{ field.label }}</div>
<div class="ternary-field-widget">{{ field(**kwargs)|safe }}</div>
{% if field.errors %}
<ul class=errors>
{% for error in field.errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
</div>
{% endmacro %}
{% macro render_simple_field(field) %}
<span class="label {% if field.errors %}error{% endif %}">{{ field.label }}</span>
@@ -111,39 +61,6 @@
{{ field(**kwargs)|safe }}
{% endmacro %}
{% macro render_fieldlist_with_inline_errors(fieldlist) %}
{# Specialized macro for FieldList(FormField(...)) that renders errors inline with each field #}
<div {% if fieldlist.errors %} class="error" {% endif %}>{{ fieldlist.label }}</div>
<div {% if fieldlist.errors %} class="error" {% endif %}>
<ul id="{{ fieldlist.id }}">
{% for entry in fieldlist %}
<li {% if entry.errors %} class="error" {% endif %}>
<label for="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>{{ fieldlist.label.text }}-{{ loop.index0 }}</label>
<table id="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>
<tbody>
{% for subfield in entry %}
<tr {% if subfield.errors %} class="error" {% endif %}>
<th {% if subfield.errors %} class="error" {% endif %}><label for="{{ subfield.id }}" {% if subfield.errors %} class="error" {% endif %}>{{ subfield.label.text }}</label></th>
<td {% if subfield.errors %} class="error" {% endif %}>
{{ subfield(**kwargs)|safe }}
{% if subfield.errors %}
<ul class="errors">
{% for error in subfield.errors %}
<li class="error">{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</li>
{% endfor %}
</ul>
</div>
{% endmacro %}
{% macro render_conditions_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %}
<div class="fieldlist_formfields" id="{{ table_id }}">
<div class="fieldlist-header">

View File

@@ -5,7 +5,6 @@
<meta charset="utf-8" >
<meta name="viewport" content="width=device-width, initial-scale=1.0" >
<meta name="description" content="Self hosted website change detection." >
<meta name="robots" content="noindex">
<title>Change Detection{{extra_title}}</title>
{% if app_rss_token %}
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss.feed', tag=active_tag_uuid , token=app_rss_token)}}" >
@@ -41,7 +40,7 @@
{% endif %}
</head>
<body class="{{extra_classes}}">
<body class="">
<div class="header">
<div class="pure-menu-fixed" style="width: 100%;">
<div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
@@ -237,7 +236,7 @@
<script src="{{url_for('static_content', group='js', filename='toggle-theme.js')}}" defer></script>
<div id="checking-now-fixed-tab" style="display: none;"><span class="spinner"></span><span>&nbsp;Checking now</span></div>
<div id="realtime-conn-error" style="display:none">Real-time updates offline</div>
<div id="realtime-conn-error" style="display:none">Offline</div>
</body>
</html>

View File

@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -72,16 +72,15 @@
<div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
<div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
</div>
<div class="pure-control-group">
{{ render_field(form.tags) }}
<span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
</div>
<div class="pure-control-group inline-radio">
{{ render_field(form.processor) }}
</div>
<div class="pure-control-group">
{{ render_field(form.title, class="m-d", placeholder=watch.label) }}
<span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span>
{{ render_field(form.title, class="m-d") }}
</div>
<div class="pure-control-group">
{{ render_field(form.tags) }}
<span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
</div>
<div class="pure-control-group time-between-check border-fieldset">
@@ -102,16 +101,15 @@
</div>
<br>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.extract_title_as_title) }}
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.filter_failure_notification_send) }}
<span class="pure-form-message-inline">
Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
</span>
</div>
<div class="pure-control-group">
{{ render_ternary_field(form.use_page_title_in_list) }}
</div>
</fieldset>
</div>
@@ -264,7 +262,7 @@ Math: {{ 1 + 1 }}") }}
<div class="tab-pane-inner" id="notifications">
<fieldset>
<div class="pure-control-group inline-radio">
{{ render_ternary_field(form.notification_muted, BooleanField=true) }}
{{ render_checkbox_field(form.notification_muted) }}
</div>
{% if watch_needs_selenium_or_playwright %}
<div class="pure-control-group inline-radio">
@@ -471,11 +469,11 @@ Math: {{ 1 + 1 }}") }}
<div class="pure-control-group">
{{ render_button(form.save_button) }}
<a href="{{url_for('ui.form_delete', uuid=uuid)}}"
class="pure-button button-error ">Delete</a>
class="pure-button button-small button-error ">Delete</a>
{% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}"
class="pure-button button-error">Clear History</a>{% endif %}
class="pure-button button-small button-error ">Clear History</a>{% endif %}
<a href="{{url_for('ui.form_clone', uuid=uuid)}}"
class="pure-button">Clone &amp; Edit</a>
class="pure-button button-small ">Clone &amp; Edit</a>
</div>
</div>
</form>

View File

@@ -26,10 +26,7 @@
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
</ul>
</span>
<br><br>
<div class="pure-control-group">
{{ render_ternary_field(form.strip_ignored_lines) }}
</div>
</fieldset>
<fieldset>

View File

@@ -4,7 +4,6 @@ import time
from threading import Thread
import pytest
import arrow
from changedetectionio import changedetection_app
from changedetectionio import store
import os
@@ -30,39 +29,16 @@ def reportlog(pytestconfig):
logger.remove(handler_id)
@pytest.fixture
def environment(mocker):
"""Mock arrow.now() to return a fixed datetime for testing jinja2 time extension."""
# Fixed datetime: Wed, 09 Dec 2015 23:33:01 UTC
# This is calculated to match the test expectations when offsets are applied
fixed_datetime = arrow.Arrow(2015, 12, 9, 23, 33, 1, tzinfo='UTC')
# Patch arrow.now in the TimeExtension module where it's actually used
mocker.patch('changedetectionio.jinja2_custom.extensions.TimeExtension.arrow.now', return_value=fixed_datetime)
return fixed_datetime
def format_memory_human(bytes_value):
"""Format memory in human-readable units (KB, MB, GB)"""
if bytes_value < 1024:
return f"{bytes_value} B"
elif bytes_value < 1024 ** 2:
return f"{bytes_value / 1024:.2f} KB"
elif bytes_value < 1024 ** 3:
return f"{bytes_value / (1024 ** 2):.2f} MB"
else:
return f"{bytes_value / (1024 ** 3):.2f} GB"
def track_memory(memory_usage, ):
process = psutil.Process(os.getpid())
while not memory_usage["stop"]:
current_rss = process.memory_info().rss
memory_usage["peak"] = max(memory_usage["peak"], current_rss)
memory_usage["current"] = current_rss # Keep updating current
time.sleep(0.01) # Adjust the sleep time as needed
@pytest.fixture(scope='function')
def measure_memory_usage(request):
memory_usage = {"peak": 0, "current": 0, "stop": False}
memory_usage = {"peak": 0, "stop": False}
tracker_thread = Thread(target=track_memory, args=(memory_usage,))
tracker_thread.start()
@@ -71,17 +47,16 @@ def measure_memory_usage(request):
memory_usage["stop"] = True
tracker_thread.join()
# Note: psutil returns RSS memory in bytes
peak_human = format_memory_human(memory_usage["peak"])
s = f"{time.time()} {request.node.fspath} - '{request.node.name}' - Peak memory: {peak_human}"
# Note: ru_maxrss is in kilobytes on Unix-based systems
max_memory_used = memory_usage["peak"] / 1024 # Convert to MB
s = f"Peak memory used by the test {request.node.fspath} - '{request.node.name}': {max_memory_used:.2f} MB"
logger.debug(s)
with open("test-memory.log", 'a') as f:
f.write(f"{s}\n")
# Assert that the memory usage is less than 200MB
# assert peak_memory_kb < 150 * 1024, f"Memory usage exceeded 150MB: {peak_human}"
# assert max_memory_used < 150, f"Memory usage exceeded 200MB: {max_memory_used:.2f} MB"
def cleanup(datastore_path):

View File

@@ -29,8 +29,13 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
assert b"Settings updated." in res.data
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
if make_test_use_extra_browser:
@@ -50,8 +55,7 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
"tags": "",
"headers": "",
'fetch_backend': f"extra_browser_{custom_browser_name}",
'webdriver_js_execute_code': '',
"time_between_check_use_default": "y"
'webdriver_js_execute_code': ''
},
follow_redirects=True
)

View File

@@ -28,7 +28,6 @@ def test_execute_custom_js(client, live_server, measure_memory_usage):
'fetch_backend': "html_webdriver",
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();',
'headers': "testheader: yes\buser-agent: MyCustomAgent",
"time_between_check_use_default": "y",
},
follow_redirects=True
)

View File

@@ -27,7 +27,6 @@ def test_preferred_proxy(client, live_server, measure_memory_usage):
"proxy": "proxy-two",
"tags": "",
"url": url,
"time_between_check_use_default": "y",
},
follow_redirects=True
)

View File

@@ -62,7 +62,6 @@ def test_noproxy_option(client, live_server, measure_memory_usage):
"proxy": "no-proxy",
"tags": "",
"url": url,
"time_between_check_use_default": "y",
},
follow_redirects=True
)

View File

@@ -44,7 +44,6 @@ def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
"url": test_url,
"fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests",
"proxy": "ui-0custom-test-proxy",
"time_between_check_use_default": "y",
}
res = client.post(

View File

@@ -49,39 +49,3 @@ def test_select_custom(client, live_server, measure_memory_usage):
#
# Now we should see the request in the container logs for "squid-squid-custom" because it will be the only default
def test_custom_proxy_validation(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
# Goto settings, add our custom one
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-fetch_backend": 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy",
"requests-extra_proxies-0-proxy_url": "xxxxhtt/333??p://test:awesome@squid-custom:3128",
},
follow_redirects=True
)
assert b"Settings updated." not in res.data
assert b'Proxy URLs must start with' in res.data
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-fetch_backend": 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy",
"requests-extra_proxies-0-proxy_url": "https://",
},
follow_redirects=True
)
assert b"Settings updated." not in res.data
assert b"Invalid URL." in res.data

View File

@@ -2,7 +2,7 @@
import json
import os
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def set_response():
@@ -66,7 +66,6 @@ def test_socks5(client, live_server, measure_memory_usage):
"proxy": "ui-0socks5proxy",
"tags": "",
"url": test_url,
"time_between_check_use_default": "y",
},
follow_redirects=True
)
@@ -98,5 +97,6 @@ def test_socks5(client, live_server, measure_memory_usage):
)
assert b"OK" in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -53,7 +53,6 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
"proxy": "socks5proxy",
"tags": "",
"url": test_url,
"time_between_check_use_default": "y",
},
follow_redirects=True
)

View File

@@ -5,7 +5,7 @@ import re
from flask import url_for
from changedetectionio.tests.util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, \
wait_for_all_checks, \
set_longer_modified_response, delete_all_watches
set_longer_modified_response
from changedetectionio.tests.util import extract_UUID_from_client
import logging
import base64
@@ -85,7 +85,8 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
assert '(added) So let\'s see what happens.\r\n' in msg # The plaintext part with \r\n
assert 'Content-Type: text/html' in msg
assert '(added) So let\'s see what happens.<br>' in msg # the html part
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage):
@@ -156,8 +157,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
data={
"url": test_url,
"notification_format": 'HTML',
'fetch_backend': "html_requests",
"time_between_check_use_default": "y"},
'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -178,4 +178,5 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
assert '&lt;' not in msg
assert 'Content-Type: text/html' in msg
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -2,7 +2,7 @@ from .util import live_server_setup, wait_for_all_checks
from flask import url_for
import time
def test_check_access_control(app, client, live_server, measure_memory_usage):
def test_check_access_control(app, client, live_server):
# Still doesnt work, but this is closer.
# live_server_setup(live_server) # Setup on conftest per function

View File

@@ -3,7 +3,7 @@
import os.path
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
import time
def set_original(excluding=None, add_line=None):
@@ -44,8 +44,12 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
set_original()
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -57,8 +61,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
data={"trigger_text": 'The golden line',
"url": test_url,
'fetch_backend': "html_requests",
'filter_text_removed': 'y',
"time_between_check_use_default": "y"},
'filter_text_removed': 'y'},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -71,7 +74,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
wait_for_all_checks(client)
time.sleep(0.5)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
# The trigger line is REMOVED, this should trigger
set_original(excluding='The golden line')
@@ -80,7 +83,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
time.sleep(1)
@@ -94,21 +97,23 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
wait_for_all_checks(client)
time.sleep(1)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
# Remove it again, and we should get a trigger
set_original(excluding='The golden line')
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
time.sleep(1)
# Give the endpoint time to spin up
@@ -131,8 +136,12 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
set_original()
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -145,8 +154,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
'processor': 'text_json_diff',
'fetch_backend': "html_requests",
'filter_text_removed': '',
'filter_text_added': 'y',
"time_between_check_use_default": "y"},
'filter_text_added': 'y'},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -159,7 +167,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
# The trigger line is ADDED, this should trigger
set_original(add_line='<p>Oh yes please</p>')
@@ -167,7 +175,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
# Takes a moment for apprise to fire
wait_for_notification_endpoint_output()
@@ -177,4 +185,5 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'-Oh yes please' in response
assert '网站监测 内容更新了'.encode('utf-8') in response
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
import json
import uuid
@@ -276,7 +276,8 @@ def test_access_denied(client, live_server, measure_memory_usage):
assert res.status_code == 200
# Cleanup everything
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
res = client.post(
url_for("settings.settings_page"),
@@ -310,7 +311,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
"value": "." # contains anything
}
],
"conditions_match_logic": "ALL",
"conditions_match_logic": "ALL"
}
),
headers={'content-type': 'application/json', 'x-api-key': api_key},
@@ -327,7 +328,6 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
)
watch_uuid = list(res.json.keys())[0]
assert not res.json[watch_uuid].get('viewed'), 'A newly created watch can only be unviewed'
# Check in the edit page just to be sure
res = client.get(
@@ -341,12 +341,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"title": "new title",
'time_between_check': {'minutes': 552},
'headers': {'cookie': 'all eaten'},
'last_viewed': int(time.time())
}),
data=json.dumps({"title": "new title", 'time_between_check': {'minutes': 552}, 'headers': {'cookie': 'all eaten'}}),
)
assert res.status_code == 200, "HTTP PUT update was sent OK"
@@ -356,7 +351,6 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
headers={'x-api-key': api_key}
)
assert res.json.get('title') == 'new title'
assert res.json.get('viewed'), 'With the timestamp greater than "changed" a watch can be updated to viewed'
# Check in the edit page just to be sure
res = client.get(
@@ -384,17 +378,18 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
assert b'Additional properties are not allowed' in res.data
# Cleanup everything
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_api_import(client, live_server, measure_memory_usage):
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.post(
url_for("import") + "?tag=import-test",
data='https://website1.com\r\nhttps://website2.com',
headers={'x-api-key': api_key, 'content-type': 'text/plain'},
headers={'x-api-key': api_key},
follow_redirects=True
)

View File

@@ -4,7 +4,7 @@ from flask import url_for
from .util import live_server_setup
import json
def test_api_notifications_crud(client, live_server, measure_memory_usage):
def test_api_notifications_crud(client, live_server):
# live_server_setup(live_server) # Setup on conftest per function
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

View File

@@ -1,199 +0,0 @@
#!/usr/bin/env python3
"""
OpenAPI validation tests for ChangeDetection.io API
This test file specifically verifies that OpenAPI validation is working correctly
by testing various scenarios that should trigger validation errors.
"""
import time
import json
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage):
"""Test that creating a watch with invalid content-type triggers OpenAPI validation error."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Try to create a watch with JSON data but without proper content-type header
res = client.post(
url_for("createwatch"),
data=json.dumps({"url": "https://example.com", "title": "Test Watch"}),
headers={'x-api-key': api_key}, # Missing 'content-type': 'application/json'
follow_redirects=True
)
# Should get 400 error due to OpenAPI validation failure
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage):
"""Test that creating a watch without required URL field triggers OpenAPI validation error."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Try to create a watch without the required 'url' field
res = client.post(
url_for("createwatch"),
data=json.dumps({"title": "Test Watch Without URL"}), # Missing required 'url' field
headers={'x-api-key': api_key, 'content-type': 'application/json'},
follow_redirects=True
)
# Should get 400 error due to missing required field
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage):
"""Test that including invalid fields triggers OpenAPI validation error."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# First create a valid watch
res = client.post(
url_for("createwatch"),
data=json.dumps({"url": "https://example.com", "title": "Test Watch"}),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
follow_redirects=True
)
assert res.status_code == 201, "Watch creation should succeed"
# Get the watch list to find the UUID
res = client.get(
url_for("createwatch"),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
watch_uuid = list(res.json.keys())[0]
# Now try to update the watch with an invalid field
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"title": "Updated title",
"invalid_field_that_doesnt_exist": "this should cause validation error"
}),
)
# Should get 400 error due to invalid field (this will be caught by internal validation)
# Note: This tests the flow where OpenAPI validation passes but internal validation catches it
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties"
def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage):
"""Test that import endpoint with wrong content-type triggers OpenAPI validation error."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Try to import URLs with JSON content-type instead of text/plain
res = client.post(
url_for("import") + "?tag=test-import",
data='https://website1.com\nhttps://website2.com',
headers={'x-api-key': api_key, 'content-type': 'application/json'}, # Wrong content-type
follow_redirects=True
)
# Should get 400 error due to content-type mismatch
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage):
"""Test that import endpoint with correct content-type succeeds (positive test)."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Import URLs with correct text/plain content-type
res = client.post(
url_for("import") + "?tag=test-import",
data='https://website1.com\nhttps://website2.com',
headers={'x-api-key': api_key, 'content-type': 'text/plain'}, # Correct content-type
follow_redirects=True
)
# Should succeed
assert res.status_code == 200, f"Expected 200 but got {res.status_code}"
assert len(res.json) == 2, "Should import 2 URLs"
def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage):
"""Test that GET requests bypass OpenAPI validation entirely."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Disable API token requirement first
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-fetch_backend": "html_requests",
"application-api_access_token_enabled": ""
},
follow_redirects=True
)
assert b"Settings updated." in res.data
# Make GET request to list watches - should succeed even without API key or content-type
res = client.get(url_for("createwatch")) # No headers needed for GET
assert res.status_code == 200, f"GET requests should succeed without OpenAPI validation, got {res.status_code}"
# Should return JSON with watch list (empty in this case)
assert isinstance(res.json, dict), "Should return JSON dictionary for watch list"
def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage):
"""Test that creating a tag without required title triggers OpenAPI validation error."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Try to create a tag without the required 'title' field
res = client.post(
url_for("tag"),
data=json.dumps({"notification_urls": ["mailto:test@example.com"]}), # Missing required 'title' field
headers={'x-api-key': api_key, 'content-type': 'application/json'},
follow_redirects=True
)
# Should get 400 error due to missing required field
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage):
"""Test that watch updates allow partial updates without requiring all fields (positive test)."""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# First create a valid watch
res = client.post(
url_for("createwatch"),
data=json.dumps({"url": "https://example.com", "title": "Test Watch"}),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
follow_redirects=True
)
assert res.status_code == 201, "Watch creation should succeed"
# Get the watch list to find the UUID
res = client.get(
url_for("createwatch"),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
watch_uuid = list(res.json.keys())[0]
# Update only the title (partial update) - should succeed
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({"title": "Updated Title Only"}), # Only updating title, not URL
)
# Should succeed because UpdateWatch schema allows partial updates
assert res.status_code == 200, f"Partial updates should succeed, got {res.status_code}"
# Verify the update worked
res = client.get(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
assert res.json.get('title') == 'Updated Title Only', "Title should be updated"
assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"

View File

@@ -6,7 +6,7 @@ import time
from .util import live_server_setup, wait_for_all_checks
def test_api_search(client, live_server, measure_memory_usage):
def test_api_search(client, live_server):
# live_server_setup(live_server) # Setup on conftest per function
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

View File

@@ -12,14 +12,18 @@ def test_basic_auth(client, live_server, measure_memory_usage):
# This page will echo back any auth info
test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
time.sleep(1)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
time.sleep(1)
# Check form validation
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data

View File

@@ -86,8 +86,12 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Should get a notice that it's available
@@ -125,8 +129,12 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'ldjson-price-track-offer' not in res.data
@@ -138,8 +146,12 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
for k,v in client.application.config.get('DATASTORE').data['watching'].items():

View File

@@ -3,7 +3,7 @@
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
extract_UUID_from_client, delete_all_watches
extract_UUID_from_client
sleep_time_for_fetch_thread = 3
@@ -38,9 +38,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'test-endpoint' in res.data
# Default no password set, this stuff should be always available.
@@ -74,9 +74,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid))
assert b'which has this one new line' in res.data
# Now something should be ready, indicated by having a 'has-unread-changes' class
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
# #75, and it should be in the RSS feed
rss_token = extract_rss_token_from_UI(client)
@@ -89,8 +89,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b'CDATA' in res.data
assert expected_url.encode('utf-8') in res.data
#
# Following the 'diff' link, it should no longer display as 'has-unread-changes' even after we recheck it a few times
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid))
assert b'selected=""' in res.data, "Confirm diff history page loaded"
@@ -104,34 +104,26 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)
# Do this a few times.. ensures we don't accidently set the status
# Do this a few times.. ensures we dont accidently set the status
for n in range(2):
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'class="has-unread-changes' not in res.data
assert b'head title' in res.data # Should be ON by default
assert b'unviewed' not in res.data
assert b'class="has-unviewed' not in res.data
assert b'head title' not in res.data # Should not be present because this is off by default
assert b'test-endpoint' in res.data
# Recheck it but only with a title change, content wasnt changed
set_original_response(extra_title=" and more")
set_original_response()
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'head title and more' in res.data
# disable <title> pickup
# Enable auto pickup of <title> in settings
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "", "requests-time_between_check-minutes": 180,
data={"application-extract_title_as_title": "1", "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
@@ -140,19 +132,21 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'class="has-unread-changes' in res.data
assert b'head title' not in res.data # should now be off
assert b'unviewed' in res.data
assert b'class="has-unviewed' in res.data
# It should have picked up the <title>
assert b'head title' in res.data
# Be sure the last_viewed is going to be greater than the last snapshot
time.sleep(1)
# hit the mark all viewed link
res = client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)
assert b'class="has-unread-changes' not in res.data
assert b'has-unread-changes' not in res.data
assert b'class="has-unviewed' not in res.data
assert b'unviewed' not in res.data
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
client.get(url_for("ui.clear_watch_history", uuid=uuid))
@@ -163,219 +157,5 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
#
# Cleanup everything
delete_all_watches(client)
# Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
def test_requests_timeout(client, live_server, measure_memory_usage):
delay = 2
test_url = url_for('test_endpoint', delay=delay, _external=True)
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "",
"requests-time_between_check-minutes": 180,
"requests-timeout": delay - 1,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# requests takes >2 sec but we timeout at 1 second
res = client.get(url_for("watchlist.index"))
assert b'Read timed out. (read timeout=1)' in res.data
##### Now set a longer timeout
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "",
"requests-time_between_check-minutes": 180,
"requests-timeout": delay + 1, # timeout should be a second more than the reply time
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'Read timed out' not in res.data
def test_non_text_mime_or_downloads(client, live_server, measure_memory_usage):
"""
https://github.com/dgtlmoon/changedetection.io/issues/3434
I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8,
but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog),
changedetection somehow ignores all line breaks and treats the document file as if everything is on one line.
WHAT THIS DOES - makes the system rely on 'magic' to determine what is it
:param client:
:param live_server:
:param measure_memory_usage:
:return:
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""some random text that should be split by line
and not parsed with html_to_text
this way we know that it correctly parsed as plain text
\r\n
ok\r\n
got it\r\n
""")
test_url = url_for('test_endpoint', content_type="application/octet-stream", _external=True)
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
### check the front end
res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)
assert b"some random text that should be split by line\n" in res.data
####
# Check the snapshot by API that it has linefeeds too
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.get(
url_for("watchhistory", uuid=watch_uuid),
headers={'x-api-key': api_key},
)
# Fetch a snapshot by timestamp, check the right one was found
res = client.get(
url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]),
headers={'x-api-key': api_key},
)
assert b"some random text that should be split by line\n" in res.data
delete_all_watches(client)
def test_standard_text_plain(client, live_server, measure_memory_usage):
"""
https://github.com/dgtlmoon/changedetection.io/issues/3434
I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8,
but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog),
changedetection somehow ignores all line breaks and treats the document file as if everything is on one line.
The real bug here can be that it will try to process plain-text as HTML, losing <etc>
:param client:
:param live_server:
:param measure_memory_usage:
:return:
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""some random text that should be split by line
and not parsed with html_to_text
<title>Even this title should stay because we are just plain text</title>
this way we know that it correctly parsed as plain text
\r\n
ok\r\n
got it\r\n
""")
test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
### check the front end
res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)
assert b"some random text that should be split by line\n" in res.data
####
# Check the snapshot by API that it has linefeeds too
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.get(
url_for("watchhistory", uuid=watch_uuid),
headers={'x-api-key': api_key},
)
# Fetch a snapshot by timestamp, check the right one was found
res = client.get(
url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]),
headers={'x-api-key': api_key},
)
assert b"some random text that should be split by line\n" in res.data
assert b"<title>Even this title should stay because we are just plain text</title>" in res.data
delete_all_watches(client)
# Server says its plaintext, we should always treat it as plaintext
def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage):
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<?xml version="1.0" encoding="utf-8"?>
<resources xmlns:tools="http://schemas.android.com/tools">
<!--Activity and fragment titles-->
<string name="feed_update_receiver_name">Abonnementen bijwerken</string>
</resources>
""")
test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)
assert b'&lt;string name=&#34;feed_update_receiver_name&#34;' in res.data
delete_all_watches(client)
# Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server, measure_memory_usage):
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<?xml version="1.0" encoding="utf-8"?>
<resources xmlns:tools="http://schemas.android.com/tools">
<!--Activity and fragment titles-->
<string name="feed_update_receiver_name">Abonnementen bijwerken</string>
<foobar>ok man</foobar>
</resources>
""")
test_url=url_for('test_endpoint', content_type="text/plain", _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": ['//string']})
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)
assert b'&lt;string name=&#34;feed_update_receiver_name&#34;' in res.data
assert b'&lt;foobar' not in res.data
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -58,7 +58,6 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):
has_watch_update = False
has_unviewed_update = False
got_general_stats_update = False
for i in range(10):
# Get received events
@@ -66,11 +65,15 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):
if received:
logger.info(f"Received {len(received)} events after {i+1} seconds")
# Check for watch_update events with unviewed=True
for event in received:
if event['name'] == 'watch_update':
has_watch_update = True
if event['name'] == 'general_stats_update':
got_general_stats_update = True
if event['args'][0]['watch'].get('unviewed', False):
has_unviewed_update = True
logger.info("Found unviewed update event!")
break
if has_unviewed_update:
break
@@ -89,7 +92,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):
assert has_watch_update, "No watch_update events received"
# Verify we received an unviewed event
assert got_general_stats_update, "Got general stats update event"
assert has_unviewed_update, "No watch_update event with unviewed=True received"
# Alternatively, check directly if the watch in the datastore is marked as unviewed
from changedetectionio.flask_app import app

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
from changedetectionio import html_tools
def set_original_ignore_response():
@@ -70,8 +70,12 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -82,8 +86,7 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"text_should_not_be_present": ignore_text,
"url": test_url,
'fetch_backend': "html_requests",
"time_between_check_use_default": "y"
'fetch_backend': "html_requests"
},
follow_redirects=True
)
@@ -103,9 +106,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
# The page changed, BUT the text is still there, just the rest of it changes, we should not see a change
@@ -116,9 +119,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
# 2548
@@ -127,7 +130,7 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
# Now we set a change where the text is gone AND its different content, it should now trigger
@@ -135,9 +138,10 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -14,8 +14,12 @@ def test_clone_functionality(client, live_server, measure_memory_usage):
test_url = url_for('test_endpoint', _external=True)
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# So that we can be sure the same history doesnt carry over

View File

@@ -3,7 +3,7 @@ import json
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
from ..model import CONDITIONS_MATCH_LOGIC_DEFAULT
@@ -47,11 +47,11 @@ def set_number_out_of_range_response(number="150"):
f.write(test_return_data)
# def test_setup(client, live_server, measure_memory_usage):
# def test_setup(client, live_server):
"""Test that both text and number conditions work together with AND logic."""
# live_server_setup(live_server) # Setup on conftest per function
def test_conditions_with_text_and_number(client, live_server, measure_memory_usage):
def test_conditions_with_text_and_number(client, live_server):
"""Test that both text and number conditions work together with AND logic."""
set_original_response("50")
@@ -60,8 +60,12 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
test_url = url_for('test_endpoint', _external=True)
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Configure the watch with two conditions connected with AND:
@@ -101,7 +105,6 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
"conditions-5-operator": "contains_regex",
"conditions-5-field": "page_filtered_text",
"conditions-5-value": "\d",
"time_between_check_use_default": "y",
},
follow_redirects=True
)
@@ -121,7 +124,7 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
time.sleep(2)
# 75 is > 20 and < 100 and contains "5"
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
# Case 2: Change with one condition violated
@@ -137,20 +140,25 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
# Should NOT be marked as having changes since not all conditions are met
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# The 'validate' button next to each rule row
def test_condition_validate_rule_row(client, live_server, measure_memory_usage):
def test_condition_validate_rule_row(client, live_server):
set_original_response("50")
test_url = url_for('test_endpoint', _external=True)
# Add our URL to the import page
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
@@ -221,8 +229,12 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -276,8 +288,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
"conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT, # ALL = AND logic
"conditions-0-field": "levenshtein_ratio",
"conditions-0-operator": "<",
"conditions-0-value": "0.8", # needs to be more of a diff to trigger a change
"time_between_check_use_default": "y"
"conditions-0-value": "0.8" # needs to be more of a diff to trigger a change
},
follow_redirects=True
)
@@ -286,7 +297,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
# Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions
res = client.get(
@@ -313,7 +324,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data #because this will be like 0.90 not 0.8 threshold
assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold
############### Now change it a MORE THAN 50%
test_return_data = """<html>
@@ -332,7 +343,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),

View File

@@ -81,8 +81,12 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
@@ -91,7 +95,7 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
# Add our URL to the import page
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -112,10 +116,10 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should have 'has-unread-changes' still
# It should have 'unviewed' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
# Tests the whole stack works with the CSS Filter
@@ -134,8 +138,12 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage):
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Goto the edit page, add our ignore text
@@ -146,8 +154,7 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage):
"url": test_url,
"tags": "",
"headers": "",
'fetch_backend': "html_requests",
"time_between_check_use_default": "y"},
'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -185,8 +192,12 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Goto the edit page, add our ignore text
@@ -197,8 +208,7 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa
"url": test_url,
"tags": "",
"headers": "",
'fetch_backend': "html_requests",
"time_between_check_use_default": "y"},
'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data

View File

@@ -5,7 +5,7 @@ import time
from flask import url_for
from ..html_tools import *
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
@@ -171,7 +171,6 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
"tags": "",
"headers": "",
"fetch_backend": "html_requests",
"time_between_check_use_default": "y",
},
follow_redirects=True,
)
@@ -190,7 +189,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
wait_for_all_checks(client)
# so that we set the state to 'has-unread-changes' after all the edits
# so that we set the state to 'unviewed' after all the edits
client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))
# Make a change to header/footer/nav
@@ -209,32 +208,47 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
# Re #2752
def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage):
set_response_with_multiple_index()
subtractive_selectors_data = [
### css style ###
"""body > table > tr:nth-child(1) > th:nth-child(2)
subtractive_selectors_data = ["""
body > table > tr:nth-child(1) > th:nth-child(2)
body > table > tr:nth-child(2) > td:nth-child(2)
body > table > tr:nth-child(3) > td:nth-child(2)
body > table > tr:nth-child(1) > th:nth-child(3)
body > table > tr:nth-child(2) > td:nth-child(3)
body > table > tr:nth-child(3) > td:nth-child(3)""",
### second type, xpath ###
"""//body/table/tr[1]/th[2]
//body/table/tr[2]/td[2]
//body/table/tr[3]/td[2]
//body/table/tr[1]/th[3]
//body/table/tr[2]/td[3]
//body/table/tr[3]/td[3]"""]
test_url = url_for("test_endpoint", _external=True)
for selector_list in subtractive_selectors_data:
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"subtractive_selectors": selector_list.splitlines()})
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
res = client.post(
url_for("imports.import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"subtractive_selectors": selector_list,
"url": test_url,
"tags": "",
"fetch_backend": "html_requests",
},
follow_redirects=True,
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
res = client.get(
@@ -242,7 +256,6 @@ body > table > tr:nth-child(3) > td:nth-child(3)""",
follow_redirects=True
)
# the filters above should have removed this but they never say to remove the "emil" column
assert b"Tobias" not in res.data
assert b"Linus" not in res.data
assert b"Person 2" not in res.data

View File

@@ -28,8 +28,11 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):
# Add our URL to the import page
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -56,8 +59,11 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
wait_for_all_checks(client)

View File

@@ -3,7 +3,7 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
@@ -19,15 +19,19 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):
status_code=http_code,
_external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
# no change
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert bytes(expected_text.encode('utf-8')) in res.data
@@ -43,7 +47,8 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):
#assert b'Error Screenshot' in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_http_error_handler(client, live_server, measure_memory_usage):
@@ -51,7 +56,8 @@ def test_http_error_handler(client, live_server, measure_memory_usage):
_runner_test_http_errors(client, live_server, 404, 'Page not found')
_runner_test_http_errors(client, live_server, 500, '(Internal server error) received')
_runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400')
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# Just to be sure error text is properly handled
def test_DNS_errors(client, live_server, measure_memory_usage):
@@ -81,7 +87,8 @@ def test_DNS_errors(client, live_server, measure_memory_usage):
assert found_name_resolution_error
# Should always record that we tried
assert bytes("just now".encode('utf-8')) in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# Re 1513
def test_low_level_errors_clear_correctly(client, live_server, measure_memory_usage):
@@ -120,8 +127,7 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": test_url,
"fetch_backend": "html_requests",
"time_between_check_use_default": "y"},
"fetch_backend": "html_requests"},
follow_redirects=True
)
@@ -138,4 +144,5 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us
)
assert not found_name_resolution_error
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
from ..html_tools import *
@@ -76,8 +76,12 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage):
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
@@ -91,8 +95,7 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage):
"url": test_url,
"tags": "",
"headers": "",
'fetch_backend': "html_requests",
"time_between_check_use_default": "y"
'fetch_backend': "html_requests"
},
follow_redirects=True
)
@@ -127,8 +130,12 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -142,8 +149,7 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
"url": test_url,
"tags": "",
"headers": "",
'fetch_backend': "html_requests",
"time_between_check_use_default": "y"
'fetch_backend': "html_requests"
},
follow_redirects=True
)
@@ -166,10 +172,10 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should have 'has-unread-changes' still
# It should have 'unviewed' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
# Check HTML conversion detected and workd
res = client.get(
@@ -204,19 +210,23 @@ def test_regex_error_handling(client, live_server, measure_memory_usage):
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
### test regex error handling
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"extract_text": '/something bad\d{3/XYZ',
"url": test_url,
"fetch_backend": "html_requests",
"time_between_check_use_default": "y"},
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b'is not a valid regular expression.' in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -94,8 +94,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
"title": "my title",
"headers": "",
"include_filters": '.ticket-available',
"fetch_backend": "html_requests",
"time_between_check_use_default": "y"})
"fetch_backend": "html_requests"})
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),

View File

@@ -1,8 +1,10 @@
import os
import time
from loguru import logger
from flask import url_for
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output
from ..notification import valid_notification_formats
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
wait_for_notification_endpoint_output
from changedetectionio.model import App
def set_response_with_filter():
@@ -21,14 +23,13 @@ def set_response_with_filter():
f.write(test_return_data)
return None
def run_filter_test(client, live_server, content_filter, app_notification_format):
def run_filter_test(client, live_server, content_filter):
# Response WITHOUT the filter ID element
set_original_response()
live_server.app.config['DATASTORE'].data['settings']['application']['notification_format'] = app_notification_format
# Goto the edit page, add our ignore text
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'post')
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
@@ -41,8 +42,13 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
if os.path.isfile("test-datastore/notification.txt"):
os.unlink("test-datastore/notification.txt")
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
@@ -66,7 +72,6 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
"notification_format": "Text",
"fetch_backend": "html_requests",
"filter_failure_notification_send": 'y',
"time_between_check_use_default": "y",
"headers": "",
"tags": "my tag",
"title": "my title 123",
@@ -126,23 +131,8 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()
assert 'Your configured CSS/xPath filters' in notification
# Text (or HTML conversion) markup to make the notifications a little nicer should have worked
if app_notification_format.startswith('html'):
# apprise should have used sax-escape (&#39; instead of &quot;, " etc), lets check it worked
from apprise.conversion import convert_between
from apprise.common import NotifyFormat
escaped_filter = convert_between(NotifyFormat.TEXT, NotifyFormat.HTML, content_filter)
assert escaped_filter in notification or escaped_filter.replace('&quot;', '&#34;') in notification
assert 'a href="' in notification # Quotes should still be there so the link works
else:
assert 'a href' not in notification
assert content_filter in notification
assert 'CSS/xPath filter was not present in the page' in notification
assert content_filter.replace('"', '\\"') in notification
# Remove it and prove that it doesn't trigger when not expected
# It should register a change, but no 'filter not found'
@@ -173,20 +163,14 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
os.unlink("test-datastore/notification.txt")
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
# # live_server_setup(live_server) # Setup on conftest per function
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('HTML Color'))
# Check markup send conversion didnt affect plaintext preference
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('Text'))
# # live_server_setup(live_server) # Setup on conftest per function
run_filter_test(client, live_server,'#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
# # live_server_setup(live_server) # Setup on conftest per function
run_filter_test(client=client, live_server=live_server, content_filter='//*[@id="nope-doesnt-exist"]', app_notification_format=valid_notification_formats.get('HTML Color'))
# # live_server_setup(live_server) # Setup on conftest per function
run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent
def test_basic_markup_from_text(client, live_server, measure_memory_usage):
# Test the notification error templates convert to HTML if needed (link activate)
from ..notification.handler import markup_text_links_to_html
x = markup_text_links_to_html("hello https://google.com")
assert 'a href' in x

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client
import os
@@ -127,7 +127,8 @@ def test_setup_group_tag(client, live_server, measure_memory_usage):
assert b"should-be-excluded" not in res.data
assert res.status_code == 200
assert b"first-imported=1" in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_tag_import_singular(client, live_server, measure_memory_usage):
@@ -146,7 +147,8 @@ def test_tag_import_singular(client, live_server, measure_memory_usage):
)
# Should be only 1 tag because they both had the same
assert res.data.count(b'test-tag') == 1
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_tag_add_in_ui(client, live_server, measure_memory_usage):
@@ -162,7 +164,8 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage):
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_group_tag_notification(client, live_server, measure_memory_usage):
@@ -229,7 +232,8 @@ def test_group_tag_notification(client, live_server, measure_memory_usage):
#@todo Test that multiple notifications fired
#@todo Test that each of multiple notifications with different settings
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_limit_tag_ui(client, live_server, measure_memory_usage):
@@ -260,12 +264,15 @@ def test_limit_tag_ui(client, live_server, measure_memory_usage):
client.get(url_for('ui.mark_all_viewed', tag=tag_uuid), follow_redirects=True)
wait_for_all_checks(client)
with open('/tmp/fuck.html', 'wb') as f:
f.write(res.data)
# Should be only 1 unviewed
res = client.get(url_for("watchlist.index"))
assert res.data.count(b' unviewed ') == 1
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data
@@ -292,7 +299,8 @@ def test_clone_tag_on_import(client, live_server, measure_memory_usage):
# 2 times plus the top link to tag
assert res.data.count(b'test-tag') == 3
assert res.data.count(b'another-tag') == 3
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage):
@@ -319,7 +327,8 @@ def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usa
# 2 times plus the top link to tag
assert res.data.count(b'test-tag') == 3
assert res.data.count(b'another-tag') == 3
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data
@@ -382,8 +391,12 @@ def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measu
f.write(d)
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
filters = [
@@ -411,8 +424,7 @@ def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measu
"url": test_url,
"tags": "test-tag-keep-order",
"headers": "",
'fetch_backend': "html_requests",
"time_between_check_use_default": "y"},
'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -469,4 +481,5 @@ the {test} appeared before. {test in res.data[:n]=}
"""
n += t_index + len(test)
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -3,8 +3,9 @@
import time
import os
import json
import logging
from flask import url_for
from .util import wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
from urllib.parse import urlparse, parse_qs
def test_consistent_history(client, live_server, measure_memory_usage):
@@ -80,15 +81,19 @@ def test_consistent_history(client, live_server, measure_memory_usage):
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
def test_check_text_history_view(client, live_server, measure_memory_usage):
def test_check_text_history_view(client, live_server):
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("<html>test-one</html>")
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -117,4 +122,5 @@ def test_check_text_history_view(client, live_server, measure_memory_usage):
assert b'test-two' in res.data
assert b'test-one' not in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -27,8 +27,12 @@ def test_ignore(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
set_original_ignore_response()
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -54,35 +58,3 @@ def test_ignore(client, live_server, measure_memory_usage):
# Should be in base.html
assert b'csrftoken' in res.data
def test_strip_ignore_lines(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
set_original_ignore_response()
# Goto the settings page, add our ignore text
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-strip_ignored_lines": "y",
"application-global_ignore_text": "Which is across multiple",
'application-fetch_backend': "html_requests"
},
follow_redirects=True
)
assert b"Settings updated." in res.data
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
# It should not be in the preview anymore
res = client.get(url_for("ui.ui_views.preview_page", uuid=uuid))
assert b'<div class="ignored">' not in res.data
assert b'Which is across multiple' not in res.data

View File

@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
from changedetectionio import html_tools
@@ -97,8 +97,12 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -107,7 +111,7 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Add our URL to the import page
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"ignore_text": ignore_text, "url": test_url, 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
data={"ignore_text": ignore_text, "url": test_url, 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -124,9 +128,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
# Make a change
@@ -137,9 +141,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
@@ -150,7 +154,7 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
res = client.get(url_for("ui.ui_views.preview_page", uuid="first"))
@@ -159,7 +163,8 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# it is only ignored, it is not removed (it will be highlighted too)
assert b'new ignore stuff' in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# When adding some ignore text, it should not trigger a change, even if something else on that line changes
def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
@@ -187,8 +192,12 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
# Switch to source mode so we can test that too!
test_url = "source:"+test_url
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
@@ -196,7 +205,7 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
#Adding some ignore text should not trigger a change
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -213,9 +222,9 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class), adding random ignore text should not cause a change
# It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
#####
@@ -229,10 +238,10 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
# Just to be sure.. set a regular modified change that will trigger it
@@ -240,14 +249,15 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
def test_check_global_ignore_text_functionality(client, live_server):
_run_test_global_ignore(client, as_source=False)
def test_check_global_ignore_text_functionality_as_source(client, live_server, measure_memory_usage):
def test_check_global_ignore_text_functionality_as_source(client, live_server):
_run_test_global_ignore(client, as_source=True, extra_ignore='/\?v=\d/')

View File

@@ -3,7 +3,9 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
def set_original_ignore_response():
@@ -109,11 +111,13 @@ def test_render_anchor_tag_content_true(client, live_server, measure_memory_usag
assert '(/modified_link)' in res.data.decode()
# since the link has changed, and we chose to render anchor tag content,
# we should detect a change (new 'has-unread-changes' class)
# we should detect a change (new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b"unviewed" in res.data
assert b"/test-endpoint" in res.data
# Cleanup everything
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"),
follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -60,8 +60,12 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
@@ -73,9 +77,9 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data
assert b'/test-endpoint' in res.data
@@ -90,8 +94,12 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measu
# Add our URL to the import page
test_url = url_for('test_endpoint', status_code=403, _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
@@ -100,7 +108,7 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measu
# Add our URL to the import page
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"ignore_status_codes": "y", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
data={"ignore_status_codes": "y", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -116,8 +124,8 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measu
# Give the thread time to pick it up
wait_for_all_checks(client)
# It should have 'has-unread-changes' still
# It should have 'unviewed' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'unviewed' in res.data

View File

@@ -70,8 +70,12 @@ def test_check_ignore_whitespace(client, live_server, measure_memory_usage):
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
@@ -85,7 +89,7 @@ def test_check_ignore_whitespace(client, live_server, measure_memory_usage):
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'has-unread-changes' class)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data

View File

@@ -5,7 +5,7 @@ import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
from .util import live_server_setup, wait_for_all_checks
# def test_setup(client, live_server, measure_memory_usage):
@@ -28,7 +28,7 @@ https://example.com tag1, other tag"""
assert b"3 Imported" in res.data
assert b"tag1" in res.data
assert b"other tag" in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
# Clear flask alerts
res = client.get( url_for("watchlist.index"))
@@ -53,7 +53,7 @@ def xtest_import_skip_url(client, live_server, measure_memory_usage):
assert b"1 Imported" in res.data
assert b"ht000000broken" in res.data
assert b"1 Skipped" in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
# Clear flask alerts
res = client.get( url_for("watchlist.index"))
@@ -119,7 +119,7 @@ def test_import_distillio(client, live_server, measure_memory_usage):
assert b"nice stuff" in res.data
assert b"nerd-news" in res.data
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
# Clear flask alerts
res = client.get(url_for("watchlist.index"))
@@ -169,7 +169,8 @@ def test_import_custom_xlsx(client, live_server, measure_memory_usage):
assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]'
assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_import_watchete_xlsx(client, live_server, measure_memory_usage):
"""Test can upload a excel spreadsheet and the watches are created correctly"""
@@ -213,4 +214,5 @@ def test_import_watchete_xlsx(client, live_server, measure_memory_usage):
if watch.get('title') == 'system default website':
assert watch.get('fetch_backend') == 'system' # uses default if blank
delete_all_watches(client)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

Some files were not shown because too many files have changed in this diff Show More