mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-02-17 03:36:08 +00:00
Compare commits
3 Commits
0.53.2
...
dont-repro
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
822d1782a3 | ||
|
|
0866a85934 | ||
|
|
528ef378da |
@@ -79,7 +79,7 @@ class Tag(Resource):
|
||||
'browser_steps_last_error_step', 'check_count', 'consecutive_filter_failures',
|
||||
'content-type', 'fetch_time', 'last_changed', 'last_checked', 'last_error',
|
||||
'last_notification_error', 'last_viewed', 'notification_alert_count',
|
||||
'page_title', 'previous_md5', 'previous_md5_before_filters', 'remote_server_reply'
|
||||
'page_title', 'previous_md5', 'remote_server_reply'
|
||||
}
|
||||
|
||||
# Create clean tag dict without Watch-specific fields
|
||||
@@ -160,6 +160,11 @@ class Tag(Resource):
|
||||
tag.update(json_data)
|
||||
tag.commit()
|
||||
|
||||
# Clear checksums for all watches using this tag to force reprocessing
|
||||
# Tag changes affect inherited configuration
|
||||
cleared_count = self.datastore.clear_checksums_for_tag(uuid)
|
||||
logger.info(f"Tag {uuid} updated via API, cleared {cleared_count} watch checksums")
|
||||
|
||||
return "OK", 200
|
||||
|
||||
|
||||
|
||||
@@ -70,46 +70,6 @@ def _resolve_schema_properties(schema_name):
|
||||
|
||||
return properties
|
||||
|
||||
@functools.cache
|
||||
def _resolve_readonly_fields(schema_name):
|
||||
"""
|
||||
Generic helper to resolve readOnly fields, including allOf inheritance.
|
||||
|
||||
Args:
|
||||
schema_name: Name of the schema (e.g., 'Watch', 'Tag')
|
||||
|
||||
Returns:
|
||||
frozenset: All readOnly field names including inherited ones
|
||||
"""
|
||||
spec_dict = get_openapi_schema_dict()
|
||||
schema = spec_dict['components']['schemas'].get(schema_name, {})
|
||||
|
||||
readonly_fields = set()
|
||||
|
||||
# Handle allOf (schema inheritance)
|
||||
if 'allOf' in schema:
|
||||
for item in schema['allOf']:
|
||||
# Resolve $ref to parent schema
|
||||
if '$ref' in item:
|
||||
ref_path = item['$ref'].split('/')[-1]
|
||||
ref_schema = spec_dict['components']['schemas'].get(ref_path, {})
|
||||
if 'properties' in ref_schema:
|
||||
for field_name, field_def in ref_schema['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
# Check schema-specific properties
|
||||
if 'properties' in item:
|
||||
for field_name, field_def in item['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
else:
|
||||
# Direct properties (no inheritance)
|
||||
if 'properties' in schema:
|
||||
for field_name, field_def in schema['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
|
||||
return frozenset(readonly_fields)
|
||||
|
||||
@functools.cache
|
||||
def get_watch_schema_properties():
|
||||
@@ -120,14 +80,8 @@ def get_watch_schema_properties():
|
||||
"""
|
||||
return _resolve_schema_properties('WatchBase')
|
||||
|
||||
@functools.cache
|
||||
def get_readonly_watch_fields():
|
||||
"""
|
||||
Extract readOnly field names from Watch schema in OpenAPI spec.
|
||||
|
||||
Returns readOnly fields from WatchBase (uuid, date_created) + Watch-specific readOnly fields.
|
||||
"""
|
||||
return _resolve_readonly_fields('Watch')
|
||||
# Import readonly field utilities from shared module (avoids circular dependencies with model layer)
|
||||
from changedetectionio.model.schema_utils import get_readonly_watch_fields, get_readonly_tag_fields
|
||||
|
||||
@functools.cache
|
||||
def get_tag_schema_properties():
|
||||
@@ -138,15 +92,6 @@ def get_tag_schema_properties():
|
||||
"""
|
||||
return _resolve_schema_properties('Tag')
|
||||
|
||||
@functools.cache
|
||||
def get_readonly_tag_fields():
|
||||
"""
|
||||
Extract readOnly field names from Tag schema in OpenAPI spec.
|
||||
|
||||
Returns readOnly fields from WatchBase (uuid, date_created) + Tag-specific readOnly fields.
|
||||
"""
|
||||
return _resolve_readonly_fields('Tag')
|
||||
|
||||
def validate_openapi_request(operation_id):
|
||||
"""Decorator to validate incoming requests against OpenAPI spec."""
|
||||
def decorator(f):
|
||||
|
||||
@@ -83,6 +83,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||
datastore.commit()
|
||||
|
||||
# Clear all checksums to force reprocessing with new settings
|
||||
# Global settings can affect watch behavior (filters, rendering, etc.)
|
||||
datastore.clear_all_last_checksums()
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
@@ -244,6 +244,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
tag.update(form.data)
|
||||
tag['processor'] = 'restock_diff'
|
||||
tag.commit()
|
||||
|
||||
# Clear checksums for all watches using this tag to force reprocessing
|
||||
# Tag changes affect inherited configuration
|
||||
cleared_count = datastore.clear_checksums_for_tag(uuid)
|
||||
logger.info(f"Tag {uuid} updated, cleared {cleared_count} watch checksums")
|
||||
|
||||
flash(gettext("Updated"))
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@@ -335,7 +335,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
'last_notification_error': False,
|
||||
'last_viewed': 0,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False,
|
||||
'remote_server_reply': None,
|
||||
'track_ldjson_price_data': None
|
||||
})
|
||||
@@ -386,10 +385,16 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
@property
|
||||
def is_pdf(self):
|
||||
# content_type field is set in the future
|
||||
# https://github.com/dgtlmoon/changedetection.io/issues/1392
|
||||
# Not sure the best logic here
|
||||
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
|
||||
url = str(self.get("url") or "").lower()
|
||||
content_type = str(self.get("content-type") or "").lower()
|
||||
|
||||
if content_type in ("none", "null", ""):
|
||||
content_type = ""
|
||||
|
||||
return (
|
||||
url.endswith(".pdf")
|
||||
or content_type.split(";")[0].strip() == "application/pdf"
|
||||
)
|
||||
|
||||
@property
|
||||
def label(self):
|
||||
|
||||
@@ -129,7 +129,6 @@ class watch_base(dict):
|
||||
fetch_time (float): Duration of last fetch in seconds
|
||||
consecutive_filter_failures (int): Counter for consecutive filter match failures
|
||||
previous_md5 (str|bool): MD5 hash of previous content
|
||||
previous_md5_before_filters (str|bool): MD5 hash before filters applied
|
||||
history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
|
||||
|
||||
Conditions:
|
||||
@@ -166,6 +165,10 @@ class watch_base(dict):
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
# IMPORTANT: Don't initialize __watch_was_edited yet!
|
||||
# We'll initialize it AFTER the initial update() call below
|
||||
# This prevents marking the watch as edited during initialization
|
||||
|
||||
self.update({
|
||||
# Custom notification content
|
||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||
@@ -211,7 +214,6 @@ class watch_base(dict):
|
||||
'page_title': None, # <title> from the page
|
||||
'paused': False,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
|
||||
'processor': 'text_json_diff', # could be restock_diff or others from .processors
|
||||
'price_change_threshold_percent': None,
|
||||
'proxy': None, # Preferred proxy connection
|
||||
@@ -297,9 +299,116 @@ class watch_base(dict):
|
||||
|
||||
super(watch_base, self).__init__(*arg, **kw)
|
||||
|
||||
# Check if we're being initialized from an existing watch object
|
||||
# that has was_edited=True, so we can preserve the flag
|
||||
preserve_edited_flag = False
|
||||
if self.get('default'):
|
||||
# When creating a new watch object from an existing one (e.g., changing processor),
|
||||
# preserve the was_edited flag if it was True
|
||||
default_watch = self.get('default')
|
||||
if hasattr(default_watch, 'was_edited') and default_watch.was_edited:
|
||||
preserve_edited_flag = True
|
||||
del self['default']
|
||||
|
||||
# NOW initialize the edited flag after all initial setup is complete
|
||||
# This ensures initialization doesn't trigger the edited flag
|
||||
# But preserve it if the source watch had it set to True
|
||||
self.__watch_was_edited = preserve_edited_flag
|
||||
|
||||
def _mark_field_as_edited(self, key):
    """
    Flag the watch as edited when ``key`` names a user-writable field.

    Internal helper shared by the dict-mutating overrides
    (__setitem__, update(), pop(), etc.).

    Args:
        key: Name of the field that was just written or removed.
    """
    # Nothing to do while the flag attribute does not exist yet (initial load),
    # or once the watch is already marked as edited
    if not hasattr(self, '_watch_base__watch_was_edited') or self.__watch_was_edited:
        return

    # Pulled from the shared schema utilities (no circular dependency)
    from .schema_utils import get_readonly_watch_fields
    spec_readonly = get_readonly_watch_fields()

    # System-managed fields that are not in the OpenAPI spec (yet); they are
    # written by processors/workers and must not count as a user edit
    system_managed = {
        'last_check_status',  # Set by processors
        'restock',  # Set by restock processor
        'last_viewed',  # Set by mark_all_viewed endpoint
    }

    # Only a user-writable field flips the flag
    user_writable = key not in spec_readonly and key not in system_managed
    if user_writable:
        self.__watch_was_edited = True
|
||||
|
||||
def __setitem__(self, key, value):
    """
    Override dict.__setitem__ to track when writable watch fields are modified.

    This enables skipping reprocessing when:
    1. HTML content is unchanged (checksumFromPreviousCheckWasTheSame)
    2. AND watch configuration was not edited

    Only sets the edited flag when field is NOT in readonly_fields (from OpenAPI spec).

    Args:
        key: Name of the field being assigned.
        value: Value stored under ``key``.
    """
    # Set the value first (always)
    super().__setitem__(key, value)
    # Mark as edited if writable field (the helper decides whether the key counts)
    self._mark_field_as_edited(key)
|
||||
|
||||
def __delitem__(self, key):
    """
    Override dict.__delitem__ to track deletions of writable fields.

    Args:
        key: Name of the field being removed.
    """
    # Delete first; the edit tracking below only runs if the parent call succeeded
    super().__delitem__(key)
    self._mark_field_as_edited(key)
|
||||
|
||||
def update(self, *args, **kwargs):
    """
    Override dict.update() to track modifications to writable fields.

    Accepts everything dict.update() accepts: a mapping, an iterable of
    (key, value) pairs, and/or keyword arguments.

    Args:
        *args: At most one positional mapping or iterable of key/value pairs.
        **kwargs: Additional key/value pairs to store.
    """
    # Normalise the input into a plain dict before touching self. dict.update()
    # also accepts an iterable of pairs (which has no .keys()) and one-shot
    # iterators (which cannot be walked a second time), so the keys have to be
    # captured up front rather than read back from args[0] afterwards.
    incoming = dict(*args, **kwargs)

    # Call parent update
    super().update(incoming)

    # Mark as edited for any writable fields that were updated
    for key in incoming:
        self._mark_field_as_edited(key)
|
||||
|
||||
def pop(self, key, *args):
    """
    Override dict.pop() to track removal of writable fields.

    Args:
        key: Name of the field to remove.
        *args: Optional default returned when ``key`` is absent.

    Returns:
        The removed value, or the supplied default when ``key`` was absent.

    Raises:
        KeyError: If ``key`` is absent and no default was supplied.
    """
    # Only an actual removal is a modification; returning the default for a
    # missing key leaves the dict untouched (same rule setdefault() applies)
    existed = key in self
    result = super().pop(key, *args)
    if existed:
        self._mark_field_as_edited(key)
    return result
|
||||
|
||||
def setdefault(self, key, default=None):
    """
    Override dict.setdefault() to track modifications to writable fields.

    Args:
        key: Name of the field to look up or create.
        default: Value stored when ``key`` is not present yet.

    Returns:
        The value now stored under ``key``.
    """
    # Decide before the call whether this will insert a new entry
    is_new_key = key not in self
    value = super().setdefault(key, default)
    # An existing key is left untouched, so only an insertion counts as an edit
    if is_new_key:
        self._mark_field_as_edited(key)
    return value
|
||||
|
||||
@property
def was_edited(self):
    """
    Check if watch configuration was edited since last processing.

    Returns:
        bool: True if writable fields were modified, False otherwise
    """
    # Looked up by its name-mangled attribute name; falls back to False when
    # the flag attribute has not been created on this instance
    return getattr(self, '_watch_base__watch_was_edited', False)
|
||||
|
||||
def reset_watch_edited_flag(self):
    """
    Reset the watch edited flag after successful processing.

    Call this after processing completes to allow future content-only change detection.
    """
    # Unconditionally clears the flag; later writes to writable fields set it again
    self.__watch_was_edited = False
|
||||
|
||||
@classmethod
|
||||
def get_property_names(cls):
|
||||
"""
|
||||
|
||||
92
changedetectionio/model/schema_utils.py
Normal file
92
changedetectionio/model/schema_utils.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""
|
||||
Schema utilities for Watch and Tag models.
|
||||
|
||||
Provides functions to extract readonly fields and properties from OpenAPI spec.
|
||||
Shared by both the model layer and API layer to avoid circular dependencies.
|
||||
"""
|
||||
|
||||
import functools
|
||||
|
||||
|
||||
@functools.cache
def get_openapi_schema_dict():
    """
    Load the raw OpenAPI spec as a dictionary for schema access.

    Returns the parsed YAML dict directly (not the OpenAPI object). The result
    is memoised by functools.cache, so the file is read on the first call only.
    """
    import os
    import yaml

    module_dir = os.path.dirname(__file__)

    # Preferred location first; otherwise fall back one directory level up less
    spec_path = os.path.join(module_dir, '../../docs/api-spec.yaml')
    if not os.path.exists(spec_path):
        spec_path = os.path.join(module_dir, '../docs/api-spec.yaml')

    with open(spec_path, 'r', encoding='utf-8') as spec_file:
        spec = yaml.safe_load(spec_file)
    return spec
|
||||
|
||||
|
||||
@functools.cache
def _resolve_readonly_fields(schema_name):
    """
    Generic helper to resolve readOnly fields, including allOf inheritance.

    Walks the named schema recursively: its own ``properties``, every entry of
    ``allOf``, and every schema reached through a ``$ref``. This covers schemas
    that combine ``allOf`` with direct properties and parents that themselves
    use ``allOf``.

    Args:
        schema_name: Name of the schema (e.g., 'Watch', 'Tag')

    Returns:
        frozenset: All readOnly field names including inherited ones
    """
    all_schemas = get_openapi_schema_dict()['components']['schemas']

    readonly_fields = set()
    # Schema names already expanded; guards against circular $ref chains
    visited = {schema_name}

    def collect(node):
        # Resolve $ref to the referenced (parent) schema, once per schema name
        ref = node.get('$ref')
        if ref:
            ref_name = ref.split('/')[-1]
            if ref_name not in visited:
                visited.add(ref_name)
                collect(all_schemas.get(ref_name, {}))

        # Properties declared directly on this node
        for field_name, field_def in (node.get('properties') or {}).items():
            if field_def.get('readOnly') is True:
                readonly_fields.add(field_name)

        # Handle allOf (schema inheritance)
        for item in node.get('allOf') or ():
            collect(item)

    collect(all_schemas.get(schema_name, {}))

    return frozenset(readonly_fields)
|
||||
|
||||
|
||||
@functools.cache
def get_readonly_watch_fields():
    """
    Extract readOnly field names from Watch schema in OpenAPI spec.

    Returns readOnly fields from WatchBase (uuid, date_created) + Watch-specific readOnly fields.

    Used by:
    - model/watch_base.py: Track when writable fields are edited
    - api/Watch.py: Filter readonly fields from PUT requests

    The result is memoised by functools.cache, so the lookup runs on the first call only.
    """
    # Delegates to the generic resolver for the 'Watch' schema
    return _resolve_readonly_fields('Watch')
|
||||
|
||||
|
||||
@functools.cache
def get_readonly_tag_fields():
    """
    Extract readOnly field names from Tag schema in OpenAPI spec.

    Returns readOnly fields from WatchBase (uuid, date_created) + Tag-specific readOnly fields.

    The result is memoised by functools.cache, so the lookup runs on the first call only.
    """
    # Delegates to the generic resolver for the 'Tag' schema
    return _resolve_readonly_fields('Tag')
|
||||
@@ -1,6 +1,6 @@
|
||||
from functools import lru_cache
|
||||
from loguru import logger
|
||||
from flask_babel import gettext
|
||||
from flask_babel import gettext, get_locale
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
@@ -190,14 +190,15 @@ def get_plugin_processor_metadata():
|
||||
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||
return metadata
|
||||
|
||||
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
@lru_cache(maxsize=32)
|
||||
def _available_processors_cached(locale_str):
|
||||
"""
|
||||
Internal cached function that includes locale in cache key.
|
||||
This ensures translations are cached per-language instead of globally.
|
||||
|
||||
:param locale_str: The locale string (e.g., 'en', 'it', 'zh')
|
||||
:return: A list of tuples (processor_name, translated_description, weight)
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if DISABLED_PROCESSORS env var is set
|
||||
@@ -256,6 +257,22 @@ def available_processors():
|
||||
# Return as tuples without weight (for backwards compatibility)
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
def available_processors():
    """
    List the processors (name and description) offered in the UI elements.

    Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).

    The work is delegated to a locale-aware cached function, so translated
    descriptions are cached per language instead of globally.

    :return: A list of tuples (processor_name, translated_description)
    """
    active_locale = get_locale()

    # The Babel Locale object is turned into a string to serve as the cache key;
    # fall back to 'en' when no locale is available
    if active_locale:
        cache_key = str(active_locale)
    else:
        cache_key = 'en'

    return _available_processors_cached(cache_key)
|
||||
|
||||
|
||||
def get_default_processor():
|
||||
"""
|
||||
|
||||
@@ -19,6 +19,7 @@ class difference_detection_processor():
|
||||
xpath_data = None
|
||||
preferred_proxy = None
|
||||
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
||||
last_raw_content_checksum = None
|
||||
|
||||
def __init__(self, datastore, watch_uuid):
|
||||
self.datastore = datastore
|
||||
@@ -34,6 +35,64 @@ class difference_detection_processor():
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
|
||||
# Load the last raw content checksum from file
|
||||
self.read_last_raw_content_checksum()
|
||||
|
||||
def update_last_raw_content_checksum(self, checksum):
    """
    Persist the raw content MD5 checksum to 'last-checksum.txt' in the watch's data directory.

    The stored value drives the skip logic - reprocessing is avoided when the
    raw HTML is unchanged. Nothing happens when ``checksum`` is empty, the
    watch is unknown, or the watch has no data directory.

    Args:
        checksum: Checksum string to store.
    """
    if not checksum:
        return

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    data_dir = watch.data_dir if watch else None
    if not data_dir:
        return

    watch.ensure_data_dir_exists()
    target_path = os.path.join(data_dir, 'last-checksum.txt')

    try:
        with open(target_path, 'w', encoding='utf-8') as handle:
            handle.write(checksum)
    except IOError as e:
        logger.warning(f"Failed to write checksum file for {self.watch_uuid}: {e}")
    else:
        # Only remember the checksum in memory once it is safely on disk
        self.last_raw_content_checksum = checksum
|
||||
|
||||
def read_last_raw_content_checksum(self):
    """
    Read the last raw content MD5 checksum from file.

    Stores the result in ``self.last_raw_content_checksum``. The attribute is
    set to None when the watch is unknown, the watch has no data directory,
    'last-checksum.txt' does not exist (first run), the file is empty, or the
    file cannot be read or decoded.
    """
    # Start from "no checksum" so every early exit below leaves a defined value
    self.last_raw_content_checksum = None

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    if not watch:
        return

    data_dir = watch.data_dir
    if not data_dir:
        return

    checksum_file = os.path.join(data_dir, 'last-checksum.txt')
    if not os.path.isfile(checksum_file):
        return

    try:
        with open(checksum_file, 'r', encoding='utf-8') as f:
            # An empty or whitespace-only file carries no checksum
            self.last_raw_content_checksum = f.read().strip() or None
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError: without catching it
        # a corrupted checksum file would raise out of this method
        logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
        self.last_raw_content_checksum = None
|
||||
|
||||
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
@@ -257,8 +316,16 @@ class difference_detection_processor():
|
||||
except IOError as e:
|
||||
logger.error(f"Failed to write extra watch config {filename}: {e}")
|
||||
|
||||
def get_raw_document_checksum(self):
    """
    Compute the MD5 hex digest of the fetched raw content.

    Returns:
        The hex digest of the UTF-8 encoded fetcher content, or None when the
        fetcher holds no content.
    """
    raw_content = self.fetcher.content
    if not raw_content:
        return None

    return hashlib.md5(raw_content.encode('utf-8')).hexdigest()
|
||||
|
||||
@abstractmethod
|
||||
def run_changedetection(self, watch):
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
some_data = 'xxxxx'
|
||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||
|
||||
@@ -30,7 +30,7 @@ class perform_site_check(difference_detection_processor):
|
||||
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
||||
screenshot_format = SCREENSHOT_FORMAT_PNG
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
"""
|
||||
Perform screenshot comparison using OpenCV subprocess handler.
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from ..base import difference_detection_processor
|
||||
from ..exceptions import ProcessorException
|
||||
from . import Restock
|
||||
from loguru import logger
|
||||
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||
|
||||
import urllib3
|
||||
import time
|
||||
@@ -403,22 +404,37 @@ class perform_site_check(difference_detection_processor):
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
import hashlib
|
||||
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||
# Skip processing only if BOTH conditions are true:
|
||||
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||
# separate checks for trigger_text or other processing rules.
|
||||
if (not force_reprocess and
|
||||
not watch.was_edited and
|
||||
self.last_raw_content_checksum and
|
||||
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
|
||||
# Unset any existing notification error
|
||||
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}
|
||||
|
||||
self.screenshot = self.fetcher.screenshot
|
||||
self.xpath_data = self.fetcher.xpath_data
|
||||
|
||||
# Track the content type
|
||||
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
|
||||
# Track the content type (readonly field, doesn't trigger was_edited)
|
||||
update_obj['content-type'] = self.fetcher.headers.get('Content-Type', '') # Use hyphen (matches OpenAPI spec)
|
||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||
|
||||
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
from ...html_tools import html_to_text
|
||||
|
||||
@@ -17,7 +17,8 @@ def _task(watch, update_handler):
|
||||
|
||||
try:
|
||||
# The slow process (we run 2 of these in parallel)
|
||||
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
|
||||
# Always force reprocess for preview - we want to show the filtered content regardless of checksums
|
||||
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch, force_reprocess=True)
|
||||
except FilterNotFoundInResponse as e:
|
||||
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
||||
except ReplyWithContentButNoText as e:
|
||||
|
||||
@@ -7,6 +7,7 @@ import re
|
||||
import urllib3
|
||||
|
||||
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||
from ..base import difference_detection_processor
|
||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
@@ -368,12 +369,24 @@ class ChecksumCalculator:
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check(difference_detection_processor):
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
changed_detected = False
|
||||
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||
# Skip processing only if BOTH conditions are true:
|
||||
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||
# separate checks for trigger_text or other processing rules.
|
||||
if (not force_reprocess and
|
||||
not watch.was_edited and
|
||||
self.last_raw_content_checksum and
|
||||
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
|
||||
# Initialize components
|
||||
filter_config = FilterConfig(watch, self.datastore)
|
||||
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
|
||||
@@ -391,9 +404,11 @@ class perform_site_check(difference_detection_processor):
|
||||
self.screenshot = self.fetcher.screenshot
|
||||
self.xpath_data = self.fetcher.xpath_data
|
||||
|
||||
# Track the content type and checksum before filters
|
||||
update_obj['content_type'] = ctype_header
|
||||
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
|
||||
# Track the content type (readonly field, doesn't trigger was_edited)
|
||||
update_obj['content-type'] = ctype_header # Use hyphen (matches OpenAPI spec and watch_base default)
|
||||
|
||||
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||
|
||||
# === CONTENT PREPROCESSING ===
|
||||
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
|
||||
|
||||
@@ -456,6 +456,63 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
self.__data['settings']['application']['password'] = False
|
||||
self.commit()
|
||||
|
||||
def clear_all_last_checksums(self):
    """
    Remove every watch's last-checksum.txt so that all watches get reprocessed.

    Intended for global settings changes: watches inherit configuration and
    need to reprocess even if their individual watch dict hasn't been modified.

    Note: The checksum file is deleted instead of setting was_edited=True because:
    - was_edited is not persisted across restarts
    - File deletion ensures reprocessing works across app restarts

    Returns:
        int: Number of checksum files deleted
    """
    removed = 0
    for uuid, watch in self.__data['watching'].items():
        # Watches without a data directory have nothing to clear
        if not watch.data_dir:
            continue

        checksum_path = os.path.join(watch.data_dir, 'last-checksum.txt')
        if not os.path.isfile(checksum_path):
            continue

        try:
            os.remove(checksum_path)
            removed += 1
            logger.debug(f"Cleared checksum for watch {uuid}")
        except OSError as e:
            # Best effort: a failed delete is reported and the loop carries on
            logger.warning(f"Failed to delete checksum file for {uuid}: {e}")

    logger.info(f"Cleared {removed} checksum files to force reprocessing")
    return removed
|
||||
|
||||
def clear_checksums_for_tag(self, tag_uuid):
    """
    Remove last-checksum.txt for every watch that uses the given tag.

    Intended for tag configuration edits: watches inherit tag settings and
    need to reprocess.

    Args:
        tag_uuid: UUID of the tag that was modified

    Returns:
        int: Number of checksum files deleted
    """
    removed = 0
    for uuid, watch in self.__data['watching'].items():
        # Skip watches that have no tags or do not carry this tag
        if not watch.get('tags') or tag_uuid not in watch['tags']:
            continue
        # Watches without a data directory have nothing to clear
        if not watch.data_dir:
            continue

        checksum_path = os.path.join(watch.data_dir, 'last-checksum.txt')
        if not os.path.isfile(checksum_path):
            continue

        try:
            os.remove(checksum_path)
            removed += 1
            logger.debug(f"Cleared checksum for watch {uuid} (tag {tag_uuid})")
        except OSError as e:
            # Best effort: a failed delete is reported and the loop carries on
            logger.warning(f"Failed to delete checksum file for {uuid}: {e}")

    logger.info(f"Cleared {removed} checksum files for tag {tag_uuid}")
    return removed
|
||||
|
||||
def commit(self):
|
||||
"""
|
||||
Save settings immediately to disk using atomic write.
|
||||
|
||||
@@ -54,11 +54,11 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
backup = ZipFile(io.BytesIO(res.data))
|
||||
l = backup.namelist()
|
||||
|
||||
# Check for UUID-based txt files (history and snapshot)
|
||||
# Check for UUID-based txt files (history, snapshot, and last-checksum)
|
||||
uuid4hex_txt = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
txt_files = list(filter(uuid4hex_txt.match, l))
|
||||
# Should be two txt files in the archive (history and the snapshot)
|
||||
assert len(txt_files) == 2
|
||||
# Should be three txt files in the archive (history, snapshot, and last-checksum)
|
||||
assert len(txt_files) == 3
|
||||
|
||||
# Check for watch.json files (new format)
|
||||
uuid4hex_json = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
|
||||
|
||||
@@ -71,22 +71,19 @@ def test_include_filters_output():
|
||||
|
||||
# Tests the whole stack works with the CSS Filter
|
||||
def test_check_markup_include_filters_restriction(client, live_server, measure_memory_usage, datastore_path):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
include_filters = "#sametext"
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# Go to the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
@@ -103,15 +100,15 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
|
||||
)
|
||||
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Make a change
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# It should have 'has-unread-changes' still
|
||||
# Because it should be looking at only that 'sametext' id
|
||||
|
||||
@@ -106,7 +106,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
|
||||
# Find the snapshot one
|
||||
for fname in files_in_watch_dir:
|
||||
if fname != 'history.txt' and fname != 'watch.json' and 'html' not in fname:
|
||||
if fname != 'history.txt' and fname != 'watch.json' and fname != 'last-checksum.txt' and 'html' not in fname:
|
||||
if strtobool(os.getenv("TEST_WITH_BROTLI")):
|
||||
assert fname.endswith('.br'), "Forced TEST_WITH_BROTLI then it should be a .br filename"
|
||||
|
||||
@@ -123,11 +123,18 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
assert json_obj['watching'][w]['title'], "Watch should have a title set"
|
||||
assert contents.startswith(watch_title + "x"), f"Snapshot contents in file {fname} should start with '{watch_title}x', got '{contents}'"
|
||||
|
||||
# With new format, we also have watch.json, so 4 files total
|
||||
# With new format, we have watch.json, so 4 files minimum
|
||||
# Note: last-checksum.txt may or may not exist - it gets cleared by settings changes,
|
||||
# and this test changes settings before checking files
|
||||
# This assertion should be AFTER the loop, not inside it
|
||||
if os.path.exists(changedetection_json):
|
||||
assert len(files_in_watch_dir) == 4, "Should be four files in the dir with new format: watch.json, html.br snapshot, history.txt and the extracted text snapshot"
|
||||
# 4 required files: watch.json, html.br, history.txt, extracted text snapshot
|
||||
# last-checksum.txt is optional (cleared by settings changes in this test)
|
||||
assert len(files_in_watch_dir) >= 4 and len(files_in_watch_dir) <= 5, f"Should be 4-5 files in the dir with new format (last-checksum.txt is optional). Found {len(files_in_watch_dir)}: {files_in_watch_dir}"
|
||||
else:
|
||||
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir with legacy format: html.br snapshot, history.txt and the extracted text snapshot"
|
||||
# 3 required files: html.br, history.txt, extracted text snapshot
|
||||
# last-checksum.txt is optional
|
||||
assert len(files_in_watch_dir) >= 3 and len(files_in_watch_dir) <= 4, f"Should be 3-4 files in the dir with legacy format (last-checksum.txt is optional). Found {len(files_in_watch_dir)}: {files_in_watch_dir}"
|
||||
|
||||
# Check that 'default' Watch vars aren't accidentally being saved
|
||||
if os.path.exists(changedetection_json):
|
||||
|
||||
208
changedetectionio/tests/test_settings_tag_force_reprocess.py
Normal file
208
changedetectionio/tests/test_settings_tag_force_reprocess.py
Normal file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test that changing global settings or tag configurations forces reprocessing.
|
||||
|
||||
When settings or tag configurations change, all affected watches need to
|
||||
reprocess even if their content hasn't changed, because configuration affects
|
||||
the processing result.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import wait_for_all_checks
|
||||
|
||||
|
||||
def test_settings_change_forces_reprocess(client, live_server, measure_memory_usage, datastore_path):
    """
    Verify that saving the global settings removes every watch's stored checksum.

    Two watches are checked once so each gets a ``last-checksum.txt``. The
    settings form is then submitted; both files are expected to disappear, and
    a further check is expected to write them again.
    """

    # Fixed page body written to the endpoint content file; it never changes in this test
    page_body = """<html>
<body>
<p>Test content that stays the same</p>
</body>
</html>
"""
    endpoint_file = os.path.join(datastore_path, "endpoint-content.txt")
    with open(endpoint_file, "w") as fh:
        fh.write(page_body)

    test_url = url_for('test_endpoint', _external=True)

    def run_check_cycle():
        # Queue a recheck of everything and block until the queue drains
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

    # Two watches pointing at the same URL
    store = client.application.config.get('DATASTORE')
    watch_uuids = [
        store.add_watch(url=test_url, extras={'title': 'Watch 1'}),
        store.add_watch(url=test_url, extras={'title': 'Watch 2'}),
    ]

    # Make sure neither watch is paused before checking
    for watch_uuid in watch_uuids:
        store.data['watching'][watch_uuid]['paused'] = False

    # Baseline check
    run_check_cycle()

    # Each watch directory should now contain a checksum file
    checksum_paths = [
        os.path.join(datastore_path, watch_uuid, 'last-checksum.txt')
        for watch_uuid in watch_uuids
    ]
    for number, path in enumerate(checksum_paths, start=1):
        assert os.path.isfile(path), f"First check should create checksum file for watch {number}"

    # Submit the global settings form (which setting changes is irrelevant here)
    response = client.post(
        url_for("settings.settings_page"),
        data={
            "application-empty_pages_are_a_change": "",
            "requests-time_between_check-minutes": 180,
            'application-fetch_backend': "html_requests"
        },
        follow_redirects=True
    )
    assert b"Settings updated." in response.data

    # Short pause before inspecting the filesystem
    time.sleep(0.5)

    # Every checksum file should be gone
    for number, path in enumerate(checksum_paths, start=1):
        assert not os.path.isfile(path), f"Settings change should delete checksum for watch {number}"

    # Another check should run the processing again and write the files back
    run_check_cycle()

    for number, path in enumerate(checksum_paths, start=1):
        assert os.path.isfile(path), f"Reprocessing should recreate checksum file for watch {number}"

    print("✓ Settings change forces reprocessing of all watches")
|
||||
|
||||
|
||||
def test_tag_change_forces_reprocess(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that changing a tag configuration clears checksums only for watches with that tag.

    One watch carries the tag and one does not. After a baseline check both have
    a ``last-checksum.txt``. Submitting the tag edit form is expected to remove
    the file for the tagged watch only, and the next check is expected to
    recreate it while the untagged watch's file stays in place.
    """

    # Setup test content
    test_html = """<html>
<body>
<p>Test content that stays the same</p>
</body>
</html>
"""
    # Explicit encoding so the fixture bytes do not depend on the platform locale
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w", encoding="utf-8") as f:
        f.write(test_html)

    test_url = url_for('test_endpoint', _external=True)

    # Create a tag
    datastore = client.application.config.get('DATASTORE')
    tag_uuid = datastore.add_tag('Test Tag')

    # Add watches - one with tag, one without
    uuid_with_tag = datastore.add_watch(url=test_url, extras={'title': 'Watch With Tag', 'tags': [tag_uuid]})
    uuid_without_tag = datastore.add_watch(url=test_url, extras={'title': 'Watch Without Tag'})

    # Unpause watches
    datastore.data['watching'][uuid_with_tag]['paused'] = False
    datastore.data['watching'][uuid_without_tag]['paused'] = False

    # First check - establishes baseline
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Verify checksum files were created
    checksum_with = os.path.join(datastore_path, uuid_with_tag, 'last-checksum.txt')
    checksum_without = os.path.join(datastore_path, uuid_without_tag, 'last-checksum.txt')
    assert os.path.isfile(checksum_with), "First check should create checksum for tagged watch"
    assert os.path.isfile(checksum_without), "First check should create checksum for untagged watch"

    # The tag must exist in the settings before it is edited. This was an
    # unused local lookup; it is now an explicit check with a readable failure.
    assert tag_uuid in datastore.data['settings']['application']['tags'], "Tag should be registered before editing it"

    # Edit the tag (change notification_muted as an example)
    res = client.post(
        url_for("tags.form_tag_edit_submit", uuid=tag_uuid),
        data={
            'title': 'Test Tag',
            'notification_muted': 'y',
            'overrides_watch': 'n'
        },
        follow_redirects=True
    )
    assert b"Updated" in res.data

    # Give it a moment to process
    time.sleep(0.5)

    # Verify ONLY the tagged watch's checksum was deleted
    assert not os.path.isfile(checksum_with), "Tag change should delete checksum for watch WITH tag"
    assert os.path.isfile(checksum_without), "Tag change should NOT delete checksum for watch WITHOUT tag"

    # Next check should reprocess tagged watch and recreate its checksum
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Verify tagged watch's checksum was recreated
    assert os.path.isfile(checksum_with), "Reprocessing should recreate checksum for tagged watch"
    assert os.path.isfile(checksum_without), "Untagged watch should still have its checksum"

    print("✓ Tag change forces reprocessing only for watches with that tag")
|
||||
|
||||
|
||||
def test_tag_change_via_api_forces_reprocess(client, live_server, measure_memory_usage, datastore_path):
    """
    Verify that a tag update sent through the REST API removes the checksum
    of a watch that carries that tag.
    """

    # Page body written to the endpoint content file
    html_doc = """<html>
<body>
<p>Test content</p>
</body>
</html>
"""
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as out:
        out.write(html_doc)

    test_url = url_for('test_endpoint', _external=True)

    # New tag, plus one watch that is attached to it after creation
    store = client.application.config.get('DATASTORE')
    tag_uuid = store.add_tag('API Test Tag')

    uuid_with_tag = store.add_watch(url=test_url, extras={'title': 'API Watch'})
    tagged_watch = store.data['watching'][uuid_with_tag]
    tagged_watch['paused'] = False
    tagged_watch['tags'] = [tag_uuid]
    tagged_watch.commit()

    # Baseline check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # The baseline check should have left a checksum behind
    checksum_file = os.path.join(datastore_path, uuid_with_tag, 'last-checksum.txt')
    assert os.path.isfile(checksum_file), "First check should create checksum file"

    # PUT the tag change, authenticated with the configured API token
    api_token = store.data['settings']['application']['api_access_token']
    res = client.put(
        f'/api/v1/tag/{tag_uuid}',
        json={'notification_muted': True},
        headers={'x-api-key': api_token}
    )
    assert res.status_code == 200, f"API call failed with status {res.status_code}: {res.data}"

    # Leave a little extra time before looking at the filesystem
    time.sleep(1.0)

    # Diagnostic output only: show the leftover checksum if the file survived
    if os.path.isfile(checksum_file):
        with open(checksum_file, 'r') as fh:
            leftover = fh.read()
        print(f"Checksum still exists: {leftover}")

    # The actual expectation
    assert not os.path.isfile(checksum_file), "API tag update should delete checksum"

    print("✓ Tag update via API forces reprocessing")
|
||||
246
changedetectionio/tests/test_watch_edited_flag.py
Normal file
246
changedetectionio/tests/test_watch_edited_flag.py
Normal file
@@ -0,0 +1,246 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test the watch edited flag functionality.
|
||||
|
||||
This tests the private __watch_was_edited flag that tracks when writable
|
||||
watch fields are modified, which prevents skipping reprocessing when the
|
||||
watch configuration has changed.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
|
||||
|
||||
def set_test_content(datastore_path):
    """Write test HTML content to endpoint-content.txt for test server.

    Args:
        datastore_path: Directory in which ``endpoint-content.txt`` is
            created or overwritten.
    """
    test_html = """<html>
<body>
<p>Test content for watch edited flag tests</p>
<p>This content stays the same across checks</p>
</body>
</html>
"""
    # Pin the encoding so the bytes on disk do not depend on the platform locale
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w", encoding="utf-8") as f:
        f.write(test_html)
|
||||
|
||||
|
||||
def test_watch_edited_flag_lifecycle(client, live_server, measure_memory_usage, datastore_path):
    """
    Walk the ``was_edited`` flag through its expected states:

    * cleared by ``reset_watch_edited_flag()``
    * raised when a writable field (``title``, ``url``) is assigned
    * left untouched when a readonly field (``uuid``) is assigned
    """

    def assert_clean_after_reset(target):
        # Clear the flag and confirm it really reads as False
        target.reset_watch_edited_flag()
        assert not target.was_edited, "Flag should be False after reset"

    # Create a watch through the quick-add form
    test_url = url_for('test_endpoint', _external=True)
    add_response = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )
    assert b"Watch added" in add_response.data or b"Updated watch" in add_response.data

    # Pick up the first watch in the datastore
    store = client.application.config.get('DATASTORE')
    watch_ids = list(store.data['watching'].keys())
    uuid = watch_ids[0]
    watch = store.data['watching'][uuid]

    # The form submission itself assigns fields, so start from a cleared flag
    assert_clean_after_reset(watch)

    # Writable field 'title' raises the flag
    watch['title'] = 'New Title'
    assert watch.was_edited, "Flag should be True after modifying writable field 'title'"

    # Manual reset (stands in for what the worker does)
    assert_clean_after_reset(watch)

    # Writable field 'url' raises the flag again
    watch['url'] = 'https://example.com'
    assert watch.was_edited, "Flag should be True after modifying writable field 'url'"

    # Readonly field 'uuid' must leave a cleared flag alone
    assert_clean_after_reset(watch)

    saved_uuid = watch['uuid']
    watch['uuid'] = 'readonly-test-uuid'
    assert not watch.was_edited, "Flag should stay False when modifying readonly field 'uuid'"
    watch['uuid'] = saved_uuid  # put the real value back

    # The worker-side reset is exercised by test_check_removed_line_contains_trigger
    # and test_watch_edited_flag_prevents_skip, not here.

    print("✓ All watch edited flag lifecycle tests passed")
|
||||
|
||||
|
||||
def test_watch_edited_flag_dict_methods(client, live_server, measure_memory_usage, datastore_path):
    """
    Check that each dict-style mutation path raises ``was_edited``:

    * item assignment        ``watch['key'] = value``
    * ``update()``           with a mapping and with keyword arguments
    * ``setdefault()``       only when it actually inserts a key
    * ``pop()``
    * item deletion          ``del watch['key']``
    """

    # Create a watch through the quick-add form
    test_url = url_for('test_endpoint', _external=True)
    client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )

    store = client.application.config.get('DATASTORE')
    first_uuid = list(store.data['watching'].keys())[0]
    watch = store.data['watching'][first_uuid]

    # --- item assignment ---
    watch.reset_watch_edited_flag()
    watch['title'] = 'Test via setitem'
    assert watch.was_edited, "Flag should be True after __setitem__ on writable field"

    # --- update() given a mapping ---
    watch.reset_watch_edited_flag()
    watch.update({'title': 'Test via update dict'})
    assert watch.was_edited, "Flag should be True after update() with writable field"

    # --- update() given keyword arguments ---
    watch.reset_watch_edited_flag()
    watch.update(title='Test via update kwargs')
    assert watch.was_edited, "Flag should be True after update() kwargs with writable field"

    # --- setdefault(): existing key first (no insertion), then a brand-new key ---
    watch.reset_watch_edited_flag()
    watch.setdefault('title', 'Should not be set')
    assert not watch.was_edited, "Flag should stay False when setdefault() doesn't change existing key"

    watch.setdefault('custom_field', 'New value')
    assert watch.was_edited, "Flag should be True after setdefault() creates new writable field"

    # --- pop() of the key inserted just above ---
    watch.reset_watch_edited_flag()
    watch.pop('custom_field', None)
    assert watch.was_edited, "Flag should be True after pop() on writable field"

    # --- item deletion: add a scratch key, clear the flag, then delete it ---
    watch.reset_watch_edited_flag()
    watch['temp_field'] = 'temp'
    watch.reset_watch_edited_flag()
    del watch['temp_field']
    assert watch.was_edited, "Flag should be True after __delitem__ on writable field"

    print("✓ All dict methods correctly set the flag")
|
||||
|
||||
|
||||
def test_watch_edited_flag_prevents_skip(client, live_server, measure_memory_usage, datastore_path):
    """
    Exercise the interaction between ``was_edited`` and repeated checks.

    Sequence: baseline check, a second check with nothing changed, then an
    edit to the watch followed by a third check. After that third check the
    flag is expected to read False again.
    """

    def trigger_recheck():
        # Queue a recheck and wait for the queue to drain
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

    # Fixed endpoint content
    set_test_content(datastore_path)

    # Create a watch through the quick-add form
    test_url = url_for('test_endpoint', _external=True)
    add_response = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )
    assert b"Watch added" in add_response.data or b"Updated watch" in add_response.data

    store = client.application.config.get('DATASTORE')
    uuid = list(store.data['watching'].keys())[0]
    watch = store.data['watching'][uuid]

    # Ensure the watch is not paused before checking
    watch['paused'] = False

    # Check #1: baseline
    trigger_recheck()

    # A completed first check should have written the checksum file
    checksum_file = os.path.join(datastore_path, uuid, 'last-checksum.txt')
    assert os.path.isfile(checksum_file), "First check should create last-checksum.txt file"

    # Start from a cleared flag
    watch.reset_watch_edited_flag()
    assert not watch.was_edited, "Flag should be False after reset"

    # Check #2: nothing changed. The skip itself is asserted in
    # test_check_removed_line_contains_trigger; this test only follows the flag.
    trigger_recheck()

    # Edit the watch; the flag should go up
    watch['title'] = 'Modified Title'
    assert watch.was_edited, "Flag should be True after modifying watch"

    # Check #3: content is unchanged but the watch was edited
    trigger_recheck()

    # Once the worker has handled the watch the flag should be down again
    assert not watch.was_edited, "Flag should be False after worker processing"

    print("✓ was_edited flag correctly prevents skip and is reset by worker")
|
||||
|
||||
|
||||
def test_watch_edited_flag_system_fields(client, live_server, measure_memory_usage, datastore_path):
    """
    Confirm that assigning readonly or system-managed fields leaves
    ``was_edited`` False.
    """

    # Create a watch through the quick-add form
    test_url = url_for('test_endpoint', _external=True)
    client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )

    store = client.application.config.get('DATASTORE')
    first_uuid = list(store.data['watching'].keys())[0]
    watch = store.data['watching'][first_uuid]

    # Fields the original test labels as readonly in the OpenAPI spec
    for field in ('uuid', 'date_created', 'last_viewed'):
        watch.reset_watch_edited_flag()
        if field not in watch:
            continue
        previous = watch[field]
        watch[field] = 'modified-readonly-value'
        assert not watch.was_edited, f"Flag should stay False when modifying readonly field '{field}'"
        watch[field] = previous  # put the real value back

    # System fields the original test notes are not in the OpenAPI spec yet
    for field in ('last_check_status',):
        watch.reset_watch_edited_flag()
        watch[field] = 'system-value'
        assert not watch.was_edited, f"Flag should stay False when modifying system field '{field}'"

    # 'content-type' is likewise expected not to raise the flag
    watch.reset_watch_edited_flag()
    watch['content-type'] = 'text/html'
    assert not watch.was_edited, "Flag should stay False when modifying 'content-type' (readonly)"

    print("✓ System fields correctly don't trigger the flag")
|
||||
@@ -276,6 +276,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Yes fine, so nothing todo, don't continue to process.
|
||||
process_changedetection_results = False
|
||||
changed_detected = False
|
||||
logger.debug(f'[{uuid}] - checksumFromPreviousCheckWasTheSame - Checksum from previous check was the same, nothing todo here.')
|
||||
# Reset the edited flag since we successfully completed the check
|
||||
watch.reset_watch_edited_flag()
|
||||
|
||||
except content_fetchers_exceptions.BrowserConnectError as e:
|
||||
datastore.update_watch(uuid=uuid,
|
||||
@@ -378,7 +381,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
update_obj['content-type'] = str(update_handler.fetcher.get_all_headers().get('content-type', '') or "").lower()
|
||||
|
||||
if not watch.get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
@@ -392,6 +395,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler and update_handler.xpath_data else 'empty.'}")
|
||||
if update_handler and process_changedetection_results:
|
||||
try:
|
||||
# Reset the edited flag BEFORE update_watch (which calls watch.update() and would set it again)
|
||||
watch.reset_watch_edited_flag()
|
||||
datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
if changed_detected or not watch.history_n:
|
||||
@@ -439,8 +444,22 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
|
||||
# Always record attempt count
|
||||
count = watch.get('check_count', 0) + 1
|
||||
|
||||
final_updates = {'fetch_time': round(time.time() - fetch_start_time, 3),
|
||||
'check_count': count,
|
||||
}
|
||||
# Record server header
|
||||
try:
|
||||
server_header = str(update_handler.fetcher.get_all_headers().get('server', '') or "").strip().lower()[:255]
|
||||
if server_header:
|
||||
final_updates['remote_server_reply'] = server_header
|
||||
except Exception as e:
|
||||
server_header = None
|
||||
pass
|
||||
|
||||
if update_handler: # Could be none or empty if the processor was not found
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
|
||||
@@ -449,17 +468,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if page_title:
|
||||
page_title = page_title.strip()[:2000]
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
final_updates['page_title'] = page_title
|
||||
except Exception as e:
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
|
||||
# Record server header
|
||||
try:
|
||||
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
|
||||
datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
|
||||
# Store favicon if necessary
|
||||
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
|
||||
@@ -467,14 +481,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
|
||||
)
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
|
||||
'check_count': count})
|
||||
datastore.update_watch(uuid=uuid, update_obj=final_updates)
|
||||
|
||||
# NOW clear fetcher content - after all processing is complete
|
||||
# This is the last point where we need the fetcher data
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
|
||||
# Explicitly delete update_handler to free all references
|
||||
if update_handler:
|
||||
|
||||
Reference in New Issue
Block a user