Compare commits

..

1 Commits

Author SHA1 Message Date
dgtlmoon
9356f9467e Watch history - Don't rescan whole history.txt when looking up a timestamp <->filepath 2025-11-05 18:40:35 +01:00
9 changed files with 134 additions and 129 deletions

View File

@@ -36,39 +36,6 @@ jobs:
with: with:
python-version: '3.11' python-version: '3.11'
test-application-3-11-windows:
# Always run - Windows native tests (no Docker)
needs: lint-code
runs-on: windows-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v5
- name: Set up Python 3.11
uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Cache pip packages
uses: actions/cache@v4
with:
path: ~\AppData\Local\pip\Cache
key: ${{ runner.os }}-pip-py3.11-${{ hashFiles('requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-py3.11-
${{ runner.os }}-pip-
- name: Install dependencies
run: |
pip install -r requirements.txt
pip install pytest pytest-xdist pytest-flask pytest-mock
- name: Run basic tests on Windows
shell: bash
run: |
cd changedetectionio
./run_basic_tests.sh
test-application-3-12: test-application-3-12:
# Only run on push to master (including PR merges) # Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master' if: github.event_name == 'push' && github.ref == 'refs/heads/master'

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki # Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.50.40' __version__ = '0.50.39'
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError

View File

@@ -353,15 +353,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
count = watch.get('check_count', 0) + 1 count = watch.get('check_count', 0) + 1
# Always record page title (used in notifications, and can change even when the content is the same) # Always record page title (used in notifications, and can change even when the content is the same)
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'): try:
try: page_title = html_tools.extract_title(data=update_handler.fetcher.content)
page_title = html_tools.extract_title(data=update_handler.fetcher.content) logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
if page_title: datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
page_title = page_title.strip()[:2000] except Exception as e:
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'") logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
except Exception as e:
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
# Record server header # Record server header
try: try:

View File

@@ -503,9 +503,7 @@ class ValidateJinja2Template(object):
jinja2_env = create_jinja_env(loader=BaseLoader) jinja2_env = create_jinja_env(loader=BaseLoader)
# Add notification tokens for validation # Add notification tokens for validation
static_token_placeholders = NotificationContextData() jinja2_env.globals.update(NotificationContextData())
static_token_placeholders.set_random_for_validation()
jinja2_env.globals.update(static_token_placeholders)
if hasattr(field, 'extra_notification_tokens'): if hasattr(field, 'extra_notification_tokens'):
jinja2_env.globals.update(field.extra_notification_tokens) jinja2_env.globals.update(field.extra_notification_tokens)

View File

@@ -37,6 +37,18 @@ class SignalHandler:
notification_event_signal.connect(self.handle_notification_event, weak=False) notification_event_signal.connect(self.handle_notification_event, weak=False)
logger.info("SignalHandler: Connected to notification_event signal") logger.info("SignalHandler: Connected to notification_event signal")
# Create and start the queue update thread using standard threading
import threading
self.polling_emitter_thread = threading.Thread(
target=self.polling_emit_running_or_queued_watches_threaded,
daemon=True
)
self.polling_emitter_thread.start()
logger.info("Started polling thread using threading (eventlet-free)")
# Store the thread reference in socketio for clean shutdown
self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread
def handle_signal(self, *args, **kwargs): def handle_signal(self, *args, **kwargs):
logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs") logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
# Safely extract the watch UUID from kwargs # Safely extract the watch UUID from kwargs
@@ -112,6 +124,74 @@ class SignalHandler:
except Exception as e: except Exception as e:
logger.error(f"Socket.IO error in handle_notification_event: {str(e)}") logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")
def polling_emit_running_or_queued_watches_threaded(self):
    """Threading version of polling for Windows compatibility.

    Runs until the application's exit event is set. On every cycle the set of
    UUIDs reported by ``worker_handler.get_running_uuids()`` is compared with
    the set seen on the previous cycle, and a ``watch_check_update`` signal is
    sent for each UUID that appeared in or disappeared from that set.

    All pauses are implemented with ``exit_event.wait(timeout)`` rather than
    ``time.sleep`` so that the thread wakes up as soon as shutdown is
    requested instead of finishing the current sleep slice.
    """
    import os
    import sys
    import threading

    logger.info("Queue update thread started (threading mode)")

    # Import here to avoid circular imports
    from changedetectionio.flask_app import app
    from changedetectionio import worker_handler

    watch_check_update = signal('watch_check_update')

    # Track previous state to avoid unnecessary emissions
    previous_running_uuids = set()

    # Run until app shutdown. If the app exposes no exit flag, a fresh
    # (never set) event is used, so the loop simply keeps polling.
    # NOTE(review): relies on the exit flag offering the threading.Event API
    # (is_set/wait), as the fallback value does - confirm against flask_app.
    exit_event = getattr(app.config, 'exit', threading.Event())

    while not exit_event.is_set():
        try:
            # Get current running UUIDs from async workers
            running_uuids = set(worker_handler.get_running_uuids())

            # Only send updates for UUIDs that changed state
            transitions = (
                ("started", running_uuids - previous_running_uuids),
                ("finished", previous_running_uuids - running_uuids),
            )
            for verb, changed_uuids in transitions:
                for uuid in changed_uuids:
                    # Exit fast if shutdown was requested
                    if exit_event.is_set():
                        break
                    logger.trace(f"Threading polling: UUID {uuid} {verb} processing")
                    with app.app_context():
                        watch_check_update.send(app_context=app, watch_uuid=uuid)
                    exit_event.wait(0.01)  # Small yield

            # Update tracking for next iteration
            previous_running_uuids = running_uuids

            # Pause up to 10 seconds between polling cycles; returns
            # immediately once the exit event is set.
            exit_event.wait(10)

        except Exception as e:
            logger.error(f"Error in threading polling: {str(e)}")
            # Short back-off before retrying, still interruptible by shutdown
            exit_event.wait(0.5)

    # Check if we're in pytest environment - if so, be more gentle with logging
    in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
    if not in_pytest:
        logger.info("Queue update thread stopped (threading mode)")
def handle_watch_update(socketio, **kwargs): def handle_watch_update(socketio, **kwargs):
@@ -303,6 +383,19 @@ def init_socketio(app, datastore):
"""Shutdown the SocketIO server fast and aggressively""" """Shutdown the SocketIO server fast and aggressively"""
try: try:
logger.info("Socket.IO: Fast shutdown initiated...") logger.info("Socket.IO: Fast shutdown initiated...")
# For threading mode, give the thread a very short time to exit gracefully
if hasattr(socketio, 'polling_emitter_thread'):
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
socketio.polling_emitter_thread.join(timeout=1.0) # Only 1 second timeout
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
else:
logger.info("Socket.IO: Polling thread stopped quickly")
else:
logger.info("Socket.IO: Polling thread already stopped")
logger.info("Socket.IO: Fast shutdown complete") logger.info("Socket.IO: Fast shutdown complete")
except Exception as e: except Exception as e:
logger.error(f"Socket.IO error during shutdown: {str(e)}") logger.error(f"Socket.IO error during shutdown: {str(e)}")

View File

@@ -22,13 +22,6 @@ import uuid as uuid_builder
from loguru import logger from loguru import logger
from blinker import signal from blinker import signal
# Try to import orjson for faster JSON serialization
try:
import orjson
HAS_ORJSON = True
except ImportError:
HAS_ORJSON = False
from .processors import get_custom_watch_obj_for_processor from .processors import get_custom_watch_obj_for_processor
from .processors.restock_diff import Restock from .processors.restock_diff import Restock
@@ -433,14 +426,9 @@ class ChangeDetectionStore:
# Re #286 - First write to a temp file, then confirm it looks OK and rename it # Re #286 - First write to a temp file, then confirm it looks OK and rename it
# This is a fairly basic strategy to deal with the case that the file is corrupted, # This is a fairly basic strategy to deal with the case that the file is corrupted,
# system was out of memory, out of RAM etc # system was out of memory, out of RAM etc
if HAS_ORJSON: with open(self.json_store_path+".tmp", 'w') as json_file:
# Use orjson for faster serialization # Serialize with the standard json module, indented by 2 spaces
with open(self.json_store_path+".tmp", 'wb') as json_file: json.dump(data, json_file, indent=2)
json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2))
else:
# Fallback to standard json module
with open(self.json_store_path+".tmp", 'w') as json_file:
json.dump(data, json_file, indent=2)
os.replace(self.json_store_path+".tmp", self.json_store_path) os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e: except Exception as e:
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}") logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")

View File

@@ -19,9 +19,18 @@ def test_inscriptus():
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage, datastore_path): def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage, datastore_path):
set_original_response(datastore_path=datastore_path) set_original_response(datastore_path=datastore_path)
# live_server_setup(live_server) # Setup on conftest per function
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True)) # Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Do this a few times.. ensures we don't accidentally set the status # Do this a few times.. ensures we don't accidentally set the status
for n in range(3): for n in range(3):
@@ -106,6 +115,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# It should report nothing found (no new 'has-unread-changes' class) # It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index")) res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data assert b'has-unread-changes' not in res.data
assert b'class="has-unread-changes' not in res.data assert b'class="has-unread-changes' not in res.data
assert b'head title' in res.data # Should be ON by default assert b'head title' in res.data # Should be ON by default
@@ -119,6 +129,23 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("watchlist.index")) res = client.get(url_for("watchlist.index"))
assert b'head title and more' in res.data assert b'head title and more' in res.data
# disable <title> pickup
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "", "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'class="has-unread-changes' in res.data
assert b'head title' not in res.data # should now be off
# Be sure the last_viewed is going to be greater than the last snapshot # Be sure the last_viewed is going to be greater than the last snapshot
time.sleep(1) time.sleep(1)
@@ -139,63 +166,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Cleanup everything # Cleanup everything
delete_all_watches(client) delete_all_watches(client)
def test_title_scraper(client, live_server, measure_memory_usage, datastore_path):
    """Check how the scraped page <title> shows up in the watchlist.

    Covers three steps: the title is listed after the first check, a
    title-only change is picked up on recheck, and after the
    ``use_page_title_in_list`` setting is posted empty a newly changed title
    no longer appears in the watchlist.
    """
    set_original_response(datastore_path=datastore_path)
    client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # The page title should be listed after the first check
    res = client.get(url_for("watchlist.index"))
    assert b'head title' in res.data  # Should be ON by default

    # Recheck it but only with a title change, content wasn't changed
    set_original_response(datastore_path=datastore_path, extra_title=" and more")

    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'head title and more' in res.data

    # disable <title> pickup
    res = client.post(
        url_for("settings.settings_page"),
        data={
            "application-ui-use_page_title_in_list": "",
            "requests-time_between_check-minutes": 180,
            'application-fetch_backend': "html_requests",
        },
        follow_redirects=True
    )

    # With pickup disabled, a new title must not reach the watchlist
    set_original_response(datastore_path=datastore_path, extra_title=" SHOULD NOT APPEAR")

    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'SHOULD NOT APPEAR' not in res.data

    delete_all_watches(client)
def test_title_scraper_html_only(client, live_server, measure_memory_usage, datastore_path):
    """A <title> tag inside a text/plain response must not be recorded as the page title."""
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
        f.write('"My text document\nWhere I talk about <title>\nwhich should not get registered\n</title>')

    # Serve the content as plain text so the title scraper should ignore it
    test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # The text between the <title> tags must not be listed in the watchlist
    res = client.get(url_for("watchlist.index"))
    assert b'which should not get registered' not in res.data

    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    assert not watch.get('title')
    # The worker stores the scraped title under 'page_title', so check that key too
    assert not watch.get('page_title')
# Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that # Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
def test_requests_timeout(client, live_server, measure_memory_usage, datastore_path): def test_requests_timeout(client, live_server, measure_memory_usage, datastore_path):

View File

@@ -302,20 +302,15 @@ def test_notification_urls_jinja2_apprise_integration(client, live_server, measu
data={ data={
"application-fetch_backend": "html_requests", "application-fetch_backend": "html_requests",
"application-minutes_between_check": 180, "application-minutes_between_check": 180,
"application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了", "another": "{{diff|truncate(1500)}}" }', "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
"application-notification_format": default_notification_format, "application-notification_format": default_notification_format,
"application-notification_urls": test_notification_url, "application-notification_urls": test_notification_url,
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} {{diff|truncate(200)}} ", "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
}, },
follow_redirects=True follow_redirects=True
) )
assert b'Settings updated' in res.data assert b'Settings updated' in res.data
assert '网站监测'.encode() in res.data
assert b'{{diff|truncate(1500)}}' in res.data
assert b'{{diff|truncate(200)}}' in res.data
def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage, datastore_path): def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage, datastore_path):

View File

@@ -33,9 +33,6 @@ chardet>2.3.0
wtforms~=3.2 wtforms~=3.2
jsonpath-ng~=1.7.0 jsonpath-ng~=1.7.0
# Fast JSON serialization for better performance
orjson~=3.10
# dnspython - Used by paho-mqtt for MQTT broker resolution # dnspython - Used by paho-mqtt for MQTT broker resolution
# Version pin removed since eventlet (which required the specific 2.6.1 pin) has been eliminated # Version pin removed since eventlet (which required the specific 2.6.1 pin) has been eliminated
# paho-mqtt will install compatible dnspython version automatically # paho-mqtt will install compatible dnspython version automatically