Compare commits

...

26 Commits

Author SHA1 Message Date
dgtlmoon
59e93c29d0 0.50.01 2025-06-03 16:13:58 +02:00
dgtlmoon
d7173bb96e UI - Adding missing icons lib 2025-06-03 16:13:16 +02:00
dgtlmoon
d544e11a20 Use pip build cache from inside Dockerfile (#3228)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io Container Build Test / test-container-build (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-06-03 15:04:38 +02:00
dgtlmoon
7f0c19c61c UI - Also uncheck 'check all' checkbox for group operations in realtime mode 2025-06-03 14:56:31 +02:00
dgtlmoon
30e84f1030 UI - Real time - checkbox operations now realtime without reload 2025-06-03 14:54:13 +02:00
dgtlmoon
d5af91d8f7 UI - Revert icon changes 2025-06-03 14:01:00 +02:00
dgtlmoon
4b18c633ba Building - Use GHA layer caching (#3227) 2025-06-03 13:03:36 +02:00
dgtlmoon
08728d7d03 UI - Realtime - Fixing 'last_changed' status re #3224 2025-06-03 10:44:15 +02:00
dgtlmoon
73f3beda00 Realtime UI - Socketio tweaks and refactor (#3220) 2025-06-03 10:17:19 +02:00
dgtlmoon
7b8d335c43 Code - Fix dep warning (#3221)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
CodeQL / Analyze (javascript) (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
2025-05-28 14:11:11 +02:00
dgtlmoon
ba0b6071e6 Realtime UI - Reducing log output
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-05-28 09:25:23 +02:00
dgtlmoon
a6603d5ad6 UI - Reword restock detector plugin description
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-05-27 15:26:29 +02:00
dgtlmoon
26833781a7 UI - Remove incorrect error text
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io Container Build Test / test-container-build (push) Has been cancelled
2025-05-26 21:25:15 +02:00
dgtlmoon
f3ed9bdbb5 0.49.18 2025-05-26 20:35:46 +02:00
dgtlmoon
0f65178190 Realtime UI updates via WebSocket (#3183) 2025-05-26 20:12:32 +02:00
dgtlmoon
a58fc82575 Update to Apprise 1.9.3 - BlueSky, Resend support (#3216)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / test-container-build (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-05-22 15:42:18 +02:00
dgtlmoon
2575c03ae0 UI - Update 'Browser Steps' UI text 2025-05-22 15:37:36 +02:00
Jost Alemann
9b7372fff0 Code - Remove unused f-strings (#3209) 2025-05-22 12:11:39 +02:00
Spacetech
fcd6ebe0ee Use logger.debug for playwright console logs (#3201)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
CodeQL / Analyze (javascript) (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
2025-05-18 13:26:50 +02:00
dgtlmoon
c162ec9d52 0.49.17
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
CodeQL / Analyze (javascript) (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
2025-05-12 10:47:27 +02:00
Emmanuel Ferdman
bb7f7f473b Resolve warnings of bs4 library (#3187)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / test-container-build (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-05-09 14:35:35 +02:00
dgtlmoon
a9ca511004 Revert memory strategy change for html_to_text (Was hanging under high concurrency setups)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-05-09 09:44:02 +02:00
dgtlmoon
8df61f5eaa 0.49.16
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / test-container-build (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
CodeQL / Analyze (javascript) (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
2025-05-03 16:43:04 +02:00
dgtlmoon
162f573967 Fixes to ensure proxy errors are handled correctly (#3168) 2025-05-03 16:05:40 +02:00
dgtlmoon
eada0ef08d UI - Custom headers should have validation (#3172) 2025-05-03 13:57:42 +02:00
dgtlmoon
f57bc10973 Update selenium library (#3170)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io Container Build Test / test-container-build (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-05-02 14:05:23 +02:00
128 changed files with 4384 additions and 1761 deletions

View File

@@ -29,3 +29,35 @@ venv/
# Visual Studio
.vscode/
# Test and development files
test-datastore/
tests/
docs/
*.md
!README.md
# Temporary and log files
*.log
*.tmp
tmp/
temp/
# Training data and large files
train-data/
works-data/
# Container files
Dockerfile*
docker-compose*.yml
.dockerignore
# Development certificates and keys
*.pem
*.key
*.crt
profile_output.prof
# Large binary files that shouldn't be in container
*.pdf
chrome.json

View File

@@ -56,6 +56,8 @@ jobs:
context: ./
file: ./.github/test/Dockerfile-alpine
platforms: linux/amd64,linux/arm64
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Test that the docker containers can build
id: docker_build
@@ -65,6 +67,6 @@ jobs:
context: ./
file: ./Dockerfile
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -86,10 +86,10 @@ jobs:
run: |
# Playwright via Sockpuppetbrowser fetch
# tests/visualselector/test_fetch_data.py will do browser steps
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
- name: Playwright and SocketPuppetBrowser - Headers and requests

View File

@@ -23,13 +23,24 @@ WORKDIR /install
COPY requirements.txt /requirements.txt
# --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed)
RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt
# Use cache mounts and multiple wheel sources for faster ARM builds
ENV PIP_CACHE_DIR=/tmp/pip-cache
RUN --mount=type=cache,target=/tmp/pip-cache \
pip install \
--extra-index-url https://www.piwheels.org/simple \
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
-r /requirements.txt
# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
RUN pip install --target=/dependencies playwright~=1.48.0 \
RUN --mount=type=cache,target=/tmp/pip-cache \
pip install \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
playwright~=1.48.0 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
# Final image stage

View File

@@ -5,6 +5,7 @@ recursive-include changedetectionio/conditions *
recursive-include changedetectionio/model *
recursive-include changedetectionio/notification *
recursive-include changedetectionio/processors *
recursive-include changedetectionio/realtime *
recursive-include changedetectionio/static *
recursive-include changedetectionio/templates *
recursive-include changedetectionio/tests *

View File

@@ -2,20 +2,19 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.49.15'
__version__ = '0.50.01'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
import os
os.environ['EVENTLET_NO_GREENDNS'] = 'yes'
import eventlet
import eventlet.wsgi
import getopt
import platform
import signal
import socket
import sys
# Eventlet completely removed - using threading mode for SocketIO
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
from changedetectionio import store
from changedetectionio.flask_app import changedetection_app
from loguru import logger
@@ -30,13 +29,34 @@ def get_version():
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
def sigshutdown_handler(_signo, _stack_frame):
name = signal.Signals(_signo).name
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
datastore.sync_to_json()
logger.success('Sync JSON to disk complete.')
# This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it.
# Solution: move to gevent or other server in the future (#2014)
datastore.stop_thread = True
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
# Set exit flag immediately to stop all loops
app.config.exit.set()
datastore.stop_thread = True
# Shutdown workers immediately
try:
from changedetectionio import worker_handler
worker_handler.shutdown_workers()
except Exception as e:
logger.error(f"Error shutting down workers: {str(e)}")
# Shutdown socketio server fast
from changedetectionio.flask_app import socketio_server
if socketio_server and hasattr(socketio_server, 'shutdown'):
try:
socketio_server.shutdown()
except Exception as e:
logger.error(f"Error shutting down Socket.IO server: {str(e)}")
# Save data quickly
try:
datastore.sync_to_json()
logger.success('Fast sync to disk complete.')
except Exception as e:
logger.error(f"Error syncing to disk: {str(e)}")
sys.exit()
def main():
@@ -45,9 +65,9 @@ def main():
datastore_path = None
do_cleanup = False
host = ''
host = "0.0.0.0"
ipv6_enabled = False
port = os.environ.get('PORT') or 5000
port = int(os.environ.get('PORT', 5000))
ssl_mode = False
# On Windows, create and use a default path.
@@ -143,6 +163,11 @@ def main():
app = changedetection_app(app_config, datastore)
# Get the SocketIO instance from the Flask app (created in flask_app.py)
from changedetectionio.flask_app import socketio_server
global socketio
socketio = socketio_server
signal.signal(signal.SIGTERM, sigshutdown_handler)
signal.signal(signal.SIGINT, sigshutdown_handler)
@@ -167,10 +192,11 @@ def main():
@app.context_processor
def inject_version():
def inject_template_globals():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
)
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
@@ -194,15 +220,21 @@ def main():
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET
if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type),
certfile='cert.pem',
keyfile='privkey.pem',
server_side=True), app)
# SocketIO instance is already initialized in flask_app.py
# Launch using SocketIO run method for proper integration (if enabled)
if socketio_server:
if ssl_mode:
socketio.run(app, host=host, port=int(port), debug=False,
certfile='cert.pem', keyfile='privkey.pem', allow_unsafe_werkzeug=True)
else:
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
else:
eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app)
# Run Flask app without Socket.IO if disabled
logger.info("Starting Flask app without Socket.IO server")
if ssl_mode:
app.run(host=host, port=int(port), debug=False,
ssl_context=('cert.pem', 'privkey.pem'))
else:
app.run(host=host, port=int(port), debug=False)

View File

@@ -3,6 +3,7 @@ from changedetectionio.strtobool import strtobool
from flask_expects_json import expects_json
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from flask_restful import abort, Resource
from flask import request, make_response
import validators
@@ -47,7 +48,7 @@ class Watch(Resource):
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if request.args.get('recheck'):
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return "OK", 200
if request.args.get('paused', '') == 'paused':
self.datastore.data['watching'].get(uuid).pause()
@@ -236,7 +237,7 @@ class CreateWatch(Resource):
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
if new_uuid:
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported URL", 400
@@ -291,7 +292,7 @@ class CreateWatch(Resource):
if request.args.get('recheck_all'):
for uuid in self.datastore.data['watching'].keys():
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return {'status': "OK"}, 200
return list, 200

View File

@@ -0,0 +1,449 @@
from .processors.exceptions import ProcessorException
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools
from changedetectionio.flask_app import watch_check_update
import asyncio
import importlib
import os
import time
from loguru import logger
# Async version of update_worker
# Processes jobs from AsyncSignalPriorityQueue instead of threaded queue
async def async_update_worker(worker_id, q, notification_q, app, datastore):
"""
Async worker function that processes watch check jobs from the queue.
Args:
worker_id: Unique identifier for this worker
q: AsyncSignalPriorityQueue containing jobs to process
notification_q: Standard queue for notifications
app: Flask application instance
datastore: Application datastore
"""
# Set a descriptive name for this task
task = asyncio.current_task()
if task:
task.set_name(f"async-worker-{worker_id}")
logger.info(f"Starting async worker {worker_id}")
while not app.config.exit.is_set():
update_handler = None
watch = None
try:
# Use asyncio wait_for to make queue.get() cancellable
queued_item_data = await asyncio.wait_for(q.get(), timeout=1.0)
except asyncio.TimeoutError:
# No jobs available, continue loop
continue
except Exception as e:
logger.error(f"Worker {worker_id} error getting queue item: {e}")
await asyncio.sleep(0.1)
continue
uuid = queued_item_data.item.get('uuid')
fetch_start_time = round(time.time())
# Mark this UUID as being processed
from changedetectionio import worker_handler
worker_handler.set_uuid_processing(uuid, processing=True)
try:
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
changed_detected = False
contents = b''
process_changedetection_results = True
update_obj = {}
# Clear last errors
datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
datastore.data['watching'][uuid]['last_checked'] = fetch_start_time
watch = datastore.data['watching'].get(uuid)
logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
try:
watch_check_update.send(watch_uuid=uuid)
# Processor is what we are using for detecting the "Change"
processor = watch.get('processor', 'text_json_diff')
# Init a new 'difference_detection_processor'
processor_module_name = f"changedetectionio.processors.{processor}.processor"
try:
processor_module = importlib.import_module(processor_module_name)
except ModuleNotFoundError as e:
print(f"Processor module '{processor}' not found.")
raise e
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid)
# All fetchers are now async, so call directly
await update_handler.call_browser()
# Run change detection (this is synchronous)
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
except PermissionError as e:
logger.critical(f"File permission error updating file, watch: {uuid}")
logger.critical(str(e))
process_changedetection_results = False
except ProcessorException as e:
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
process_changedetection_results = False
except content_fetchers_exceptions.ReplyWithContentButNoText as e:
extra_help = ""
if e.has_filters:
has_img = html_tools.include_filters(include_filters='img',
html_content=e.html_content)
if has_img:
extra_help = ", it's possible that the filters you have give an empty result or contain only an image."
else:
extra_help = ", it's possible that the filters were found, but contained no usable text."
datastore.update_watch(uuid=uuid, update_obj={
'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
})
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
process_changedetection_results = False
except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 403:
err_text = "Error - 403 (Access denied) received"
elif e.status_code == 404:
err_text = "Error - 404 (Page not found) received"
elif e.status_code == 407:
err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?"
elif e.status_code == 500:
err_text = "Error - 500 (Internal server error) received from the web site"
else:
extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data, as_error=True)
if e.page_text:
watch.save_error_text(contents=e.page_text)
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False
except FilterNotFoundInResponse as e:
if not datastore.data['watching'].get(uuid):
continue
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
# Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
if c >= threshold:
if not watch.get('notification_muted'):
logger.debug(f"Sending filter failed notification for {uuid}")
await send_filter_failure_notification(uuid, notification_q, datastore)
c = 0
logger.debug(f"Reset filter failure count back to zero")
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
else:
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
process_changedetection_results = False
except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
# Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False
changed_detected = False
except content_fetchers_exceptions.BrowserConnectError as e:
datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg})
process_changedetection_results = False
except content_fetchers_exceptions.BrowserFetchTimedOut as e:
datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg})
process_changedetection_results = False
except content_fetchers_exceptions.BrowserStepsStepException as e:
if not datastore.data['watching'].get(uuid):
continue
error_step = e.step_n + 1
from playwright._impl._errors import TimeoutError, Error
# Generally enough info for TimeoutError (couldnt locate the element after default seconds)
err_text = f"Browser step at position {error_step} could not run, check the watch, add a delay if necessary, view Browser Steps to see screenshot at that step."
if e.original_e.name == "TimeoutError":
# Just the first line is enough, the rest is the stack trace
err_text += " Could not find the target."
else:
# Other Error, more info is good.
err_text += " " + str(e.original_e).splitlines()[0]
logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}")
datastore.update_watch(uuid=uuid,
update_obj={'last_error': err_text,
'browser_steps_last_error_step': error_step})
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold:
if not watch.get('notification_muted'):
await send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n, notification_q=notification_q, datastore=datastore)
c = 0
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
process_changedetection_results = False
except content_fetchers_exceptions.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly"
if e.message:
err_text = "{} - {}".format(err_text, e.message)
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code,
'has_ldjson_price_data': None})
process_changedetection_results = False
except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False
logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
except Exception as e:
logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}")
logger.error(str(e))
datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
process_changedetection_results = False
else:
if not datastore.data['watching'].get(uuid):
continue
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
if not watch.get('ignore_status_codes'):
update_obj['consecutive_filter_failures'] = 0
update_obj['last_error'] = False
cleanup_error_artifacts(uuid, datastore)
if not datastore.data['watching'].get(uuid):
continue
if process_changedetection_results:
# Extract title if needed
if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
if not watch['title'] or not len(watch['title']):
try:
update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}")
except Exception as e:
logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")
try:
datastore.update_watch(uuid=uuid, update_obj=update_obj)
if changed_detected or not watch.history_n:
if update_handler.screenshot:
watch.save_screenshot(screenshot=update_handler.screenshot)
if update_handler.xpath_data:
watch.save_xpath_data(data=update_handler.xpath_data)
# Ensure unique timestamp for history
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
logger.warning(f"Timestamp {fetch_start_time} already exists, waiting 1 seconds")
fetch_start_time += 1
await asyncio.sleep(1)
watch.save_history_text(contents=contents,
timestamp=int(fetch_start_time),
snapshot_id=update_obj.get('previous_md5', 'none'))
empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))
# Send notifications on second+ check
if watch.history_n >= 2:
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
if not watch.get('notification_muted'):
await send_content_changed_notification(uuid, notification_q, datastore)
except Exception as e:
logger.critical(f"Worker {worker_id} exception in process_changedetection_results")
logger.critical(str(e))
datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
# Always record attempt count
count = watch.get('check_count', 0) + 1
# Record server header
try:
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
except Exception as e:
pass
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
'check_count': count})
except Exception as e:
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
# Also update the watch with error information
if datastore and uuid in datastore.data['watching']:
datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"})
finally:
# Always cleanup - this runs whether there was an exception or not
if uuid:
try:
# Mark UUID as no longer being processed
worker_handler.set_uuid_processing(uuid, processing=False)
# Send completion signal
if watch:
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
watch_check_update.send(watch_uuid=watch['uuid'])
update_handler = None
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
except Exception as cleanup_error:
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
# Brief pause before continuing to avoid tight error loops (only on error)
if 'e' in locals():
await asyncio.sleep(1.0)
else:
# Small yield for normal completion
await asyncio.sleep(0.01)
# Check if we should exit
if app.config.exit.is_set():
break
# Check if we're in pytest environment - if so, be more gentle with logging
import sys
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
if not in_pytest:
logger.info(f"Worker {worker_id} shutting down")
def cleanup_error_artifacts(uuid, datastore):
"""Helper function to clean up error artifacts"""
cleanup_files = ["last-error-screenshot.png", "last-error.txt"]
for f in cleanup_files:
full_path = os.path.join(datastore.datastore_path, uuid, f)
if os.path.isfile(full_path):
os.unlink(full_path)
async def send_content_changed_notification(watch_uuid, notification_q, datastore):
"""Helper function to queue notifications using the new notification service"""
try:
from changedetectionio.notification_service import create_notification_service
# Create notification service instance
notification_service = create_notification_service(datastore, notification_q)
notification_service.send_content_changed_notification(watch_uuid)
except Exception as e:
logger.error(f"Error sending notification for {watch_uuid}: {e}")
async def send_filter_failure_notification(watch_uuid, notification_q, datastore):
"""Helper function to send filter failure notifications using the new notification service"""
try:
from changedetectionio.notification_service import create_notification_service
# Create notification service instance
notification_service = create_notification_service(datastore, notification_q)
notification_service.send_filter_failure_notification(watch_uuid)
except Exception as e:
logger.error(f"Error sending filter failure notification for {watch_uuid}: {e}")
async def send_step_failure_notification(watch_uuid, step_n, notification_q, datastore):
"""Helper function to send step failure notifications using the new notification service"""
try:
from changedetectionio.notification_service import create_notification_service
# Create notification service instance
notification_service = create_notification_service(datastore, notification_q)
notification_service.send_step_failure_notification(watch_uuid, step_n)
except Exception as e:
logger.error(f"Error sending step failure notification for {watch_uuid}: {e}")

View File

@@ -25,35 +25,53 @@ io_interface_context = None
import json
import hashlib
from flask import Response
import asyncio
import threading
def run_async_in_browser_loop(coro):
"""Run async coroutine using the existing async worker event loop"""
from changedetectionio import worker_handler
# Use the existing async worker event loop instead of creating a new one
if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed():
logger.debug("Browser steps using existing async worker event loop")
future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop)
return future.result()
else:
# Fallback: create a new event loop (for sync workers or if async loop not available)
logger.debug("Browser steps creating temporary event loop")
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(coro)
finally:
loop.close()
def construct_blueprint(datastore: ChangeDetectionStore):
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
def start_browsersteps_session(watch_uuid):
from . import nonContext
async def start_browsersteps_session(watch_uuid):
from . import browser_steps
import time
global io_interface_context
from playwright.async_api import async_playwright
# We keep the playwright session open for many minutes
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
browsersteps_start_session = {'start_time': time.time()}
# You can only have one of these running
# This should be very fine to leave running for the life of the application
# @idea - Make it global so the pool of watch fetchers can use it also
if not io_interface_context:
io_interface_context = nonContext.c_sync_playwright()
# Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes
io_interface_context = io_interface_context.start()
# Create a new async playwright instance for browser steps
playwright_instance = async_playwright()
playwright_context = await playwright_instance.start()
keepalive_ms = ((keepalive_seconds + 3) * 1000)
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
a = "?" if not '?' in base_url else '&'
base_url += a + f"timeout={keepalive_ms}"
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
browsersteps_start_session['browser'] = browser
browsersteps_start_session['playwright_context'] = playwright_context
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
proxy = None
@@ -75,15 +93,20 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
playwright_browser=browsersteps_start_session['browser'],
browserstepper = browser_steps.browsersteps_live_ui(
playwright_browser=browser,
proxy=proxy,
start_url=datastore.data['watching'][watch_uuid].link,
headers=datastore.data['watching'][watch_uuid].get('headers')
)
# Initialize the async connection
await browserstepper.connect(proxy=proxy)
browsersteps_start_session['browserstepper'] = browserstepper
# For test
#browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
return browsersteps_start_session
@@ -92,7 +115,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
def browsersteps_start_session():
# A new session was requested, return sessionID
import asyncio
import uuid
browsersteps_session_id = str(uuid.uuid4())
watch_uuid = request.args.get('uuid')
@@ -104,7 +127,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.debug("browser_steps.py connecting")
try:
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
# Run the async function in the dedicated browser steps event loop
browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
start_browsersteps_session(watch_uuid)
)
except Exception as e:
if 'ECONNREFUSED' in str(e):
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
@@ -168,12 +194,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
step_optional_value = request.form.get('optional_value')
is_last_step = strtobool(request.form.get('is_last_step'))
# @todo try.. accept.. nice errors not popups..
try:
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
selector=step_selector,
optional_value=step_optional_value)
# Run the async call_action method in the dedicated browser steps event loop
run_async_in_browser_loop(
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
action_name=step_operation,
selector=step_selector,
optional_value=step_optional_value
)
)
except Exception as e:
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
@@ -187,7 +216,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Screenshots and other info only needed on requesting a step (POST)
try:
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
# Run the async get_current_state method in the dedicated browser steps event loop
(screenshot, xpath_data) = run_async_in_browser_loop(
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
)
if is_last_step:
watch = datastore.data['watching'].get(uuid)
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
@@ -201,7 +234,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response("Error fetching screenshot and element data - " + str(e), 401)
# SEND THIS BACK TO THE BROWSER
output = {
"screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
"xpath_data": xpath_data,

View File

@@ -61,26 +61,9 @@ class steppable_browser_interface():
def __init__(self, start_url):
self.start_url = start_url
def safe_page_operation(self, operation_fn, default_return=None):
"""Safely execute a page operation with error handling"""
if self.page is None:
logger.warning("Attempted operation on None page object")
return default_return
try:
return operation_fn()
except Exception as e:
logger.debug(f"Page operation failed: {str(e)}")
# Try to reclaim memory if possible
try:
self.page.request_gc()
except:
pass
return default_return
# Convert and perform "Click Button" for example
def call_action(self, action_name, selector=None, optional_value=None):
async def call_action(self, action_name, selector=None, optional_value=None):
if self.page is None:
logger.warning("Cannot call action on None page object")
return
@@ -109,110 +92,83 @@ class steppable_browser_interface():
if optional_value and ('{%' in optional_value or '{{' in optional_value):
optional_value = jinja_render(template_str=optional_value)
try:
action_handler(selector, optional_value)
# Safely wait for timeout
def wait_timeout():
self.page.wait_for_timeout(1.5 * 1000)
self.safe_page_operation(wait_timeout)
logger.debug(f"Call action done in {time.time()-now:.2f}s")
except Exception as e:
logger.error(f"Error executing action '{call_action_name}': {str(e)}")
# Request garbage collection to free up resources after error
try:
self.page.request_gc()
except:
pass
def action_goto_url(self, selector=None, value=None):
await action_handler(selector, optional_value)
# Safely wait for timeout
await self.page.wait_for_timeout(1.5 * 1000)
logger.debug(f"Call action done in {time.time()-now:.2f}s")
async def action_goto_url(self, selector=None, value=None):
if not value:
logger.warning("No URL provided for goto_url action")
return None
now = time.time()
def goto_operation():
return self.page.goto(value, timeout=0, wait_until='load')
response = self.safe_page_operation(goto_operation)
response = await self.page.goto(value, timeout=0, wait_until='load')
logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
return response
# Incase they request to go back to the start
def action_goto_site(self, selector=None, value=None):
return self.action_goto_url(value=self.start_url)
async def action_goto_site(self, selector=None, value=None):
return await self.action_goto_url(value=self.start_url)
def action_click_element_containing_text(self, selector=None, value=''):
async def action_click_element_containing_text(self, selector=None, value=''):
logger.debug("Clicking element containing text")
if not value or not len(value.strip()):
return
def click_operation():
elem = self.page.get_by_text(value)
if elem.count():
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
self.safe_page_operation(click_operation)
elem = self.page.get_by_text(value)
if await elem.count():
await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
async def action_click_element_containing_text_if_exists(self, selector=None, value=''):
logger.debug("Clicking element containing text if exists")
if not value or not len(value.strip()):
return
def click_if_exists_operation():
elem = self.page.get_by_text(value)
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
if elem.count():
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
elem = self.page.get_by_text(value)
count = await elem.count()
logger.debug(f"Clicking element containing text - {count} elements found")
if count:
await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
self.safe_page_operation(click_if_exists_operation)
def action_enter_text_in_field(self, selector, value):
async def action_enter_text_in_field(self, selector, value):
if not selector or not len(selector.strip()):
return
def fill_operation():
self.page.fill(selector, value, timeout=self.action_timeout)
self.safe_page_operation(fill_operation)
await self.page.fill(selector, value, timeout=self.action_timeout)
def action_execute_js(self, selector, value):
async def action_execute_js(self, selector, value):
if not value:
return None
def evaluate_operation():
return self.page.evaluate(value)
return self.safe_page_operation(evaluate_operation)
return await self.page.evaluate(value)
def action_click_element(self, selector, value):
async def action_click_element(self, selector, value):
logger.debug("Clicking element")
if not selector or not len(selector.strip()):
return
def click_operation():
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
self.safe_page_operation(click_operation)
await self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
def action_click_element_if_exists(self, selector, value):
async def action_click_element_if_exists(self, selector, value):
import playwright._impl._errors as _api_types
logger.debug("Clicking element if exists")
if not selector or not len(selector.strip()):
return
def click_if_exists_operation():
try:
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
except _api_types.TimeoutError:
return
except _api_types.Error:
# Element was there, but page redrew and now its long long gone
return
try:
await self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
except _api_types.TimeoutError:
return
except _api_types.Error:
# Element was there, but page redrew and now its long long gone
return
self.safe_page_operation(click_if_exists_operation)
def action_click_x_y(self, selector, value):
async def action_click_x_y(self, selector, value):
if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
return
@@ -222,57 +178,42 @@ class steppable_browser_interface():
x = int(float(x.strip()))
y = int(float(y.strip()))
def click_xy_operation():
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
await self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
self.safe_page_operation(click_xy_operation)
except Exception as e:
logger.error(f"Error parsing x,y coordinates: {str(e)}")
def action__select_by_option_text(self, selector, value):
async def action__select_by_option_text(self, selector, value):
if not selector or not len(selector.strip()):
return
def select_operation():
self.page.select_option(selector, label=value, timeout=self.action_timeout)
await self.page.select_option(selector, label=value, timeout=self.action_timeout)
self.safe_page_operation(select_operation)
async def action_scroll_down(self, selector, value):
# Some sites this doesnt work on for some reason
await self.page.mouse.wheel(0, 600)
await self.page.wait_for_timeout(1000)
def action_scroll_down(self, selector, value):
def scroll_operation():
# Some sites this doesnt work on for some reason
self.page.mouse.wheel(0, 600)
self.page.wait_for_timeout(1000)
self.safe_page_operation(scroll_operation)
def action_wait_for_seconds(self, selector, value):
async def action_wait_for_seconds(self, selector, value):
try:
seconds = float(value.strip()) if value else 1.0
def wait_operation():
self.page.wait_for_timeout(seconds * 1000)
self.safe_page_operation(wait_operation)
await self.page.wait_for_timeout(seconds * 1000)
except (ValueError, TypeError) as e:
logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
def action_wait_for_text(self, selector, value):
async def action_wait_for_text(self, selector, value):
if not value:
return
import json
v = json.dumps(value)
def wait_for_text_operation():
self.page.wait_for_function(
f'document.querySelector("body").innerText.includes({v});',
timeout=30000
)
await self.page.wait_for_function(
f'document.querySelector("body").innerText.includes({v});',
timeout=30000
)
self.safe_page_operation(wait_for_text_operation)
def action_wait_for_text_in_element(self, selector, value):
async def action_wait_for_text_in_element(self, selector, value):
if not selector or not value:
return
@@ -280,82 +221,60 @@ class steppable_browser_interface():
s = json.dumps(selector)
v = json.dumps(value)
def wait_for_text_in_element_operation():
self.page.wait_for_function(
f'document.querySelector({s}).innerText.includes({v});',
timeout=30000
)
self.safe_page_operation(wait_for_text_in_element_operation)
await self.page.wait_for_function(
f'document.querySelector({s}).innerText.includes({v});',
timeout=30000
)
# @todo - in the future make some popout interface to capture what needs to be set
# https://playwright.dev/python/docs/api/class-keyboard
def action_press_enter(self, selector, value):
def press_operation():
self.page.keyboard.press("Enter", delay=randint(200, 500))
async def action_press_enter(self, selector, value):
await self.page.keyboard.press("Enter", delay=randint(200, 500))
self.safe_page_operation(press_operation)
def action_press_page_up(self, selector, value):
def press_operation():
self.page.keyboard.press("PageUp", delay=randint(200, 500))
self.safe_page_operation(press_operation)
async def action_press_page_up(self, selector, value):
await self.page.keyboard.press("PageUp", delay=randint(200, 500))
def action_press_page_down(self, selector, value):
def press_operation():
self.page.keyboard.press("PageDown", delay=randint(200, 500))
self.safe_page_operation(press_operation)
async def action_press_page_down(self, selector, value):
await self.page.keyboard.press("PageDown", delay=randint(200, 500))
def action_check_checkbox(self, selector, value):
async def action_check_checkbox(self, selector, value):
if not selector:
return
await self.page.locator(selector).check(timeout=self.action_timeout)
async def action_uncheck_checkbox(self, selector, value):
if not selector:
return
def check_operation():
self.page.locator(selector).check(timeout=self.action_timeout)
await self.page.locator(selector).uncheck(timeout=self.action_timeout)
self.safe_page_operation(check_operation)
def action_uncheck_checkbox(self, selector, value):
if not selector:
return
def uncheck_operation():
self.page.locator(selector).uncheck(timeout=self.action_timeout)
self.safe_page_operation(uncheck_operation)
def action_remove_elements(self, selector, value):
async def action_remove_elements(self, selector, value):
"""Removes all elements matching the given selector from the DOM."""
if not selector:
return
def remove_operation():
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
self.safe_page_operation(remove_operation)
await self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
def action_make_all_child_elements_visible(self, selector, value):
async def action_make_all_child_elements_visible(self, selector, value):
"""Recursively makes all child elements inside the given selector fully visible."""
if not selector:
return
def make_visible_operation():
self.page.locator(selector).locator("*").evaluate_all("""
els => els.forEach(el => {
el.style.display = 'block'; // Forces it to be displayed
el.style.visibility = 'visible'; // Ensures it's not hidden
el.style.opacity = '1'; // Fully opaque
el.style.position = 'relative'; // Avoids 'absolute' hiding
el.style.height = 'auto'; // Expands collapsed elements
el.style.width = 'auto'; // Ensures full visibility
el.removeAttribute('hidden'); // Removes hidden attribute
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
})
""")
self.safe_page_operation(make_visible_operation)
await self.page.locator(selector).locator("*").evaluate_all("""
els => els.forEach(el => {
el.style.display = 'block'; // Forces it to be displayed
el.style.visibility = 'visible'; // Ensures it's not hidden
el.style.opacity = '1'; // Fully opaque
el.style.position = 'relative'; // Avoids 'absolute' hiding
el.style.height = 'auto'; // Expands collapsed elements
el.style.width = 'auto'; // Ensures full visibility
el.removeAttribute('hidden'); // Removes hidden attribute
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
})
""")
# Responsible for maintaining a live 'context' with the chrome CDP
# @todo - how long do contexts live for anyway?
@@ -389,21 +308,22 @@ class browsersteps_live_ui(steppable_browser_interface):
self.playwright_browser = playwright_browser
self.start_url = start_url
self._is_cleaned_up = False
if self.context is None:
self.connect(proxy=proxy)
self.proxy = proxy
# Note: connect() is now async and must be called separately
def __del__(self):
# Ensure cleanup happens if object is garbage collected
self.cleanup()
# Note: cleanup is now async, so we can only mark as cleaned up here
self._is_cleaned_up = True
# Connect and setup a new context
def connect(self, proxy=None):
async def connect(self, proxy=None):
# Should only get called once - test that
keep_open = 1000 * 60 * 5
now = time.time()
# @todo handle multiple contexts, bind a unique id from the browser on each req?
self.context = self.playwright_browser.new_context(
self.context = await self.playwright_browser.new_context(
accept_downloads=False, # Should never be needed
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
extra_http_headers=self.headers,
@@ -414,7 +334,7 @@ class browsersteps_live_ui(steppable_browser_interface):
user_agent=manage_user_agent(headers=self.headers),
)
self.page = self.context.new_page()
self.page = await self.context.new_page()
# self.page.set_default_navigation_timeout(keep_open)
self.page.set_default_timeout(keep_open)
@@ -424,13 +344,15 @@ class browsersteps_live_ui(steppable_browser_interface):
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
self.page.wait_for_timeout(1 * 1000)
await self.page.wait_for_timeout(1 * 1000)
def mark_as_closed(self):
logger.debug("Page closed, cleaning up..")
self.cleanup()
# Note: This is called from a sync context (event handler)
# so we'll just mark as cleaned up and let __del__ handle the rest
self._is_cleaned_up = True
def cleanup(self):
async def cleanup(self):
"""Properly clean up all resources to prevent memory leaks"""
if self._is_cleaned_up:
return
@@ -441,7 +363,7 @@ class browsersteps_live_ui(steppable_browser_interface):
if hasattr(self, 'page') and self.page is not None:
try:
# Force garbage collection before closing
self.page.request_gc()
await self.page.request_gc()
except Exception as e:
logger.debug(f"Error during page garbage collection: {str(e)}")
@@ -452,7 +374,7 @@ class browsersteps_live_ui(steppable_browser_interface):
logger.debug(f"Error removing event listeners: {str(e)}")
try:
self.page.close()
await self.page.close()
except Exception as e:
logger.debug(f"Error closing page: {str(e)}")
@@ -461,7 +383,7 @@ class browsersteps_live_ui(steppable_browser_interface):
# Clean up context
if hasattr(self, 'context') and self.context is not None:
try:
self.context.close()
await self.context.close()
except Exception as e:
logger.debug(f"Error closing context: {str(e)}")
@@ -483,12 +405,12 @@ class browsersteps_live_ui(steppable_browser_interface):
return False
def get_current_state(self):
async def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
import importlib.resources
import json
# because we for now only run browser steps in playwright mode (not puppeteer mode)
from changedetectionio.content_fetchers.playwright import capture_full_page
from changedetectionio.content_fetchers.playwright import capture_full_page_async
# Safety check - don't proceed if resources are cleaned up
if self._is_cleaned_up or self.page is None:
@@ -498,29 +420,29 @@ class browsersteps_live_ui(steppable_browser_interface):
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
now = time.time()
self.page.wait_for_timeout(1 * 1000)
await self.page.wait_for_timeout(1 * 1000)
screenshot = None
xpath_data = None
try:
# Get screenshot first
screenshot = capture_full_page(page=self.page)
screenshot = await capture_full_page_async(page=self.page)
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
# Then get interactive elements
now = time.time()
self.page.evaluate("var include_filters=''")
self.page.request_gc()
await self.page.evaluate("var include_filters=''")
await self.page.request_gc()
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
xpath_data = json.loads(await self.page.evaluate(xpath_element_js, {
"visualselector_xpath_selectors": scan_elements,
"max_height": MAX_TOTAL_HEIGHT
}))
self.page.request_gc()
await self.page.request_gc()
# Sort elements by size
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
@@ -530,13 +452,13 @@ class browsersteps_live_ui(steppable_browser_interface):
logger.error(f"Error getting current state: {str(e)}")
# Attempt recovery - force garbage collection
try:
self.page.request_gc()
await self.page.request_gc()
except:
pass
# Request garbage collection one final time
try:
self.page.request_gc()
await self.page.request_gc()
except:
pass

View File

@@ -1,17 +0,0 @@
from playwright.sync_api import PlaywrightContextManager
# So playwright wants to run as a context manager, but we do something horrible and hacky
# we are holding the session open for as long as possible, then shutting it down, and opening a new one
# So it means we don't get to use PlaywrightContextManager' __enter__ __exit__
# To work around this, make goodbye() act the same as the __exit__()
#
# But actually I think this is because the context is opened correctly with __enter__() but we timeout the connection
# then theres some lock condition where we cant destroy it without it hanging
class c_PlaywrightContextManager(PlaywrightContextManager):
def goodbye(self) -> None:
self.__exit__()
def c_sync_playwright() -> PlaywrightContextManager:
return c_PlaywrightContextManager()

View File

@@ -1,6 +1,7 @@
from flask import Blueprint, request, redirect, url_for, flash, render_template
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
from changedetectionio import worker_handler
from changedetectionio.blueprint.imports.importer import (
import_url_list,
import_distill_io_json,
@@ -24,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
importer_handler = import_url_list()
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
for uuid in importer_handler.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
if len(importer_handler.remaining_data) == 0:
return redirect(url_for('watchlist.index'))
@@ -37,7 +38,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
d_importer = import_distill_io_json()
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# XLSX importer
if request.files and request.files.get('xlsx_file'):
@@ -60,7 +61,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
w_importer.run(data=file, flash=flash, datastore=datastore)
for uuid in w_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# Could be some remaining, or we could be on GET
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)

View File

@@ -4,6 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
from flask_login import login_required
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from queue import PriorityQueue
PRICE_DATA_TRACK_ACCEPT = 'accepted'
@@ -19,7 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
datastore.data['watching'][uuid].clear_watch()
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return redirect(url_for("watchlist.index"))
@login_required

View File

@@ -67,7 +67,32 @@ def construct_blueprint(datastore: ChangeDetectionStore):
del (app_update['password'])
datastore.data['settings']['application'].update(app_update)
# Handle dynamic worker count adjustment
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
new_worker_count = form.data['requests'].get('workers', 1)
datastore.data['settings']['requests'].update(form.data['requests'])
# Adjust worker count if it changed
if new_worker_count != old_worker_count:
from changedetectionio import worker_handler
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
result = worker_handler.adjust_async_worker_count(
new_count=new_worker_count,
update_q=update_q,
notification_q=notification_q,
app=app,
datastore=ds
)
if result['status'] == 'success':
flash(f"Worker count adjusted: {result['message']}", 'notice')
elif result['status'] == 'not_supported':
flash("Dynamic worker adjustment not supported for sync workers", 'warning')
elif result['status'] == 'error':
flash(f"Error adjusting workers: {result['message']}", 'error')
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password

View File

@@ -135,6 +135,12 @@
{{ render_field(form.application.form.webdriver_delay) }}
</div>
</fieldset>
<div class="pure-control-group">
{{ render_field(form.requests.form.workers) }}
{% set worker_info = get_worker_status_info() %}
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
</div>
<div class="pure-control-group inline-radio">
{{ render_field(form.requests.form.default_ua) }}
<span class="pure-form-message-inline">
@@ -246,6 +252,10 @@ nav
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
<span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
</div>
</div>
<div class="tab-pane-inner" id="proxies">
<div id="recommended-proxy">

View File

@@ -1,14 +1,112 @@
import time
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
from loguru import logger
from functools import wraps
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_update_threads, queuedWatchMetaData):
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
from flask import request, flash
if op == 'delete':
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.delete(uuid)
if emit_flash:
flash(f"{len(uuids)} watches deleted")
elif op == 'pause':
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]['paused'] = True
if emit_flash:
flash(f"{len(uuids)} watches paused")
elif op == 'unpause':
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = False
if emit_flash:
flash(f"{len(uuids)} watches unpaused")
elif (op == 'mark-viewed'):
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.set_last_viewed(uuid, int(time.time()))
if emit_flash:
flash(f"{len(uuids)} watches updated")
elif (op == 'mute'):
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]['notification_muted'] = True
if emit_flash:
flash(f"{len(uuids)} watches muted")
elif (op == 'unmute'):
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]['notification_muted'] = False
if emit_flash:
flash(f"{len(uuids)} watches un-muted")
elif (op == 'recheck'):
for uuid in uuids:
if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
if emit_flash:
flash(f"{len(uuids)} watches queued for rechecking")
elif (op == 'clear-errors'):
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]["last_error"] = False
if emit_flash:
flash(f"{len(uuids)} watches errors cleared")
elif (op == 'clear-history'):
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.clear_watch_history(uuid)
if emit_flash:
flash(f"{len(uuids)} watches cleared/reset.")
elif (op == 'notification-default'):
from changedetectionio.notification import (
default_notification_format_for_watch
)
for uuid in uuids:
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]['notification_title'] = None
datastore.data['watching'][uuid]['notification_body'] = None
datastore.data['watching'][uuid]['notification_urls'] = []
datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch
if emit_flash:
flash(f"{len(uuids)} watches set to use default notification settings")
elif (op == 'assign-tag'):
op_extradata = extra_data
if op_extradata:
tag_uuid = datastore.add_tag(title=op_extradata)
if op_extradata and tag_uuid:
for uuid in uuids:
if datastore.data['watching'].get(uuid):
# Bug in old versions caused by bad edit page/tag handler
if isinstance(datastore.data['watching'][uuid]['tags'], str):
datastore.data['watching'][uuid]['tags'] = []
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
if emit_flash:
flash(f"{len(uuids)} watches were tagged")
if uuids:
for uuid in uuids:
watch_check_update.send(watch_uuid=uuid)
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
# Register the edit blueprint
@@ -20,9 +118,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
ui_blueprint.register_blueprint(notification_blueprint)
# Register the views blueprint
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData)
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
ui_blueprint.register_blueprint(views_blueprint)
# Import the login decorator
from changedetectionio.auth_decorator import login_optionally_required
@@ -35,7 +133,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
flash('Watch not found', 'error')
else:
flash("Cleared snapshot history for watch {}".format(uuid))
return redirect(url_for('watchlist.index'))
@ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
@@ -47,7 +144,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
if confirmtext == 'clear':
for uuid in datastore.data['watching'].keys():
datastore.clear_watch_history(uuid)
flash("Cleared snapshot history for all watches")
else:
flash('Incorrect confirmation text.', 'error')
@@ -98,7 +194,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
new_uuid = datastore.clone(uuid)
if not datastore.data['watching'].get(uuid).get('paused'):
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
flash('Cloned, you are editing the new watch.')
@@ -114,13 +210,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
i = 0
running_uuids = []
for t in running_update_threads:
running_uuids.append(t.current_uuid)
running_uuids = worker_handler.get_running_uuids()
if uuid:
if uuid not in running_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
i += 1
else:
@@ -137,7 +231,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
if tag != None and tag not in watch['tags']:
continue
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
i += 1
if i == 1:
@@ -153,100 +247,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
@login_optionally_required
def form_watch_list_checkbox_operations():
op = request.form['op']
uuids = request.form.getlist('uuids')
if (op == 'delete'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.delete(uuid.strip())
flash("{} watches deleted".format(len(uuids)))
elif (op == 'pause'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = True
flash("{} watches paused".format(len(uuids)))
elif (op == 'unpause'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = False
flash("{} watches unpaused".format(len(uuids)))
elif (op == 'mark-viewed'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.set_last_viewed(uuid, int(time.time()))
flash("{} watches updated".format(len(uuids)))
elif (op == 'mute'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['notification_muted'] = True
flash("{} watches muted".format(len(uuids)))
elif (op == 'unmute'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['notification_muted'] = False
flash("{} watches un-muted".format(len(uuids)))
elif (op == 'recheck'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
flash("{} watches queued for rechecking".format(len(uuids)))
elif (op == 'clear-errors'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid]["last_error"] = False
flash(f"{len(uuids)} watches errors cleared")
elif (op == 'clear-history'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.clear_watch_history(uuid)
flash("{} watches cleared/reset.".format(len(uuids)))
elif (op == 'notification-default'):
from changedetectionio.notification import (
default_notification_format_for_watch
)
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['notification_title'] = None
datastore.data['watching'][uuid.strip()]['notification_body'] = None
datastore.data['watching'][uuid.strip()]['notification_urls'] = []
datastore.data['watching'][uuid.strip()]['notification_format'] = default_notification_format_for_watch
flash("{} watches set to use default notification settings".format(len(uuids)))
elif (op == 'assign-tag'):
op_extradata = request.form.get('op_extradata', '').strip()
if op_extradata:
tag_uuid = datastore.add_tag(title=op_extradata)
if op_extradata and tag_uuid:
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
# Bug in old versions caused by bad edit page/tag handler
if isinstance(datastore.data['watching'][uuid]['tags'], str):
datastore.data['watching'][uuid]['tags'] = []
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
flash(f"{len(uuids)} watches were tagged")
uuids = [u.strip() for u in request.form.getlist('uuids') if u]
extra_data = request.form.get('op_extradata', '').strip()
_handle_operations(
datastore=datastore,
extra_data=extra_data,
queuedWatchMetaData=queuedWatchMetaData,
uuids=uuids,
worker_handler=worker_handler,
update_q=update_q,
watch_check_update=watch_check_update,
op=op,
)
return redirect(url_for('watchlist.index'))

View File

@@ -9,6 +9,7 @@ from jinja2 import Environment, FileSystemLoader
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
from changedetectionio.time_handler import is_within_schedule
from changedetectionio import worker_handler
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
@@ -201,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
#############################
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
# Queue the watch for immediate recheck, with a higher priority
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff':

View File

@@ -7,8 +7,9 @@ from copy import deepcopy
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
from changedetectionio import html_tools
from changedetectionio import worker_handler
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
@@ -212,7 +213,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
else:
# Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.")
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))

View File

@@ -72,31 +72,32 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
output = render_template(
"watch-overview.html",
active_tag=active_tag,
active_tag_uuid=active_tag_uuid,
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
datastore=datastore,
errored_count=errored_count,
form=form,
guid=datastore.data['app_guid'],
has_proxies=datastore.proxy_list,
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=time.time(),
pagination=pagination,
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
search_q=request.args.get('q', '').strip(),
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=sorted_tags,
watches=sorted_watches
)
active_tag=active_tag,
active_tag_uuid=active_tag_uuid,
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
datastore=datastore,
errored_count=errored_count,
form=form,
guid=datastore.data['app_guid'],
has_proxies=datastore.proxy_list,
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()),
pagination=pagination,
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
search_q=request.args.get('q', '').strip(),
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=sorted_tags,
watches=sorted_watches
)
if session.get('share-link'):
del(session['share-link'])
del (session['share-link'])
resp = make_response(output)

View File

@@ -1,10 +1,15 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %}
{%- extends 'base.html' -%}
{%- block content -%}
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
<script>let nowtimeserver={{ now_time_server }};</script>
<script>
// Initialize Feather icons after the page loads
document.addEventListener('DOMContentLoaded', function() {
feather.replace();
});
</script>
<style>
.checking-now .last-checked {
background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%);
@@ -38,210 +43,178 @@
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
<div id="checkbox-operations">
<button class="pure-button button-secondary button-xsmall" name="op" value="pause">Pause</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause">UnPause</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="mute">Mute</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute">UnMute</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag">Tag</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors">Clear errors</button>
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history">Clear/reset history</button>
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete">Delete</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button>
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button>
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button>
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button>
</div>
{% if watches|length >= pagination.per_page %}
{%- if watches|length >= pagination.per_page -%}
{{ pagination.info }}
{% endif %}
{% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
{%- endif -%}
{%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
<div>
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
<!-- tag list -->
{% for uuid, tag in tags %}
{% if tag != "" %}
{%- for uuid, tag in tags -%}
{%- if tag != "" -%}
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
{% endif %}
{% endfor %}
{%- endif -%}
{%- endfor -%}
</div>
{% set sort_order = sort_order or 'asc' %}
{% set sort_attribute = sort_attribute or 'last_changed' %}
{% set pagination_page = request.args.get('page', 0) %}
{% set cols_required = 6 %}
{% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %}
{% if any_has_restock_price_processor %}
{% set cols_required = cols_required + 1 %}
{% endif %}
{%- set sort_order = sort_order or 'asc' -%}
{%- set sort_attribute = sort_attribute or 'last_changed' -%}
{%- set pagination_page = request.args.get('page', 0) -%}
{%- set cols_required = 6 -%}
{%- set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") -%}
{%- if any_has_restock_price_processor -%}
{%- set cols_required = cols_required + 1 -%}
{%- endif -%}
<div id="watch-table-wrapper">
<table class="pure-table pure-table-striped watch-table">
<thead>
<tr>
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
{% set arrow_span = "" %}
{%- set link_order = "desc" if sort_order == 'asc' else "asc" -%}
{%- set arrow_span = "" -%}
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
<th class="empty-cell"></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
{% if any_has_restock_price_processor %}
{%- if any_has_restock_price_processor -%}
<th>Restock &amp; Price</th>
{% endif %}
{%- endif -%}
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
<th class="empty-cell"></th>
</tr>
</thead>
<tbody>
{% if not watches|length %}
{%- if not watches|length -%}
<tr>
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
</tr>
{% endif %}
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
{% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
{% set checking_now = is_checking_now(watch) %}
<tr id="{{ watch.uuid }}"
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
{% if is_unviewed %}unviewed{% endif %}
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
{% if watch.uuid in queued_uuids %}queued{% endif %}
{% if checking_now %}checking-now{% endif %}
">
{%- endif -%}
{%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%}
{%- set checking_now = is_checking_now(watch) -%}
{%- set history_n = watch.history_n -%}
{# Mirror in changedetectionio/static/js/realtime.js for the frontend #}
{%- set row_classes = [
loop.cycle('pure-table-odd', 'pure-table-even'),
'processor-' ~ watch['processor'],
'has-error' if watch.compile_error_texts()|length > 2 else '',
'paused' if watch.paused is defined and watch.paused != False else '',
'unviewed' if watch.has_unviewed else '',
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '',
'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '',
'queued' if watch.uuid in queued_uuids else '',
'checking-now' if checking_now else '',
'notification_muted' if watch.notification_muted else '',
'single-history' if history_n == 1 else '',
'multiple-history' if history_n >= 2 else ''
] -%}
<tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}">
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
<td class="inline watch-controls">
{% if not watch.paused %}
<a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
{% else %}
<a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
{% endif %}
{% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
{% if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
{%- if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
%}
-%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{% endif %}
{%- endif -%}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
{% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
{% if '403' in watch.last_error %}
{% if has_proxies %}
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>&nbsp;
{% endif %}
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
{% endif %}
{% if 'empty result or contain only an image' in watch.last_error %}
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
{% endif %}
</div>
{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
<div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div>
{% endif %}
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
{% if watch['processor'] == 'text_json_diff' %}
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
{%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{% endif %}
{% endif %}
{% if watch['processor'] == 'restock_diff' %}
{%- endif -%}
{%- endif -%}
{%- if watch['processor'] == 'restock_diff' -%}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
{% endif %}
{% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
{%- endif -%}
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
<span class="watch-tag-list">{{ watch_tag.title }}</span>
{% endfor %}
{%- endfor -%}
</td>
<!-- @todo make it so any watch handler obj can expose this --->
{% if any_has_restock_price_processor %}
{%- if any_has_restock_price_processor -%}
<td class="restock-and-price">
{% if watch['processor'] == 'restock_diff' %}
{% if watch.has_restock_info %}
{%- if watch['processor'] == 'restock_diff' -%}
{%- if watch.has_restock_info -%}
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
<!-- maybe some object watch['processor'][restock_diff] or.. -->
{% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
{%- if watch['restock']['in_stock']-%} In stock {%- else-%} Not in stock {%- endif -%}
</span>
{% endif %}
{%- endif -%}
{% if watch.get('restock') and watch['restock']['price'] != None %}
{% if watch['restock']['price'] != None %}
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
{%- if watch['restock']['price'] != None -%}
<span class="restock-label price" title="Price">
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
</span>
{% endif %}
{% elif not watch.has_restock_info %}
{%- endif -%}
{%- elif not watch.has_restock_info -%}
<span class="restock-label error">No information</span>
{% endif %}
{% endif %}
{%- endif -%}
{%- endif -%}
</td>
{% endif %}
{%- endif -%}
{#last_checked becomes fetch-start-time#}
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} >
{% if checking_now %}
<span class="spinner"></span><span> Checking now</span>
{% else %}
{{watch|format_last_checked_time|safe}}</td>
{% endif %}
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
<div class="spinner-wrapper" style="display:none;" >
<span class="spinner"></span><span>&nbsp;Checking now</span>
</div>
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
</td>
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
{{watch.last_changed|format_timestamp_timeago}}
{% else %}
{%- else -%}
Not yet
{% endif %}
{%- endif -%}
</td>
<td>
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
{%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%}
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
{% if watch.history_n >= 2 %}
{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
{% if is_unviewed %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% else %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% endif %}
{% else %}
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
{% endif %}
{% endif %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
</td>
</tr>
{% endfor %}
{%- endfor -%}
</tbody>
</table>
<ul id="post-list-buttons">
{% if errored_count %}
<li>
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a>
<li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" >
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
</li>
{% endif %}
{% if has_unviewed %}
<li>
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a>
<li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" >
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
</li>
{% endif %}
<li>
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
all {%- if active_tag_uuid-%} in "{{active_tag.title}}"{%endif%}</a>
</li>
<li>
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
@@ -251,4 +224,4 @@
</div>
</form>
</div>
{% endblock %}
{%- endblock -%}

View File

@@ -68,7 +68,7 @@ class Fetcher():
return self.error
@abstractmethod
def run(self,
async def run(self,
url,
timeout,
request_headers,
@@ -122,7 +122,7 @@ class Fetcher():
return None
def iterate_browser_steps(self, start_url=None):
async def iterate_browser_steps(self, start_url=None):
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
from playwright._impl._errors import TimeoutError, Error
from changedetectionio.safe_jinja import render as jinja_render
@@ -136,8 +136,8 @@ class Fetcher():
for step in valid_steps:
step_n += 1
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
self.screenshot_step("before-" + str(step_n))
self.save_step_html("before-" + str(step_n))
await self.screenshot_step("before-" + str(step_n))
await self.save_step_html("before-" + str(step_n))
try:
optional_value = step['optional_value']
@@ -148,11 +148,11 @@ class Fetcher():
if '{%' in step['selector'] or '{{' in step['selector']:
selector = jinja_render(template_str=step['selector'])
getattr(interface, "call_action")(action_name=step['operation'],
await getattr(interface, "call_action")(action_name=step['operation'],
selector=selector,
optional_value=optional_value)
self.screenshot_step(step_n)
self.save_step_html(step_n)
await self.screenshot_step(step_n)
await self.save_step_html(step_n)
except (Error, TimeoutError) as e:
logger.debug(str(e))
# Stop processing here

View File

@@ -9,15 +9,15 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
def capture_full_page(page):
async def capture_full_page_async(page):
import os
import time
from multiprocessing import Process, Pipe
start = time.time()
page_height = page.evaluate("document.documentElement.scrollHeight")
page_width = page.evaluate("document.documentElement.scrollWidth")
page_height = await page.evaluate("document.documentElement.scrollHeight")
page_width = await page.evaluate("document.documentElement.scrollWidth")
original_viewport = page.viewport_size
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
@@ -32,23 +32,23 @@ def capture_full_page(page):
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
# Set viewport to a larger size to capture more content at once
page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
# Capture screenshots in chunks up to the max total height
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
page.request_gc()
page.evaluate(f"window.scrollTo(0, {y})")
page.request_gc()
screenshot_chunks.append(page.screenshot(
await page.request_gc()
await page.evaluate(f"window.scrollTo(0, {y})")
await page.request_gc()
screenshot_chunks.append(await page.screenshot(
type="jpeg",
full_page=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
))
y += step_size
page.request_gc()
await page.request_gc()
# Restore original viewport size
page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
# If we have multiple chunks, stitch them together
if len(screenshot_chunks) > 1:
@@ -73,7 +73,6 @@ def capture_full_page(page):
return screenshot_chunks[0]
class fetcher(Fetcher):
fetcher_description = "Playwright {}/Javascript".format(
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
@@ -124,9 +123,9 @@ class fetcher(Fetcher):
self.proxy['username'] = parsed.username
self.proxy['password'] = parsed.password
def screenshot_step(self, step_n=''):
async def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
screenshot = capture_full_page(page=self.page)
screenshot = await capture_full_page_async(page=self.page)
if self.browser_steps_screenshot_path is not None:
@@ -135,15 +134,15 @@ class fetcher(Fetcher):
with open(destination, 'wb') as f:
f.write(screenshot)
def save_step_html(self, step_n):
async def save_step_html(self, step_n):
super().save_step_html(step_n=step_n)
content = self.page.content()
content = await self.page.content()
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
logger.debug(f"Saving step HTML to {destination}")
with open(destination, 'w') as f:
f.write(content)
def run(self,
async def run(self,
url,
timeout,
request_headers,
@@ -154,26 +153,26 @@ class fetcher(Fetcher):
is_binary=False,
empty_pages_are_a_change=False):
from playwright.sync_api import sync_playwright
from playwright.async_api import async_playwright
import playwright._impl._errors
import time
self.delete_browser_steps_screenshots()
response = None
with sync_playwright() as p:
async with async_playwright() as p:
browser_type = getattr(p, self.browser_type)
# Seemed to cause a connection Exception even tho I can see it connect
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
# 60,000 connection timeout only
browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
# SOCKS5 with authentication is not supported (yet)
# https://github.com/microsoft/playwright/issues/10567
# Set user agent to prevent Cloudflare from blocking the browser
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
context = browser.new_context(
context = await browser.new_context(
accept_downloads=False, # Should never be needed
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
extra_http_headers=request_headers,
@@ -183,41 +182,47 @@ class fetcher(Fetcher):
user_agent=manage_user_agent(headers=request_headers),
)
self.page = context.new_page()
self.page = await context.new_page()
# Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
# Re-use as much code from browser steps as possible so its the same
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
browsersteps_interface = steppable_browser_interface(start_url=url)
browsersteps_interface.page = self.page
response = browsersteps_interface.action_goto_url(value=url)
self.headers = response.all_headers()
response = await browsersteps_interface.action_goto_url(value=url)
if response is None:
context.close()
browser.close()
await context.close()
await browser.close()
logger.debug("Content Fetcher > Response object from the browser communication was none")
raise EmptyReply(url=url, status_code=None)
# In async_playwright, all_headers() returns a coroutine
try:
self.headers = await response.all_headers()
except TypeError:
# Fallback for sync version
self.headers = response.all_headers()
try:
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
except playwright._impl._errors.TimeoutError as e:
context.close()
browser.close()
await context.close()
await browser.close()
# This can be ok, we will try to grab what we could retrieve
pass
except Exception as e:
logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
context.close()
browser.close()
await context.close()
await browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e))
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
self.page.wait_for_timeout(extra_wait * 1000)
await self.page.wait_for_timeout(extra_wait * 1000)
try:
self.status_code = response.status
@@ -225,48 +230,48 @@ class fetcher(Fetcher):
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
logger.critical(response)
context.close()
browser.close()
await context.close()
await browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e))
if self.status_code != 200 and not ignore_status_codes:
screenshot = capture_full_page(self.page)
screenshot = await capture_full_page_async(self.page)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
context.close()
browser.close()
await context.close()
await browser.close()
raise EmptyReply(url=url, status_code=response.status)
# Run Browser Steps here
if self.browser_steps_get_valid_steps():
self.iterate_browser_steps(start_url=url)
await self.iterate_browser_steps(start_url=url)
self.page.wait_for_timeout(extra_wait * 1000)
await self.page.wait_for_timeout(extra_wait * 1000)
now = time.time()
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
if current_include_filters is not None:
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
else:
self.page.evaluate("var include_filters=''")
self.page.request_gc()
await self.page.evaluate("var include_filters=''")
await self.page.request_gc()
# request_gc before and after evaluate to free up memory
# @todo browsersteps etc
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
self.xpath_data = self.page.evaluate(XPATH_ELEMENT_JS, {
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT
})
self.page.request_gc()
await self.page.request_gc()
self.instock_data = self.page.evaluate(INSTOCK_DATA_JS)
self.page.request_gc()
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
await self.page.request_gc()
self.content = self.page.content()
self.page.request_gc()
self.content = await self.page.content()
await self.page.request_gc()
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
# Bug 3 in Playwright screenshot handling
@@ -278,7 +283,7 @@ class fetcher(Fetcher):
# acceptable screenshot quality here
try:
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
self.screenshot = capture_full_page(page=self.page)
self.screenshot = await capture_full_page_async(page=self.page)
except Exception as e:
# It's likely the screenshot was too long/big and something crashed
@@ -286,30 +291,30 @@ class fetcher(Fetcher):
finally:
# Request garbage collection one more time before closing
try:
self.page.request_gc()
await self.page.request_gc()
except:
pass
# Clean up resources properly
try:
self.page.request_gc()
await self.page.request_gc()
except:
pass
try:
self.page.close()
await self.page.close()
except:
pass
self.page = None
try:
context.close()
await context.close()
except:
pass
context = None
try:
browser.close()
await browser.close()
except:
pass
browser = None

View File

@@ -310,15 +310,15 @@ class fetcher(Fetcher):
async def main(self, **kwargs):
await self.fetch_page(**kwargs)
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
async def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))
# This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only
# Now we run this properly in async context since we're called from async worker
try:
asyncio.run(asyncio.wait_for(self.main(
await asyncio.wait_for(self.main(
url=url,
timeout=timeout,
request_headers=request_headers,
@@ -328,7 +328,7 @@ class fetcher(Fetcher):
current_include_filters=current_include_filters,
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
), timeout=max_time))
), timeout=max_time)
except asyncio.TimeoutError:
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))

View File

@@ -1,6 +1,7 @@
from loguru import logger
import hashlib
import os
import asyncio
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher
@@ -15,7 +16,7 @@ class fetcher(Fetcher):
self.proxy_override = proxy_override
# browser_connection_url is none because its always 'launched locally'
def run(self,
def _run_sync(self,
url,
timeout,
request_headers,
@@ -25,9 +26,11 @@ class fetcher(Fetcher):
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
"""Synchronous version of run - the original requests implementation"""
import chardet
import requests
from requests.exceptions import ProxyError, ConnectionError, RequestException
if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -35,7 +38,6 @@ class fetcher(Fetcher):
proxies = {}
# Allows override the proxy on a per-request basis
# https://requests.readthedocs.io/en/latest/user/advanced/#socks
# Should also work with `socks5://user:pass@host:port` type syntax.
@@ -52,14 +54,19 @@ class fetcher(Fetcher):
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
from requests_file import FileAdapter
session.mount('file://', FileAdapter())
r = session.request(method=request_method,
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
url=url,
headers=request_headers,
timeout=timeout,
proxies=proxies,
verify=False)
try:
r = session.request(method=request_method,
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
url=url,
headers=request_headers,
timeout=timeout,
proxies=proxies,
verify=False)
except Exception as e:
msg = str(e)
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
msg = f"Proxy connection failed? {msg}"
raise Exception(msg) from e
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
# For example - some sites don't tell us it's utf-8, but return utf-8 content
@@ -94,9 +101,38 @@ class fetcher(Fetcher):
else:
self.content = r.text
self.raw_content = r.content
async def run(self,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
"""Async wrapper that runs the synchronous requests code in a thread pool"""
loop = asyncio.get_event_loop()
# Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
await loop.run_in_executor(
None, # Use default ThreadPoolExecutor
lambda: self._run_sync(
url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
)
)
def quit(self, watch=None):
# In case they switched to `requests` fetcher from something else

View File

@@ -10,16 +10,13 @@ class fetcher(Fetcher):
else:
fetcher_description = "WebDriver Chrome/Javascript"
# Configs for Proxy setup
# In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
proxy = None
proxy_url = None
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
from urllib.parse import urlparse
from selenium.webdriver.common.proxy import Proxy
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
if not custom_browser_connection_url:
@@ -28,27 +25,29 @@ class fetcher(Fetcher):
self.browser_connection_is_custom = True
self.browser_connection_url = custom_browser_connection_url
# If any proxy settings are enabled, then we should setup the proxy object
proxy_args = {}
for k in self.selenium_proxy_settings_mappings:
v = os.getenv('webdriver_' + k, False)
if v:
proxy_args[k] = v.strip('"')
# Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
proxy_args['httpProxy'] = self.system_http_proxy
if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
proxy_args['httpsProxy'] = self.system_https_proxy
##### PROXY SETUP #####
# Allows override the proxy on a per-request basis
if proxy_override is not None:
proxy_args['httpProxy'] = proxy_override
proxy_sources = [
self.system_http_proxy,
self.system_https_proxy,
os.getenv('webdriver_proxySocks'),
os.getenv('webdriver_socksProxy'),
os.getenv('webdriver_proxyHttp'),
os.getenv('webdriver_httpProxy'),
os.getenv('webdriver_proxyHttps'),
os.getenv('webdriver_httpsProxy'),
os.getenv('webdriver_sslProxy'),
proxy_override, # last one should override
]
# The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
for k in filter(None, proxy_sources):
if not k:
continue
self.proxy_url = k.strip()
if proxy_args:
self.proxy = SeleniumProxy(raw=proxy_args)
def run(self,
async def run(self,
url,
timeout,
request_headers,
@@ -59,76 +58,86 @@ class fetcher(Fetcher):
is_binary=False,
empty_pages_are_a_change=False):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.common.exceptions import WebDriverException
# request_body, request_method unused for now, until some magic in the future happens.
import asyncio
# Wrap the entire selenium operation in a thread executor
def _run_sync():
from selenium.webdriver.chrome.options import Options as ChromeOptions
# request_body, request_method unused for now, until some magic in the future happens.
options = ChromeOptions()
options = ChromeOptions()
# Load Chrome options from env
CHROME_OPTIONS = [
line.strip()
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
if line.strip()
]
# Load Chrome options from env
CHROME_OPTIONS = [
line.strip()
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
if line.strip()
]
for opt in CHROME_OPTIONS:
options.add_argument(opt)
for opt in CHROME_OPTIONS:
options.add_argument(opt)
if self.proxy:
options.proxy = self.proxy
# 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable
# 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng
# 3. selenium only allows ONE runner at a time by default!
# 4. driver must use quit() or it will continue to block/hold the selenium process!!
self.driver = webdriver.Remote(
command_executor=self.browser_connection_url,
options=options)
if self.proxy_url:
options.add_argument(f'--proxy-server={self.proxy_url}')
try:
self.driver.get(url)
except WebDriverException as e:
# Be sure we close the session window
self.quit()
raise
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
self.driver.set_window_size(1280, 1024)
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
if self.webdriver_js_execute_code is not None:
self.driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
# @todo - how to check this? is it possible?
self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
# raise EmptyReply(url=url, status_code=r.status_code)
# @todo - dom wait loaded?
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = self.driver.page_source
self.headers = {}
self.screenshot = self.driver.get_screenshot_as_png()
# Does the connection to the webdriver work? run a test connection.
def is_ready(self):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
self.driver = webdriver.Remote(
command_executor=self.command_executor,
options=ChromeOptions())
# driver.quit() seems to cause better exceptions
self.quit()
return True
def quit(self, watch=None):
if self.driver:
from selenium.webdriver.remote.remote_connection import RemoteConnection
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
driver = None
try:
self.driver.quit()
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
remote_connection = RemoteConnection(
self.browser_connection_url,
)
remote_connection.set_timeout(30) # seconds
# Now create the driver with the RemoteConnection
driver = RemoteWebDriver(
command_executor=remote_connection,
options=options
)
driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
except Exception as e:
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
if driver:
driver.quit()
raise e
try:
driver.get(url)
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
driver.set_window_size(1280, 1024)
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
if self.webdriver_js_execute_code is not None:
driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
# @todo - how to check this? is it possible?
self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
# raise EmptyReply(url=url, status_code=r.status_code)
# @todo - dom wait loaded?
import time
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = driver.page_source
self.headers = {}
self.screenshot = driver.get_screenshot_as_png()
except Exception as e:
driver.quit()
raise e
driver.quit()
# Run the selenium operations in a thread pool to avoid blocking the event loop
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, _run_sync)

View File

@@ -0,0 +1,500 @@
import queue
import asyncio
from blinker import signal
from loguru import logger
class SignalPriorityQueue(queue.PriorityQueue):
"""
Extended PriorityQueue that sends a signal when items with a UUID are added.
This class extends the standard PriorityQueue and adds a signal emission
after an item is put into the queue. If the item contains a UUID, the signal
is sent with that UUID as a parameter.
"""
def __init__(self, maxsize=0):
super().__init__(maxsize)
try:
self.queue_length_signal = signal('queue_length')
except Exception as e:
logger.critical(f"Exception: {e}")
def put(self, item, block=True, timeout=None):
# Call the parent's put method first
super().put(item, block, timeout)
# After putting the item in the queue, check if it has a UUID and emit signal
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
uuid = item.item['uuid']
# Get the signal and send it if it exists
watch_check_update = signal('watch_check_update')
if watch_check_update:
# Send the watch_uuid parameter
watch_check_update.send(watch_uuid=uuid)
# Send queue_length signal with current queue size
try:
if self.queue_length_signal:
self.queue_length_signal.send(length=self.qsize())
except Exception as e:
logger.critical(f"Exception: {e}")
def get(self, block=True, timeout=None):
# Call the parent's get method first
item = super().get(block, timeout)
# Send queue_length signal with current queue size
try:
if self.queue_length_signal:
self.queue_length_signal.send(length=self.qsize())
except Exception as e:
logger.critical(f"Exception: {e}")
return item
def get_uuid_position(self, target_uuid):
"""
Find the position of a watch UUID in the priority queue.
Optimized for large queues - O(n) complexity instead of O(n log n).
Args:
target_uuid: The UUID to search for
Returns:
dict: Contains position info or None if not found
- position: 0-based position in queue (0 = next to be processed)
- total_items: total number of items in queue
- priority: the priority value of the found item
"""
with self.mutex:
queue_list = list(self.queue)
total_items = len(queue_list)
if total_items == 0:
return {
'position': None,
'total_items': 0,
'priority': None,
'found': False
}
# Find the target item and its priority first - O(n)
target_item = None
target_priority = None
for item in queue_list:
if (hasattr(item, 'item') and
isinstance(item.item, dict) and
item.item.get('uuid') == target_uuid):
target_item = item
target_priority = item.priority
break
if target_item is None:
return {
'position': None,
'total_items': total_items,
'priority': None,
'found': False
}
# Count how many items have higher priority (lower numbers) - O(n)
position = 0
for item in queue_list:
# Items with lower priority numbers are processed first
if item.priority < target_priority:
position += 1
elif item.priority == target_priority and item != target_item:
# For same priority, count items that come before this one
# (Note: this is approximate since heap order isn't guaranteed for equal priorities)
position += 1
return {
'position': position,
'total_items': total_items,
'priority': target_priority,
'found': True
}
def get_all_queued_uuids(self, limit=None, offset=0):
"""
Get UUIDs currently in the queue with their positions.
For large queues, use limit/offset for pagination.
Args:
limit: Maximum number of items to return (None = all)
offset: Number of items to skip (for pagination)
Returns:
dict: Contains items and metadata
- items: List of dicts with uuid, position, and priority
- total_items: Total number of items in queue
- returned_items: Number of items returned
- has_more: Whether there are more items after this page
"""
with self.mutex:
queue_list = list(self.queue)
total_items = len(queue_list)
if total_items == 0:
return {
'items': [],
'total_items': 0,
'returned_items': 0,
'has_more': False
}
# For very large queues, warn about performance
if total_items > 1000 and limit is None:
logger.warning(f"Getting all {total_items} queued items without limit - this may be slow")
# Sort only if we need exact positions (expensive for large queues)
if limit is not None and limit <= 100:
# For small requests, we can afford to sort
queue_items = sorted(queue_list)
end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
items_to_process = queue_items[offset:end_idx]
result = []
for position, item in enumerate(items_to_process, start=offset):
if (hasattr(item, 'item') and
isinstance(item.item, dict) and
'uuid' in item.item):
result.append({
'uuid': item.item['uuid'],
'position': position,
'priority': item.priority
})
return {
'items': result,
'total_items': total_items,
'returned_items': len(result),
'has_more': (offset + len(result)) < total_items
}
else:
# For large requests, return items with approximate positions
# This is much faster O(n) instead of O(n log n)
result = []
processed = 0
skipped = 0
for item in queue_list:
if (hasattr(item, 'item') and
isinstance(item.item, dict) and
'uuid' in item.item):
if skipped < offset:
skipped += 1
continue
if limit and processed >= limit:
break
# Approximate position based on priority comparison
approx_position = sum(1 for other in queue_list if other.priority < item.priority)
result.append({
'uuid': item.item['uuid'],
'position': approx_position, # Approximate
'priority': item.priority
})
processed += 1
return {
'items': result,
'total_items': total_items,
'returned_items': len(result),
'has_more': (offset + len(result)) < total_items,
'note': 'Positions are approximate for performance with large queues'
}
def get_queue_summary(self):
"""
Get a quick summary of queue state without expensive operations.
O(n) complexity - fast even for large queues.
Returns:
dict: Queue summary statistics
"""
with self.mutex:
queue_list = list(self.queue)
total_items = len(queue_list)
if total_items == 0:
return {
'total_items': 0,
'priority_breakdown': {},
'immediate_items': 0,
'clone_items': 0,
'scheduled_items': 0
}
# Count items by priority type - O(n)
immediate_items = 0 # priority 1
clone_items = 0 # priority 5
scheduled_items = 0 # priority > 100 (timestamps)
priority_counts = {}
for item in queue_list:
priority = item.priority
priority_counts[priority] = priority_counts.get(priority, 0) + 1
if priority == 1:
immediate_items += 1
elif priority == 5:
clone_items += 1
elif priority > 100:
scheduled_items += 1
return {
'total_items': total_items,
'priority_breakdown': priority_counts,
'immediate_items': immediate_items,
'clone_items': clone_items,
'scheduled_items': scheduled_items,
'min_priority': min(priority_counts.keys()) if priority_counts else None,
'max_priority': max(priority_counts.keys()) if priority_counts else None
}
class AsyncSignalPriorityQueue(asyncio.PriorityQueue):
"""
Async version of SignalPriorityQueue that sends signals when items are added/removed.
This class extends asyncio.PriorityQueue and maintains the same signal behavior
as the synchronous version for real-time UI updates.
"""
def __init__(self, maxsize=0):
super().__init__(maxsize)
try:
self.queue_length_signal = signal('queue_length')
except Exception as e:
logger.critical(f"Exception: {e}")
async def put(self, item):
# Call the parent's put method first
await super().put(item)
# After putting the item in the queue, check if it has a UUID and emit signal
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
uuid = item.item['uuid']
# Get the signal and send it if it exists
watch_check_update = signal('watch_check_update')
if watch_check_update:
# Send the watch_uuid parameter
watch_check_update.send(watch_uuid=uuid)
# Send queue_length signal with current queue size
try:
if self.queue_length_signal:
self.queue_length_signal.send(length=self.qsize())
except Exception as e:
logger.critical(f"Exception: {e}")
async def get(self):
# Call the parent's get method first
item = await super().get()
# Send queue_length signal with current queue size
try:
if self.queue_length_signal:
self.queue_length_signal.send(length=self.qsize())
except Exception as e:
logger.critical(f"Exception: {e}")
return item
@property
def queue(self):
"""
Provide compatibility with sync PriorityQueue.queue access
Returns the internal queue for template access
"""
return self._queue if hasattr(self, '_queue') else []
def get_uuid_position(self, target_uuid):
"""
Find the position of a watch UUID in the async priority queue.
Optimized for large queues - O(n) complexity instead of O(n log n).
Args:
target_uuid: The UUID to search for
Returns:
dict: Contains position info or None if not found
- position: 0-based position in queue (0 = next to be processed)
- total_items: total number of items in queue
- priority: the priority value of the found item
"""
queue_list = list(self._queue)
total_items = len(queue_list)
if total_items == 0:
return {
'position': None,
'total_items': 0,
'priority': None,
'found': False
}
# Find the target item and its priority first - O(n)
target_item = None
target_priority = None
for item in queue_list:
if (hasattr(item, 'item') and
isinstance(item.item, dict) and
item.item.get('uuid') == target_uuid):
target_item = item
target_priority = item.priority
break
if target_item is None:
return {
'position': None,
'total_items': total_items,
'priority': None,
'found': False
}
# Count how many items have higher priority (lower numbers) - O(n)
position = 0
for item in queue_list:
if item.priority < target_priority:
position += 1
elif item.priority == target_priority and item != target_item:
position += 1
return {
'position': position,
'total_items': total_items,
'priority': target_priority,
'found': True
}
def get_all_queued_uuids(self, limit=None, offset=0):
"""
Get UUIDs currently in the async queue with their positions.
For large queues, use limit/offset for pagination.
Args:
limit: Maximum number of items to return (None = all)
offset: Number of items to skip (for pagination)
Returns:
dict: Contains items and metadata (same structure as sync version)
"""
queue_list = list(self._queue)
total_items = len(queue_list)
if total_items == 0:
return {
'items': [],
'total_items': 0,
'returned_items': 0,
'has_more': False
}
# Same logic as sync version but without mutex
if limit is not None and limit <= 100:
queue_items = sorted(queue_list)
end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
items_to_process = queue_items[offset:end_idx]
result = []
for position, item in enumerate(items_to_process, start=offset):
if (hasattr(item, 'item') and
isinstance(item.item, dict) and
'uuid' in item.item):
result.append({
'uuid': item.item['uuid'],
'position': position,
'priority': item.priority
})
return {
'items': result,
'total_items': total_items,
'returned_items': len(result),
'has_more': (offset + len(result)) < total_items
}
else:
# Fast approximate positions for large queues
result = []
processed = 0
skipped = 0
for item in queue_list:
if (hasattr(item, 'item') and
isinstance(item.item, dict) and
'uuid' in item.item):
if skipped < offset:
skipped += 1
continue
if limit and processed >= limit:
break
approx_position = sum(1 for other in queue_list if other.priority < item.priority)
result.append({
'uuid': item.item['uuid'],
'position': approx_position,
'priority': item.priority
})
processed += 1
return {
'items': result,
'total_items': total_items,
'returned_items': len(result),
'has_more': (offset + len(result)) < total_items,
'note': 'Positions are approximate for performance with large queues'
}
def get_queue_summary(self):
"""
Get a quick summary of async queue state.
O(n) complexity - fast even for large queues.
"""
queue_list = list(self._queue)
total_items = len(queue_list)
if total_items == 0:
return {
'total_items': 0,
'priority_breakdown': {},
'immediate_items': 0,
'clone_items': 0,
'scheduled_items': 0
}
immediate_items = 0
clone_items = 0
scheduled_items = 0
priority_counts = {}
for item in queue_list:
priority = item.priority
priority_counts[priority] = priority_counts.get(priority, 0) + 1
if priority == 1:
immediate_items += 1
elif priority == 5:
clone_items += 1
elif priority > 100:
scheduled_items += 1
return {
'total_items': total_items,
'priority_breakdown': priority_counts,
'immediate_items': immediate_items,
'clone_items': clone_items,
'scheduled_items': scheduled_items,
'min_priority': min(priority_counts.keys()) if priority_counts else None,
'max_priority': max(priority_counts.keys()) if priority_counts else None
}

View File

@@ -4,12 +4,16 @@ import flask_login
import locale
import os
import queue
import sys
import threading
import time
import timeago
from blinker import signal
from changedetectionio.strtobool import strtobool
from threading import Event
from changedetectionio.custom_queue import SignalPriorityQueue, AsyncSignalPriorityQueue
from changedetectionio import worker_handler
from flask import (
Flask,
@@ -25,9 +29,12 @@ from flask import (
)
from flask_compress import Compress as FlaskCompress
from flask_login import current_user
from flask_paginate import Pagination, get_page_parameter
from flask_restful import abort, Api
from flask_cors import CORS
# Create specific signals for application events
# Make this a global singleton to avoid multiple signal objects
watch_check_update = signal('watch_check_update', doc='Signal sent when a watch check is completed')
from flask_wtf import CSRFProtect
from loguru import logger
@@ -40,12 +47,11 @@ from .time_handler import is_within_schedule
datastore = None
# Local
running_update_threads = []
ticker_thread = None
extra_stylesheets = []
update_q = queue.PriorityQueue()
# Use async queue by default, keep sync for backward compatibility
update_q = AsyncSignalPriorityQueue() if worker_handler.USE_ASYNC_WORKERS else SignalPriorityQueue()
notification_q = queue.Queue()
MAX_QUEUE_SIZE = 2000
@@ -54,6 +60,9 @@ app = Flask(__name__,
static_folder="static",
template_folder="templates")
# Will be initialized in changedetection_app
socketio_server = None
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
CORS(app)
@@ -115,6 +124,18 @@ def get_darkmode_state():
def get_css_version():
return __version__
@app.template_global()
def get_socketio_path():
"""Generate the correct Socket.IO path prefix for the client"""
# If behind a proxy with a sub-path, we need to respect that path
prefix = ""
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
prefix = request.headers['X-Forwarded-Prefix']
# Socket.IO will be available at {prefix}/socket.io/
return prefix
@app.template_filter('format_number_locale')
def _jinja2_filter_format_number_locale(value: float) -> str:
"Formats for example 4000.10 to the local locale default of 4,000.10"
@@ -125,10 +146,32 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
@app.template_global('is_checking_now')
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
# Worker thread tells us which UUID it is currently processing.
for t in running_update_threads:
if t.current_uuid == watch_obj['uuid']:
return True
return worker_handler.is_watch_running(watch_obj['uuid'])
@app.template_global('get_watch_queue_position')
def _get_watch_queue_position(watch_obj):
"""Get the position of a watch in the queue"""
uuid = watch_obj['uuid']
return update_q.get_uuid_position(uuid)
@app.template_global('get_current_worker_count')
def _get_current_worker_count():
"""Get the current number of operational workers"""
return worker_handler.get_worker_count()
@app.template_global('get_worker_status_info')
def _get_worker_status_info():
"""Get detailed worker status information for display"""
status = worker_handler.get_worker_status()
running_uuids = worker_handler.get_running_uuids()
return {
'count': status['worker_count'],
'type': status['worker_type'],
'active_workers': len(running_uuids),
'processing_watches': running_uuids,
'loop_running': status.get('async_loop_running', None)
}
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
@@ -215,12 +258,15 @@ class User(flask_login.UserMixin):
def changedetection_app(config=None, datastore_o=None):
logger.trace("TRACE log is enabled")
global datastore
global datastore, socketio_server
datastore = datastore_o
# so far just for read-only via tests, but this will be moved eventually to be the main source
# (instead of the global var)
app.config['DATASTORE'] = datastore_o
# Store the signal in the app config to ensure it's accessible everywhere
app.config['watch_check_update_SIGNAL'] = watch_check_update
login_manager = flask_login.LoginManager(app)
login_manager.login_view = 'login'
@@ -248,6 +294,9 @@ def changedetection_app(config=None, datastore_o=None):
# RSS access with token is allowed
elif request.endpoint and 'rss.feed' in request.endpoint:
return None
# Socket.IO routes - need separate handling
elif request.path.startswith('/socket.io/'):
return None
# API routes - use their own auth mechanism (@auth.check_token)
elif request.path.startswith('/api/'):
return None
@@ -444,11 +493,22 @@ def changedetection_app(config=None, datastore_o=None):
# watchlist UI buttons etc
import changedetectionio.blueprint.ui as ui
app.register_blueprint(ui.construct_blueprint(datastore, update_q, running_update_threads, queuedWatchMetaData))
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
import changedetectionio.blueprint.watchlist as watchlist
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
# Initialize Socket.IO server conditionally based on settings
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
if socket_io_enabled:
from changedetectionio.realtime.socket_server import init_socketio
global socketio_server
socketio_server = init_socketio(app, datastore)
logger.info("Socket.IO server initialized")
else:
logger.info("Socket.IO server disabled via settings")
socketio_server = None
# Memory cleanup endpoint
@app.route('/gc-cleanup', methods=['GET'])
@login_optionally_required
@@ -459,14 +519,95 @@ def changedetection_app(config=None, datastore_o=None):
result = memory_cleanup(app)
return jsonify({"status": "success", "message": "Memory cleanup completed", "result": result})
# Worker health check endpoint
@app.route('/worker-health', methods=['GET'])
@login_optionally_required
def worker_health():
from flask import jsonify
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
# Get basic status
status = worker_handler.get_worker_status()
# Perform health check
health_result = worker_handler.check_worker_health(
expected_count=expected_workers,
update_q=update_q,
notification_q=notification_q,
app=app,
datastore=datastore
)
return jsonify({
"status": "success",
"worker_status": status,
"health_check": health_result,
"expected_workers": expected_workers
})
# Queue status endpoint
@app.route('/queue-status', methods=['GET'])
@login_optionally_required
def queue_status():
from flask import jsonify, request
# Get specific UUID position if requested
target_uuid = request.args.get('uuid')
if target_uuid:
position_info = update_q.get_uuid_position(target_uuid)
return jsonify({
"status": "success",
"uuid": target_uuid,
"queue_position": position_info
})
else:
# Get pagination parameters
limit = request.args.get('limit', type=int)
offset = request.args.get('offset', type=int, default=0)
summary_only = request.args.get('summary', type=bool, default=False)
if summary_only:
# Fast summary for large queues
summary = update_q.get_queue_summary()
return jsonify({
"status": "success",
"queue_summary": summary
})
else:
# Get queued items with pagination support
if limit is None:
# Default limit for large queues to prevent performance issues
queue_size = update_q.qsize()
if queue_size > 100:
limit = 50
logger.warning(f"Large queue ({queue_size} items) detected, limiting to {limit} items. Use ?limit=N for more.")
all_queued = update_q.get_all_queued_uuids(limit=limit, offset=offset)
return jsonify({
"status": "success",
"queue_size": update_q.qsize(),
"queued_data": all_queued
})
# Start the async workers during app initialization
# Can be overridden by ENV or use the default settings
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
logger.info(f"Starting {n_workers} workers during app initialization")
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
# @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
threading.Thread(target=notification_runner).start()
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
# Check for new release version, but not when running in test/build or pytest
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')):
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
threading.Thread(target=check_for_new_version).start()
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
# This avoids circular dependencies
return app
@@ -502,73 +643,87 @@ def notification_runner():
global notification_debug_log
from datetime import datetime
import json
while not app.config.exit.is_set():
try:
# At the moment only one thread runs (single runner)
n_object = notification_q.get(block=False)
except queue.Empty:
time.sleep(1)
else:
now = datetime.now()
sent_obj = None
with app.app_context():
while not app.config.exit.is_set():
try:
from changedetectionio.notification.handler import process_notification
# At the moment only one thread runs (single runner)
n_object = notification_q.get(block=False)
except queue.Empty:
time.sleep(1)
# Fallback to system config if not set
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
else:
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
now = datetime.now()
sent_obj = None
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
if n_object.get('notification_urls', {}):
sent_obj = process_notification(n_object, datastore)
try:
from changedetectionio.notification.handler import process_notification
except Exception as e:
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
# Fallback to system config if not set
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
# UUID wont be present when we submit a 'test' from the global settings
if 'uuid' in n_object:
datastore.update_watch(uuid=n_object['uuid'],
update_obj={'last_notification_error': "Notification error detected, goto notification log."})
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
if n_object.get('notification_urls', {}):
sent_obj = process_notification(n_object, datastore)
except Exception as e:
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
# UUID wont be present when we submit a 'test' from the global settings
if 'uuid' in n_object:
datastore.update_watch(uuid=n_object['uuid'],
update_obj={'last_notification_error': "Notification error detected, goto notification log."})
log_lines = str(e).splitlines()
notification_debug_log += log_lines
with app.app_context():
app.config['watch_check_update_SIGNAL'].send(app_context=app, watch_uuid=n_object.get('uuid'))
# Process notifications
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
# Trim the log length
notification_debug_log = notification_debug_log[-100:]
log_lines = str(e).splitlines()
notification_debug_log += log_lines
# Process notifications
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
# Trim the log length
notification_debug_log = notification_debug_log[-100:]
# Threaded runner, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():
import random
from changedetectionio import update_worker
proxy_last_called_time = {}
last_health_check = 0
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
# Spin up Workers that do the fetching
# Can be overriden by ENV or use the default settings
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
for _ in range(n_workers):
new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
running_update_threads.append(new_worker)
new_worker.start()
# Workers are now started during app initialization, not here
while not app.config.exit.is_set():
# Periodic worker health check (every 60 seconds)
now = time.time()
if now - last_health_check > 60:
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
health_result = worker_handler.check_worker_health(
expected_count=expected_workers,
update_q=update_q,
notification_q=notification_q,
app=app,
datastore=datastore
)
if health_result['status'] != 'healthy':
logger.warning(f"Worker health check: {health_result['message']}")
last_health_check = now
# Get a list of watches by UUID that are currently fetching data
running_uuids = []
for t in running_update_threads:
if t.current_uuid:
running_uuids.append(t.current_uuid)
running_uuids = worker_handler.get_running_uuids()
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
watch_uuid_list = []
@@ -671,7 +826,7 @@ def ticker_thread_check_time_launch_checks():
f"{now - watch['last_checked']:0.2f}s since last checked")
# Into the queue with you
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
# Reset for next time
watch.jitter_seconds = 0

View File

@@ -224,27 +224,37 @@ class StringDictKeyValue(StringField):
def _value(self):
if self.data:
output = u''
for k in self.data.keys():
output += "{}: {}\r\n".format(k, self.data[k])
output = ''
for k, v in self.data.items():
output += f"{k}: {v}\r\n"
return output
else:
return u''
return ''
# incoming
# incoming data processing + validation
def process_formdata(self, valuelist):
self.data = {}
errors = []
if valuelist:
self.data = {}
# Remove empty strings
cleaned = list(filter(None, valuelist[0].split("\n")))
for s in cleaned:
parts = s.strip().split(':', 1)
if len(parts) == 2:
self.data.update({parts[0].strip(): parts[1].strip()})
# Remove empty strings (blank lines)
cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
for idx, s in enumerate(cleaned, start=1):
if ':' not in s:
errors.append(f"Line {idx} is missing a ':' separator.")
continue
parts = s.split(':', 1)
key = parts[0].strip()
value = parts[1].strip()
else:
self.data = {}
if not key:
errors.append(f"Line {idx} has an empty key.")
if not value:
errors.append(f"Line {idx} has an empty value.")
self.data[key] = value
if errors:
raise ValidationError("Invalid input:\n" + "\n".join(errors))
class ValidateContentFetcherIsReady(object):
"""
@@ -709,6 +719,12 @@ class globalSettingsRequestForm(Form):
jitter_seconds = IntegerField('Random jitter seconds ± check',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
workers = IntegerField('Number of fetch workers',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=1, max=50,
message="Should be between 1 and 50")])
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
@@ -723,6 +739,7 @@ class globalSettingsRequestForm(Form):
class globalSettingsApplicationUIForm(Form):
open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])
socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):

View File

@@ -309,10 +309,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
soup = BeautifulSoup(content, 'html.parser')
if ensure_is_ldjson_info_type:
bs_result = soup.findAll('script', {"type": "application/ld+json"})
bs_result = soup.find_all('script', {"type": "application/ld+json"})
else:
bs_result = soup.findAll('script')
bs_result += soup.findAll('body')
bs_result = soup.find_all('script')
bs_result += soup.find_all('body')
bs_jsons = []
for result in bs_result:
@@ -436,55 +436,27 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content)
def html_to_text_sub_worker(conn, html_content: str, render_anchor_tag_content=False, is_rss=False):
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
"""Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also
included in the text
:param html_content: string with html content
:param render_anchor_tag_content: boolean flag indicating whether to extract
hyperlinks (the anchor tag content) together with text. This refers to the
'href' inside 'a' tags.
Anchor tag content is rendered in the following manner:
'[ text ](anchor tag content)'
:return: extracted text from the HTML
"""
# if anchor tag content flag is set to True define a config for
# extracting this content
if render_anchor_tag_content:
parser_config = ParserConfig(
annotation_rules={"a": ["hyperlink"]},
display_links=True
)
# otherwise set config to None/default
else:
parser_config = None
# RSS Mode - Inscriptis will treat `title` as something else.
# Make it as a regular block display element (//item/title)
# This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874
if is_rss:
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
html_content = re.sub(r'</title>', r'</h1>', html_content)
text_content = get_text(html_content, config=parser_config)
conn.send(text_content)
conn.close()
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False):
from multiprocessing import Process, Pipe
parent_conn, child_conn = Pipe()
p = Process(target=html_to_text_sub_worker, args=(child_conn, html_content, render_anchor_tag_content, is_rss))
p.start()
text = parent_conn.recv()
p.join()
return text
return text_content
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
def has_ldjson_product_info(content):

View File

@@ -62,6 +62,7 @@ class model(dict):
'timezone': None, # Default IANA timezone name
'ui': {
'open_diff_in_new_tab': True,
'socket_io_enabled': True
},
}
}

View File

@@ -1,3 +1,5 @@
from blinker import signal
from changedetectionio.strtobool import strtobool
from changedetectionio.safe_jinja import render as jinja_render
from . import watch_base
@@ -41,6 +43,7 @@ class model(watch_base):
self.__datastore_path = kw.get('datastore_path')
if kw.get('datastore_path'):
del kw['datastore_path']
super(model, self).__init__(*arg, **kw)
if kw.get('default'):
self.update(kw['default'])
@@ -60,6 +63,10 @@ class model(watch_base):
return False
@property
def has_unviewed(self):
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir):
logger.debug(f"> Creating data dir {self.watch_data_dir}")
@@ -120,6 +127,10 @@ class model(watch_base):
'remote_server_reply': None,
'track_ldjson_price_data': None
})
watch_check_update = signal('watch_check_update')
if watch_check_update:
watch_check_update.send(watch_uuid=self.get('uuid'))
return
@property
@@ -648,3 +659,44 @@ class model(watch_base):
if step_n:
available.append(step_n.group(1))
return available
def compile_error_texts(self, has_proxies=None):
"""Compile error texts for this watch.
Accepts has_proxies parameter to ensure it works even outside app context"""
from flask import url_for
from markupsafe import Markup
output = [] # Initialize as list since we're using append
last_error = self.get('last_error','')
try:
url_for('settings.settings_page')
except Exception as e:
has_app_context = False
else:
has_app_context = True
# has app+request context, we can use url_for()
if has_app_context:
if last_error:
if '403' in last_error:
if has_proxies:
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a>&nbsp;'")))
else:
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try adding external proxies/locations</a>&nbsp;'")))
else:
output.append(str(Markup(last_error)))
if self.get('last_notification_error'):
output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))
else:
# Lo_Fi version
if last_error:
output.append(str(Markup(last_error)))
if self.get('last_notification_error'):
output.append(str(Markup(self.get('last_notification_error'))))
res = "\n".join(output)
return res

View File

@@ -36,6 +36,7 @@ class watch_base(dict):
'include_filters': [],
'last_checked': 0,
'last_error': False,
'last_notification_error': None,
'last_viewed': 0, # history key value of the last viewed via the [diff] link
'method': 'GET',
'notification_alert_count': 0,

View File

@@ -2,10 +2,8 @@
import time
import apprise
from loguru import logger
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
def process_notification(n_object, datastore):
from changedetectionio.safe_jinja import render as jinja_render
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats

View File

@@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""
Notification Service Module
Extracted from update_worker.py to provide standalone notification functionality
for both sync and async workers
"""
import time
from loguru import logger
class NotificationService:
"""
Standalone notification service that handles all notification functionality
previously embedded in the update_worker class
"""
def __init__(self, datastore, notification_q):
self.datastore = datastore
self.notification_q = notification_q
def queue_notification_for_watch(self, n_object, watch):
"""
Queue a notification for a watch with full diff rendering and template variables
"""
from changedetectionio import diff
from changedetectionio.notification import default_notification_format_for_watch
dates = []
trigger_text = ''
now = time.time()
if watch:
watch_history = watch.history
dates = list(watch_history.keys())
trigger_text = watch.get('trigger_text', [])
# Add text that was triggered
if len(dates):
snapshot_contents = watch.get_history_snapshot(dates[-1])
else:
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
# If we ended up here with "System default"
if n_object.get('notification_format') == default_notification_format_for_watch:
n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')
html_colour_enable = False
# HTML needs linebreak, but MarkDown and Text can use a linefeed
if n_object.get('notification_format') == 'HTML':
line_feed_sep = "<br>"
# Snapshot will be plaintext on the disk, convert to some kind of HTML
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
elif n_object.get('notification_format') == 'HTML Color':
line_feed_sep = "<br>"
# Snapshot will be plaintext on the disk, convert to some kind of HTML
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
html_colour_enable = True
else:
line_feed_sep = "\n"
triggered_text = ''
if len(trigger_text):
from . import html_tools
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
if triggered_text:
triggered_text = line_feed_sep.join(triggered_text)
# Could be called as a 'test notification' with only 1 snapshot available
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
if len(dates) > 1:
prev_snapshot = watch.get_history_snapshot(dates[-2])
current_snapshot = watch.get_history_snapshot(dates[-1])
n_object.update({
'current_snapshot': snapshot_contents,
'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
'notification_timestamp': now,
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'triggered_text': triggered_text,
'uuid': watch.get('uuid') if watch else None,
'watch_url': watch.get('url') if watch else None,
})
if watch:
n_object.update(watch.extra_notification_token_values())
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
logger.debug("Queued notification for sending")
self.notification_q.put(n_object)
def _check_cascading_vars(self, var_name, watch):
"""
Check notification variables in cascading priority:
Individual watch settings > Tag settings > Global settings
"""
from changedetectionio.notification import (
default_notification_format_for_watch,
default_notification_body,
default_notification_title
)
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
v = watch.get(var_name)
if v and not watch.get('notification_muted'):
if var_name == 'notification_format' and v == default_notification_format_for_watch:
return self.datastore.data['settings']['application'].get('notification_format')
return v
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
if tags:
for tag_uuid, tag in tags.items():
v = tag.get(var_name)
if v and not tag.get('notification_muted'):
return v
if self.datastore.data['settings']['application'].get(var_name):
return self.datastore.data['settings']['application'].get(var_name)
# Otherwise could be defaults
if var_name == 'notification_format':
return default_notification_format_for_watch
if var_name == 'notification_body':
return default_notification_body
if var_name == 'notification_title':
return default_notification_title
return None
def send_content_changed_notification(self, watch_uuid):
"""
Send notification when content changes are detected
"""
n_object = {}
watch = self.datastore.data['watching'].get(watch_uuid)
if not watch:
return
watch_history = watch.history
dates = list(watch_history.keys())
# Theoretically it's possible that this could be just 1 long,
# - In the case that the timestamp key was not unique
if len(dates) == 1:
raise ValueError(
"History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
)
# Should be a better parent getter in the model object
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)
# (Individual watch) Only prepare to notify if the rules above matched
queued = False
if n_object and n_object.get('notification_urls'):
queued = True
count = watch.get('notification_alert_count', 0) + 1
self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
self.queue_notification_for_watch(n_object=n_object, watch=watch)
return queued
def send_filter_failure_notification(self, watch_uuid):
"""
Send notification when CSS/XPath filters fail consecutively
"""
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
watch = self.datastore.data['watching'].get(watch_uuid)
if not watch:
return
n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
", ".join(watch['include_filters']),
threshold),
'notification_format': 'text'}
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']
elif len(self.datastore.data['settings']['application']['notification_urls']):
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
# Only prepare to notify if the rules above matched
if 'notification_urls' in n_object:
n_object.update({
'watch_url': watch['url'],
'uuid': watch_uuid,
'screenshot': None
})
self.notification_q.put(n_object)
logger.debug(f"Sent filter not found notification for {watch_uuid}")
else:
logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
def send_step_failure_notification(self, watch_uuid, step_n):
"""
Send notification when browser steps fail consecutively
"""
watch = self.datastore.data['watching'].get(watch_uuid, False)
if not watch:
return
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
"did not appear on the page after {} attempts, did the page change layout? "
"Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n"
"Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold),
'notification_format': 'text'}
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']
elif len(self.datastore.data['settings']['application']['notification_urls']):
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
# Only prepare to notify if the rules above matched
if 'notification_urls' in n_object:
n_object.update({
'watch_url': watch['url'],
'uuid': watch_uuid
})
self.notification_q.put(n_object)
logger.error(f"Sent step not found notification for {watch_uuid}")
# Convenience functions for creating notification service instances
def create_notification_service(datastore, notification_q):
"""
Factory function to create a NotificationService instance
"""
return NotificationService(datastore, notification_q)

View File

@@ -27,7 +27,7 @@ class difference_detection_processor():
# Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher()
def call_browser(self, preferred_proxy_id=None):
async def call_browser(self, preferred_proxy_id=None):
from requests.structures import CaseInsensitiveDict
@@ -89,7 +89,7 @@ class difference_detection_processor():
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
else:
logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ")
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
@@ -147,16 +147,17 @@ class difference_detection_processor():
# And here we go! call the right browser with browser-specific settings
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
self.fetcher.run(url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=self.watch.get('include_filters'),
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
)
# All fetchers are now async
await self.fetcher.run(url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=self.watch.get('include_filters'),
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
)
#@todo .quit here could go on close object, so we can run JS if change-detected
self.fetcher.quit(watch=self.watch)

View File

@@ -7,7 +7,7 @@ import urllib3
import time
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Re-stock & Price detection for single product pages'
name = 'Re-stock & Price detection for pages with a SINGLE product'
description = 'Detects if the product goes back to in-stock'
class UnableToExtractRestockData(Exception):
@@ -79,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
# First phase, dead simple scanning of anything that looks useful
value = Restock()
if data:
logger.debug(f"Using jsonpath to find price/availability/etc")
logger.debug("Using jsonpath to find price/availability/etc")
price_parse = parse('$..(price|Price)')
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
availability_parse = parse('$..(availability|Availability)')
@@ -110,7 +110,7 @@ def get_itemprop_availability(html_content) -> Restock:
# Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
if not value.get('price') or value.get('availability'):
logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
logger.debug("Alternatively digging through OpenGraph properties for restock/price info..")
jsonpath_expr = parse('$..properties')
for match in jsonpath_expr.find(data):

View File

@@ -15,7 +15,7 @@ def _task(watch, update_handler):
except FilterNotFoundInResponse as e:
text_after_filter = f"Filter not found in HTML: {str(e)}"
except ReplyWithContentButNoText as e:
text_after_filter = f"Filter found but no text (empty result)"
text_after_filter = "Filter found but no text (empty result)"
except Exception as e:
text_after_filter = f"Error: {str(e)}"

View File

@@ -0,0 +1,124 @@
# Real-time Socket.IO Implementation
This directory contains the Socket.IO implementation for changedetection.io's real-time updates.
## Architecture Overview
The real-time system provides live updates to the web interface for:
- Watch status changes (checking, completed, errors)
- Queue length updates
- General statistics updates
## Current Implementation
### Socket.IO Configuration
- **Async Mode**: `threading` (default) or `gevent` (optional via SOCKETIO_MODE env var)
- **Server**: Flask-SocketIO with threading support
- **Background Tasks**: Python threading with daemon threads
### Async Worker Integration
- **Workers**: Async workers using asyncio for watch processing
- **Queue**: AsyncSignalPriorityQueue for job distribution
- **Signals**: Blinker signals for real-time updates between workers and Socket.IO
### Environment Variables
- `SOCKETIO_MODE=threading` (default, recommended)
- `SOCKETIO_MODE=gevent` (optional, has cross-platform limitations)
## Architecture Decision: Why Threading Mode?
### Previous Issues with Eventlet
**Eventlet was completely removed** due to fundamental compatibility issues:
1. **Monkey Patching Conflicts**: `eventlet.monkey_patch()` globally replaced Python's threading/socket modules, causing conflicts with:
- Playwright's synchronous browser automation
- Async worker event loops
- Various Python libraries expecting real threading
2. **Python 3.12+ Compatibility**: Eventlet had issues with newer Python versions and asyncio integration
3. **CVE-2023-29483**: Security vulnerability in eventlet's dnspython dependency
### Current Solution Benefits
**Threading Mode Advantages**:
- Full compatibility with async workers and Playwright
- No monkey patching - uses standard Python threading
- Better Python 3.12+ support
- Cross-platform compatibility (Windows, macOS, Linux)
- No external async library dependencies
- Fast shutdown capabilities
**Optional Gevent Support**:
- Available via `SOCKETIO_MODE=gevent` for high-concurrency scenarios
- Cross-platform limitations documented in requirements.txt
- Not recommended as default due to Windows socket limits and macOS ARM build issues
## Socket.IO Mode Configuration
### Threading Mode (Default)
```python
# Enabled automatically
async_mode = 'threading'
socketio = SocketIO(app, async_mode='threading')
```
### Gevent Mode (Optional)
```bash
# Set environment variable
export SOCKETIO_MODE=gevent
```
## Background Tasks
### Queue Polling
- **Threading Mode**: `threading.Thread` with `threading.Event` for shutdown
- **Signal Handling**: Blinker signals for watch state changes
- **Real-time Updates**: Direct Socket.IO `emit()` calls to connected clients
### Worker Integration
- **Async Workers**: Run in separate asyncio event loop thread
- **Communication**: AsyncSignalPriorityQueue bridges async workers and Socket.IO
- **Updates**: Real-time updates sent when workers complete tasks
## Files in This Directory
- `socket_server.py`: Main Socket.IO initialization and event handling
- `events.py`: Watch operation event handlers
- `__init__.py`: Module initialization
## Production Deployment
### Recommended WSGI Servers
For production with Socket.IO threading mode:
- **Gunicorn**: `gunicorn --worker-class eventlet changedetection:app` (if using gevent mode)
- **uWSGI**: With threading support
- **Docker**: Built-in Flask server works well for containerized deployments
### Performance Considerations
- Threading mode: Better memory usage, standard Python threading
- Gevent mode: Higher concurrency but platform limitations
- Async workers: Separate from Socket.IO, provides scalability
## Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `SOCKETIO_MODE` | `threading` | Socket.IO async mode (`threading` or `gevent`) |
| `FETCH_WORKERS` | `10` | Number of async workers for watch processing |
| `CHANGEDETECTION_HOST` | `0.0.0.0` | Server bind address |
| `CHANGEDETECTION_PORT` | `5000` | Server port |
## Debugging Tips
1. **Socket.IO Issues**: Check browser dev tools for WebSocket connection errors
2. **Threading Issues**: Monitor with `ps -T` to check thread count
3. **Worker Issues**: Use `/worker-health` endpoint to check async worker status
4. **Queue Issues**: Use `/queue-status` endpoint to monitor job queue
5. **Performance**: Use `/gc-cleanup` endpoint to trigger memory cleanup
## Migration Notes
If upgrading from eventlet-based versions:
- Remove any `EVENTLET_*` environment variables
- No code changes needed - Socket.IO mode is automatically configured
- Optional: Set `SOCKETIO_MODE=gevent` if high concurrency is required and platform supports it

View File

@@ -0,0 +1,3 @@
"""
Socket.IO realtime updates module for changedetection.io
"""

View File

@@ -0,0 +1,58 @@
from flask_socketio import emit
from loguru import logger
from blinker import signal
def register_watch_operation_handlers(socketio, datastore):
"""Register Socket.IO event handlers for watch operations"""
@socketio.on('watch_operation')
def handle_watch_operation(data):
"""Handle watch operations like pause, mute, recheck via Socket.IO"""
try:
op = data.get('op')
uuid = data.get('uuid')
logger.debug(f"Socket.IO: Received watch operation '{op}' for UUID {uuid}")
if not op or not uuid:
emit('operation_result', {'success': False, 'error': 'Missing operation or UUID'})
return
# Check if watch exists
if not datastore.data['watching'].get(uuid):
emit('operation_result', {'success': False, 'error': 'Watch not found'})
return
watch = datastore.data['watching'][uuid]
# Perform the operation
if op == 'pause':
watch.toggle_pause()
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
elif op == 'mute':
watch.toggle_mute()
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
elif op == 'recheck':
# Import here to avoid circular imports
from changedetectionio.flask_app import update_q
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
else:
emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
return
# Send signal to update UI
watch_check_update = signal('watch_check_update')
if watch_check_update:
watch_check_update.send(watch_uuid=uuid)
# Send success response to client
emit('operation_result', {'success': True, 'operation': op, 'uuid': uuid})
except Exception as e:
logger.error(f"Socket.IO error in handle_watch_operation: {str(e)}")
emit('operation_result', {'success': False, 'error': str(e)})

View File

@@ -0,0 +1,374 @@
import timeago
from flask_socketio import SocketIO
import time
import os
from loguru import logger
from blinker import signal
from changedetectionio import strtobool
class SignalHandler:
"""A standalone class to receive signals"""
def __init__(self, socketio_instance, datastore):
self.socketio_instance = socketio_instance
self.datastore = datastore
# Connect to the watch_check_update signal
from changedetectionio.flask_app import watch_check_update as wcc
wcc.connect(self.handle_signal, weak=False)
# logger.info("SignalHandler: Connected to signal from direct import")
# Connect to the queue_length signal
queue_length_signal = signal('queue_length')
queue_length_signal.connect(self.handle_queue_length, weak=False)
# logger.info("SignalHandler: Connected to queue_length signal")
watch_delete_signal = signal('watch_deleted')
watch_delete_signal.connect(self.handle_deleted_signal, weak=False)
# Create and start the queue update thread using standard threading
import threading
self.polling_emitter_thread = threading.Thread(
target=self.polling_emit_running_or_queued_watches_threaded,
daemon=True
)
self.polling_emitter_thread.start()
logger.info("Started polling thread using threading (eventlet-free)")
# Store the thread reference in socketio for clean shutdown
self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread
def handle_signal(self, *args, **kwargs):
logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
# Safely extract the watch UUID from kwargs
watch_uuid = kwargs.get('watch_uuid')
app_context = kwargs.get('app_context')
if watch_uuid:
# Get the watch object from the datastore
watch = self.datastore.data['watching'].get(watch_uuid)
if watch:
if app_context:
# note
with app_context.app_context():
with app_context.test_request_context():
# Forward to handle_watch_update with the watch parameter
handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
else:
handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
logger.trace(f"Signal handler processed watch UUID {watch_uuid}")
else:
logger.warning(f"Watch UUID {watch_uuid} not found in datastore")
def handle_deleted_signal(self, *args, **kwargs):
watch_uuid = kwargs.get('watch_uuid')
if watch_uuid:
# Emit the queue size to all connected clients
self.socketio_instance.emit("watch_deleted", {
"uuid": watch_uuid,
"event_timestamp": time.time()
})
logger.debug(f"Watch UUID {watch_uuid} was deleted")
def handle_queue_length(self, *args, **kwargs):
"""Handle queue_length signal and emit to all clients"""
try:
queue_length = kwargs.get('length', 0)
logger.debug(f"SignalHandler: Queue length update received: {queue_length}")
# Emit the queue size to all connected clients
self.socketio_instance.emit("queue_size", {
"q_length": queue_length,
"event_timestamp": time.time()
})
except Exception as e:
logger.error(f"Socket.IO error in handle_queue_length: {str(e)}")
def polling_emit_running_or_queued_watches_threaded(self):
"""Threading version of polling for Windows compatibility"""
import time
import threading
logger.info("Queue update thread started (threading mode)")
# Import here to avoid circular imports
from changedetectionio.flask_app import app
from changedetectionio import worker_handler
watch_check_update = signal('watch_check_update')
# Track previous state to avoid unnecessary emissions
previous_running_uuids = set()
# Run until app shutdown - check exit flag more frequently for fast shutdown
exit_event = getattr(app.config, 'exit', threading.Event())
while not exit_event.is_set():
try:
# Get current running UUIDs from async workers
running_uuids = set(worker_handler.get_running_uuids())
# Only send updates for UUIDs that changed state
newly_running = running_uuids - previous_running_uuids
no_longer_running = previous_running_uuids - running_uuids
# Send updates for newly running UUIDs (but exit fast if shutdown requested)
for uuid in newly_running:
if exit_event.is_set():
break
logger.trace(f"Threading polling: UUID {uuid} started processing")
with app.app_context():
watch_check_update.send(app_context=app, watch_uuid=uuid)
time.sleep(0.01) # Small yield
# Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
if not exit_event.is_set():
for uuid in no_longer_running:
if exit_event.is_set():
break
logger.trace(f"Threading polling: UUID {uuid} finished processing")
with app.app_context():
watch_check_update.send(app_context=app, watch_uuid=uuid)
time.sleep(0.01) # Small yield
# Update tracking for next iteration
previous_running_uuids = running_uuids
# Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
for _ in range(20): # 20 * 0.5 = 10 seconds total
if exit_event.is_set():
break
time.sleep(0.5)
except Exception as e:
logger.error(f"Error in threading polling: {str(e)}")
# Even during error recovery, check for exit quickly
for _ in range(1): # 1 * 0.5 = 0.5 seconds
if exit_event.is_set():
break
time.sleep(0.5)
# Check if we're in pytest environment - if so, be more gentle with logging
import sys
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
if not in_pytest:
logger.info("Queue update thread stopped (threading mode)")
def handle_watch_update(socketio, **kwargs):
"""Handle watch update signal from blinker"""
try:
watch = kwargs.get('watch')
datastore = kwargs.get('datastore')
# Emit the watch update to all connected clients
from changedetectionio.flask_app import update_q
from changedetectionio.flask_app import _jinja2_filter_datetime
from changedetectionio import worker_handler
# Get list of watches that are currently running
running_uuids = worker_handler.get_running_uuids()
# Get list of watches in the queue
queue_list = []
for q_item in update_q.queue:
if hasattr(q_item, 'item') and 'uuid' in q_item.item:
queue_list.append(q_item.item['uuid'])
# Get the error texts from the watch
error_texts = watch.compile_error_texts()
# Create a simplified watch data object to send to clients
watch_data = {
'checking_now': True if watch.get('uuid') in running_uuids else False,
'fetch_time': watch.get('fetch_time'),
'has_error': True if error_texts else False,
'last_changed': watch.get('last_changed'),
'last_checked': watch.get('last_checked'),
'error_text': error_texts,
'history_n': watch.history_n,
'last_checked_text': _jinja2_filter_datetime(watch),
'last_changed_text': timeago.format(int(watch.last_changed), time.time()) if watch.history_n >= 2 and int(watch.last_changed) > 0 else 'Not yet',
'queued': True if watch.get('uuid') in queue_list else False,
'paused': True if watch.get('paused') else False,
'notification_muted': True if watch.get('notification_muted') else False,
'unviewed': watch.has_unviewed,
'uuid': watch.get('uuid'),
'event_timestamp': time.time()
}
errored_count = 0
for watch_uuid_iter, watch_iter in datastore.data['watching'].items():
if watch_iter.get('last_error'):
errored_count += 1
general_stats = {
'count_errors': errored_count,
'has_unviewed': datastore.has_unviewed
}
# Debug what's being emitted
# logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}")
# Emit to all clients (no 'broadcast' parameter needed - it's the default behavior)
socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats})
# Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")
except Exception as e:
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
def init_socketio(app, datastore):
"""Initialize SocketIO with the main Flask app"""
import platform
import sys
# Platform-specific async_mode selection for better stability
system = platform.system().lower()
python_version = sys.version_info
# Check for SocketIO mode configuration via environment variable
# Default is 'threading' for best cross-platform compatibility
socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower()
if socketio_mode == 'gevent':
# Use gevent mode (higher concurrency but platform limitations)
try:
import gevent
async_mode = 'gevent'
logger.info(f"SOCKETIO_MODE=gevent: Using {async_mode} mode for Socket.IO")
except ImportError:
async_mode = 'threading'
logger.warning(f"SOCKETIO_MODE=gevent but gevent not available, falling back to {async_mode} mode")
elif socketio_mode == 'threading':
# Use threading mode (default - best compatibility)
async_mode = 'threading'
logger.info(f"SOCKETIO_MODE=threading: Using {async_mode} mode for Socket.IO")
else:
# Invalid mode specified, use default
async_mode = 'threading'
logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO")
# Log platform info for debugging
logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}")
# Restrict SocketIO CORS to same origin by default, can be overridden with env var
cors_origins = os.environ.get('SOCKETIO_CORS_ORIGINS', None)
socketio = SocketIO(app,
async_mode=async_mode,
cors_allowed_origins=cors_origins, # None means same-origin only
logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')))
# Set up event handlers
logger.info("Socket.IO: Registering connect event handler")
@socketio.on('checkbox-operation')
def event_checkbox_operations(data):
from changedetectionio.blueprint.ui import _handle_operations
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from changedetectionio.flask_app import update_q, watch_check_update
logger.trace(f"Got checkbox operations event: {data}")
datastore = socketio.datastore
_handle_operations(
op=data.get('op'),
uuids=data.get('uuids'),
datastore=datastore,
extra_data=data.get('extra_data'),
worker_handler=worker_handler,
update_q=update_q,
queuedWatchMetaData=queuedWatchMetaData,
watch_check_update=watch_check_update,
emit_flash=False
)
@socketio.on('connect')
def handle_connect():
"""Handle client connection"""
# logger.info("Socket.IO: CONNECT HANDLER CALLED - Starting connection process")
from flask import request
from flask_login import current_user
from changedetectionio.flask_app import update_q
# Access datastore from socketio
datastore = socketio.datastore
# logger.info(f"Socket.IO: Current user authenticated: {current_user.is_authenticated if hasattr(current_user, 'is_authenticated') else 'No current_user'}")
# Check if authentication is required and user is not authenticated
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
# logger.info(f"Socket.IO: Password enabled: {has_password_enabled}")
if has_password_enabled and not current_user.is_authenticated:
logger.warning("Socket.IO: Rejecting unauthenticated connection")
return False # Reject the connection
# Send the current queue size to the newly connected client
try:
queue_size = update_q.qsize()
socketio.emit("queue_size", {
"q_length": queue_size,
"event_timestamp": time.time()
}, room=request.sid) # Send only to this client
logger.debug(f"Socket.IO: Sent initial queue size {queue_size} to new client")
except Exception as e:
logger.error(f"Socket.IO error sending initial queue size: {str(e)}")
logger.info("Socket.IO: Client connected")
# logger.info("Socket.IO: Registering disconnect event handler")
@socketio.on('disconnect')
def handle_disconnect():
"""Handle client disconnection"""
logger.info("Socket.IO: Client disconnected")
# Create a dedicated signal handler that will receive signals and emit them to clients
signal_handler = SignalHandler(socketio, datastore)
# Register watch operation event handlers
from .events import register_watch_operation_handlers
register_watch_operation_handlers(socketio, datastore)
# Store the datastore reference on the socketio object for later use
socketio.datastore = datastore
# No stop event needed for threading mode - threads check app.config.exit directly
# Add a shutdown method to the socketio object
def shutdown():
"""Shutdown the SocketIO server fast and aggressively"""
try:
logger.info("Socket.IO: Fast shutdown initiated...")
# For threading mode, give the thread a very short time to exit gracefully
if hasattr(socketio, 'polling_emitter_thread'):
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
socketio.polling_emitter_thread.join(timeout=1.0) # Only 1 second timeout
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
else:
logger.info("Socket.IO: Polling thread stopped quickly")
else:
logger.info("Socket.IO: Polling thread already stopped")
logger.info("Socket.IO: Fast shutdown complete")
except Exception as e:
logger.error(f"Socket.IO error during shutdown: {str(e)}")
# Attach the shutdown method to the socketio object
socketio.shutdown = shutdown
logger.info("Socket.IO initialized and attached to main Flask app")
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
return socketio

View File

@@ -38,6 +38,9 @@ pytest tests/test_backend.py
pytest tests/test_rss.py
pytest tests/test_unique_lines.py
# Try high concurrency
FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
# Check file:// will pickup a file when enabled
echo "Hello world" > /tmp/test-file.txt
ALLOW_FILE_URI=yes pytest tests/test_security.py

View File

@@ -82,3 +82,25 @@ done
docker kill squid-one squid-two squid-custom
# Test that the UI is returning the correct error message when a proxy is not available
# Requests
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
# Playwright
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
# Puppeteer fast
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
# Selenium
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,146 @@
// Socket.IO client-side integration for changedetection.io
$(document).ready(function () {
function bindSocketHandlerButtonsEvents(socket) {
$('.ajax-op').on('click.socketHandlerNamespace', function (e) {
e.preventDefault();
const op = $(this).data('op');
const uuid = $(this).closest('tr').data('watch-uuid');
console.log(`Socket.IO: Sending watch operation '${op}' for UUID ${uuid}`);
// Emit the operation via Socket.IO
socket.emit('watch_operation', {
'op': op,
'uuid': uuid
});
return false;
});
$('#checkbox-operations button').on('click.socketHandlerNamespace', function (e) {
e.preventDefault();
const op = $(this).val();
const checkedUuids = $('input[name="uuids"]:checked').map(function () {
return this.value.trim();
}).get();
console.log(`Socket.IO: Sending watch operation '${op}' for UUIDs:`, checkedUuids);
socket.emit('checkbox-operation', {
op: op,
uuids: checkedUuids,
extra_data: $('#op_extradata').val() // Set by the alert() handler
});
$('input[name="uuids"]:checked').prop('checked', false);
$('#check-all:checked').prop('checked', false);
return false;
});
}
// Only try to connect if authentication isn't required or user is authenticated
// The 'is_authenticated' variable will be set in the template
if (typeof is_authenticated !== 'undefined' ? is_authenticated : true) {
// Try to create the socket connection to the SocketIO server - if it fails, the site will still work normally
try {
// Connect to Socket.IO on the same host/port, with path from template
const socket = io({
path: socketio_url, // This will be the path prefix like "/app/socket.io" from the template
transports: ['polling', 'websocket'], // Try WebSocket but fall back to polling
reconnectionDelay: 1000,
reconnectionAttempts: 15
});
// Connection status logging
socket.on('connect', function () {
console.log('Socket.IO connected with path:', socketio_url);
console.log('Socket transport:', socket.io.engine.transport.name);
bindSocketHandlerButtonsEvents(socket);
});
socket.on('connect_error', function(error) {
console.error('Socket.IO connection error:', error);
});
socket.on('connect_timeout', function() {
console.error('Socket.IO connection timeout');
});
socket.on('error', function(error) {
console.error('Socket.IO error:', error);
});
socket.on('disconnect', function (reason) {
console.log('Socket.IO disconnected, reason:', reason);
$('.ajax-op').off('.socketHandlerNamespace')
});
socket.on('queue_size', function (data) {
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
// Update queue size display if implemented in the UI
})
// Listen for operation results
socket.on('operation_result', function (data) {
if (data.success) {
console.log(`Socket.IO: Operation '${data.operation}' completed successfully for UUID ${data.uuid}`);
} else {
console.error(`Socket.IO: Operation failed: ${data.error}`);
alert("There was a problem processing the request: " + data.error);
}
});
// Listen for periodically emitted watch data
console.log('Adding watch_update event listener');
socket.on('watch_update', function (data) {
const watch = data.watch;
const general_stats = data.general_stats;
// Log the entire watch object for debugging
console.log('!!! WATCH UPDATE EVENT RECEIVED !!!');
console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`);
console.log('Watch data:', watch);
console.log('General stats:', general_stats);
// Updating watch table rows
const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]');
console.log('Found watch row elements:', $watchRow.length);
if ($watchRow.length) {
$($watchRow).toggleClass('checking-now', watch.checking_now);
$($watchRow).toggleClass('queued', watch.queued);
$($watchRow).toggleClass('unviewed', watch.unviewed);
$($watchRow).toggleClass('has-error', watch.has_error);
$($watchRow).toggleClass('notification_muted', watch.notification_muted);
$($watchRow).toggleClass('paused', watch.paused);
$($watchRow).toggleClass('single-history', watch.history_n === 1);
$($watchRow).toggleClass('multiple-history', watch.history_n >= 2);
$('td.title-col .error-text', $watchRow).html(watch.error_text)
$('td.last-changed', $watchRow).text(watch.last_changed_text)
$('td.last-checked .innertext', $watchRow).text(watch.last_checked_text)
$('td.last-checked', $watchRow).data('timestamp', watch.last_checked).data('fetchduration', watch.fetch_time);
$('td.last-checked', $watchRow).data('eta_complete', watch.last_checked + watch.fetch_time);
console.log('Updated UI for watch:', watch.uuid);
}
// Tabs at bottom of list
$('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed);
$('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0)
$('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`);
$('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid));
});
} catch (e) {
// If Socket.IO fails to initialize, just log it and continue
console.log('Socket.IO initialization error:', e);
}
}
});

File diff suppressed because one or more lines are too long

View File

@@ -68,7 +68,7 @@ $(function () {
if (eta_complete + 2 > nowtimeserver && fetch_duration > 3) {
const remaining_seconds = Math.abs(eta_complete) - nowtimeserver - 1;
let r = (1.0 - (remaining_seconds / fetch_duration)) * 100;
let r = Math.round((1.0 - (remaining_seconds / fetch_duration)) * 100);
if (r < 10) {
r = 10;
}
@@ -76,8 +76,8 @@ $(function () {
r = 100;
}
$(this).css('background-size', `${r}% 100%`);
//$(this).text(`${r}% remain ${remaining_seconds}`);
} else {
// Snap to full complete
$(this).css('background-size', `100% 100%`);
}
});

View File

@@ -0,0 +1,31 @@
// Styles for Socket.IO real-time updates
body.checking-now {
#checking-now-fixed-tab {
display: block !important;
}
}
#checking-now-fixed-tab {
background: #ccc;
border-radius: 5px;
bottom: 0;
color: var(--color-text);
display: none;
font-size: 0.8rem;
left: 0;
padding: 5px;
position: fixed;
}
#post-list-buttons {
#post-list-with-errors.has-error {
display: inline-block !important;
}
#post-list-mark-views.has-unviewed {
display: inline-block !important;
}
}

View File

@@ -0,0 +1,128 @@
/* table related */
.watch-table {
width: 100%;
font-size: 80%;
tr {
&.unviewed {
font-weight: bold;
}
color: var(--color-watch-table-row-text);
}
td {
white-space: nowrap;
&.title-col {
word-break: break-all;
white-space: normal;
a::after {
content: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAQElEQVR42qXKwQkAIAxDUUdxtO6/RBQkQZvSi8I/pL4BoGw/XPkh4XigPmsUgh0626AjRsgxHTkUThsG2T/sIlzdTsp52kSS1wAAAABJRU5ErkJggg==);
margin: 0 3px 0 5px;
}
}
}
th {
white-space: nowrap;
a {
font-weight: normal;
&.active {
font-weight: bolder;
}
&.inactive {
.arrow {
display: none;
}
}
}
}
/* Row with 'checking-now' */
tr.checking-now {
td:first-child {
position: relative;
}
td:first-child::before {
content: "";
position: absolute;
top: 0;
bottom: 0;
left: 0;
width: 3px;
background-color: #293eff;
}
td.last-checked {
.spinner-wrapper {
display: inline-block !important;
}
.innertext {
display: none !important;
}
}
}
tr.queued {
a.recheck {
display: none !important;
}
a.already-in-queue-button {
display: inline-block !important;
}
}
tr.paused {
a.pause-toggle {
&.state-on {
display: inline !important;
}
&.state-off {
display: none !important;
}
}
}
tr.notification_muted {
a.mute-toggle {
&.state-on {
display: inline !important;
}
&.state-off {
display: none !important;
}
}
}
tr.has-error {
color: var(--color-watch-table-error);
.error-text {
display: block !important;
}
}
tr.single-history {
a.preview-link {
display: inline-block !important;
}
}
tr.multiple-history {
a.history-link {
display: inline-block !important;
}
}
}

View File

@@ -13,8 +13,10 @@
@import "parts/_menu";
@import "parts/_love";
@import "parts/preview_text_filter";
@import "parts/_watch_table";
@import "parts/_edit";
@import "parts/_conditions_table";
@import "parts/_socket";
body {
color: var(--color-text);
@@ -169,56 +171,6 @@ code {
color: var(--color-text);
}
/* table related */
.watch-table {
width: 100%;
font-size: 80%;
tr {
&.unviewed {
font-weight: bold;
}
&.error {
color: var(--color-watch-table-error);
}
color: var(--color-watch-table-row-text);
}
td {
white-space: nowrap;
&.title-col {
word-break: break-all;
white-space: normal;
}
}
th {
white-space: nowrap;
a {
font-weight: normal;
&.active {
font-weight: bolder;
}
&.inactive {
.arrow {
display: none;
}
}
}
}
.title-col a[target="_blank"]::after,
.current-diff-url::after {
content: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAQElEQVR42qXKwQkAIAxDUUdxtO6/RBQkQZvSi8I/pL4BoGw/XPkh4XigPmsUgh0626AjRsgxHTkUThsG2T/sIlzdTsp52kSS1wAAAABJRU5ErkJggg==);
margin: 0 3px 0 5px;
}
}
.inline-tag {
white-space: nowrap;
border-radius: 5px;
@@ -1131,6 +1083,9 @@ ul {
/* some space if they wrap the page */
margin-bottom: 3px;
margin-top: 3px;
/* vertically center icon and text */
display: inline-flex;
align-items: center;
}
}

View File

@@ -523,6 +523,66 @@ body.preview-text-enabled {
z-index: 3;
box-shadow: 1px 1px 4px var(--color-shadow-jump); }
/* table related */
.watch-table {
width: 100%;
font-size: 80%;
/* Row with 'checking-now' */ }
.watch-table tr {
color: var(--color-watch-table-row-text); }
.watch-table tr.unviewed {
font-weight: bold; }
.watch-table td {
white-space: nowrap; }
.watch-table td.title-col {
word-break: break-all;
white-space: normal; }
.watch-table td.title-col a::after {
content: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAQElEQVR42qXKwQkAIAxDUUdxtO6/RBQkQZvSi8I/pL4BoGw/XPkh4XigPmsUgh0626AjRsgxHTkUThsG2T/sIlzdTsp52kSS1wAAAABJRU5ErkJggg==);
margin: 0 3px 0 5px; }
.watch-table th {
white-space: nowrap; }
.watch-table th a {
font-weight: normal; }
.watch-table th a.active {
font-weight: bolder; }
.watch-table th a.inactive .arrow {
display: none; }
.watch-table tr.checking-now td:first-child {
position: relative; }
.watch-table tr.checking-now td:first-child::before {
content: "";
position: absolute;
top: 0;
bottom: 0;
left: 0;
width: 3px;
background-color: #293eff; }
.watch-table tr.checking-now td.last-checked .spinner-wrapper {
display: inline-block !important; }
.watch-table tr.checking-now td.last-checked .innertext {
display: none !important; }
.watch-table tr.queued a.recheck {
display: none !important; }
.watch-table tr.queued a.already-in-queue-button {
display: inline-block !important; }
.watch-table tr.paused a.pause-toggle.state-on {
display: inline !important; }
.watch-table tr.paused a.pause-toggle.state-off {
display: none !important; }
.watch-table tr.notification_muted a.mute-toggle.state-on {
display: inline !important; }
.watch-table tr.notification_muted a.mute-toggle.state-off {
display: none !important; }
.watch-table tr.has-error {
color: var(--color-watch-table-error); }
.watch-table tr.has-error .error-text {
display: block !important; }
.watch-table tr.single-history a.preview-link {
display: inline-block !important; }
.watch-table tr.multiple-history a.history-link {
display: inline-block !important; }
ul#conditions_match_logic {
list-style: none; }
ul#conditions_match_logic input, ul#conditions_match_logic label, ul#conditions_match_logic li {
@@ -623,6 +683,26 @@ ul#conditions_match_logic {
.fieldlist_formfields .addRuleRow:hover, .fieldlist_formfields .removeRuleRow:hover, .fieldlist_formfields .verifyRuleRow:hover {
background-color: #999; }
body.checking-now #checking-now-fixed-tab {
display: block !important; }
#checking-now-fixed-tab {
background: #ccc;
border-radius: 5px;
bottom: 0;
color: var(--color-text);
display: none;
font-size: 0.8rem;
left: 0;
padding: 5px;
position: fixed; }
#post-list-buttons #post-list-with-errors.has-error {
display: inline-block !important; }
#post-list-buttons #post-list-mark-views.has-unviewed {
display: inline-block !important; }
body {
color: var(--color-text);
background: var(--color-background-page);
@@ -735,34 +815,6 @@ code {
background: var(--color-background-code);
color: var(--color-text); }
/* table related */
.watch-table {
width: 100%;
font-size: 80%; }
.watch-table tr {
color: var(--color-watch-table-row-text); }
.watch-table tr.unviewed {
font-weight: bold; }
.watch-table tr.error {
color: var(--color-watch-table-error); }
.watch-table td {
white-space: nowrap; }
.watch-table td.title-col {
word-break: break-all;
white-space: normal; }
.watch-table th {
white-space: nowrap; }
.watch-table th a {
font-weight: normal; }
.watch-table th a.active {
font-weight: bolder; }
.watch-table th a.inactive .arrow {
display: none; }
.watch-table .title-col a[target="_blank"]::after,
.watch-table .current-diff-url::after {
content: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAQElEQVR42qXKwQkAIAxDUUdxtO6/RBQkQZvSi8I/pL4BoGw/XPkh4XigPmsUgh0626AjRsgxHTkUThsG2T/sIlzdTsp52kSS1wAAAABJRU5ErkJggg==);
margin: 0 3px 0 5px; }
.inline-tag, .watch-tag-list, .tracking-ldjson-price-data, .restock-label {
white-space: nowrap;
border-radius: 5px;
@@ -1408,7 +1460,10 @@ ul {
#checkbox-operations button {
/* some space if they wrap the page */
margin-bottom: 3px;
margin-top: 3px; }
margin-top: 3px;
/* vertically center icon and text */
display: inline-flex;
align-items: center; }
.checkbox-uuid > * {
vertical-align: middle; }

View File

@@ -17,6 +17,7 @@ import threading
import time
import uuid as uuid_builder
from loguru import logger
from blinker import signal
from .processors import get_custom_watch_obj_for_processor
from .processors.restock_diff import Restock
@@ -166,6 +167,10 @@ class ChangeDetectionStore:
self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
self.needs_write = True
watch_check_update = signal('watch_check_update')
if watch_check_update:
watch_check_update.send(watch_uuid=uuid)
def remove_password(self):
self.__data['settings']['application']['password'] = False
self.needs_write = True
@@ -233,6 +238,7 @@ class ChangeDetectionStore:
with self.lock:
if uuid == 'all':
self.__data['watching'] = {}
time.sleep(1) # Mainly used for testing to allow all items to flush before running next test
# GitHub #30 also delete history records
for uuid in self.data['watching']:
@@ -247,6 +253,9 @@ class ChangeDetectionStore:
del self.data['watching'][uuid]
self.needs_write_urgent = True
watch_delete_signal = signal('watch_deleted')
if watch_delete_signal:
watch_delete_signal.send(watch_uuid=uuid)
# Clone a watch by UUID
def clone(self, uuid):
@@ -402,7 +411,12 @@ class ChangeDetectionStore:
# This is a fairly basic strategy to deal with the case that the file is corrupted,
# system was out of memory, out of RAM etc
with open(self.json_store_path+".tmp", 'w') as json_file:
json.dump(data, json_file, indent=4)
# Use compact JSON in production for better performance
debug_mode = os.environ.get('CHANGEDETECTION_DEBUG', 'false').lower() == 'true'
if debug_mode:
json.dump(data, json_file, indent=4)
else:
json.dump(data, json_file, separators=(',', ':'))
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")

View File

@@ -28,9 +28,16 @@
<meta name="theme-color" content="#ffffff">
<script>
const csrftoken="{{ csrf_token() }}";
const socketio_url="{{ get_socketio_path() }}/socket.io";
const is_authenticated = {% if current_user.is_authenticated or not has_password %}true{% else %}false{% endif %};
</script>
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='feather-icons.min.js')}}" defer></script>
{% if socket_io_enabled %}
<script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script>
{% endif %}
</head>
<body class="">
@@ -227,6 +234,8 @@
{% block content %}{% endblock %}
</section>
<script src="{{url_for('static_content', group='js', filename='toggle-theme.js')}}" defer></script>
<div id="checking-now-fixed-tab" style="display: none;"><span class="spinner"></span><span>&nbsp;Checking now</span></div>
</body>
</html>

View File

@@ -239,7 +239,7 @@ Math: {{ 1 + 1 }}") }}
</div>
</div>
<div id="browser-steps-fieldlist" >
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
<span id="browser-seconds-remaining">Press "Play" to start.</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
{{ render_field(form.browser_steps) }}
</div>
</div>

View File

@@ -10,6 +10,8 @@ import os
import sys
from loguru import logger
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
# Much better boilerplate than the docs
# https://www.python-boilerplate.com/py3+flask+pytest/
@@ -70,6 +72,22 @@ def cleanup(datastore_path):
if os.path.isfile(f):
os.unlink(f)
@pytest.fixture(scope='function', autouse=True)
def prepare_test_function(live_server):
routes = [rule.rule for rule in live_server.app.url_map.iter_rules()]
if '/test-random-content-endpoint' not in routes:
logger.debug("Setting up test URL routes")
new_live_server_setup(live_server)
yield
# Then cleanup/shutdown
live_server.app.config['DATASTORE'].data['watching']={}
time.sleep(0.3)
live_server.app.config['DATASTORE'].data['watching']={}
@pytest.fixture(scope='session')
def app(request):
"""Create application for the tests."""
@@ -106,8 +124,33 @@ def app(request):
app.config['STOP_THREADS'] = True
def teardown():
# Stop all threads and services
datastore.stop_thread = True
app.config.exit.set()
# Shutdown workers gracefully before loguru cleanup
try:
from changedetectionio import worker_handler
worker_handler.shutdown_workers()
except Exception:
pass
# Stop socket server threads
try:
from changedetectionio.flask_app import socketio_server
if socketio_server and hasattr(socketio_server, 'shutdown'):
socketio_server.shutdown()
except Exception:
pass
# Give threads a moment to finish their shutdown
import time
time.sleep(0.1)
# Remove all loguru handlers to prevent "closed file" errors
logger.remove()
# Cleanup files
cleanup(app_config['datastore_path'])

View File

@@ -7,7 +7,7 @@ from ..util import live_server_setup, wait_for_all_checks
def do_test(client, live_server, make_test_use_extra_browser=False):
# Grep for this string in the logs?
test_url = f"https://changedetection.io/ci-test.html?non-custom-default=true"
test_url = "https://changedetection.io/ci-test.html?non-custom-default=true"
# "non-custom-default" should not appear in the custom browser connection
custom_browser_name = 'custom browser URL'
@@ -51,7 +51,7 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
# 'run_customer_browser_url_tests.sh' will search for this string to know if we hit the right browser container or not
"url": f"https://changedetection.io/ci-test.html?custom-browser-search-string=1",
"url": "https://changedetection.io/ci-test.html?custom-browser-search-string=1",
"tags": "",
"headers": "",
'fetch_backend': f"extra_browser_{custom_browser_name}",
@@ -78,12 +78,12 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
# Requires playwright to be installed
def test_request_via_custom_browser_url(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# We do this so we can grep the logs of the custom container and see if the request actually went through that container
do_test(client, live_server, make_test_use_extra_browser=True)
def test_request_not_via_custom_browser_url(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# We do this so we can grep the logs of the custom container and see if the request actually went through that container
do_test(client, live_server, make_test_use_extra_browser=False)

View File

@@ -7,7 +7,7 @@ import logging
# Requires playwright to be installed
def test_fetch_webdriver_content(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
#####################
res = client.post(

View File

@@ -5,7 +5,7 @@ from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_cli
def test_execute_custom_js(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
test_url = url_for('test_interactive_html_endpoint', _external=True)

View File

@@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks
def test_preferred_proxy(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
url = "http://chosen.changedetection.io"

View File

@@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_cli
def test_noproxy_option(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# Run by run_proxy_tests.sh
# Call this URL then scan the containers that it never went through them
url = "http://noproxy.changedetection.io"

View File

@@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_cli
# just make a request, we will grep in the docker logs to see it actually got called
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
res = client.post(
url_for("imports.import_page"),
# Because a URL wont show in squid/proxy logs due it being SSLed

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks
import os
from ... import strtobool
# Just to be sure the UI outputs the right error message on proxy connection failed
# docker run -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
# PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
# FAST_PUPPETEER_CHROME_FETCHER=True PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
# WEBDRIVER_URL=http://127.0.0.1:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py
def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
# Goto settings, add our custom one
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy",
# test:awesome is set in tests/proxy_list/squid-passwords.txt
"requests-extra_proxies-0-proxy_url": "http://127.0.0.1:3128",
},
follow_redirects=True
)
assert b"Settings updated." in res.data
test_url = "https://changedetection.io"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
follow_redirects=True
)
assert b"Watch added in Paused state, saving will unpause" in res.data
options = {
"url": test_url,
"fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests",
"proxy": "ui-0custom-test-proxy",
}
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
data=options,
follow_redirects=True
)
assert b"unpaused" in res.data
import time
wait_for_all_checks(client)
# Requests default
check_string = b'Cannot connect to proxy'
if os.getenv('PLAYWRIGHT_DRIVER_URL') or strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) or os.getenv("WEBDRIVER_URL"):
check_string = b'ERR_PROXY_CONNECTION_FAILED'
res = client.get(url_for("watchlist.index"))
#with open("/tmp/debug.html", 'wb') as f:
# f.write(res.data)
assert check_string in res.data

View File

@@ -7,7 +7,7 @@ import os
# just make a request, we will grep in the docker logs to see it actually got called
def test_select_custom(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# Goto settings, add our custom one
res = client.post(

View File

@@ -7,7 +7,7 @@ from changedetectionio.tests.util import live_server_setup, wait_for_all_checks,
def set_response():
import time
data = f"""<html>
data = """<html>
<body>
<h1>Awesome, you made it</h1>
yeah the socks request worked
@@ -20,7 +20,7 @@ def set_response():
time.sleep(1)
def test_socks5(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
set_response()
# Setup a proxy

View File

@@ -6,7 +6,7 @@ from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def set_response():
import time
data = f"""<html>
data = """<html>
<body>
<h1>Awesome, you made it</h1>
yeah the socks request worked
@@ -21,7 +21,7 @@ def set_response():
# should be proxies.json mounted from run_proxy_tests.sh already
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
set_response()
# Because the socks server should connect back to us
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"

View File

@@ -0,0 +1,72 @@
import asyncio
import socketio
from aiohttp import web
SOCKETIO_URL = 'ws://localhost.localdomain:5005'
SOCKETIO_PATH = "/socket.io"
NUM_CLIENTS = 1
clients = []
shutdown_event = asyncio.Event()
class WatchClient:
def __init__(self, client_id: int):
self.client_id = client_id
self.i_got_watch_update_event = False
self.sio = socketio.AsyncClient(reconnection_attempts=50, reconnection_delay=1)
@self.sio.event
async def connect():
print(f"[Client {self.client_id}] Connected")
@self.sio.event
async def disconnect():
print(f"[Client {self.client_id}] Disconnected")
@self.sio.on("watch_update")
async def on_watch_update(watch):
self.i_got_watch_update_event = True
print(f"[Client {self.client_id}] Received update: {watch}")
async def run(self):
try:
await self.sio.connect(SOCKETIO_URL, socketio_path=SOCKETIO_PATH, transports=["websocket", "polling"])
await self.sio.wait()
except Exception as e:
print(f"[Client {self.client_id}] Connection error: {e}")
async def handle_check(request):
all_received = all(c.i_got_watch_update_event for c in clients)
result = "yes" if all_received else "no"
print(f"Received HTTP check — returning '{result}'")
shutdown_event.set() # Signal shutdown
return web.Response(text=result)
async def start_http_server():
app = web.Application()
app.add_routes([web.get('/did_all_clients_get_watch_update', handle_check)])
runner = web.AppRunner(app)
await runner.setup()
site = web.TCPSite(runner, '0.0.0.0', 6666)
await site.start()
async def main():
#await start_http_server()
for i in range(NUM_CLIENTS):
client = WatchClient(i)
clients.append(client)
asyncio.create_task(client.run())
await shutdown_event.wait()
print("Shutting down...")
# Graceful disconnect
for c in clients:
await c.sio.disconnect()
if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
print("Interrupted")

View File

@@ -54,7 +54,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response()
#assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
#####################
notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')

View File

@@ -20,8 +20,7 @@ from changedetectionio.notification import (
valid_notification_formats,
)
def test_setup(live_server):
live_server_setup(live_server)
def get_last_message_from_smtp_server():
import socket
@@ -40,7 +39,7 @@ def get_last_message_from_smtp_server():
# Requires running the test SMTP server
def test_check_notification_email_formats_default_HTML(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
## live_server_setup(live_server) # Setup on conftest per function
set_original_response()
notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com'
@@ -91,7 +90,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
## live_server_setup(live_server) # Setup on conftest per function
# HTML problems? see this
# https://github.com/caronc/apprise/issues/633

View File

@@ -4,7 +4,7 @@ import time
def test_check_access_control(app, client, live_server):
# Still doesnt work, but this is closer.
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
with app.test_client(use_cookies=True) as c:
# Check we don't have any password protection enabled yet.

View File

@@ -4,7 +4,7 @@ import os.path
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
import time
def set_original(excluding=None, add_line=None):
test_return_data = """<html>
@@ -35,11 +35,11 @@ def set_original(excluding=None, add_line=None):
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# def test_setup(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
def test_check_removed_line_contains_trigger(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# Give the endpoint time to spin up
set_original()
# Add our URL to the import page
@@ -72,6 +72,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
time.sleep(0.5)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
@@ -84,12 +85,17 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
time.sleep(1)
# Now add it back, and we should not get a trigger
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)
time.sleep(1)
set_original(excluding=None)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(1)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
@@ -105,7 +111,10 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
time.sleep(1)
# Give the endpoint time to spin up
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://') + "?xxx={{ watch_url }}"

View File

@@ -52,12 +52,12 @@ def is_valid_uuid(val):
return False
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# def test_setup(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
def test_api_simple(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
@@ -108,7 +108,7 @@ def test_api_simple(client, live_server, measure_memory_usage):
headers={'x-api-key': api_key}
)
assert len(res.json) == 0
time.sleep(1)
wait_for_all_checks(client)
set_modified_response()
@@ -119,6 +119,7 @@ def test_api_simple(client, live_server, measure_memory_usage):
)
wait_for_all_checks(client)
time.sleep(1)
# Did the recheck fire?
res = client.get(
url_for("createwatch"),
@@ -291,7 +292,7 @@ def test_access_denied(client, live_server, measure_memory_usage):
def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Create a watch
@@ -371,7 +372,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
def test_api_import(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.post(
@@ -393,7 +394,7 @@ def test_api_import(client, live_server, measure_memory_usage):
def test_api_conflict_UI_password(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Enable password check and diff page access bypass

View File

@@ -5,7 +5,7 @@ from .util import live_server_setup
import json
def test_api_notifications_crud(client, live_server):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Confirm notifications are initially empty

View File

@@ -7,7 +7,7 @@ from .util import live_server_setup, wait_for_all_checks
def test_api_search(client, live_server):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
watch_data = {}

View File

@@ -5,7 +5,7 @@ from .util import live_server_setup, wait_for_all_checks
import json
def test_api_tags_listing(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
tag_title = 'Test Tag'

View File

@@ -4,14 +4,14 @@ import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
# test pages with http://username@password:foobar.com/ work
def test_basic_auth(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
live_server_setup(live_server)
# Add our URL to the import page
# This page will echo back any auth info
test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
time.sleep(1)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
@@ -34,4 +34,4 @@ def test_basic_auth(client, live_server, measure_memory_usage):
follow_redirects=True
)
assert b'myuser mypass basic' in res.data
assert b'myuser mypass basic' in res.data

View File

@@ -76,12 +76,12 @@ def set_response_without_ldjson():
f.write(test_return_data)
return None
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# def test_setup(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
# actually only really used by the distll.io importer, but could be handy too
def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_response_with_ldjson()
# Add our URL to the import page
@@ -164,7 +164,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_
def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_return_data = """
<html>
<head>

View File

@@ -18,7 +18,7 @@ def test_inscriptus():
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
set_original_response()
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# Add our URL to the import page
res = client.post(
@@ -114,7 +114,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'Mark all viewed' not in res.data
assert b'class="has-unviewed' not in res.data
assert b'head title' not in res.data # Should not be present because this is off by default
assert b'test-endpoint' in res.data
@@ -133,7 +133,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'Mark all viewed' in res.data
assert b'class="has-unviewed' in res.data
# It should have picked up the <title>
assert b'head title' in res.data
@@ -143,8 +143,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# hit the mark all viewed link
res = client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)
assert b'Mark all viewed' not in res.data
assert b'class="has-unviewed' not in res.data
assert b'unviewed' not in res.data
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again

View File

@@ -9,7 +9,7 @@ import time
def test_backup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
set_original_response()

View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python3
import time
from flask import url_for
from .util import (
set_original_response,
set_modified_response,
live_server_setup,
wait_for_all_checks
)
from loguru import logger
def run_socketio_watch_update_test(client, live_server, password_mode=""):
"""Test that the socketio emits a watch update event when content changes"""
# Set up the test server
set_original_response()
# Get the SocketIO instance from the app
from changedetectionio.flask_app import app
socketio = app.extensions['socketio']
# Create a test client for SocketIO
socketio_test_client = socketio.test_client(app, flask_test_client=client)
if password_mode == "not logged in, should exit on connect":
assert not socketio_test_client.is_connected(), "Failed to connect to Socket.IO server because it should bounce this connect"
return
assert socketio_test_client.is_connected(), "Failed to connect to Socket.IO server"
print("Successfully connected to Socket.IO server")
# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
res = client.get(url_for("watchlist.index"))
assert url_for('test_endpoint', _external=True).encode() in res.data
# Wait for initial check to complete
wait_for_all_checks(client)
# Clear any initial messages
socketio_test_client.get_received()
# Make a change to trigger an update
set_modified_response()
# Force recheck
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
# Wait for the watch to be checked
wait_for_all_checks(client)
has_watch_update = False
has_unviewed_update = False
for i in range(10):
# Get received events
received = socketio_test_client.get_received()
if received:
logger.info(f"Received {len(received)} events after {i+1} seconds")
# Check for watch_update events with unviewed=True
for event in received:
if event['name'] == 'watch_update':
has_watch_update = True
if event['args'][0]['watch'].get('unviewed', False):
has_unviewed_update = True
logger.info("Found unviewed update event!")
break
if has_unviewed_update:
break
# Force a recheck every 5 seconds to ensure events are emitted
# if i > 0 and i % 5 == 0:
# print(f"Still waiting for events, forcing another recheck...")
# res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# assert b'Queued 1 watch for rechecking.' in res.data
# wait_for_all_checks(client)
# print(f"Waiting for unviewed update event... {i+1}/{max_wait}")
time.sleep(1)
# Verify we received watch_update events
assert has_watch_update, "No watch_update events received"
# Verify we received an unviewed event
assert has_unviewed_update, "No watch_update event with unviewed=True received"
# Alternatively, check directly if the watch in the datastore is marked as unviewed
from changedetectionio.flask_app import app
datastore = app.config.get('DATASTORE')
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
# Get the watch from the datastore
watch = datastore.data['watching'].get(watch_uuid)
assert watch, f"Watch {watch_uuid} not found in datastore"
assert watch.has_unviewed, "The watch was not marked as unviewed after content change"
# Clean up
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
def test_everything(live_server, client):
# live_server_setup(live_server) # Setup on conftest per function
run_socketio_watch_update_test(password_mode="", live_server=live_server, client=client)
############################ Password required auth check ##############################
# Enable password check and diff page access bypass
res = client.post(
url_for("settings.settings_page"),
data={"application-password": "foobar",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." in res.data
run_socketio_watch_update_test(password_mode="not logged in, should exit on connect", live_server=live_server, client=client)
res = client.post(
url_for("login"),
data={"password": "foobar"},
follow_redirects=True
)
# Yes we are correctly logged in
assert b"LOG OUT" in res.data
run_socketio_watch_update_test(password_mode="should be like normal", live_server=live_server, client=client)

View File

@@ -62,7 +62,7 @@ def set_modified_response_minus_block_text():
def test_check_block_changedetection_text_NOT_present(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# Use a mix of case in ZzZ to prove it works case-insensitive.
ignore_text = "out of stoCk\r\nfoobar"
set_original_ignore_response()

View File

@@ -7,7 +7,7 @@ from .util import live_server_setup, wait_for_all_checks
def test_clone_functionality(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("<html><body>Some content</body></html>")

View File

@@ -45,15 +45,15 @@ def set_number_out_of_range_response(number="150"):
f.write(test_return_data)
def test_setup(client, live_server):
# def test_setup(client, live_server):
"""Test that both text and number conditions work together with AND logic."""
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
def test_conditions_with_text_and_number(client, live_server):
"""Test that both text and number conditions work together with AND logic."""
set_original_response("50")
#live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)
@@ -110,6 +110,8 @@ def test_conditions_with_text_and_number(client, live_server):
wait_for_all_checks(client)
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)
wait_for_all_checks(client)
# Case 1
@@ -126,6 +128,8 @@ def test_conditions_with_text_and_number(client, live_server):
# Case 2: Change with one condition violated
# Number out of range (150) but contains '5'
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)
set_number_out_of_range_response("150.5")
@@ -206,7 +210,7 @@ def test_condition_validate_rule_row(client, live_server):
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_return_data = """<html>
<body>
@@ -249,7 +253,7 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html>

View File

@@ -6,8 +6,7 @@ from .util import live_server_setup, wait_for_all_checks
from ..html_tools import *
def test_setup(live_server):
live_server_setup(live_server)
def set_original_response():
test_return_data = """<html>
@@ -125,7 +124,7 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
# Tests the whole stack works with the CSS Filter
def test_check_multiple_filters(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"
with open("test-datastore/endpoint-content.txt", "w") as f:
@@ -177,7 +176,7 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage):
# Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector
# Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text
def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
include_filters = "#blob-a"

View File

@@ -8,8 +8,7 @@ from ..html_tools import *
from .util import live_server_setup, wait_for_all_checks
def test_setup(live_server):
live_server_setup(live_server)
def set_response_with_multiple_index():
data= """<!DOCTYPE html>
@@ -148,7 +147,7 @@ across multiple lines
def test_element_removal_full(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_original_response()
@@ -209,7 +208,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
# Re #2752
def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_response_with_multiple_index()
subtractive_selectors_data = ["""

View File

@@ -7,8 +7,7 @@ from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_clie
import pytest
def test_setup(live_server):
live_server_setup(live_server)
def set_html_response():

View File

@@ -5,10 +5,7 @@ import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from ..html_tools import *
def test_setup(live_server):
live_server_setup(live_server)
def _runner_test_http_errors(client, live_server, http_code, expected_text):
@@ -79,7 +76,14 @@ def test_DNS_errors(client, live_server, measure_memory_usage):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
found_name_resolution_error = (
b"No address found" in res.data or
b"Name or service not known" in res.data or
b"nodename nor servname provided" in res.data or
b"Temporary failure in name resolution" in res.data or
b"Failed to establish a new connection" in res.data or
b"Connection error occurred" in res.data
)
assert found_name_resolution_error
# Should always record that we tried
assert bytes("just now".encode('utf-8')) in res.data
@@ -88,7 +92,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage):
# Re 1513
def test_low_level_errors_clear_correctly(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
@@ -108,7 +112,14 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us
# We should see the DNS error
res = client.get(url_for("watchlist.index"))
found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
found_name_resolution_error = (
b"No address found" in res.data or
b"Name or service not known" in res.data or
b"nodename nor servname provided" in res.data or
b"Temporary failure in name resolution" in res.data or
b"Failed to establish a new connection" in res.data or
b"Connection error occurred" in res.data
)
assert found_name_resolution_error
# Update with what should work
@@ -123,7 +134,14 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us
# Now the error should be gone
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
found_name_resolution_error = (
b"No address found" in res.data or
b"Name or service not known" in res.data or
b"nodename nor servname provided" in res.data or
b"Temporary failure in name resolution" in res.data or
b"Failed to establish a new connection" in res.data or
b"Connection error occurred" in res.data
)
assert not found_name_resolution_error
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)

View File

@@ -14,7 +14,7 @@ def test_check_extract_text_from_diff(client, live_server, measure_memory_usage)
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("Now it's {} seconds since epoch, time flies!".format(str(time.time())))
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# Add our URL to the import page
res = client.post(

View File

@@ -67,11 +67,11 @@ def set_multiline_response():
return None
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# def test_setup(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
def test_check_filter_multiline(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
## live_server_setup(live_server) # Setup on conftest per function
set_multiline_response()
# Add our URL to the import page
@@ -206,7 +206,7 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
def test_regex_error_handling(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)

View File

@@ -46,7 +46,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
# And the page has that filter available
# Then I should get a notification
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
# Give the endpoint time to spin up
time.sleep(1)

View File

@@ -163,15 +163,14 @@ def run_filter_test(client, live_server, content_filter):
os.unlink("test-datastore/notification.txt")
def test_setup(live_server):
live_server_setup(live_server)
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
# # live_server_setup(live_server) # Setup on conftest per function
run_filter_test(client, live_server,'#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
# # live_server_setup(live_server) # Setup on conftest per function
run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent

View File

@@ -6,8 +6,8 @@ from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from
import os
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# def test_setup(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
def set_original_response():
test_return_data = """<html>
@@ -40,7 +40,7 @@ def set_modified_response():
return None
def test_setup_group_tag(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_original_response()
# Add a tag with some config, import a tag and it should roughly work
@@ -131,7 +131,7 @@ def test_setup_group_tag(client, live_server, measure_memory_usage):
assert b'Deleted' in res.data
def test_tag_import_singular(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)
res = client.post(
@@ -151,7 +151,7 @@ def test_tag_import_singular(client, live_server, measure_memory_usage):
assert b'Deleted' in res.data
def test_tag_add_in_ui(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
#
res = client.post(
url_for("tags.form_tag_add"),
@@ -168,7 +168,7 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage):
assert b'Deleted' in res.data
def test_group_tag_notification(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_original_response()
test_url = url_for('test_endpoint', _external=True)
@@ -236,7 +236,7 @@ def test_group_tag_notification(client, live_server, measure_memory_usage):
assert b'Deleted' in res.data
def test_limit_tag_ui(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)
urls=[]
@@ -275,7 +275,7 @@ def test_limit_tag_ui(client, live_server, measure_memory_usage):
assert b'All tags deleted' in res.data
def test_clone_tag_on_import(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("imports.import_page"),
@@ -301,7 +301,7 @@ def test_clone_tag_on_import(client, live_server, measure_memory_usage):
assert b'Deleted' in res.data
def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True)

View File

@@ -9,9 +9,9 @@ from .util import live_server_setup, wait_for_all_checks
from urllib.parse import urlparse, parse_qs
def test_consistent_history(client, live_server, measure_memory_usage):
live_server_setup(live_server)
r = range(1, 30)
# live_server_setup(live_server) # Setup on conftest per function
workers = int(os.getenv("FETCH_WORKERS", 10))
r = range(1, 10+workers)
for one in r:
test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -46,9 +46,10 @@ def test_consistent_history(client, live_server, measure_memory_usage):
# assert the right amount of watches was found in the JSON
assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON"
i=0
# each one should have a history.txt containing just one line
for w in json_obj['watching'].keys():
i+=1
history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
@@ -58,8 +59,8 @@ def test_consistent_history(client, live_server, measure_memory_usage):
assert len(tmp_history) == 1, "History.txt should contain 1 line"
# Should be two files,. the history.txt , and the snapshot.txt
files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,
w))
files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w))
# Find the snapshot one
for fname in files_in_watch_dir:
if fname != 'history.txt' and 'html' not in fname:
@@ -75,7 +76,6 @@ def test_consistent_history(client, live_server, measure_memory_usage):
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
with open(json_db_file, 'r') as f:
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"

View File

@@ -24,7 +24,7 @@ def set_original_ignore_response():
def test_ignore(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# live_server_setup(live_server) # Setup on conftest per function
set_original_ignore_response()
test_url = url_for('test_endpoint', _external=True)
res = client.post(

View File

@@ -3,8 +3,7 @@
from . util import live_server_setup
from changedetectionio import html_tools
def test_setup(live_server):
live_server_setup(live_server)
# Unit test of the stripper
# Always we are dealing in utf-8

View File

@@ -5,8 +5,7 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from changedetectionio import html_tools
def test_setup(live_server):
live_server_setup(live_server)
# Unit test of the stripper
# Always we are dealing in utf-8
@@ -256,9 +255,9 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
assert b'Deleted' in res.data
def test_check_global_ignore_text_functionality(client, live_server):
#live_server_setup(live_server)
_run_test_global_ignore(client, as_source=False)
def test_check_global_ignore_text_functionality_as_source(client, live_server):
#live_server_setup(live_server)
_run_test_global_ignore(client, as_source=True, extra_ignore='/\?v=\d/')

View File

@@ -3,11 +3,10 @@
import time
from flask import url_for
from .util import live_server_setup
from .util import live_server_setup, wait_for_all_checks
def test_setup(live_server):
live_server_setup(live_server)
def set_original_ignore_response():
test_return_data = """<html>
@@ -70,19 +69,18 @@ def test_render_anchor_tag_content_true(client, live_server, measure_memory_usag
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# set a new html text with a modified link
set_modified_ignore_response()
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# We should not see the rendered anchor tag
res = client.get(url_for("ui.ui_views.preview_page", uuid="first"))
@@ -104,7 +102,7 @@ def test_render_anchor_tag_content_true(client, live_server, measure_memory_usag
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)

View File

@@ -5,8 +5,7 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks
def test_setup(live_server):
live_server_setup(live_server)
def set_original_response():

View File

@@ -4,8 +4,7 @@ import time
from flask import url_for
from . util import live_server_setup
def test_setup(live_server):
live_server_setup(live_server)
# Should be the same as set_original_ignore_response() but with a little more whitespacing

View File

@@ -8,8 +8,8 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# def test_setup(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
def test_import(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up
@@ -126,7 +126,7 @@ def test_import_distillio(client, live_server, measure_memory_usage):
def test_import_custom_xlsx(client, live_server, measure_memory_usage):
"""Test can upload a excel spreadsheet and the watches are created correctly"""
#live_server_setup(live_server)
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, 'import/spreadsheet.xlsx')
@@ -175,7 +175,7 @@ def test_import_custom_xlsx(client, live_server, measure_memory_usage):
def test_import_watchete_xlsx(client, live_server, measure_memory_usage):
"""Test can upload a excel spreadsheet and the watches are created correctly"""
#live_server_setup(live_server)
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, 'import/spreadsheet.xlsx')
with open(filename, 'rb') as f:

View File

@@ -5,12 +5,12 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# def test_setup(client, live_server, measure_memory_usage):
# # live_server_setup(live_server) # Setup on conftest per function
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_jinja2_in_url_query(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# Add our URL to the import page
test_url = url_for('test_return_query', _external=True)
@@ -35,7 +35,7 @@ def test_jinja2_in_url_query(client, live_server, measure_memory_usage):
# https://techtonics.medium.com/secure-templating-with-jinja2-understanding-ssti-and-jinja2-sandbox-environment-b956edd60456
def test_jinja2_security_url_query(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# Add our URL to the import page
test_url = url_for('test_return_query', _external=True)

Some files were not shown because too many files have changed in this diff Show More