Compare commits

..

4 Commits

Author SHA1 Message Date
dgtlmoon
c93a3470a9 Move this other weird claude code
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-07-28 18:58:21 +02:00
dgtlmoon
57c83e868d Revert horrible AI code changes 2025-07-28 18:45:29 +02:00
dgtlmoon
ddbbe1ddee Merge branch 'master' into custom-restock-str-master 2025-07-28 18:41:11 +02:00
Guillem Saiz Pascual
1a2e9309ed Add custom out-of-stock and in-stock string detection
- Implements configurable custom strings for restock detection (fixes #2779)
- Adds robust text normalization (case-insensitive, accent removal, whitespace)
- Supports international sites with custom messages like 'Pronto estarán en stock\!'
- Makes built-in in-stock detection configurable (addresses TODO)
- Includes comprehensive unit and integration tests

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-26 09:26:36 -07:00
30 changed files with 397 additions and 607 deletions

View File

@@ -2,7 +2,7 @@
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
FROM ghcr.io/linuxserver/baseimage-alpine:3.22
FROM ghcr.io/linuxserver/baseimage-alpine:3.21
ENV PYTHONUNBUFFERED=1
COPY requirements.txt /requirements.txt
@@ -24,13 +24,12 @@ RUN \
apk add --update --no-cache \
libjpeg \
libxslt \
file \
nodejs \
poppler-utils \
python3 && \
echo "**** pip3 install test of changedetection.io ****" && \
python3 -m venv /lsiopy && \
pip install -U pip wheel setuptools && \
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
apk del --purge \
build-dependencies

View File

@@ -30,7 +30,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v5
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL

View File

@@ -39,7 +39,7 @@ jobs:
# Or if we are in a tagged release scenario.
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:

View File

@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
@@ -34,7 +34,7 @@ jobs:
- build
steps:
- name: Download all the dists
uses: actions/download-artifact@v5
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/
@@ -72,7 +72,7 @@ jobs:
steps:
- name: Download all the dists
uses: actions/download-artifact@v5
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/

View File

@@ -46,7 +46,7 @@ jobs:
- platform: linux/arm64
dockerfile: ./.github/test/Dockerfile-alpine
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:

View File

@@ -7,7 +7,7 @@ jobs:
lint-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Lint with Ruff
run: |
pip install ruff

View File

@@ -20,7 +20,7 @@ jobs:
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
# Mainly just for link/flake8
- name: Set up Python ${{ env.PYTHON_VERSION }}

View File

@@ -84,9 +84,6 @@ COPY changedetection.py /app/changedetection.py
ARG LOGGER_LEVEL=''
ENV LOGGER_LEVEL="$LOGGER_LEVEL"
# Default
ENV LC_ALL=en_US.UTF-8
WORKDIR /app
CMD ["python", "./changedetection.py", "-d", "/datastore"]

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.50.9'
__version__ = '0.50.7'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -35,22 +35,13 @@ def sigshutdown_handler(_signo, _stack_frame):
app.config.exit.set()
datastore.stop_thread = True
# Shutdown workers and queues immediately
# Shutdown workers immediately
try:
from changedetectionio import worker_handler
worker_handler.shutdown_workers()
except Exception as e:
logger.error(f"Error shutting down workers: {str(e)}")
# Close janus queues properly
try:
from changedetectionio.flask_app import update_q, notification_q
update_q.close()
notification_q.close()
logger.debug("Janus queues closed successfully")
except Exception as e:
logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
# Shutdown socketio server fast
from changedetectionio.flask_app import socketio_server
if socketio_server and hasattr(socketio_server, 'shutdown'):

View File

@@ -7,7 +7,6 @@ from changedetectionio.flask_app import watch_check_update
import asyncio
import importlib
import os
import queue
import time
from loguru import logger
@@ -38,23 +37,13 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
watch = None
try:
# Use native janus async interface - no threads needed!
queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)
# Use asyncio wait_for to make queue.get() cancellable
queued_item_data = await asyncio.wait_for(q.get(), timeout=1.0)
except asyncio.TimeoutError:
# No jobs available, continue loop
continue
except Exception as e:
logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")
# Log queue health for debugging
try:
queue_size = q.qsize()
is_empty = q.empty()
logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
except Exception as health_e:
logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")
logger.error(f"Worker {worker_id} error getting queue item: {e}")
await asyncio.sleep(0.1)
continue

View File

@@ -199,6 +199,14 @@ nav
</ul>
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.application.form.custom_outofstock_strings) }}
<span class="pure-form-message-inline">Additional custom out-of-stock detection strings (one per line).</span>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.application.form.custom_instock_strings) }}
<span class="pure-form-message-inline">Additional custom in-stock detection strings (one per line).</span>
</fieldset>
</div>
<div class="tab-pane-inner" id="api">

View File

@@ -93,15 +93,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
return redirect(url_for('watchlist.index'))
# For submission of requesting an extract
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
extract_form = forms.extractDataForm(request.form)
if not extract_form.validate():
flash("An error occurred, please see below.", "error")
return _render_diff_template(uuid, extract_form)
else:
extract_regex = request.form.get('extract_regex', '').strip()
extract_regex = request.form.get('extract_regex').strip()
output = watch.extract_regex_from_all_history(extract_regex)
if output:
watch_dir = os.path.join(datastore.datastore_path, uuid)
@@ -112,11 +109,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
response.headers['Expires'] = "0"
return response
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
flash('Nothing matches that RegEx', 'error')
redirect(url_for('ui_views.diff_history_page', uuid=uuid) + '#extract')
def _render_diff_template(uuid, extract_form=None):
"""Helper function to render the diff template with all required data"""
@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
from changedetectionio import forms
# More for testing, possible to return the first/only
@@ -130,11 +128,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Use provided form or create a new one
if extract_form is None:
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
# For submission of requesting an extract
extract_form = forms.extractDataForm(request.form)
history = watch.history
dates = list(history.keys())
@@ -175,7 +170,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
datastore.set_last_viewed(uuid, time.time())
return render_template("diff.html",
output = render_template("diff.html",
current_diff_url=watch['url'],
from_version=str(from_version),
to_version=str(to_version),
@@ -198,10 +193,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
watch_a=watch
)
@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
return _render_diff_template(uuid)
return output
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required

View File

@@ -1,8 +1,8 @@
async () => {
async (customOutOfStockStrings = []) => {
function isItemInStock() {
// @todo Pass these in so the same list can be used in non-JS fetchers
const outOfStockTexts = [
const builtInOutOfStockTexts = [
' أخبرني عندما يتوفر',
'0 in stock',
'actuellement indisponible',
@@ -110,6 +110,9 @@ async () => {
'품절'
];
// Combine built-in strings with custom strings provided by user
const outOfStockTexts = [...builtInOutOfStockTexts, ...customOutOfStockStrings];
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);

View File

@@ -12,7 +12,7 @@ from blinker import signal
from changedetectionio.strtobool import strtobool
from threading import Event
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
from changedetectionio.custom_queue import SignalPriorityQueue, AsyncSignalPriorityQueue, NotificationQueue
from changedetectionio import worker_handler
from flask import (
@@ -48,8 +48,8 @@ datastore = None
ticker_thread = None
extra_stylesheets = []
# Use bulletproof janus-based queues for sync/async reliability
update_q = RecheckPriorityQueue()
# Use async queue by default, keep sync for backward compatibility
update_q = AsyncSignalPriorityQueue() if worker_handler.USE_ASYNC_WORKERS else SignalPriorityQueue()
notification_q = NotificationQueue()
MAX_QUEUE_SIZE = 2000
@@ -844,22 +844,16 @@ def ticker_thread_check_time_launch_checks():
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
priority = int(time.time())
logger.debug(
f"> Queued watch UUID {uuid} "
f"last checked at {watch['last_checked']} "
f"queued at {now:0.2f} priority {priority} "
f"jitter {watch.jitter_seconds:0.2f}s, "
f"{now - watch['last_checked']:0.2f}s since last checked")
# Into the queue with you
queued_successfully = worker_handler.queue_item_async_safe(update_q,
queuedWatchMetaData.PrioritizedItem(priority=priority,
item={'uuid': uuid})
)
if queued_successfully:
logger.debug(
f"> Queued watch UUID {uuid} "
f"last checked at {watch['last_checked']} "
f"queued at {now:0.2f} priority {priority} "
f"jitter {watch.jitter_seconds:0.2f}s, "
f"{now - watch['last_checked']:0.2f}s since last checked")
else:
logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!")
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
# Reset for next time
watch.jitter_seconds = 0

View File

@@ -396,19 +396,6 @@ def validate_url(test_url):
# This should be wtforms.validators.
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
class ValidateSinglePythonRegexString(object):
def __init__(self, message=None):
self.message = message
def __call__(self, form, field):
try:
re.compile(field.data)
except re.error:
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (field.data))
class ValidateListRegex(object):
"""
Validates that anything that looks like a regex passes as a regex
@@ -427,7 +414,6 @@ class ValidateListRegex(object):
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (line))
class ValidateCSSJSONXPATHInput(object):
"""
Filter validation
@@ -788,6 +774,20 @@ class globalSettingsApplicationForm(commonSettingsForm):
message="Should contain zero or more attempts")])
ui = FormField(globalSettingsApplicationUIForm)
#@todo better validations?
custom_outofstock_strings = StringListField('Custom out-of-stock detection strings',
[validators.Optional()],
render_kw={
"placeholder": "Enter custom out-of-stock strings, one per line\nExample:\nPronto estarán en stock!\nTemporarily out of stock",
"rows": "3"})
custom_instock_strings = StringListField('Custom in-stock detection strings',
[validators.Optional()],
render_kw={
"placeholder": "Enter custom in-stock strings, one per line\nExample:\nDisponible ahora\nIn voorraad",
"rows": "3"})
class globalSettingsForm(Form):
# Define these as FormFields/"sub forms", this way it matches the JSON storage
@@ -805,5 +805,5 @@ class globalSettingsForm(Form):
class extractDataForm(Form):
extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")])
extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})

View File

@@ -38,6 +38,8 @@ class model(dict):
# Custom notification content
'api_access_token_enabled': True,
'base_url' : None,
'custom_instock_strings': [],
'custom_outofstock_strings' : [],
'empty_pages_are_a_change': False,
'extract_title_as_title': False,
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),

View File

@@ -639,7 +639,7 @@ class model(watch_base):
if res:
if not csv_writer:
# A file on the disk can be transferred much faster via flask than a string reply
csv_output_filename = f"report-{self.get('uuid')}.csv"
csv_output_filename = 'report.csv'
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
# @todo some headers in the future
#fieldnames = ['Epoch seconds', 'Date']

View File

@@ -3,7 +3,6 @@ import uuid
from changedetectionio import strtobool
default_notification_format_for_watch = 'System default'
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
class watch_base(dict):
@@ -16,8 +15,6 @@ class watch_base(dict):
'body': None,
'browser_steps': [],
'browser_steps_last_error_step': None,
'conditions' : {},
'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
'check_count': 0,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.

View File

@@ -1,7 +1,8 @@
from wtforms import (
BooleanField,
validators,
FloatField
FloatField,
TextAreaField
)
from wtforms.fields.choices import RadioField
from wtforms.fields.form import FormField
@@ -29,6 +30,7 @@ class RestockSettingsForm(Form):
follow_price_changes = BooleanField('Follow price changes', default=True)
class processor_settings_form(processor_text_json_diff_form):
restock_settings = FormField(RestockSettingsForm)
@@ -74,7 +76,7 @@ class processor_settings_form(processor_text_json_diff_form):
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
</fieldset>
</fieldset>
</div>
</fieldset>
"""

View File

@@ -143,6 +143,89 @@ def is_between(number, lower=None, upper=None):
class perform_site_check(difference_detection_processor):
screenshot = None
xpath_data = None
def _normalize_text_for_matching(self, text):
"""
Normalize text for more robust matching:
- Convert to lowercase
- Remove accents/diacritics
- Normalize whitespace
"""
import unicodedata
import re
if not text:
return ""
# Convert to lowercase
text = text.lower()
# Remove accents/diacritics (NFD normalization + filter)
# This converts "é" to "e", "ñ" to "n", etc.
text = unicodedata.normalize('NFD', text)
text = ''.join(char for char in text if unicodedata.category(char) != 'Mn')
# Normalize whitespace (replace multiple spaces/tabs/newlines with single space)
text = re.sub(r'\s+', ' ', text).strip()
return text
def _check_custom_strings(self, text_to_check, custom_strings, string_type="out-of-stock"):
"""
Check text against custom strings (either in-stock or out-of-stock).
Uses normalized matching for better international support.
Returns the matched string if found, None otherwise.
"""
if not custom_strings:
return None
# Split custom strings by newlines and clean them up
raw_custom_list = [s.strip() for s in custom_strings.split('\n') if s.strip()]
if not raw_custom_list:
return None
# Normalize both the page text and custom strings for matching
normalized_text = self._normalize_text_for_matching(text_to_check)
# Check each custom string against the text
for original_custom_text in raw_custom_list:
normalized_custom_text = self._normalize_text_for_matching(original_custom_text)
if normalized_custom_text and normalized_custom_text in normalized_text:
logger.debug(f"Custom {string_type} string found: '{original_custom_text}' (normalized: '{normalized_custom_text}')")
return original_custom_text # Return the original user-provided string
return None
def _get_combined_instock_strings(self, restock_settings):
"""
Get combined list of built-in and custom in-stock strings.
Custom strings are normalized for better matching.
"""
# Built-in in-stock strings (from the TODO line)
builtin_instock_strings = [
'instock',
'instoreonly',
'limitedavailability',
'onlineonly',
'presale'
]
# Add custom in-stock strings if provided
custom_strings = restock_settings.get('custom_instock_strings', '').strip()
if custom_strings:
# Normalize custom strings for better matching
custom_list = []
for s in custom_strings.split('\n'):
s = s.strip()
if s:
normalized = self._normalize_text_for_matching(s)
if normalized:
custom_list.append(normalized)
builtin_instock_strings.extend(custom_list)
return builtin_instock_strings
def run_changedetection(self, watch):
import hashlib
@@ -205,6 +288,7 @@ class perform_site_check(difference_detection_processor):
if itemprop_availability.get('availability'):
# @todo: Configurable?
if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [
'instock',
'instoreonly',
@@ -238,6 +322,8 @@ class perform_site_check(difference_detection_processor):
if self.fetcher.instock_data and itemprop_availability.get('availability') is None:
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
# Careful! this does not really come from chrome/js when the watch is set to plaintext
stock_detection_result = self.fetcher.instock_data
update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.")

View File

@@ -1,435 +0,0 @@
from blinker import signal
from loguru import logger
from typing import Dict, List, Any, Optional
import heapq
import queue
import threading
try:
import janus
except ImportError:
logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
raise
class RecheckPriorityQueue:
"""
Ultra-reliable priority queue using janus for async/sync bridging.
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
- async_q: Used by async workers (the actual fetchers/processors) and coroutines
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
- Synchronous code (Flask, threads) cannot use async methods without blocking
- Async code cannot use sync methods without blocking the event loop
- janus provides the only safe bridge between these two worlds
Attempting to unify to async-only would require:
- Converting all Flask routes to async (major breaking change)
- Using asyncio.run() in sync contexts (causes deadlocks)
- Thread-pool wrapping (adds complexity and overhead)
Minimal implementation focused on reliability:
- Pure janus for sync/async bridge
- Thread-safe priority ordering
- Bulletproof error handling with critical logging
"""
def __init__(self, maxsize: int = 0):
try:
self._janus_queue = janus.Queue(maxsize=maxsize)
# BOTH interfaces required - see class docstring for why
self.sync_q = self._janus_queue.sync_q # Flask routes, ticker thread
self.async_q = self._janus_queue.async_q # Async workers
# Priority storage - thread-safe
self._priority_items = []
self._lock = threading.RLock()
# Signals for UI updates
self.queue_length_signal = signal('queue_length')
logger.debug("RecheckPriorityQueue initialized successfully")
except Exception as e:
logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
raise
# SYNC INTERFACE (for ticker thread)
def put(self, item, block: bool = True, timeout: Optional[float] = None):
"""Thread-safe sync put with priority ordering"""
try:
# Add to priority storage
with self._lock:
heapq.heappush(self._priority_items, item)
# Notify via janus sync queue
self.sync_q.put(True, block=block, timeout=timeout)
# Emit signals
self._emit_put_signals(item)
logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
return True
except Exception as e:
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
# Remove from priority storage if janus put failed
try:
with self._lock:
if item in self._priority_items:
self._priority_items.remove(item)
heapq.heapify(self._priority_items)
except Exception as cleanup_e:
logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(e)}")
return False
def get(self, block: bool = True, timeout: Optional[float] = None):
"""Thread-safe sync get with priority ordering"""
try:
# Wait for notification
self.sync_q.get(block=block, timeout=timeout)
# Get highest priority item
with self._lock:
if not self._priority_items:
logger.critical(f"CRITICAL: Queue notification received but no priority items available")
raise Exception("Priority queue inconsistency")
item = heapq.heappop(self._priority_items)
# Emit signals
self._emit_get_signals()
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
return item
except Exception as e:
logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}")
raise
# ASYNC INTERFACE (for workers)
async def async_put(self, item):
"""Pure async put with priority ordering"""
try:
# Add to priority storage
with self._lock:
heapq.heappush(self._priority_items, item)
# Notify via janus async queue
await self.async_q.put(True)
# Emit signals
self._emit_put_signals(item)
logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
return True
except Exception as e:
logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
# Remove from priority storage if janus put failed
try:
with self._lock:
if item in self._priority_items:
self._priority_items.remove(item)
heapq.heapify(self._priority_items)
except Exception as cleanup_e:
logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}")
return False
async def async_get(self):
"""Pure async get with priority ordering"""
try:
# Wait for notification
await self.async_q.get()
# Get highest priority item
with self._lock:
if not self._priority_items:
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
raise Exception("Priority queue inconsistency")
item = heapq.heappop(self._priority_items)
# Emit signals
self._emit_get_signals()
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
return item
except Exception as e:
logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
raise
# UTILITY METHODS
def qsize(self) -> int:
"""Get current queue size"""
try:
with self._lock:
return len(self._priority_items)
except Exception as e:
logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}")
return 0
def empty(self) -> bool:
"""Check if queue is empty"""
return self.qsize() == 0
def close(self):
"""Close the janus queue"""
try:
self._janus_queue.close()
logger.debug("RecheckPriorityQueue closed successfully")
except Exception as e:
logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")
# COMPATIBILITY METHODS (from original implementation)
@property
def queue(self):
"""Provide compatibility with original queue access"""
try:
with self._lock:
return list(self._priority_items)
except Exception as e:
logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}")
return []
def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
"""Find position of UUID in queue"""
try:
with self._lock:
queue_list = list(self._priority_items)
total_items = len(queue_list)
if total_items == 0:
return {'position': None, 'total_items': 0, 'priority': None, 'found': False}
# Find target item
for item in queue_list:
if (hasattr(item, 'item') and isinstance(item.item, dict) and
item.item.get('uuid') == target_uuid):
# Count items with higher priority
position = sum(1 for other in queue_list if other.priority < item.priority)
return {
'position': position,
'total_items': total_items,
'priority': item.priority,
'found': True
}
return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}
except Exception as e:
logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}")
return {'position': None, 'total_items': 0, 'priority': None, 'found': False}
def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
"""Get all queued UUIDs with pagination"""
try:
with self._lock:
queue_list = sorted(self._priority_items) # Sort by priority
total_items = len(queue_list)
if total_items == 0:
return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}
# Apply pagination
end_idx = min(offset + limit, total_items) if limit else total_items
items_to_process = queue_list[offset:end_idx]
result = []
for position, item in enumerate(items_to_process, start=offset):
if (hasattr(item, 'item') and isinstance(item.item, dict) and
'uuid' in item.item):
result.append({
'uuid': item.item['uuid'],
'position': position,
'priority': item.priority
})
return {
'items': result,
'total_items': total_items,
'returned_items': len(result),
'has_more': (offset + len(result)) < total_items
}
except Exception as e:
logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}")
return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}
def get_queue_summary(self) -> Dict[str, Any]:
"""Get queue summary statistics"""
try:
with self._lock:
queue_list = list(self._priority_items)
total_items = len(queue_list)
if total_items == 0:
return {
'total_items': 0, 'priority_breakdown': {},
'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
}
immediate_items = clone_items = scheduled_items = 0
priority_counts = {}
for item in queue_list:
priority = item.priority
priority_counts[priority] = priority_counts.get(priority, 0) + 1
if priority == 1:
immediate_items += 1
elif priority == 5:
clone_items += 1
elif priority > 100:
scheduled_items += 1
return {
'total_items': total_items,
'priority_breakdown': priority_counts,
'immediate_items': immediate_items,
'clone_items': clone_items,
'scheduled_items': scheduled_items,
'min_priority': min(priority_counts.keys()) if priority_counts else None,
'max_priority': max(priority_counts.keys()) if priority_counts else None
}
except Exception as e:
logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}")
return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
'clone_items': 0, 'scheduled_items': 0}
# PRIVATE METHODS
def _get_item_uuid(self, item) -> str:
"""Safely extract UUID from item for logging"""
try:
if hasattr(item, 'item') and isinstance(item.item, dict):
return item.item.get('uuid', 'unknown')
except Exception:
pass
return 'unknown'
def _emit_put_signals(self, item):
"""Emit signals when item is added"""
try:
# Watch update signal
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
watch_check_update = signal('watch_check_update')
if watch_check_update:
watch_check_update.send(watch_uuid=item.item['uuid'])
# Queue length signal
if self.queue_length_signal:
self.queue_length_signal.send(length=self.qsize())
except Exception as e:
logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")
def _emit_get_signals(self):
"""Emit signals when item is removed"""
try:
if self.queue_length_signal:
self.queue_length_signal.send(length=self.qsize())
except Exception as e:
logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}")
class NotificationQueue:
"""
Ultra-reliable notification queue using pure janus.
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
- async_q: Used by async workers and coroutines
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
See RecheckPriorityQueue docstring above for detailed explanation.
Simple wrapper around janus with bulletproof error handling.
"""
def __init__(self, maxsize: int = 0):
try:
self._janus_queue = janus.Queue(maxsize=maxsize)
# BOTH interfaces required - see class docstring for why
self.sync_q = self._janus_queue.sync_q # Flask routes, threads
self.async_q = self._janus_queue.async_q # Async workers
self.notification_event_signal = signal('notification_event')
logger.debug("NotificationQueue initialized successfully")
except Exception as e:
logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
raise
def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
"""Thread-safe sync put with signal emission"""
try:
self.sync_q.put(item, block=block, timeout=timeout)
self._emit_notification_signal(item)
logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
return True
except Exception as e:
logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
return False
async def async_put(self, item: Dict[str, Any]):
"""Pure async put with signal emission"""
try:
await self.async_q.put(item)
self._emit_notification_signal(item)
logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
return True
except Exception as e:
logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
return False
def get(self, block: bool = True, timeout: Optional[float] = None):
"""Thread-safe sync get"""
try:
return self.sync_q.get(block=block, timeout=timeout)
except queue.Empty as e:
raise e
except Exception as e:
logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
raise e
async def async_get(self):
"""Pure async get"""
try:
return await self.async_q.get()
except queue.Empty as e:
raise e
except Exception as e:
logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
raise e
def qsize(self) -> int:
"""Get current queue size"""
try:
return self.sync_q.qsize()
except Exception as e:
logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
return 0
def empty(self) -> bool:
"""Check if queue is empty"""
return self.qsize() == 0
def close(self):
"""Close the janus queue"""
try:
self._janus_queue.close()
logger.debug("NotificationQueue closed successfully")
except Exception as e:
logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")
def _emit_notification_signal(self, item: Dict[str, Any]):
"""Emit notification signal"""
try:
if self.notification_event_signal and isinstance(item, dict):
watch_uuid = item.get('uuid')
if watch_uuid:
self.notification_event_signal.send(watch_uuid=watch_uuid)
else:
self.notification_event_signal.send()
except Exception as e:
logger.critical(f"CRITICAL: Failed to emit notification signal: {str(e)}")

View File

@@ -111,3 +111,130 @@ def test_restock_detection(client, live_server, measure_memory_usage):
res = client.get(url_for("watchlist.index"))
assert b'not-in-stock' in res.data, "Correctly showing NOT IN STOCK in the list after it changed from IN STOCK"
def test_restock_custom_strings(client, live_server):
"""Test custom out-of-stock strings feature"""
# Set up a response with custom out-of-stock text
test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="custom">Pronto estarán en stock!</div>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')
# Add watch with custom out-of-stock strings
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'processor': 'restock_diff'},
follow_redirects=True
)
# Get the UUID so we can configure the watch
uuid = extract_UUID_from_client(client)
# Configure custom out-of-stock strings
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
data={
"url": test_url,
'processor': 'restock_diff',
'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message'
},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Check that it detects as out of stock
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'not-in-stock' in res.data, "Should detect custom out-of-stock string"
# Test custom in-stock strings by changing the content
test_return_data_instock = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="custom">Disponible ahora</div>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data_instock)
# Update the watch to include custom in-stock strings
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
data={
"url": test_url,
'processor': 'restock_diff',
'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message',
'restock_settings-custom_instock_strings': 'Disponible ahora\nIn voorraad'
},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Check again - should be detected as in stock now
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'not-in-stock' not in res.data, "Should detect custom in-stock string and show as available"
def test_restock_custom_strings_normalization(client, live_server):
"""Test key normalization scenarios: accents, case, and spaces"""
# Test page with Spanish text with accents and mixed case
test_return_data = """<html>
<body>
<div>price: $10.99</div>
<div id="status">¡TEMPORALMENTE AGOTADO!</div>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')
# Add watch
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'processor': 'restock_diff'},
follow_redirects=True
)
uuid = extract_UUID_from_client(client)
# Configure custom string without accents, lowercase, no extra spaces
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
data={
"url": test_url,
'processor': 'restock_diff',
'restock_settings-custom_outofstock_strings': 'temporalmente agotado'
},
follow_redirects=True
)
# Should detect as out of stock despite text differences
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'not-in-stock' in res.data, "Should match despite accents, case, and spacing differences"

View File

@@ -292,7 +292,9 @@ def test_access_denied(client, live_server, measure_memory_usage):
def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Create a watch
set_original_response()
test_url = url_for('test_endpoint', _external=True)
@@ -300,27 +302,14 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
# Create new
res = client.post(
url_for("createwatch"),
data=json.dumps({"url": test_url,
'tag': "One, Two",
"title": "My test URL",
'headers': {'cookie': 'yum'},
"conditions": [
{
"field": "page_filtered_text",
"operator": "contains_regex",
"value": "." # contains anything
}
],
"conditions_match_logic": "ALL"
}
),
data=json.dumps({"url": test_url, 'tag': "One, Two", "title": "My test URL", 'headers': {'cookie': 'yum'} }),
headers={'content-type': 'application/json', 'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 201
wait_for_all_checks(client)
# Get a listing, it will be the first one
res = client.get(
url_for("createwatch"),

View File

@@ -4,8 +4,6 @@ import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from ..model import CONDITIONS_MATCH_LOGIC_DEFAULT
def set_original_response(number="50"):
test_return_data = f"""<html>
@@ -78,7 +76,7 @@ def test_conditions_with_text_and_number(client, live_server):
"fetch_backend": "html_requests",
"include_filters": ".number-container",
"title": "Number AND Text Condition Test",
"conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT, # ALL = AND logic
"conditions_match_logic": "ALL", # ALL = AND logic
"conditions-0-operator": "in",
"conditions-0-field": "page_filtered_text",
"conditions-0-value": "5",
@@ -285,7 +283,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
data={
"url": test_url,
"fetch_backend": "html_requests",
"conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT, # ALL = AND logic
"conditions_match_logic": "ALL", # ALL = AND logic
"conditions-0-field": "levenshtein_ratio",
"conditions-0-operator": "<",
"conditions-0-value": "0.8" # needs to be more of a diff to trigger a change

View File

@@ -46,7 +46,7 @@ def test_check_extract_text_from_diff(client, live_server, measure_memory_usage)
follow_redirects=False
)
assert b'No matches found while scanning all of the watch history for that RegEx.' not in res.data
assert b'Nothing matches that RegEx' not in res.data
assert res.content_type == 'text/csv'
# Read the csv reply as stringio

View File

@@ -1,5 +1,4 @@
from changedetectionio.conditions import execute_ruleset_against_all_plugins
from changedetectionio.model import CONDITIONS_MATCH_LOGIC_DEFAULT
from changedetectionio.store import ChangeDetectionStore
import shutil
import tempfile
@@ -60,7 +59,7 @@ class TestTriggerConditions(unittest.TestCase):
self.store.data['watching'][self.watch_uuid].update(
{
"conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT,
"conditions_match_logic": "ALL",
"conditions": [
{"operator": ">=", "field": "extracted_number", "value": "10"},
{"operator": "<=", "field": "extracted_number", "value": "5000"},

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
import unittest
from changedetectionio.processors.restock_diff.processor import perform_site_check
class TestCustomStringNormalization(unittest.TestCase):
"""Test the text normalization logic for custom out-of-stock strings"""
def setUp(self):
# Create a processor instance for testing
self.processor = perform_site_check(datastore=None, watch_uuid='test')
def test_normalize_text_for_matching(self):
"""Test the _normalize_text_for_matching method"""
test_cases = [
# (input, expected_output)
("Agotado", "agotado"),
("AGOTADO", "agotado"), # Lowercase
("Sin stock!", "sin stock!"), # Normalize whitespace
("Pronto\t\nestarán\nen stock", "pronto estaran en stock"), # Multiple whitespace types + accents
("¡Temporalmente AGOTADO!", "¡temporalmente agotado!"), # Complex case
("", ""), # Empty string
("café", "cafe"), # French accent
("naïve", "naive"), # Multiple accents
]
for input_text, expected in test_cases:
with self.subTest(input_text=input_text):
result = self.processor._normalize_text_for_matching(input_text)
self.assertEqual(result, expected,
f"Failed to normalize '{input_text}' -> expected '{expected}', got '{result}'")
def test_check_custom_strings_normalization(self):
"""Test that custom string matching works with normalization"""
test_cases = [
# (page_text, custom_strings, should_match, description)
("AGOTADO", "agotado", True, "uppercase to lowercase"),
("Agotado", "agotado", True, "single uppercase to lowercase"),
("Sin stock!", "sin stock", True, "multiple spaces normalized"),
("¡Pronto estarán en stock!", "pronto estaran en stock", True, "accents + spaces"),
("TEMPORALMENTE AGOTADO", "temporalmente agotado", True, "multi-word uppercase"),
("Available now", "agotado", False, "no match case"),
("", "agotado", False, "empty text"),
("agotado", "", False, "empty custom strings"),
]
for page_text, custom_strings, should_match, description in test_cases:
with self.subTest(description=description):
result = self.processor._check_custom_strings(page_text, custom_strings, "out-of-stock")
if should_match:
self.assertIsNotNone(result,
f"Expected match for '{description}': '{page_text}' should match '{custom_strings}'")
else:
self.assertIsNone(result,
f"Expected no match for '{description}': '{page_text}' should not match '{custom_strings}'")
def test_check_custom_strings_multiline(self):
"""Test that multi-line custom strings work properly"""
page_text = "Product status: TEMPORALMENTE AGOTADO"
custom_strings = """
sin stock
agotado
temporalmente agotado
"""
result = self.processor._check_custom_strings(page_text, custom_strings, "out-of-stock")
self.assertIsNotNone(result)
self.assertEqual(result.strip(), "temporalmente agotado")
def test_get_combined_instock_strings_normalization(self):
"""Test that custom in-stock strings are normalized properly"""
restock_settings = {
'custom_instock_strings': 'Disponible AHORA\nEn Stock\nDISPONÍBLE'
}
result = self.processor._get_combined_instock_strings(restock_settings)
# Check that built-in strings are included
self.assertIn('instock', result)
self.assertIn('presale', result)
# Check that custom strings are normalized and included
self.assertIn('disponible ahora', result)
self.assertIn('en stock', result)
self.assertIn('disponible', result) # accent removed
if __name__ == '__main__':
unittest.main()

View File

@@ -188,54 +188,15 @@ def is_watch_running(watch_uuid):
def queue_item_async_safe(update_q, item):
"""Bulletproof queue operation with comprehensive error handling"""
item_uuid = 'unknown'
try:
# Safely extract UUID for logging
if hasattr(item, 'item') and isinstance(item.item, dict):
item_uuid = item.item.get('uuid', 'unknown')
except Exception as uuid_e:
logger.critical(f"CRITICAL: Failed to extract UUID from queue item: {uuid_e}")
# Validate inputs
if not update_q:
logger.critical(f"CRITICAL: Queue is None/invalid for item {item_uuid}")
return False
if not item:
logger.critical(f"CRITICAL: Item is None/invalid")
return False
# Attempt queue operation with multiple fallbacks
try:
# Primary: Use sync interface (thread-safe)
success = update_q.put(item, block=True, timeout=5.0)
if success is False: # Explicit False return means failure
logger.critical(f"CRITICAL: Queue.put() returned False for item {item_uuid}")
return False
logger.debug(f"Successfully queued item: {item_uuid}")
return True
except Exception as e:
logger.critical(f"CRITICAL: Exception during queue operation for item {item_uuid}: {type(e).__name__}: {e}")
# Secondary: Attempt queue health check
"""Queue an item for async queue processing"""
if async_loop and not async_loop.is_closed():
try:
queue_size = update_q.qsize()
is_empty = update_q.empty()
logger.critical(f"CRITICAL: Queue health - size: {queue_size}, empty: {is_empty}")
except Exception as health_e:
logger.critical(f"CRITICAL: Queue health check failed: {health_e}")
# Log queue type for debugging
try:
logger.critical(f"CRITICAL: Queue type: {type(update_q)}, has sync_q: {hasattr(update_q, 'sync_q')}")
except Exception:
logger.critical(f"CRITICAL: Cannot determine queue type")
return False
# For async queue, schedule the put operation
asyncio.run_coroutine_threadsafe(update_q.put(item), async_loop)
except RuntimeError as e:
logger.error(f"Failed to queue item: {e}")
else:
logger.error("Async loop not available or closed for queueing item")
def shutdown_workers():

View File

@@ -66,9 +66,6 @@ services:
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# - TZ=America/Los_Angeles
#
# Text processing locale, en_US.UTF-8 used by default unless defined as something else here, UTF-8 should cover 99.99% of cases.
# - LC_ALL=en_US.UTF-8
#
# Maximum height of screenshots, default is 16000 px, screenshots will be clipped to this if exceeded.
# RAM usage will be higher if you increase this.
# - SCREENSHOT_MAX_HEIGHT=16000

View File

@@ -7,7 +7,6 @@ flask-paginate
flask_expects_json~=1.7
flask_restful
flask_cors # For the Chrome extension to operate
janus # Thread-safe async/sync queue bridge
flask_wtf~=1.2
flask~=2.3
flask-socketio~=5.5.1
@@ -46,7 +45,7 @@ apprise==1.9.3
paho-mqtt!=2.0.*
# Requires extra wheel for rPi
#cryptography~=42.0.8
cryptography~=42.0.8
# Used for CSS filtering
beautifulsoup4>=4.0.0