Compare commits

...

3 Commits

Author SHA1 Message Date
dgtlmoon
259e44940c WIP
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled
2026-03-08 14:17:28 +01:00
dgtlmoon
1dbd25cdb4 Simplify change 2026-03-08 14:03:57 +01:00
dgtlmoon
bb2c9855ee Various memory and CPU improvements 2026-03-08 14:00:22 +01:00
6 changed files with 89 additions and 62 deletions

View File

@@ -61,8 +61,22 @@ import time
# ============================================================================== # ==============================================================================
import multiprocessing import multiprocessing
import os
import sys import sys
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
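# Must run before worker threads start. glibc reads MALLOC_ARENA_MAX only during its own malloc initialisation
# (already done by the time this code runs), so the env var set here is inherited by child processes only;
# the mallopt() call below is what applies the limit to the current process.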
if 'MALLOC_ARENA_MAX' not in os.environ:
os.environ['MALLOC_ARENA_MAX'] = '2'
try:
import ctypes as _ctypes
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
except Exception:
pass
# Set spawn as global default (safety net - all our code uses explicit contexts anyway) # Set spawn as global default (safety net - all our code uses explicit contexts anyway)
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions) # Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
if 'pytest' not in sys.modules: if 'pytest' not in sys.modules:

View File

@@ -81,6 +81,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title']) sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
proxy_list = datastore.proxy_list
output = render_template( output = render_template(
"watch-overview.html", "watch-overview.html",
active_tag=active_tag, active_tag=active_tag,
@@ -92,7 +93,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
form=form, form=form,
generate_tag_colors=processors.generate_processor_badge_colors, generate_tag_colors=processors.generate_processor_badge_colors,
guid=datastore.data['app_guid'], guid=datastore.data['app_guid'],
has_proxies=datastore.proxy_list, has_proxies=proxy_list,
hosted_sticky=os.getenv("SALTED_PASS", False) == False, hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()), now_time_server=round(time.time()),
pagination=pagination, pagination=pagination,
@@ -110,6 +111,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
watches=sorted_watches watches=sorted_watches
) )
# Return freed template-building memory to the OS immediately.
# render_template allocates ~20MB of intermediate strings that are freed on return,
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
if session.get('share-link'): if session.get('share-link'):
del (session['share-link']) del (session['share-link'])

View File

@@ -213,12 +213,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{%- set checking_now = is_checking_now(watch) -%} {%- set checking_now = is_checking_now(watch) -%}
{%- set history_n = watch.history_n -%} {%- set history_n = watch.history_n -%}
{%- set favicon = watch.get_favicon_filename() -%} {%- set favicon = watch.get_favicon_filename() -%}
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%} {%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #} {# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
{%- set row_classes = [ {%- set row_classes = [
loop.cycle('pure-table-odd', 'pure-table-even'), loop.cycle('pure-table-odd', 'pure-table-even'),
'processor-' ~ watch['processor'], 'processor-' ~ watch['processor'],
'has-error' if watch.compile_error_texts()|length > 2 else '', 'has-error' if error_texts|length > 2 else '',
'paused' if watch.paused is defined and watch.paused != False else '', 'paused' if watch.paused is defined and watch.paused != False else '',
'unviewed' if watch.has_unviewed else '', 'unviewed' if watch.has_unviewed else '',
'has-restock-info' if watch.has_restock_info else 'no-restock-info', 'has-restock-info' if watch.has_restock_info else 'no-restock-info',
@@ -271,7 +272,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{% endif %} {% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a> <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
</span> </span>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div> <div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
{%- if watch['processor'] == 'text_json_diff' -%} {%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%} {%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>

View File

@@ -4,6 +4,7 @@ import flask_login
import locale import locale
import os import os
import queue import queue
import re
import sys import sys
import threading import threading
import time import time
@@ -387,6 +388,8 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
return '' return ''
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
@app.template_filter('sanitize_tag_class') @app.template_filter('sanitize_tag_class')
def _jinja2_filter_sanitize_tag_class(tag_title): def _jinja2_filter_sanitize_tag_class(tag_title):
"""Sanitize a tag title to create a valid CSS class name. """Sanitize a tag title to create a valid CSS class name.
@@ -398,9 +401,8 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
Returns: Returns:
str: A sanitized string suitable for use as a CSS class name str: A sanitized string suitable for use as a CSS class name
""" """
import re
# Remove all non-alphanumeric characters and convert to lowercase # Remove all non-alphanumeric characters and convert to lowercase
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower() sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
# Ensure it starts with a letter (CSS requirement) # Ensure it starts with a letter (CSS requirement)
if sanitized and not sanitized[0].isalpha(): if sanitized and not sanitized[0].isalpha():
sanitized = 'tag' + sanitized sanitized = 'tag' + sanitized
@@ -488,28 +490,21 @@ def changedetection_app(config=None, datastore_o=None):
available_languages = get_available_languages() available_languages = get_available_languages()
language_codes = get_language_codes() language_codes = get_language_codes()
def get_locale(): _locale_aliases = {
# Locale aliases: map browser language codes to translation directory names 'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
# This handles cases where browsers send standard codes (e.g., zh-TW) 'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
# but our translations use more specific codes (e.g., zh_Hant_TW) }
locale_aliases = { _locale_match_list = language_codes + list(_locale_aliases.keys())
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
}
def get_locale():
# 1. Try to get locale from session (user explicitly selected) # 1. Try to get locale from session (user explicitly selected)
if 'locale' in session: if 'locale' in session:
return session['locale'] return session['locale']
# 2. Fall back to Accept-Language header # 2. Fall back to Accept-Language header
# Get the best match from browser's Accept-Language header browser_locale = request.accept_languages.best_match(_locale_match_list)
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys())) # 3. Map browser locale to our internal locale if needed
return _locale_aliases.get(browser_locale, browser_locale)
# 3. Check if we need to map the browser locale to our internal locale
if browser_locale in locale_aliases:
return locale_aliases[browser_locale]
return browser_locale
# Initialize Babel with locale selector # Initialize Babel with locale selector
babel = Babel(app, locale_selector=get_locale) babel = Babel(app, locale_selector=get_locale)
@@ -1022,15 +1017,16 @@ def check_for_new_version():
import urllib3 import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
session = requests.Session()
session.verify = False
while not app.config.exit.is_set(): while not app.config.exit.is_set():
try: try:
r = requests.post("https://changedetection.io/check-ver.php", r = session.post("https://changedetection.io/check-ver.php",
data={'version': __version__, data={'version': __version__,
'app_guid': datastore.data['app_guid'], 'app_guid': datastore.data['app_guid'],
'watch_count': len(datastore.data['watching']) 'watch_count': len(datastore.data['watching'])
}, })
verify=False)
except: except:
pass pass

View File

@@ -43,6 +43,11 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
FAVICON_RESAVE_THRESHOLD_SECONDS=86400 FAVICON_RESAVE_THRESHOLD_SECONDS=86400
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20)) BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
# Module-level favicon filename cache: data_dir → basename (or None)
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
_FAVICON_FILENAME_CACHE: dict = {}
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)) minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -806,9 +811,8 @@ class model(EntityPersistenceMixin, watch_base):
with open(fname, 'wb') as f: with open(fname, 'wb') as f:
f.write(decoded) f.write(decoded)
# Invalidate favicon filename cache # Invalidate module-level favicon filename cache for this watch
if hasattr(self, '_favicon_filename_cache'): _FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
delattr(self, '_favicon_filename_cache')
# A signal that could trigger the socket server to update the browser also # A signal that could trigger the socket server to update the browser also
watch_check_update = signal('watch_favicon_bump') watch_check_update = signal('watch_favicon_bump')
@@ -823,35 +827,23 @@ class model(EntityPersistenceMixin, watch_base):
def get_favicon_filename(self) -> str | None: def get_favicon_filename(self) -> str | None:
""" """
Find any favicon.* file in the current working directory Find any favicon.* file in the watch data directory.
and return the contents of the newest one.
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations. Uses a module-level cache keyed by data_dir to survive Watch object recreation,
glob.glob() causes millions of fnmatch allocations when called for every watch on page load. deepcopy (which drops instance attrs), and concurrent request races.
Invalidated by bump_favicon() when a new favicon is saved.
Returns: Returns:
str: Basename of the newest favicon file, or None if not found. str: Basename of the favicon file, or None if not found.
""" """
# Check cache first (prevents 26M+ allocations from repeated glob operations) if self.data_dir in _FAVICON_FILENAME_CACHE:
cache_key = '_favicon_filename_cache' return _FAVICON_FILENAME_CACHE[self.data_dir]
if hasattr(self, cache_key):
return getattr(self, cache_key)
import glob import glob
# Search for all favicon.* files
files = glob.glob(os.path.join(self.data_dir, "favicon.*")) files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
fname = os.path.basename(files[0]) if files else None
if not files: _FAVICON_FILENAME_CACHE[self.data_dir] = fname
result = None return fname
else:
# Find the newest by modification time
newest_file = max(files, key=os.path.getmtime)
result = os.path.basename(newest_file)
# Cache the result
setattr(self, cache_key, result)
return result
def get_screenshot_as_thumbnail(self, max_age=3200): def get_screenshot_as_thumbnail(self, max_age=3200):
"""Return path to a square thumbnail of the most recent screenshot. """Return path to a square thumbnail of the most recent screenshot.
@@ -1182,18 +1174,13 @@ class model(EntityPersistenceMixin, watch_base):
def compile_error_texts(self, has_proxies=None): def compile_error_texts(self, has_proxies=None):
"""Compile error texts for this watch. """Compile error texts for this watch.
Accepts has_proxies parameter to ensure it works even outside app context""" Accepts has_proxies parameter to ensure it works even outside app context"""
from flask import url_for from flask import url_for, has_request_context
from markupsafe import Markup from markupsafe import Markup
output = [] # Initialize as list since we're using append output = [] # Initialize as list since we're using append
last_error = self.get('last_error','') last_error = self.get('last_error','')
try: has_app_context = has_request_context()
url_for('settings.settings_page')
except Exception as e:
has_app_context = False
else:
has_app_context = True
# has app+request context, we can use url_for() # has app+request context, we can use url_for()
if has_app_context: if has_app_context:

View File

@@ -100,6 +100,19 @@ def is_safe_valid_url(test_url):
logger.warning('URL validation failed: URL is empty or whitespace only') logger.warning('URL validation failed: URL is empty or whitespace only')
return False return False
# Per-request cache: same URL is often validated 2-3x per watchlist render (sort + display).
# Flask's g is scoped to one request and auto-cleared on teardown, so dynamic Jinja2 URLs
# like {{microtime()}} are always re-evaluated on the next request.
# Falls back gracefully when called outside an application context (e.g. background workers).
_cache_key = test_url
try:
from flask import g
_cache = g.setdefault('_url_validation_cache', {})
if _cache_key in _cache:
return _cache[_cache_key]
except RuntimeError:
_cache = None # No app context
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false')) allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):' safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'
@@ -112,11 +125,14 @@ def is_safe_valid_url(test_url):
test_url = r.sub('', test_url) test_url = r.sub('', test_url)
# Check the actual rendered URL in case of any Jinja markup # Check the actual rendered URL in case of any Jinja markup
try: # Only run jinja_render when the URL actually contains Jinja2 syntax - creating a new
test_url = jinja_render(test_url) # ImmutableSandboxedEnvironment is expensive and is called once per watch per page load
except Exception as e: if '{%' in test_url or '{{' in test_url:
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}') try:
return False test_url = jinja_render(test_url)
except Exception as e:
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
return False
# Check query parameters and fragment # Check query parameters and fragment
if re.search(r'[<>]', test_url): if re.search(r'[<>]', test_url):
@@ -142,4 +158,6 @@ def is_safe_valid_url(test_url):
logger.warning(f'URL f"{test_url}" failed validation, aborting.') logger.warning(f'URL f"{test_url}" failed validation, aborting.')
return False return False
if _cache is not None:
_cache[_cache_key] = True
return True return True