mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-03-20 10:48:06 +00:00
Compare commits
3 Commits
JSONP-supp
...
cpu-memory
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
259e44940c | ||
|
|
1dbd25cdb4 | ||
|
|
bb2c9855ee |
@@ -61,8 +61,22 @@ import time
|
|||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||||
|
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||||
|
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||||
|
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||||
|
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||||
|
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||||
|
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||||
|
try:
|
||||||
|
import ctypes as _ctypes
|
||||||
|
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||||
if 'pytest' not in sys.modules:
|
if 'pytest' not in sys.modules:
|
||||||
|
|||||||
@@ -81,6 +81,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
|
|
||||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||||
|
|
||||||
|
proxy_list = datastore.proxy_list
|
||||||
output = render_template(
|
output = render_template(
|
||||||
"watch-overview.html",
|
"watch-overview.html",
|
||||||
active_tag=active_tag,
|
active_tag=active_tag,
|
||||||
@@ -92,7 +93,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
form=form,
|
form=form,
|
||||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||||
guid=datastore.data['app_guid'],
|
guid=datastore.data['app_guid'],
|
||||||
has_proxies=datastore.proxy_list,
|
has_proxies=proxy_list,
|
||||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||||
now_time_server=round(time.time()),
|
now_time_server=round(time.time()),
|
||||||
pagination=pagination,
|
pagination=pagination,
|
||||||
@@ -110,6 +111,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
watches=sorted_watches
|
watches=sorted_watches
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Return freed template-building memory to the OS immediately.
|
||||||
|
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||||
|
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||||
|
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||||
|
try:
|
||||||
|
import ctypes
|
||||||
|
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
if session.get('share-link'):
|
if session.get('share-link'):
|
||||||
del (session['share-link'])
|
del (session['share-link'])
|
||||||
|
|
||||||
|
|||||||
@@ -213,12 +213,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
|||||||
{%- set checking_now = is_checking_now(watch) -%}
|
{%- set checking_now = is_checking_now(watch) -%}
|
||||||
{%- set history_n = watch.history_n -%}
|
{%- set history_n = watch.history_n -%}
|
||||||
{%- set favicon = watch.get_favicon_filename() -%}
|
{%- set favicon = watch.get_favicon_filename() -%}
|
||||||
|
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||||
{%- set row_classes = [
|
{%- set row_classes = [
|
||||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||||
'processor-' ~ watch['processor'],
|
'processor-' ~ watch['processor'],
|
||||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
'has-error' if error_texts|length > 2 else '',
|
||||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||||
'unviewed' if watch.has_unviewed else '',
|
'unviewed' if watch.has_unviewed else '',
|
||||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||||
@@ -271,7 +272,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||||
</span>
|
</span>
|
||||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import flask_login
|
|||||||
import locale
|
import locale
|
||||||
import os
|
import os
|
||||||
import queue
|
import queue
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
@@ -387,6 +388,8 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
|||||||
|
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||||
|
|
||||||
@app.template_filter('sanitize_tag_class')
|
@app.template_filter('sanitize_tag_class')
|
||||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||||
"""Sanitize a tag title to create a valid CSS class name.
|
"""Sanitize a tag title to create a valid CSS class name.
|
||||||
@@ -398,9 +401,8 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
|||||||
Returns:
|
Returns:
|
||||||
str: A sanitized string suitable for use as a CSS class name
|
str: A sanitized string suitable for use as a CSS class name
|
||||||
"""
|
"""
|
||||||
import re
|
|
||||||
# Remove all non-alphanumeric characters and convert to lowercase
|
# Remove all non-alphanumeric characters and convert to lowercase
|
||||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||||
# Ensure it starts with a letter (CSS requirement)
|
# Ensure it starts with a letter (CSS requirement)
|
||||||
if sanitized and not sanitized[0].isalpha():
|
if sanitized and not sanitized[0].isalpha():
|
||||||
sanitized = 'tag' + sanitized
|
sanitized = 'tag' + sanitized
|
||||||
@@ -488,28 +490,21 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
available_languages = get_available_languages()
|
available_languages = get_available_languages()
|
||||||
language_codes = get_language_codes()
|
language_codes = get_language_codes()
|
||||||
|
|
||||||
def get_locale():
|
_locale_aliases = {
|
||||||
# Locale aliases: map browser language codes to translation directory names
|
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
}
|
||||||
locale_aliases = {
|
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
|
||||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
|
||||||
}
|
|
||||||
|
|
||||||
|
def get_locale():
|
||||||
# 1. Try to get locale from session (user explicitly selected)
|
# 1. Try to get locale from session (user explicitly selected)
|
||||||
if 'locale' in session:
|
if 'locale' in session:
|
||||||
return session['locale']
|
return session['locale']
|
||||||
|
|
||||||
# 2. Fall back to Accept-Language header
|
# 2. Fall back to Accept-Language header
|
||||||
# Get the best match from browser's Accept-Language header
|
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
# 3. Map browser locale to our internal locale if needed
|
||||||
|
return _locale_aliases.get(browser_locale, browser_locale)
|
||||||
# 3. Check if we need to map the browser locale to our internal locale
|
|
||||||
if browser_locale in locale_aliases:
|
|
||||||
return locale_aliases[browser_locale]
|
|
||||||
|
|
||||||
return browser_locale
|
|
||||||
|
|
||||||
# Initialize Babel with locale selector
|
# Initialize Babel with locale selector
|
||||||
babel = Babel(app, locale_selector=get_locale)
|
babel = Babel(app, locale_selector=get_locale)
|
||||||
@@ -1022,15 +1017,16 @@ def check_for_new_version():
|
|||||||
import urllib3
|
import urllib3
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
session = requests.Session()
|
||||||
|
session.verify = False
|
||||||
|
|
||||||
while not app.config.exit.is_set():
|
while not app.config.exit.is_set():
|
||||||
try:
|
try:
|
||||||
r = requests.post("https://changedetection.io/check-ver.php",
|
r = session.post("https://changedetection.io/check-ver.php",
|
||||||
data={'version': __version__,
|
data={'version': __version__,
|
||||||
'app_guid': datastore.data['app_guid'],
|
'app_guid': datastore.data['app_guid'],
|
||||||
'watch_count': len(datastore.data['watching'])
|
'watch_count': len(datastore.data['watching'])
|
||||||
},
|
})
|
||||||
|
|
||||||
verify=False)
|
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -43,6 +43,11 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
|||||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||||
|
|
||||||
|
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||||
|
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||||
|
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||||
|
_FAVICON_FILENAME_CACHE: dict = {}
|
||||||
|
|
||||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||||
|
|
||||||
@@ -806,9 +811,8 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
with open(fname, 'wb') as f:
|
with open(fname, 'wb') as f:
|
||||||
f.write(decoded)
|
f.write(decoded)
|
||||||
|
|
||||||
# Invalidate favicon filename cache
|
# Invalidate module-level favicon filename cache for this watch
|
||||||
if hasattr(self, '_favicon_filename_cache'):
|
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||||
delattr(self, '_favicon_filename_cache')
|
|
||||||
|
|
||||||
# A signal that could trigger the socket server to update the browser also
|
# A signal that could trigger the socket server to update the browser also
|
||||||
watch_check_update = signal('watch_favicon_bump')
|
watch_check_update = signal('watch_favicon_bump')
|
||||||
@@ -823,35 +827,23 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
|
|
||||||
def get_favicon_filename(self) -> str | None:
|
def get_favicon_filename(self) -> str | None:
|
||||||
"""
|
"""
|
||||||
Find any favicon.* file in the current working directory
|
Find any favicon.* file in the watch data directory.
|
||||||
and return the contents of the newest one.
|
|
||||||
|
|
||||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
deepcopy (which drops instance attrs), and concurrent request races.
|
||||||
|
Invalidated by bump_favicon() when a new favicon is saved.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Basename of the newest favicon file, or None if not found.
|
str: Basename of the favicon file, or None if not found.
|
||||||
"""
|
"""
|
||||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||||
cache_key = '_favicon_filename_cache'
|
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||||
if hasattr(self, cache_key):
|
|
||||||
return getattr(self, cache_key)
|
|
||||||
|
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
# Search for all favicon.* files
|
|
||||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||||
|
fname = os.path.basename(files[0]) if files else None
|
||||||
if not files:
|
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||||
result = None
|
return fname
|
||||||
else:
|
|
||||||
# Find the newest by modification time
|
|
||||||
newest_file = max(files, key=os.path.getmtime)
|
|
||||||
result = os.path.basename(newest_file)
|
|
||||||
|
|
||||||
# Cache the result
|
|
||||||
setattr(self, cache_key, result)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||||
"""Return path to a square thumbnail of the most recent screenshot.
|
"""Return path to a square thumbnail of the most recent screenshot.
|
||||||
@@ -1182,18 +1174,13 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
def compile_error_texts(self, has_proxies=None):
|
def compile_error_texts(self, has_proxies=None):
|
||||||
"""Compile error texts for this watch.
|
"""Compile error texts for this watch.
|
||||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||||
from flask import url_for
|
from flask import url_for, has_request_context
|
||||||
from markupsafe import Markup
|
from markupsafe import Markup
|
||||||
|
|
||||||
output = [] # Initialize as list since we're using append
|
output = [] # Initialize as list since we're using append
|
||||||
last_error = self.get('last_error','')
|
last_error = self.get('last_error','')
|
||||||
|
|
||||||
try:
|
has_app_context = has_request_context()
|
||||||
url_for('settings.settings_page')
|
|
||||||
except Exception as e:
|
|
||||||
has_app_context = False
|
|
||||||
else:
|
|
||||||
has_app_context = True
|
|
||||||
|
|
||||||
# has app+request context, we can use url_for()
|
# has app+request context, we can use url_for()
|
||||||
if has_app_context:
|
if has_app_context:
|
||||||
|
|||||||
@@ -100,6 +100,19 @@ def is_safe_valid_url(test_url):
|
|||||||
logger.warning('URL validation failed: URL is empty or whitespace only')
|
logger.warning('URL validation failed: URL is empty or whitespace only')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Per-request cache: same URL is often validated 2-3x per watchlist render (sort + display).
|
||||||
|
# Flask's g is scoped to one request and auto-cleared on teardown, so dynamic Jinja2 URLs
|
||||||
|
# like {{microtime()}} are always re-evaluated on the next request.
|
||||||
|
# Falls back gracefully when called outside a request context (e.g. background workers).
|
||||||
|
_cache_key = test_url
|
||||||
|
try:
|
||||||
|
from flask import g
|
||||||
|
_cache = g.setdefault('_url_validation_cache', {})
|
||||||
|
if _cache_key in _cache:
|
||||||
|
return _cache[_cache_key]
|
||||||
|
except RuntimeError:
|
||||||
|
_cache = None # No app context
|
||||||
|
|
||||||
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
|
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
|
||||||
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'
|
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'
|
||||||
|
|
||||||
@@ -112,11 +125,14 @@ def is_safe_valid_url(test_url):
|
|||||||
test_url = r.sub('', test_url)
|
test_url = r.sub('', test_url)
|
||||||
|
|
||||||
# Check the actual rendered URL in case of any Jinja markup
|
# Check the actual rendered URL in case of any Jinja markup
|
||||||
try:
|
# Only run jinja_render when the URL actually contains Jinja2 syntax - creating a new
|
||||||
test_url = jinja_render(test_url)
|
# ImmutableSandboxedEnvironment is expensive and is called once per watch per page load
|
||||||
except Exception as e:
|
if '{%' in test_url or '{{' in test_url:
|
||||||
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
try:
|
||||||
return False
|
test_url = jinja_render(test_url)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
|
||||||
|
return False
|
||||||
|
|
||||||
# Check query parameters and fragment
|
# Check query parameters and fragment
|
||||||
if re.search(r'[<>]', test_url):
|
if re.search(r'[<>]', test_url):
|
||||||
@@ -142,4 +158,6 @@ def is_safe_valid_url(test_url):
|
|||||||
logger.warning(f'URL f"{test_url}" failed validation, aborting.')
|
logger.warning(f'URL f"{test_url}" failed validation, aborting.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if _cache is not None:
|
||||||
|
_cache[_cache_key] = True
|
||||||
return True
|
return True
|
||||||
|
|||||||
Reference in New Issue
Block a user