mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-01-23 15:40:19 +00:00
Multi-language / Translations Support (#3696) - Complete internationalization system implemented - Support for 7 languages: Czech (cs), German (de), French (fr), Italian (it), Korean (ko), Chinese Simplified (zh), Chinese Traditional (zh_TW) - Language selector with localized flags and theming - Flash message translations - Multiple translation fixes and improvements across all languages - Language setting preserved across redirects Pluggable Content Fetchers (#3653) - New architecture for extensible content fetcher system - Allows custom fetcher implementations Image / Screenshot Comparison Processor (#3680) - New processor for visual change detection (disabled for this release) - Supporting CSS/JS infrastructure added UI Improvements Design & Layout - Auto-generated tag color schemes - Simplified login form styling - Removed hard-coded CSS, moved to SCSS variables - Tag UI cleanup and improvements - Automatic tab wrapper functionality - Menu refactoring for better organization - Cleanup of offset settings - Hide sticky tabs on narrow viewports - Improved responsive layout (#3702) User Experience - Modal alerts/confirmations on delete/clear operations (#3693, #3598, #3382) - Auto-add https:// to URLs in quickwatch form if not present - Better redirect handling on login (#3699) - 'Recheck all' now returns to correct group/tag (#3673) - Language set redirect keeps hash fragment - More friendly human-readable text throughout UI Performance & Reliability Scheduler & Processing - Soft delays instead of blocking time.sleep() calls (#3710) - More resilient handling of same UUID being processed (#3700) - Better Puppeteer timeout handling - Improved Puppeteer shutdown/cleanup (#3692) - Requests cleanup now properly async History & Rendering - Faster server-side "difference" rendering on History page (#3442) - Show ignored/triggered rows in history - API: Retry watch data if watch dict changed (more reliable) API Improvements - Watch get endpoint: retry mechanism for 
changed watch data - WatchHistoryDiff API endpoint includes extra format args (#3703) Testing Improvements - Replace time.sleep with wait_for_notification_endpoint_output (#3716) - Test for mode switching (#3701) - Test for #3720 added (#3725) - Extract-text difference test fixes - Improved dev workflow Bug Fixes - Notification error text output (#3672, #3669, #3280) - HTML validation fixes (#3704) - Template discovery path fixes - Notification debug log now uses system locale for dates/times - Puppeteer spelling mistake in log output - Recalculation on anchor change - Queue bubble update disabled temporarily Dependency Updates - beautifulsoup4 updated (#3724) - psutil 7.1.0 → 7.2.1 (#3723) - python-engineio ~=4.12.3 → ~=4.13.0 (#3707) - python-socketio ~=5.14.3 → ~=5.16.0 (#3706) - flask-socketio ~=5.5.1 → ~=5.6.0 (#3691) - brotli ~=1.1 → ~=1.2 (#3687) - lxml updated (#3590) - pytest ~=7.2 → ~=9.0 (#3676) - jsonschema ~=4.0 → ~=4.25 (#3618) - pluggy ~=1.5 → ~=1.6 (#3616) - cryptography 44.0.1 → 46.0.3 (security) (#3589) Documentation - README updated with viewport size setup information Development Infrastructure - Dev container only built on dev branch - Improved dev workflow tooling
124 lines
4.4 KiB
Python
124 lines
4.4 KiB
Python
"""
|
|
Levenshtein distance and similarity plugin for text change detection.
|
|
Provides metrics for measuring text similarity between snapshots.
|
|
"""
|
|
import pluggy
|
|
from loguru import logger
|
|
|
|
# Snapshots whose len() exceeds this are skipped by the UI edit statistics,
# so the Levenshtein computation is never started on huge documents.
LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS = 100_000

# Support both plugin systems: one marker per pluggy project name.
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
|
|
|
def levenshtein_ratio_recent_history(watch, incoming_text=None):
    """Compute Levenshtein metrics between the latest snapshot and a comparison text.

    The comparison text is chosen as follows:

    * ``incoming_text is None`` (UI statistics path): the previous saved
      snapshot, so the last two snapshots are compared.
    * non-empty ``incoming_text``: that text itself.
    * empty ``incoming_text``: the previous saved snapshot's text, when one
      exists.

    Args:
        watch: Object exposing ``history`` (a mapping whose keys identify
            snapshots, oldest first) and ``get_history_snapshot(timestamp=...)``.
        incoming_text: Freshly fetched text to compare against the latest
            saved snapshot, or ``None`` to compare the last two snapshots.

    Returns:
        A dict with ``distance``, ``ratio`` and ``percent_similar`` keys, or
        an empty string when there is not enough data or the calculation
        failed (failures are logged as warnings).
    """
    try:
        history_keys = list(watch.history.keys())
        latest_text = None
        compare_text = None

        if incoming_text is None:
            # When called from ui_edit_stats_extras, we don't have incoming_text,
            # so two saved snapshots are required.
            if len(history_keys) >= 2:
                latest_text = watch.get_history_snapshot(timestamp=history_keys[-1])
                compare_text = watch.get_history_snapshot(timestamp=history_keys[-2])
        elif history_keys:
            # Needs at least one saved snapshot to compare against
            latest_text = watch.get_history_snapshot(timestamp=history_keys[-1])
            if incoming_text:
                compare_text = incoming_text
            elif len(history_keys) >= 2:
                # Fall back to the previous snapshot's *text*; the history key
                # itself is only an identifier and must not be compared.
                compare_text = watch.get_history_snapshot(timestamp=history_keys[-2])

        if latest_text and compare_text:
            # Imported lazily so a missing optional dependency only matters
            # when there is actually something to compare.
            from Levenshtein import ratio, distance

            distance_value = distance(latest_text, compare_text)
            ratio_value = ratio(latest_text, compare_text)
            return {
                'distance': distance_value,
                'ratio': ratio_value,
                'percent_similar': round(ratio_value * 100, 2)
            }
    except Exception as e:
        logger.warning(f"Unable to calc similarity: {str(e)}")

    return ''
|
|
|
|
@conditions_hookimpl
def register_operators():
    """Conditions hook: this plugin contributes no operators (returns None)."""
|
|
|
|
@conditions_hookimpl
def register_operator_choices():
    """Conditions hook: this plugin contributes no operator choices (returns None)."""
|
|
|
|
|
|
@conditions_hookimpl
def register_field_choices():
    """Conditions hook: list the Levenshtein fields as ``(key, label)`` pairs."""
    ratio_choice = ("levenshtein_ratio", "Levenshtein - Text similarity ratio")
    distance_choice = ("levenshtein_distance", "Levenshtein - Text change distance")
    return [ratio_choice, distance_choice]
|
|
|
|
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Conditions hook: supply Levenshtein metrics for the watch being evaluated.

    Args:
        current_watch_uuid: Key of the watch inside
            ``application_datastruct['watching']``.
        application_datastruct: Mapping holding the ``'watching'`` collection.
        ephemeral_data: Mapping for the current evaluation; its ``'text'``
            entry is the current text after filters (which may have been
            edited but not saved yet).

    Returns:
        A dict with ``levenshtein_ratio``, ``levenshtein_similarity`` and
        ``levenshtein_distance``, or an empty dict when the watch is unknown,
        no text was supplied, or the metrics could not be computed.
    """
    watch = application_datastruct['watching'].get(current_watch_uuid)
    if not watch or 'text' not in ephemeral_data:
        return {}

    lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text', ''))
    if not isinstance(lev_data, dict):
        # The helper signals "nothing to report" with a non-dict value
        return {}

    return {
        'levenshtein_ratio': lev_data.get('ratio', 0),
        'levenshtein_similarity': lev_data.get('percent_similar', 0),
        'levenshtein_distance': lev_data.get('distance', 0),
    }
|
|
|
|
@global_hookimpl
def ui_edit_stats_extras(watch):
    """Add Levenshtein stats to the UI using the global plugin system.

    Builds an HTML fragment describing how similar the last two snapshots of
    ``watch`` are.

    Args:
        watch: Object exposing ``history`` and
            ``get_history_snapshot(timestamp=...)``.

    Returns:
        An HTML string: the statistics table, or a short ``<p>`` message when
        there is not enough history, a snapshot is too large, or the metrics
        could not be calculated. Any exception raised while inspecting the
        snapshots or computing the metrics is logged and reported as an error
        message rather than propagated.
    """
    try:
        history_keys = list(watch.history.keys())
        if len(history_keys) < 2:
            return "<p>Not enough history to calculate Levenshtein metrics</p>"

        # Protection against the algorithm getting stuck on huge documents:
        # check the latest snapshot first, then the previous one.
        for timestamp in (history_keys[-1], history_keys[-2]):
            snapshot = watch.get_history_snapshot(timestamp=timestamp)
            if len(snapshot) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS:
                return "<p>Snapshot too large for edit statistics, skipping.</p>"

        lev_data = levenshtein_ratio_recent_history(watch)
        if not lev_data or not isinstance(lev_data, dict):
            return "<p>Unable to calculate Levenshtein metrics</p>"

        html = f"""
        <div class="levenshtein-stats">
            <h4>Levenshtein Text Similarity Details</h4>
            <table class="pure-table">
                <tbody>
                    <tr>
                        <td>Raw distance (edits needed)</td>
                        <td>{lev_data['distance']}</td>
                    </tr>
                    <tr>
                        <td>Similarity ratio</td>
                        <td>{lev_data['ratio']:.4f}</td>
                    </tr>
                    <tr>
                        <td>Percent similar</td>
                        <td>{lev_data['percent_similar']}%</td>
                    </tr>
                </tbody>
            </table>
            <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
        </div>
        """
        return html
    except Exception as e:
        logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
        return "<p>Error calculating Levenshtein metrics</p>"
|
|
|