Files
skkzsh 8cfa6eb336
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled
CodeQL / Analyze (javascript) (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
i18n: Wrap untranslated UI strings and update ja translations (#4052)
2026-04-16 03:54:13 +02:00

125 lines
4.6 KiB
Python

"""
Levenshtein distance and similarity plugin for text change detection.
Provides metrics for measuring text similarity between snapshots.
"""
import pluggy
from loguru import logger
from flask_babel import gettext as _, lazy_gettext as _l
LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000
# Support both plugin systems
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
def levenshtein_ratio_recent_history(watch, incoming_text=None):
try:
from Levenshtein import ratio, distance
k = list(watch.history.keys())
a = None
b = None
# When called from ui_edit_stats_extras, we don't have incoming_text
if incoming_text is None:
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot
# Needs atleast one snapshot
elif len(k) >= 1: # Should be atleast one snapshot to compare against
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
b = incoming_text if incoming_text else k[-2]
if a and b:
distance_value = distance(a, b)
ratio_value = ratio(a, b)
return {
'distance': distance_value,
'ratio': ratio_value,
'percent_similar': round(ratio_value * 100, 2)
}
except Exception as e:
logger.warning(f"Unable to calc similarity: {str(e)}")
return ''
@conditions_hookimpl
def register_operators():
pass
@conditions_hookimpl
def register_operator_choices():
pass
@conditions_hookimpl
def register_field_choices():
return [
("levenshtein_ratio", _l("Levenshtein - Text similarity ratio")),
("levenshtein_distance", _l("Levenshtein - Text change distance")),
]
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
res = {}
watch = application_datastruct['watching'].get(current_watch_uuid)
# ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
if watch and 'text' in ephemeral_data:
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
if isinstance(lev_data, dict):
res['levenshtein_ratio'] = lev_data.get('ratio', 0)
res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
res['levenshtein_distance'] = lev_data.get('distance', 0)
return res
@global_hookimpl
def ui_edit_stats_extras(watch):
"""Add Levenshtein stats to the UI using the global plugin system"""
"""Generate the HTML for Levenshtein stats - shared by both plugin systems"""
if len(watch.history.keys()) < 2:
return f"<p>{_('Not enough history to calculate Levenshtein metrics')}</p>"
# Protection against the algorithm getting stuck on huge documents
k = list(watch.history.keys())
if any(
len(watch.get_history_snapshot(timestamp=k[idx])) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS
for idx in (-1, -2)
if len(k) >= abs(idx)
):
return f"<p>{_('Snapshot too large for edit statistics, skipping.')}</p>"
try:
lev_data = levenshtein_ratio_recent_history(watch)
if not lev_data or not isinstance(lev_data, dict):
return f"<p>{_('Unable to calculate Levenshtein metrics')}</p>"
html = f"""
<div class="levenshtein-stats">
<h4>{_('Levenshtein Text Similarity Details')}</h4>
<table class="pure-table">
<tbody>
<tr>
<td>{_('Raw distance (edits needed)')}</td>
<td>{lev_data['distance']}</td>
</tr>
<tr>
<td>{_('Similarity ratio')}</td>
<td>{lev_data['ratio']:.4f}</td>
</tr>
<tr>
<td>{_('Percent similar')}</td>
<td>{lev_data['percent_similar']}%</td>
</tr>
</tbody>
</table>
<p style="font-size: 80%;">{_('Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.')}</p>
</div>
"""
return html
except Exception as e:
logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
return f"<p>{_('Error calculating Levenshtein metrics')}</p>"