mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-04-11 13:38:02 +00:00
Compare commits
16 Commits
3835-plugg
...
4037-word-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
388b280219 | ||
|
|
4294b461c7 | ||
|
|
77116f5203 | ||
|
|
238d6ba72d | ||
|
|
ede06a92bd | ||
|
|
9d4249c820 | ||
|
|
b5bac1c868 | ||
|
|
0479aa9654 | ||
|
|
746e213398 | ||
|
|
84d97ec9cf | ||
|
|
c8f13f5084 | ||
|
|
d74b7d5329 | ||
|
|
31a760c214 | ||
|
|
43bba5a1b6 | ||
|
|
7c9eb02df4 | ||
|
|
0ad4090d68 |
@@ -99,11 +99,7 @@ jobs:
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.7'
|
||||
__version__ = '0.54.8'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -98,8 +98,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
|
||||
backup_threads = []
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def request_backup():
|
||||
if any(thread.is_alive() for thread in backup_threads):
|
||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||
@@ -141,8 +141,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return backup_info
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
@@ -165,9 +165,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/", methods=['GET'])
|
||||
@backups_blueprint.route("/create", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def create():
|
||||
backups = find_backups()
|
||||
output = render_template("backup_create.html",
|
||||
@@ -176,8 +176,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
)
|
||||
return output
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def remove_backups():
|
||||
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
|
||||
@@ -174,8 +174,8 @@ def construct_restore_blueprint(datastore):
|
||||
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
|
||||
restore_threads = []
|
||||
|
||||
@login_optionally_required
|
||||
@restore_blueprint.route("/restore", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def restore():
|
||||
form = RestoreForm()
|
||||
return render_template("backup_restore.html",
|
||||
@@ -184,8 +184,8 @@ def construct_restore_blueprint(datastore):
|
||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
||||
|
||||
@login_optionally_required
|
||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def backups_restore_start():
|
||||
if any(t.is_alive() for t in restore_threads):
|
||||
flash(gettext("A restore is already running, check back in a few minutes"), "error")
|
||||
|
||||
@@ -20,8 +20,7 @@
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB · Max decompressed size: %(decomp)s MB',
|
||||
upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
|
||||
@@ -268,8 +268,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return browsersteps_start_session
|
||||
|
||||
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
import uuid
|
||||
@@ -304,8 +304,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug("Starting connection with playwright - done")
|
||||
return {'browsersteps_session_id': browsersteps_session_id}
|
||||
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def browser_steps_fetch_screenshot_image():
|
||||
from flask import (
|
||||
make_response,
|
||||
@@ -330,8 +330,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
|
||||
|
||||
# A request for an action was received
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def browsersteps_ui_update():
|
||||
import base64
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -22,10 +22,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
|
||||
|
||||
from changedetectionio import processors
|
||||
output = render_template("groups-overview.html",
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
available_tags=sorted_tags,
|
||||
form=add_form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
tag_count=tag_count,
|
||||
)
|
||||
|
||||
@@ -208,9 +210,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
template = env.from_string(template_str)
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
# Watches whose URL currently matches this tag's pattern
|
||||
matching_watches = {
|
||||
w_uuid: watch
|
||||
for w_uuid, watch in datastore.data['watching'].items()
|
||||
if default.matches_url(watch.get('url', ''))
|
||||
}
|
||||
|
||||
output = render_template("edit-tag.html",
|
||||
extra_form_content=included_content,
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
matching_watches=matching_watches,
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
**template_args
|
||||
)
|
||||
|
||||
@@ -10,6 +10,8 @@ from changedetectionio.processors.restock_diff.forms import processor_settings_f
|
||||
|
||||
class group_restock_settings_form(restock_settings_form):
    """Extend restock_settings_form with the two extra fields declared below."""

    # Checkbox, unticked by default.
    overrides_watch = BooleanField(
        'Activate for individual watches in this tag/group?',
        default=False,
    )

    # Free-text pattern; the placeholder shows the two accepted notations.
    url_match_pattern = StringField(
        'Auto-apply to watches with URLs matching',
        render_kw={"placeholder": "e.g. *://example.com/* or github.com/myorg"},
    )
|
||||
|
||||
class SingleTag(Form):
|
||||
|
||||
|
||||
@@ -43,6 +43,20 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url_match_pattern, class="m-d") }}
|
||||
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
|
||||
</div>
|
||||
{% if matching_watches %}
|
||||
<div class="pure-control-group">
|
||||
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
|
||||
<ul class="tag-url-match-list">
|
||||
{% for w_uuid, w in matching_watches.items() %}
|
||||
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -3,6 +3,22 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<style>
|
||||
{%- for uuid, tag in available_tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
|
||||
@@ -48,7 +64,7 @@
|
||||
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
||||
</td>
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
|
||||
|
||||
@@ -320,7 +320,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
'capabilities': capabilities
|
||||
'capabilities': capabilities,
|
||||
'auto_applied_tags': {
|
||||
tag_uuid: tag
|
||||
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
|
||||
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
|
||||
},
|
||||
}
|
||||
|
||||
included_content = None
|
||||
|
||||
@@ -81,6 +81,14 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
|
||||
{% if auto_applied_tags %}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Also automatically applied by URL pattern:') }}
|
||||
{% for tag_uuid, tag in auto_applied_tags.items() %}
|
||||
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
|
||||
{% endfor %}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
|
||||
@@ -49,6 +49,9 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
|
||||
@@ -75,6 +75,9 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
|
||||
@@ -56,6 +56,10 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
|
||||
if total_height > capture_height:
|
||||
stitched = stitched.crop((0, 0, max_width, capture_height))
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
@@ -104,15 +104,17 @@ class fetcher(Fetcher):
|
||||
|
||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
||||
from selenium.webdriver.remote.client_config import ClientConfig
|
||||
from urllib3.util import Timeout
|
||||
driver = None
|
||||
try:
|
||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
||||
remote_connection = RemoteConnection(
|
||||
self.browser_connection_url,
|
||||
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
|
||||
client_config = ClientConfig(
|
||||
remote_server_addr=self.browser_connection_url,
|
||||
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
|
||||
)
|
||||
remote_connection.set_timeout(30) # seconds
|
||||
remote_connection = RemoteConnection(client_config=client_config)
|
||||
|
||||
# Now create the driver with the RemoteConnection
|
||||
driver = RemoteWebDriver(
|
||||
command_executor=remote_connection,
|
||||
options=options
|
||||
|
||||
@@ -45,8 +45,38 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
# Regexes built from the constants above — no brittle hardcoded strings
|
||||
_EXTRACT_REMOVED_RE = re.compile(
|
||||
re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
|
||||
)
|
||||
_EXTRACT_ADDED_RE = re.compile(
|
||||
re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
|
||||
)
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
|
||||
def extract_changed_from(raw_diff: str) -> str:
    """Extract only the removed/changed-from fragments from a raw diff string.

    Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
    not the full surrounding line. Multiple fragments joined with newlines.

    Args:
        raw_diff: Diff text containing the removed / changed placemarkers.

    Returns:
        The text found between each matching pair of placemarkers, one fragment
        per line; an empty string when nothing matches.
    """
    fragments = []
    for match in _EXTRACT_REMOVED_RE.finditer(raw_diff):
        removed, changed = match.group(1), match.group(2)
        # Only one side of the alternation takes part in a match; the other group is None.
        # Compare against None instead of relying on truthiness: an empty removed
        # fragment ('') would otherwise fall through to the unmatched group (None)
        # and make str.join() raise TypeError.
        fragments.append(removed if removed is not None else (changed or ''))
    return '\n'.join(fragments)
|
||||
|
||||
|
||||
def extract_changed_to(raw_diff: str) -> str:
    """Extract only the added/changed-into fragments from a raw diff string.

    Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
    not the full surrounding line. Multiple fragments joined with newlines.

    Args:
        raw_diff: Diff text containing the added / changed-into placemarkers.

    Returns:
        The text found between each matching pair of placemarkers, one fragment
        per line; an empty string when nothing matches.
    """
    fragments = []
    for match in _EXTRACT_ADDED_RE.finditer(raw_diff):
        added, changed_into = match.group(1), match.group(2)
        # Only one side of the alternation takes part in a match; the other group is None.
        # Compare against None instead of relying on truthiness: an empty added
        # fragment ('') would otherwise fall through to the unmatched group (None)
        # and make str.join() raise TypeError.
        fragments.append(added if added is not None else (changed_into or ''))
    return '\n'.join(fragments)
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
|
||||
"""
|
||||
Render word-level differences between two lines inline using diff-match-patch library.
|
||||
|
||||
@@ -133,14 +163,20 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if removed_tokens:
|
||||
removed_full = ''.join(removed_tokens).rstrip()
|
||||
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
else:
|
||||
result_parts.append(f'{removed_full}{trailing_removed}')
|
||||
|
||||
if added_tokens:
|
||||
if result_parts: # Add newline between removed and added
|
||||
result_parts.append('\n')
|
||||
added_full = ''.join(added_tokens).rstrip()
|
||||
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
if include_change_type_prefix:
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
else:
|
||||
result_parts.append(f'{added_full}{trailing_added}')
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
else:
|
||||
@@ -150,21 +186,27 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
|
||||
if op == 0: # Equal
|
||||
result_parts.append(text)
|
||||
elif op == 1: # Insertion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
elif op == -1: # Deletion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
if not include_change_type_prefix:
|
||||
result_parts.append(text)
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
|
||||
@@ -360,7 +402,7 @@ def customSequenceMatcher(
|
||||
|
||||
# Use inline word-level diff for single line replacements when word_diff is enabled
|
||||
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix)
|
||||
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
|
||||
if ignore_junk and not has_changes:
|
||||
# No real changes, skip this line
|
||||
|
||||
@@ -28,18 +28,20 @@ def get_timeago_locale(flask_locale):
|
||||
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
# The timeago library still uses the older locale naming convention that predates BCP 47 / RFC 5646.
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'pt_BR': 'pt_BR', # Portuguese (Brasil)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'ja': 'ja', # Japanese
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
return locale_map.get(flask_locale, flask_locale)
|
||||
|
||||
@@ -53,7 +55,8 @@ LANGUAGE_DATA = {
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
|
||||
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
|
||||
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
|
||||
@@ -46,11 +46,26 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
def matches_url(self, url: str) -> bool:
    """Return True if this tag should be auto-applied to the given watch URL.

    Patterns containing a wildcard character (``*``, ``?`` or ``[``) are matched
    with fnmatch against the whole URL; anything else is a case-insensitive
    substring match.

    Args:
        url: The watch URL to test.

    Returns:
        True when the configured ``url_match_pattern`` matches ``url``; False when
        no pattern is configured, the pattern is blank, or ``url`` is empty.
    """
    import fnmatch

    # The key can be present with a None value, in which case the .get() default
    # is not used - fall back to '' so .strip() cannot raise AttributeError.
    pattern = (self.get('url_match_pattern') or '').strip().lower()
    if not pattern or not url:
        return False

    url_lowered = url.lower()
    if any(c in pattern for c in ('*', '?', '[')):
        # Both sides are already lowercased, so use fnmatchcase: it skips the
        # OS-dependent os.path.normcase() step that fnmatch.fnmatch() applies.
        return fnmatch.fnmatchcase(url_lowered, pattern)
    return pattern in url_lowered
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -88,6 +88,28 @@ class FormattableTimestamp(str):
|
||||
return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableExtract(str):
    """
    String value built from the changed fragments extracted out of a rendered diff.

    Backs the {{diff_changed_from}} and {{diff_changed_to}} tokens.

    {{ diff_changed_from }}  → old value(s) only, e.g. "$99.99"
    {{ diff_changed_to }}    → new value(s) only, e.g. "$109.99"

    Multiple changed fragments are joined with newlines.
    Because this subclasses str, it is natively JSON serializable.
    """

    def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
        # Nothing to compare when both snapshots are empty - skip rendering entirely.
        if not (prev_snapshot or current_snapshot):
            return super().__new__(cls, '')

        # Imported here so the diff module is only loaded when a diff is actually rendered.
        from changedetectionio import diff as diff_module

        rendered = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
        return super().__new__(cls, extract_fn(rendered))
|
||||
|
||||
|
||||
class FormattableDiff(str):
|
||||
"""
|
||||
A str subclass representing a rendered diff. As a plain string it renders
|
||||
@@ -161,6 +183,8 @@ class NotificationContextData(dict):
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
@@ -244,16 +268,27 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
from changedetectionio.diff import extract_changed_from, extract_changed_to
|
||||
extract_specs = {
|
||||
'diff_changed_from': extract_changed_from,
|
||||
'diff_changed_to': extract_changed_to,
|
||||
}
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only create FormattableDiff objects for diff keys actually used in the notification text
|
||||
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
|
||||
for key in NotificationContextData().keys():
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
if not key.startswith('diff'):
|
||||
continue
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
continue
|
||||
if key in diff_specs:
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
elif key in extract_specs:
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
|
||||
|
||||
@@ -980,12 +980,20 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
def get_all_tags_for_watch(self, uuid):
|
||||
"""This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet"""
|
||||
watch = self.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return {}
|
||||
|
||||
# Should return a dict of full tag info linked by UUID
|
||||
if watch:
|
||||
return dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
|
||||
# Start with manually assigned tags
|
||||
result = dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
|
||||
|
||||
return {}
|
||||
# Additionally include any tag whose url_match_pattern matches this watch's URL
|
||||
watch_url = watch.get('url', '')
|
||||
if watch_url:
|
||||
for tag_uuid, tag in self.__data['settings']['application']['tags'].items():
|
||||
if tag_uuid not in result and tag.matches_url(watch_url):
|
||||
result[tag_uuid] = tag
|
||||
|
||||
return result
|
||||
|
||||
@property
|
||||
def extra_browsers(self):
|
||||
|
||||
@@ -98,6 +98,14 @@
|
||||
<td><code>{{ '{{diff_patch}}' }}</code></td>
|
||||
<td>{{ _('The diff output - patch in unified format') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_from}}' }}</code></td>
|
||||
<td>{{ _('Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_to}}' }}</code></td>
|
||||
<td>{{ _('Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{current_snapshot}}' }}</code></td>
|
||||
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
|
||||
|
||||
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
|
||||
set_longer_modified_response, delete_all_watches
|
||||
|
||||
import logging
|
||||
|
||||
import os
|
||||
|
||||
# NOTE - relies on a mail server running and reachable under the hostname below, see the GitHub build recipes
|
||||
smtp_test_server = 'mailserver'
|
||||
smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver')
|
||||
|
||||
ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())
|
||||
|
||||
|
||||
144
changedetectionio/tests/test_tag_url_match.py
Normal file
144
changedetectionio/tests/test_tag_url_match.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration tests for auto-applying tags to watches by URL pattern matching.
|
||||
|
||||
Verifies:
|
||||
- A tag with url_match_pattern shows on the watch overview list (via get_all_tags_for_watch)
|
||||
- The auto-applied tag appears on the watch edit page
|
||||
- A watch whose URL does NOT match the pattern does not get the tag
|
||||
"""
|
||||
|
||||
import json
|
||||
from flask import url_for
|
||||
from .util import set_original_response, live_server_setup
|
||||
|
||||
|
||||
def test_tag_url_pattern_shows_in_overview(client, live_server, measure_memory_usage, datastore_path):
    """A tag with a matching url_match_pattern is shown in the overview and resolved for that watch only."""
    set_original_response(datastore_path=datastore_path)

    datastore = live_server.app.config['DATASTORE']
    api_key = datastore.data['settings']['application'].get('api_access_token')

    def post_json(endpoint, payload):
        # POST a JSON body to the named API endpoint and require a 201 Created response
        response = client.post(
            url_for(endpoint),
            data=json.dumps(payload),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )
        assert response.status_code == 201, response.data
        return response

    # One pattern tag, one watch whose URL matches it, one watch whose URL does not
    tag_uuid = post_json("tag", {"title": "Auto GitHub", "url_match_pattern": "*github.com*"}).json['uuid']
    matching_watch_uuid = post_json("createwatch", {"url": "https://github.com/someuser/repo"}).json['uuid']
    non_matching_watch_uuid = post_json("createwatch", {"url": "https://example.com/page"}).json['uuid']

    # The overview page must render the tag title somewhere
    overview = client.get(url_for("watchlist.index"))
    assert overview.status_code == 200
    page = overview.get_data(as_text=True)
    assert "Auto GitHub" in page, "Auto-matched tag title must appear in watch overview"

    # Cross-check against the datastore: the tag resolves for the matching watch only
    tags_for_match = datastore.get_all_tags_for_watch(matching_watch_uuid)
    assert tag_uuid in tags_for_match, "Pattern-matched tag must be returned for matching watch"

    tags_for_other = datastore.get_all_tags_for_watch(non_matching_watch_uuid)
    assert tag_uuid not in tags_for_other, "Pattern-matched tag must NOT appear for non-matching watch"
|
||||
|
||||
|
||||
def test_auto_applied_tag_shows_on_watch_edit(client, live_server, measure_memory_usage, datastore_path):
    """The edit page of a watch lists a tag that was applied through its URL pattern."""
    set_original_response(datastore_path=datastore_path)

    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

    def post_json(endpoint, payload):
        # POST a JSON body to the named API endpoint and require a 201 Created response
        response = client.post(
            url_for(endpoint),
            data=json.dumps(payload),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )
        assert response.status_code == 201, response.data
        return response

    post_json("tag", {"title": "Auto Docs", "url_match_pattern": "*docs.example.com*"})
    watch_uuid = post_json("createwatch", {"url": "https://docs.example.com/guide"}).json['uuid']

    # Fetch the edit page of the watch whose URL matches the tag pattern
    edit_page = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
    assert edit_page.status_code == 200
    html = edit_page.get_data(as_text=True)

    assert "Auto Docs" in html, "Auto-applied tag name must appear on watch edit page"

    # NOTE(review): the `"auto" in ...` alternative is already satisfied by the tag
    # title "Auto Docs" asserted above, so this check cannot fail on its own —
    # consider asserting the exact wording the template uses.
    lowered = html.lower()
    assert "automatically applied" in lowered or "auto" in lowered, \
        "Watch edit page must indicate the tag is auto-applied by pattern"
|
||||
|
||||
|
||||
def test_multiple_pattern_tags_all_applied(client, live_server, measure_memory_usage, datastore_path):
    """Every tag whose pattern matches the watch URL is resolved, not only the first match."""
    set_original_response(datastore_path=datastore_path)

    datastore = live_server.app.config['DATASTORE']
    api_key = datastore.data['settings']['application'].get('api_access_token')

    def post_json(endpoint, payload):
        # POST a JSON body to the named API endpoint and require a 201 Created response
        response = client.post(
            url_for(endpoint),
            data=json.dumps(payload),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )
        assert response.status_code == 201, response.data
        return response

    # Two tags whose patterns both match the watch URL, plus one that does not
    tag_docs_uuid = post_json("tag", {"title": "Org Docs", "url_match_pattern": "*docs.*"}).json['uuid']
    tag_python_uuid = post_json("tag", {"title": "Org Python", "url_match_pattern": "*python*"}).json['uuid']
    tag_rust_uuid = post_json("tag", {"title": "Org Rust", "url_match_pattern": "*rust-lang*"}).json['uuid']

    # The URL contains both "docs." and "python" but not "rust-lang"
    watch_uuid = post_json(
        "createwatch",
        {"url": "https://docs.python.org/3/library/fnmatch.html"},
    ).json['uuid']

    resolved = datastore.get_all_tags_for_watch(watch_uuid)

    assert tag_docs_uuid in resolved, "First matching tag must be included"
    assert tag_python_uuid in resolved, "Second matching tag must be included"
    assert tag_rust_uuid not in resolved, "Non-matching tag must NOT be included"
|
||||
85
changedetectionio/tests/unit/test_auth_decorator_order.py
Normal file
85
changedetectionio/tests/unit/test_auth_decorator_order.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Static analysis test: verify @login_optionally_required is always applied
|
||||
BELOW (inner to) @blueprint.route(), never above it.
|
||||
|
||||
In Flask, @route() must be the outermost decorator because it registers
|
||||
whatever function it receives. If @login_optionally_required is placed
|
||||
above @route(), the raw unprotected function gets registered and auth is
|
||||
silently bypassed (GHSA-jmrh-xmgh-x9j4).
|
||||
|
||||
Correct order (route outermost, auth inner):
|
||||
@blueprint.route('/path')
|
||||
@login_optionally_required
|
||||
def view(): ...
|
||||
|
||||
Wrong order (auth never called):
|
||||
@login_optionally_required ← wraps the view only after route() has already registered the raw function
|
||||
@blueprint.route('/path')
|
||||
def view(): ...
|
||||
"""
|
||||
|
||||
import ast
|
||||
import pathlib
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = pathlib.Path(__file__).parents[3] # …/changedetection.io/
|
||||
SOURCE_ROOT = REPO_ROOT / "changedetectionio"
|
||||
|
||||
|
||||
def _is_route_decorator(node: ast.expr) -> bool:
    """Tell whether a decorator expression has the shape ``@<something>.route(...)``.

    Only a call whose callee is an attribute access named ``route`` qualifies;
    a bare ``@route(...)`` or an uncalled ``@bp.route`` does not.
    """
    if not isinstance(node, ast.Call):
        return False
    callee = node.func
    if not isinstance(callee, ast.Attribute):
        return False
    return callee.attr == "route"
|
||||
|
||||
|
||||
def _is_auth_decorator(node: ast.expr) -> bool:
    """Return True if the decorator refers to ``login_optionally_required``.

    Both the bare form (``@login_optionally_required``) and a dotted reference
    (``@some_module.login_optionally_required``) are recognised, so importing
    the decorator through its module does not hide a view from the ordering
    check.

    Args:
        node: one entry of a function definition's ``decorator_list``.

    Returns:
        True when the expression names the auth decorator, False otherwise.
    """
    if isinstance(node, ast.Name):
        return node.id == "login_optionally_required"
    if isinstance(node, ast.Attribute):
        # Dotted access: only the final attribute name identifies the decorator
        return node.attr == "login_optionally_required"
    return False
|
||||
|
||||
|
||||
def collect_violations(source_root=None, repo_root=None) -> list[str]:
    """Find functions whose auth decorator is listed above a route decorator.

    Every ``*.py`` file below ``source_root`` is parsed with ``ast``. For each
    (async) function definition, one message is produced for every pair in
    which a decorator accepted by ``_is_auth_decorator`` has a lower index in
    the decorator list (i.e. is written higher up) than a decorator accepted
    by ``_is_route_decorator``.

    Args:
        source_root: directory to scan recursively; defaults to the
            module-level ``SOURCE_ROOT``.
        repo_root: directory that reported paths are made relative to;
            defaults to the module-level ``REPO_ROOT``. It must be an
            ancestor of (or equal to) ``source_root``.

    Returns:
        A list of human-readable violation messages, empty when none are found.
    """
    if source_root is None:
        source_root = SOURCE_ROOT
    if repo_root is None:
        repo_root = REPO_ROOT

    violations = []

    # sorted() keeps the report order stable regardless of directory listing order
    for path in sorted(source_root.rglob("*.py")):
        try:
            tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
        except (SyntaxError, UnicodeDecodeError, ValueError):
            # Best effort: a file that cannot be decoded or parsed is skipped
            # rather than aborting the whole scan.
            continue

        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue

            decorators = node.decorator_list
            auth_indices = [i for i, d in enumerate(decorators) if _is_auth_decorator(d)]
            route_indices = [i for i, d in enumerate(decorators) if _is_route_decorator(d)]

            # Bad order: auth decorator appears at a lower index (higher up) than a route decorator
            for auth_idx in auth_indices:
                for route_idx in route_indices:
                    if auth_idx < route_idx:
                        rel = path.relative_to(repo_root)
                        violations.append(
                            f"{rel}:{node.lineno} — `{node.name}`: "
                            f"@login_optionally_required (line {decorators[auth_idx].lineno}) "
                            f"is above @route (line {decorators[route_idx].lineno}); "
                            f"auth wrapper will never be called"
                        )

    return violations
|
||||
|
||||
|
||||
def test_auth_decorator_order():
    """Fail with a full report if any view lists the auth decorator above its route decorator."""
    found = collect_violations()
    if not found:
        return

    bullet_list = "\n".join(f" • {v}" for v in found)
    report = (
        "\n\nFound routes where @login_optionally_required is placed ABOVE @blueprint.route().\n"
        "This silently disables authentication — @route() registers the raw function\n"
        "and the auth wrapper is never called.\n\n"
        "Fix: move @blueprint.route() to be the outermost (topmost) decorator.\n\n"
        + bullet_list
    )
    pytest.fail(report)
|
||||
@@ -64,7 +64,7 @@ class TestTriggerConditions(unittest.TestCase):
|
||||
"conditions": [
|
||||
{"operator": ">=", "field": "extracted_number", "value": "10"},
|
||||
{"operator": "<=", "field": "extracted_number", "value": "5000"},
|
||||
{"operator": "in", "field": "page_text", "value": "rock"},
|
||||
{"operator": "in", "field": "page_filtered_text", "value": "rock"},
|
||||
#{"operator": "starts_with", "field": "page_text", "value": "I saw"},
|
||||
]
|
||||
}
|
||||
|
||||
@@ -15,7 +15,9 @@ from changedetectionio.diff import (
|
||||
CHANGED_PLACEMARKER_OPEN,
|
||||
CHANGED_PLACEMARKER_CLOSED,
|
||||
CHANGED_INTO_PLACEMARKER_OPEN,
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED,
|
||||
extract_changed_from,
|
||||
extract_changed_to,
|
||||
)
|
||||
|
||||
|
||||
@@ -381,5 +383,140 @@ Line 3 with tabs and spaces"""
|
||||
self.assertNotIn('[-Line 2-]', output)
|
||||
self.assertNotIn('[+Line 2+]', output)
|
||||
|
||||
def test_diff_changed_from_to_word_level(self):
    """A single changed value on a line is extracted as the old and the new value (price-monitoring case)."""
    old_text, new_text = "Widget costs $99.99 per month", "Widget costs $109.99 per month"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99.99")
    self.assertEqual(extract_changed_to(rendered), "$109.99")
|
||||
|
||||
def test_diff_changed_from_to_multiple_changes(self):
    """Changed fragments from different lines come back joined by a newline.

    The two changed lines are separated by an unchanged one; per the original
    author's note this keeps each change a 1-to-1 line replacement so the
    word-level diff is applied per line.
    """
    old_text = "Price $99\nunchanged\nTax $5"
    new_text = "Price $149\nunchanged\nTax $12"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99\n$5")
    self.assertEqual(extract_changed_to(rendered), "$149\n$12")
|
||||
|
||||
def test_diff_changed_from_to_pure_insert_delete(self):
    """With word_diff disabled, the whole old and new lines are what gets extracted."""
    old_text, new_text = "old line", "new line"

    # word_diff=False forces line-level CHANGED markers
    rendered = diff.render_diff(old_text, new_text, word_diff=False)

    self.assertEqual(extract_changed_from(rendered), "old line")
    self.assertEqual(extract_changed_to(rendered), "new line")
|
||||
|
||||
def test_diff_changed_from_to_similar_numbers(self):
    """'$90.00' -> '$9.00' must yield the complete values, never a partial match such as '0.00'.

    Per the original author's note, tokens are split on whitespace only, so
    each price is compared as one atomic token.
    """
    old_text, new_text = "for sale $90.00", "for sale $9.00"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$90.00")
    self.assertEqual(extract_changed_to(rendered), "$9.00")
|
||||
|
||||
def test_diff_changed_from_to_whole_line_replaced(self):
    """Extraction still works when the old and new line share no token.

    Per the original author's note, this case is rendered with
    CHANGED/CHANGED_INTO markers rather than REMOVED/ADDED ones.
    """
    old_text, new_text = "$99", "$109"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99")
    self.assertEqual(extract_changed_to(rendered), "$109")
|
||||
|
||||
def test_diff_changed_from_to_multiple_words_same_line(self):
    """Several changed words on one line are each extracted and joined by a newline.

    'quick brown fox jumps' -> 'slow brown fox hops' yields 'quick\\njumps'
    and 'slow\\nhops'.
    """
    old_text = "quick brown fox jumps"
    new_text = "slow brown fox hops"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "quick\njumps")
    self.assertEqual(extract_changed_to(rendered), "slow\nhops")
|
||||
|
||||
def test_diff_changed_from_to_no_change(self):
    """Identical input on both sides yields an empty string from both extractors."""
    unchanged = "nothing changed here"

    rendered = diff.render_diff(unchanged, unchanged, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "")
    self.assertEqual(extract_changed_to(rendered), "")
|
||||
|
||||
|
||||
def test_word_diff_no_prefix_whole_line_replaced(self):
    """Whole-line replacement with include_change_type_prefix=False carries no placemarkers (issue #3816)."""
    old_text, new_text = "73", "100"

    rendered = diff.render_diff(old_text, new_text, word_diff=True, include_change_type_prefix=False)

    self.assertNotIn('PLACEMARKER', rendered)
    # Both raw values must still be present in the output
    self.assertIn('73', rendered)
    self.assertIn('100', rendered)
|
||||
|
||||
def test_word_diff_no_prefix_inline_changes(self):
    """Inline word-level changes with include_change_type_prefix=False carry no placemarkers (issue #3816)."""
    old_text = "the price is 50 dollars"
    new_text = "the price is 75 dollars"

    rendered = diff.render_diff(old_text, new_text, word_diff=True, include_change_type_prefix=False)

    self.assertNotIn('PLACEMARKER', rendered)
    self.assertIn('50', rendered)
    self.assertIn('75', rendered)
|
||||
|
||||
def test_word_diff_with_prefix_still_wraps(self):
    """With include_change_type_prefix=True the rendered output still contains placemarkers."""
    old_text, new_text = "73", "100"

    rendered = diff.render_diff(old_text, new_text, word_diff=True, include_change_type_prefix=True)

    self.assertIn('PLACEMARKER', rendered)
|
||||
|
||||
def test_word_diff_no_prefix_exact_output(self):
    """Pin the exact rendered text so regressions in marker handling are caught.

    - Whole-line replacement without prefix: old and new value on separate
      lines, no markers.
    - Inline replacement without prefix: unchanged words are kept and the old
      and new token end up concatenated in place.
    - Whole-line replacement with prefix: both values fully wrapped in markers.
    """
    # Whole-line replaced, markers suppressed
    without_prefix = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=False)
    self.assertEqual(without_prefix, '73\n100')

    # Inline replacement, markers suppressed: "50" and "75" are emitted back to back
    inline = diff.render_diff(
        'the price is 50 dollars',
        'the price is 75 dollars',
        word_diff=True,
        include_change_type_prefix=False,
    )
    self.assertEqual(inline, 'the price is 5075 dollars')

    # Sanity: with the prefix enabled the whole-line case is fully wrapped
    with_prefix = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=True)
    expected_wrapped = (
        '@changed_PLACEMARKER_OPEN73@changed_PLACEMARKER_CLOSED\n'
        '@changed_into_PLACEMARKER_OPEN100@changed_into_PLACEMARKER_CLOSED'
    )
    self.assertEqual(with_prefix, expected_wrapped)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
68
changedetectionio/tests/unit/test_tag_url_match.py
Normal file
68
changedetectionio/tests/unit/test_tag_url_match.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# run from dir above changedetectionio/ dir
|
||||
# python3 -m unittest changedetectionio.tests.unit.test_tag_url_match
|
||||
|
||||
import unittest
|
||||
from changedetectionio.model.Tag import model as TagModel
|
||||
|
||||
|
||||
def make_tag(pattern):
    """Build a bare Tag holding only ``url_match_pattern``, bypassing the model's ``__init__``."""
    # __new__ plus dict.__init__ gives an empty mapping without running the model constructor
    instance = TagModel.__new__(TagModel)
    dict.__init__(instance)
    instance['url_match_pattern'] = pattern
    return instance
|
||||
|
||||
|
||||
class TestTagUrlMatch(unittest.TestCase):
    """Checks ``matches_url`` on tags built with wildcard and plain-substring patterns."""

    def test_wildcard_matches(self):
        matcher = make_tag('*example.com*')
        for url in ('https://example.com/page', 'https://www.example.com/shop/item'):
            self.assertTrue(matcher.matches_url(url))
        self.assertFalse(matcher.matches_url('https://other.com/page'))

    def test_wildcard_case_insensitive(self):
        matcher = make_tag('*EXAMPLE.COM*')
        self.assertTrue(matcher.matches_url('https://example.com/page'))

    def test_substring_match(self):
        matcher = make_tag('github.com/myorg')
        self.assertTrue(matcher.matches_url('https://github.com/myorg/repo'))
        self.assertFalse(matcher.matches_url('https://github.com/otherorg/repo'))

    def test_substring_case_insensitive(self):
        matcher = make_tag('GitHub.com/MyOrg')
        self.assertTrue(matcher.matches_url('https://github.com/myorg/repo'))

    def test_empty_pattern_never_matches(self):
        matcher = make_tag('')
        self.assertFalse(matcher.matches_url('https://example.com'))

    def test_empty_url_never_matches(self):
        matcher = make_tag('*example.com*')
        self.assertFalse(matcher.matches_url(''))

    def test_question_mark_wildcard(self):
        # '?' stands for exactly one character, so a two-character suffix must not match
        matcher = make_tag('https://example.com/item-?')
        self.assertTrue(matcher.matches_url('https://example.com/item-1'))
        self.assertFalse(matcher.matches_url('https://example.com/item-12'))

    def test_substring_is_broad(self):
        """A plain pattern matches anywhere in the URL, so 'evil.com' also hits 'notevil.com'.

        This breadth is intentional; a wildcard pattern such as '*://evil.com/*'
        is the way to match a precise domain.
        """
        matcher = make_tag('evil.com')
        self.assertTrue(matcher.matches_url('https://evil.com/page'))
        self.assertTrue(matcher.matches_url('https://notevil.com'))  # substring match — expected

    def test_precise_domain_match_with_wildcard(self):
        """A wildcard pattern anchored on '://' avoids the broad substring behaviour."""
        matcher = make_tag('*://evil.com/*')
        self.assertTrue(matcher.matches_url('https://evil.com/page'))
        self.assertFalse(matcher.matches_url('https://notevil.com/page'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -76,7 +76,9 @@ These commands read settings from `../../setup.cfg` automatically.
|
||||
- `en_US` - English (US)
|
||||
- `fr` - French (Français)
|
||||
- `it` - Italian (Italiano)
|
||||
- `ja` - Japanese (日本語)
|
||||
- `ko` - Korean (한국어)
|
||||
- `pt_BR` - Portuguese (Brasil)
|
||||
- `zh` - Chinese Simplified (中文简体)
|
||||
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
|
||||
|
||||
|
||||
Binary file not shown.
@@ -1617,7 +1617,7 @@ msgstr "Bereich zeichnen"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Clear selection"
|
||||
msgstr "Klare Auswahl"
|
||||
msgstr "Auswahl löschen"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "One moment, fetching screenshot and element information.."
|
||||
|
||||
BIN
changedetectionio/translations/ja/LC_MESSAGES/messages.mo
Normal file
BIN
changedetectionio/translations/ja/LC_MESSAGES/messages.mo
Normal file
Binary file not shown.
3418
changedetectionio/translations/ja/LC_MESSAGES/messages.po
Normal file
3418
changedetectionio/translations/ja/LC_MESSAGES/messages.po
Normal file
File diff suppressed because it is too large
Load Diff
BIN
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.mo
Normal file
BIN
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.mo
Normal file
Binary file not shown.
3482
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.po
Normal file
3482
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.po
Normal file
File diff suppressed because it is too large
Load Diff
@@ -725,6 +725,13 @@ components:
|
||||
- true: Tag settings override watch settings
|
||||
- false: Tag settings do not override (watches use their own settings)
|
||||
- null: Not decided yet / inherit default behavior
|
||||
url_match_pattern:
|
||||
type: string
|
||||
description: |
|
||||
Automatically apply this tag to any watch whose URL matches this pattern.
|
||||
Supports fnmatch wildcards (* and ?): e.g. *://example.com/* or github.com/myorg.
|
||||
Plain strings are matched as case-insensitive substrings.
|
||||
Leave empty to disable auto-matching.
|
||||
# Future: Aggregated statistics from all watches with this tag
|
||||
# check_count:
|
||||
# type: integer
|
||||
|
||||
@@ -98,7 +98,7 @@ pytest-flask ~=1.3
|
||||
pytest-mock ~=3.15
|
||||
|
||||
# OpenAPI validation support
|
||||
openapi-core[flask] ~= 0.22
|
||||
openapi-core[flask] ~= 0.23
|
||||
|
||||
loguru
|
||||
|
||||
|
||||
Reference in New Issue
Block a user