mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-04-12 14:07:57 +00:00
Compare commits
11 Commits
0.54.8
...
fix/step-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
126c9864f8 | ||
|
|
01546cfbe4 | ||
|
|
77116f5203 | ||
|
|
238d6ba72d | ||
|
|
ede06a92bd | ||
|
|
9d4249c820 | ||
|
|
b5bac1c868 | ||
|
|
0479aa9654 | ||
|
|
746e213398 | ||
|
|
84d97ec9cf | ||
|
|
c8f13f5084 |
@@ -20,8 +20,7 @@
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB · Max decompressed size: %(decomp)s MB',
|
||||
upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -22,10 +22,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
|
||||
|
||||
from changedetectionio import processors
|
||||
output = render_template("groups-overview.html",
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
available_tags=sorted_tags,
|
||||
form=add_form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
tag_count=tag_count,
|
||||
)
|
||||
|
||||
@@ -208,9 +210,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
template = env.from_string(template_str)
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
# Watches whose URL currently matches this tag's pattern
|
||||
matching_watches = {
|
||||
w_uuid: watch
|
||||
for w_uuid, watch in datastore.data['watching'].items()
|
||||
if default.matches_url(watch.get('url', ''))
|
||||
}
|
||||
|
||||
output = render_template("edit-tag.html",
|
||||
extra_form_content=included_content,
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
matching_watches=matching_watches,
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
**template_args
|
||||
)
|
||||
|
||||
@@ -10,6 +10,8 @@ from changedetectionio.processors.restock_diff.forms import processor_settings_f
|
||||
|
||||
class group_restock_settings_form(restock_settings_form):
    """Restock settings form variant that adds tag/group level fields."""
    # Checkbox whose label asks whether to activate these settings for the individual watches in the tag/group.
    overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
    # Free-text URL pattern; the placeholder shows a wildcard example and a plain-substring example.
    # NOTE(review): presumably stored as the tag's 'url_match_pattern' value - confirm against the tag model.
    url_match_pattern = StringField('Auto-apply to watches with URLs matching',
                                    render_kw={"placeholder": "e.g. *://example.com/* or github.com/myorg"})
|
||||
|
||||
class SingleTag(Form):
|
||||
|
||||
|
||||
@@ -43,6 +43,20 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url_match_pattern, class="m-d") }}
|
||||
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
|
||||
</div>
|
||||
{% if matching_watches %}
|
||||
<div class="pure-control-group">
|
||||
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
|
||||
<ul class="tag-url-match-list">
|
||||
{% for w_uuid, w in matching_watches.items() %}
|
||||
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -3,6 +3,22 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<style>
|
||||
{%- for uuid, tag in available_tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
|
||||
@@ -48,7 +64,7 @@
|
||||
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
||||
</td>
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
|
||||
|
||||
@@ -320,7 +320,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
'capabilities': capabilities
|
||||
'capabilities': capabilities,
|
||||
'auto_applied_tags': {
|
||||
tag_uuid: tag
|
||||
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
|
||||
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
|
||||
},
|
||||
}
|
||||
|
||||
included_content = None
|
||||
|
||||
@@ -81,6 +81,14 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
|
||||
{% if auto_applied_tags %}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Also automatically applied by URL pattern:') }}
|
||||
{% for tag_uuid, tag in auto_applied_tags.items() %}
|
||||
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
|
||||
{% endfor %}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
|
||||
@@ -49,6 +49,9 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
|
||||
@@ -75,6 +75,9 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
|
||||
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
|
||||
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
|
||||
@@ -56,6 +56,10 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
|
||||
if total_height > capture_height:
|
||||
stitched = stitched.crop((0, 0, max_width, capture_height))
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
@@ -104,15 +104,17 @@ class fetcher(Fetcher):
|
||||
|
||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
||||
from selenium.webdriver.remote.client_config import ClientConfig
|
||||
from urllib3.util import Timeout
|
||||
driver = None
|
||||
try:
|
||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
||||
remote_connection = RemoteConnection(
|
||||
self.browser_connection_url,
|
||||
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
|
||||
client_config = ClientConfig(
|
||||
remote_server_addr=self.browser_connection_url,
|
||||
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
|
||||
)
|
||||
remote_connection.set_timeout(30) # seconds
|
||||
remote_connection = RemoteConnection(client_config=client_config)
|
||||
|
||||
# Now create the driver with the RemoteConnection
|
||||
driver = RemoteWebDriver(
|
||||
command_executor=remote_connection,
|
||||
options=options
|
||||
|
||||
@@ -45,6 +45,36 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
# Extraction regexes are assembled from the placemarker constants above, so a change to a
# marker string is picked up automatically. Each regex is an alternation of two
# "open marker, lazy capture, close marker" branches with one capture group per branch.
_EXTRACT_REMOVED_RE = re.compile(
    r'|'.join((
        re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED),
        re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED),
    ))
)
_EXTRACT_ADDED_RE = re.compile(
    r'|'.join((
        re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED),
        re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED),
    ))
)
|
||||
|
||||
|
||||
def extract_changed_from(raw_diff: str) -> str:
    """Extract only the removed/changed-from fragments from a raw diff string.

    Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
    not the full surrounding line. Multiple fragments joined with newlines.

    Args:
        raw_diff: Diff text that still contains the removed / changed placemarker pairs.

    Returns:
        The text found between each marker pair, in order of appearance, joined
        with newlines. An empty string when no marker pair is present.
    """
    fragments = []
    for m in _EXTRACT_REMOVED_RE.finditer(raw_diff):
        removed, changed = m.groups()
        # Only one alternative takes part in a match; the other group is None.
        # Compare against None instead of relying on truthiness: an empty fragment ('')
        # would otherwise fall through to the None group and make str.join() raise TypeError.
        fragments.append(removed if removed is not None else changed)
    return '\n'.join(fragments)
|
||||
|
||||
|
||||
def extract_changed_to(raw_diff: str) -> str:
    """Extract only the added/changed-into fragments from a raw diff string.

    Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
    not the full surrounding line. Multiple fragments joined with newlines.

    Args:
        raw_diff: Diff text that still contains the added / changed-into placemarker pairs.

    Returns:
        The text found between each marker pair, in order of appearance, joined
        with newlines. An empty string when no marker pair is present.
    """
    fragments = []
    for m in _EXTRACT_ADDED_RE.finditer(raw_diff):
        added, changed_into = m.groups()
        # Only one alternative takes part in a match; the other group is None.
        # Compare against None instead of relying on truthiness: an empty fragment ('')
        # would otherwise fall through to the None group and make str.join() raise TypeError.
        fragments.append(added if added is not None else changed_into)
    return '\n'.join(fragments)
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
"""
|
||||
|
||||
@@ -28,18 +28,20 @@ def get_timeago_locale(flask_locale):
|
||||
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
# The timeago library still uses legacy locale names that predate the BCP 47 / RFC 5646 naming convention.
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'pt_BR': 'pt_BR', # Portuguese (Brasil)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'ja': 'ja', # Japanese
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
return locale_map.get(flask_locale, flask_locale)
|
||||
|
||||
@@ -53,7 +55,8 @@ LANGUAGE_DATA = {
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
|
||||
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
|
||||
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
|
||||
@@ -46,11 +46,26 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
def matches_url(self, url: str) -> bool:
    """Return True if this tag should be auto-applied to the given watch URL.

    Wildcard patterns (containing ``*``, ``?`` or ``[``) are matched with fnmatch
    rules against the whole URL; anything else is a substring match. Both kinds
    are case-insensitive.

    Args:
        url: The watch URL to test.

    Returns:
        True when the configured ``url_match_pattern`` matches ``url``. False when
        no pattern is configured (missing, None, empty or whitespace only) or when
        ``url`` is empty.
    """
    import fnmatch

    # The stored value may be None rather than '' — normalise before calling str methods.
    pattern = (self.get('url_match_pattern') or '').strip().lower()
    if not pattern or not url:
        return False

    candidate = url.lower()
    if any(c in pattern for c in ('*', '?', '[')):
        # Both sides are already lower-cased, so use fnmatchcase() to skip the
        # OS-dependent os.path.normcase() step that fnmatch() applies.
        return fnmatch.fnmatchcase(candidate, pattern)
    return pattern in candidate
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -88,6 +88,28 @@ class FormattableTimestamp(str):
|
||||
return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableExtract(str):
    """
    String value carrying just the changed fragments pulled out of a rendered diff.
    Backs the {{diff_changed_from}} and {{diff_changed_to}} notification tokens.

    {{ diff_changed_from }}  → old value(s) only, e.g. "$99.99"
    {{ diff_changed_to }}    → new value(s) only, e.g. "$109.99"

    When several fragments changed they are joined with newlines.
    As a str subclass the value serializes to JSON like any other string.
    """
    def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
        # Nothing to compare: skip rendering entirely and behave as an empty string.
        if not (prev_snapshot or current_snapshot):
            return super().__new__(cls, '')

        # Imported lazily, only when a diff actually has to be rendered.
        from changedetectionio import diff as diff_module
        rendered = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
        return super().__new__(cls, extract_fn(rendered))
|
||||
|
||||
|
||||
class FormattableDiff(str):
|
||||
"""
|
||||
A str subclass representing a rendered diff. As a plain string it renders
|
||||
@@ -161,6 +183,8 @@ class NotificationContextData(dict):
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
@@ -244,16 +268,27 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
from changedetectionio.diff import extract_changed_from, extract_changed_to
|
||||
extract_specs = {
|
||||
'diff_changed_from': extract_changed_from,
|
||||
'diff_changed_to': extract_changed_to,
|
||||
}
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only create FormattableDiff objects for diff keys actually used in the notification text
|
||||
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
|
||||
for key in NotificationContextData().keys():
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
if not key.startswith('diff'):
|
||||
continue
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
continue
|
||||
if key in diff_specs:
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
elif key in extract_specs:
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
|
||||
@@ -461,7 +496,7 @@ Thanks - Your omniscient changedetection.io installation.
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
|
||||
'notification_body': body,
|
||||
'notification_format': self._check_cascading_vars('notification_format', watch),
|
||||
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
|
||||
@@ -980,12 +980,20 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
def get_all_tags_for_watch(self, uuid):
    """Return every tag that applies to a watch, as a dict of full tag info keyed by tag UUID.

    This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet.

    The result combines the tags assigned to the watch (its 'tags' list) with any
    tag whose matches_url() accepts the watch's URL.

    Args:
        uuid: UUID of the watch to look up.

    Returns:
        Dict of tag UUID -> tag object; an empty dict when the watch does not exist.
    """
    watch = self.data['watching'].get(uuid)
    if not watch:
        return {}

    all_tags = self.__data['settings']['application']['tags']

    # Start with manually assigned tags
    result = dictfilt(all_tags, watch.get('tags', []))

    # Additionally include any tag whose url_match_pattern matches this watch's URL
    watch_url = watch.get('url', '')
    if watch_url:
        for tag_uuid, tag in all_tags.items():
            if tag_uuid not in result and tag.matches_url(watch_url):
                result[tag_uuid] = tag

    return result
|
||||
|
||||
@property
|
||||
def extra_browsers(self):
|
||||
|
||||
@@ -98,6 +98,14 @@
|
||||
<td><code>{{ '{{diff_patch}}' }}</code></td>
|
||||
<td>{{ _('The diff output - patch in unified format') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_from}}' }}</code></td>
|
||||
<td>{{ _('Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_to}}' }}</code></td>
|
||||
<td>{{ _('Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{current_snapshot}}' }}</code></td>
|
||||
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
|
||||
|
||||
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
|
||||
set_longer_modified_response, delete_all_watches
|
||||
|
||||
import logging
|
||||
|
||||
import os
|
||||
|
||||
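# NOTE - relies on an SMTP test server being reachable (hostname 'mailserver' by default, override with the SMTP_TEST_MAILSERVER environment variable), see github build recipes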
|
||||
smtp_test_server = 'mailserver'
|
||||
smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver')
|
||||
|
||||
ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())
|
||||
|
||||
|
||||
144
changedetectionio/tests/test_tag_url_match.py
Normal file
144
changedetectionio/tests/test_tag_url_match.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration tests for auto-applying tags to watches by URL pattern matching.
|
||||
|
||||
Verifies:
|
||||
- A tag with url_match_pattern shows on the watch overview list (via get_all_tags_for_watch)
|
||||
- The auto-applied tag appears on the watch edit page
|
||||
- A watch whose URL does NOT match the pattern does not get the tag
|
||||
"""
|
||||
|
||||
import json
|
||||
from flask import url_for
|
||||
from .util import set_original_response, live_server_setup
|
||||
|
||||
|
||||
def test_tag_url_pattern_shows_in_overview(client, live_server, measure_memory_usage, datastore_path):
    """A tag whose url_match_pattern matches a watch URL shows up in the overview and is resolved by the datastore."""
    set_original_response(datastore_path=datastore_path)

    datastore = live_server.app.config['DATASTORE']
    api_key = datastore.data['settings']['application'].get('api_access_token')
    json_headers = {'content-type': 'application/json', 'x-api-key': api_key}

    def create(endpoint, payload):
        # POST a JSON payload to the API, require "201 Created", hand back the new UUID
        res = client.post(url_for(endpoint), data=json.dumps(payload), headers=json_headers)
        assert res.status_code == 201, res.data
        return res.json['uuid']

    # One tag carrying a URL pattern, one watch that matches it and one that does not
    tag_uuid = create("tag", {"title": "Auto GitHub", "url_match_pattern": "*github.com*"})
    matching_watch_uuid = create("createwatch", {"url": "https://github.com/someuser/repo"})
    non_matching_watch_uuid = create("createwatch", {"url": "https://example.com/page"})

    # The overview page has to mention the tag title (rendered per-watch via get_all_tags_for_watch)
    res = client.get(url_for("watchlist.index"))
    assert res.status_code == 200
    html = res.get_data(as_text=True)
    assert "Auto GitHub" in html, "Auto-matched tag title must appear in watch overview"

    # Cross-check against the datastore directly
    assert tag_uuid in datastore.get_all_tags_for_watch(matching_watch_uuid), \
        "Pattern-matched tag must be returned for matching watch"
    assert tag_uuid not in datastore.get_all_tags_for_watch(non_matching_watch_uuid), \
        "Pattern-matched tag must NOT appear for non-matching watch"
|
||||
|
||||
|
||||
def test_auto_applied_tag_shows_on_watch_edit(client, live_server, measure_memory_usage, datastore_path):
    """The watch edit page must show auto-applied tags (from URL pattern) separately."""
    set_original_response(datastore_path=datastore_path)

    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

    # Tag with a URL pattern
    res = client.post(
        url_for("tag"),
        data=json.dumps({"title": "Auto Docs", "url_match_pattern": "*docs.example.com*"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 201, res.data

    # Watch whose URL matches that pattern
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": "https://docs.example.com/guide"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 201, res.data
    watch_uuid = res.json['uuid']

    # Watch edit page must mention the auto-applied tag
    res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
    assert res.status_code == 200
    html = res.get_data(as_text=True)

    assert "Auto Docs" in html, "Auto-applied tag name must appear on watch edit page"
    # Look for the "automatically applied by URL pattern" notice itself. A bare "auto" check
    # is vacuous here: the tag title "Auto Docs" asserted above already contains it.
    assert "automatically applied by url pattern" in html.lower(), \
        "Watch edit page must indicate the tag is auto-applied by pattern"
|
||||
|
||||
|
||||
def test_multiple_pattern_tags_all_applied(client, live_server, measure_memory_usage, datastore_path):
    """Every tag whose pattern matches the watch URL is resolved, not only the first one found."""
    set_original_response(datastore_path=datastore_path)

    datastore = live_server.app.config['DATASTORE']
    api_key = datastore.data['settings']['application'].get('api_access_token')
    json_headers = {'content-type': 'application/json', 'x-api-key': api_key}

    def create(endpoint, payload):
        # POST a JSON payload to the API, require "201 Created", hand back the new UUID
        res = client.post(url_for(endpoint), data=json.dumps(payload), headers=json_headers)
        assert res.status_code == 201, res.data
        return res.json['uuid']

    # Two tags whose patterns both match the watch URL below...
    tag_docs_uuid = create("tag", {"title": "Org Docs", "url_match_pattern": "*docs.*"})
    tag_python_uuid = create("tag", {"title": "Org Python", "url_match_pattern": "*python*"})
    # ...and a third one whose pattern does not
    tag_rust_uuid = create("tag", {"title": "Org Rust", "url_match_pattern": "*rust-lang*"})

    watch_uuid = create("createwatch", {"url": "https://docs.python.org/3/library/fnmatch.html"})

    resolved = datastore.get_all_tags_for_watch(watch_uuid)

    assert tag_docs_uuid in resolved, "First matching tag must be included"
    assert tag_python_uuid in resolved, "Second matching tag must be included"
    assert tag_rust_uuid not in resolved, "Non-matching tag must NOT be included"
|
||||
@@ -15,7 +15,9 @@ from changedetectionio.diff import (
|
||||
CHANGED_PLACEMARKER_OPEN,
|
||||
CHANGED_PLACEMARKER_CLOSED,
|
||||
CHANGED_INTO_PLACEMARKER_OPEN,
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED,
|
||||
extract_changed_from,
|
||||
extract_changed_to,
|
||||
)
|
||||
|
||||
|
||||
@@ -381,5 +383,84 @@ Line 3 with tabs and spaces"""
|
||||
self.assertNotIn('[-Line 2-]', output)
|
||||
self.assertNotIn('[+Line 2+]', output)
|
||||
|
||||
def test_diff_changed_from_to_word_level(self):
    """Price-monitoring case: only the old and the new value are pulled out of a changed line."""
    rendered = diff.render_diff(
        "Widget costs $99.99 per month",
        "Widget costs $109.99 per month",
        word_diff=True,
    )

    self.assertEqual(extract_changed_from(rendered), "$99.99")
    self.assertEqual(extract_changed_to(rendered), "$109.99")
|
||||
|
||||
def test_diff_changed_from_to_multiple_changes(self):
    """Fragments changed on different lines come back newline-joined.

    The untouched middle line keeps each change a 1-to-1 line replacement, so the
    word-level diff is applied per line instead of the multi-line block fallback."""
    old_text = "\n".join(["Price $99", "unchanged", "Tax $5"])
    new_text = "\n".join(["Price $149", "unchanged", "Tax $12"])

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99\n$5")
    self.assertEqual(extract_changed_to(rendered), "$149\n$12")
|
||||
|
||||
def test_diff_changed_from_to_pure_insert_delete(self):
    """With inline word diffing switched off, a replaced line is still captured on both sides."""
    # word_diff=False forces line-level CHANGED markers
    rendered = diff.render_diff("old line", "new line", word_diff=False)

    self.assertEqual(extract_changed_from(rendered), "old line")
    self.assertEqual(extract_changed_to(rendered), "new line")
|
||||
|
||||
def test_diff_changed_from_to_similar_numbers(self):
    """$90.00 → $9.00 has to yield the complete values, never a partial match such as '0.00'.

    Tokens are split on whitespace only, which makes '$90.00' and '$9.00' single
    atomic tokens — the diff never looks at the characters inside them."""
    rendered = diff.render_diff("for sale $90.00", "for sale $9.00", word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$90.00")
    self.assertEqual(extract_changed_to(rendered), "$9.00")
|
||||
|
||||
def test_diff_changed_from_to_whole_line_replaced(self):
    """Extraction still works when no token on the line survives the change.

    With no common tokens, render_inline_word_diff follows its
    whole_line_replaced path and emits CHANGED/CHANGED_INTO markers rather
    than REMOVED/ADDED; the extraction regex covers both via alternation.
    """
    old_text = "$99"
    new_text = "$109"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "$99")
    self.assertEqual(extract_changed_to(rendered), "$109")
|
||||
|
||||
def test_diff_changed_from_to_multiple_words_same_line(self):
    """Several changed words on one line are all returned, newline-joined.

    'quick brown fox jumps' -> 'slow brown fox hops' produces
    'quick\njumps' / 'slow\nhops'. The tokens are most useful when only a
    single value changes per line.
    """
    old_text = "quick brown fox jumps"
    new_text = "slow brown fox hops"

    rendered = diff.render_diff(old_text, new_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "quick\njumps")
    self.assertEqual(extract_changed_to(rendered), "slow\nhops")
|
||||
|
||||
def test_diff_changed_from_to_no_change(self):
    """Identical before/after text yields empty strings from both extractors."""
    unchanged_text = "nothing changed here"

    rendered = diff.render_diff(unchanged_text, unchanged_text, word_diff=True)

    self.assertEqual(extract_changed_from(rendered), "")
    self.assertEqual(extract_changed_to(rendered), "")
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
"""
|
||||
Unit test for send_step_failure_notification regression.
|
||||
|
||||
Before the fix, line 499 called self._check_cascading_vars('notification_format', watch)
|
||||
which raises AttributeError because _check_cascading_vars is a module-level function,
|
||||
not a method of NotificationService.
|
||||
"""
|
||||
|
||||
import queue
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
|
||||
def _make_datastore(watch_uuid, notification_url):
|
||||
"""Minimal datastore mock that NotificationService and _check_cascading_vars need."""
|
||||
watch = MagicMock()
|
||||
watch.get = lambda key, default=None: {
|
||||
'uuid': watch_uuid,
|
||||
'url': 'https://example.com',
|
||||
'notification_urls': [notification_url],
|
||||
'notification_format': '',
|
||||
'notification_muted': False,
|
||||
}.get(key, default)
|
||||
watch.__getitem__ = lambda self, key: watch.get(key)
|
||||
|
||||
datastore = MagicMock()
|
||||
datastore.data = {
|
||||
'watching': {watch_uuid: watch},
|
||||
'settings': {
|
||||
'application': {
|
||||
'notification_urls': [],
|
||||
'notification_format': 'text',
|
||||
'filter_failure_notification_threshold_attempts': 3,
|
||||
}
|
||||
}
|
||||
}
|
||||
datastore.get_all_tags_for_watch.return_value = {}
|
||||
return datastore, watch
|
||||
|
||||
|
||||
def test_send_step_failure_notification_does_not_raise():
    """Regression: send_step_failure_notification must not raise AttributeError.

    The bug was a stray ``self.`` prefix on a call to the module-level
    function ``_check_cascading_vars``.
    """
    from changedetectionio.notification_service import NotificationService

    uuid = 'test-uuid-1234'
    outbox = queue.Queue()
    store, _ = _make_datastore(uuid, 'post://localhost/test')
    notifier = NotificationService(datastore=store, notification_q=outbox)

    # Prior to the fix this call failed with:
    #   AttributeError: 'NotificationService' object has no attribute '_check_cascading_vars'
    notifier.send_step_failure_notification(watch_uuid=uuid, step_n=0)
|
||||
|
||||
|
||||
def test_send_step_failure_notification_queues_item():
    """With notification URLs configured, a notification lands on the queue."""
    from changedetectionio.notification_service import NotificationService

    uuid = 'test-uuid-5678'
    outbox = queue.Queue()
    store, _ = _make_datastore(uuid, 'post://localhost/test')
    notifier = NotificationService(datastore=store, notification_q=outbox)

    notifier.send_step_failure_notification(watch_uuid=uuid, step_n=1)

    assert not outbox.empty(), "Expected a notification to be queued"
    queued = outbox.get_nowait()
    assert 'notification_title' in queued
    # step_n=1 is expected to be reported as 'position 2' in the title.
    assert 'position 2' in queued['notification_title']
|
||||
68
changedetectionio/tests/unit/test_tag_url_match.py
Normal file
68
changedetectionio/tests/unit/test_tag_url_match.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# run from dir above changedetectionio/ dir
|
||||
# python3 -m unittest changedetectionio.tests.unit.test_tag_url_match
|
||||
|
||||
import unittest
|
||||
from changedetectionio.model.Tag import model as TagModel
|
||||
|
||||
|
||||
def make_tag(pattern):
    """Return a bare Tag whose only key is ``url_match_pattern``.

    The instance is created with ``__new__`` and initialised as a plain dict,
    so the model's own ``__init__`` (and any datastore wiring) is bypassed.
    """
    bare_tag = TagModel.__new__(TagModel)
    dict.__init__(bare_tag)
    bare_tag['url_match_pattern'] = pattern
    return bare_tag
|
||||
|
||||
|
||||
class TestTagUrlMatch(unittest.TestCase):
    """Exercises Tag.matches_url with wildcard and plain-substring patterns."""

    def test_wildcard_matches(self):
        """A '*text*' pattern matches URLs containing the text and rejects others."""
        wildcard_tag = make_tag('*example.com*')
        self.assertTrue(wildcard_tag.matches_url('https://example.com/page'))
        self.assertTrue(wildcard_tag.matches_url('https://www.example.com/shop/item'))
        self.assertFalse(wildcard_tag.matches_url('https://other.com/page'))

    def test_wildcard_case_insensitive(self):
        """An upper-case wildcard pattern still matches a lower-case URL."""
        upper_tag = make_tag('*EXAMPLE.COM*')
        self.assertTrue(upper_tag.matches_url('https://example.com/page'))

    def test_substring_match(self):
        """A pattern without wildcards matches when it appears inside the URL."""
        org_tag = make_tag('github.com/myorg')
        self.assertTrue(org_tag.matches_url('https://github.com/myorg/repo'))
        self.assertFalse(org_tag.matches_url('https://github.com/otherorg/repo'))

    def test_substring_case_insensitive(self):
        """Substring matching ignores letter case in the pattern."""
        mixed_case_tag = make_tag('GitHub.com/MyOrg')
        self.assertTrue(mixed_case_tag.matches_url('https://github.com/myorg/repo'))

    def test_empty_pattern_never_matches(self):
        """An empty pattern does not match a URL."""
        blank_tag = make_tag('')
        self.assertFalse(blank_tag.matches_url('https://example.com'))

    def test_empty_url_never_matches(self):
        """An empty URL is not matched, even by a wildcard pattern."""
        wildcard_tag = make_tag('*example.com*')
        self.assertFalse(wildcard_tag.matches_url(''))

    def test_question_mark_wildcard(self):
        """'?' stands for exactly one character."""
        single_char_tag = make_tag('https://example.com/item-?')
        self.assertTrue(single_char_tag.matches_url('https://example.com/item-1'))
        self.assertFalse(single_char_tag.matches_url('https://example.com/item-12'))

    def test_substring_is_broad(self):
        """Plain substring matching is deliberately broad.

        'evil.com' matches anywhere in the URL string, so 'notevil.com' also
        matches. Users needing an exact domain should use a wildcard pattern
        such as '*://evil.com/*'.
        """
        broad_tag = make_tag('evil.com')
        self.assertTrue(broad_tag.matches_url('https://evil.com/page'))
        # Expected: the pattern is a substring of 'notevil.com'.
        self.assertTrue(broad_tag.matches_url('https://notevil.com'))

    def test_precise_domain_match_with_wildcard(self):
        """A '*://domain/*' wildcard pattern avoids the substring surprise."""
        exact_tag = make_tag('*://evil.com/*')
        self.assertTrue(exact_tag.matches_url('https://evil.com/page'))
        self.assertFalse(exact_tag.matches_url('https://notevil.com/page'))
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -76,7 +76,9 @@ These commands read settings from `../../setup.cfg` automatically.
|
||||
- `en_US` - English (US)
|
||||
- `fr` - French (Français)
|
||||
- `it` - Italian (Italiano)
|
||||
- `ja` - Japanese (日本語)
|
||||
- `ko` - Korean (한국어)
|
||||
- `pt_BR` - Portuguese (Brasil)
|
||||
- `zh` - Chinese Simplified (中文简体)
|
||||
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
|
||||
|
||||
|
||||
Binary file not shown.
@@ -1617,7 +1617,7 @@ msgstr "Bereich zeichnen"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "Clear selection"
|
||||
msgstr "Klare Auswahl"
|
||||
msgstr "Auswahl löschen"
|
||||
|
||||
#: changedetectionio/blueprint/ui/templates/edit.html
|
||||
msgid "One moment, fetching screenshot and element information.."
|
||||
|
||||
BIN
changedetectionio/translations/ja/LC_MESSAGES/messages.mo
Normal file
BIN
changedetectionio/translations/ja/LC_MESSAGES/messages.mo
Normal file
Binary file not shown.
3418
changedetectionio/translations/ja/LC_MESSAGES/messages.po
Normal file
3418
changedetectionio/translations/ja/LC_MESSAGES/messages.po
Normal file
File diff suppressed because it is too large
Load Diff
BIN
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.mo
Normal file
BIN
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.mo
Normal file
Binary file not shown.
3482
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.po
Normal file
3482
changedetectionio/translations/pt_BR/LC_MESSAGES/messages.po
Normal file
File diff suppressed because it is too large
Load Diff
@@ -725,6 +725,13 @@ components:
|
||||
- true: Tag settings override watch settings
|
||||
- false: Tag settings do not override (watches use their own settings)
|
||||
- null: Not decided yet / inherit default behavior
|
||||
url_match_pattern:
|
||||
type: string
|
||||
description: |
|
||||
Automatically apply this tag to any watch whose URL matches this pattern.
|
||||
Supports fnmatch wildcards (* and ?), e.g. *://example.com/*, as well as plain strings, e.g. github.com/myorg.
|
||||
Plain strings are matched as case-insensitive substrings.
|
||||
Leave empty to disable auto-matching.
|
||||
# Future: Aggregated statistics from all watches with this tag
|
||||
# check_count:
|
||||
# type: integer
|
||||
|
||||
Reference in New Issue
Block a user