From 8dc39d4a3d5de0f884f9c0d16e0158dc4b2f0378 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Nov 2025 17:38:18 +0100 Subject: [PATCH] RSS feeds for a single watches! --- changedetectionio/blueprint/rss/__init__.py | 18 ++- changedetectionio/blueprint/rss/blueprint.py | 139 +++++++++++++----- .../settings/templates/settings.html | 2 +- changedetectionio/blueprint/ui/edit.py | 7 +- .../blueprint/ui/templates/edit.html | 1 + changedetectionio/forms.py | 2 +- changedetectionio/model/App.py | 4 +- changedetectionio/notification/handler.py | 2 + changedetectionio/store.py | 55 ++++--- changedetectionio/templates/base.html | 9 +- changedetectionio/tests/test_backend.py | 3 +- changedetectionio/tests/test_rss.py | 73 +++++++++ 12 files changed, 249 insertions(+), 66 deletions(-) diff --git a/changedetectionio/blueprint/rss/__init__.py b/changedetectionio/blueprint/rss/__init__.py index d4e09194..adecd339 100644 --- a/changedetectionio/blueprint/rss/__init__.py +++ b/changedetectionio/blueprint/rss/__init__.py @@ -1 +1,17 @@ -RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')] +from copy import deepcopy +from loguru import logger + +from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH +from changedetectionio.notification import valid_notification_formats +RSS_CONTENT_FORMAT_DEFAULT = 'text' + +# Some stuff not related +RSS_FORMAT_TYPES = deepcopy(valid_notification_formats) +if RSS_FORMAT_TYPES.get('markdown'): + del RSS_FORMAT_TYPES['markdown'] + +if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH): + del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH] + +if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT): + logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}") diff --git a/changedetectionio/blueprint/rss/blueprint.py b/changedetectionio/blueprint/rss/blueprint.py index 602aa3db..7a27dcc7 100644 --- a/changedetectionio/blueprint/rss/blueprint.py +++ b/changedetectionio/blueprint/rss/blueprint.py @@ -37,6 +37,51 @@ def clean_entry_content(content): def construct_blueprint(datastore: ChangeDetectionStore): rss_blueprint = Blueprint('rss', __name__) + # Helper function to generate GUID for RSS entries + def generate_watch_guid(watch): + """Generate a unique GUID for a watch RSS entry.""" + return f"{watch['uuid']}/{watch.last_changed}" + + # Helper function to generate diff content for a watch + def generate_watch_diff_content(watch, dates, rss_content_format): + """ + Generate HTML diff content for a watch given its history dates. + Returns the rendered HTML content ready for RSS/display. + """ + from changedetectionio import diff + + # Same logic as watch-overview.html + if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'): + watch_label = watch.label + else: + watch_label = watch.get('url') + + try: + html_diff = diff.render_diff( + previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]), + newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]), + include_equal=False + ) + + requested_output_format = datastore.data['settings']['application'].get('rss_content_format') + url, html_diff, n_title = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format) + + except FileNotFoundError as e: + html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found." + + # @note: We use
 because nearly all RSS readers render only HTML (Thunderbird for example cant do just plaintext)
+        rss_template = "
{{watch_label}} had a change.\n\n{{html_diff}}\n
" + if 'html' in rss_content_format: + rss_template = "\n

{{watch_label}}

\n

{{html_diff}}

\n\n" + + content = jinja_render(template_str=rss_template, watch_label=watch_label, html_diff=html_diff, watch_url=watch.link) + + # Out of range chars could also break feedgen + if scan_invalid_chars_in_rss(content): + content = clean_entry_content(content) + + return content, watch_label + # Some RSS reader situations ended up with rss/ (forward slash after RSS) due # to some earlier blueprint rerouting work, it should goto feed. @rss_blueprint.route("/", methods=['GET']) @@ -51,6 +96,8 @@ def construct_blueprint(datastore: ChangeDetectionStore): # Always requires token set app_rss_token = datastore.data['settings']['application'].get('rss_access_token') rss_url_token = request.args.get('token') + rss_content_format = datastore.data['settings']['application'].get('rss_content_format') + if rss_url_token != app_rss_token: return "Access denied, bad token", 403 @@ -81,10 +128,6 @@ def construct_blueprint(datastore: ChangeDetectionStore): fg.description('Feed description') fg.link(href='https://changedetection.io') - html_colour_enable = False - if datastore.data['settings']['application'].get('rss_content_format') == 'html': - html_colour_enable = True - for watch in sorted_watches: dates = list(watch.history.keys()) @@ -95,7 +138,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): if not watch.viewed: # Re #239 - GUID needs to be individual for each event # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228) - guid = "{}/{}".format(watch['uuid'], watch.last_changed) + guid = generate_watch_guid(watch) fe = fg.add_entry() # Include a link to the diff page, they will have to login here to see if password protection is enabled. @@ -109,38 +152,9 @@ def construct_blueprint(datastore: ChangeDetectionStore): fe.link(link=diff_link) - # Same logic as watch-overview.html - if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'): - watch_label = watch.label - else: - watch_label = watch.get('url') + content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format) fe.title(title=watch_label) - try: - - html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]), - newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]), - include_equal=False, - line_feed_sep="
" - ) - - - requested_output_format = 'htmlcolor' if html_colour_enable else 'html' - html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format) - - except FileNotFoundError as e: - html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found." - - # @todo Make this configurable and also consider html-colored markup - # @todo User could decide if goes to the diff page, or to the watch link - rss_template = "\n

{{watch_title}}

\n

{{html_diff}}

\n\n" - - content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link) - - # Out of range chars could also break feedgen - if scan_invalid_chars_in_rss(content): - content = clean_entry_content(content) - fe.content(content=content, type='CDATA') fe.guid(guid, permalink=False) dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) @@ -152,4 +166,59 @@ def construct_blueprint(datastore: ChangeDetectionStore): logger.trace(f"RSS generated in {time.time() - now:.3f}s") return response + @rss_blueprint.route("/watch/", methods=['GET']) + def rss_single_watch(uuid): + """ + Display the most recent change for a single watch as RSS feed. + Returns RSS XML with a single entry showing the diff between the last two snapshots. + """ + # Always requires token set + app_rss_token = datastore.data['settings']['application'].get('rss_access_token') + rss_url_token = request.args.get('token') + rss_content_format = datastore.data['settings']['application'].get('rss_content_format') + + if rss_url_token != app_rss_token: + return "Access denied, bad token", 403 + + # Get the watch by UUID + watch = datastore.data['watching'].get(uuid) + if not watch: + return f"Watch with UUID {uuid} not found", 404 + + # Check if watch has at least 2 history snapshots + dates = list(watch.history.keys()) + if len(dates) < 2: + return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400 + + # Add uuid to watch for proper functioning + watch['uuid'] = uuid + + # Generate the diff content using the shared helper function + content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format) + + # Create RSS feed with single entry + fg = FeedGenerator() + fg.title(f'changedetection.io - {watch.label}') + fg.description('Changes') + fg.link(href='https://changedetection.io') + + # Add single entry for this watch + guid = generate_watch_guid(watch) + fe = fg.add_entry() + + # Include a link to the diff page + diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)} + fe.link(link=diff_link) + + fe.title(title=watch_label) + fe.content(content=content, type='CDATA') + fe.guid(guid, permalink=False) + dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) + dt = dt.replace(tzinfo=pytz.UTC) + fe.pubDate(dt) + + response = make_response(fg.rss_str()) + response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8') + return response + return rss_blueprint \ No newline at end of file diff --git a/changedetectionio/blueprint/settings/templates/settings.html b/changedetectionio/blueprint/settings/templates/settings.html index 9913e095..5d4b5536 100644 --- a/changedetectionio/blueprint/settings/templates/settings.html +++ b/changedetectionio/blueprint/settings/templates/settings.html @@ -86,7 +86,7 @@
{{ render_checkbox_field(form.application.form.rss_reader_mode) }} - Transforms RSS/RDF feed watches into beautiful text only + When watching RSS/Atom feeds, convert them into clean text for better change detection.
diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py index f68bdbd1..d394a906 100644 --- a/changedetectionio/blueprint/ui/edit.py +++ b/changedetectionio/blueprint/ui/edit.py @@ -236,7 +236,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe # Import the global plugin system from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras - + app_rss_token = datastore.data['settings']['application'].get('rss_access_token'), template_args = { 'available_processors': processors.available_processors(), 'available_timezones': sorted(available_timezones()), @@ -252,6 +252,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), 'jq_support': jq_support, 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), + 'app_rss_token': app_rss_token, + 'rss_uuid_feed' : { + 'label': watch.label, + 'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token) + }, 'settings_application': datastore.data['settings']['application'], 'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'), 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), diff --git a/changedetectionio/blueprint/ui/templates/edit.html b/changedetectionio/blueprint/ui/templates/edit.html index f6e7f6a0..bb4c50a0 100644 --- a/changedetectionio/blueprint/ui/templates/edit.html +++ b/changedetectionio/blueprint/ui/templates/edit.html @@ -476,6 +476,7 @@ Math: {{ 1 + 1 }}") }} class="pure-button button-error">Clear History{% endif %} Clone & Edit + RSS Feed for this watch diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index ce441df9..51760efe 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -1000,7 +1000,7 @@ class globalSettingsApplicationForm(commonSettingsForm): validators=[validators.NumberRange(min=0, message="Should be atleast zero (disabled)")]) - rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES) + rss_content_format = SelectField('RSS Content format', choices=list(RSS_FORMAT_TYPES.items())) removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 0152aed6..cbf81370 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -1,7 +1,7 @@ from os import getenv from copy import deepcopy -from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES +from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT from changedetectionio.notification import ( default_notification_body, @@ -54,7 +54,7 @@ class model(dict): 'password': False, 'render_anchor_tag_content': False, 'rss_access_token': None, - 'rss_content_format': RSS_FORMAT_TYPES[0][0], + 'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT, 'rss_hide_muted_watches': True, 'rss_reader_mode': False, 'scheduler_timezone_default': None, # Default IANA timezone name diff --git a/changedetectionio/notification/handler.py b/changedetectionio/notification/handler.py index 1e80db8b..f3be4104 100644 --- a/changedetectionio/notification/handler.py +++ b/changedetectionio/notification/handler.py @@ -187,6 +187,8 @@ def replace_placemarkers_in_text(text, url, requested_output_format): def apply_service_tweaks(url, n_body, n_title, requested_output_format): + logger.debug(f"Applying markup in '{requested_output_format}' mode") + # Re 323 - Limit discord length to their 2000 char limit total or it wont send. # Because different notifications may require different pre-processing, run each sequentially :( # 2000 bytes minus - diff --git a/changedetectionio/store.py b/changedetectionio/store.py index a05ec623..ba223ae3 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -6,6 +6,7 @@ from flask import ( flash ) +from .blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT from .html_tools import TRANSLATE_WHITESPACE_TABLE from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH from copy import deepcopy, copy @@ -775,6 +776,28 @@ class ChangeDetectionStore: return updates_available + def add_notification_url(self, notification_url): + + logger.debug(f">>> Adding new notification_url - '{notification_url}'") + + notification_urls = self.data['settings']['application'].get('notification_urls', []) + + if notification_url in notification_urls: + return notification_url + + with self.lock: + notification_urls = self.__data['settings']['application'].get('notification_urls', []) + + if notification_url in notification_urls: + return notification_url + + # Append and update the datastore + notification_urls.append(notification_url) + self.__data['settings']['application']['notification_urls'] = notification_urls + self.needs_write = True + + return notification_url + # Run all updates # IMPORTANT - Each update could be run even when they have a new install and the schema is correct # So therefor - each `update_n` should be very careful about checking if it needs to actually run @@ -1087,25 +1110,15 @@ class ChangeDetectionStore: formats['markdown'] = 'Markdown' re_run(formats) - def add_notification_url(self, notification_url): - - logger.debug(f">>> Adding new notification_url - '{notification_url}'") - - notification_urls = self.data['settings']['application'].get('notification_urls', []) - - if notification_url in notification_urls: - return notification_url - - with self.lock: - notification_urls = self.__data['settings']['application'].get('notification_urls', []) - - if notification_url in notification_urls: - return notification_url - - # Append and update the datastore - notification_urls.append(notification_url) - self.__data['settings']['application']['notification_urls'] = notification_urls - self.needs_write = True - - return notification_url + # RSS types should be inline with the same names as notification types + def update_24(self): + rss_format = self.data['settings']['application'].get('rss_content_format') + if not rss_format or 'text' in rss_format: + # might have been 'plaintext, 'plain text' or something + self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT + elif 'html' in rss_format: + self.data['settings']['application']['rss_content_format'] = 'htmlcolor' + else: + # safe fallback to text + self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT diff --git a/changedetectionio/templates/base.html b/changedetectionio/templates/base.html index 5b398d30..dd56ed27 100644 --- a/changedetectionio/templates/base.html +++ b/changedetectionio/templates/base.html @@ -8,8 +8,13 @@ Change Detection{{extra_title}} {% if app_rss_token %} - - {% endif %} + + + {% if rss_uuid_feed %} + + + {%- endif -%} + {%- endif -%} {% if extra_stylesheets %} diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index e9e81fe5..0fa74094 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -77,10 +77,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure assert b'' not in content + assert 'body' not in content + assert '(changed) Which is across multiple lines\n' + assert 'modified head title had a change.' # Because it picked it up as watch_title in default template + elif expected_type == 'html': + assert '<p>' in content + assert '<body>' in content + assert '<p>(changed) Which is across multiple lines<br>' in content + assert f'href="{url}">modified head title had a change.</a>' + elif expected_type == 'htmlcolor': + assert '<body>' in content + assert ' role="note" aria-label="Changed text" title="Changed text">Which is across multiple lines</span>' in content + assert f'href="{url}">modified head title had a change.</a>' + else: + raise Exception(f"Unknown type {expected_type}") + + + item = root.findall('.//item')[0].findtext('description') + check_formatting(expected_type=rss_content_format, content=item, url=test_url) + + # Now the default one is over, lets try all the others + for k in list(RSS_FORMAT_TYPES.keys()): + res = client.post( + url_for("settings.settings_page"), + data={"application-rss_content_format": k}, + follow_redirects=True + ) + assert b'Settings updated' in res.data + + res = client.get( + url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token), + follow_redirects=False + ) + assert res.status_code == 200 + root = ET.fromstring(res.data) + item = root.findall('.//item')[0].findtext('description') + check_formatting(expected_type=k, content=item, url=test_url)