Compare commits

..

2 Commits

Author SHA1 Message Date
dgtlmoon
eb8b2b98c0 refactor, add test 2025-11-13 18:46:07 +01:00
dgtlmoon
ab9774cf2d RSS Feed per tag/group too 2025-11-13 18:45:45 +01:00
10 changed files with 39 additions and 436 deletions

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.51.1'
__version__ = '0.50.43'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -46,7 +46,7 @@ def generate_watch_guid(watch):
return f"{watch['uuid']}/{watch.last_changed}"
def generate_watch_diff_content(watch, dates, rss_content_format, datastore, date_index_from=-2, date_index_to=-1):
def generate_watch_diff_content(watch, dates, rss_content_format, datastore):
"""
Generate HTML diff content for a watch given its history dates.
Returns tuple of (content, watch_label).
@@ -56,8 +56,6 @@ def generate_watch_diff_content(watch, dates, rss_content_format, datastore, dat
dates: List of history snapshot dates
rss_content_format: Format for RSS content (html or text)
datastore: The ChangeDetectionStore instance
date_index_from: Index of the "from" date in the dates list (default: -2)
date_index_to: Index of the "to" date in the dates list (default: -1)
Returns:
Tuple of (content, watch_label) - the rendered HTML content and watch label
@@ -72,8 +70,8 @@ def generate_watch_diff_content(watch, dates, rss_content_format, datastore, dat
try:
html_diff = diff.render_diff(
previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[date_index_from]),
newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[date_index_to]),
previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]),
newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]),
include_equal=False
)

View File

@@ -95,14 +95,6 @@ def construct_main_feed_routes(rss_blueprint, datastore):
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
# Add categories based on watch tags
for tag_uuid in watch.get('tags', []):
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
if tag:
tag_title = tag.get('title', '')
if tag_title:
fe.category(term=tag_title)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
logger.trace(f"RSS generated in {time.time() - now:.3f}s")

View File

@@ -2,7 +2,6 @@ from flask import make_response, request, url_for
from feedgen.feed import FeedGenerator
import datetime
import pytz
import locale
from ._util import generate_watch_guid, generate_watch_diff_content
@@ -19,9 +18,8 @@ def construct_single_watch_routes(rss_blueprint, datastore):
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
def rss_single_watch(uuid):
"""
Display the most recent changes for a single watch as RSS feed.
Returns RSS XML with multiple entries showing diffs between consecutive snapshots.
The number of entries is controlled by the rss_diff_length setting.
Display the most recent change for a single watch as RSS feed.
Returns RSS XML with a single entry showing the diff between the last two snapshots.
"""
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
@@ -44,115 +42,29 @@ def construct_single_watch_routes(rss_blueprint, datastore):
# Add uuid to watch for proper functioning
watch['uuid'] = uuid
# Get the number of diffs to include (default: 5)
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
# Generate the diff content using the shared helper function
content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format, datastore)
# Calculate how many diffs we can actually show (limited by available history)
# We need at least 2 snapshots to create 1 diff
max_possible_diffs = len(dates) - 1
num_diffs = min(rss_diff_length, max_possible_diffs) if rss_diff_length > 0 else max_possible_diffs
# Create RSS feed
# Create RSS feed with single entry
fg = FeedGenerator()
# Set title: use "label (url)" if label differs from url, otherwise just url
watch_url = watch.get('url', '')
watch_label = watch.label
if watch_label and watch_label != watch_url:
feed_title = f'changedetection.io - {watch_label} ({watch_url})'
else:
feed_title = f'changedetection.io - {watch_url}'
fg.title(feed_title)
fg.title(f'changedetection.io - {watch.label}')
fg.description('Changes')
fg.link(href='https://changedetection.io')
# Loop through history and create RSS entries for each diff
# Add entries in reverse order because feedgen reverses them
# This way, the newest change appears first in the final RSS
for i in range(num_diffs - 1, -1, -1):
# Calculate indices for this diff (working backwards from newest)
# i=0: compare dates[-2] to dates[-1] (most recent change)
# i=1: compare dates[-3] to dates[-2] (previous change)
# etc.
date_index_to = -(i + 1)
date_index_from = -(i + 2)
# Add single entry for this watch
guid = generate_watch_guid(watch)
fe = fg.add_entry()
try:
# Generate the diff content for this pair of snapshots
timestamp_to = dates[date_index_to]
timestamp_from = dates[date_index_from]
# Include a link to the diff page
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
fe.link(link=diff_link)
content, watch_label = generate_watch_diff_content(
watch, dates, rss_content_format, datastore,
date_index_from=date_index_from,
date_index_to=date_index_to
)
# Generate edit watch link and add to content
edit_watch_url = url_for('ui.ui_edit.edit_page',
uuid=watch['uuid'],
_external=True)
# Add edit watch links at top and bottom of content
if 'html' in rss_content_format:
edit_link_html = f'<p><a href="{edit_watch_url}">[edit watch]</a></p>'
# Insert after <body> and before </body>
content = content.replace('<body>', f'<body>\n{edit_link_html}', 1)
content = content.replace('</body>', f'{edit_link_html}\n</body>', 1)
else:
# For plain text format, add plain text links in separate <pre> blocks
edit_link_top = f'<pre>[edit watch] {edit_watch_url}</pre>\n'
edit_link_bottom = f'\n<pre>[edit watch] {edit_watch_url}</pre>'
content = edit_link_top + content + edit_link_bottom
# Create a unique GUID for this specific diff
guid = f"{watch['uuid']}/{timestamp_to}"
fe = fg.add_entry()
# Include a link to the diff page with specific versions
diff_link = {'href': url_for('ui.ui_views.diff_history_page',
uuid=watch['uuid'],
from_version=timestamp_from,
to_version=timestamp_to,
_external=True)}
fe.link(link=diff_link)
# Format the date using locale-aware formatting with timezone
dt = datetime.datetime.fromtimestamp(int(timestamp_to))
dt = dt.replace(tzinfo=pytz.UTC)
# Get local timezone-aware datetime
local_tz = datetime.datetime.now().astimezone().tzinfo
local_dt = dt.astimezone(local_tz)
# Format date with timezone - using strftime for locale awareness
try:
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
except:
# Fallback if locale issues
formatted_date = local_dt.isoformat()
# Use formatted date in title instead of "Change 1, 2, 3"
fe.title(title=f"{watch_label} - Change @ {formatted_date}")
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
# Use the timestamp of the "to" snapshot for pubDate
fe.pubDate(dt)
# Add categories based on watch tags
for tag_uuid in watch.get('tags', []):
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
if tag:
tag_title = tag.get('title', '')
if tag_title:
fe.category(term=tag_title)
except (IndexError, FileNotFoundError) as e:
# Skip this diff if we can't generate it
continue
fe.title(title=watch_label)
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')

View File

@@ -81,14 +81,6 @@ def construct_tag_routes(rss_blueprint, datastore):
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
# Add categories based on watch tags
for tag_uuid in watch.get('tags', []):
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
if tag:
tag_title = tag.get('title', '')
if tag_title:
fe.category(term=tag_title)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
return response

View File

@@ -24,7 +24,6 @@
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#ui-options">UI Options</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#rss">RSS</a></li>
<li class="tab"><a href="#timedate">Time &amp; Date</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
</ul>
@@ -66,13 +65,26 @@
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
<span class="pure-form-message-inline">Allow access to the watch change history page when password is enabled (Good for sharing the diff page)
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
</div>
<div class="grey-form-border">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_content_format) }}
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
<span class="pure-form-message-inline">When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
</div>
</div>
</fieldset>
</div>
@@ -218,24 +230,6 @@ nav
</p>
</div>
</div>
<div class="tab-pane-inner" id="rss">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_content_format) }}
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.rss_diff_length) }}
<span class="pure-form-message-inline">Maximum number of history snapshots to include in the watch specific RSS feed.</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
<span class="pure-form-message-inline">For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
</div>
</div>
<div class="tab-pane-inner" id="timedate">
<div class="pure-control-group">
Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.

View File

@@ -1009,10 +1009,8 @@ class globalSettingsApplicationForm(commonSettingsForm):
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
validators=[validators.Optional()])
rss_reader_mode = BooleanField('RSS reader mode ', default=False, validators=[validators.Optional()])
rss_diff_length = IntegerField(label='Number of changes to show in watch RSS feed',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, message="Should contain zero or more attempts")])
rss_reader_mode = BooleanField('RSS reader mode ', default=False,
validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},

View File

@@ -55,7 +55,6 @@ class model(dict):
'render_anchor_tag_content': False,
'rss_access_token': None,
'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT,
'rss_diff_length': 5,
'rss_hide_muted_watches': True,
'rss_reader_mode': False,
'scheduler_timezone_default': None, # Default IANA timezone name

View File

@@ -314,40 +314,3 @@ def test_rss_single_watch_feed(client, live_server, measure_memory_usage, datast
item = root.findall('.//item')[0].findtext('description')
check_formatting(expected_type=k, content=item, url=test_url)
# Test RSS entry order: Create multiple versions and verify newest appears first
for version in range(3, 6): # Create versions 3, 4, 5
set_html_content(datastore_path, f"Version {version} content")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(0.5) # Small delay to ensure different timestamps
# Fetch RSS feed again to verify order
res = client.get(
url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token),
follow_redirects=False
)
assert res.status_code == 200
# Parse RSS and check order (newest first)
root = ET.fromstring(res.data)
items = root.findall('.//item')
assert len(items) >= 3, f"Expected at least 3 items, got {len(items)}"
# Get descriptions from first 3 items
descriptions = []
for item in items[:3]:
desc = item.findtext('description')
descriptions.append(desc if desc else "")
# First item should contain newest change (Version 5)
assert b"Version 5" in descriptions[0].encode() or "Version 5" in descriptions[0], \
f"First item should show newest change (Version 5), but got: {descriptions[0][:200]}"
# Second item should contain Version 4
assert b"Version 4" in descriptions[1].encode() or "Version 4" in descriptions[1], \
f"Second item should show Version 4, but got: {descriptions[1][:200]}"
# Third item should contain Version 3
assert b"Version 3" in descriptions[2].encode() or "Version 3" in descriptions[2], \
f"Third item should show Version 3, but got: {descriptions[2][:200]}"

View File

@@ -1,245 +0,0 @@
#!/usr/bin/env python3
import time
import os
import xml.etree.ElementTree as ET
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, extract_UUID_from_client, delete_all_watches
def test_rss_single_watch_order(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that single watch RSS feed shows changes in correct order (newest first).

    The test writes five successive versions of a page, re-checking the watch
    after each one, then fetches the single-watch RSS feed and asserts that the
    first three entries mention "Version 5", "Version 4" and "Version 3" in
    that order.
    """

    def set_response(datastore_path, version):
        # Write the page body for this version into endpoint-content.txt
        test_return_data = f"""<html>
<body>
<p>Version {version} content</p>
</body>
</html>
"""
        with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
            f.write(test_return_data)

    # Start with version 1
    set_response(datastore_path, 1)

    # Add a watch
    test_url = url_for('test_endpoint', _external=True) + "?order_test=1"
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": 'test-tag'},
        follow_redirects=True
    )
    assert b"Watch added" in res.data

    # Get the watch UUID
    watch_uuid = extract_UUID_from_client(client)

    # Wait for initial check
    wait_for_all_checks(client)

    # Create versions 2, 3, 4, 5 by changing the content and re-checking
    for version in range(2, 6):
        set_response(datastore_path, version)
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)
        time.sleep(0.5)  # Small delay to ensure different timestamps

    # Get RSS token
    rss_token = extract_rss_token_from_UI(client)

    # Request RSS feed for the single watch
    res = client.get(
        url_for("rss.rss_single_watch", uuid=watch_uuid, token=rss_token, _external=True),
        follow_redirects=True
    )

    # Should return valid RSS
    assert res.status_code == 200
    assert b"<?xml" in res.data or b"<rss" in res.data

    # Parse the RSS/XML
    root = ET.fromstring(res.data)

    # Find all items (RSS 2.0) or entries (Atom)
    items = root.findall('.//item')
    if not items:
        items = root.findall('.//{http://www.w3.org/2005/Atom}entry')

    # Should have multiple items
    assert len(items) >= 3, f"Expected at least 3 items, got {len(items)}"

    # Get the descriptions/content from first 3 items
    descriptions = []
    for item in items[:3]:
        # Try RSS format first
        desc = item.findtext('description')
        if not desc:
            # Try Atom format
            content_elem = item.find('{http://www.w3.org/2005/Atom}content')
            if content_elem is not None:
                desc = content_elem.text
        # Normalise a missing/empty description to "" so slicing below is safe
        descriptions.append(desc if desc else "")

    for ordinal, desc in zip(("First", "Second", "Third"), descriptions):
        print(f"{ordinal} item content: {desc[:100] if desc else 'None'}")

    # The FIRST item should contain the NEWEST change (Version 5),
    # the SECOND item Version 4 and the THIRD item Version 3.
    expectations = (
        ("Version 5", "First item should show newest change (Version 5)"),
        ("Version 4", "Second item should show Version 4"),
        ("Version 3", "Third item should show Version 3"),
    )
    for desc, (needle, failure_label) in zip(descriptions, expectations):
        # A plain str containment check is sufficient; the descriptions are str
        assert needle in desc, f"{failure_label}, but got: {desc[:200]}"

    # Clean up
    delete_all_watches(client)
def test_rss_categories_from_tags(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that RSS feeds include category tags from watch tags.

    Creates three tags, adds a watch carrying all of them, triggers one change
    and then checks the <category> elements of the first item in the
    single-watch feed, the main feed and (when the tag lookup succeeds) the
    tag-specific feed.
    """
    tag_names = ("Security", "Python", "Tech News")
    content_path = os.path.join(datastore_path, "endpoint-content.txt")

    def write_content(body):
        # Replace the contents of endpoint-content.txt with the given page body
        with open(content_path, "w") as f:
            f.write(body)

    def first_item_categories(items):
        # Text of every <category> child of the first feed item
        return [cat.text for cat in items[0].findall('category')]

    def assert_has_all_tags(categories, where=""):
        # Every tag assigned to the watch must appear as a category
        for tag_name in tag_names:
            assert tag_name in categories, f"Expected '{tag_name}' category{where}, got: {categories}"

    # Create initial content
    test_return_data = """<html>
<body>
<p>Test content for RSS categories</p>
</body>
</html>
"""
    write_content(test_return_data)

    # Create some tags first
    for tag_name in tag_names:
        client.post(
            url_for("tags.form_tag_add"),
            data={"name": tag_name},
            follow_redirects=True
        )

    # Add a watch with tags
    test_url = url_for('test_endpoint', _external=True) + "?category_test=1"
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "Security, Python, Tech News"},
        follow_redirects=True
    )
    assert b"Watch added" in res.data

    # Get the watch UUID
    watch_uuid = extract_UUID_from_client(client)

    # Wait for initial check
    wait_for_all_checks(client)

    # Trigger one change
    test_return_data_v2 = """<html>
<body>
<p>Updated content for RSS categories</p>
</body>
</html>
"""
    write_content(test_return_data_v2)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Get RSS token
    rss_token = extract_rss_token_from_UI(client)

    # Test 1: Check single watch RSS feed
    res = client.get(
        url_for("rss.rss_single_watch", uuid=watch_uuid, token=rss_token, _external=True),
        follow_redirects=True
    )
    assert res.status_code == 200
    assert b"<?xml" in res.data or b"<rss" in res.data

    root = ET.fromstring(res.data)
    items = root.findall('.//item')
    assert len(items) >= 1, "Expected at least 1 item in RSS feed"

    categories = first_item_categories(items)
    print(f"Found categories in single watch RSS: {categories}")

    # Should have exactly the three categories
    assert_has_all_tags(categories)
    assert len(categories) == 3, f"Expected 3 categories, got {len(categories)}: {categories}"

    # Test 2: Check main RSS feed
    res = client.get(
        url_for("rss.feed", token=rss_token, _external=True),
        follow_redirects=True
    )
    assert res.status_code == 200

    root = ET.fromstring(res.data)
    items = root.findall('.//item')
    assert len(items) >= 1, "Expected at least 1 item in main RSS feed"

    categories = first_item_categories(items)
    print(f"Found categories in main RSS feed: {categories}")

    # Should have all three categories
    assert_has_all_tags(categories, " in main feed")

    # Test 3: Check tag-specific RSS feed (should also have categories)
    # Get the tag UUID for "Security" and verify the tag feed also has categories
    from .util import get_UUID_for_tag_name
    security_tag_uuid = get_UUID_for_tag_name(client, name="Security")

    # NOTE(review): when the lookup returns nothing, or the tag feed has no
    # items, this section is skipped silently and the test still passes.
    # Confirm whether that is intended or whether these should be asserts.
    if security_tag_uuid:
        res = client.get(
            url_for("rss.rss_tag_feed", tag_uuid=security_tag_uuid, token=rss_token, _external=True),
            follow_redirects=True
        )
        assert res.status_code == 200

        root = ET.fromstring(res.data)
        items = root.findall('.//item')
        if len(items) >= 1:
            categories = first_item_categories(items)
            print(f"Found categories in tag RSS feed: {categories}")

            # Should still have all three categories
            assert_has_all_tags(categories, " in tag feed")

    # Clean up
    delete_all_watches(client)