mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-05-06 01:31:06 +00:00
Ability to apply filters (first, last etc)
This commit is contained in:
@@ -397,6 +397,11 @@ class perform_site_check(difference_detection_processor):
|
||||
# RSS preprocessing
|
||||
if stream_content_type.is_rss:
|
||||
content = content_processor.preprocess_rss(content)
|
||||
if self.datastore.data["settings"]["application"].get("rss_reader_mode"):
|
||||
# The feed content is now regular HTML, so xpath/CSS filters can be applied to it (e.g. first item of the set)
|
||||
stream_content_type.is_rss = False
|
||||
stream_content_type.is_html = True
|
||||
self.fetcher.content = content
|
||||
|
||||
# PDF preprocessing
|
||||
if watch.is_pdf or stream_content_type.is_pdf:
|
||||
|
||||
@@ -109,8 +109,22 @@ def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
|
||||
if item_parts:
|
||||
formatted_items.append('\n'.join(item_parts))
|
||||
|
||||
# Join all items with <br><br><hr>
|
||||
return '<html><body>'+'<br><hr><br>'.join(formatted_items)
|
||||
# Wrap each item in a div with classes (first, last, item-N)
|
||||
items_html = []
|
||||
total_items = len(formatted_items)
|
||||
for idx, item in enumerate(formatted_items):
|
||||
classes = ['rss-item']
|
||||
if idx == 0:
|
||||
classes.append('first')
|
||||
if idx == total_items - 1:
|
||||
classes.append('last')
|
||||
classes.append(f'item-{idx + 1}')
|
||||
|
||||
class_str = ' '.join(classes)
|
||||
items_html.append(f'<div class="{class_str}">{item}</div>')
|
||||
|
||||
# Join items with two <br> tags
|
||||
return f'<html><body>{"\n<br><br>".join(items_html)}</body></html>'
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error formatting RSS items: {str(e)}")
|
||||
|
||||
@@ -25,6 +25,8 @@ def set_original_cdata_xml():
|
||||
<pubDate>Thu, 07 Aug 2025 00:00:00 GMT</pubDate>
|
||||
<description><p>Wet noodles escape<br><p>they also found themselves outside</p> </description>
|
||||
</item>
|
||||
|
||||
|
||||
<item>
|
||||
<title>TS-2025-004</title>
|
||||
<link>https://wetscale.com/security-bulletins/#ts-2025-004</link>
|
||||
@@ -69,3 +71,28 @@ def test_rss_reader_mode(client, live_server, measure_memory_usage):
|
||||
assert 'PubDate: Thu, 07 Aug 2025 00:00:00 GMT' in snapshot_contents
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_rss_reader_mode_with_css_filters(client, live_server, measure_memory_usage):
    """Check a CSS include filter against a feed fetched with RSS reader mode on.

    The watch is created with the ``.last`` include filter. The first stored
    snapshot must contain the 'The days of Terminator and The Matrix' text,
    must not contain the 'Wet noodles escape' text, and must be free of
    '<br>' and '<' characters.
    """
    set_original_cdata_xml()

    # The endpoint answers with a generic text/xml content type rather than an
    # RSS-specific one, which is what feeds usually send in practice.
    test_url = url_for('test_endpoint', content_type="text/xml; charset=UTF-8", _external=True)

    datastore = live_server.app.config['DATASTORE']
    datastore.data['settings']['application']['rss_reader_mode'] = True

    # Register the watch directly on the datastore, restricted to the `.last` selector.
    watch_uuid = client.application.config.get('DATASTORE').add_watch(
        url=test_url,
        extras={'include_filters': [".last"]},
    )
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    wait_for_all_checks(client)

    watch = datastore.data['watching'][watch_uuid]
    history_keys = list(watch.history.keys())
    first_snapshot = watch.get_history_snapshot(history_keys[0])

    # Text that must be filtered out, and markup that must not leak through.
    for unwanted in ('Wet noodles escape', '<br>', '<'):
        assert unwanted not in first_snapshot

    assert 'The days of Terminator and The Matrix' in first_snapshot

    delete_all_watches(client)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user