# -*- coding: utf-8 -*-

from flask import url_for
from .util import wait_for_all_checks, delete_all_watches
from ..processors.magic import RSS_XML_CONTENT_TYPES
import os

# NOTE(review): the fixture strings in this module carry no markup, although
# the tests select on elements and class attributes (``//item``, ``//div``,
# ``@class``). The markup may have been lost in this copy of the file -
# confirm against version control before relying on these fixtures.


def _write_endpoint_content(datastore_path, content):
    """Write ``content`` to ``endpoint-content.txt`` inside ``datastore_path``.

    ``bytes`` are written unchanged in binary mode. ``str`` is written in text
    mode and always encoded as UTF-8, so fixtures containing non-ASCII
    characters do not depend on the platform's locale encoding.
    """
    target = os.path.join(datastore_path, "endpoint-content.txt")
    if isinstance(content, bytes):
        with open(target, "wb") as f:
            f.write(content)
    else:
        with open(target, "w", encoding="utf-8") as f:
            f.write(content)


def _add_watch_and_check(client, test_url):
    """Add a watch for ``test_url``, trigger a check and wait for it to finish.

    Returns the value returned by the datastore's ``add_watch()``.
    """
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    return uuid


def _set_include_filters(client, test_url, include_filters, uuid="first"):
    """POST the edit form for ``uuid`` with ``include_filters`` and return the response."""
    return client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data={
            "include_filters": include_filters,
            "url": test_url,
            "tags": "",
            "headers": "",
            'fetch_backend': "html_requests",
            "time_between_check_use_default": "y",
        },
        follow_redirects=True
    )


def _get_preview(client, uuid="first"):
    """GET the preview page for ``uuid`` and return the response."""
    return client.get(
        url_for("ui.ui_preview.preview_page", uuid=uuid),
        follow_redirects=True
    )


def set_rss_atom_feed_response(datastore_path, header=''):
    """Write the RSS feed fixture, prefixed with ``header``, to the endpoint content file."""
    test_return_data = f"""{header} RSS Feed en-us water News RSS 🍁 Lets go discount

ok heres the description

Wed, 08 Oct 2025 15:28:55 +0000 https://store.waterpowered.com/news/app/1643320/view/511845698831908921
"""
    _write_endpoint_content(datastore_path, test_return_data)


def set_original_response(datastore_path):
    """Write the baseline page fixture to the endpoint content file."""
    test_return_data = """ Some initial text

Which is across multiple lines


So let's see what happens.
Some text thats the same
Some text that will change
"""
    _write_endpoint_content(datastore_path, test_return_data)


def set_modified_response(datastore_path):
    """Write the modified page fixture to the endpoint content file.

    Compared with the baseline, the "So let's see what happens." line and the
    last line differ; "Some text thats the same" is unchanged.
    """
    test_return_data = """ Some initial text

Which is across multiple lines


So let's see what happens. THIS CHANGES AND SHOULDNT TRIGGER A CHANGE
Some text thats the same
Some new text
"""
    _write_endpoint_content(datastore_path, test_return_data)


# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
def test_check_xpath_filter_utf8(client, live_server, measure_memory_usage, datastore_path):
    """An XPath filter on a UTF-8 RSS reply must not produce the lxml encoding-declaration error."""
    include_filter = '//item/*[self::description]'

    d = (
        " rpilocator.com https://rpilocator.com Find Raspberry Pi Computers in Stock"
        " Thu, 19 May 2022 23:27:30 GMT https://rpilocator.com/favicon.png"
        " rpilocator.com https://rpilocator.com/ 32 32"
        " Stock Alert (UK): RPi CM4 - 1GB RAM, No MMC, No Wifi is In Stock at Pimoroni"
        " Stock Alert (UK): RPi CM4 - 1GB RAM, No MMC, No Wifi is In Stock at Pimoroni"
        " https://rpilocator.com?vendor=pimoroni&utm_source=feed&utm_medium=rss"
        " pimoroni UK CM4 F9FAB0D9-DF6F-40C8-8DEE5FC0646BB722"
        " Thu, 19 May 2022 14:32:32 GMT "
    )
    _write_endpoint_content(datastore_path, d)

    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, include_filter)
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'Unicode strings with encoding declaration are not supported.' not in res.data
    delete_all_watches(client)


# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
def test_check_xpath_text_function_utf8(client, live_server, measure_memory_usage, datastore_path):
    """An XPath ``text()`` filter on a UTF-8 RSS reply must work and show the titles in the preview."""
    include_filter = '//item/title/text()'

    d = (
        " rpilocator.com https://rpilocator.com Find Raspberry Pi Computers in Stock"
        " Thu, 19 May 2022 23:27:30 GMT https://rpilocator.com/favicon.png"
        " rpilocator.com https://rpilocator.com/ 32 32"
        " Stock Alert (UK): RPi CM4 something else unrelated"
        " Stock Alert (UK): Big monitor something else unrelated "
    )
    _write_endpoint_content(datastore_path, d)

    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, include_filter)
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'Unicode strings with encoding declaration are not supported.' not in res.data

    # Both titles must be present in the preview
    res = _get_preview(client)
    assert b'Stock Alert (UK): RPi CM4' in res.data
    assert b'Stock Alert (UK): Big monitor' in res.data

    delete_all_watches(client)


def test_check_markup_xpath_filter_restriction(client, live_server, measure_memory_usage, datastore_path):
    """With an include filter set, the modified response must not mark the watch as unread."""
    xpath_filter = "//*[contains(@class, 'sametext')]"

    set_original_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    # Go to the edit page and restrict the watch to the XPath filter
    res = _set_include_filters(client, test_url, xpath_filter)
    assert b"Updated watch." in res.data

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # view it/reset state back to viewed
    client.get(url_for("ui.ui_diff.diff_history_page", uuid="first"), follow_redirects=True)

    # Make a change
    set_modified_response(datastore_path=datastore_path)

    # Trigger a check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    # Give the thread time to pick it up
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' not in res.data
    delete_all_watches(client)


def test_xpath_validation(client, live_server, measure_memory_usage, datastore_path):
    """The edit form must reject an invalid XPath expression without a prefix."""
    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, "/something horrible")
    assert b"is not a valid XPath expression" in res.data
    delete_all_watches(client)


def test_xpath23_prefix_validation(client, live_server, measure_memory_usage, datastore_path):
    """The edit form must reject an invalid XPath expression given with the ``xpath:`` prefix."""
    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, "xpath:/something horrible")
    assert b"is not a valid XPath expression" in res.data
    delete_all_watches(client)


def test_xpath1_lxml(client, live_server, measure_memory_usage, datastore_path):
    """An ``xpath1:`` filter with non-ASCII content must not leak lxml internals or exceptions."""
    d = (
        " rpilocator.com https://rpilocator.com Find Raspberry Pi Computers in Stock"
        " Thu, 19 May 2022 23:27:30 GMT https://rpilocator.com/favicon.png"
        " rpilocator.com https://rpilocator.com/ 32 32"
        " Stock Alert (UK): RPi CM4 something else unrelated"
        " Stock Alert (UK): Big monitorěěěě something else unrelated "
    ).encode('utf-8')
    _write_endpoint_content(datastore_path, d)

    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, "xpath1://title/text()")

    ##### #2312
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'_ElementStringResult' not in res.data  # tested with 5.1.1 when it was removed and 5.1.0
    assert b'Exception' not in res.data

    res = _get_preview(client)
    assert b"rpilocator.com" in res.data
    assert "Stock Alert (UK): Big monitorěěěě".encode('utf-8') in res.data
    #####

    # Clean up like every other test in this module
    delete_all_watches(client)


def test_xpath1_validation(client, live_server, measure_memory_usage, datastore_path):
    """The edit form must reject an invalid XPath expression given with the ``xpath1:`` prefix."""
    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, "xpath1:/something horrible")
    assert b"is not a valid XPath expression" in res.data
    delete_all_watches(client)


# actually only really used by the distill.io importer, but could be handy too
def test_check_with_prefix_include_filters(client, live_server, measure_memory_usage, datastore_path):
    """An ``xpath:``-prefixed include filter must limit the preview to the selected text."""
    delete_all_watches(client)

    set_original_response(datastore_path=datastore_path)
    wait_for_all_checks(client)

    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, "xpath://*[contains(@class, 'sametext')]")
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = _get_preview(client)
    assert b"Some text thats the same" in res.data  # in selector
    assert b"Some text that will change" not in res.data  # not in selector

    delete_all_watches(client)


def test_various_rules(client, live_server, measure_memory_usage, datastore_path):
    """Just check that a handful of simple XPath rules do not produce a fetch error."""
    _write_endpoint_content(datastore_path, """ Some initial text

Which is across multiple lines


So let's see what happens.
Some text thats the same
Some text that will change
some linky another some linky """)

    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    for r in ['//div', '//a', 'xpath://div', 'xpath://a']:
        res = _set_include_filters(client, test_url, r)
        wait_for_all_checks(client)
        assert b"Updated watch." in res.data
        res = client.get(url_for("watchlist.index"))
        assert b'fetch-error' not in res.data, f"Should not see errors after '{r}' filter"

    delete_all_watches(client)


def test_xpath_20(client, live_server, measure_memory_usage, datastore_path):
    """A union (``|``) of two XPath selections must put both texts in the preview."""
    test_url = url_for('test_endpoint', _external=True)
    uuid = _add_watch_and_check(client, test_url)

    # The fixture is written only after the first check, as before
    set_original_response(datastore_path=datastore_path)

    res = _set_include_filters(
        client,
        test_url,
        "//*[contains(@class, 'sametext')]|//*[contains(@class, 'changetext')]",
        uuid=uuid,
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = _get_preview(client, uuid=uuid)
    assert b"Some text thats the same" in res.data  # in selector
    assert b"Some text that will change" in res.data  # in selector

    delete_all_watches(client)


def test_xpath_20_function_count(client, live_server, measure_memory_usage, datastore_path):
    """``count()`` with the ``xpath:`` prefix must be evaluated and its product shown in the preview."""
    set_original_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, "xpath:count(//div) * 123456789987654321")
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = _get_preview(client)
    assert b"246913579975308642" in res.data  # in selector

    delete_all_watches(client)


def test_xpath_20_function_count2(client, live_server, measure_memory_usage, datastore_path):
    """``count()`` as a path step must be evaluated and its product shown in the preview."""
    set_original_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    _add_watch_and_check(client, test_url)

    res = _set_include_filters(client, test_url, "/html/body/count(div) * 123456789987654321")
    assert b"Updated watch." in res.data

    # This variant explicitly re-triggers a check after saving the filter
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = _get_preview(client)
    assert b"246913579975308642" in res.data  # in selector

    delete_all_watches(client)


def test_xpath_20_function_string_join_matches(client, live_server, measure_memory_usage, datastore_path):
    """``string-join()`` and ``matches()`` must be evaluated and the joined text shown in the preview."""
    set_original_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    uuid = _add_watch_and_check(client, test_url)

    res = _set_include_filters(
        client,
        test_url,
        "xpath:string-join(//*[contains(@class, 'sametext')]|//*[matches(@class, 'changetext')], 'specialconjunction')",
        uuid=uuid,
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = _get_preview(client, uuid=uuid)
    assert b"Some text thats the samespecialconjunctionSome text that will change" in res.data  # in selector

    delete_all_watches(client)


def _subtest_xpath_rss(client, datastore_path, content_type='text/html'):
    """Add a paused watch served with ``content_type``, apply ``xpath://item`` and check the preview."""
    test_url = url_for('test_endpoint', content_type=content_type, _external=True)
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
        follow_redirects=True
    )
    assert b"Watch added in Paused state, saving will unpause" in res.data

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
        data={
            "url": test_url,
            "include_filters": "xpath://item",
            "tags": '',
            "fetch_backend": "html_requests",
            "time_between_check_use_default": "y",
        },
        follow_redirects=True
    )
    assert b"unpaused" in res.data

    wait_for_all_checks(client)

    res = _get_preview(client)
    assert b"Lets go discount" in res.data, f"When testing for Lets go discount called with content type '{content_type}'"
    # It should not be here because thats not our selector target
    assert b"Events and Announcements" not in res.data, f"When testing for Lets go discount called with content type '{content_type}'"

    delete_all_watches(client)


# Be sure all-in-the-wild types of RSS feeds work with xpath
def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
    """Run the RSS XPath sub-test for every feed header and every RSS/XML content type."""
    # NOTE(review): both header values are the same empty string here; one of
    # them may have lost its content (e.g. an XML declaration) - confirm.
    for feed_header in ['', '']:
        set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
        for content_type in RSS_XML_CONTENT_TYPES:
            _subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)


# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
def test_xpath_blocked_functions_unit():
    """Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed).

    Every expression in ``dangerous_expressions`` must raise
    ``elementpath.ElementPathError`` both through ``xpath_filter()`` and when
    evaluated directly with ``SafeXPath3Parser``. An ordinary expression must
    still return its text.
    """
    import elementpath
    from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
    from lxml import html

    # Minimal document containing the <p> element that the sanity check at the
    # end of this test selects with '//p/text()'.
    html_content = '<html><body><p>safe content</p></body></html>'

    dangerous_expressions = [
        "unparsed-text('file:///etc/passwd')",
        "unparsed-text-lines('file:///etc/passwd')",
        "unparsed-text-available('file:///etc/passwd')",
        "doc('file:///etc/passwd')",
        "doc-available('file:///etc/passwd')",
        "json-doc('file:///datastore/changedetection.json')",
        "collection('file:///datastore/')",
        "uri-collection('file:///datastore/')",
        "transform(map{})",
        "load-xquery-module('foo')",
        "environment-variable('PATH')",
        "available-environment-variables()",
    ]

    for expr in dangerous_expressions:
        # xpath_filter() must raise, not silently return file contents.
        # An explicit raise (instead of `assert False`) keeps the check alive
        # when Python runs with assertions disabled (-O).
        try:
            result = xpath_filter(expr, html_content)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            raise AssertionError(f"xpath_filter should have raised for: {expr!r}, got: {result!r}")

        # SafeXPath3Parser must reject the expression at parse time
        tree = html.fromstring(html_content)
        try:
            elementpath.select(tree, expr, parser=SafeXPath3Parser)
        except elementpath.ElementPathError:
            pass  # expected
        else:
            raise AssertionError(f"SafeXPath3Parser should have raised for: {expr!r}")

    # Sanity check: normal XPath still works
    result = xpath_filter('//p/text()', html_content)
    assert result == 'safe content'


# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
    """Edit-form validation must reject dangerous XPath 3.0 functions before they are stored.

    A watch is added and checked once; each dangerous expression is then
    submitted through the edit form, and every response must contain the
    "is not a valid XPath expression" error.
    """
    from flask import url_for

    set_original_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    datastore = client.application.config.get('DATASTORE')
    datastore.add_watch(url=test_url)

    # Run one check so the watch is in a settled state before editing it
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Form fields that stay the same for every submission
    shared_fields = {
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
        "time_between_check_use_default": "y",
    }

    dangerous_expressions = (
        "xpath:unparsed-text('file:///etc/passwd')",
        "xpath:environment-variable('PATH')",
        "xpath:doc('file:///etc/passwd')",
    )

    for expr in dangerous_expressions:
        form_data = {"include_filters": expr, **shared_fields}
        res = client.post(
            url_for("ui.ui_edit.edit_page", uuid="first"),
            data=form_data,
            follow_redirects=True
        )
        assert b"is not a valid XPath expression" in res.data, \
            f"Form should reject dangerous expression: {expr!r}"

    delete_all_watches(client)