#!/usr/bin/env python3
"""Tests for the text-extraction filters of a watch.

Covers ``extract_text`` (regex and plaintext rules), regex validation errors,
and ``extract_lines_containing`` on its own and combined with ``ignore_text``,
``extract_text`` and CSS ``include_filters``.

Every test writes a fixture to ``endpoint-content.txt`` inside the datastore
directory, adds a watch on the ``test_endpoint`` URL, and inspects the
preview page or the watch list.

NOTE(review): some tests select content with CSS (".changetext",
"div.weather") although the fixture text in this file contains no markup.
Confirm the fixtures still carry the HTML those selectors expect.
"""
import os
import time

from flask import url_for

from .util import live_server_setup, wait_for_all_checks, delete_all_watches
# Kept last, exactly as in the original import order, so any name it exports
# still takes precedence over the imports above.
from ..html_tools import *  # noqa: F401,F403

# File (inside the datastore directory) that the tests write fixtures into.
ENDPOINT_CONTENT_FILENAME = "endpoint-content.txt"

# live_server setup is performed per function by conftest, so there is no
# explicit setup test in this module.


def _write_endpoint_content(datastore_path, content):
    """Write *content* to the endpoint fixture file under *datastore_path*."""
    target = os.path.join(datastore_path, ENDPOINT_CONTENT_FILENAME)
    with open(target, "w", encoding="utf-8") as f:
        f.write(content)


def _recheck_all(client):
    """Request a check of all watches and block until the checks have finished."""
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)


def _watch_form(test_url, **fields):
    """Build the edit-form payload shared by most tests.

    *fields* carries the filter settings under test; the remaining keys are
    the common form fields these tests always submit.
    """
    form = dict(fields)
    form.update({
        "url": test_url,
        "tags": "",
        "headers": "",
        "fetch_backend": "html_requests",
        "time_between_check_use_default": "y",
    })
    return form


def set_original_response(datastore_path):
    """Write the baseline fixture used before any change is made."""
    test_return_data = """ Some initial text

Which is across multiple lines


So let's see what happens.
Some text thats the same
Some text that will change
"""
    _write_endpoint_content(datastore_path, test_return_data)


def set_modified_response(datastore_path):
    """Write the changed fixture containing the values the regex rules extract."""
    test_return_data = """ Some initial text

which has this one new line


So let's see what happens.
Some text thats the same
Some text that did change ( 1000 online
80 guests
2000 online )
SomeCase insensitive 3456
"""
    _write_endpoint_content(datastore_path, test_return_data)


def set_multiline_response(datastore_path):
    """Write a fixture whose interesting text spans several lines."""
    test_return_data = """

Something
across 6 billion multiple
lines

aaand something lines

and this should be
"""
    _write_endpoint_content(datastore_path, test_return_data)


def test_check_filter_multiline(client, live_server, measure_memory_usage, datastore_path):
    """A multi-line regex (``/si`` flags) and a plaintext rule both extract text."""
    set_multiline_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    _recheck_all(client)

    # One regex rule and one plaintext rule, separated by CRLF as the form sends them.
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=_watch_form(
            test_url,
            include_filters='',
            extract_text='/something.+?6 billion.+?lines/si\r\nand this should be',
        ),
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    # Issue 1828
    res = client.get(url_for("watchlist.index"))
    assert b'not at the start of the expression' not in res.data

    res = client.get(
        url_for("ui.ui_preview.preview_page", uuid=uuid),
        follow_redirects=True
    )

    # Plaintext that doesn't look like a regex should match also
    assert b'and this should be' in res.data

    assert b'Something' in res.data
    assert b'across 6 billion multiple' in res.data
    assert b'lines' in res.data

    # The later line that also ends in 'lines' must not be captured
    # (checks that the match is non-greedy).
    assert b'aaand something lines' not in res.data


def test_check_filter_and_regex_extract(client, live_server, measure_memory_usage, datastore_path):
    """Several regex rules (with and without flags/groups) extract from filtered content."""
    include_filters = ".changetext"

    set_original_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    _recheck_all(client)

    # Raw strings keep the regex backslashes literal; rules are CRLF-separated.
    extract_rules = "\r\n".join([
        r'/\d+ online/',
        r'/\d+ guests/',
        r'/somecase insensitive \d+/i',
        r'/somecase insensitive (345\d)/i',
        r'/issue1828.+?2022/i',
    ])

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=_watch_form(
            test_url,
            include_filters=include_filters,
            extract_text=extract_rules,
        ),
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    # Issue 1828
    res = client.get(url_for("watchlist.index"))
    assert b'not at the start of the expression' not in res.data

    # Make a change and trigger a check
    set_modified_response(datastore_path=datastore_path)
    _recheck_all(client)

    # The watch should be flagged as changed
    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data

    res = client.get(
        url_for("ui.ui_preview.preview_page", uuid=uuid),
        follow_redirects=True
    )

    # Every match of a rule should be present
    assert b'1000 online' in res.data
    assert b'2000 online' in res.data

    # Both regexes should have produced output
    assert b'80 guests' in res.data

    # Regex with flag handling should be here
    assert b'SomeCase insensitive 3456' in res.data

    # Single capture group from /somecase insensitive (345\d)/i
    assert b'3456' in res.data

    # Text not matched by any rule should not be here
    assert b'Some text that did change' not in res.data


def test_regex_error_handling(client, live_server, measure_memory_usage, datastore_path):
    """An invalid regex in ``extract_text`` is rejected with a validation message."""
    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)

    time.sleep(0.2)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data={
            # Unterminated quantifier and unknown flags: deliberately invalid.
            "extract_text": r'/something bad\d{3/XYZ',
            "url": test_url,
            "fetch_backend": "html_requests",
            "time_between_check_use_default": "y",
        },
        follow_redirects=True
    )

    assert b'is not a valid regular expression.' in res.data

    delete_all_watches(client)


def test_extract_lines_containing(client, live_server, measure_memory_usage, datastore_path):
    """Test the 'extract_lines_containing' filter keeps only lines with matching substrings."""
    test_return_data = """

Current temperature: 21 celsius

Humidity: 55%

Wind speed: 10 km/h

Feels like: 19 celsius

UV index: 3

"""
    _write_endpoint_content(datastore_path, test_return_data)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    _recheck_all(client)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=_watch_form(test_url, extract_lines_containing='celsius'),
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    # Lines containing 'celsius' should be present
    assert b'celsius' in res.data
    # Lines without 'celsius' should be excluded
    assert b'Humidity' not in res.data
    assert b'Wind speed' not in res.data
    assert b'UV index' not in res.data

    delete_all_watches(client)


def test_extract_lines_containing_case_insensitive(client, live_server, measure_memory_usage, datastore_path):
    """Test that extract_lines_containing is case-insensitive."""
    test_return_data = """

PRICE: $99.99

Price drops to $79.99

Stock: Available

price history shows decline

"""
    _write_endpoint_content(datastore_path, test_return_data)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    _recheck_all(client)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=_watch_form(test_url, extract_lines_containing='price'),
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    # All three price lines (different cases) should match
    assert b'$99.99' in res.data
    assert b'$79.99' in res.data
    assert b'price history' in res.data
    # Non-price line should be excluded
    assert b'Stock' not in res.data

    delete_all_watches(client)


def test_extract_lines_containing_multiple_terms(client, live_server, measure_memory_usage, datastore_path):
    """Test that multiple extract_lines_containing entries act as OR (keep line if any term matches)."""
    test_return_data = """

Temperature: 21 celsius

Humidity: 55%

Wind speed: 10 km/h

Rain chance: 20%

"""
    _write_endpoint_content(datastore_path, test_return_data)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    _recheck_all(client)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=_watch_form(test_url, extract_lines_containing='celsius\r\nhumidity'),
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    assert b'celsius' in res.data
    assert b'Humidity' in res.data
    # Wind and Rain lines should be excluded
    assert b'Wind speed' not in res.data
    assert b'Rain chance' not in res.data

    delete_all_watches(client)


def test_extract_lines_containing_with_ignore_text(client, live_server, measure_memory_usage, datastore_path):
    """
    extract_lines_containing narrows to matching lines; ignore_text then suppresses
    specific lines from triggering change detection (they remain visible but don't
    affect the checksum).

    Filters are set BEFORE the first check so the filtered+ignored checksum is the
    baseline from the very start — no race between a forced-recheck and the next
    content write.
    """
    initial_data = """

Temperature: 21 celsius

Feels like: 19 celsius

Humidity: 55%

"""
    _write_endpoint_content(datastore_path, initial_data)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={'paused': True})

    # Set filters BEFORE the first check so the baseline is always filtered+ignored.
    # (Setting them after an initial unfiltered check creates a race: the forced recheck
    # that updates previous_md5 must complete before the next content write, which is
    # timing-sensitive and fails intermittently on slower systems / Python 3.14.)
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
        data=_watch_form(
            test_url,
            extract_lines_containing='celsius',
            ignore_text='Feels like',
        ),
        follow_redirects=True
    )
    assert b"unpaused" in res.data

    # First check — establishes the filtered+ignored baseline.
    _recheck_all(client)

    # Sanity: preview should only show celsius lines
    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)
    assert b'celsius' in res.data
    assert b'Humidity' not in res.data

    # Change ONLY the ignored "Feels like" line — should NOT trigger a change
    changed_data = """

Temperature: 21 celsius

Feels like: 17 celsius

Humidity: 55%

"""
    _write_endpoint_content(datastore_path, changed_data)

    _recheck_all(client)

    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' not in res.data, "Changing an ignored line should not trigger a change notification"

    client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
    time.sleep(1)

    # Change the non-ignored celsius line — SHOULD trigger
    triggered_data = """

Temperature: 30 celsius

Feels like: 17 celsius

Humidity: 55%

"""
    _write_endpoint_content(datastore_path, triggered_data)

    _recheck_all(client)

    res = client.get(url_for("watchlist.index"))
    assert b'has-unread-changes' in res.data, "Changing a non-ignored line should trigger a change notification"

    delete_all_watches(client)


def test_extract_lines_containing_with_extract_text_regex(client, live_server, measure_memory_usage, datastore_path):
    """
    extract_lines_containing first narrows to relevant lines, then extract_text regex
    pulls specific tokens from those lines — verifying correct pipeline ordering.
    """
    test_return_data = """

Widget price: $49.99 each

Gadget price: $129.00 each

Latest news: price index up 2%

Stock count: 150 units

Shipping cost: $5.99

"""
    _write_endpoint_content(datastore_path, test_return_data)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    _recheck_all(client)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=_watch_form(
            test_url,
            # Step 1: keep lines containing "price" (excludes Stock count and Shipping cost)
            extract_lines_containing='price',
            # Step 2: from those lines extract only dollar amounts
            extract_text=r'/\$[\d.]+/',
        ),
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    # Dollar amounts from price lines should be extracted
    assert b'$49.99' in res.data
    assert b'$129.00' in res.data
    # "price index up 2%" has no dollar amount — nothing extracted from that line
    # "Shipping cost" line was excluded by extract_lines_containing before regex ran
    assert b'$5.99' not in res.data
    # Raw line text should not appear — regex replaced it with just the match
    assert b'Widget' not in res.data
    assert b'Stock count' not in res.data

    delete_all_watches(client)


def test_extract_lines_containing_with_include_filters_css(client, live_server, measure_memory_usage, datastore_path):
    """
    CSS include_filters narrows the HTML first; extract_lines_containing then filters
    within that already-reduced text — verifying correct pipeline ordering.
    """
    test_return_data = """

Temperature: 21 celsius

Humidity: 60%

Wind: 15 km/h

Local forecast: warm celsius weather ahead

Markets closed early

"""
    _write_endpoint_content(datastore_path, test_return_data)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    _recheck_all(client)

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid),
        data=_watch_form(
            test_url,
            # CSS filter: only look inside the weather div
            include_filters='div.weather',
            # Then keep only celsius lines from that section
            extract_lines_containing='celsius',
        ),
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True)

    # Only the celsius line from the weather div should survive both filters
    assert b'celsius' in res.data
    # Other weather lines excluded by extract_lines_containing
    assert b'Humidity' not in res.data
    assert b'Wind' not in res.data
    # News div content excluded entirely by CSS filter (even though it contains "celsius")
    assert b'Markets' not in res.data
    assert b'forecast' not in res.data

    delete_all_watches(client)