#!/usr/bin/env python3 import time from flask import url_for from .util import live_server_setup, wait_for_all_checks, delete_all_watches import os from ..html_tools import * def set_original_response(datastore_path): test_return_data = """
Some initial textWhich is across multiple lines
which has this one new line
Something
across 6 billion multiple
lines
Current temperature: 21 celsius
Humidity: 55%
Wind speed: 10 km/h
Feels like: 19 celsius
UV index: 3
""" with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: f.write(test_return_data) test_url = url_for('test_endpoint', _external=True) uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) res = client.post( url_for("ui.ui_edit.edit_page", uuid=uuid), data={ 'extract_lines_containing': 'celsius', "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y" }, follow_redirects=True ) assert b"Updated watch." in res.data wait_for_all_checks(client) res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True) # Lines containing 'celsius' should be present assert b'celsius' in res.data # Lines without 'celsius' should be excluded assert b'Humidity' not in res.data assert b'Wind speed' not in res.data assert b'UV index' not in res.data delete_all_watches(client) def test_extract_lines_containing_case_insensitive(client, live_server, measure_memory_usage, datastore_path): """Test that extract_lines_containing is case-insensitive.""" test_return_data = """PRICE: $99.99
Price drops to $79.99
Stock: Available
price history shows decline
""" with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: f.write(test_return_data) test_url = url_for('test_endpoint', _external=True) uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) res = client.post( url_for("ui.ui_edit.edit_page", uuid=uuid), data={ 'extract_lines_containing': 'price', "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y" }, follow_redirects=True ) assert b"Updated watch." in res.data wait_for_all_checks(client) res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True) # All three price lines (different cases) should match assert b'$99.99' in res.data assert b'$79.99' in res.data assert b'price history' in res.data # Non-price line should be excluded assert b'Stock' not in res.data delete_all_watches(client) def test_extract_lines_containing_multiple_terms(client, live_server, measure_memory_usage, datastore_path): """Test that multiple extract_lines_containing entries act as OR (keep line if any term matches).""" test_return_data = """Temperature: 21 celsius
Humidity: 55%
Wind speed: 10 km/h
Rain chance: 20%
""" with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: f.write(test_return_data) test_url = url_for('test_endpoint', _external=True) uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) res = client.post( url_for("ui.ui_edit.edit_page", uuid=uuid), data={ 'extract_lines_containing': 'celsius\r\nhumidity', "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y" }, follow_redirects=True ) assert b"Updated watch." in res.data wait_for_all_checks(client) res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True) assert b'celsius' in res.data assert b'Humidity' in res.data # Wind and Rain lines should be excluded assert b'Wind speed' not in res.data assert b'Rain chance' not in res.data delete_all_watches(client) def test_extract_lines_containing_with_ignore_text(client, live_server, measure_memory_usage, datastore_path): """ extract_lines_containing narrows to matching lines; ignore_text then suppresses specific lines from triggering change detection (they remain visible but don't affect the checksum). Filters are set BEFORE the first check so the filtered+ignored checksum is the baseline from the very start — no race between a forced-recheck and the next content write. """ initial_data = """Temperature: 21 celsius
Feels like: 19 celsius
Humidity: 55%
""" with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: f.write(initial_data) test_url = url_for('test_endpoint', _external=True) uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={'paused': True}) # Set filters BEFORE the first check so the baseline is always filtered+ignored. # (Setting them after an initial unfiltered check creates a race: the forced recheck # that updates previous_md5 must complete before the next content write, which is # timing-sensitive and fails intermittently on slower systems / Python 3.14.) res = client.post( url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), data={ 'extract_lines_containing': 'celsius', 'ignore_text': 'Feels like', "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y" }, follow_redirects=True ) assert b"unpaused" in res.data # First check — establishes filtered+ignored baseline. previous_md5 was False so # a change is always detected here; mark_all_viewed clears it before we assert. client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) # Sanity: preview should only show celsius lines res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True) assert b'celsius' in res.data assert b'Humidity' not in res.data # Change ONLY the ignored "Feels like" line — should NOT trigger a change changed_data = """Temperature: 21 celsius
Feels like: 17 celsius
Humidity: 55%
""" with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: f.write(changed_data) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) res = client.get(url_for("watchlist.index")) assert b'has-unread-changes' not in res.data, "Changing an ignored line should not trigger a change notification" client.get(url_for("ui.mark_all_viewed"), follow_redirects=True) time.sleep(1) # Change the non-ignored celsius line — SHOULD trigger triggered_data = """Temperature: 30 celsius
Feels like: 17 celsius
Humidity: 55%
""" with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: f.write(triggered_data) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) res = client.get(url_for("watchlist.index")) assert b'has-unread-changes' in res.data, "Changing a non-ignored line should trigger a change notification" delete_all_watches(client) def test_extract_lines_containing_with_extract_text_regex(client, live_server, measure_memory_usage, datastore_path): """ extract_lines_containing first narrows to relevant lines, then extract_text regex pulls specific tokens from those lines — verifying correct pipeline ordering. """ test_return_data = """Widget price: $49.99 each
Gadget price: $129.00 each
Latest news: price index up 2%
Stock count: 150 units
Shipping cost: $5.99
""" with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: f.write(test_return_data) test_url = url_for('test_endpoint', _external=True) uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) res = client.post( url_for("ui.ui_edit.edit_page", uuid=uuid), data={ # Step 1: keep lines containing "price" (excludes Stock count and Shipping cost) 'extract_lines_containing': 'price', # Step 2: from those lines extract only dollar amounts 'extract_text': r'/\$[\d.]+/', "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y" }, follow_redirects=True ) assert b"Updated watch." in res.data wait_for_all_checks(client) res = client.get(url_for("ui.ui_preview.preview_page", uuid=uuid), follow_redirects=True) # Dollar amounts from price lines should be extracted assert b'$49.99' in res.data assert b'$129.00' in res.data # "price index up 2%" has no dollar amount — nothing extracted from that line # "Shipping cost" line was excluded by extract_lines_containing before regex ran assert b'$5.99' not in res.data # Raw line text should not appear — regex replaced it with just the match assert b'Widget' not in res.data assert b'Stock count' not in res.data delete_all_watches(client) def test_extract_lines_containing_with_include_filters_css(client, live_server, measure_memory_usage, datastore_path): """ CSS include_filters narrows the HTML first; extract_lines_containing then filters within that already-reduced text — verifying correct pipeline ordering. """ test_return_data = """Temperature: 21 celsius
Humidity: 60%
Wind: 15 km/h
Local forecast: warm celsius weather ahead
Markets closed early