Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
4fdabd53fc Solve circular import 2024-07-29 12:50:43 +02:00
dgtlmoon
b97d34d77c Bump field text 2024-07-29 11:45:44 +02:00
dgtlmoon
e12b5a6f71 Re #2528 - handle zero-byte responses with "Empty pages are a change" the same as when the HTML doesnt render any useful text 2024-07-29 11:41:05 +02:00
26 changed files with 102 additions and 222 deletions

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.46.04'
__version__ = '0.46.02'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -85,8 +85,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
playwright_browser=browsersteps_start_session['browser'],
proxy=proxy,
start_url=datastore.data['watching'][watch_uuid].get('url'),
headers=datastore.data['watching'][watch_uuid].get('headers')
start_url=datastore.data['watching'][watch_uuid].get('url')
)
# For test

View File

@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul id="advanced-help-selectors">
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>

View File

@@ -65,8 +65,8 @@ class Fetcher():
def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
@abstractmethod
def get_error(self):

View File

@@ -75,7 +75,6 @@ function isItemInStock() {
'vergriffen',
'vorbestellen',
'vorbestellung ist bald möglich',
'we don\'t currently have any',
'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.',
@@ -174,8 +173,7 @@ function isItemInStock() {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue
}
elementText = "";
@@ -189,7 +187,7 @@ function isItemInStock() {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
return outOfStockText; // item is out of stock
}
}

View File

@@ -164,15 +164,6 @@ visibleElementsArray.forEach(function (element) {
}
}
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
let text = element.textContent.trim().slice(0, 30).trim();
while (/\n{2,}|\t{2,}/.test(text)) {
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
}
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,)/.test(text) ;
size_pos.push({
xpath: xpath_result,
@@ -180,16 +171,9 @@ visibleElementsArray.forEach(function (element) {
height: Math.round(bbox['height']),
left: Math.floor(bbox['left']),
top: Math.floor(bbox['top']) + scroll_y,
// tagName used by Browser Steps
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
// tagtype used by Browser Steps
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: window.getComputedStyle(element).cursor === "pointer",
// Used by the keras trainer
fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
hasDigitCurrency: hasDigitCurrency,
label: label,
isClickable: window.getComputedStyle(element).cursor == "pointer"
});
});

View File

@@ -729,12 +729,6 @@ def changedetection_app(config=None, datastore_o=None):
for p in datastore.proxy_list:
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
# Add some HTML to be used for form validation
if datastore.data['watching'][uuid].history.keys():
timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
else:
form.last_html_for_form_validation = "<html><body></body></html>"
if request.method == 'POST' and form.validate():
@@ -1383,19 +1377,17 @@ def changedetection_app(config=None, datastore_o=None):
import brotli
watch = datastore.data['watching'].get(uuid)
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
latest_filename = list(watch.history.keys())[-1]
if watch and os.path.isdir(watch.watch_data_dir):
latest_filename = list(watch.history.keys())[0]
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
if html_fname.endswith('.br'):
# Read and decompress the Brotli file
if html_fname.endswith('.br'):
# Read and decompress the Brotli file
with open(html_fname, 'rb') as f:
decompressed_data = brotli.decompress(f.read())
else:
decompressed_data = f.read()
buffer = BytesIO(decompressed_data)
buffer = BytesIO(decompressed_data)
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
# Return a 500 error

View File

@@ -1,9 +1,6 @@
import os
import re
import elementpath
from changedetectionio.html_tools import xpath_filter, xpath1_filter
from changedetectionio.strtobool import strtobool
from wtforms import (
@@ -325,39 +322,52 @@ class ValidateCSSJSONXPATHInput(object):
self.allow_json = allow_json
def __call__(self, form, field):
from lxml.etree import XPathEvalError
if isinstance(field.data, str):
data = [field.data]
else:
data = field.data
for line in data:
line = line.strip()
# Nothing to see here
if not len(line.strip()):
return
if not line:
continue
if line.startswith('xpath') or line.startswith('/'):
# Does it look like XPath?
if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
if not self.allow_xpath:
raise ValidationError("XPath not permitted in this field!")
if line.startswith('xpath1:'):
filter_function = xpath1_filter
else:
line = line.replace('xpath:', '')
filter_function = xpath_filter
from lxml import etree, html
import elementpath
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
tree = html.fromstring("<html></html>")
line = line.replace('xpath:', '')
try:
# Call the determined function
res = filter_function(xpath_filter=line, html_content=form.last_html_for_form_validation)
# It's OK if this is an empty result, we just want to check that it doesn't crash the parser
except (elementpath.ElementPathError,XPathEvalError) as e:
elementpath.select(tree, line.strip(), parser=XPath3Parser)
except elementpath.ElementPathError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
except Exception as e:
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
elif 'json:' in line:
if line.strip().startswith('xpath1:'):
if not self.allow_xpath:
raise ValidationError("XPath not permitted in this field!")
from lxml import etree, html
tree = html.fromstring("<html></html>")
line = re.sub(r'^xpath1:', '', line)
try:
tree.xpath(line.strip())
except etree.XPathEvalError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
if 'json:' in line:
if not self.allow_json:
raise ValidationError("JSONPath not permitted in this field!")
@@ -382,7 +392,7 @@ class ValidateCSSJSONXPATHInput(object):
if not self.allow_json:
raise ValidationError("jq not permitted in this field!")
elif line.startswith('jq:'):
if 'jq:' in line:
try:
import jq
except ModuleNotFoundError:

View File

@@ -8,7 +8,6 @@ from xml.sax.saxutils import escape as xml_escape
import json
import re
from loguru import logger
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
@@ -109,20 +108,6 @@ def elementpath_tostring(obj):
return str(obj)
def extract_namespaces(xml_content):
"""
Extracts all namespaces from the XML content.
"""
from lxml import etree
from io import BytesIO
it = etree.iterparse(BytesIO(xml_content), events=('start-ns',))
namespaces = {}
for _, ns in it:
prefix, uri = ns
namespaces[prefix] = uri
return namespaces
# Return str Utf-8 of matched rules
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
from lxml import etree, html
@@ -138,14 +123,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
html_block = ""
# Automatically extract all namespaces from the XML content
namespaces = {'re': 'http://exslt.org/regular-expressions'}
try:
namespaces.update(extract_namespaces(html_content.encode('utf-8')))
except Exception as e:
logger.warning(f"Problem extracting namespaces from HTMl/XML content {str(e)}")
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
#@note: //title/text() wont work where <title>CDATA..
if type(r) != list:

View File

@@ -1,12 +1,11 @@
from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch
from typing import Union
import re
from babel.numbers import parse_decimal
class Restock(dict):
def parse_currency(self, raw_value: str) -> Union[float, None]:
def parse_currency(self, raw_value: str) -> float:
# Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
standardized_value = raw_value
@@ -22,11 +21,8 @@ class Restock(dict):
# Remove any non-numeric characters except for the decimal point
standardized_value = re.sub(r'[^\d.-]', '', standardized_value)
if standardized_value:
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
return None
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
def __init__(self, *args, **kwargs):
# Define default values

View File

@@ -40,16 +40,13 @@ def get_itemprop_availability(html_content) -> Restock:
import extruct
logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
value = {}
now = time.time()
# Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
try:
data = extruct.extract(html_content, syntaxes=syntaxes)
except Exception as e:
logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
return Restock()
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
data = extruct.extract(html_content, syntaxes=syntaxes)
logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")
# First phase, dead simple scanning of anything that looks useful

View File

@@ -77,12 +77,11 @@ class perform_site_check(difference_detection_processor):
ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
# Go into RSS preprocess for converting CDATA/comment to usable text
# Ctype_header could be unset if we are just reprocessing the existin content
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']) or not ctype_header:
top_text = self.fetcher.content[:200].lower().strip()
if '<rss' in top_text or 'search.yahoo.com/mrss/' in top_text:
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
if '<rss' in self.fetcher.content[:100].lower():
self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
is_rss = True
# source: support, basically treat it as plaintext
if watch.is_source_type_url:
is_html = False

View File

@@ -18,11 +18,9 @@ $(document).ready(function () {
});
$(".toggle-show").click(function (e) {
$("#notification-token-toggle").click(function (e) {
e.preventDefault();
let target = $(this).data('target');
$(target).toggle();
$('#notification-tokens-info').toggle();
});
});

View File

@@ -11,11 +11,8 @@
class="notification-urls" )
}}
<div class="pure-form-message-inline">
<p>
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
</p>
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul style="display: none" id="advanced-help-notifications">
<ul>
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -43,7 +40,7 @@
</div>
<div class="pure-controls">
<div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
</div>
<div class="pure-controls" style="display: none;" id="notification-tokens-info">
<table class="pure-table" id="token-table">

View File

@@ -4,7 +4,6 @@
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
<script>
const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@@ -276,9 +275,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
<ul id="advanced-help-selectors" style="display: none;">
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
@@ -298,12 +297,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</span>
</div>
<fieldset class="pure-control-group">

View File

@@ -2,7 +2,7 @@
import os
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
assert b'not-in-stock' not in res.data
# We should have a notification
wait_for_notification_endpoint_output()
time.sleep(2)
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
os.unlink("test-datastore/notification.txt")
@@ -103,7 +103,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(5)
assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"
# BUT we should see that it correctly shows "not in stock"

View File

@@ -2,7 +2,7 @@
import os.path
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from .util import live_server_setup, wait_for_all_checks
from changedetectionio import html_tools
@@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'unviewed' in res.data
# Takes a moment for apprise to fire
wait_for_notification_endpoint_output()
time.sleep(3)
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
with open("test-datastore/notification.txt", 'rb') as f:
response = f.read()

View File

@@ -69,12 +69,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
# Check the 'get latest snapshot works'
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
assert b'which has this one new line' in res.data
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data
@@ -92,7 +86,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert expected_url.encode('utf-8') in res.data
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
res = client.get(url_for("diff_history_page", uuid=uuid))
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'selected=""' in res.data, "Confirm diff history page loaded"
# Check the [preview] pulls the right one
@@ -149,12 +143,18 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b'unviewed' not in res.data
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
uuid = extract_UUID_from_client(client)
client.get(url_for("clear_watch_history", uuid=uuid))
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'preview/' in res.data
# Check the 'get latest snapshot works'
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
assert b'<head><title>head title</title></head>' in res.data
#
# Cleanup everything
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -4,7 +4,7 @@
import os
import time
from flask import url_for
from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
from .util import set_original_response, live_server_setup
from changedetectionio.model import App
@@ -102,15 +102,14 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_notification_endpoint_output()
time.sleep(3)
# Shouldn't exist, shouldn't have fired
assert not os.path.isfile("test-datastore/notification.txt")
# Now the filter should exist
set_response_with_filter()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_notification_endpoint_output()
time.sleep(3)
assert os.path.isfile("test-datastore/notification.txt")

View File

@@ -1,8 +1,7 @@
import os
import time
from flask import url_for
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
wait_for_notification_endpoint_output
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
from changedetectionio.model import App
@@ -108,8 +107,7 @@ def run_filter_test(client, live_server, content_filter):
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
time.sleep(2) # delay for apprise to fire
# Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt")
@@ -129,7 +127,6 @@ def run_filter_test(client, live_server, content_filter):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
# It should have sent a notification, but..
assert os.path.isfile("test-datastore/notification.txt")
# but it should not contain the info about a failed filter (because there was none in this case)

View File

@@ -2,8 +2,6 @@
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time
def set_nonrenderable_response():
test_return_data = """<html>
@@ -13,16 +11,17 @@ def set_nonrenderable_response():
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
time.sleep(1)
return None
def set_zero_byte_response():
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("")
time.sleep(1)
return None
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):

View File

@@ -3,7 +3,7 @@ import os
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from ..notification import default_notification_format
instock_props = [
@@ -182,8 +182,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
# price changed to something LESS than min (900), SHOULD be a change
set_original_response(props_markup=instock_props[0], price='890.45')
# let previous runs wait
time.sleep(2)
time.sleep(1)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
@@ -198,8 +197,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
# Depending on the LOCALE it may be either of these (generally for US/default/etc)
assert b'1,890.45' in res.data or b'1890.45' in res.data
assert b'1,890.45' or b'1890.45' in res.data
assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -364,7 +362,7 @@ def test_change_with_notification_values(client, live_server):
set_original_response(props_markup=instock_props[0], price='1950.45')
client.get(url_for("form_watch_checknow"))
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
time.sleep(3)
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()

View File

@@ -164,46 +164,3 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
assert b'Some other description' not in res.data # Should NOT be selected by the xpath
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_namespace_selectors(live_server, client):
set_original_cdata_xml()
#live_server_setup(live_server)
test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
# because it will look for the namespaced stuff during form validation, but on the first check it wont exist..
res = client.post(
url_for("edit_page", uuid=uuid),
data={
"include_filters": "//media:thumbnail/@url",
"fetch_backend": "html_requests",
"headers": "",
"proxy": "no-proxy",
"tags": "",
"url": test_url,
},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'CDATA' not in res.data
assert b'<![' not in res.data
assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -76,17 +76,6 @@ def set_more_modified_response():
return None
def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
from os.path import isfile
for i in range(1, 20):
time.sleep(1)
if isfile("test-datastore/notification.txt"):
return True
return False
# kinda funky, but works for now
def extract_api_key_from_UI(client):
import re

View File

@@ -18,7 +18,7 @@ services:
#
# Log levels are in descending order. (TRACE is the most detailed one)
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
# - LOGGER_LEVEL=TRACE
# - LOGGER_LEVEL=DEBUG
#
# Alternative WebDriver/selenium URL, do not use "'s or 's!
# - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
@@ -29,9 +29,8 @@ services:
#
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
#
# Alternative target "Chrome" Playwright URL, do not use "'s or 's!
# "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
# - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
# Alternative Playwright URL, do not use "'s or 's!
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
#
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
#
@@ -74,10 +73,10 @@ services:
# condition: service_started
# Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME
# sockpuppetbrowser:
# hostname: sockpuppetbrowser
# playwright-chrome:
# hostname: playwright-chrome
# image: dgtlmoon/sockpuppetbrowser:latest
# cap_add:
# - SYS_ADMIN

View File

@@ -79,9 +79,8 @@ pyppeteerstealth>=0.0.4
pytest ~=7.2
pytest-flask ~=1.2
# Anything 4.0 and up but not 5.0
jsonschema ~= 4.0
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
jsonschema==4.17.3
loguru