mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-17 13:35:50 +00:00
Compare commits
3 Commits
enhanced-R
...
2528-empty
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4fdabd53fc | ||
|
|
b97d34d77c | ||
|
|
e12b5a6f71 |
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
__version__ = '0.46.04'
|
||||
__version__ = '0.46.02'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -85,8 +85,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browsersteps_start_session['browser'],
|
||||
proxy=proxy,
|
||||
start_url=datastore.data['watching'][watch_uuid].get('url'),
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
start_url=datastore.data['watching'][watch_uuid].get('url')
|
||||
)
|
||||
|
||||
# For test
|
||||
|
||||
@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
{% if '/text()' in field %}
|
||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
|
||||
{% endif %}
|
||||
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
|
||||
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
|
||||
<ul id="advanced-help-selectors">
|
||||
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
|
||||
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
||||
<ul>
|
||||
|
||||
@@ -65,8 +65,8 @@ class Fetcher():
|
||||
|
||||
def __init__(self):
|
||||
import importlib.resources
|
||||
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
||||
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
||||
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
|
||||
|
||||
@abstractmethod
|
||||
def get_error(self):
|
||||
|
||||
@@ -75,7 +75,6 @@ function isItemInStock() {
|
||||
'vergriffen',
|
||||
'vorbestellen',
|
||||
'vorbestellung ist bald möglich',
|
||||
'we don\'t currently have any',
|
||||
'we couldn\'t find any products that match',
|
||||
'we do not currently have an estimate of when this product will be back in stock.',
|
||||
'we don\'t know when or if this item will be back in stock.',
|
||||
@@ -174,8 +173,7 @@ function isItemInStock() {
|
||||
const element = elementsToScan[i];
|
||||
// outside the 'fold' or some weird text in the heading area
|
||||
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
|
||||
// Note: theres also an automated test that places the 'out of stock' text fairly low down
|
||||
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
|
||||
if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
|
||||
continue
|
||||
}
|
||||
elementText = "";
|
||||
@@ -189,7 +187,7 @@ function isItemInStock() {
|
||||
// and these mean its out of stock
|
||||
for (const outOfStockText of outOfStockTexts) {
|
||||
if (elementText.includes(outOfStockText)) {
|
||||
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
|
||||
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
|
||||
return outOfStockText; // item is out of stock
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,15 +164,6 @@ visibleElementsArray.forEach(function (element) {
|
||||
}
|
||||
}
|
||||
|
||||
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
|
||||
|
||||
let text = element.textContent.trim().slice(0, 30).trim();
|
||||
while (/\n{2,}|\t{2,}/.test(text)) {
|
||||
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
|
||||
}
|
||||
|
||||
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
|
||||
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
|
||||
|
||||
size_pos.push({
|
||||
xpath: xpath_result,
|
||||
@@ -180,16 +171,9 @@ visibleElementsArray.forEach(function (element) {
|
||||
height: Math.round(bbox['height']),
|
||||
left: Math.floor(bbox['left']),
|
||||
top: Math.floor(bbox['top']) + scroll_y,
|
||||
// tagName used by Browser Steps
|
||||
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
|
||||
// tagtype used by Browser Steps
|
||||
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
|
||||
isClickable: window.getComputedStyle(element).cursor === "pointer",
|
||||
// Used by the keras trainer
|
||||
fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
|
||||
fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
|
||||
hasDigitCurrency: hasDigitCurrency,
|
||||
label: label,
|
||||
isClickable: window.getComputedStyle(element).cursor == "pointer"
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
@@ -729,12 +729,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
for p in datastore.proxy_list:
|
||||
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||
|
||||
# Add some HTML to be used for form validation
|
||||
if datastore.data['watching'][uuid].history.keys():
|
||||
timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
|
||||
form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
|
||||
else:
|
||||
form.last_html_for_form_validation = "<html><body></body></html>"
|
||||
|
||||
if request.method == 'POST' and form.validate():
|
||||
|
||||
@@ -1383,19 +1377,17 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
import brotli
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
|
||||
latest_filename = list(watch.history.keys())[-1]
|
||||
if watch and os.path.isdir(watch.watch_data_dir):
|
||||
latest_filename = list(watch.history.keys())[0]
|
||||
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
|
||||
with open(html_fname, 'rb') as f:
|
||||
if html_fname.endswith('.br'):
|
||||
# Read and decompress the Brotli file
|
||||
if html_fname.endswith('.br'):
|
||||
# Read and decompress the Brotli file
|
||||
with open(html_fname, 'rb') as f:
|
||||
decompressed_data = brotli.decompress(f.read())
|
||||
else:
|
||||
decompressed_data = f.read()
|
||||
|
||||
buffer = BytesIO(decompressed_data)
|
||||
buffer = BytesIO(decompressed_data)
|
||||
|
||||
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
|
||||
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
|
||||
|
||||
|
||||
# Return a 500 error
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
import elementpath
|
||||
|
||||
from changedetectionio.html_tools import xpath_filter, xpath1_filter
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
from wtforms import (
|
||||
@@ -325,39 +322,52 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
self.allow_json = allow_json
|
||||
|
||||
def __call__(self, form, field):
|
||||
from lxml.etree import XPathEvalError
|
||||
|
||||
if isinstance(field.data, str):
|
||||
data = [field.data]
|
||||
else:
|
||||
data = field.data
|
||||
|
||||
for line in data:
|
||||
line = line.strip()
|
||||
# Nothing to see here
|
||||
if not len(line.strip()):
|
||||
return
|
||||
|
||||
if not line:
|
||||
continue
|
||||
|
||||
if line.startswith('xpath') or line.startswith('/'):
|
||||
# Does it look like XPath?
|
||||
if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
|
||||
if not self.allow_xpath:
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
|
||||
if line.startswith('xpath1:'):
|
||||
filter_function = xpath1_filter
|
||||
else:
|
||||
line = line.replace('xpath:', '')
|
||||
filter_function = xpath_filter
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
# Call the determined function
|
||||
res = filter_function(xpath_filter=line, html_content=form.last_html_for_form_validation)
|
||||
# It's OK if this is an empty result, we just want to check that it doesn't crash the parser
|
||||
except (elementpath.ElementPathError,XPathEvalError) as e:
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
except Exception as e:
|
||||
except:
|
||||
raise ValidationError("A system-error occurred when validating your XPath expression")
|
||||
|
||||
elif 'json:' in line:
|
||||
if line.strip().startswith('xpath1:'):
|
||||
if not self.allow_xpath:
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = re.sub(r'^xpath1:', '', line)
|
||||
|
||||
try:
|
||||
tree.xpath(line.strip())
|
||||
except etree.XPathEvalError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
except:
|
||||
raise ValidationError("A system-error occurred when validating your XPath expression")
|
||||
|
||||
if 'json:' in line:
|
||||
if not self.allow_json:
|
||||
raise ValidationError("JSONPath not permitted in this field!")
|
||||
|
||||
@@ -382,7 +392,7 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
if not self.allow_json:
|
||||
raise ValidationError("jq not permitted in this field!")
|
||||
|
||||
elif line.startswith('jq:'):
|
||||
if 'jq:' in line:
|
||||
try:
|
||||
import jq
|
||||
except ModuleNotFoundError:
|
||||
|
||||
@@ -8,7 +8,6 @@ from xml.sax.saxutils import escape as xml_escape
|
||||
import json
|
||||
import re
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
|
||||
@@ -109,20 +108,6 @@ def elementpath_tostring(obj):
|
||||
|
||||
return str(obj)
|
||||
|
||||
def extract_namespaces(xml_content):
|
||||
"""
|
||||
Extracts all namespaces from the XML content.
|
||||
"""
|
||||
from lxml import etree
|
||||
from io import BytesIO
|
||||
|
||||
it = etree.iterparse(BytesIO(xml_content), events=('start-ns',))
|
||||
namespaces = {}
|
||||
for _, ns in it:
|
||||
prefix, uri = ns
|
||||
namespaces[prefix] = uri
|
||||
return namespaces
|
||||
|
||||
# Return str Utf-8 of matched rules
|
||||
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
|
||||
from lxml import etree, html
|
||||
@@ -138,14 +123,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
|
||||
html_block = ""
|
||||
|
||||
# Automatically extract all namespaces from the XML content
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
try:
|
||||
namespaces.update(extract_namespaces(html_content.encode('utf-8')))
|
||||
except Exception as e:
|
||||
logger.warning(f"Problem extracting namespaces from HTMl/XML content {str(e)}")
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
|
||||
#@note: //title/text() wont work where <title>CDATA..
|
||||
|
||||
if type(r) != list:
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
|
||||
from babel.numbers import parse_decimal
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
from typing import Union
|
||||
import re
|
||||
from babel.numbers import parse_decimal
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
||||
def parse_currency(self, raw_value: str) -> float:
|
||||
# Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
|
||||
standardized_value = raw_value
|
||||
|
||||
@@ -22,11 +21,8 @@ class Restock(dict):
|
||||
# Remove any non-numeric characters except for the decimal point
|
||||
standardized_value = re.sub(r'[^\d.-]', '', standardized_value)
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
# Convert to float
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
# Define default values
|
||||
|
||||
@@ -40,16 +40,13 @@ def get_itemprop_availability(html_content) -> Restock:
|
||||
import extruct
|
||||
logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
|
||||
|
||||
value = {}
|
||||
now = time.time()
|
||||
|
||||
# Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
|
||||
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
|
||||
try:
|
||||
data = extruct.extract(html_content, syntaxes=syntaxes)
|
||||
except Exception as e:
|
||||
logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
|
||||
return Restock()
|
||||
|
||||
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
|
||||
|
||||
data = extruct.extract(html_content, syntaxes=syntaxes)
|
||||
logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")
|
||||
|
||||
# First phase, dead simple scanning of anything that looks useful
|
||||
|
||||
@@ -77,12 +77,11 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
# Go into RSS preprocess for converting CDATA/comment to usable text
|
||||
# Ctype_header could be unset if we are just reprocessing the existin content
|
||||
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']) or not ctype_header:
|
||||
top_text = self.fetcher.content[:200].lower().strip()
|
||||
if '<rss' in top_text or 'search.yahoo.com/mrss/' in top_text:
|
||||
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
|
||||
if '<rss' in self.fetcher.content[:100].lower():
|
||||
self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
|
||||
is_rss = True
|
||||
|
||||
# source: support, basically treat it as plaintext
|
||||
if watch.is_source_type_url:
|
||||
is_html = False
|
||||
|
||||
@@ -18,11 +18,9 @@ $(document).ready(function () {
|
||||
|
||||
});
|
||||
|
||||
$(".toggle-show").click(function (e) {
|
||||
$("#notification-token-toggle").click(function (e) {
|
||||
e.preventDefault();
|
||||
let target = $(this).data('target');
|
||||
$(target).toggle();
|
||||
$('#notification-tokens-info').toggle();
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
|
||||
@@ -11,11 +11,8 @@
|
||||
class="notification-urls" )
|
||||
}}
|
||||
<div class="pure-form-message-inline">
|
||||
<p>
|
||||
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
|
||||
</p>
|
||||
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
|
||||
<ul style="display: none" id="advanced-help-notifications">
|
||||
<ul>
|
||||
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
|
||||
@@ -43,7 +40,7 @@
|
||||
|
||||
</div>
|
||||
<div class="pure-controls">
|
||||
<div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
|
||||
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
|
||||
</div>
|
||||
<div class="pure-controls" style="display: none;" id="notification-tokens-info">
|
||||
<table class="pure-table" id="token-table">
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
|
||||
<script>
|
||||
const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
|
||||
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
|
||||
@@ -276,9 +275,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
{% if '/text()' in field %}
|
||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
|
||||
{% endif %}
|
||||
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
|
||||
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
|
||||
<ul id="advanced-help-selectors" style="display: none;">
|
||||
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
|
||||
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
||||
<ul>
|
||||
@@ -298,12 +297,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-control-group">
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
|
||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
default_notification_format,
|
||||
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
|
||||
assert b'not-in-stock' not in res.data
|
||||
|
||||
# We should have a notification
|
||||
wait_for_notification_endpoint_output()
|
||||
time.sleep(2)
|
||||
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
|
||||
os.unlink("test-datastore/notification.txt")
|
||||
|
||||
@@ -103,7 +103,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):
|
||||
set_original_response()
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
time.sleep(5)
|
||||
assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"
|
||||
|
||||
# BUT we should see that it correctly shows "not in stock"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
import os.path
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
from changedetectionio import html_tools
|
||||
|
||||
|
||||
@@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
# Takes a moment for apprise to fire
|
||||
wait_for_notification_endpoint_output()
|
||||
time.sleep(3)
|
||||
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
|
||||
with open("test-datastore/notification.txt", 'rb') as f:
|
||||
response = f.read()
|
||||
|
||||
@@ -69,12 +69,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
|
||||
# Check the 'get latest snapshot works'
|
||||
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
|
||||
assert b'which has this one new line' in res.data
|
||||
|
||||
# Now something should be ready, indicated by having a 'unviewed' class
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
@@ -92,7 +86,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
assert expected_url.encode('utf-8') in res.data
|
||||
|
||||
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
|
||||
res = client.get(url_for("diff_history_page", uuid=uuid))
|
||||
res = client.get(url_for("diff_history_page", uuid="first"))
|
||||
assert b'selected=""' in res.data, "Confirm diff history page loaded"
|
||||
|
||||
# Check the [preview] pulls the right one
|
||||
@@ -149,12 +143,18 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
assert b'unviewed' not in res.data
|
||||
|
||||
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
|
||||
uuid = extract_UUID_from_client(client)
|
||||
client.get(url_for("clear_watch_history", uuid=uuid))
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'preview/' in res.data
|
||||
|
||||
|
||||
# Check the 'get latest snapshot works'
|
||||
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
|
||||
assert b'<head><title>head title</title></head>' in res.data
|
||||
|
||||
#
|
||||
# Cleanup everything
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
|
||||
from .util import set_original_response, live_server_setup
|
||||
from changedetectionio.model import App
|
||||
|
||||
|
||||
@@ -102,15 +102,14 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
wait_for_notification_endpoint_output()
|
||||
time.sleep(3)
|
||||
|
||||
# Shouldn't exist, shouldn't have fired
|
||||
assert not os.path.isfile("test-datastore/notification.txt")
|
||||
# Now the filter should exist
|
||||
set_response_with_filter()
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_notification_endpoint_output()
|
||||
time.sleep(3)
|
||||
|
||||
assert os.path.isfile("test-datastore/notification.txt")
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
|
||||
wait_for_notification_endpoint_output
|
||||
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
|
||||
from changedetectionio.model import App
|
||||
|
||||
|
||||
@@ -108,8 +107,7 @@ def run_filter_test(client, live_server, content_filter):
|
||||
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
wait_for_notification_endpoint_output()
|
||||
time.sleep(2) # delay for apprise to fire
|
||||
# Now it should exist and contain our "filter not found" alert
|
||||
assert os.path.isfile("test-datastore/notification.txt")
|
||||
|
||||
@@ -129,7 +127,6 @@ def run_filter_test(client, live_server, content_filter):
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
wait_for_notification_endpoint_output()
|
||||
# It should have sent a notification, but..
|
||||
assert os.path.isfile("test-datastore/notification.txt")
|
||||
# but it should not contain the info about a failed filter (because there was none in this case)
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
|
||||
from flask import url_for
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
||||
import time
|
||||
|
||||
|
||||
def set_nonrenderable_response():
|
||||
test_return_data = """<html>
|
||||
@@ -13,16 +11,17 @@ def set_nonrenderable_response():
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
time.sleep(1)
|
||||
|
||||
return None
|
||||
|
||||
def set_zero_byte_response():
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write("")
|
||||
time.sleep(1)
|
||||
|
||||
return None
|
||||
|
||||
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
|
||||
|
||||
@@ -3,7 +3,7 @@ import os
|
||||
import time
|
||||
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
from ..notification import default_notification_format
|
||||
|
||||
instock_props = [
|
||||
@@ -182,8 +182,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
|
||||
# price changed to something LESS than min (900), SHOULD be a change
|
||||
set_original_response(props_markup=instock_props[0], price='890.45')
|
||||
# let previous runs wait
|
||||
time.sleep(2)
|
||||
|
||||
time.sleep(1)
|
||||
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
assert b'1 watches queued for rechecking.' in res.data
|
||||
wait_for_all_checks(client)
|
||||
@@ -198,8 +197,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
# Depending on the LOCALE it may be either of these (generally for US/default/etc)
|
||||
assert b'1,890.45' in res.data or b'1890.45' in res.data
|
||||
assert b'1,890.45' or b'1890.45' in res.data
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
@@ -364,7 +362,7 @@ def test_change_with_notification_values(client, live_server):
|
||||
set_original_response(props_markup=instock_props[0], price='1950.45')
|
||||
client.get(url_for("form_watch_checknow"))
|
||||
wait_for_all_checks(client)
|
||||
wait_for_notification_endpoint_output()
|
||||
time.sleep(3)
|
||||
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
|
||||
with open("test-datastore/notification.txt", 'r') as f:
|
||||
notification = f.read()
|
||||
|
||||
@@ -164,46 +164,3 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
|
||||
assert b'Some other description' not in res.data # Should NOT be selected by the xpath
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
def test_namespace_selectors(live_server, client):
|
||||
set_original_cdata_xml()
|
||||
#live_server_setup(live_server)
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
|
||||
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
# because it will look for the namespaced stuff during form validation, but on the first check it wont exist..
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid=uuid),
|
||||
data={
|
||||
"include_filters": "//media:thumbnail/@url",
|
||||
"fetch_backend": "html_requests",
|
||||
"headers": "",
|
||||
"proxy": "no-proxy",
|
||||
"tags": "",
|
||||
"url": test_url,
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'CDATA' not in res.data
|
||||
assert b'<![' not in res.data
|
||||
assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
@@ -76,17 +76,6 @@ def set_more_modified_response():
|
||||
return None
|
||||
|
||||
|
||||
def wait_for_notification_endpoint_output():
|
||||
'''Apprise can take a few seconds to fire'''
|
||||
from os.path import isfile
|
||||
for i in range(1, 20):
|
||||
time.sleep(1)
|
||||
if isfile("test-datastore/notification.txt"):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# kinda funky, but works for now
|
||||
def extract_api_key_from_UI(client):
|
||||
import re
|
||||
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
#
|
||||
# Log levels are in descending order. (TRACE is the most detailed one)
|
||||
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
|
||||
# - LOGGER_LEVEL=TRACE
|
||||
# - LOGGER_LEVEL=DEBUG
|
||||
#
|
||||
# Alternative WebDriver/selenium URL, do not use "'s or 's!
|
||||
# - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
|
||||
@@ -29,9 +29,8 @@ services:
|
||||
#
|
||||
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
|
||||
#
|
||||
# Alternative target "Chrome" Playwright URL, do not use "'s or 's!
|
||||
# "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
|
||||
# - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
|
||||
# Alternative Playwright URL, do not use "'s or 's!
|
||||
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
|
||||
#
|
||||
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
|
||||
#
|
||||
@@ -74,10 +73,10 @@ services:
|
||||
# condition: service_started
|
||||
|
||||
|
||||
# Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
|
||||
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
|
||||
# RECOMMENDED FOR FETCHING PAGES WITH CHROME
|
||||
# sockpuppetbrowser:
|
||||
# hostname: sockpuppetbrowser
|
||||
# playwright-chrome:
|
||||
# hostname: playwright-chrome
|
||||
# image: dgtlmoon/sockpuppetbrowser:latest
|
||||
# cap_add:
|
||||
# - SYS_ADMIN
|
||||
|
||||
@@ -79,9 +79,8 @@ pyppeteerstealth>=0.0.4
|
||||
pytest ~=7.2
|
||||
pytest-flask ~=1.2
|
||||
|
||||
# Anything 4.0 and up but not 5.0
|
||||
jsonschema ~= 4.0
|
||||
|
||||
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
|
||||
jsonschema==4.17.3
|
||||
|
||||
loguru
|
||||
|
||||
|
||||
Reference in New Issue
Block a user