Compare commits

...

24 Commits

Author SHA1 Message Date
dgtlmoon
515b1bc87f Merge branch 'master' into extra-filters 2024-09-17 18:32:14 +02:00
Dawid Wróbel
a5ff1cd1d7 browser_steps: add "click element containing text if exists" (#2629) 2024-09-17 18:30:54 +02:00
dgtlmoon
543cb205d2 Testing - Fixing Restock test #2641 2024-09-17 18:29:12 +02:00
dgtlmoon
273adfa0a4 Testing - Fix false filter missing check alerts 2024-09-17 16:55:04 +02:00
Felipe Tuffani
8ecfd17973 Restock/Price detection - Fix duplicated prices with different data type on single page product #2636 (#2638) 2024-09-17 11:22:54 +02:00
dgtlmoon
19f3851c9d Memory management improvements - LXML and other libraries can leak allocation, wrap in a sub-process (#2626) 2024-09-11 16:20:49 +02:00
dgtlmoon
7f2fa20318 Small memory allocation fixes (#2625) 2024-09-11 14:51:32 +02:00
dgtlmoon
ea87b301d8 Merge branch 'master' into extra-filters 2024-09-11 11:32:12 +02:00
dgtlmoon
e16814e40b Testing - locale fix for test (#2623) 2024-09-11 11:31:07 +02:00
dgtlmoon
5108201f0b Filters should apply at the end of the chain 2024-09-10 14:28:58 +02:00
dgtlmoon
7289e4e193 Fix bad example 2024-09-10 12:32:07 +02:00
dgtlmoon
34e684eb37 rearrange 2024-09-09 22:19:50 +02:00
dgtlmoon
f032a1b1b3 Adding form.remove_duplicate_lines 2024-09-09 22:16:16 +02:00
dgtlmoon
0506c01c07 Added form.trim_text_whitespace 2024-09-09 22:10:36 +02:00
dgtlmoon
09aae40c4a tweak style 2024-09-09 22:03:59 +02:00
dgtlmoon
9270d4053b smarter check? 2024-09-09 20:34:43 +02:00
dgtlmoon
160c267e9f add elay 2024-09-09 18:09:03 +02:00
dgtlmoon
97f47e7b3b Merge branch 'master' into text-filter-preview 2024-09-09 17:28:58 +02:00
dgtlmoon
337fcab3f1 Testing/Code - Improving test reliability (#2617) 2024-09-09 16:50:00 +02:00
dgtlmoon
7a496e3e15 tweak error messages 2024-09-09 11:35:28 +02:00
dgtlmoon
2c564d5c3f Abort existing requests so it doesnt train-wreck 2024-09-07 15:47:54 +02:00
dgtlmoon
59b8971a96 test and label tweaks 2024-09-07 15:42:58 +02:00
dgtlmoon
801791f904 live preview of text filters 2024-09-06 22:53:28 +02:00
dgtlmoon
eaccd6026c UI - Hiding noisy info under 'show advanced help' button (#2609) 2024-09-06 14:33:06 +02:00
29 changed files with 570 additions and 222 deletions

View File

@@ -0,0 +1,78 @@
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
import json
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
url = kwargs['meta'].get('url')
if url.startswith('post'):
r = requests.post
elif url.startswith('get'):
r = requests.get
elif url.startswith('put'):
r = requests.put
elif url.startswith('delete'):
r = requests.delete
url = url.replace('post://', 'http://')
url = url.replace('posts://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('get://', 'http://')
url = url.replace('gets://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('delete://', 'http://')
url = url.replace('deletes://', 'https://')
headers = {}
params = {}
auth = None
# Convert /foobar?+some-header=hello to proper header dictionary
results = apprise_parse_url(url)
if results:
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')))
# Try to auto-guess if it's JSON
try:
json.loads(body)
headers['Content-Type'] = 'application/json; charset=utf-8'
except ValueError as e:
pass
r(results.get('url'),
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)

View File

@@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
'Click element if exists': '1 0',
'Click element': '1 0',
'Click element containing text': '0 1',
'Click element containing text if exists': '0 1',
'Enter text in field': '1 1',
'Execute JS': '0 1',
# 'Extract text and use as filter': '1 0',
@@ -96,12 +97,24 @@ class steppable_browser_interface():
return self.action_goto_url(value=self.start_url)
def action_click_element_containing_text(self, selector=None, value=''):
logger.debug("Clicking element containing text")
if not len(value.strip()):
return
elem = self.page.get_by_text(value)
if elem.count():
elem.first.click(delay=randint(200, 500), timeout=3000)
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
logger.debug("Clicking element containing text if exists")
if not len(value.strip()):
return
elem = self.page.get_by_text(value)
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
if elem.count():
elem.first.click(delay=randint(200, 500), timeout=3000)
else:
return
def action_enter_text_in_field(self, selector, value):
if not len(selector.strip()):
return

View File

@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul id="advanced-help-selectors">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>

View File

@@ -1,8 +1,6 @@
from loguru import logger
import chardet
import hashlib
import os
import requests
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher
@@ -28,6 +26,9 @@ class fetcher(Fetcher):
is_binary=False,
empty_pages_are_a_change=False):
import chardet
import requests
if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url)

View File

@@ -1,6 +1,8 @@
#!/usr/bin/env python3
import datetime
import importlib
import flask_login
import locale
import os
@@ -10,7 +12,9 @@ import threading
import time
import timeago
from .content_fetchers.exceptions import ReplyWithContentButNoText
from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
from .processors.text_json_diff.processor import FilterNotFoundInResponse
from .safe_jinja import render as jinja_render
from changedetectionio.strtobool import strtobool
from copy import deepcopy
@@ -537,7 +541,8 @@ def changedetection_app(config=None, datastore_o=None):
import random
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
@@ -1395,6 +1400,57 @@ def changedetection_app(config=None, datastore_o=None):
# Return a 500 error
abort(500)
@app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
@login_optionally_required
def watch_get_preview_rendered(uuid):
'''For when viewing the "preview" of the rendered text from inside of Edit'''
now = time.time()
import brotli
from . import forms
text_after_filter = ''
tmp_watch = deepcopy(datastore.data['watching'].get(uuid))
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
# Splice in the temporary stuff from the form
form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
data=request.form
)
# Only update vars that came in via the AJAX post
p = {k: v for k, v in form.data.items() if k in request.form.keys()}
tmp_watch.update(p)
latest_filename = next(reversed(tmp_watch.history))
html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
# Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid # probably not needed anymore anyway?
)
# Use the last loaded HTML as the input
update_handler.fetcher.content = decompressed_data
try:
changed_detected, update_obj, contents, text_after_filter = update_handler.run_changedetection(
watch=tmp_watch,
skip_when_checksum_same=False,
)
except FilterNotFoundInResponse as e:
text_after_filter = f"Filter not found in HTML: {str(e)}"
except ReplyWithContentButNoText as e:
text_after_filter = f"Filter found but no text (empty result)"
except Exception as e:
text_after_filter = f"Error: {str(e)}"
if not text_after_filter.strip():
text_after_filter = 'Empty content'
logger.trace(f"Parsed in {time.time()-now:.3f}s")
return text_after_filter.strip()
@app.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required
def form_quick_watch_add():

View File

@@ -221,7 +221,8 @@ class ValidateAppRiseServers(object):
def __call__(self, form, field):
import apprise
apobj = apprise.Apprise()
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
for server_url in field.data:
if not apobj.add(server_url):
message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
@@ -479,8 +480,10 @@ class processor_text_json_diff_form(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)

View File

@@ -1,10 +1,4 @@
from bs4 import BeautifulSoup
from inscriptis import get_text
from jsonpath_ng.ext import parse
from typing import List
from inscriptis.model.config import ParserConfig
from xml.sax.saxutils import escape as xml_escape
import json
import re
@@ -39,6 +33,7 @@ def perl_style_slash_enclosed_regex_to_options(regex):
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
html_block = ""
r = soup.select(include_filters, separator="")
@@ -56,6 +51,7 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
return html_block
def subtractive_css_selector(css_selector, html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
for item in soup.select(css_selector):
item.decompose()
@@ -181,6 +177,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
# Extract/find element
def extract_element(find='title', html_content=''):
from bs4 import BeautifulSoup
#Re #106, be sure to handle when its not found
element_text = None
@@ -194,6 +191,8 @@ def extract_element(find='title', html_content=''):
#
def _parse_json(json_data, json_filter):
from jsonpath_ng.ext import parse
if json_filter.startswith("json:"):
jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data)
@@ -242,6 +241,8 @@ def _get_stripped_text_from_json_match(match):
# json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
from bs4 import BeautifulSoup
stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
@@ -352,6 +353,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
return "\n".encode('utf8').join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape
pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
def repl(m):
text = m.group(1)
@@ -360,6 +362,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content)
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
"""Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also
included in the text

View File

@@ -59,6 +59,7 @@ class watch_base(dict):
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
'time_between_check_use_default': True,
'title': None,
'trim_text_whitespace': False,
'track_ldjson_price_data': None,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',

View File

@@ -1,9 +1,10 @@
import apprise
import time
from apprise import NotifyFormat
import json
import apprise
from loguru import logger
valid_tokens = {
'base_url': '',
'current_snapshot': '',
@@ -34,86 +35,11 @@ valid_notification_formats = {
default_notification_format_for_watch: default_notification_format_for_watch
}
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
url = kwargs['meta'].get('url')
if url.startswith('post'):
r = requests.post
elif url.startswith('get'):
r = requests.get
elif url.startswith('put'):
r = requests.put
elif url.startswith('delete'):
r = requests.delete
url = url.replace('post://', 'http://')
url = url.replace('posts://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('get://', 'http://')
url = url.replace('gets://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('delete://', 'http://')
url = url.replace('deletes://', 'https://')
headers = {}
params = {}
auth = None
# Convert /foobar?+some-header=hello to proper header dictionary
results = apprise_parse_url(url)
if results:
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')))
# Try to auto-guess if it's JSON
try:
json.loads(body)
headers['Content-Type'] = 'application/json; charset=utf-8'
except ValueError as e:
pass
r(results.get('url'),
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)
def process_notification(n_object, datastore):
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
from .safe_jinja import render as jinja_render
now = time.time()

View File

@@ -1,4 +1,6 @@
from abc import abstractmethod
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.strtobool import strtobool
from copy import deepcopy
@@ -23,10 +25,11 @@ class difference_detection_processor():
super().__init__(*args, **kwargs)
self.datastore = datastore
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
# Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher()
def call_browser(self):
from requests.structures import CaseInsensitiveDict
from changedetectionio.content_fetchers.exceptions import EmptyReply
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
@@ -159,7 +162,7 @@ class difference_detection_processor():
some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
changed_detected = False
return changed_detected, update_obj, ''.encode('utf-8')
return changed_detected, update_obj, ''.encode('utf-8'), b''
def find_sub_packages(package_name):

View File

@@ -2,8 +2,7 @@ from .. import difference_detection_processor
from ..exceptions import ProcessorException
from . import Restock
from loguru import logger
import hashlib
import re
import urllib3
import time
@@ -27,6 +26,25 @@ def _search_prop_by_value(matches, value):
if value in prop[0]:
return prop[1] # Yield the desired value and exit the function
def _deduplicate_prices(data):
seen = set()
unique_data = []
for datum in data:
# Convert 'value' to float if it can be a numeric string, otherwise leave it as is
try:
normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
except ValueError:
normalized_value = datum.value
# If the normalized value hasn't been seen yet, add it to unique data
if normalized_value not in seen:
unique_data.append(datum)
seen.add(normalized_value)
return unique_data
# should return Restock()
# add casting?
def get_itemprop_availability(html_content) -> Restock:
@@ -36,6 +54,7 @@ def get_itemprop_availability(html_content) -> Restock:
"""
from jsonpath_ng import parse
import re
now = time.time()
import extruct
logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
@@ -60,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
availability_parse = parse('$..(availability|Availability)')
price_result = price_parse.find(data)
price_result = _deduplicate_prices(price_parse.find(data))
if price_result:
# Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
# parse that for the UI?
@@ -122,6 +141,10 @@ class perform_site_check(difference_detection_processor):
xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True):
import hashlib
from concurrent.futures import ProcessPoolExecutor
from functools import partial
if not watch:
raise Exception("Watch no longer exists.")
@@ -149,7 +172,11 @@ class perform_site_check(difference_detection_processor):
itemprop_availability = {}
try:
itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments
# anything using bs4/lxml etc is quite "leaky"
future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
itemprop_availability = future.result()
except MoreThanOnePriceFound as e:
# Add the real data
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
@@ -263,4 +290,4 @@ class perform_site_check(difference_detection_processor):
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
return changed_detected, update_obj, snapshot_content.encode('utf-8').strip(), b''

View File

@@ -36,6 +36,9 @@ class PDFToHTMLToolNotFound(ValueError):
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, skip_when_checksum_same=True):
from concurrent.futures import ProcessPoolExecutor
from functools import partial
changed_detected = False
html_content = ""
screenshot = False # as bytes
@@ -171,20 +174,30 @@ class perform_site_check(difference_detection_processor):
for filter_rule in include_filters_rule:
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
is_rss=is_rss))
html_content += future.result()
elif filter_rule.startswith('xpath1:'):
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
is_rss=is_rss))
html_content += future.result()
else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content += html_tools.include_filters(include_filters=filter_rule,
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url)
append_pretty_line_formatting=not watch.is_source_type_url))
html_content += future.result()
if not html_content.strip():
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
@@ -197,18 +210,13 @@ class perform_site_check(difference_detection_processor):
else:
# extract text
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
stripped_text_from_html = \
html_tools.html_to_text(
html_content=html_content,
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
render_anchor_tag_content=do_anchor,
is_rss=is_rss # #1874 activate the <title workaround hack
)
if watch.get('sort_text_alphabetically') and stripped_text_from_html:
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
is_rss=is_rss)) #1874 activate the <title workaround hack
stripped_text_from_html = future.result()
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
@@ -236,7 +244,7 @@ class perform_site_check(difference_detection_processor):
# We had some content, but no differences were found
# Store our new file as the MD5 so it will trigger in the future
c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest()
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8'), stripped_text_from_html.encode('utf-8')
else:
stripped_text_from_html = rendered_diff
@@ -298,6 +306,20 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html = b''.join(regex_matched_output)
text_content_before_ignored_filter = stripped_text_from_html
if watch.get('sort_text_alphabetically') and stripped_text_from_html:
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace(b'\n\n', b'\n')
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.decode('utf-8').splitlines(), key=lambda x: x.lower())).encode('utf-8')
#
if watch.get('trim_text_whitespace') and stripped_text_from_html:
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.decode('utf-8').splitlines()).encode('utf-8')
#
if watch.get('remove_duplicate_lines') and stripped_text_from_html:
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.decode('utf-8').splitlines())).encode('utf-8')
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
@@ -357,4 +379,4 @@ class perform_site_check(difference_detection_processor):
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter
return changed_detected, update_obj, text_content_before_ignored_filter, stripped_text_from_html

View File

@@ -18,9 +18,11 @@ $(document).ready(function () {
});
$("#notification-token-toggle").click(function (e) {
$(".toggle-show").click(function (e) {
e.preventDefault();
$('#notification-tokens-info').toggle();
let target = $(this).data('target');
$(target).toggle();
});
});

View File

@@ -12,6 +12,50 @@ function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
checkbox.addEventListener('change', updateOpacity);
}
(function($) {
// Object to store ongoing requests by namespace
const requests = {};
$.abortiveSingularAjax = function(options) {
const namespace = options.namespace || 'default';
// Abort the current request in this namespace if it's still ongoing
if (requests[namespace]) {
requests[namespace].abort();
}
// Start a new AJAX request and store its reference in the correct namespace
requests[namespace] = $.ajax(options);
// Return the current request in case it's needed
return requests[namespace];
};
})(jQuery);
function request_textpreview_update() {
const data = {};
$('textarea:visible, input:visible').each(function () {
const $element = $(this); // Cache the jQuery object for the current element
const name = $element.attr('name'); // Get the name attribute of the element
data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : undefined) : $element.val();
});
$.abortiveSingularAjax({
type: "POST",
url: preview_text_edit_filters_url,
data: data,
namespace: 'watchEdit'
}).done(function (data) {
$('#filters-and-triggers #text-preview-inner').text(data);
}).fail(function (error) {
if (error.statusText === 'abort') {
console.log('Request was aborted due to a new request being fired.');
} else {
$('#filters-and-triggers #text-preview-inner').text('There was an error communicating with the server.');
}
})
}
$(document).ready(function () {
$('#notification-setting-reset-to-default').click(function (e) {
$('#notification_title').val('');
@@ -27,5 +71,22 @@ $(document).ready(function () {
toggleOpacity('#time_between_check_use_default', '#time_between_check', false);
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
$("#text-preview-inner").css('max-height', (vh-300)+"px");
$("#activate-text-preview").click(function (e) {
$(this).fadeOut();
$('body').toggleClass('preview-text-enabled')
request_textpreview_update();
$("#text-preview-refresh").click(function (e) {
request_textpreview_update();
});
$('textarea:visible, input:visible').on('keyup keypress blur change click', function (e) {
request_textpreview_update();
});
});
});

View File

@@ -0,0 +1,45 @@
body.preview-text-enabled {
#filters-and-triggers > div {
display: flex; /* Establishes Flexbox layout */
gap: 20px; /* Adds space between the columns */
position: relative; /* Ensures the sticky positioning is relative to this parent */
}
/* layout of the page */
#edit-text-filter, #text-preview {
flex: 1; /* Each column takes an equal amount of available space */
align-self: flex-start; /* Aligns the right column to the start, allowing it to maintain its content height */
}
#edit-text-filter {
#pro-tips {
display: none;
}
}
#text-preview {
position: sticky;
top: 25px;
display: block !important;
}
/* actual preview area */
#text-preview-inner {
background: var(--color-grey-900);
border: 1px solid var(--color-grey-600);
padding: 1rem;
color: #333;
font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
font-size: 12px;
overflow-x: scroll;
white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
}
}
#activate-text-preview {
right: 0;
position: absolute;
z-index: 0;
box-shadow: 1px 1px 4px var(--color-shadow-jump);
}

View File

@@ -12,6 +12,7 @@
@import "parts/_darkmode";
@import "parts/_menu";
@import "parts/_love";
@import "parts/preview_text_filter";
body {
color: var(--color-text);

View File

@@ -411,6 +411,47 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {
fill: #ff0000 !important;
transition: all ease 0.3s !important; }
body.preview-text-enabled {
/* layout of the page */
/* actual preview area */ }
body.preview-text-enabled #filters-and-triggers > div {
display: flex;
/* Establishes Flexbox layout */
gap: 20px;
/* Adds space between the columns */
position: relative;
/* Ensures the sticky positioning is relative to this parent */ }
body.preview-text-enabled #edit-text-filter, body.preview-text-enabled #text-preview {
flex: 1;
/* Each column takes an equal amount of available space */
align-self: flex-start;
/* Aligns the right column to the start, allowing it to maintain its content height */ }
body.preview-text-enabled #edit-text-filter #pro-tips {
display: none; }
body.preview-text-enabled #text-preview {
position: sticky;
top: 25px;
display: block !important; }
body.preview-text-enabled #text-preview-inner {
background: var(--color-grey-900);
border: 1px solid var(--color-grey-600);
padding: 1rem;
color: #333;
font-family: "Courier New", Courier, monospace;
/* Sets the font to a monospace type */
font-size: 12px;
overflow-x: scroll;
white-space: pre-wrap;
/* Preserves whitespace and line breaks like <pre> */
overflow-wrap: break-word;
/* Allows long words to break and wrap to the next line */ }
#activate-text-preview {
right: 0;
position: absolute;
z-index: 0;
box-shadow: 1px 1px 4px var(--color-shadow-jump); }
body {
color: var(--color-text);
background: var(--color-background-page);
@@ -1194,11 +1235,9 @@ ul {
color: #fff;
opacity: 0.7; }
.restock-label svg {
vertical-align: middle; }
#chrome-extension-link {
padding: 9px;
border: 1px solid var(--color-grey-800);

View File

@@ -11,7 +11,6 @@ from threading import Lock
import json
import os
import re
import requests
import secrets
import threading
import time
@@ -270,6 +269,7 @@ class ChangeDetectionStore:
self.needs_write_urgent = True
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
import requests
if extras is None:
extras = {}

View File

@@ -11,8 +11,11 @@
class="notification-urls" )
}}
<div class="pure-form-message-inline">
<ul>
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<p>
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
</p>
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul style="display: none" id="advanced-help-notifications">
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -40,7 +43,7 @@
</div>
<div class="pure-controls">
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
<div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
</div>
<div class="pure-controls" style="display: none;" id="notification-tokens-info">
<table class="pure-table" id="token-table">

View File

@@ -33,7 +33,7 @@
<script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
</head>
<body>
<body class="">
<div class="header">
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
{% if has_password and not current_user.is_authenticated %}

View File

@@ -4,6 +4,7 @@
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
<script>
const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@@ -253,7 +254,10 @@ User-Agent: wonderbra 1.0") }}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner" id="filters-and-triggers">
<div class="pure-control-group">
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">Activate preview</span>
<div>
<div id="edit-text-filter">
<div class="pure-control-group" id="pro-tips">
<strong>Pro-tips:</strong><br>
<ul>
<li>
@@ -275,9 +279,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
<ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
@@ -297,9 +301,12 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
</span>
</div>
<fieldset class="pure-control-group">
@@ -326,14 +333,21 @@ nav
<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.remove_duplicate_lines) }}
<span class="pure-form-message-inline">Remove duplicate lines of text</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.sort_text_alphabetically) }}
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
{{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset>
<fieldset>
<div class="pure-control-group">
@@ -383,7 +397,7 @@ Unavailable") }}
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
{{ render_field(form.extract_text, rows=5, placeholder="Example: /\d+ online/") }}
<span class="pure-form-message-inline">
<ul>
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
@@ -403,7 +417,19 @@ Unavailable") }}
</fieldset>
</div>
</div>
{% endif %}
<div id="text-preview" style="display: none;" >
<script>
const preview_text_edit_filters_url="{{url_for('watch_get_preview_rendered', uuid=uuid)}}";
</script>
<span><strong>Preview of the text that is used for changedetection after all filters run.</strong></span><br>
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
<p>
<div id="text-preview-inner"></div>
</p>
</div>
</div>
</div>
{% endif %}
{# rendered sub Template #}
{% if extra_form_content %}
<div class="tab-pane-inner" id="extras_tab">

View File

@@ -2,7 +2,7 @@
import os
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
assert b'not-in-stock' not in res.data
# We should have a notification
time.sleep(2)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
os.unlink("test-datastore/notification.txt")
@@ -103,6 +103,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(5)
assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"
# BUT we should see that it correctly shows "not in stock"

View File

@@ -2,7 +2,7 @@
import os.path
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from changedetectionio import html_tools
@@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'unviewed' in res.data
# Takes a moment for apprise to fire
time.sleep(3)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
with open("test-datastore/notification.txt", 'rb') as f:
response = f.read()

View File

@@ -4,7 +4,7 @@
import os
import time
from flask import url_for
from .util import set_original_response, live_server_setup
from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
from changedetectionio.model import App
@@ -102,14 +102,15 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
follow_redirects=True
)
assert b"Updated watch." in res.data
time.sleep(3)
wait_for_notification_endpoint_output()
# Shouldn't exist, shouldn't have fired
assert not os.path.isfile("test-datastore/notification.txt")
# Now the filter should exist
set_response_with_filter()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(3)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt")

View File

@@ -1,7 +1,9 @@
import os
import time
from loguru import logger
from flask import url_for
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
wait_for_notification_endpoint_output
from changedetectionio.model import App
@@ -26,6 +28,12 @@ def run_filter_test(client, live_server, content_filter):
# Response WITHOUT the filter ID element
set_original_response()
# Goto the edit page, add our ignore text
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),
@@ -34,83 +42,92 @@ def run_filter_test(client, live_server, content_filter):
if os.path.isfile("test-datastore/notification.txt"):
os.unlink("test-datastore/notification.txt")
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": ''},
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"Watch added" in res.data
# Give the thread time to pick up the first version
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Goto the edit page, add our ignore text
# Add our URL to the import page
url = url_for('test_notification_endpoint', _external=True)
notification_url = url.replace('http', 'json')
uuid = extract_UUID_from_client(client)
print(">>>> Notification URL: " + notification_url)
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
# Just a regular notification setting, this will be used by the special 'filter not found' notification
notification_form_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
"notification_body": "BASE URL: {{base_url}}\n"
"Watch URL: {{watch_url}}\n"
"Watch UUID: {{watch_uuid}}\n"
"Watch title: {{watch_title}}\n"
"Watch tag: {{watch_tag}}\n"
"Preview: {{preview_url}}\n"
"Diff URL: {{diff_url}}\n"
"Snapshot: {{current_snapshot}}\n"
"Diff: {{diff}}\n"
"Diff Full: {{diff_full}}\n"
"Diff as Patch: {{diff_patch}}\n"
":-)",
"notification_format": "Text"}
watch_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
"notification_body": "BASE URL: {{base_url}}\n"
"Watch URL: {{watch_url}}\n"
"Watch UUID: {{watch_uuid}}\n"
"Watch title: {{watch_title}}\n"
"Watch tag: {{watch_tag}}\n"
"Preview: {{preview_url}}\n"
"Diff URL: {{diff_url}}\n"
"Snapshot: {{current_snapshot}}\n"
"Diff: {{diff}}\n"
"Diff Full: {{diff_full}}\n"
"Diff as Patch: {{diff_patch}}\n"
":-)",
"notification_format": "Text",
"fetch_backend": "html_requests",
"filter_failure_notification_send": 'y',
"headers": "",
"tags": "my tag",
"title": "my title 123",
"time_between_check-hours": 5, # So that the queue runner doesnt also put it in
"url": test_url,
}
notification_form_data.update({
"url": test_url,
"tags": "my tag",
"title": "my title 123",
"headers": "",
"filter_failure_notification_send": 'y',
"include_filters": content_filter,
"fetch_backend": "html_requests"})
# A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
res = client.post(
url_for("edit_page", uuid="first"),
data=notification_form_data,
url_for("edit_page", uuid=uuid),
data=watch_data,
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
# Now the notification should not exist, because we didnt reach the threshold
# Now add a filter, because recheck hours == 5, ONLY pressing of the [edit] or [recheck all] should trigger
watch_data['include_filters'] = content_filter
res = client.post(
url_for("edit_page", uuid=uuid),
data=watch_data,
follow_redirects=True
)
assert b"Updated watch." in res.data
# It should have checked once so far and given this error (because we hit SAVE)
wait_for_all_checks(client)
assert not os.path.isfile("test-datastore/notification.txt")
# Hitting [save] would have triggered a recheck, and we have a filter, so this would be ONE failure
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 1, "Should have been checked once"
# recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
# Add 4 more checks
checked = 0
ATTEMPT_THRESHOLD_SETTING = live_server.app.config['DATASTORE'].data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
for i in range(0, ATTEMPT_THRESHOLD_SETTING - 2):
checked += 1
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire
assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"
# We should see something in the frontend
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data
assert not os.path.isfile("test-datastore/notification.txt")
time.sleep(1)
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
time.sleep(2)
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire
wait_for_notification_endpoint_output()
# Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt")
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()
@@ -123,10 +140,11 @@ def run_filter_test(client, live_server, content_filter):
set_response_with_filter()
# Try several times, it should NOT have 'filter not found'
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
for i in range(0, ATTEMPT_THRESHOLD_SETTING + 2):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
# It should have sent a notification, but..
assert os.path.isfile("test-datastore/notification.txt")
# but it should not contain the info about a failed filter (because there was none in this case)
@@ -135,9 +153,6 @@ def run_filter_test(client, live_server, content_filter):
assert not 'CSS/xPath filter was not present in the page' in notification
# Re #1247 - All tokens got replaced correctly in the notification
res = client.get(url_for("index"))
uuid = extract_UUID_from_client(client)
# UUID is correct, but notification contains tag uuid as UUIID wtf
assert uuid in notification
# cleanup for the next
@@ -152,9 +167,11 @@ def test_setup(live_server):
live_server_setup(live_server)
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
run_filter_test(client, live_server,'#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent

View File

@@ -2,6 +2,8 @@
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time
def set_nonrenderable_response():
test_return_data = """<html>
@@ -11,17 +13,16 @@ def set_nonrenderable_response():
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
time.sleep(1)
return None
def set_zero_byte_response():
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("")
time.sleep(1)
return None
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):

View File

@@ -3,7 +3,7 @@ import os
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from ..notification import default_notification_format
instock_props = [
@@ -146,14 +146,13 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True
)
# A change in price, should trigger a change by default
wait_for_all_checks(client)
data = {
"tags": "",
"url": test_url,
"headers": "",
"time_between_check-hours": 5,
'fetch_backend': "html_requests"
}
data.update(extra_watch_edit_form)
@@ -178,11 +177,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
assert b'1,000.45' or b'1000.45' in res.data #depending on locale
assert b'unviewed' not in res.data
# price changed to something LESS than min (900), SHOULD be a change
set_original_response(props_markup=instock_props[0], price='890.45')
# let previous runs wait
time.sleep(1)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
@@ -197,7 +194,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'1,890.45' or b'1890.45' in res.data
# Depending on the LOCALE it may be either of these (generally for US/default/etc)
assert b'1,890.45' in res.data or b'1890.45' in res.data
assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -362,7 +360,7 @@ def test_change_with_notification_values(client, live_server):
set_original_response(props_markup=instock_props[0], price='1950.45')
client.get(url_for("form_watch_checknow"))
wait_for_all_checks(client)
time.sleep(3)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()

View File

@@ -76,6 +76,18 @@ def set_more_modified_response():
return None
def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
#@todo - could check the apprise object directly instead of looking for this file
from os.path import isfile
for i in range(1, 20):
time.sleep(1)
if isfile("test-datastore/notification.txt"):
return True
return False
# kinda funky, but works for now
def extract_api_key_from_UI(client):
import re

View File

@@ -189,7 +189,9 @@ class update_worker(threading.Thread):
'screenshot': None
})
self.notification_q.put(n_object)
logger.error(f"Sent filter not found notification for {watch_uuid}")
logger.debug(f"Sent filter not found notification for {watch_uuid}")
else:
logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.datastore.data['watching'].get(watch_uuid, False)
@@ -276,7 +278,7 @@ class update_worker(threading.Thread):
update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection(
changed_detected, update_obj, contents, content_after_filters = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=skip_when_same_checksum,
)
@@ -364,18 +366,22 @@ class update_worker(threading.Thread):
# Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5)
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold:
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
if c >= threshold:
if not watch.get('notification_muted'):
logger.debug(f"Sending filter failed notification for {uuid}")
self.send_filter_failure_notification(uuid)
c = 0
logger.debug(f"Reset filter failure count back to zero")
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
else:
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
process_changedetection_results = False
@@ -422,7 +428,7 @@ class update_worker(threading.Thread):
)
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5)
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',