Mirror of https://github.com/dgtlmoon/changedetection.io.git, synced 2025-10-30 22:27:52 +00:00

Compare commits: 33 commits
2528-empty ... socks5-tes
| Author | SHA1 | Date |
|---|---|---|
|  | a8b6c3133f |  |
|  | 9ea68087d8 |  |
|  | 40d23aa2fa |  |
|  | 18e1655844 |  |
|  | a8de06e2e5 |  |
|  | fd25fe8a5f |  |
|  | e173954cdd |  |
|  | e830fb2320 |  |
|  | c6589ee1b4 |  |
|  | dc936a2e8a |  |
|  | 8c1527c1ad |  |
|  | a5ff1cd1d7 |  |
|  | 543cb205d2 |  |
|  | 273adfa0a4 |  |
|  | 8ecfd17973 |  |
|  | 19f3851c9d |  |
|  | 7f2fa20318 |  |
|  | e16814e40b |  |
|  | 337fcab3f1 |  |
|  | eaccd6026c |  |
|  | 5b70625eaa |  |
|  | 60d292107d |  |
|  | 1cb38347da |  |
|  | 55fe2abf42 |  |
|  | 4225900ec3 |  |
|  | 1fb4342488 |  |
|  | 7071df061a |  |
|  | 6dd1fa2b88 |  |
|  | 371f85d544 |  |
|  | 932cf15e1e |  |
|  | bf0d410d32 |  |
|  | 730f37c7ba |  |
|  | 8a35d62e02 |  |
@@ -2,7 +2,7 @@
 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.46.02'
+__version__ = '0.46.04'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
changedetectionio/apprise_plugin/__init__.py (new file, 78 lines added)
@@ -0,0 +1,78 @@
+# include the decorator
+from apprise.decorators import notify
+
+
+@notify(on="delete")
+@notify(on="deletes")
+@notify(on="get")
+@notify(on="gets")
+@notify(on="post")
+@notify(on="posts")
+@notify(on="put")
+@notify(on="puts")
+def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
+    import requests
+    import json
+    from apprise.utils import parse_url as apprise_parse_url
+    from apprise import URLBase
+
+    url = kwargs['meta'].get('url')
+
+    if url.startswith('post'):
+        r = requests.post
+    elif url.startswith('get'):
+        r = requests.get
+    elif url.startswith('put'):
+        r = requests.put
+    elif url.startswith('delete'):
+        r = requests.delete
+
+    url = url.replace('post://', 'http://')
+    url = url.replace('posts://', 'https://')
+    url = url.replace('put://', 'http://')
+    url = url.replace('puts://', 'https://')
+    url = url.replace('get://', 'http://')
+    url = url.replace('gets://', 'https://')
+    url = url.replace('put://', 'http://')
+    url = url.replace('puts://', 'https://')
+    url = url.replace('delete://', 'http://')
+    url = url.replace('deletes://', 'https://')
+
+    headers = {}
+    params = {}
+    auth = None
+
+    # Convert /foobar?+some-header=hello to proper header dictionary
+    results = apprise_parse_url(url)
+    if results:
+        # Add our headers that the user can potentially over-ride if they wish
+        # to to our returned result set and tidy entries by unquoting them
+        headers = {URLBase.unquote(x): URLBase.unquote(y)
+                   for x, y in results['qsd+'].items()}
+
+        # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
+        # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
+        # but here we are making straight requests, so we need todo convert this against apprise's logic
+        for k, v in results['qsd'].items():
+            if not k.strip('+-') in results['qsd+'].keys():
+                params[URLBase.unquote(k)] = URLBase.unquote(v)
+
+        # Determine Authentication
+        auth = ''
+        if results.get('user') and results.get('password'):
+            auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
+        elif results.get('user'):
+            auth = (URLBase.unquote(results.get('user')))
+
+    # Try to auto-guess if it's JSON
+    try:
+        json.loads(body)
+        headers['Content-Type'] = 'application/json; charset=utf-8'
+    except ValueError as e:
+        pass
+
+    r(results.get('url'),
+      auth=auth,
+      data=body.encode('utf-8') if type(body) is str else body,
+      headers=headers,
+      params=params
+      )
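Once this module is imported, the `@notify(...)` decorators register `post://`, `posts://` and the get/put/delete variants as custom Apprise schemes, which the wrapper then rewrites to plain HTTP(S) requests. A minimal usage sketch under those assumptions (the webhook host and API key are placeholders, not values from the diff):

```python
import apprise

# Importing the plugin module is what registers the custom schemes
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper  # noqa: F401

apobj = apprise.Apprise()
# posts:// is rewritten to https:// inside the wrapper; the "+" prefixed query
# argument becomes an HTTP header (here: "x-api-key: secret")
apobj.add('posts://example.com/webhook?+x-api-key=secret')

# The body parses as JSON, so the wrapper adds Content-Type: application/json
apobj.notify(title='Price change', body='{"watch": "abc123", "price": 42}')
```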
@@ -85,7 +85,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
             browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
                 playwright_browser=browsersteps_start_session['browser'],
                 proxy=proxy,
-                start_url=datastore.data['watching'][watch_uuid].get('url')
+                start_url=datastore.data['watching'][watch_uuid].get('url'),
+                headers=datastore.data['watching'][watch_uuid].get('headers')
             )

     # For test
@@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
                           'Click element if exists': '1 0',
                           'Click element': '1 0',
                           'Click element containing text': '0 1',
+                          'Click element containing text if exists': '0 1',
                           'Enter text in field': '1 1',
                           'Execute JS': '0 1',
 #                         'Extract text and use as filter': '1 0',
@@ -96,12 +97,24 @@ class steppable_browser_interface():
         return self.action_goto_url(value=self.start_url)

     def action_click_element_containing_text(self, selector=None, value=''):
+        logger.debug("Clicking element containing text")
         if not len(value.strip()):
             return
         elem = self.page.get_by_text(value)
         if elem.count():
             elem.first.click(delay=randint(200, 500), timeout=3000)

+    def action_click_element_containing_text_if_exists(self, selector=None, value=''):
+        logger.debug("Clicking element containing text if exists")
+        if not len(value.strip()):
+            return
+        elem = self.page.get_by_text(value)
+        logger.debug(f"Clicking element containing text - {elem.count()} elements found")
+        if elem.count():
+            elem.first.click(delay=randint(200, 500), timeout=3000)
+        else:
+            return
+
     def action_enter_text_in_field(self, selector, value):
         if not len(selector.strip()):
             return
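The new "if exists" step leans on Playwright's text locator: `get_by_text()` returns a locator that may match zero or more elements, so checking `count()` before clicking is what makes the step safe to run on pages where the text is absent. The same pattern in a standalone sketch (sync API, placeholder URL and text):

```python
from random import randint
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.goto('https://example.com')  # placeholder URL

    elem = page.get_by_text('Load more')  # placeholder text
    if elem.count():
        # .first disambiguates when several elements contain the text;
        # delay= simulates a human-speed press, timeout= bounds the wait
        elem.first.click(delay=randint(200, 500), timeout=3000)
    browser.close()
```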
@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
     {% if '/text()' in field %}
     <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
     {% endif %}
-    <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
-
-    <ul>
+    <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
+    <div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
+    <ul id="advanced-help-selectors">
         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
             <ul>
@@ -89,11 +89,13 @@ xpath://body/div/span[contains(@class, 'example-class')]",
     {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
 footer
 nav
-.stockticker") }}
+.stockticker
+//*[contains(text(), 'Advertisement')]") }}
     <span class="pure-form-message-inline">
         <ul>
-            <li> Remove HTML element(s) by CSS selector before text conversion. </li>
-            <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
+            <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
+            <li> Don't paste HTML here, use only CSS and XPath selectors </li>
+            <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
         </ul>
     </span>
 </fieldset>
@@ -65,8 +65,8 @@ class Fetcher():

     def __init__(self):
         import importlib.resources
-        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
-        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
+        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
+        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')

     @abstractmethod
     def get_error(self):
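Without an explicit encoding, `read_text()` falls back to the platform's locale encoding, which can mangle the non-ASCII strings these JS resources contain (the out-of-stock list includes strings like 'vorbestellung ist bald möglich'). A small illustration of the difference, using the package and resource names from the diff:

```python
import importlib.resources

pkg = "changedetectionio.content_fetchers.res"
resource = importlib.resources.files(pkg).joinpath("stock-not-in-stock.js")

# Platform-dependent: uses the locale's preferred encoding (e.g. cp1252 on
# Windows), which can raise UnicodeDecodeError on UTF-8 bytes like "möglich"
# text = resource.read_text()

# Deterministic on every platform
text = resource.read_text(encoding="utf-8")
```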
@@ -81,7 +81,8 @@ class Fetcher():
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):
         # Should set self.error, self.status_code and self.content
         pass
@@ -83,7 +83,8 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):

         from playwright.sync_api import sync_playwright
         import playwright._impl._errors
@@ -130,7 +131,7 @@ class fetcher(Fetcher):
             if response is None:
                 context.close()
                 browser.close()
-                logger.debug("Content Fetcher > Response object was none")
+                logger.debug("Content Fetcher > Response object from the browser communication was none")
                 raise EmptyReply(url=url, status_code=None)

             try:
@@ -166,10 +167,10 @@ class fetcher(Fetcher):

                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

-            if len(self.page.content().strip()) == 0:
+            if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
+                logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
                 context.close()
                 browser.close()
-                logger.debug("Content Fetcher > Content was empty")
                 raise EmptyReply(url=url, status_code=response.status)

             # Run Browser Steps here
@@ -75,7 +75,8 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes,
             current_include_filters,
-            is_binary
+            is_binary,
+            empty_pages_are_a_change
             ):

         from changedetectionio.content_fetchers import visualselector_xpath_selectors
@@ -153,7 +154,7 @@ class fetcher(Fetcher):
         if response is None:
             await self.page.close()
             await browser.close()
-            logger.warning("Content Fetcher > Response object was none")
+            logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
             raise EmptyReply(url=url, status_code=None)

         self.headers = response.headers
@@ -186,10 +187,11 @@ class fetcher(Fetcher):

             raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

         content = await self.page.content
-        if len(content.strip()) == 0:
+
+        if not empty_pages_are_a_change and len(content.strip()) == 0:
+            logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
             await self.page.close()
             await browser.close()
-            logger.error("Content Fetcher > Content was empty")
             raise EmptyReply(url=url, status_code=response.status)

         # Run Browser Steps here
@@ -247,7 +249,7 @@ class fetcher(Fetcher):
         await self.fetch_page(**kwargs)

     def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
-            current_include_filters=None, is_binary=False):
+            current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):

         #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
         max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
@@ -262,7 +264,8 @@ class fetcher(Fetcher):
             request_method=request_method,
             ignore_status_codes=ignore_status_codes,
             current_include_filters=current_include_filters,
-            is_binary=is_binary
+            is_binary=is_binary,
+            empty_pages_are_a_change=empty_pages_are_a_change
             ), timeout=max_time))
         except asyncio.TimeoutError:
             raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
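The surrounding pattern, bounding an async browser fetch with a wall-clock budget taken from the environment, is worth seeing in isolation. A minimal sketch (the env var name and error message are from the diff; the coroutine body is a stand-in):

```python
import asyncio
import os

async def fetch_page():
    # Stand-in for the real browser fetch coroutine
    await asyncio.sleep(1)
    return "<html>...</html>"

def run():
    max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))
    try:
        # wait_for cancels the coroutine once the budget is exhausted
        return asyncio.run(asyncio.wait_for(fetch_page(), timeout=max_time))
    except asyncio.TimeoutError:
        raise RuntimeError(f"Browser connected but was unable to process the page in {max_time} seconds.")
```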
@@ -1,9 +1,6 @@
 from loguru import logger
 import hashlib
 import os
-
-import chardet
-import requests

 from changedetectionio import strtobool
 from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
 from changedetectionio.content_fetchers.base import Fetcher
@@ -26,7 +23,11 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):
+
+        import chardet
+        import requests

         if self.browser_steps_get_valid_steps():
             raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -74,7 +75,10 @@ class fetcher(Fetcher):
         self.headers = r.headers

         if not r.content or not len(r.content):
-            raise EmptyReply(url=url, status_code=r.status_code)
+            if not empty_pages_are_a_change:
+                raise EmptyReply(url=url, status_code=r.status_code)
+            else:
+                logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True")

         # @todo test this
         # @todo maybe you really want to test zero-byte return pages?
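The same guard now appears in every fetcher: an empty body raises only while the global setting says empty pages are not themselves a change. Distilled into a self-contained sketch (the exception name is from the diff; the helper function is illustrative, not the project's API):

```python
class EmptyReply(Exception):
    # Simplified stand-in for changedetectionio.content_fetchers.exceptions.EmptyReply
    def __init__(self, url, status_code):
        self.url, self.status_code = url, status_code

def check_content(url, content, status_code, empty_pages_are_a_change=False):
    if not content:
        if not empty_pages_are_a_change:
            # Default: an empty body is treated as a fetch failure, not a change
            raise EmptyReply(url=url, status_code=status_code)
        # Setting enabled: fall through so the diff engine sees the empty snapshot
    return content
```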
@@ -75,6 +75,7 @@ function isItemInStock() {
         'vergriffen',
         'vorbestellen',
         'vorbestellung ist bald möglich',
+        'we don\'t currently have any',
         'we couldn\'t find any products that match',
         'we do not currently have an estimate of when this product will be back in stock.',
         'we don\'t know when or if this item will be back in stock.',
@@ -173,7 +174,8 @@ function isItemInStock() {
       const element = elementsToScan[i];
       // outside the 'fold' or some weird text in the heading area
       // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
-      if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
+      // Note: theres also an automated test that places the 'out of stock' text fairly low down
+      if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
        continue
      }
      elementText = "";
@@ -187,7 +189,7 @@ function isItemInStock() {
       // and these mean its out of stock
       for (const outOfStockText of outOfStockTexts) {
         if (elementText.includes(outOfStockText)) {
-          console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
+          console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
           return outOfStockText; // item is out of stock
         }
       }
@@ -164,6 +164,15 @@ visibleElementsArray.forEach(function (element) {
         }
     }

+    let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
+
+    let text = element.textContent.trim().slice(0, 30).trim();
+    while (/\n{2,}|\t{2,}/.test(text)) {
+        text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
+    }
+
+    // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
+    const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
+
     size_pos.push({
         xpath: xpath_result,
@@ -171,9 +180,16 @@ visibleElementsArray.forEach(function (element) {
         height: Math.round(bbox['height']),
         left: Math.floor(bbox['left']),
         top: Math.floor(bbox['top']) + scroll_y,
+        // tagName used by Browser Steps
         tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
+        // tagtype used by Browser Steps
         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
-        isClickable: window.getComputedStyle(element).cursor == "pointer"
+        isClickable: window.getComputedStyle(element).cursor === "pointer",
+        // Used by the keras trainer
+        fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
+        fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
+        hasDigitCurrency: hasDigitCurrency,
+        label: label,
     });

 });
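The currency heuristic added above is compact enough to test in isolation. A rough Python transliteration of the same two conditions (a digit near either end of the trimmed snippet, plus a currency marker anywhere; the function name is ours, the regexes are from the diff):

```python
import re

def has_digit_currency(text: str) -> bool:
    # Digit within the first or last 6 characters of the snippet
    digit_near_edge = bool(re.search(r'\d', text[:6])) or bool(re.search(r'\d', text[-6:]))
    # Currency symbol or code anywhere in the snippet
    currency_marker = bool(re.search(r'([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)', text))
    return digit_near_edge and currency_marker

assert has_digit_currency("Sale: $4000")        # digit near the end, "$" matches
assert not has_digit_currency("no prices here") # no digits at all
```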
@@ -56,7 +56,8 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):

         from selenium import webdriver
         from selenium.webdriver.chrome.options import Options as ChromeOptions
@@ -537,7 +537,8 @@ def changedetection_app(config=None, datastore_o=None):
         import random
         from .apprise_asset import asset
         apobj = apprise.Apprise(asset=asset)
-
+        # so that the custom endpoints are registered
+        from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
         is_global_settings_form = request.args.get('mode', '') == 'global-settings'
         is_group_settings_form = request.args.get('mode', '') == 'group-settings'
@@ -1377,17 +1378,19 @@ def changedetection_app(config=None, datastore_o=None):
         import brotli

         watch = datastore.data['watching'].get(uuid)
-        if watch and os.path.isdir(watch.watch_data_dir):
-            latest_filename = list(watch.history.keys())[0]
+        if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
+            latest_filename = list(watch.history.keys())[-1]
             html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
-            if html_fname.endswith('.br'):
-                # Read and decompress the Brotli file
-                with open(html_fname, 'rb') as f:
+            with open(html_fname, 'rb') as f:
+                if html_fname.endswith('.br'):
+                    # Read and decompress the Brotli file
                     decompressed_data = brotli.decompress(f.read())
+                else:
+                    decompressed_data = f.read()

-            buffer = BytesIO(decompressed_data)
+                buffer = BytesIO(decompressed_data)

-            return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
+                return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')

         # Return a 500 error
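Reading a snapshot back follows the rule the endpoint now encodes: decompress only when the filename says Brotli, otherwise read raw bytes. A standalone sketch of that logic (the path is a placeholder; `brotli.decompress` is the standard call from the brotli package):

```python
import brotli
from io import BytesIO

def load_snapshot(html_fname: str) -> BytesIO:
    with open(html_fname, 'rb') as f:
        if html_fname.endswith('.br'):
            # Snapshots are stored Brotli-compressed as <timestamp>.html.br
            data = brotli.decompress(f.read())
        else:
            data = f.read()
    return BytesIO(data)

buffer = load_snapshot('/tmp/example-watch/1700000000.html.br')  # placeholder path
```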
@@ -221,7 +221,8 @@ class ValidateAppRiseServers(object):
     def __call__(self, form, field):
         import apprise
         apobj = apprise.Apprise()
-
+        # so that the custom endpoints are registered
+        from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
         for server_url in field.data:
             if not apobj.add(server_url):
                 message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
@@ -468,7 +469,7 @@ class processor_text_json_diff_form(commonSettingsForm):

     include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')

-    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
+    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])

     extract_text = StringListField('Extract text', [ValidateListRegex()])
@@ -479,8 +480,10 @@ class processor_text_json_diff_form(commonSettingsForm):
     body = TextAreaField('Request body', [validators.Optional()])
     method = SelectField('Request method', choices=valid_method, default=default_method)
     ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
-    check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
+    check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
+    remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
     sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
+    trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)

     filter_text_added = BooleanField('Added lines', default=True)
     filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
@@ -575,7 +578,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
     empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
     fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
     global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
-    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
+    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
     ignore_whitespace = BooleanField('Ignore whitespace')
     password = SaltyPasswordField()
     pager_size = IntegerField('Pager size',
@@ -1,10 +1,5 @@

-from bs4 import BeautifulSoup
-from inscriptis import get_text
-from jsonpath_ng.ext import parse
 from typing import List
-from inscriptis.model.config import ParserConfig
-from xml.sax.saxutils import escape as xml_escape
+from lxml import etree
 import json
 import re
@@ -39,6 +34,7 @@ def perl_style_slash_enclosed_regex_to_options(regex):

 # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
 def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
+    from bs4 import BeautifulSoup
     soup = BeautifulSoup(html_content, "html.parser")
     html_block = ""
     r = soup.select(include_filters, separator="")
@@ -56,16 +52,32 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
     return html_block

 def subtractive_css_selector(css_selector, html_content):
+    from bs4 import BeautifulSoup
     soup = BeautifulSoup(html_content, "html.parser")
     for item in soup.select(css_selector):
         item.decompose()
     return str(soup)

+def subtractive_xpath_selector(xpath_selector, html_content):
+    html_tree = etree.HTML(html_content)
+    elements_to_remove = html_tree.xpath(xpath_selector)
+
+    for element in elements_to_remove:
+        element.getparent().remove(element)
+
+    modified_html = etree.tostring(html_tree, method="html").decode("utf-8")
+    return modified_html
+
 def element_removal(selectors: List[str], html_content):
-    """Joins individual filters into one css filter."""
-    selector = ",".join(selectors)
-    return subtractive_css_selector(selector, html_content)
+    """Removes elements that match a list of CSS or xPath selectors."""
+    modified_html = html_content
+    for selector in selectors:
+        if selector.startswith(('xpath:', 'xpath1:', '//')):
+            xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
+            modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
+        else:
+            modified_html = subtractive_css_selector(selector, modified_html)
+    return modified_html

 def elementpath_tostring(obj):
     """
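A quick usage sketch of the new dispatch: CSS and XPath rules can now be mixed in one subtractive list, with a `xpath:`, `xpath1:`, or bare `//` prefix routing a rule to lxml instead of BeautifulSoup (import path assumed from how html_tools is used elsewhere in this diff):

```python
from changedetectionio import html_tools

html = '<div><p class="ad">Buy now!</p><span>Advertisement</span><p>Real content</p></div>'

cleaned = html_tools.element_removal(
    ['.ad',                                      # CSS rule: subtractive_css_selector (bs4)
     "//*[contains(text(), 'Advertisement')]"],  # bare // prefix: subtractive_xpath_selector (lxml)
    html)
# cleaned keeps only <p>Real content</p> inside the div
```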
@@ -181,6 +193,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals

 # Extract/find element
 def extract_element(find='title', html_content=''):
+    from bs4 import BeautifulSoup

     #Re #106, be sure to handle when its not found
     element_text = None
@@ -194,6 +207,8 @@ def extract_element(find='title', html_content=''):

 #
 def _parse_json(json_data, json_filter):
+    from jsonpath_ng.ext import parse
+
     if json_filter.startswith("json:"):
         jsonpath_expression = parse(json_filter.replace('json:', ''))
         match = jsonpath_expression.find(json_data)
@@ -242,6 +257,8 @@ def _get_stripped_text_from_json_match(match):
 # json_filter - ie json:$..price
 # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
 def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
+    from bs4 import BeautifulSoup
+
     stripped_text_from_html = False
     # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
@@ -352,6 +369,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
     return "\n".encode('utf8').join(output)

 def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
+    from xml.sax.saxutils import escape as xml_escape
     pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
     def repl(m):
         text = m.group(1)
@@ -360,6 +378,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
     return re.sub(pattern, repl, html_content)

 def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
+    from inscriptis import get_text
+    from inscriptis.model.config import ParserConfig
+
     """Converts html string to a string with just the text. If ignoring
     rendering anchor tag content is enable, anchor tag content are also
     included in the text
@@ -60,6 +60,8 @@ class watch_base(dict):
             'time_between_check_use_default': True,
             'title': None,
             'track_ldjson_price_data': None,
+            'trim_text_whitespace': False,
+            'remove_duplicate_lines': False,
             'trigger_text': [], # List of text or regex to wait for until a change is detected
             'url': '',
             'uuid': str(uuid.uuid4()),
@@ -1,9 +1,10 @@
-import apprise

 import time
 from apprise import NotifyFormat
 import json
+import apprise
 from loguru import logger

 valid_tokens = {
     'base_url': '',
     'current_snapshot': '',
@@ -34,86 +35,11 @@ valid_notification_formats = {
     default_notification_format_for_watch: default_notification_format_for_watch
 }

-# include the decorator
-from apprise.decorators import notify
-
-@notify(on="delete")
-@notify(on="deletes")
-@notify(on="get")
-@notify(on="gets")
-@notify(on="post")
-@notify(on="posts")
-@notify(on="put")
-@notify(on="puts")
-def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
-    import requests
-    [... the remaining body of apprise_custom_api_call_wrapper, identical to the
-     new changedetectionio/apprise_plugin/__init__.py shown above, is deleted
-     from this module ...]
-

 def process_notification(n_object, datastore):
+    # so that the custom endpoints are registered
+    from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper

     from .safe_jinja import render as jinja_render
     now = time.time()
@@ -26,6 +26,8 @@ class difference_detection_processor():

     def call_browser(self):
+        from requests.structures import CaseInsensitiveDict
+        from changedetectionio.content_fetchers.exceptions import EmptyReply

         # Protect against file:// access
         if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
             if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -133,8 +135,18 @@ class difference_detection_processor():
         is_binary = self.watch.is_pdf

         # And here we go! call the right browser with browser-specific settings
-        self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
-                         is_binary=is_binary)
+        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
+
+        self.fetcher.run(url=url,
+                         timeout=timeout,
+                         request_headers=request_headers,
+                         request_body=request_body,
+                         request_method=request_method,
+                         ignore_status_codes=ignore_status_codes,
+                         current_include_filters=self.watch.get('include_filters'),
+                         is_binary=is_binary,
+                         empty_pages_are_a_change=empty_pages_are_a_change
+                         )

         #@todo .quit here could go on close object, so we can run JS if change-detected
         self.fetcher.quit()
@@ -1,11 +1,12 @@

-from changedetectionio.model.Watch import model as BaseWatch
-import re
 from babel.numbers import parse_decimal
+from changedetectionio.model.Watch import model as BaseWatch
+from typing import Union
+import re

 class Restock(dict):

-    def parse_currency(self, raw_value: str) -> float:
+    def parse_currency(self, raw_value: str) -> Union[float, None]:
         # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
         standardized_value = raw_value
@@ -21,8 +22,11 @@ class Restock(dict):
         # Remove any non-numeric characters except for the decimal point
         standardized_value = re.sub(r'[^\d.-]', '', standardized_value)

-        # Convert to float
-        return float(parse_decimal(standardized_value, locale='en'))
+        if standardized_value:
+            # Convert to float
+            return float(parse_decimal(standardized_value, locale='en'))
+
+        return None

     def __init__(self, *args, **kwargs):
         # Define default values
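babel's `parse_decimal` is what turns the cleaned string into a number, honoring the locale's separators, and the new `if standardized_value:` guard protects it from the empty string that an all-symbol price (say, a bare currency sign) cleans down to. A quick sketch of the behavior:

```python
from babel.numbers import parse_decimal

float(parse_decimal('1,400.00', locale='en'))  # 1400.0 - comma is a group separator in 'en'
float(parse_decimal('1400.00', locale='en'))   # 1400.0

# parse_decimal('') raises NumberFormatError, hence the guard before calling it
```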
@@ -2,8 +2,7 @@ from .. import difference_detection_processor
 from ..exceptions import ProcessorException
 from . import Restock
 from loguru import logger
-import hashlib
-import re

+import urllib3
 import time
@@ -27,6 +26,25 @@ def _search_prop_by_value(matches, value):
             if value in prop[0]:
                 return prop[1] # Yield the desired value and exit the function

+def _deduplicate_prices(data):
+    seen = set()
+    unique_data = []
+
+    for datum in data:
+        # Convert 'value' to float if it can be a numeric string, otherwise leave it as is
+        try:
+            normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
+        except ValueError:
+            normalized_value = datum.value
+
+        # If the normalized value hasn't been seen yet, add it to unique data
+        if normalized_value not in seen:
+            unique_data.append(datum)
+            seen.add(normalized_value)
+
+    return unique_data
+
+
 # should return Restock()
 # add casting?
 def get_itemprop_availability(html_content) -> Restock:
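`_deduplicate_prices` keeps the first match for each normalized value, so the string `'3.99'` and the float `3.99` coming from different metadata blocks collapse into one entry. A self-contained illustration, with `SimpleNamespace` standing in for the jsonpath-ng match objects (which expose the matched value as `.value`):

```python
from types import SimpleNamespace

matches = [SimpleNamespace(value='3.99'),
           SimpleNamespace(value=3.99),
           SimpleNamespace(value='out-of-stock')]

deduped = _deduplicate_prices(matches)
print([m.value for m in deduped])  # ['3.99', 'out-of-stock'] - the float 3.99 was a duplicate
```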
@@ -36,17 +54,21 @@ def get_itemprop_availability(html_content) -> Restock:
     """
     from jsonpath_ng import parse

+    import re
     now = time.time()
     import extruct
     logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")

-    value = {}
     now = time.time()

     # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
     syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
+    try:
+        data = extruct.extract(html_content, syntaxes=syntaxes)
+    except Exception as e:
+        logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
+        return Restock()

-    data = extruct.extract(html_content, syntaxes=syntaxes)
     logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")

     # First phase, dead simple scanning of anything that looks useful
@@ -57,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
     pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
     availability_parse = parse('$..(availability|Availability)')

-    price_result = price_parse.find(data)
+    price_result = _deduplicate_prices(price_parse.find(data))
     if price_result:
         # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
         # parse that for the UI?
@@ -119,6 +141,10 @@ class perform_site_check(difference_detection_processor):
     xpath_data = None

     def run_changedetection(self, watch, skip_when_checksum_same=True):
+        import hashlib
+
+        from concurrent.futures import ProcessPoolExecutor
+        from functools import partial
         if not watch:
             raise Exception("Watch no longer exists.")
@@ -132,6 +158,20 @@ class perform_site_check(difference_detection_processor):
         update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
         update_obj["last_check_status"] = self.fetcher.get_last_status_code()

+        # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
+        # Otherwise it will assume "in stock" because nothing suggesting the opposite was found
+        from ...html_tools import html_to_text
+        text = html_to_text(self.fetcher.content)
+        logger.debug(f"Length of text after conversion: {len(text)}")
+        if not len(text):
+            from ...content_fetchers.exceptions import ReplyWithContentButNoText
+            raise ReplyWithContentButNoText(url=watch.link,
+                                            status_code=self.fetcher.get_last_status_code(),
+                                            screenshot=self.fetcher.screenshot,
+                                            html_content=self.fetcher.content,
+                                            xpath_data=self.fetcher.xpath_data
+                                            )
+
         # Which restock settings to compare against?
         restock_settings = watch.get('restock_settings', {})
@@ -146,7 +186,11 @@ class perform_site_check(difference_detection_processor):

         itemprop_availability = {}
         try:
-            itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
+            with ProcessPoolExecutor() as executor:
+                # Use functools.partial to create a callable with arguments
+                # anything using bs4/lxml etc is quite "leaky"
+                future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
+                itemprop_availability = future.result()
         except MoreThanOnePriceFound as e:
             # Add the real data
             raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
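Running the parse in a short-lived child process is a blunt but effective way to reclaim memory that bs4/lxml/extruct hold onto: when the `with` block exits, the worker process dies and the OS takes everything back. The pattern in isolation (the parse function is a stand-in; `future.result()` re-raises any exception from the worker, which is why the surrounding `try/except` still works):

```python
from concurrent.futures import ProcessPoolExecutor
from functools import partial

def leaky_parse(html: str) -> int:
    # Stand-in for get_itemprop_availability / lxml-heavy parsing
    return len(html)

def parse_in_subprocess(html: str) -> int:
    with ProcessPoolExecutor() as executor:
        # partial() bundles the arguments; submit() ships the call to a worker process
        future = executor.submit(partial(leaky_parse, html))
        return future.result()

if __name__ == "__main__":  # required for spawn-based start methods
    result = parse_in_subprocess("<html><body>example</body></html>")
```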
@@ -36,6 +36,9 @@ class PDFToHTMLToolNotFound(ValueError):
 class perform_site_check(difference_detection_processor):

     def run_changedetection(self, watch, skip_when_checksum_same=True):
+        from concurrent.futures import ProcessPoolExecutor
+        from functools import partial
+
         changed_detected = False
         html_content = ""
         screenshot = False # as bytes
@@ -171,20 +174,30 @@ class perform_site_check(difference_detection_processor):
             for filter_rule in include_filters_rule:
                 # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                 if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
-                    html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
-                                                            html_content=self.fetcher.content,
-                                                            append_pretty_line_formatting=not watch.is_source_type_url,
-                                                            is_rss=is_rss)
+                    with ProcessPoolExecutor() as executor:
+                        # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                        future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
+                                                         html_content=self.fetcher.content,
+                                                         append_pretty_line_formatting=not watch.is_source_type_url,
+                                                         is_rss=is_rss))
+                        html_content += future.result()
                 elif filter_rule.startswith('xpath1:'):
-                    html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
-                                                             html_content=self.fetcher.content,
-                                                             append_pretty_line_formatting=not watch.is_source_type_url,
-                                                             is_rss=is_rss)
+                    with ProcessPoolExecutor() as executor:
+                        # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                        future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
+                                                         html_content=self.fetcher.content,
+                                                         append_pretty_line_formatting=not watch.is_source_type_url,
+                                                         is_rss=is_rss))
+                        html_content += future.result()
                 else:
-                    # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
-                    html_content += html_tools.include_filters(include_filters=filter_rule,
-                                                               html_content=self.fetcher.content,
-                                                               append_pretty_line_formatting=not watch.is_source_type_url)
+                    with ProcessPoolExecutor() as executor:
+                        # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                        # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+                        future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
+                                                         html_content=self.fetcher.content,
+                                                         append_pretty_line_formatting=not watch.is_source_type_url))
+                        html_content += future.result()

             if not html_content.strip():
                 raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
@@ -197,18 +210,27 @@ class perform_site_check(difference_detection_processor):
         else:
             # extract text
             do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
-            stripped_text_from_html = \
-                html_tools.html_to_text(
-                    html_content=html_content,
-                    render_anchor_tag_content=do_anchor,
-                    is_rss=is_rss # #1874 activate the <title workaround hack
-                )
+            with ProcessPoolExecutor() as executor:
+                # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+                # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+                future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
+                                                 render_anchor_tag_content=do_anchor,
+                                                 is_rss=is_rss)) #1874 activate the <title workaround hack
+                stripped_text_from_html = future.result()

-        if watch.get('sort_text_alphabetically') and stripped_text_from_html:
+        if watch.get('trim_text_whitespace'):
+            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())

+        if watch.get('remove_duplicate_lines'):
+            stripped_text_from_html = '\n'.join(dict.fromkeys(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))

+        if watch.get('sort_text_alphabetically'):
             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
-            stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
-            stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
+            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
+            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))

         # Re #340 - return the content before the 'ignore text' was applied
         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
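The two new text normalizations rely on standard-library behavior worth noting: `dict.fromkeys` deduplicates while preserving first-seen order (dicts keep insertion order since Python 3.7), and the sort is made case-insensitive by keying on the lowercased line. For example:

```python
lines = ['Widget  ', 'apple', 'Widget  ', 'Banana']

# Order-preserving dedupe; line.strip() also trims each line first
deduped = '\n'.join(dict.fromkeys(line.strip() for line in lines))
# 'Widget\napple\nBanana'

# Case-insensitive alphabetical sort
sorted_text = '\n'.join(sorted(deduped.splitlines(), key=lambda x: x.lower()))
# 'apple\nBanana\nWidget'
```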
@@ -290,7 +312,7 @@ class perform_site_check(difference_detection_processor):
                 for match in res:
                     regex_matched_output += [match] + [b'\n']

-            # Now we will only show what the regex matched
+            ##########################################################
             stripped_text_from_html = b''
             text_content_before_ignored_filter = b''
             if regex_matched_output:
@@ -298,6 +320,8 @@ class perform_site_check(difference_detection_processor):
                 stripped_text_from_html = b''.join(regex_matched_output)
                 text_content_before_ignored_filter = stripped_text_from_html

+
+
         # Re #133 - if we should strip whitespaces from triggering the change detected comparison
         if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
             fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
@@ -16,25 +16,31 @@ echo "---------------------------------- SOCKS5 -------------------"
 docker run --network changedet-network \
     -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
     --rm \
+    -e "FLASK_SERVER_NAME=cdio" \
+    --hostname cdio \
     -e "SOCKSTEST=proxiesjson" \
     test-changedetectionio \
-    bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
+    bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'

 # SOCKS5 related - by manually entering in UI
 docker run --network changedet-network \
     --rm \
+    -e "FLASK_SERVER_NAME=cdio" \
+    --hostname cdio \
    -e "SOCKSTEST=manual" \
     test-changedetectionio \
-    bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py'
+    bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'

 # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
 docker run --network changedet-network \
     -e "SOCKSTEST=manual-playwright" \
+    --hostname cdio \
+    -e "FLASK_SERVER_NAME=cdio" \
     -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
     -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
     --rm \
     test-changedetectionio \
-    bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
+    bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'

 echo "socks5 server logs"
 docker logs socks5proxy
@@ -18,9 +18,11 @@ $(document).ready(function () {

     });

-    $("#notification-token-toggle").click(function (e) {
+    $(".toggle-show").click(function (e) {
         e.preventDefault();
-        $('#notification-tokens-info').toggle();
+        let target = $(this).data('target');
+        $(target).toggle();
     });

 });
@@ -40,15 +40,29 @@
     }
   }

-  #browser-steps-fieldlist {
-    height: 100%;
-    overflow-y: scroll;
-  }
-
   #browser-steps .flex-wrapper {
     display: flex;
     flex-flow: row;
     height: 70vh;
+    font-size: 80%;

+    #browser-steps-ui {
+      flex-grow: 1; /* Allow it to grow and fill the available space */
+      flex-shrink: 1; /* Allow it to shrink if needed */
+      flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
+      background-color: #eee;
+      border-radius: 5px;
+    }
+
+    #browser-steps-fieldlist {
+      flex-grow: 0; /* Don't allow it to grow */
+      flex-shrink: 0; /* Don't allow it to shrink */
+      flex-basis: auto; /* Base width is determined by the content */
+      max-width: 400px; /* Set a max width to prevent overflow */
+      padding-left: 1rem;
+      overflow-y: scroll;
+    }
   }

 /* this is duplicate :( */
@@ -46,14 +46,31 @@
 #browser_steps li > label {
   display: none; }

-#browser-steps-fieldlist {
-  height: 100%;
-  overflow-y: scroll; }
-
 #browser-steps .flex-wrapper {
   display: flex;
   flex-flow: row;
-  height: 70vh; }
+  height: 70vh;
+  font-size: 80%; }
+  #browser-steps .flex-wrapper #browser-steps-ui {
+    flex-grow: 1;
+    /* Allow it to grow and fill the available space */
+    flex-shrink: 1;
+    /* Allow it to shrink if needed */
+    flex-basis: 0;
+    /* Start with 0 base width so it stretches as much as possible */
+    background-color: #eee;
+    border-radius: 5px; }
+  #browser-steps .flex-wrapper #browser-steps-fieldlist {
+    flex-grow: 0;
+    /* Don't allow it to grow */
+    flex-shrink: 0;
+    /* Don't allow it to shrink */
+    flex-basis: auto;
+    /* Base width is determined by the content */
+    max-width: 400px;
+    /* Set a max width to prevent overflow */
+    padding-left: 1rem;
+    overflow-y: scroll; }

 /* this is duplicate :( */
 #browsersteps-selector-wrapper {
@@ -1194,11 +1211,9 @@ ul {
     color: #fff;
     opacity: 0.7; }

-
 .restock-label svg {
   vertical-align: middle; }

-
 #chrome-extension-link {
   padding: 9px;
   border: 1px solid var(--color-grey-800);
@@ -11,7 +11,6 @@ from threading import Lock
 import json
 import os
 import re
-import requests
 import secrets
 import threading
 import time
@@ -270,6 +269,7 @@ class ChangeDetectionStore:
         self.needs_write_urgent = True

     def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
+        import requests

         if extras is None:
             extras = {}
@@ -11,8 +11,11 @@
                                 class="notification-urls" )
                             }}
                             <div class="pure-form-message-inline">
-                                <ul>
-                                    <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
+                                <p>
+                                    <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
+                                </p>
+                                <div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
+                                <ul style="display: none" id="advanced-help-notifications">
                                     <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
                                     <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
                                     <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -40,7 +43,7 @@

 </div>
 <div class="pure-controls">
-    <div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
+    <div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
 </div>
 <div class="pure-controls" style="display: none;" id="notification-tokens-info">
     <table class="pure-table" id="token-table">
@@ -4,6 +4,7 @@
 {% from '_common_fields.html' import render_common_settings_form %}
 <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
+<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
 <script>
     const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
     const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@@ -199,7 +200,7 @@ User-Agent: wonderbra 1.0") }}
     <div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
     <div class="flex-wrapper" >

-        <div id="browser-steps-ui" class="noselect" style="width: 100%; background-color: #eee; border-radius: 5px;">
+        <div id="browser-steps-ui" class="noselect">

             <div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
                 <span class="loader" >
@@ -214,7 +215,7 @@ User-Agent: wonderbra 1.0") }}
             <canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
         </div>
     </div>
-    <div id="browser-steps-fieldlist" style="padding-left: 1em; width: 350px; font-size: 80%;" >
+    <div id="browser-steps-fieldlist" >
         <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
         {{ render_field(form.browser_steps) }}
     </div>
@@ -275,9 +276,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
     {% if '/text()' in field %}
     <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
     {% endif %}
-    <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
-
-    <ul>
+    <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
+    <p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
+    <ul id="advanced-help-selectors" style="display: none;">
         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
             <ul>
@@ -297,21 +298,25 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||
<li>
|
||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Don't paste HTML here, use only CSS selectors </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
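The updated help text says subtractive selectors may be CSS or XPath; the element_removal helper (exercised by a test further below) takes both kinds in a single list. A minimal sketch, with invented sample HTML:

from changedetectionio.html_tools import element_removal

html = "<html><body><nav>menu</nav><div>Advertisement</div><p>keep me</p></body></html>"
# CSS and XPath rules can be mixed; a leading "//", "xpath:" or "xpath1:" marks an XPath rule
cleaned = element_removal(["nav", "//*[contains(text(), 'Advertisement')]"], html_content=html)
print(cleaned)  # the <p>keep me</p> element remains; nav and the ad div are gone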
@@ -326,11 +331,22 @@ nav
<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>; consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset>

<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around when you want to know when NEW content is added; compares new lines against all history for this watch.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.remove_duplicate_lines) }}
<span class="pure-form-message-inline">Remove duplicate lines of text</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.sort_text_alphabetically) }}
<span class="pure-form-message-inline">Helps reduce changes caused by sites shuffling lines around; combine with <i>check unique lines</i> below.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around when you want to know when NEW content is added; compares new lines against all history for this watch.</span>
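A sketch (assumed behaviour, not the application's actual code) of three of the text-processing toggles above applied to fetched lines:

lines = ["  b  ", "a", "a", "c  "]
lines = [l.strip() for l in lines]    # trim_text_whitespace
lines = list(dict.fromkeys(lines))    # remove_duplicate_lines (order preserved)
lines = sorted(lines, key=str.lower)  # sort_text_alphabetically
print(lines)  # ['a', 'b', 'c']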
@@ -76,7 +76,7 @@
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
</div>
{% if form.requests.proxy %}
<div class="pure-control-group inline-radio">
@@ -155,11 +155,13 @@
{{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
@@ -1,12 +1,27 @@
#!/usr/bin/env python3
import os
import time
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks


def set_response():
    import time
    data = f"""<html>
       <body>
     <h1>Awesome, you made it</h1>
     yeah the socks request worked
     </body>
     </html>
    """

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(data)
    time.sleep(1)


def test_socks5(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
    set_response()

    # Setup a proxy
    res = client.post(
@@ -24,7 +39,10 @@ def test_socks5(client, live_server, measure_memory_usage):

    assert b"Settings updated." in res.data

    test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
    # Because the socks server should connect back to us
    test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
    test_url = test_url.replace('localhost.localdomain', 'cdio')
    test_url = test_url.replace('localhost', 'cdio')

    res = client.post(
        url_for("form_quick_watch_add"),
@@ -60,4 +78,4 @@ def test_socks5(client, live_server, measure_memory_usage):
    )

    # Should see the proper string
    assert "+0200:".encode('utf-8') in res.data
    assert "Awesome, you made it".encode('utf-8') in res.data
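The localhost-to-cdio rewrite above matters because the SOCKS proxy runs in another container: a callback URL pointing at localhost would resolve inside the proxy container rather than the test runner. A small illustration (the port and path are invented):

test_url = "http://localhost:5005/test-endpoint"
test_url = test_url.replace('localhost.localdomain', 'cdio')
test_url = test_url.replace('localhost', 'cdio')
assert test_url == "http://cdio:5005/test-endpoint"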
@@ -1,16 +1,32 @@
#!/usr/bin/env python3
import os
import time
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks


def set_response():
    import time
    data = f"""<html>
       <body>
     <h1>Awesome, you made it</h1>
     yeah the socks request worked
     </body>
     </html>
    """

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(data)
    time.sleep(1)

# should be proxies.json mounted from run_proxy_tests.sh already
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
    live_server_setup(live_server)

    test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
    set_response()
    # Because the socks server should connect back to us
    test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
    test_url = test_url.replace('localhost.localdomain', 'cdio')
    test_url = test_url.replace('localhost', 'cdio')

    res = client.get(url_for("settings_page"))
    assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data
@@ -49,4 +65,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
    )

    # Should see the proper string
    assert "+0200:".encode('utf-8') in res.data
    assert "Awesome, you made it".encode('utf-8') in res.data
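For reference, a guess at the shape of the proxies.json-example mounted above; only the top-level key is confirmed by this test (it must match the radio value "socks5proxy" asserted on the settings page), the other fields are assumptions:

import json

proxies = {
    "socks5proxy": {                                   # key becomes the selectable proxy id
        "label": "socks5proxy",                        # assumed field
        "url": "socks5://user:pass@socksproxy:1080",   # assumed field
    }
}
with open("test-datastore/proxies.json", "w") as f:
    json.dump(proxies, f, indent=2)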
@@ -2,7 +2,7 @@
import os
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from changedetectionio.notification import (
    default_notification_body,
    default_notification_format,
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    assert b'not-in-stock' not in res.data

    # We should have a notification
    time.sleep(2)
    wait_for_notification_endpoint_output()
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
    os.unlink("test-datastore/notification.txt")

@@ -103,6 +103,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    set_original_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    time.sleep(5)
    assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"

    # BUT we should see that it correctly shows "not in stock"
@@ -2,7 +2,7 @@
import os.path
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from changedetectionio import html_tools


@@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
    assert b'unviewed' in res.data

    # Takes a moment for apprise to fire
    time.sleep(3)
    wait_for_notification_endpoint_output()
    assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
    with open("test-datastore/notification.txt", 'rb') as f:
        response = f.read()
@@ -69,6 +69,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

    wait_for_all_checks(client)

    uuid = extract_UUID_from_client(client)

    # Check the 'get latest snapshot' works
    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
    assert b'which has this one new line' in res.data

    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
@@ -86,7 +92,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    assert expected_url.encode('utf-8') in res.data

    # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
    res = client.get(url_for("diff_history_page", uuid="first"))
    res = client.get(url_for("diff_history_page", uuid=uuid))
    assert b'selected=""' in res.data, "Confirm diff history page loaded"

    # Check the [preview] pulls the right one
@@ -143,18 +149,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    assert b'unviewed' not in res.data

    # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
    uuid = extract_UUID_from_client(client)
    client.get(url_for("clear_watch_history", uuid=uuid))
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'preview/' in res.data


    # Check the 'get latest snapshot' works
    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
    assert b'<head><title>head title</title></head>' in res.data

    #
    # Cleanup everything
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -87,6 +87,9 @@ def test_element_removal_output():
       Some initial text<br>
       <p>across multiple lines</p>
       <div id="changetext">Some text that changes</div>
       <div>Some text should be matched by xPath // selector</div>
       <div>Some text should be matched by xPath selector</div>
       <div>Some text should be matched by xPath1 selector</div>
     </body>
     <footer>
     <p>Footer</p>
@@ -94,7 +97,16 @@ def test_element_removal_output():
    </html>
    """
    html_blob = element_removal(
        ["header", "footer", "nav", "#changetext"], html_content=content
        [
            "header",
            "footer",
            "nav",
            "#changetext",
            "//*[contains(text(), 'xPath // selector')]",
            "xpath://*[contains(text(), 'xPath selector')]",
            "xpath1://*[contains(text(), 'xPath1 selector')]"
        ],
        html_content=content
    )
    text = get_text(html_blob)
    assert (
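The three XPath spellings exercised above ("//...", "xpath:...", "xpath1:...") suggest a simple dispatch rule for mixed selector lists. A sketch of how such a list could be partitioned; an illustration only, not the library's actual code:

def is_xpath(rule: str) -> bool:
    # "//" (implicit), "xpath:" and "xpath1:" (XPath 1.0) mark XPath rules
    return rule.strip().startswith(('//', 'xpath:', 'xpath1:'))

rules = ["nav", "#changetext", "//*[contains(text(), 'x')]", "xpath1://p"]
xpath_rules = [r for r in rules if is_xpath(r)]
css_rules = [r for r in rules if not is_xpath(r)]
print(xpath_rules, css_rules)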
@@ -4,7 +4,7 @@
import os
import time
from flask import url_for
from .util import set_original_response, live_server_setup
from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
from changedetectionio.model import App


@@ -102,14 +102,15 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    time.sleep(3)
    wait_for_notification_endpoint_output()

    # Shouldn't exist, shouldn't have fired
    assert not os.path.isfile("test-datastore/notification.txt")
    # Now the filter should exist
    set_response_with_filter()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(3)

    wait_for_notification_endpoint_output()

    assert os.path.isfile("test-datastore/notification.txt")
@@ -1,7 +1,9 @@
import os
import time
from loguru import logger
from flask import url_for
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
    wait_for_notification_endpoint_output
from changedetectionio.model import App


@@ -26,6 +28,12 @@ def run_filter_test(client, live_server, content_filter):
    # Response WITHOUT the filter ID element
    set_original_response()

    # Goto the edit page, add our ignore text
    notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)

    # cleanup for the next
    client.get(
        url_for("form_delete", uuid="all"),
@@ -34,83 +42,90 @@ def run_filter_test(client, live_server, content_filter):
    if os.path.isfile("test-datastore/notification.txt"):
        os.unlink("test-datastore/notification.txt")

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": ''},
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )

    assert b"Watch added" in res.data

    # Give the thread time to pick up the first version
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
    url = url_for('test_notification_endpoint', _external=True)
    notification_url = url.replace('http', 'json')
    uuid = extract_UUID_from_client(client)

    print(">>>> Notification URL: " + notification_url)
    assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"

    # Just a regular notification setting, this will be used by the special 'filter not found' notification
    notification_form_data = {"notification_urls": notification_url,
                              "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
                              "notification_body": "BASE URL: {{base_url}}\n"
                                                   "Watch URL: {{watch_url}}\n"
                                                   "Watch UUID: {{watch_uuid}}\n"
                                                   "Watch title: {{watch_title}}\n"
                                                   "Watch tag: {{watch_tag}}\n"
                                                   "Preview: {{preview_url}}\n"
                                                   "Diff URL: {{diff_url}}\n"
                                                   "Snapshot: {{current_snapshot}}\n"
                                                   "Diff: {{diff}}\n"
                                                   "Diff Full: {{diff_full}}\n"
                                                   "Diff as Patch: {{diff_patch}}\n"
                                                   ":-)",
                              "notification_format": "Text"}
    watch_data = {"notification_urls": notification_url,
                  "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
                  "notification_body": "BASE URL: {{base_url}}\n"
                                       "Watch URL: {{watch_url}}\n"
                                       "Watch UUID: {{watch_uuid}}\n"
                                       "Watch title: {{watch_title}}\n"
                                       "Watch tag: {{watch_tag}}\n"
                                       "Preview: {{preview_url}}\n"
                                       "Diff URL: {{diff_url}}\n"
                                       "Snapshot: {{current_snapshot}}\n"
                                       "Diff: {{diff}}\n"
                                       "Diff Full: {{diff_full}}\n"
                                       "Diff as Patch: {{diff_patch}}\n"
                                       ":-)",
                  "notification_format": "Text",
                  "fetch_backend": "html_requests",
                  "filter_failure_notification_send": 'y',
                  "headers": "",
                  "tags": "my tag",
                  "title": "my title 123",
                  "time_between_check-hours": 5,  # So that the queue runner doesn't also put it in
                  "url": test_url,
                  }

    notification_form_data.update({
        "url": test_url,
        "tags": "my tag",
        "title": "my title 123",
        "headers": "",
        "filter_failure_notification_send": 'y',
        "include_filters": content_filter,
        "fetch_backend": "html_requests"})

    # A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
    res = client.post(
        url_for("edit_page", uuid="first"),
        data=notification_form_data,
        url_for("edit_page", uuid=uuid),
        data=watch_data,
        follow_redirects=True
    )

    assert b"Updated watch." in res.data
    wait_for_all_checks(client)
    assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"

    # Now the notification should not exist, because we didn't reach the threshold
    # Now add a filter, because recheck hours == 5, ONLY pressing of the [edit] or [recheck all] should trigger
    watch_data['include_filters'] = content_filter
    res = client.post(
        url_for("edit_page", uuid=uuid),
        data=watch_data,
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # It should have checked once so far and given this error (because we hit SAVE)

    wait_for_all_checks(client)
    assert not os.path.isfile("test-datastore/notification.txt")

    # Hitting [save] would have triggered a recheck, and we have a filter, so this would be ONE failure
    assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 1, "Should have been checked once"

    # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
    for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
    # Add 4 more checks
    checked = 0
    ATTEMPT_THRESHOLD_SETTING = live_server.app.config['DATASTORE'].data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
    for i in range(0, ATTEMPT_THRESHOLD_SETTING - 2):
        checked += 1
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)
        time.sleep(2)  # delay for apprise to fire
        assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"
        res = client.get(url_for("index"))
        assert b'Warning, no filters were found' in res.data
        assert not os.path.isfile("test-datastore/notification.txt")

    # We should see something in the frontend
    res = client.get(url_for("index"))
    assert b'Warning, no filters were found' in res.data
    assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5

    # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    time.sleep(2)  # delay for apprise to fire
    wait_for_notification_endpoint_output()

    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")

    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()

@@ -123,10 +138,11 @@ def run_filter_test(client, live_server, content_filter):
    set_response_with_filter()

    # Try several times, it should NOT have 'filter not found'
    for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
    for i in range(0, ATTEMPT_THRESHOLD_SETTING + 2):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

    wait_for_notification_endpoint_output()
    # It should have sent a notification, but..
    assert os.path.isfile("test-datastore/notification.txt")
    # but it should not contain the info about a failed filter (because there was none in this case)
@@ -135,9 +151,6 @@ def run_filter_test(client, live_server, content_filter):
    assert not 'CSS/xPath filter was not present in the page' in notification

    # Re #1247 - All tokens got replaced correctly in the notification
    res = client.get(url_for("index"))
    uuid = extract_UUID_from_client(client)
    # UUID is correct, but notification contains tag uuid as UUIID wtf
    assert uuid in notification

    # cleanup for the next
@@ -152,9 +165,11 @@ def test_setup(live_server):
    live_server_setup(live_server)

def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
    # live_server_setup(live_server)
    run_filter_test(client, live_server, '#nope-doesnt-exist')

def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
    # live_server_setup(live_server)
    run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')

# Test that notification is never sent
@@ -1,11 +1,8 @@
#!/usr/bin/env python3

import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup

sleep_time_for_fetch_thread = 3
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time


def set_nonrenderable_response():
@@ -16,12 +13,18 @@ def set_nonrenderable_response():
    </body>
    </html>
    """

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
    time.sleep(1)

    return None

def set_zero_byte_response():
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("")
    time.sleep(1)
    return None

def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
    set_original_response()
    live_server_setup(live_server)
@@ -35,18 +38,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

    assert b"1 Imported" in res.data

    time.sleep(sleep_time_for_fetch_thread)
    wait_for_all_checks(client)

    # Do this a few times.. ensures we don't accidentally set the status
    for n in range(3):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)

        # Give the thread time to pick it up
        time.sleep(sleep_time_for_fetch_thread)

        # It should report nothing found (no new 'unviewed' class)
        res = client.get(url_for("index"))
        assert b'unviewed' not in res.data
    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data


    #####################
@@ -64,7 +60,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)
    wait_for_all_checks(client)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
@@ -86,14 +82,20 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)
    wait_for_all_checks(client)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
    client.get(url_for("mark_all_viewed"), follow_redirects=True)



    # A totally zero byte (#2528) response should also not trigger an error
    set_zero_byte_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data  # A change should have registered because empty_pages_are_a_change is ON
    assert b'fetch-error' not in res.data

    #
    # Cleanup everything
@@ -3,7 +3,7 @@ import os
import time

from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from ..notification import default_notification_format

instock_props = [
@@ -146,14 +146,13 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # A change in price, should trigger a change by default
    wait_for_all_checks(client)

    data = {
        "tags": "",
        "url": test_url,
        "headers": "",
        "time_between_check-hours": 5,
        'fetch_backend': "html_requests"
    }
    data.update(extra_watch_edit_form)
@@ -178,11 +177,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
    assert b'1,000.45' or b'1000.45' in res.data  # depending on locale
    assert b'unviewed' not in res.data


    # price changed to something LESS than min (900), SHOULD be a change
    set_original_response(props_markup=instock_props[0], price='890.45')
    # let previous runs wait
    time.sleep(1)

    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data
    wait_for_all_checks(client)
@@ -197,7 +194,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'1,890.45' or b'1890.45' in res.data
    # Depending on the LOCALE it may be either of these (generally for US/default/etc)
    assert b'1,890.45' in res.data or b'1890.45' in res.data
    assert b'unviewed' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -362,7 +360,7 @@ def test_change_with_notification_values(client, live_server):
    set_original_response(props_markup=instock_props[0], price='1950.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    time.sleep(3)
    wait_for_notification_endpoint_output()
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()
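The assertion fix in the hunk above is worth spelling out: Python parses the old form as b'1,890.45' or (b'1890.45' in res.data), and a non-empty bytes literal is always truthy, so the assert could never fail. A demonstration:

res_data = b"no prices here"
assert b'1,890.45' or b'1890.45' in res_data                          # old form: passes even with no match
assert (b'1,890.45' in res_data or b'1890.45' in res_data) is False   # fixed form: correctly False here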
@@ -11,6 +11,8 @@ def set_original_ignore_response():
     <p>Some initial text</p>
     <p>Which is across multiple lines</p>
     <p>So let's see what happens.</p>
     <p> So let's see what happens. <br> </p>
     <p>A - sortable line</p>
     </body>
     </html>
    """
@@ -164,5 +166,52 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
    assert res.data.find(b'A uppercase') < res.data.find(b'Z last')
    assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines')

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data


def test_extra_filters(client, live_server, measure_memory_usage):
    #live_server_setup(live_server)

    set_original_ignore_response()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Set the extra text-processing options on the edit page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"remove_duplicate_lines": "y",
              "trim_text_whitespace": "y",
              "sort_text_alphabetically": "",  # leave this OFF for testing
              "url": test_url,
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    # Give the thread time to pick it up
    wait_for_all_checks(client)
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    res = client.get(
        url_for("preview_page", uuid="first")
    )

    assert res.data.count(b"see what happens.") == 1

    # still should remain unsorted ('A - sortable line') stays at the end
    assert res.data.find(b'A - sortable line') > res.data.find(b'Which is across multiple lines')

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
@@ -76,6 +76,17 @@ def set_more_modified_response():
    return None


def wait_for_notification_endpoint_output():
    '''Apprise can take a few seconds to fire'''
    from os.path import isfile
    for i in range(1, 20):
        time.sleep(1)
        if isfile("test-datastore/notification.txt"):
            return True

    return False


# kinda funky, but works for now
def extract_api_key_from_UI(client):
    import re
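The helper above replaces the fixed time.sleep() calls scattered through the tests: it polls for up to roughly 20 seconds and reports whether Apprise wrote the notification file. Typical use:

from changedetectionio.tests.util import wait_for_notification_endpoint_output

assert wait_for_notification_endpoint_output(), "Apprise should have written test-datastore/notification.txt"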
@@ -1,6 +1,5 @@
from .processors.exceptions import ProcessorException
from . import content_fetchers

import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools

@@ -190,7 +189,9 @@ class update_worker(threading.Thread):
                    'screenshot': None
                })
                self.notification_q.put(n_object)
                logger.error(f"Sent filter not found notification for {watch_uuid}")
                logger.debug(f"Sent filter not found notification for {watch_uuid}")
            else:
                logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")

    def send_step_failure_notification(self, watch_uuid, step_n):
        watch = self.datastore.data['watching'].get(watch_uuid, False)
@@ -301,7 +302,7 @@ class update_worker(threading.Thread):
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
                        process_changedetection_results = False

                    except content_fetchers.exceptions.ReplyWithContentButNoText as e:
                    except content_fetchers_exceptions.ReplyWithContentButNoText as e:
                        # Totally fine, it's by choice - just continue on, nothing more to care about
                        # Page had elements/content but no renderable text
                        # Backend (not filters) gave zero output
@@ -327,7 +328,7 @@ class update_worker(threading.Thread):

                        process_changedetection_results = False

                    except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
                    except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
                        if e.status_code == 403:
                            err_text = "Error - 403 (Access denied) received"
                        elif e.status_code == 404:
@@ -365,38 +366,42 @@ class update_worker(threading.Thread):

                        # Only when enabled, send the notification
                        if watch.get('filter_failure_notification_send', False):
                            c = watch.get('consecutive_filter_failures', 5)
                            c = watch.get('consecutive_filter_failures', 0)
                            c += 1
                            # Send notification if we reached the threshold?
                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                           0)
                            logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
                            if threshold > 0 and c >= threshold:
                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
                            logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
                            if c >= threshold:
                                if not watch.get('notification_muted'):
                                    logger.debug(f"Sending filter failed notification for {uuid}")
                                    self.send_filter_failure_notification(uuid)
                                c = 0
                                logger.debug(f"Reset filter failure count back to zero")

                            self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
                        else:
                            logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")

                        process_changedetection_results = False

                    except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e:
                    except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
                        # Yes fine, so nothing to do, don't continue to process.
                        process_changedetection_results = False
                        changed_detected = False
                    except content_fetchers.exceptions.BrowserConnectError as e:
                    except content_fetchers_exceptions.BrowserConnectError as e:
                        self.datastore.update_watch(uuid=uuid,
                                                    update_obj={'last_error': e.msg
                                                                }
                                                    )
                        process_changedetection_results = False
                    except content_fetchers.exceptions.BrowserFetchTimedOut as e:
                    except content_fetchers_exceptions.BrowserFetchTimedOut as e:
                        self.datastore.update_watch(uuid=uuid,
                                                    update_obj={'last_error': e.msg
                                                                }
                                                    )
                        process_changedetection_results = False
                    except content_fetchers.exceptions.BrowserStepsStepException as e:
                    except content_fetchers_exceptions.BrowserStepsStepException as e:

                        if not self.datastore.data['watching'].get(uuid):
                            continue
@@ -423,7 +428,7 @@ class update_worker(threading.Thread):
                        )

                        if watch.get('filter_failure_notification_send', False):
                            c = watch.get('consecutive_filter_failures', 5)
                            c = watch.get('consecutive_filter_failures', 0)
                            c += 1
                            # Send notification if we reached the threshold?
                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
@@ -438,25 +443,25 @@ class update_worker(threading.Thread):

                        process_changedetection_results = False

                    except content_fetchers.exceptions.EmptyReply as e:
                    except content_fetchers_exceptions.EmptyReply as e:
                        # Some kind of custom to-str handler in the exception handler that does this?
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetchers.exceptions.ScreenshotUnavailable as e:
                    except content_fetchers_exceptions.ScreenshotUnavailable as e:
                        err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetchers.exceptions.JSActionExceptions as e:
                    except content_fetchers_exceptions.JSActionExceptions as e:
                        err_text = "Error running JS Actions - Page request - " + e.message
                        if e.screenshot:
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetchers.exceptions.PageUnloadable as e:
                    except content_fetchers_exceptions.PageUnloadable as e:
                        err_text = "Page request from server didn't respond correctly"
                        if e.message:
                            err_text = "{} - {}".format(err_text, e.message)
@@ -468,7 +473,7 @@ class update_worker(threading.Thread):
                                                                           'last_check_status': e.status_code,
                                                                           'has_ldjson_price_data': None})
                        process_changedetection_results = False
                    except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e:
                    except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
                        err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                        process_changedetection_results = False
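Condensed, the failure-counter change in the hunks above behaves as follows (names follow the diff; the indentation of the reset is inferred from the "Reset filter failure count back to zero" log line):

def on_filter_not_found(watch, settings, send_notification):
    c = watch.get('consecutive_filter_failures', 0) + 1   # default was 5 before this change; now counts from 0
    threshold = settings.get('filter_failure_notification_threshold_attempts', 0)
    if c >= threshold:                                    # the old code additionally required threshold > 0
        if not watch.get('notification_muted'):
            send_notification()
        c = 0                                             # reset once the threshold fires
    watch['consecutive_filter_failures'] = c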
@@ -18,7 +18,7 @@ services:
    #
    # Log levels are in descending order. (TRACE is the most detailed one)
    # Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
    # - LOGGER_LEVEL=DEBUG
    # - LOGGER_LEVEL=TRACE
    #
    # Alternative WebDriver/selenium URL, do not use "'s or 's!
    # - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
@@ -29,8 +29,9 @@ services:
    #
    # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
    #
    # Alternative Playwright URL, do not use "'s or 's!
    # - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
    # Alternative target "Chrome" Playwright URL, do not use "'s or 's!
    # "Playwright" is a driver/library that allows changedetection to talk to a Chrome or similar browser.
    # - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
    #
    # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
    #
@@ -73,10 +74,10 @@ services:
    #       condition: service_started


    # Used for fetching pages via Playwright+Chrome where you need Javascript support.
    # Sockpuppetbrowser is basically Chrome wrapped in an API, allowing fast fetching of web pages.
    # RECOMMENDED FOR FETCHING PAGES WITH CHROME
    # playwright-chrome:
    #     hostname: playwright-chrome
    # sockpuppetbrowser:
    #     hostname: sockpuppetbrowser
    #     image: dgtlmoon/sockpuppetbrowser:latest
    #     cap_add:
    #         - SYS_ADMIN
@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
# jq not available on Windows so must be installed manually

# Notification library
apprise~=1.8.1
apprise==1.9.0

# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
# and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
@@ -79,8 +79,9 @@ pyppeteerstealth>=0.0.4
pytest ~=7.2
pytest-flask ~=1.2

# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
jsonschema==4.17.3
# Anything 4.0 and up but not 5.0
jsonschema ~= 4.0


loguru
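Since the Apprise requirement moves from a compatible-release range to an exact pin, a quick runtime sanity check:

import apprise

# requirements.txt now pins apprise exactly; verify the running environment matches
assert apprise.__version__ == "1.9.0", apprise.__version__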
|
||||
Reference in New Issue
Block a user