Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
ff49d6d4c5 Merge branch 'master' into fix-file-access 2024-07-18 11:03:58 +02:00
dgtlmoon
d0ecee7794 Tweak test file 2024-07-18 10:59:32 +02:00
dgtlmoon
d55f4727a2 Fixing file:// file pickup 2024-07-18 09:41:21 +02:00
91 changed files with 229 additions and 498 deletions

View File

@@ -95,7 +95,7 @@ jobs:
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
cache-from: type=gha
cache-to: type=gha,mode=max
@@ -116,7 +116,7 @@ jobs:
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
ghcr.io/dgtlmoon/changedetection.io:latest
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
cache-from: type=gha
cache-to: type=gha,mode=max
# Looks like this was disabled

View File

@@ -64,7 +64,7 @@ jobs:
with:
context: ./
file: ./Dockerfile
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache

View File

@@ -43,7 +43,7 @@ Requires Playwright to be enabled.
### Awesome restock and price change notifications
Enable the _"Re-stock & Price detection for single product pages"_ option to activate the best way to monitor product pricing, this will extract any meta-data in the HTML page and give you many options to follow the pricing of the product.
Enable the _"Re-stock & Price detection for single product pages"_ option to activate the best way to monitor product pricing.
Easily organise and monitor prices for products from the dashboard, get alerts and notifications when the price of a product changes or comes back in stock again!

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# Only exists for direct CLI usage

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.46.04'
__version__ = '0.45.26'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -85,8 +85,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
playwright_browser=browsersteps_start_session['browser'],
proxy=proxy,
start_url=datastore.data['watching'][watch_uuid].get('url'),
headers=datastore.data['watching'][watch_uuid].get('headers')
start_url=datastore.data['watching'][watch_uuid].get('url')
)
# For test

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os
import time

View File

@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul id="advanced-help-selectors">
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>

View File

@@ -65,8 +65,8 @@ class Fetcher():
def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
@abstractmethod
def get_error(self):
@@ -81,8 +81,7 @@ class Fetcher():
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
is_binary=False):
# Should set self.error, self.status_code and self.content
pass

View File

@@ -83,8 +83,7 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
is_binary=False):
from playwright.sync_api import sync_playwright
import playwright._impl._errors
@@ -131,7 +130,7 @@ class fetcher(Fetcher):
if response is None:
context.close()
browser.close()
logger.debug("Content Fetcher > Response object from the browser communication was none")
logger.debug("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None)
try:
@@ -167,10 +166,10 @@ class fetcher(Fetcher):
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
if len(self.page.content().strip()) == 0:
context.close()
browser.close()
logger.debug("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Run Browser Steps here

View File

@@ -75,8 +75,7 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes,
current_include_filters,
is_binary,
empty_pages_are_a_change
is_binary
):
from changedetectionio.content_fetchers import visualselector_xpath_selectors
@@ -154,7 +153,7 @@ class fetcher(Fetcher):
if response is None:
await self.page.close()
await browser.close()
logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
logger.warning("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None)
self.headers = response.headers
@@ -187,11 +186,10 @@ class fetcher(Fetcher):
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
content = await self.page.content
if not empty_pages_are_a_change and len(content.strip()) == 0:
logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
if len(content.strip()) == 0:
await self.page.close()
await browser.close()
logger.error("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Run Browser Steps here
@@ -249,7 +247,7 @@ class fetcher(Fetcher):
await self.fetch_page(**kwargs)
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
current_include_filters=None, is_binary=False):
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
@@ -264,8 +262,7 @@ class fetcher(Fetcher):
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
is_binary=is_binary
), timeout=max_time))
except asyncio.TimeoutError:
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))

View File

@@ -1,8 +1,9 @@
from loguru import logger
import chardet
import hashlib
import os
import chardet
import requests
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher
@@ -25,8 +26,7 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
is_binary=False):
if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -53,7 +53,7 @@ class fetcher(Fetcher):
session.mount('file://', FileAdapter())
r = session.request(method=request_method,
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
data=request_body,
url=url,
headers=request_headers,
timeout=timeout,
@@ -74,10 +74,7 @@ class fetcher(Fetcher):
self.headers = r.headers
if not r.content or not len(r.content):
if not empty_pages_are_a_change:
raise EmptyReply(url=url, status_code=r.status_code)
else:
logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True")
raise EmptyReply(url=url, status_code=r.status_code)
# @todo test this
# @todo maybe you really want to test zero-byte return pages?

View File

@@ -75,7 +75,6 @@ function isItemInStock() {
'vergriffen',
'vorbestellen',
'vorbestellung ist bald möglich',
'we don\'t currently have any',
'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.',
@@ -174,8 +173,7 @@ function isItemInStock() {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue
}
elementText = "";
@@ -189,7 +187,7 @@ function isItemInStock() {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
return outOfStockText; // item is out of stock
}
}

View File

@@ -164,15 +164,6 @@ visibleElementsArray.forEach(function (element) {
}
}
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
let text = element.textContent.trim().slice(0, 30).trim();
while (/\n{2,}|\t{2,}/.test(text)) {
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
}
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,)/.test(text) ;
size_pos.push({
xpath: xpath_result,
@@ -180,16 +171,9 @@ visibleElementsArray.forEach(function (element) {
height: Math.round(bbox['height']),
left: Math.floor(bbox['left']),
top: Math.floor(bbox['top']) + scroll_y,
// tagName used by Browser Steps
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
// tagtype used by Browser Steps
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: window.getComputedStyle(element).cursor === "pointer",
// Used by the keras trainer
fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
hasDigitCurrency: hasDigitCurrency,
label: label,
isClickable: window.getComputedStyle(element).cursor == "pointer"
});
});
@@ -230,7 +214,7 @@ if (include_filters.length) {
console.log(e);
}
if (results != null && results.length) {
if (results.length) {
// Iterate over the results
results.forEach(node => {

View File

@@ -56,8 +56,7 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
is_binary=False):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import datetime
import flask_login
@@ -532,21 +532,12 @@ def changedetection_app(config=None, datastore_o=None):
@login_optionally_required
def ajax_callback_send_notification_test(watch_uuid=None):
# Watch_uuid could be unset in the case its used in tag editor, global setings
# Watch_uuid could be unsuet in the case its used in tag editor, global setings
import apprise
import random
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
# Use an existing random one on the global/main settings form
if not watch_uuid and (is_global_settings_form or is_group_settings_form):
logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}")
watch_uuid = random.choice(list(datastore.data['watching'].keys()))
watch = datastore.data['watching'].get(watch_uuid)
watch = datastore.data['watching'].get(watch_uuid) if watch_uuid else None
notification_urls = request.form['notification_urls'].strip().splitlines()
@@ -558,6 +549,8 @@ def changedetection_app(config=None, datastore_o=None):
tag = datastore.tag_exists_by_name(k.strip())
notification_urls = tag.get('notifications_urls') if tag and tag.get('notifications_urls') else None
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
if not notification_urls and not is_global_settings_form and not is_group_settings_form:
# In the global settings, use only what is typed currently in the text box
logger.debug("Test notification - Trying by global system settings notifications")
@@ -576,7 +569,7 @@ def changedetection_app(config=None, datastore_o=None):
try:
# use the same as when it is triggered, but then override it with the form test values
n_object = {
'watch_url': request.form.get('window_url', "https://changedetection.io"),
'watch_url': request.form['window_url'],
'notification_urls': notification_urls
}
@@ -729,12 +722,6 @@ def changedetection_app(config=None, datastore_o=None):
for p in datastore.proxy_list:
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
# Add some HTML to be used for form validation
if datastore.data['watching'][uuid].history.keys():
timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
else:
form.last_html_for_form_validation = "<html><body></body></html>"
if request.method == 'POST' and form.validate():
@@ -1375,32 +1362,6 @@ def changedetection_app(config=None, datastore_o=None):
except FileNotFoundError:
abort(404)
@app.route("/edit/<string:uuid>/get-html", methods=['GET'])
@login_optionally_required
def watch_get_latest_html(uuid):
from io import BytesIO
from flask import send_file
import brotli
watch = datastore.data['watching'].get(uuid)
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
latest_filename = list(watch.history.keys())[-1]
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
if html_fname.endswith('.br'):
# Read and decompress the Brotli file
decompressed_data = brotli.decompress(f.read())
else:
decompressed_data = f.read()
buffer = BytesIO(decompressed_data)
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
# Return a 500 error
abort(500)
@app.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required
def form_quick_watch_add():

View File

@@ -1,9 +1,6 @@
import os
import re
import elementpath
from changedetectionio.html_tools import xpath_filter, xpath1_filter
from changedetectionio.strtobool import strtobool
from wtforms import (
@@ -325,39 +322,52 @@ class ValidateCSSJSONXPATHInput(object):
self.allow_json = allow_json
def __call__(self, form, field):
from lxml.etree import XPathEvalError
if isinstance(field.data, str):
data = [field.data]
else:
data = field.data
for line in data:
line = line.strip()
# Nothing to see here
if not len(line.strip()):
return
if not line:
continue
if line.startswith('xpath') or line.startswith('/'):
# Does it look like XPath?
if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
if not self.allow_xpath:
raise ValidationError("XPath not permitted in this field!")
if line.startswith('xpath1:'):
filter_function = xpath1_filter
else:
line = line.replace('xpath:', '')
filter_function = xpath_filter
from lxml import etree, html
import elementpath
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
tree = html.fromstring("<html></html>")
line = line.replace('xpath:', '')
try:
# Call the determined function
res = filter_function(xpath_filter=line, html_content=form.last_html_for_form_validation)
# It's OK if this is an empty result, we just want to check that it doesn't crash the parser
except (elementpath.ElementPathError,XPathEvalError) as e:
elementpath.select(tree, line.strip(), parser=XPath3Parser)
except elementpath.ElementPathError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
except Exception as e:
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
elif 'json:' in line:
if line.strip().startswith('xpath1:'):
if not self.allow_xpath:
raise ValidationError("XPath not permitted in this field!")
from lxml import etree, html
tree = html.fromstring("<html></html>")
line = re.sub(r'^xpath1:', '', line)
try:
tree.xpath(line.strip())
except etree.XPathEvalError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
if 'json:' in line:
if not self.allow_json:
raise ValidationError("JSONPath not permitted in this field!")
@@ -382,7 +392,7 @@ class ValidateCSSJSONXPATHInput(object):
if not self.allow_json:
raise ValidationError("jq not permitted in this field!")
elif line.startswith('jq:'):
if 'jq:' in line:
try:
import jq
except ModuleNotFoundError:

View File

@@ -8,7 +8,6 @@ from xml.sax.saxutils import escape as xml_escape
import json
import re
from loguru import logger
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
@@ -109,20 +108,6 @@ def elementpath_tostring(obj):
return str(obj)
def extract_namespaces(xml_content):
"""
Extracts all namespaces from the XML content.
"""
from lxml import etree
from io import BytesIO
it = etree.iterparse(BytesIO(xml_content), events=('start-ns',))
namespaces = {}
for _, ns in it:
prefix, uri = ns
namespaces[prefix] = uri
return namespaces
# Return str Utf-8 of matched rules
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
from lxml import etree, html
@@ -138,14 +123,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
html_block = ""
# Automatically extract all namespaces from the XML content
namespaces = {'re': 'http://exslt.org/regular-expressions'}
try:
namespaces.update(extract_namespaces(html_content.encode('utf-8')))
except Exception as e:
logger.warning(f"Problem extracting namespaces from HTMl/XML content {str(e)}")
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
#@note: //title/text() wont work where <title>CDATA..
if type(r) != list:

View File

@@ -107,7 +107,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
r(results.get('url'),
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
data=body,
headers=headers,
params=params
)
@@ -157,7 +157,7 @@ def process_notification(n_object, datastore):
logger.warning(f"Process Notification: skipping empty notification URL.")
continue
logger.info(f">> Process Notification: AppRise notifying {url}")
logger.info(">> Process Notification: AppRise notifying {}".format(url))
url = jinja_render(template_str=url, **notification_parameters)
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
@@ -230,7 +230,6 @@ def process_notification(n_object, datastore):
log_value = logs.getvalue()
if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
logger.critical(log_value)
raise Exception(log_value)
# Return what was sent for better logging - after the for loop

View File

@@ -26,8 +26,6 @@ class difference_detection_processor():
def call_browser(self):
from requests.structures import CaseInsensitiveDict
from changedetectionio.content_fetchers.exceptions import EmptyReply
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -135,18 +133,8 @@ class difference_detection_processor():
is_binary = self.watch.is_pdf
# And here we go! call the right browser with browser-specific settings
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
self.fetcher.run(url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=self.watch.get('include_filters'),
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
)
self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
is_binary=is_binary)
#@todo .quit here could go on close object, so we can run JS if change-detected
self.fetcher.quit()

View File

@@ -1,12 +1,11 @@
from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch
from typing import Union
import re
from babel.numbers import parse_decimal
class Restock(dict):
def parse_currency(self, raw_value: str) -> Union[float, None]:
def parse_currency(self, raw_value: str) -> float:
# Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
standardized_value = raw_value
@@ -22,11 +21,8 @@ class Restock(dict):
# Remove any non-numeric characters except for the decimal point
standardized_value = re.sub(r'[^\d.-]', '', standardized_value)
if standardized_value:
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
return None
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
def __init__(self, *args, **kwargs):
# Define default values
@@ -49,10 +45,13 @@ class Restock(dict):
def __setitem__(self, key, value):
# Custom logic to handle setting price and original_price
if key == 'price' or key == 'original_price':
if key == 'price':
if isinstance(value, str):
value = self.parse_currency(raw_value=value)
if value and not self.get('original_price'):
self['original_price'] = value
super().__setitem__(key, value)
class Watch(BaseWatch):

View File

@@ -40,16 +40,13 @@ def get_itemprop_availability(html_content) -> Restock:
import extruct
logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
value = {}
now = time.time()
# Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
try:
data = extruct.extract(html_content, syntaxes=syntaxes)
except Exception as e:
logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
return Restock()
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
data = extruct.extract(html_content, syntaxes=syntaxes)
logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")
# First phase, dead simple scanning of anything that looks useful
@@ -180,11 +177,6 @@ class perform_site_check(difference_detection_processor):
# Main detection method
fetched_md5 = None
# store original price if not set
if itemprop_availability and itemprop_availability.get('price') and not itemprop_availability.get('original_price'):
itemprop_availability['original_price'] = itemprop_availability.get('price')
update_obj['restock']["original_price"] = itemprop_availability.get('price')
if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
raise ProcessorException(
message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
@@ -203,7 +195,7 @@ class perform_site_check(difference_detection_processor):
# What we store in the snapshot
price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"
snapshot_content = f"{update_obj.get('restock').get('in_stock')} - {price}"
# Main detection method
fetched_md5 = hashlib.md5(snapshot_content.encode('utf-8')).hexdigest()

View File

@@ -77,12 +77,11 @@ class perform_site_check(difference_detection_processor):
ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
# Go into RSS preprocess for converting CDATA/comment to usable text
# Ctype_header could be unset if we are just reprocessing the existin content
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']) or not ctype_header:
top_text = self.fetcher.content[:200].lower().strip()
if '<rss' in top_text or 'search.yahoo.com/mrss/' in top_text:
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
if '<rss' in self.fetcher.content[:100].lower():
self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
is_rss = True
# source: support, basically treat it as plaintext
if watch.is_source_type_url:
is_html = False

View File

@@ -18,11 +18,9 @@ $(document).ready(function () {
});
$(".toggle-show").click(function (e) {
$("#notification-token-toggle").click(function (e) {
e.preventDefault();
let target = $(this).data('target');
$(target).toggle();
$('#notification-tokens-info').toggle();
});
});

View File

@@ -11,11 +11,8 @@
class="notification-urls" )
}}
<div class="pure-form-message-inline">
<p>
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
</p>
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul style="display: none" id="advanced-help-notifications">
<ul>
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -43,7 +40,7 @@
</div>
<div class="pure-controls">
<div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
</div>
<div class="pure-controls" style="display: none;" id="notification-tokens-info">
<table class="pure-table" id="token-table">

View File

@@ -4,7 +4,6 @@
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
<script>
const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@@ -276,9 +275,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
<ul id="advanced-help-selectors" style="display: none;">
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
@@ -298,12 +297,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</span>
</div>
<fieldset class="pure-control-group">
@@ -483,12 +479,6 @@ Unavailable") }}
</tr>
</tbody>
</table>
{% if watch.history_n %}
<p>
<a href="{{url_for('watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>
</p>
{% endif %}
</div>
</div>
<div id="actions">

View File

@@ -76,7 +76,7 @@
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
<span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
</div>
{% if form.requests.proxy %}
<div class="pure-control-group inline-radio">

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import resource
import time
from threading import Thread

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
# !/usr/bin/python3
import os
from flask import url_for

View File

@@ -1,3 +1,3 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from .. import conftest

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from .. import conftest

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os
import time
from flask import url_for

View File

@@ -1,3 +1,3 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from .. import conftest

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
assert b'not-in-stock' not in res.data
# We should have a notification
wait_for_notification_endpoint_output()
time.sleep(2)
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
os.unlink("test-datastore/notification.txt")
@@ -103,7 +103,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(5)
assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"
# BUT we should see that it correctly shows "not in stock"

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import asyncio
from aiosmtpd.controller import Controller
from aiosmtpd.smtp import SMTP

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os.path
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from .util import live_server_setup, wait_for_all_checks
from changedetectionio import html_tools
@@ -112,7 +112,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
res = client.post(
url_for("settings_page"),
data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
"application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了',
"application-notification_body": 'triggered text was -{{triggered_text}}-',
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_urls": test_notification_url,
"application-minutes_between_check": 180,
@@ -165,12 +165,11 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'unviewed' in res.data
# Takes a moment for apprise to fire
wait_for_notification_endpoint_output()
time.sleep(3)
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
with open("test-datastore/notification.txt", 'rb') as f:
response = f.read()
assert b'-Oh yes please-' in response
assert '网站监测 内容更新了'.encode('utf-8') in response
with open("test-datastore/notification.txt", 'r') as f:
response= f.read()
assert '-Oh yes please-' in response
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for
@@ -69,12 +69,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
# Check the 'get latest snapshot works'
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
assert b'which has this one new line' in res.data
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data
@@ -92,7 +86,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert expected_url.encode('utf-8') in res.data
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
res = client.get(url_for("diff_history_page", uuid=uuid))
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'selected=""' in res.data, "Confirm diff history page loaded"
# Check the [preview] pulls the right one
@@ -149,6 +143,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b'unviewed' not in res.data
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
uuid = extract_UUID_from_client(client)
client.get(url_for("clear_watch_history", uuid=uuid))
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from .util import set_original_response, live_server_setup, wait_for_all_checks
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# coding=utf-8
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,10 +1,10 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# https://www.reddit.com/r/selfhosted/comments/wa89kp/comment/ii3a4g7/?context=3
import os
import time
from flask import url_for
from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
from .util import set_original_response, live_server_setup
from changedetectionio.model import App
@@ -102,15 +102,14 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_notification_endpoint_output()
time.sleep(3)
# Shouldn't exist, shouldn't have fired
assert not os.path.isfile("test-datastore/notification.txt")
# Now the filter should exist
set_response_with_filter()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_notification_endpoint_output()
time.sleep(3)
assert os.path.isfile("test-datastore/notification.txt")

View File

@@ -1,8 +1,7 @@
import os
import time
from flask import url_for
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
wait_for_notification_endpoint_output
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
from changedetectionio.model import App
@@ -108,8 +107,7 @@ def run_filter_test(client, live_server, content_filter):
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
time.sleep(2) # delay for apprise to fire
# Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt")
@@ -129,7 +127,6 @@ def run_filter_test(client, live_server, content_filter):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
# It should have sent a notification, but..
assert os.path.isfile("test-datastore/notification.txt")
# but it should not contain the info about a failed filter (because there was none in this case)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
import os

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
"""Test suite for the method to extract text from an html string"""
from ..html_tools import html_to_text

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from . util import live_server_setup
from changedetectionio import html_tools

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
"""Test suite for the render/not render anchor tag content functionality"""
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import io
import os
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# coding=utf-8
import time

View File

@@ -1,8 +1,11 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup
sleep_time_for_fetch_thread = 3
def set_nonrenderable_response():
@@ -13,18 +16,12 @@ def set_nonrenderable_response():
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
time.sleep(1)
return None
def set_zero_byte_response():
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("")
time.sleep(1)
return None
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
set_original_response()
live_server_setup(live_server)
@@ -38,11 +35,18 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b"1 Imported" in res.data
wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Do this a few times.. ensures we dont accidently set the status
for n in range(3):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
#####################
@@ -60,7 +64,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
@@ -82,20 +86,14 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
client.get(url_for("mark_all_viewed"), follow_redirects=True)
# A totally zero byte (#2528) response should also not trigger an error
set_zero_byte_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
assert b'fetch-error' not in res.data
#
# Cleanup everything

View File

@@ -3,8 +3,6 @@ import os
import time
import re
from flask import url_for
from loguru import logger
from .util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, wait_for_all_checks, \
set_longer_modified_response
from . util import extract_UUID_from_client
@@ -291,11 +289,11 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
"application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
"application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444 }',
"application-notification_format": default_notification_format,
"application-notification_urls": test_notification_url,
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
},
follow_redirects=True
)
@@ -324,7 +322,6 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
j = json.loads(x)
assert j['url'].startswith('http://localhost')
assert j['secret'] == 444
assert j['somebug'] == '网站监测 内容更新了'
# URL check, this will always be converted to lowercase
assert os.path.isfile("test-datastore/notification-url.txt")
@@ -350,82 +347,3 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
url_for("form_delete", uuid="all"),
follow_redirects=True
)
#2510
def test_global_send_test_notification(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_original_response()
if os.path.isfile("test-datastore/notification.txt"):
os.unlink("test-datastore/notification.txt")
# otherwise other settings would have already existed from previous tests in this file
res = client.post(
url_for("settings_page"),
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
#1995 UTF-8 content should be encoded
"application-notification_body": 'change detection is cool 网站监测 内容更新了',
"application-notification_format": default_notification_format,
"application-notification_urls": "",
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
},
follow_redirects=True
)
assert b'Settings updated' in res.data
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": 'nice one'},
follow_redirects=True
)
assert b"Watch added" in res.data
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123"
######### Test global/system settings
res = client.post(
url_for("ajax_callback_send_notification_test")+"?mode=global-settings",
data={"notification_urls": test_notification_url},
follow_redirects=True
)
assert res.status_code != 400
assert res.status_code != 500
# Give apprise time to fire
time.sleep(4)
with open("test-datastore/notification.txt", 'r') as f:
x = f.read()
assert 'change detection is cool 网站监测 内容更新了' in x
os.unlink("test-datastore/notification.txt")
######### Test group/tag settings
res = client.post(
url_for("ajax_callback_send_notification_test")+"?mode=group-settings",
data={"notification_urls": test_notification_url},
follow_redirects=True
)
assert res.status_code != 400
assert res.status_code != 500
# Give apprise time to fire
time.sleep(4)
with open("test-datastore/notification.txt", 'r') as f:
x = f.read()
# Should come from notification.py default handler when there is no notification body to pull from
assert 'change detection is cool 网站监测 内容更新了' in x
client.get(
url_for("form_delete", uuid="all"),
follow_redirects=True
)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from ..notification import default_notification_format
instock_props = [
@@ -182,8 +182,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
# price changed to something LESS than min (900), SHOULD be a change
set_original_response(props_markup=instock_props[0], price='890.45')
# let previous runs wait
time.sleep(2)
time.sleep(1)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
@@ -198,8 +197,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
# Depending on the LOCALE it may be either of these (generally for US/default/etc)
assert b'1,890.45' in res.data or b'1890.45' in res.data
assert b'1,890.45' or b'1890.45' in res.data
assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -364,7 +362,7 @@ def test_change_with_notification_values(client, live_server):
set_original_response(props_markup=instock_props[0], price='1950.45')
client.get(url_for("form_watch_checknow"))
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
time.sleep(3)
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for
@@ -164,46 +164,3 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
assert b'Some other description' not in res.data # Should NOT be selected by the xpath
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_namespace_selectors(live_server, client):
set_original_cdata_xml()
#live_server_setup(live_server)
test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
# because it will look for the namespaced stuff during form validation, but on the first check it wont exist..
res = client.post(
url_for("edit_page", uuid=uuid),
data={
"include_filters": "//media:thumbnail/@url",
"fetch_backend": "html_requests",
"headers": "",
"proxy": "no-proxy",
"tags": "",
"url": test_url,
},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'CDATA' not in res.data
assert b'<![' not in res.data
assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_jinja2_security

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_notification_diff

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_restock_logic

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_notification_diff

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from flask import make_response, request
from flask import url_for
@@ -76,17 +76,6 @@ def set_more_modified_response():
return None
def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
from os.path import isfile
for i in range(1, 20):
time.sleep(1)
if isfile("test-datastore/notification.txt"):
return True
return False
# kinda funky, but works for now
def extract_api_key_from_UI(client):
import re

View File

@@ -1,3 +1,3 @@
#!/usr/bin/env python3
#!/usr/bin/python3
from .. import conftest

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/python3
import os
from flask import url_for

View File

@@ -1,5 +1,6 @@
from .processors.exceptions import ProcessorException
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
from . import content_fetchers
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools
@@ -300,7 +301,7 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
process_changedetection_results = False
except content_fetchers_exceptions.ReplyWithContentButNoText as e:
except content_fetchers.exceptions.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
# Backend (not filters) gave zero output
@@ -326,7 +327,7 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 403:
err_text = "Error - 403 (Access denied) received"
elif e.status_code == 404:
@@ -379,23 +380,23 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e:
# Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False
changed_detected = False
except content_fetchers_exceptions.BrowserConnectError as e:
except content_fetchers.exceptions.BrowserConnectError as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers_exceptions.BrowserFetchTimedOut as e:
except content_fetchers.exceptions.BrowserFetchTimedOut as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers_exceptions.BrowserStepsStepException as e:
except content_fetchers.exceptions.BrowserStepsStepException as e:
if not self.datastore.data['watching'].get(uuid):
continue
@@ -437,25 +438,25 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers_exceptions.EmptyReply as e:
except content_fetchers.exceptions.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.ScreenshotUnavailable as e:
except content_fetchers.exceptions.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.JSActionExceptions as e:
except content_fetchers.exceptions.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.PageUnloadable as e:
except content_fetchers.exceptions.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly"
if e.message:
err_text = "{} - {}".format(err_text, e.message)
@@ -467,7 +468,7 @@ class update_worker(threading.Thread):
'last_check_status': e.status_code,
'has_ldjson_price_data': None})
process_changedetection_results = False
except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e:
err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False

View File

@@ -18,7 +18,7 @@ services:
#
# Log levels are in descending order. (TRACE is the most detailed one)
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
# - LOGGER_LEVEL=TRACE
# - LOGGER_LEVEL=DEBUG
#
# Alternative WebDriver/selenium URL, do not use "'s or 's!
# - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
@@ -29,9 +29,8 @@ services:
#
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
#
# Alternative target "Chrome" Playwright URL, do not use "'s or 's!
# "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
# - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
# Alternative Playwright URL, do not use "'s or 's!
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
#
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
#
@@ -74,10 +73,10 @@ services:
# condition: service_started
# Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME
# sockpuppetbrowser:
# hostname: sockpuppetbrowser
# playwright-chrome:
# hostname: playwright-chrome
# image: dgtlmoon/sockpuppetbrowser:latest
# cap_add:
# - SYS_ADMIN

View File

@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
# jq not available on Windows so must be installed manually
# Notification library
apprise~=1.8.1
apprise~=1.8.0
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
# and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
@@ -79,9 +79,8 @@ pyppeteerstealth>=0.0.4
pytest ~=7.2
pytest-flask ~=1.2
# Anything 4.0 and up but not 5.0
jsonschema ~= 4.0
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
jsonschema==4.17.3
loguru