Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-06 09:35:48 +00:00)

Compare commits: post-reque...total-byte (19 Commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 92d715272a |  |
|  | 5b70625eaa |  |
|  | 60d292107d |  |
|  | 1cb38347da |  |
|  | 55fe2abf42 |  |
|  | 4225900ec3 |  |
|  | 1fb4342488 |  |
|  | 7071df061a |  |
|  | 6dd1fa2b88 |  |
|  | 371f85d544 |  |
|  | 932cf15e1e |  |
|  | bf0d410d32 |  |
|  | 730f37c7ba |  |
|  | 0e5261dd87 |  |
|  | 8a35d62e02 |  |
|  | f527744024 |  |
|  | 71c9b1273c |  |
|  | ec68450df1 |  |
|  | 2fd762a783 |  |
```diff
@@ -2,7 +2,7 @@
 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
-__version__ = '0.46.01'
+__version__ = '0.46.04'
 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
```
```diff
@@ -85,7 +85,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
             playwright_browser=browsersteps_start_session['browser'],
             proxy=proxy,
-            start_url=datastore.data['watching'][watch_uuid].get('url')
+            start_url=datastore.data['watching'][watch_uuid].get('url'),
+            headers=datastore.data['watching'][watch_uuid].get('headers')
         )

         # For test
```
```diff
@@ -51,6 +51,7 @@ class Fetcher():
     instock_data = None
     instock_data_js = ""
     status_code = None
+    total_bytes = None
     webdriver_js_execute_code = None
     xpath_data = None
     xpath_element_js = ""
@@ -65,8 +66,8 @@ class Fetcher():

     def __init__(self):
         import importlib.resources
-        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
-        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
+        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
+        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')

     @abstractmethod
     def get_error(self):
@@ -81,7 +82,8 @@ class Fetcher():
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):
         # Should set self.error, self.status_code and self.content
         pass
```
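The `read_text(encoding='utf-8')` change above pins resource loading to UTF-8 regardless of host locale. A minimal standalone sketch of the same pattern, assuming the changedetection.io package is importable (the package path and filename are the ones named in the hunk):

```python
# Sketch of the explicit-encoding resource load. Without an encoding argument,
# read_text() falls back to the platform's preferred encoding, which can fail
# on hosts running a non-UTF-8 locale (e.g. LANG=C).
import importlib.resources

def load_js_resource(filename: str) -> str:
    # Package path taken from the hunk above
    return (importlib.resources.files("changedetectionio.content_fetchers.res")
            .joinpath(filename)
            .read_text(encoding='utf-8'))

xpath_js = load_js_resource('xpath_element_scraper.js')
```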
```diff
@@ -83,7 +83,8 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):

         from playwright.sync_api import sync_playwright
         import playwright._impl._errors
@@ -130,7 +131,7 @@ class fetcher(Fetcher):
             if response is None:
                 context.close()
                 browser.close()
-                logger.debug("Content Fetcher > Response object was none")
+                logger.debug("Content Fetcher > Response object from the browser communication was none")
                 raise EmptyReply(url=url, status_code=None)

             try:
@@ -166,10 +167,10 @@ class fetcher(Fetcher):
                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

-            if len(self.page.content().strip()) == 0:
+            if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
+                logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
                 context.close()
                 browser.close()
-                logger.debug("Content Fetcher > Content was empty")
                 raise EmptyReply(url=url, status_code=response.status)

             # Run Browser Steps here
```
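The gate added in this hunk is the core of the new `empty_pages_are_a_change` behaviour, repeated in each fetcher below. A reduced sketch of the control flow, with a stub standing in for the real `EmptyReply` from `changedetectionio.content_fetchers.exceptions`:

```python
# Reduced sketch of the empty-page gate: an empty body only raises EmptyReply
# when the "empty pages are a change" setting is off; when it is on, the empty
# snapshot flows through and gets recorded as a change.
class EmptyReply(Exception):  # stand-in for the real exception class
    def __init__(self, url, status_code):
        self.url = url
        self.status_code = status_code

def gate_empty_content(content, url, status_code, empty_pages_are_a_change=False):
    if not empty_pages_are_a_change and len(content.strip()) == 0:
        raise EmptyReply(url=url, status_code=status_code)
    return content
```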
```diff
@@ -75,7 +75,8 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes,
             current_include_filters,
-            is_binary
+            is_binary,
+            empty_pages_are_a_change
             ):

         from changedetectionio.content_fetchers import visualselector_xpath_selectors
@@ -153,7 +154,7 @@ class fetcher(Fetcher):
         if response is None:
             await self.page.close()
             await browser.close()
-            logger.warning("Content Fetcher > Response object was none")
+            logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
             raise EmptyReply(url=url, status_code=None)

         self.headers = response.headers
@@ -186,10 +187,11 @@ class fetcher(Fetcher):
             raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

         content = await self.page.content
-        if len(content.strip()) == 0:
+        if not empty_pages_are_a_change and len(content.strip()) == 0:
+            logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
             await self.page.close()
             await browser.close()
-            logger.error("Content Fetcher > Content was empty")
             raise EmptyReply(url=url, status_code=response.status)

         # Run Browser Steps here
@@ -247,7 +249,7 @@ class fetcher(Fetcher):
         await self.fetch_page(**kwargs)

     def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
-            current_include_filters=None, is_binary=False):
+            current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):

         #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
         max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
@@ -262,7 +264,8 @@ class fetcher(Fetcher):
             request_method=request_method,
             ignore_status_codes=ignore_status_codes,
             current_include_filters=current_include_filters,
-            is_binary=is_binary
+            is_binary=is_binary,
+            empty_pages_are_a_change=empty_pages_are_a_change
             ), timeout=max_time))
         except asyncio.TimeoutError:
             raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
```
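The `run()` wrapper above bounds the whole async fetch with a single deadline. A self-contained sketch of that pattern, with a plain `TimeoutError` standing in for the project's `BrowserFetchTimedOut`:

```python
# Sketch of the deadline pattern from the hunk: the entire async fetch is
# wrapped in asyncio.wait_for(), so a stalled browser turns into a clean
# timeout exception instead of hanging the update worker indefinitely.
import asyncio
import os

async def fetch_page(**kwargs):
    await asyncio.sleep(0.1)  # placeholder for the real browser work

def run(**kwargs):
    max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))
    try:
        asyncio.run(asyncio.wait_for(fetch_page(**kwargs), timeout=max_time))
    except asyncio.TimeoutError:
        raise TimeoutError(f"Browser connected but was unable to process the page in {max_time} seconds.")

run()
```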
```diff
@@ -1,9 +1,8 @@
+from loguru import logger
+import chardet
 import hashlib
 import os
-
-import chardet
 import requests

 from changedetectionio import strtobool
 from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
 from changedetectionio.content_fetchers.base import Fetcher
@@ -13,6 +12,27 @@ from changedetectionio.content_fetchers.base import Fetcher
 class fetcher(Fetcher):
     fetcher_description = "Basic fast Plaintext/HTTP Client"

+    def get_total_bytes_received(self, response):
+        # Calculate the size of the response content
+        content_size = len(response.content)
+        # Calculate the size of the response headers
+        headers_size = sum(len(k) + len(v) for k, v in response.headers.items()) + len(response.headers) * 4  # adding 4 for ': ' and '\r\n'
+
+        # Total bytes received
+        total_received = content_size + headers_size
+        return total_received
+
+    def get_total_bytes_transferred(self, request):
+        # Calculate the size of the request headers
+        headers_size = sum(len(k) + len(v) for k, v in request.headers.items()) + len(request.headers) * 4  # adding 4 for ': ' and '\r\n'
+
+        # Calculate the size of the request body, if any
+        body_size = len(request.body or '')
+
+        # Total bytes transferred (request + response)
+        total_transferred = headers_size + body_size
+        return total_transferred
+
     def __init__(self, proxy_override=None, custom_browser_connection_url=None):
         super().__init__()
         self.proxy_override = proxy_override
@@ -26,7 +46,8 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):

         if self.browser_steps_get_valid_steps():
             raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -53,13 +74,17 @@ class fetcher(Fetcher):
         session.mount('file://', FileAdapter())

         r = session.request(method=request_method,
-                            data=request_body,
+                            data=request_body.encode('utf-8') if type(request_body) is str else request_body,
                             url=url,
                             headers=request_headers,
                             timeout=timeout,
                             proxies=proxies,
                             verify=False)

+        total_received = self.get_total_bytes_received(response=r)
+        request_prepared = r.request
+        self.total_bytes = self.get_total_bytes_transferred(request_prepared) + total_received
+
         # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
         # For example - some sites don't tell us it's utf-8, but return utf-8 content
         # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
@@ -74,7 +99,10 @@ class fetcher(Fetcher):
         self.headers = r.headers

         if not r.content or not len(r.content):
-            raise EmptyReply(url=url, status_code=r.status_code)
+            if not empty_pages_are_a_change:
+                raise EmptyReply(url=url, status_code=r.status_code)
+            else:
+                logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True")

         # @todo test this
         # @todo maybe you really want to test zero-byte return pages?
```
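The two helpers added above approximate wire size from what `requests` already exposes. The same arithmetic, runnable standalone (the target URL here is only an example):

```python
# Standalone check of the byte-accounting arithmetic: each header counts as
# key + value + 4 bytes (': ' plus '\r\n'), added to the body/content length
# on each side of the exchange. This is an approximation, not a packet capture.
import requests

r = requests.get('https://example.com')  # example target only

received = len(r.content) \
    + sum(len(k) + len(v) for k, v in r.headers.items()) + len(r.headers) * 4
sent = sum(len(k) + len(v) for k, v in r.request.headers.items()) \
    + len(r.request.headers) * 4 + len(r.request.body or '')

print(f"approx total_bytes = {received + sent} (received {received}, sent {sent})")
```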
```diff
@@ -75,6 +75,7 @@ function isItemInStock() {
       'vergriffen',
       'vorbestellen',
       'vorbestellung ist bald möglich',
+      'we don\'t currently have any',
       'we couldn\'t find any products that match',
       'we do not currently have an estimate of when this product will be back in stock.',
       'we don\'t know when or if this item will be back in stock.',
@@ -173,7 +174,8 @@ function isItemInStock() {
       const element = elementsToScan[i];
       // outside the 'fold' or some weird text in the heading area
       // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
-      if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
+      // Note: theres also an automated test that places the 'out of stock' text fairly low down
+      if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
         continue
       }
       elementText = "";
@@ -187,7 +189,7 @@ function isItemInStock() {
       // and these mean its out of stock
       for (const outOfStockText of outOfStockTexts) {
         if (elementText.includes(outOfStockText)) {
-          console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
+          console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
           return outOfStockText; // item is out of stock
         }
       }
     }
```
```diff
@@ -164,6 +164,15 @@ visibleElementsArray.forEach(function (element) {
         }
     }

+    let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
+
+    let text = element.textContent.trim().slice(0, 30).trim();
+    while (/\n{2,}|\t{2,}/.test(text)) {
+        text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
+    }
+
+    // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
+    const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
+
     size_pos.push({
         xpath: xpath_result,
@@ -171,9 +180,16 @@ visibleElementsArray.forEach(function (element) {
         height: Math.round(bbox['height']),
         left: Math.floor(bbox['left']),
         top: Math.floor(bbox['top']) + scroll_y,
+        // tagName used by Browser Steps
         tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
+        // tagtype used by Browser Steps
         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
-        isClickable: window.getComputedStyle(element).cursor == "pointer"
+        isClickable: window.getComputedStyle(element).cursor === "pointer",
+        // Used by the keras trainer
+        fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
+        fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
+        hasDigitCurrency: hasDigitCurrency,
+        label: label,
     });

 });
```
```diff
@@ -56,7 +56,8 @@ class fetcher(Fetcher):
             request_method,
             ignore_status_codes=False,
             current_include_filters=None,
-            is_binary=False):
+            is_binary=False,
+            empty_pages_are_a_change=False):

         from selenium import webdriver
         from selenium.webdriver.chrome.options import Options as ChromeOptions
```
```diff
@@ -1377,17 +1377,19 @@ def changedetection_app(config=None, datastore_o=None):
         import brotli

         watch = datastore.data['watching'].get(uuid)
-        if watch and os.path.isdir(watch.watch_data_dir):
-            latest_filename = list(watch.history.keys())[0]
+        if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
+            latest_filename = list(watch.history.keys())[-1]
             html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
-            if html_fname.endswith('.br'):
-                with open(html_fname, 'rb') as f:
-                    # Read and decompress the Brotli file
+            with open(html_fname, 'rb') as f:
+                if html_fname.endswith('.br'):
+                    # Read and decompress the Brotli file
                     decompressed_data = brotli.decompress(f.read())
+                else:
+                    decompressed_data = f.read()

             buffer = BytesIO(decompressed_data)

             return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')

         # Return a 500 error
```
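Two fixes land in this hunk: `watch.history.keys()` is ordered oldest-first, so `[-1]` (not `[0]`) is the latest snapshot, and files that are not Brotli-compressed no longer go through `brotli.decompress()`. The read logic in isolation, as a small sketch:

```python
# Sketch of the snapshot read after the fix: Brotli-compressed snapshots are
# decompressed, anything else is returned as raw bytes.
import brotli  # pip install Brotli

def read_snapshot(html_fname: str) -> bytes:
    with open(html_fname, 'rb') as f:
        if html_fname.endswith('.br'):
            # Read and decompress the Brotli file
            return brotli.decompress(f.read())
        return f.read()
```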
```diff
@@ -107,7 +107,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):

     r(results.get('url'),
       auth=auth,
-      data=body,
+      data=body.encode('utf-8') if type(body) is str else body,
       headers=headers,
       params=params
       )
```
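This is the same pre-encoding applied in the requests-fetcher hunk earlier: Python's `http.client` encodes `str` bodies as latin-1, which raises `UnicodeEncodeError` for non-ASCII notification text, so strings are converted to UTF-8 bytes up front. The pattern in isolation:

```python
# Encode str bodies explicitly so non-ASCII payloads survive the POST;
# bytes pass through untouched.
def as_utf8_bytes(body):
    return body.encode('utf-8') if isinstance(body, str) else body

assert as_utf8_bytes('网站监测 内容更新了') == '网站监测 内容更新了'.encode('utf-8')
assert as_utf8_bytes(b'already-bytes') == b'already-bytes'
```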
```diff
@@ -26,6 +26,8 @@ class difference_detection_processor():

     def call_browser(self):
         from requests.structures import CaseInsensitiveDict
+        from changedetectionio.content_fetchers.exceptions import EmptyReply

         # Protect against file:// access
         if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
             if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -133,8 +135,18 @@ class difference_detection_processor():
         is_binary = self.watch.is_pdf

         # And here we go! call the right browser with browser-specific settings
-        self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
-                         is_binary=is_binary)
+        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
+
+        self.fetcher.run(url=url,
+                         timeout=timeout,
+                         request_headers=request_headers,
+                         request_body=request_body,
+                         request_method=request_method,
+                         ignore_status_codes=ignore_status_codes,
+                         current_include_filters=self.watch.get('include_filters'),
+                         is_binary=is_binary,
+                         empty_pages_are_a_change=empty_pages_are_a_change
+                         )

         #@todo .quit here could go on close object, so we can run JS if change-detected
         self.fetcher.quit()
```
```diff
@@ -1,11 +1,12 @@
-from changedetectionio.model.Watch import model as BaseWatch
-import re
 from babel.numbers import parse_decimal
+from changedetectionio.model.Watch import model as BaseWatch
+from typing import Union
+import re

 class Restock(dict):

-    def parse_currency(self, raw_value: str) -> float:
+    def parse_currency(self, raw_value: str) -> Union[float, None]:
         # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
         standardized_value = raw_value
@@ -21,8 +22,11 @@ class Restock(dict):
         # Remove any non-numeric characters except for the decimal point
         standardized_value = re.sub(r'[^\d.-]', '', standardized_value)

-        # Convert to float
-        return float(parse_decimal(standardized_value, locale='en'))
+        if standardized_value:
+            # Convert to float
+            return float(parse_decimal(standardized_value, locale='en'))
+
+        return None

     def __init__(self, *args, **kwargs):
         # Define default values
```
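The new guard means a value that strips down to an empty string returns `None` instead of crashing inside `parse_decimal`. A trimmed, runnable version of the method (the earlier comma/dot standardization steps are omitted here):

```python
# Trimmed version of parse_currency after this change: non-numeric input now
# yields None rather than a ValueError from parse_decimal('').
import re
from typing import Union
from babel.numbers import parse_decimal  # pip install babel

def parse_currency(raw_value: str) -> Union[float, None]:
    # Remove any non-numeric characters except for the decimal point
    standardized_value = re.sub(r'[^\d.-]', '', raw_value)
    if standardized_value:
        return float(parse_decimal(standardized_value, locale='en'))
    return None

print(parse_currency("3,499.00 Kč"))   # 3499.0
print(parse_currency("out of stock"))  # None
```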
```diff
@@ -40,13 +40,16 @@ def get_itemprop_availability(html_content) -> Restock:
     import extruct
     logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")

-    value = {}
     now = time.time()

     # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.

     syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
+    try:
+        data = extruct.extract(html_content, syntaxes=syntaxes)
+    except Exception as e:
+        logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
+        return Restock()

-    data = extruct.extract(html_content, syntaxes=syntaxes)
     logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")

     # First phase, dead simple scanning of anything that looks useful
```
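The try/except turns a parser crash into an empty result instead of failing the whole availability check. A minimal sketch of the guarded call, returning a plain dict here rather than the project's `Restock` type:

```python
# Guarded metadata extraction: extruct can raise on malformed HTML, so a parse
# failure degrades to an empty result instead of aborting the whole check.
import extruct  # pip install extruct

def safe_extract(html_content: str) -> dict:
    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
    try:
        return extruct.extract(html_content, syntaxes=syntaxes)
    except Exception as e:
        print(f"Unable to extract data, parsing failed with {type(e).__name__} - {e}")
        return {}

print(safe_extract('<html><head></head><body>no metadata here</body></html>').keys())
```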
```diff
@@ -76,7 +76,7 @@
                         </div>
                         <div class="pure-control-group">
                             {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
-                            <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
+                            <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
                         </div>
                         {% if form.requests.proxy %}
                         <div class="pure-control-group inline-radio">
```
```diff
@@ -112,7 +112,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
     res = client.post(
         url_for("settings_page"),
         data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
-              "application-notification_body": 'triggered text was -{{triggered_text}}-',
+              "application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了',
              # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
              "application-notification_urls": test_notification_url,
              "application-minutes_between_check": 180,
@@ -167,9 +167,10 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
     # Takes a moment for apprise to fire
     time.sleep(3)
     assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
-    with open("test-datastore/notification.txt", 'r') as f:
-        response= f.read()
-    assert '-Oh yes please-' in response
+    with open("test-datastore/notification.txt", 'rb') as f:
+        response = f.read()
+    assert b'-Oh yes please-' in response
+    assert '网站监测 内容更新了'.encode('utf-8') in response

     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
```
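Why the test switched to `'rb'`: once the notification body contains Chinese text, comparing raw bytes sidesteps any dependence on the platform's default text encoding when reading the file back. The assertion pattern on its own:

```python
# Byte-level comparison of the notification output: encode the expected
# non-ASCII text to UTF-8 and compare against the raw file contents.
expected = '网站监测 内容更新了'.encode('utf-8')

with open("test-datastore/notification.txt", 'rb') as f:  # path used by the test above
    payload = f.read()

assert b'-Oh yes please-' in payload
assert expected in payload
```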
```diff
@@ -69,6 +69,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

     wait_for_all_checks(client)

+    uuid = extract_UUID_from_client(client)
+
+    # Check the 'get latest snapshot works'
+    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
+    assert b'which has this one new line' in res.data
+
     # Now something should be ready, indicated by having a 'unviewed' class
     res = client.get(url_for("index"))
     assert b'unviewed' in res.data
@@ -86,7 +92,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     assert expected_url.encode('utf-8') in res.data

     # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
-    res = client.get(url_for("diff_history_page", uuid="first"))
+    res = client.get(url_for("diff_history_page", uuid=uuid))
     assert b'selected=""' in res.data, "Confirm diff history page loaded"

     # Check the [preview] pulls the right one
@@ -143,18 +149,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     assert b'unviewed' not in res.data

     # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
-    uuid = extract_UUID_from_client(client)
     client.get(url_for("clear_watch_history", uuid=uuid))
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("index"))
     assert b'preview/' in res.data

-    # Check the 'get latest snapshot works'
-    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
-    assert b'<head><title>head title</title></head>' in res.data
-
     #
     # Cleanup everything
     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
```
```diff
@@ -1,12 +1,7 @@
 #!/usr/bin/env python3

-import time
 from flask import url_for
-from urllib.request import urlopen
-from .util import set_original_response, set_modified_response, live_server_setup
-
-sleep_time_for_fetch_thread = 3
-
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks

 def set_nonrenderable_response():
     test_return_data = """<html>
@@ -22,6 +17,13 @@ def set_nonrenderable_response():

     return None

+def set_zero_byte_response():
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("")
+
+    return None
+
 def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
     set_original_response()
     live_server_setup(live_server)
@@ -35,18 +37,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

     assert b"1 Imported" in res.data

-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

-    # Do this a few times.. ensures we dont accidently set the status
-    for n in range(3):
-        client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
-        # Give the thread time to pick it up
-        time.sleep(sleep_time_for_fetch_thread)
-
-        # It should report nothing found (no new 'unviewed' class)
-        res = client.get(url_for("index"))
-        assert b'unviewed' not in res.data
+    # It should report nothing found (no new 'unviewed' class)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data

     #####################
@@ -64,7 +59,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

     # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("index"))
@@ -86,14 +81,20 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     client.get(url_for("form_watch_checknow"), follow_redirects=True)

     # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

     # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("index"))
     assert b'unviewed' in res.data
+    client.get(url_for("mark_all_viewed"), follow_redirects=True)
+
+    # A totally zero byte (#2528) response should also not trigger an error
+    set_zero_byte_response()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data  # A change should have registered because empty_pages_are_a_change is ON
+    assert b'fetch-error' not in res.data

     #
     # Cleanup everything
```
```diff
@@ -291,11 +291,11 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
         data={
             "application-fetch_backend": "html_requests",
             "application-minutes_between_check": 180,
-            "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444 }',
+            "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
             "application-notification_format": default_notification_format,
             "application-notification_urls": test_notification_url,
             # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
-            "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
+            "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
         },
         follow_redirects=True
     )
@@ -324,6 +324,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
     j = json.loads(x)
     assert j['url'].startswith('http://localhost')
     assert j['secret'] == 444
+    assert j['somebug'] == '网站监测 内容更新了'

     # URL check, this will always be converted to lowercase
     assert os.path.isfile("test-datastore/notification-url.txt")
@@ -354,9 +355,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
 #2510
 def test_global_send_test_notification(client, live_server, measure_memory_usage):

     #live_server_setup(live_server)
     set_original_response()
+    if os.path.isfile("test-datastore/notification.txt"):
+        os.unlink("test-datastore/notification.txt")

     # otherwise other settings would have already existed from previous tests in this file
     res = client.post(
@@ -364,7 +366,8 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
         data={
             "application-fetch_backend": "html_requests",
             "application-minutes_between_check": 180,
-            "application-notification_body": 'change detection is cool',
+            #1995 UTF-8 content should be encoded
+            "application-notification_body": 'change detection is cool 网站监测 内容更新了',
             "application-notification_format": default_notification_format,
             "application-notification_urls": "",
             "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
@@ -399,8 +402,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage

     with open("test-datastore/notification.txt", 'r') as f:
         x = f.read()
-        assert 'change detection is coo' in x
+        assert 'change detection is cool 网站监测 内容更新了' in x

     os.unlink("test-datastore/notification.txt")
@@ -420,7 +422,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
     with open("test-datastore/notification.txt", 'r') as f:
         x = f.read()
         # Should come from notification.py default handler when there is no notification body to pull from
-        assert 'change detection is coo' in x
+        assert 'change detection is cool 网站监测 内容更新了' in x

     client.get(
         url_for("form_delete", uuid="all"),
```
```diff
@@ -1,6 +1,5 @@
 from .processors.exceptions import ProcessorException
-from . import content_fetchers
+import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
 from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio import html_tools
@@ -301,7 +300,7 @@ class update_worker(threading.Thread):
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
                     process_changedetection_results = False

-                except content_fetchers.exceptions.ReplyWithContentButNoText as e:
+                except content_fetchers_exceptions.ReplyWithContentButNoText as e:
                     # Totally fine, it's by choice - just continue on, nothing more to care about
                     # Page had elements/content but no renderable text
                     # Backend (not filters) gave zero output
@@ -327,7 +326,7 @@ class update_worker(threading.Thread):

                     process_changedetection_results = False

-                except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
+                except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
                     if e.status_code == 403:
                         err_text = "Error - 403 (Access denied) received"
                     elif e.status_code == 404:
@@ -380,23 +379,23 @@ class update_worker(threading.Thread):

                     process_changedetection_results = False

-                except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e:
+                except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
                     # Yes fine, so nothing todo, don't continue to process.
                     process_changedetection_results = False
                     changed_detected = False
-                except content_fetchers.exceptions.BrowserConnectError as e:
+                except content_fetchers_exceptions.BrowserConnectError as e:
                     self.datastore.update_watch(uuid=uuid,
                                                 update_obj={'last_error': e.msg
                                                             }
                                                 )
                     process_changedetection_results = False
-                except content_fetchers.exceptions.BrowserFetchTimedOut as e:
+                except content_fetchers_exceptions.BrowserFetchTimedOut as e:
                     self.datastore.update_watch(uuid=uuid,
                                                 update_obj={'last_error': e.msg
                                                             }
                                                 )
                     process_changedetection_results = False
-                except content_fetchers.exceptions.BrowserStepsStepException as e:
+                except content_fetchers_exceptions.BrowserStepsStepException as e:

                     if not self.datastore.data['watching'].get(uuid):
                         continue
@@ -438,25 +437,25 @@ class update_worker(threading.Thread):

                     process_changedetection_results = False

-                except content_fetchers.exceptions.EmptyReply as e:
+                except content_fetchers_exceptions.EmptyReply as e:
                     # Some kind of custom to-str handler in the exception handler that does this?
                     err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                        'last_check_status': e.status_code})
                     process_changedetection_results = False
-                except content_fetchers.exceptions.ScreenshotUnavailable as e:
+                except content_fetchers_exceptions.ScreenshotUnavailable as e:
                     err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                        'last_check_status': e.status_code})
                     process_changedetection_results = False
-                except content_fetchers.exceptions.JSActionExceptions as e:
+                except content_fetchers_exceptions.JSActionExceptions as e:
                     err_text = "Error running JS Actions - Page request - "+e.message
                     if e.screenshot:
                         watch.save_screenshot(screenshot=e.screenshot, as_error=True)
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                        'last_check_status': e.status_code})
                     process_changedetection_results = False
-                except content_fetchers.exceptions.PageUnloadable as e:
+                except content_fetchers_exceptions.PageUnloadable as e:
                     err_text = "Page request from server didnt respond correctly"
                     if e.message:
                         err_text = "{} - {}".format(err_text, e.message)
@@ -468,7 +467,7 @@ class update_worker(threading.Thread):
                                                                        'last_check_status': e.status_code,
                                                                        'has_ldjson_price_data': None})
                     process_changedetection_results = False
-                except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e:
+                except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
                     err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                     process_changedetection_results = False
```
```diff
@@ -18,7 +18,7 @@ services:
         #
         # Log levels are in descending order. (TRACE is the most detailed one)
         # Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
-        # - LOGGER_LEVEL=DEBUG
+        # - LOGGER_LEVEL=TRACE
         #
         # Alternative WebDriver/selenium URL, do not use "'s or 's!
         # - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
@@ -29,8 +29,9 @@ services:
         #
         # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
         #
-        # Alternative Playwright URL, do not use "'s or 's!
-        # - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
+        # Alternative target "Chrome" Playwright URL, do not use "'s or 's!
+        # "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
+        # - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
         #
         # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
         #
@@ -73,10 +74,10 @@ services:
    #       condition: service_started

-   # Used for fetching pages via Playwright+Chrome where you need Javascript support.
+   # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
    # RECOMMENDED FOR FETCHING PAGES WITH CHROME
-   # playwright-chrome:
-   #     hostname: playwright-chrome
+   # sockpuppetbrowser:
+   #     hostname: sockpuppetbrowser
    #     image: dgtlmoon/sockpuppetbrowser:latest
    #     cap_add:
    #         - SYS_ADMIN
```
```diff
@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
 # jq not available on Windows so must be installed manually

 # Notification library
-apprise~=1.8.0
+apprise~=1.8.1

 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 # and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
@@ -79,8 +79,9 @@ pyppeteerstealth>=0.0.4
 pytest ~=7.2
 pytest-flask ~=1.2

-# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
-jsonschema==4.17.3
+# Anything 4.0 and up but not 5.0
+jsonschema ~= 4.0

 loguru
```