mirror of https://github.com/dgtlmoon/changedetection.io.git
synced 2025-10-31 22:57:18 +00:00

Compare commits: unpin-json ... post-reque (2 commits)

| Author | SHA1 | Date |
|---|---|---|
|  | d3f6b92670 |  |
|  | 1813977133 |  |
@@ -2,7 +2,7 @@
 
 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
 
-__version__ = '0.46.03'
+__version__ = '0.46.01'
 
 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
@@ -85,8 +85,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
             playwright_browser=browsersteps_start_session['browser'],
             proxy=proxy,
-            start_url=datastore.data['watching'][watch_uuid].get('url'),
-            headers=datastore.data['watching'][watch_uuid].get('headers')
+            start_url=datastore.data['watching'][watch_uuid].get('url')
         )
 
         # For test
@@ -65,8 +65,8 @@ class Fetcher():
 
     def __init__(self):
         import importlib.resources
-        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
-        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
+        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
+        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
 
     @abstractmethod
     def get_error(self):
@@ -81,8 +81,7 @@ class Fetcher():
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
-           is_binary=False,
-           empty_pages_are_a_change=False):
+           is_binary=False):
        # Should set self.error, self.status_code and self.content
        pass
 
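Pieced together from this hunk and the `run()` wrapper in the puppeteer fetcher further down, the newer side of the diff gives the abstract `Fetcher.run()` roughly the following shape. This is a sketch for orientation only, not a verbatim copy of `content_fetchers/base.py`:

```python
from abc import ABC, abstractmethod


class Fetcher(ABC):
    # Sketch assembled from the diff hunks; the real base class carries many more attributes.
    @abstractmethod
    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False,
            empty_pages_are_a_change=False):
        # Should set self.error, self.status_code and self.content
        pass
```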
@@ -83,8 +83,7 @@ class fetcher(Fetcher):
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
-           is_binary=False,
-           empty_pages_are_a_change=False):
+           is_binary=False):
 
        from playwright.sync_api import sync_playwright
        import playwright._impl._errors
@@ -131,7 +130,7 @@ class fetcher(Fetcher):
            if response is None:
                context.close()
                browser.close()
-               logger.debug("Content Fetcher > Response object from the browser communication was none")
+               logger.debug("Content Fetcher > Response object was none")
                raise EmptyReply(url=url, status_code=None)
 
            try:
@@ -167,10 +166,10 @@ class fetcher(Fetcher):
 
                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
 
-           if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
-               logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
+           if len(self.page.content().strip()) == 0:
                context.close()
                browser.close()
+               logger.debug("Content Fetcher > Content was empty")
                raise EmptyReply(url=url, status_code=response.status)
 
            # Run Browser Steps here
@@ -75,8 +75,7 @@ class fetcher(Fetcher):
                          request_method,
                          ignore_status_codes,
                          current_include_filters,
-                         is_binary,
-                         empty_pages_are_a_change
+                         is_binary
                          ):
 
        from changedetectionio.content_fetchers import visualselector_xpath_selectors
@@ -154,7 +153,7 @@ class fetcher(Fetcher):
        if response is None:
            await self.page.close()
            await browser.close()
-           logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
+           logger.warning("Content Fetcher > Response object was none")
            raise EmptyReply(url=url, status_code=None)
 
        self.headers = response.headers
@@ -187,11 +186,10 @@ class fetcher(Fetcher):
 
            raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
        content = await self.page.content
-
-       if not empty_pages_are_a_change and len(content.strip()) == 0:
-           logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
+       if len(content.strip()) == 0:
            await self.page.close()
            await browser.close()
+           logger.error("Content Fetcher > Content was empty")
            raise EmptyReply(url=url, status_code=response.status)
 
        # Run Browser Steps here
@@ -249,7 +247,7 @@ class fetcher(Fetcher):
        await self.fetch_page(**kwargs)
 
    def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
-           current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
+           current_include_filters=None, is_binary=False):
 
        #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
        max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
@@ -264,8 +262,7 @@ class fetcher(Fetcher):
            request_method=request_method,
            ignore_status_codes=ignore_status_codes,
            current_include_filters=current_include_filters,
-           is_binary=is_binary,
-           empty_pages_are_a_change=empty_pages_are_a_change
+           is_binary=is_binary
            ), timeout=max_time))
        except asyncio.TimeoutError:
            raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
@@ -1,8 +1,9 @@
-from loguru import logger
-import chardet
 import hashlib
 import os
 
+import chardet
 import requests
 
 from changedetectionio import strtobool
 from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
 from changedetectionio.content_fetchers.base import Fetcher
@@ -25,8 +26,7 @@ class fetcher(Fetcher):
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
-           is_binary=False,
-           empty_pages_are_a_change=False):
+           is_binary=False):
 
        if self.browser_steps_get_valid_steps():
            raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -74,10 +74,7 @@ class fetcher(Fetcher):
        self.headers = r.headers
 
        if not r.content or not len(r.content):
-           if not empty_pages_are_a_change:
-               raise EmptyReply(url=url, status_code=r.status_code)
-           else:
-               logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True")
+           raise EmptyReply(url=url, status_code=r.status_code)
 
        # @todo test this
        # @todo maybe you really want to test zero-byte return pages?
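The removed branch above is the whole behavioural difference for the plain requests fetcher: a zero-byte body only raises `EmptyReply` when `empty_pages_are_a_change` is off. A minimal sketch of that guard, using `EmptyReply` from `changedetectionio.content_fetchers.exceptions` as imported earlier in this file (the helper name here is illustrative, not from the codebase):

```python
from loguru import logger

from changedetectionio.content_fetchers.exceptions import EmptyReply


def guard_empty_body(r, url, empty_pages_are_a_change=False):
    # Sketch of the removed branch: `r` is a requests.Response-like object
    if not r.content or not len(r.content):
        if not empty_pages_are_a_change:
            raise EmptyReply(url=url, status_code=r.status_code)
        logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, "
                     f"but empty_pages_are_a_change = True")
```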
@@ -56,8 +56,7 @@ class fetcher(Fetcher):
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
-           is_binary=False,
-           empty_pages_are_a_change=False):
+           is_binary=False):
 
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options as ChromeOptions
@@ -1377,19 +1377,17 @@ def changedetection_app(config=None, datastore_o=None):
        import brotli
 
        watch = datastore.data['watching'].get(uuid)
-       if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
-           latest_filename = list(watch.history.keys())[-1]
+       if watch and os.path.isdir(watch.watch_data_dir):
+           latest_filename = list(watch.history.keys())[0]
            html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
-           with open(html_fname, 'rb') as f:
-               if html_fname.endswith('.br'):
-                   # Read and decompress the Brotli file
+           if html_fname.endswith('.br'):
+               # Read and decompress the Brotli file
+               with open(html_fname, 'rb') as f:
                    decompressed_data = brotli.decompress(f.read())
-               else:
-                   decompressed_data = f.read()
 
-           buffer = BytesIO(decompressed_data)
+               buffer = BytesIO(decompressed_data)
 
-           return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
+               return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
 
 
        # Return a 500 error
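For orientation, the removed (left-hand) side of the hunk above reads the newest snapshot rather than the oldest, and only Brotli-decompresses when the filename ends in `.br`. A condensed sketch of that flow; the helper name and return shape are illustrative, not from the codebase:

```python
import os
from io import BytesIO

import brotli


def latest_snapshot_buffer(watch):
    # Newest history entry, not the oldest - the [-1] vs [0] change in the hunk above
    latest_filename = list(watch.history.keys())[-1]
    html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
    with open(html_fname, 'rb') as f:
        raw = f.read()
    # Only decompress when the snapshot was actually stored Brotli-compressed
    data = brotli.decompress(raw) if html_fname.endswith('.br') else raw
    return BytesIO(data), f"{latest_filename}.html"
```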
@@ -26,8 +26,6 @@ class difference_detection_processor():
 
    def call_browser(self):
-       from requests.structures import CaseInsensitiveDict
-       from changedetectionio.content_fetchers.exceptions import EmptyReply
 
        # Protect against file:// access
        if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -135,18 +133,8 @@ class difference_detection_processor():
        is_binary = self.watch.is_pdf
 
        # And here we go! call the right browser with browser-specific settings
-       empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
-
-       self.fetcher.run(url=url,
-                        timeout=timeout,
-                        request_headers=request_headers,
-                        request_body=request_body,
-                        request_method=request_method,
-                        ignore_status_codes=ignore_status_codes,
-                        current_include_filters=self.watch.get('include_filters'),
-                        is_binary=is_binary,
-                        empty_pages_are_a_change=empty_pages_are_a_change
-                        )
+       self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
+                        is_binary=is_binary)
 
        #@todo .quit here could go on close object, so we can run JS if change-detected
        self.fetcher.quit()
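The removed keyword-argument call is where the global `empty_pages_are_a_change` setting gets wired from the datastore into whichever fetcher is active. A compact sketch of that hand-off; the wrapper function and its parameter list are illustrative, while the attribute and setting names are the ones visible in the hunk:

```python
def run_fetch(processor, url, timeout, request_headers, request_body,
              request_method, ignore_status_codes):
    # Sketch: `processor` stands in for difference_detection_processor (self in the hunk above)
    empty_pages_are_a_change = processor.datastore.data['settings']['application'].get(
        'empty_pages_are_a_change', False)

    processor.fetcher.run(url=url,
                          timeout=timeout,
                          request_headers=request_headers,
                          request_body=request_body,
                          request_method=request_method,
                          ignore_status_codes=ignore_status_codes,
                          current_include_filters=processor.watch.get('include_filters'),
                          is_binary=processor.watch.is_pdf,
                          empty_pages_are_a_change=empty_pages_are_a_change)
    processor.fetcher.quit()
```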
@@ -1,12 +1,11 @@
 
-from babel.numbers import parse_decimal
 from changedetectionio.model.Watch import model as BaseWatch
-from typing import Union
 import re
+from babel.numbers import parse_decimal
 
 class Restock(dict):
 
-    def parse_currency(self, raw_value: str) -> Union[float, None]:
+    def parse_currency(self, raw_value: str) -> float:
        # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
        standardized_value = raw_value
 
@@ -22,11 +21,8 @@ class Restock(dict):
        # Remove any non-numeric characters except for the decimal point
        standardized_value = re.sub(r'[^\d.-]', '', standardized_value)
 
-       if standardized_value:
-           # Convert to float
-           return float(parse_decimal(standardized_value, locale='en'))
-
-       return None
+       # Convert to float
+       return float(parse_decimal(standardized_value, locale='en'))
 
    def __init__(self, *args, **kwargs):
        # Define default values
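The two `parse_currency` hunks above amount to a return-type change: the newer side returns `Union[float, None]` and only parses when something numeric is left after cleaning. A standalone sketch of that variant (the full method also normalises thousand-separator styles like `1,400.00` before this step):

```python
import re
from typing import Union

from babel.numbers import parse_decimal


def parse_currency(raw_value: str) -> Union[float, None]:
    # Keep digits, minus sign and decimal point only (the real method normalises separators first)
    standardized_value = re.sub(r'[^\d.-]', '', raw_value)
    if standardized_value:
        # Convert to float
        return float(parse_decimal(standardized_value, locale='en'))
    return None
```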
@@ -76,7 +76,7 @@
            </div>
            <div class="pure-control-group">
                {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
-               <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
+               <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
            </div>
            {% if form.requests.proxy %}
            <div class="pure-control-group inline-radio">
@@ -112,7 +112,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):
    res = client.post(
        url_for("settings_page"),
        data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
-             "application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了',
+             "application-notification_body": 'triggered text was -{{triggered_text}}-',
              # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
              "application-notification_urls": test_notification_url,
              "application-minutes_between_check": 180,
@@ -167,10 +167,9 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):
    # Takes a moment for apprise to fire
    time.sleep(3)
    assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
-   with open("test-datastore/notification.txt", 'rb') as f:
-       response = f.read()
-       assert b'-Oh yes please-' in response
-       assert '网站监测 内容更新了'.encode('utf-8') in response
+   with open("test-datastore/notification.txt", 'r') as f:
+       response= f.read()
+       assert '-Oh yes please-' in response
 
 
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -69,12 +69,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
 
    wait_for_all_checks(client)
 
-   uuid = extract_UUID_from_client(client)
-
-   # Check the 'get latest snapshot works'
-   res = client.get(url_for("watch_get_latest_html", uuid=uuid))
-   assert b'which has this one new line' in res.data
-
    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
@@ -92,7 +86,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
    assert expected_url.encode('utf-8') in res.data
 
    # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
-   res = client.get(url_for("diff_history_page", uuid=uuid))
+   res = client.get(url_for("diff_history_page", uuid="first"))
    assert b'selected=""' in res.data, "Confirm diff history page loaded"
 
    # Check the [preview] pulls the right one
@@ -149,12 +143,18 @@ def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
    assert b'unviewed' not in res.data
 
    # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
    uuid = extract_UUID_from_client(client)
    client.get(url_for("clear_watch_history", uuid=uuid))
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'preview/' in res.data
+
+
+   # Check the 'get latest snapshot works'
+   res = client.get(url_for("watch_get_latest_html", uuid=uuid))
+   assert b'<head><title>head title</title></head>' in res.data
+
    #
    # Cleanup everything
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -1,7 +1,12 @@
 #!/usr/bin/env python3
 
 import time
 from flask import url_for
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
+from urllib.request import urlopen
+from .util import set_original_response, set_modified_response, live_server_setup
+
+sleep_time_for_fetch_thread = 3
+
 
 def set_nonrenderable_response():
    test_return_data = """<html>
@@ -17,13 +22,6 @@ def set_nonrenderable_response():
 
    return None
 
-def set_zero_byte_response():
-
-   with open("test-datastore/endpoint-content.txt", "w") as f:
-       f.write("")
-
-   return None
-
 def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
    set_original_response()
    live_server_setup(live_server)
@@ -37,11 +35,18 @@ def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
 
    assert b"1 Imported" in res.data
 
-   wait_for_all_checks(client)
+   time.sleep(sleep_time_for_fetch_thread)
 
-   # It should report nothing found (no new 'unviewed' class)
-   res = client.get(url_for("index"))
-   assert b'unviewed' not in res.data
+   # Do this a few times.. ensures we dont accidently set the status
+   for n in range(3):
+       client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
+       # Give the thread time to pick it up
+       time.sleep(sleep_time_for_fetch_thread)
+
+       # It should report nothing found (no new 'unviewed' class)
+       res = client.get(url_for("index"))
+       assert b'unviewed' not in res.data
 
 
    #####################
@@ -59,7 +64,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 
    # Give the thread time to pick it up
-   wait_for_all_checks(client)
+   time.sleep(sleep_time_for_fetch_thread)
 
    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
@@ -81,20 +86,14 @@ def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 
    # Give the thread time to pick it up
-   wait_for_all_checks(client)
+   time.sleep(sleep_time_for_fetch_thread)
 
    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
-   client.get(url_for("mark_all_viewed"), follow_redirects=True)
-
-   # A totally zero byte (#2528) response should also not trigger an error
-   set_zero_byte_response()
-   client.get(url_for("form_watch_checknow"), follow_redirects=True)
-   wait_for_all_checks(client)
-   res = client.get(url_for("index"))
-   assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
-   assert b'fetch-error' not in res.data
 
 
    #
    # Cleanup everything
@@ -291,11 +291,11 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage):
        data={
            "application-fetch_backend": "html_requests",
            "application-minutes_between_check": 180,
-           "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
+           "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444 }',
            "application-notification_format": default_notification_format,
            "application-notification_urls": test_notification_url,
            # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
-           "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
+           "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
        },
        follow_redirects=True
    )
@@ -324,7 +324,6 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage):
    j = json.loads(x)
    assert j['url'].startswith('http://localhost')
    assert j['secret'] == 444
-   assert j['somebug'] == '网站监测 内容更新了'
 
    # URL check, this will always be converted to lowercase
    assert os.path.isfile("test-datastore/notification-url.txt")
@@ -355,10 +354,9 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage):
 #2510
 def test_global_send_test_notification(client, live_server, measure_memory_usage):
 
    #live_server_setup(live_server)
-   set_original_response()
    if os.path.isfile("test-datastore/notification.txt"):
        os.unlink("test-datastore/notification.txt")
 
    # otherwise other settings would have already existed from previous tests in this file
    res = client.post(
@@ -366,8 +364,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage):
        data={
            "application-fetch_backend": "html_requests",
            "application-minutes_between_check": 180,
-           #1995 UTF-8 content should be encoded
-           "application-notification_body": 'change detection is cool 网站监测 内容更新了',
+           "application-notification_body": 'change detection is cool',
            "application-notification_format": default_notification_format,
            "application-notification_urls": "",
            "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
@@ -402,7 +399,8 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage):
 
    with open("test-datastore/notification.txt", 'r') as f:
        x = f.read()
-       assert 'change detection is cool 网站监测 内容更新了' in x
+       assert 'change detection is coo' in x
+
 
    os.unlink("test-datastore/notification.txt")
@@ -422,7 +420,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage):
    with open("test-datastore/notification.txt", 'r') as f:
        x = f.read()
        # Should come from notification.py default handler when there is no notification body to pull from
-       assert 'change detection is cool 网站监测 内容更新了' in x
+       assert 'change detection is coo' in x
 
    client.get(
        url_for("form_delete", uuid="all"),
@@ -1,5 +1,6 @@
 from .processors.exceptions import ProcessorException
-import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
+from . import content_fetchers
+
 
 from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio import html_tools
@@ -300,7 +301,7 @@ class update_worker(threading.Thread):
            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
            process_changedetection_results = False
 
-       except content_fetchers_exceptions.ReplyWithContentButNoText as e:
+       except content_fetchers.exceptions.ReplyWithContentButNoText as e:
            # Totally fine, it's by choice - just continue on, nothing more to care about
            # Page had elements/content but no renderable text
            # Backend (not filters) gave zero output

@@ -326,7 +327,7 @@ class update_worker(threading.Thread):
 
            process_changedetection_results = False
 
-       except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
+       except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 403:
                err_text = "Error - 403 (Access denied) received"
            elif e.status_code == 404:

@@ -379,23 +380,23 @@ class update_worker(threading.Thread):
 
            process_changedetection_results = False
 
-       except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
+       except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e:
            # Yes fine, so nothing todo, don't continue to process.
            process_changedetection_results = False
            changed_detected = False
-       except content_fetchers_exceptions.BrowserConnectError as e:
+       except content_fetchers.exceptions.BrowserConnectError as e:
            self.datastore.update_watch(uuid=uuid,
                                        update_obj={'last_error': e.msg
                                                    }
                                        )
            process_changedetection_results = False
-       except content_fetchers_exceptions.BrowserFetchTimedOut as e:
+       except content_fetchers.exceptions.BrowserFetchTimedOut as e:
            self.datastore.update_watch(uuid=uuid,
                                        update_obj={'last_error': e.msg
                                                    }
                                        )
            process_changedetection_results = False
-       except content_fetchers_exceptions.BrowserStepsStepException as e:
+       except content_fetchers.exceptions.BrowserStepsStepException as e:
 
            if not self.datastore.data['watching'].get(uuid):
                continue

@@ -437,25 +438,25 @@ class update_worker(threading.Thread):
 
            process_changedetection_results = False
 
-       except content_fetchers_exceptions.EmptyReply as e:
+       except content_fetchers.exceptions.EmptyReply as e:
            # Some kind of custom to-str handler in the exception handler that does this?
            err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                               'last_check_status': e.status_code})
            process_changedetection_results = False
-       except content_fetchers_exceptions.ScreenshotUnavailable as e:
+       except content_fetchers.exceptions.ScreenshotUnavailable as e:
            err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                               'last_check_status': e.status_code})
            process_changedetection_results = False
-       except content_fetchers_exceptions.JSActionExceptions as e:
+       except content_fetchers.exceptions.JSActionExceptions as e:
            err_text = "Error running JS Actions - Page request - "+e.message
            if e.screenshot:
                watch.save_screenshot(screenshot=e.screenshot, as_error=True)
            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                               'last_check_status': e.status_code})
            process_changedetection_results = False
-       except content_fetchers_exceptions.PageUnloadable as e:
+       except content_fetchers.exceptions.PageUnloadable as e:
            err_text = "Page request from server didnt respond correctly"
            if e.message:
                err_text = "{} - {}".format(err_text, e.message)

@@ -467,7 +468,7 @@ class update_worker(threading.Thread):
                                                               'last_check_status': e.status_code,
                                                               'has_ldjson_price_data': None})
            process_changedetection_results = False
-       except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
+       except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e:
            err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
            process_changedetection_results = False
@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
 # jq not available on Windows so must be installed manually
 
 # Notification library
-apprise~=1.8.1
+apprise~=1.8.0
 
 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 # and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
@@ -79,9 +79,8 @@ pyppeteerstealth>=0.0.4
 pytest ~=7.2
 pytest-flask ~=1.2
 
-# Anything 4.0 and up but not 5.0
-jsonschema ~= 4.0
-
+# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
+jsonschema==4.17.3
 
 loguru