Compare commits

..

6 Commits

14 changed files with 391 additions and 339 deletions

1
.gitignore vendored
View File

@@ -10,5 +10,6 @@ dist
venv
test-datastore/*
test-datastore
test-memory.log
*.egg-info*
.vscode/settings.json

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.47.03'
__version__ = '0.47.04'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -13,6 +13,7 @@ from loguru import logger
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
import json
from urllib.parse import unquote_plus
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
@@ -47,7 +48,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
if results:
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
headers = {unquote_plus(x): unquote_plus(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
@@ -55,14 +56,14 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
params[unquote_plus(k)] = unquote_plus(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')))
auth = (unquote_plus(results.get('user')))
# Try to auto-guess if it's JSON
h = 'application/json; charset=utf-8'

View File

@@ -30,6 +30,8 @@ function isItemInStock() {
'dieser artikel ist bald wieder verfügbar',
'dostępne wkrótce',
'en rupture de stock',
'esgotado',
'indisponível',
'isn\'t in stock right now',
'isnt in stock right now',
'isnt in stock right now',
@@ -57,6 +59,7 @@ function isItemInStock() {
'notify me when available',
'notify me',
'notify when available',
'não disponível',
'não estamos a aceitar encomendas',
'out of stock',
'out-of-stock',

File diff suppressed because it is too large Load Diff

View File

@@ -515,6 +515,7 @@ class processor_text_json_diff_form(commonSettingsForm):
if not super().validate():
return False
from changedetectionio.safe_jinja import render as jinja_render
result = True
# Fail form validation when a body is set for a GET
@@ -524,18 +525,46 @@ class processor_text_json_diff_form(commonSettingsForm):
# Attempt to validate jinja2 templates in the URL
try:
from changedetectionio.safe_jinja import render as jinja_render
jinja_render(template_str=self.url.data)
except ModuleNotFoundError as e:
# incase jinja2_time or others is missing
logger.error(e)
self.url.errors.append(e)
self.url.errors.append(f'Invalid template syntax configuration: {e}')
result = False
except Exception as e:
logger.error(e)
self.url.errors.append('Invalid template syntax')
self.url.errors.append(f'Invalid template syntax: {e}')
result = False
# Attempt to validate jinja2 templates in the body
if self.body.data and self.body.data.strip():
try:
jinja_render(template_str=self.body.data)
except ModuleNotFoundError as e:
# incase jinja2_time or others is missing
logger.error(e)
self.body.errors.append(f'Invalid template syntax configuration: {e}')
result = False
except Exception as e:
logger.error(e)
self.body.errors.append(f'Invalid template syntax: {e}')
result = False
# Attempt to validate jinja2 templates in the headers
if len(self.headers.data) > 0:
try:
for header, value in self.headers.data.items():
jinja_render(template_str=value)
except ModuleNotFoundError as e:
# incase jinja2_time or others is missing
logger.error(e)
self.headers.errors.append(f'Invalid template syntax configuration: {e}')
result = False
except Exception as e:
logger.error(e)
self.headers.errors.append(f'Invalid template syntax in "{header}" header: {e}')
result = False
return result
class SingleExtraProxy(Form):

View File

@@ -102,6 +102,7 @@ class difference_detection_processor():
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones
from changedetectionio.safe_jinja import render as jinja_render
request_headers = CaseInsensitiveDict()
ua = self.datastore.data['settings']['requests'].get('default_ua')
@@ -118,9 +119,15 @@ class difference_detection_processor():
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
for header_name in request_headers:
request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})
timeout = self.datastore.data['settings']['requests'].get('timeout')
request_body = self.watch.get('body')
if request_body:
request_body = jinja_render(template_str=self.watch.get('body'))
request_method = self.watch.get('method')
ignore_status_codes = self.watch.get('ignore_status_codes', False)

View File

@@ -28,17 +28,14 @@ $(document).ready(function() {
url: notification_base_url,
data : data,
statusCode: {
400: function() {
// More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page.");
400: function(data) {
// More than likely the CSRF token was lost when the server restarted
alert(data.responseText);
}
}
}).done(function(data){
console.log(data);
alert(data);
}).fail(function(data){
console.log(data);
alert('There was an error communicating with the server.');
})
});
});

View File

@@ -65,8 +65,8 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
<div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
<div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
</div>
<div class="pure-control-group inline-radio">
{{ render_field(form.processor) }}
@@ -149,21 +149,24 @@
{{ render_field(form.method) }}
</div>
<div id="request-body">
{{ render_field(form.body, rows=5, placeholder="Example
{{ render_field(form.body, rows=7, placeholder="Example
{
\"name\":\"John\",
\"age\":30,
\"car\":null
\"car\":null,
\"year\":{% now 'Europe/Berlin', '%Y' %}
}") }}
</div>
<div class="pure-form-message">Variables are supported in the request body (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
</div>
</fieldset>
<!-- hmm -->
<div class="pure-control-group advanced-options" style="display: none;">
{{ render_field(form.headers, rows=5, placeholder="Example
{{ render_field(form.headers, rows=7, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0") }}
User-Agent: wonderbra 1.0
Math: {{ 1 + 1 }}") }}
<div class="pure-form-message">Variables are supported in the request header values (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
<div class="pure-form-message-inline">
{% if has_extra_headers_file %}
<strong>Alert! Extra headers file found and will be added to this watch!</strong>

View File

@@ -284,7 +284,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
# CUSTOM JSON BODY CHECK for POST://
set_original_response()
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#header-manipulation
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123"
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123&+second=hello+world%20%22space%22"
res = client.post(
url_for("settings_page"),
@@ -326,6 +326,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
assert j['secret'] == 444
assert j['somebug'] == '网站监测 内容更新了'
# URL check, this will always be converted to lowercase
assert os.path.isfile("test-datastore/notification-url.txt")
with open("test-datastore/notification-url.txt", 'r') as f:
@@ -337,6 +338,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
with open("test-datastore/notification-headers.txt", 'r') as f:
notification_headers = f.read()
assert 'custom-header: 123' in notification_headers.lower()
assert 'second: hello world "space"' in notification_headers.lower()
# Should always be automatically detected as JSON content type even when we set it as 'Text' (default)
@@ -429,24 +431,15 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
follow_redirects=True
)
#2727 - be sure a test notification when there are zero watches works ( should all be deleted now)
os.unlink("test-datastore/notification.txt")
######### Test global/system settings
######### Test global/system settings - When everything is deleted it should give a helpful error
# See #2727
res = client.post(
url_for("ajax_callback_send_notification_test")+"?mode=global-settings",
data={"notification_urls": test_notification_url},
follow_redirects=True
)
assert res.status_code == 400
assert b"Error: You must have atleast one watch configured for 'test notification' to work" in res.data
assert res.status_code != 400
assert res.status_code != 500
# Give apprise time to fire
time.sleep(4)
with open("test-datastore/notification.txt", 'r') as f:
x = f.read()
assert 'change detection is cool 网站监测 内容更新了' in x

View File

@@ -45,7 +45,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
"url": test_url,
"tags": "",
"fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"headers": "xxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
"headers": "jinja2:{{ 1+1 }}\nxxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -61,6 +61,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
)
# Flask will convert the header key to uppercase
assert b"Jinja2:2" in res.data
assert b"Xxx:ooo" in res.data
assert b"Cool:yeah" in res.data
@@ -117,7 +118,8 @@ def test_body_in_request(client, live_server, measure_memory_usage):
wait_for_all_checks(client)
# Now the change which should trigger a change
body_value = 'Test Body Value'
body_value = 'Test Body Value {{ 1+1 }}'
body_value_formatted = 'Test Body Value 2'
res = client.post(
url_for("edit_page", uuid="first"),
data={
@@ -140,8 +142,9 @@ def test_body_in_request(client, live_server, measure_memory_usage):
# If this gets stuck something is wrong, something should always be there
assert b"No history found" not in res.data
# We should see what we sent in the reply
assert str.encode(body_value) in res.data
# We should see the formatted value of what we sent in the reply
assert str.encode(body_value) not in res.data
assert str.encode(body_value_formatted) in res.data
####### data sanity checks
# Add the test URL twice, we will check

View File

@@ -3,7 +3,7 @@ import os
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, extract_UUID_from_client
from ..notification import default_notification_format
instock_props = [
@@ -367,6 +367,12 @@ def test_change_with_notification_values(client, live_server):
assert "new price 1950.45" in notification
assert "title new price 1950.45" in notification
## Now test the "SEND TEST NOTIFICATION" is working
os.unlink("test-datastore/notification.txt")
uuid = extract_UUID_from_client(client)
res = client.post(url_for("ajax_callback_send_notification_test", watch_uuid=uuid), data={}, follow_redirects=True)
time.sleep(5)
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
def test_data_sanity(client, live_server):

View File

@@ -19,9 +19,11 @@ from loguru import logger
class update_worker(threading.Thread):
current_uuid = None
def __init__(self, app, *args, **kwargs):
def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
self.q = q
self.app = app
self.notification_q = notification_q
self.datastore = datastore
super().__init__(*args, **kwargs)
def queue_notification_for_watch(self, notification_q, n_object, watch):
@@ -100,19 +102,19 @@ class update_worker(threading.Thread):
v = watch.get(var_name)
if v and not watch.get('notification_muted'):
if var_name == 'notification_format' and v == default_notification_format_for_watch:
return self.app.datastore.data['settings']['application'].get('notification_format')
return self.datastore.data['settings']['application'].get('notification_format')
return v
tags = self.app.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
if tags:
for tag_uuid, tag in tags.items():
v = tag.get(var_name)
if v and not tag.get('notification_muted'):
return v
if self.app.datastore.data['settings']['application'].get(var_name):
return self.app.datastore.data['settings']['application'].get(var_name)
if self.datastore.data['settings']['application'].get(var_name):
return self.datastore.data['settings']['application'].get(var_name)
# Otherwise could be defaults
if var_name == 'notification_format':
@@ -127,7 +129,7 @@ class update_worker(threading.Thread):
def send_content_changed_notification(self, watch_uuid):
n_object = {}
watch = self.app.datastore.data['watching'].get(watch_uuid)
watch = self.datastore.data['watching'].get(watch_uuid)
if not watch:
return
@@ -154,17 +156,17 @@ class update_worker(threading.Thread):
queued = True
count = watch.get('notification_alert_count', 0) + 1
self.app.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
self.queue_notification_for_watch(notification_q=self.app.notification_q, n_object=n_object, watch=watch)
self.queue_notification_for_watch(notification_q=self.notification_q, n_object=n_object, watch=watch)
return queued
def send_filter_failure_notification(self, watch_uuid):
threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
watch = self.app.datastore.data['watching'].get(watch_uuid)
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
watch = self.datastore.data['watching'].get(watch_uuid)
if not watch:
return
@@ -177,8 +179,8 @@ class update_worker(threading.Thread):
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']
elif len(self.app.datastore.data['settings']['application']['notification_urls']):
n_object['notification_urls'] = self.app.datastore.data['settings']['application']['notification_urls']
elif len(self.datastore.data['settings']['application']['notification_urls']):
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
# Only prepare to notify if the rules above matched
if 'notification_urls' in n_object:
@@ -187,16 +189,16 @@ class update_worker(threading.Thread):
'uuid': watch_uuid,
'screenshot': None
})
self.app.notification_q.put(n_object)
self.notification_q.put(n_object)
logger.debug(f"Sent filter not found notification for {watch_uuid}")
else:
logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.app.datastore.data['watching'].get(watch_uuid, False)
watch = self.datastore.data['watching'].get(watch_uuid, False)
if not watch:
return
threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
"did not appear on the page after {} attempts, did the page change layout? "
@@ -207,8 +209,8 @@ class update_worker(threading.Thread):
if len(watch['notification_urls']):
n_object['notification_urls'] = watch['notification_urls']
elif len(self.app.datastore.data['settings']['application']['notification_urls']):
n_object['notification_urls'] = self.app.datastore.data['settings']['application']['notification_urls']
elif len(self.datastore.data['settings']['application']['notification_urls']):
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
# Only prepare to notify if the rules above matched
if 'notification_urls' in n_object:
@@ -216,7 +218,7 @@ class update_worker(threading.Thread):
'watch_url': watch['url'],
'uuid': watch_uuid
})
self.app.notification_q.put(n_object)
self.notification_q.put(n_object)
logger.error(f"Sent step not found notification for {watch_uuid}")
@@ -224,7 +226,7 @@ class update_worker(threading.Thread):
# All went fine, remove error artifacts
cleanup_files = ["last-error-screenshot.png", "last-error.txt"]
for f in cleanup_files:
full_path = os.path.join(self.app.datastore.datastore_path, uuid, f)
full_path = os.path.join(self.datastore.datastore_path, uuid, f)
if os.path.isfile(full_path):
os.unlink(full_path)
@@ -235,23 +237,23 @@ class update_worker(threading.Thread):
update_handler = None
try:
queued_item_data = self.app.update_q.get(block=False)
queued_item_data = self.q.get(block=False)
except queue.Empty:
pass
else:
uuid = queued_item_data.item.get('uuid')
self.current_uuid = uuid
if uuid in list(self.app.datastore.data['watching'].keys()) and self.app.datastore.data['watching'][uuid].get('url'):
if uuid in list(self.datastore.data['watching'].keys()) and self.datastore.data['watching'][uuid].get('url'):
changed_detected = False
contents = b''
process_changedetection_results = True
update_obj = {}
# Clear last errors (move to preflight func?)
self.app.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
watch = self.app.datastore.data['watching'].get(uuid)
watch = self.datastore.data['watching'].get(uuid)
logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
now = time.time()
@@ -268,7 +270,7 @@ class update_worker(threading.Thread):
print(f"Processor module '{processor}' not found.")
raise e
update_handler = processor_module.perform_site_check(datastore=self.app.datastore,
update_handler = processor_module.perform_site_check(datastore=self.datastore,
watch_uuid=uuid
)
@@ -292,7 +294,7 @@ class update_worker(threading.Thread):
watch.save_screenshot(screenshot=e.screenshot)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
process_changedetection_results = False
except content_fetchers_exceptions.ReplyWithContentButNoText as e:
@@ -309,7 +311,7 @@ class update_worker(threading.Thread):
else:
extra_help = ", it's possible that the filters were found, but contained no usable text."
self.app.datastore.update_watch(uuid=uuid, update_obj={
self.datastore.update_watch(uuid=uuid, update_obj={
'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
})
@@ -341,15 +343,15 @@ class update_worker(threading.Thread):
if e.page_text:
watch.save_error_text(contents=e.page_text)
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False
except FilterNotFoundInResponse as e:
if not self.app.datastore.data['watching'].get(uuid):
if not self.datastore.data['watching'].get(uuid):
continue
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
if e.screenshot:
@@ -363,7 +365,7 @@ class update_worker(threading.Thread):
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
if c >= threshold:
if not watch.get('notification_muted'):
@@ -372,7 +374,7 @@ class update_worker(threading.Thread):
c = 0
logger.debug(f"Reset filter failure count back to zero")
self.app.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
else:
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
@@ -384,20 +386,20 @@ class update_worker(threading.Thread):
process_changedetection_results = False
changed_detected = False
except content_fetchers_exceptions.BrowserConnectError as e:
self.app.datastore.update_watch(uuid=uuid,
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers_exceptions.BrowserFetchTimedOut as e:
self.app.datastore.update_watch(uuid=uuid,
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers_exceptions.BrowserStepsStepException as e:
if not self.app.datastore.data['watching'].get(uuid):
if not self.datastore.data['watching'].get(uuid):
continue
error_step = e.step_n + 1
@@ -415,7 +417,7 @@ class update_worker(threading.Thread):
logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}")
self.app.datastore.update_watch(uuid=uuid,
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': err_text,
'browser_steps_last_error_step': error_step
}
@@ -425,7 +427,7 @@ class update_worker(threading.Thread):
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold:
@@ -433,26 +435,26 @@ class update_worker(threading.Thread):
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
c = 0
self.app.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
process_changedetection_results = False
except content_fetchers_exceptions.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers_exceptions.PageUnloadable as e:
@@ -463,26 +465,26 @@ class update_worker(threading.Thread):
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code,
'has_ldjson_price_data': None})
process_changedetection_results = False
except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False
logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
except Exception as e:
logger.error(f"Exception reached processing watch UUID: {uuid}")
logger.error(str(e))
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
# Other serious error
process_changedetection_results = False
else:
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
if not self.app.datastore.data['watching'].get(uuid):
if not self.datastore.data['watching'].get(uuid):
continue
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
@@ -496,14 +498,14 @@ class update_worker(threading.Thread):
self.cleanup_error_artifacts(uuid)
if not self.app.datastore.data['watching'].get(uuid):
if not self.datastore.data['watching'].get(uuid):
continue
#
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results:
# Extract <title> as title if possible/requested.
if self.app.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
if not watch['title'] or not len(watch['title']):
try:
update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
@@ -514,7 +516,7 @@ class update_worker(threading.Thread):
# Now update after running everything
timestamp = round(time.time())
try:
self.app.datastore.update_watch(uuid=uuid, update_obj=update_obj)
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# Also save the snapshot on the first time checked, "last checked" will always be updated, so we just check history length.
@@ -552,7 +554,7 @@ class update_worker(threading.Thread):
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
logger.critical("!!!! Exception in update_worker while processing process_changedetection_results !!!")
logger.critical(str(e))
self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
# Always record that we atleast tried
@@ -561,13 +563,13 @@ class update_worker(threading.Thread):
# Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
try:
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
self.app.datastore.update_watch(uuid=uuid,
self.datastore.update_watch(uuid=uuid,
update_obj={'remote_server_reply': server_header}
)
except Exception as e:
pass
self.app.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time()),
'check_count': count
})

View File

@@ -82,7 +82,7 @@ pytest-flask ~=1.2
# Anything 4.0 and up but not 5.0
jsonschema ~= 4.0
apscheduler ~= 3.9
loguru
# For scraping all possible metadata relating to products so we can do better restock detection