mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-01 07:08:47 +00:00
Compare commits
16 Commits
plugin-arc
...
filters-co
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5db65bcafd | ||
|
|
86832228ed | ||
|
|
bd10a1f7c4 | ||
|
|
ccbfa1e20e | ||
|
|
29d34bcd22 | ||
|
|
9b4fb80bef | ||
|
|
2ff65b53fb | ||
|
|
57de4ffe4f | ||
|
|
51e2e8a226 | ||
|
|
8887459462 | ||
|
|
460c724e51 | ||
|
|
dcf4bf37ed | ||
|
|
e3cf22fc27 | ||
|
|
d497db639e | ||
|
|
7355ac8d21 | ||
|
|
2f2d0ea0f2 |
@@ -38,7 +38,9 @@ from flask_paginate import Pagination, get_page_parameter
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.api import api_v1
|
||||
|
||||
__version__ = '0.45.1'
|
||||
__version__ = '0.45.2'
|
||||
|
||||
from changedetectionio.store import BASE_URL_NOT_SET_TEXT
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -356,12 +358,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
|
||||
# Description is the page you watch, link takes you to the diff JS UI page
|
||||
# Dict val base_url will get overridden with the env var if it is set.
|
||||
ext_base_url = datastore.data['settings']['application'].get('base_url')
|
||||
if ext_base_url:
|
||||
# Go with overridden value
|
||||
diff_link = {'href': "{}{}".format(ext_base_url, url_for('diff_history_page', uuid=watch['uuid'], _external=False))}
|
||||
else:
|
||||
diff_link = {'href': url_for('diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||
|
||||
# Because we are called via whatever web server, flask should figure out the right path.
|
||||
diff_link = {'href': url_for('diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||
|
||||
fe.link(link=diff_link)
|
||||
|
||||
@@ -714,7 +714,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
output = render_template("edit.html",
|
||||
available_processors=processors.available_processors(),
|
||||
browser_steps_config=browser_step_ui_config,
|
||||
current_base_url=datastore.data['settings']['application']['base_url'],
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
form=form,
|
||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
@@ -804,7 +803,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
output = render_template("settings.html",
|
||||
form=form,
|
||||
current_base_url = datastore.data['settings']['application']['base_url'],
|
||||
hide_remove_pass=os.getenv("SALTED_PASS", False),
|
||||
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
|
||||
@@ -77,11 +77,13 @@ class ScreenshotUnavailable(Exception):
|
||||
|
||||
|
||||
class ReplyWithContentButNoText(Exception):
|
||||
def __init__(self, status_code, url, screenshot=None):
|
||||
def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''):
|
||||
# Set this so we can use it in other parts of the app
|
||||
self.status_code = status_code
|
||||
self.url = url
|
||||
self.screenshot = screenshot
|
||||
self.has_filters = has_filters
|
||||
self.html_content = html_content
|
||||
return
|
||||
|
||||
|
||||
@@ -343,8 +345,8 @@ class base_html_playwright(Fetcher):
|
||||
'req_headers': request_headers,
|
||||
'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
|
||||
'url': url,
|
||||
'user_agent': request_headers.get('User-Agent', 'Mozilla/5.0'),
|
||||
'proxy_username': self.proxy.get('username','') if self.proxy else False,
|
||||
'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
|
||||
'proxy_username': self.proxy.get('username', '') if self.proxy else False,
|
||||
'proxy_password': self.proxy.get('password', '') if self.proxy else False,
|
||||
'no_cache_list': [
|
||||
'twitter',
|
||||
@@ -443,7 +445,7 @@ class base_html_playwright(Fetcher):
|
||||
# Set user agent to prevent Cloudflare from blocking the browser
|
||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||
context = browser.new_context(
|
||||
user_agent=request_headers.get('User-Agent', 'Mozilla/5.0'),
|
||||
user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
|
||||
proxy=self.proxy,
|
||||
# This is needed to enable JavaScript execution on GitHub and others
|
||||
bypass_csp=True,
|
||||
@@ -684,7 +686,7 @@ class html_requests(Fetcher):
|
||||
is_binary=False):
|
||||
|
||||
# Make requests use a more modern looking user-agent
|
||||
if not 'User-Agent' in request_headers:
|
||||
if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None):
|
||||
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
||||
|
||||
|
||||
@@ -229,16 +229,19 @@ class ValidateJinja2Template(object):
|
||||
def __call__(self, form, field):
|
||||
from changedetectionio import notification
|
||||
|
||||
from jinja2 import Environment, BaseLoader, TemplateSyntaxError
|
||||
from jinja2 import Environment, BaseLoader, TemplateSyntaxError, UndefinedError
|
||||
from jinja2.meta import find_undeclared_variables
|
||||
|
||||
|
||||
try:
|
||||
jinja2_env = Environment(loader=BaseLoader)
|
||||
jinja2_env.globals.update(notification.valid_tokens)
|
||||
|
||||
rendered = jinja2_env.from_string(field.data).render()
|
||||
except TemplateSyntaxError as e:
|
||||
raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
|
||||
except UndefinedError as e:
|
||||
raise ValidationError(f"A variable or function is not defined: {e}") from e
|
||||
|
||||
ast = jinja2_env.parse(field.data)
|
||||
undefined = ", ".join(find_undeclared_variables(ast))
|
||||
@@ -502,7 +505,10 @@ class globalSettingsRequestForm(Form):
|
||||
class globalSettingsApplicationForm(commonSettingsForm):
|
||||
|
||||
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
|
||||
base_url = StringField('Base URL', validators=[validators.Optional()])
|
||||
base_url = StringField('Notification base URL override',
|
||||
validators=[validators.Optional()],
|
||||
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
|
||||
)
|
||||
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
|
||||
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||
|
||||
@@ -208,15 +208,11 @@ def create_notification_parameters(n_object, datastore):
|
||||
watch_tag = ''
|
||||
|
||||
# Create URLs to customise the notification with
|
||||
base_url = datastore.data['settings']['application']['base_url']
|
||||
# active_base_url - set in store.py data property
|
||||
base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||
|
||||
watch_url = n_object['watch_url']
|
||||
|
||||
# Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
|
||||
# like 'Join', so it's always best to at least set something obvious so that they are not broken.
|
||||
if base_url == '':
|
||||
base_url = "<base-url-env-var-not-set>"
|
||||
|
||||
diff_url = "{}/diff/{}".format(base_url, uuid)
|
||||
preview_url = "{}/preview/{}".format(base_url, uuid)
|
||||
|
||||
@@ -226,7 +222,7 @@ def create_notification_parameters(n_object, datastore):
|
||||
# Valid_tokens also used as a field validator
|
||||
tokens.update(
|
||||
{
|
||||
'base_url': base_url if base_url is not None else '',
|
||||
'base_url': base_url,
|
||||
'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else '',
|
||||
'diff': n_object.get('diff', ''), # Null default in the case we use a test
|
||||
'diff_added': n_object.get('diff_added', ''), # Null default in the case we use a test
|
||||
|
||||
@@ -314,7 +314,12 @@ class perform_site_check(difference_detection_processor):
|
||||
# Treat pages with no renderable text content as a change? No by default
|
||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
|
||||
raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
|
||||
raise content_fetcher.ReplyWithContentButNoText(url=url,
|
||||
status_code=fetcher.get_last_status_code(),
|
||||
screenshot=screenshot,
|
||||
has_filters=has_filter_rule,
|
||||
html_content=html_content
|
||||
)
|
||||
|
||||
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
||||
# in the future we'll implement other mechanisms.
|
||||
|
||||
@@ -18,7 +18,9 @@ module.exports = async ({page, context}) => {
|
||||
|
||||
await page.setBypassCSP(true)
|
||||
await page.setExtraHTTPHeaders(req_headers);
|
||||
await page.setUserAgent(user_agent);
|
||||
if (user_agent) {
|
||||
await page.setUserAgent(user_agent);
|
||||
}
|
||||
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
|
||||
|
||||
await page.setDefaultNavigationTimeout(0);
|
||||
|
||||
@@ -32,5 +32,10 @@ $(document).ready(function () {
|
||||
window.getSelection().removeAllRanges();
|
||||
|
||||
});
|
||||
|
||||
$("#notification-token-toggle").click(function (e) {
|
||||
e.preventDefault();
|
||||
$('#notification-tokens-info').toggle();
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -42,4 +42,8 @@ $(document).ready(function () {
|
||||
$('#notification_urls').val('');
|
||||
e.preventDefault();
|
||||
});
|
||||
$("#notification-token-toggle").click(function (e) {
|
||||
e.preventDefault();
|
||||
$('#notification-tokens-info').toggle();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -18,6 +18,9 @@ import threading
|
||||
import time
|
||||
import uuid as uuid_builder
|
||||
|
||||
# Because the server will run as a daemon and won't know the URL for notification links when firing off a notification
|
||||
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
|
||||
|
||||
dictfilt = lambda x, y: dict([ (i,x[i]) for i in x if i in set(y) ])
|
||||
|
||||
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
|
||||
@@ -175,12 +178,21 @@ class ChangeDetectionStore:
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
# Re #152, Return env base_url if not overridden, @todo also prefer the proxy pass url
|
||||
env_base_url = os.getenv('BASE_URL','')
|
||||
if not self.__data['settings']['application']['base_url']:
|
||||
self.__data['settings']['application']['base_url'] = env_base_url.strip('" ')
|
||||
# Re #152, Return env base_url if not overridden
|
||||
# Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
|
||||
# like 'Join', so it's always best to at least set something obvious so that they are not broken.
|
||||
|
||||
return self.__data
|
||||
active_base_url = BASE_URL_NOT_SET_TEXT
|
||||
if self.__data['settings']['application'].get('base_url'):
|
||||
active_base_url = self.__data['settings']['application'].get('base_url')
|
||||
elif os.getenv('BASE_URL'):
|
||||
active_base_url = os.getenv('BASE_URL')
|
||||
|
||||
# I looked at various ways to do the following, but in the end just copying the dict seemed simplest/most reliable
|
||||
# even given the memory tradeoff - if you know a better way.. maybe return d|self.__data.. or something
|
||||
d = self.__data
|
||||
d['settings']['application']['active_base_url'] = active_base_url.strip('" ')
|
||||
return d
|
||||
|
||||
# Delete a single watch by UUID
|
||||
def delete(self, uuid):
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
<div class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
|
||||
<li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>)</li>
|
||||
@@ -35,18 +35,14 @@
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.notification_body , rows=5, class="notification-body", placeholder=settings_application['notification_body']) }}
|
||||
<span class="pure-form-message-inline">Body for all notifications</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<!-- unsure -->
|
||||
{{ render_field(form.notification_format , class="notification-format") }}
|
||||
<span class="pure-form-message-inline">Format for all notifications</span>
|
||||
<span class="pure-form-message-inline">Body for all notifications ‐ You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
|
||||
</span>
|
||||
|
||||
</div>
|
||||
<div class="pure-controls">
|
||||
<p class="pure-form-message-inline">
|
||||
You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL.
|
||||
</p>
|
||||
|
||||
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
|
||||
</div>
|
||||
<div class="pure-controls" style="display: none;" id="notification-tokens-info">
|
||||
<table class="pure-table" id="token-table">
|
||||
<thead>
|
||||
<tr>
|
||||
@@ -105,7 +101,7 @@
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{current_snapshot}}' }}</code></td>
|
||||
<td>The current snapshot value, useful when combined with JSON or CSS filters
|
||||
<td>The current snapshot text contents value, useful when combined with JSON or CSS filters
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@@ -115,12 +111,15 @@
|
||||
</tbody>
|
||||
</table>
|
||||
<div class="pure-form-message-inline">
|
||||
<br>
|
||||
URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
|
||||
Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
|
||||
<br>
|
||||
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
|
||||
<p>
|
||||
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br>
|
||||
For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.notification_format , class="notification-format") }}
|
||||
<span class="pure-form-message-inline">Format for all notifications</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endmacro %}
|
||||
|
||||
@@ -62,14 +62,6 @@
|
||||
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
|
||||
class="m-d") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
|
||||
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.pager_size) }}
|
||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||
@@ -100,6 +92,13 @@
|
||||
{{ render_common_settings_form(form.application.form, emailprefix, settings_application) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
<div class="pure-control-group" id="notification-base-url">
|
||||
{{ render_field(form.application.form.base_url, class="m-d") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notification links.<br>
|
||||
Default value is the system environment variable '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="fetching">
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from .util import set_original_response, set_modified_response, live_server_setup
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
||||
from flask import url_for
|
||||
from urllib.request import urlopen
|
||||
from zipfile import ZipFile
|
||||
@@ -19,12 +19,12 @@ def test_backup(client, live_server):
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": url_for('test_endpoint', _external=True)},
|
||||
data={"urls": url_for('test_endpoint', _external=True)+"?somechar=őőőőőőőő"},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
time.sleep(3)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("get_backup"),
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from . util import live_server_setup
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
|
||||
from ..html_tools import *
|
||||
|
||||
@@ -176,3 +176,77 @@ def test_check_multiple_filters(client, live_server):
|
||||
assert b"Blob A" in res.data # CSS was ok
|
||||
assert b"Blob B" in res.data # xPath was ok
|
||||
assert b"Blob C" not in res.data # Should not be included
|
||||
|
||||
# The filter exists, but did not contain anything useful
|
||||
# Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector
|
||||
# Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text
|
||||
def test_filter_is_empty_help_suggestion(client, live_server):
|
||||
#live_server_setup(live_server)
|
||||
|
||||
include_filters = "#blob-a"
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write("""<html><body>
|
||||
<div id="blob-a">
|
||||
<img src="something.jpg">
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Go to the edit page, add our include filter
|
||||
# Submit the edit form for the watch with the filter applied
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"include_filters": include_filters,
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
"headers": "",
|
||||
'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
res = client.get(
|
||||
url_for("index"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'empty result or contain only an image' in res.data
|
||||
|
||||
|
||||
### Just an empty selector, no image
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write("""<html><body>
|
||||
<div id="blob-a">
|
||||
<!-- doo doo -->
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
|
||||
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("index"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'empty result or contain only an image' not in res.data
|
||||
assert b'but contained no usable text' in res.data
|
||||
|
||||
@@ -3,7 +3,7 @@ import threading
|
||||
import queue
|
||||
import time
|
||||
|
||||
from changedetectionio import content_fetcher
|
||||
from changedetectionio import content_fetcher, html_tools
|
||||
from .processors.text_json_diff import FilterNotFoundInResponse
|
||||
from .processors.restock_diff import UnableToExtractRestockData
|
||||
|
||||
@@ -251,7 +251,20 @@ class update_worker(threading.Thread):
|
||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||
# Page had elements/content but no renderable text
|
||||
# Backend (not filters) gave zero output
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
|
||||
extra_help = ""
|
||||
if e.has_filters:
|
||||
# Maybe it contains an image? offer a more helpful link
|
||||
has_img = html_tools.include_filters(include_filters='img',
|
||||
html_content=e.html_content)
|
||||
if has_img:
|
||||
extra_help = ", it's possible that the filters you have give an empty result or contain only an image <a href=\"https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images\">more help here</a>."
|
||||
else:
|
||||
extra_help = ", it's possible that the filters were found, but contained no usable text."
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={
|
||||
'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
|
||||
})
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
|
||||
process_changedetection_results = False
|
||||
|
||||
Reference in New Issue
Block a user