Compare commits

..

26 Commits

Author SHA1 Message Date
dgtlmoon bd7282cd04 fix test 2023-10-03 17:22:30 +02:00
dgtlmoon 0579c5be2f update texty 2023-10-03 17:17:36 +02:00
dgtlmoon 85aeea34b1 add helper text 2023-10-03 17:16:14 +02:00
dgtlmoon 2ccd0fc77b Should also support non-regex strings 2023-10-03 17:12:31 +02:00
dgtlmoon 18d48bc2a0 fix comment 2023-10-03 11:22:29 +02:00
dgtlmoon b344adb53d fix regex 2023-10-03 11:22:09 +02:00
dgtlmoon fc38e30989 WIP 2023-10-03 11:08:37 +02:00
dgtlmoon 8c8f378395 Merge branch 'master' into regex-cleanup-311 2023-10-03 10:14:55 +02:00
dgtlmoon 34bc7fe1a6 improve test 2023-10-03 10:04:52 +02:00
dgtlmoon 700729a332 UI - BrowserSteps - Browser Steps interface screen should resize relative to the browser 2023-10-02 18:06:25 +02:00
dgtlmoon b6060ac90c BrowserSteps - <input> of type 'number' should use 'enter text in field' 2023-10-02 11:50:15 +02:00
dgtlmoon 5cccccb0b6 Restock detect - bumping texts for restock detection 2023-09-26 14:32:39 +02:00
dgtlmoon c52eb512e8 UI - Proxy Scanner tool should also understand when a filter is empty or contains only an image 2023-09-26 14:29:42 +02:00
dgtlmoon 7282df9c08 UI + Fetching - Improving helper message when filter contains only an image (adding link to more help) 2023-09-26 14:10:07 +02:00
dgtlmoon e30b17b8bc UI + Fetching - Be more helpful when a filter contains no text, suggest ways to deal with images in filters (#1819) 2023-09-26 13:59:59 +02:00
Marcelo Alencar 1e88136325 Building application - Upgrade test workflows to latest versions (#1817) 2023-09-26 10:18:54 +02:00
dgtlmoon 57de4ffe4f Page fetching - Fixed possible incorrect browser user-agent header in playwright/puppeteer/browserless fetchers (#1811) 2023-09-24 08:42:24 +02:00
dgtlmoon 51e2e8a226 UI - Add extra validation help for notification body with Jinja2 markup (#1810) 2023-09-23 14:50:21 +02:00
dgtlmoon 8887459462 UI - More precise text to describe "current_snapshot" notification token 2023-09-23 14:31:48 +02:00
dgtlmoon 460c724e51 0.45.2 2023-09-22 09:45:55 +02:00
dgtlmoon dcf4bf37ed Code/Test - Improve testing for creating backups 2023-09-22 09:21:07 +02:00
dgtlmoon e3cf22fc27 UI - Re-order notification field settings 2023-09-14 14:34:44 +02:00
dgtlmoon d497db639e UI - Notifications - Tidyup - Hide the notification tokens but show with a button/link 2023-09-14 14:16:08 +02:00
dgtlmoon 7355ac8d21 UI - Notifications - Tweak discord help text 2023-09-14 13:55:48 +02:00
dgtlmoon 2f2d0ea0f2 RSS feeds - Fixing broken links from RSS index in some environments, refactor code (#152, #148, #1684, #1798) 2023-09-14 13:19:45 +02:00
dgtlmoon f991abf7ea Small regex tidyup for 3.11 2023-09-10 09:51:48 +02:00
30 changed files with 305 additions and 225 deletions
+4 -4
View File
@@ -30,11 +30,11 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v1
uses: github/codeql-action/autobuild@v2
# ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1
uses: github/codeql-action/analyze@v2
+9 -9
View File
@@ -39,9 +39,9 @@ jobs:
# Or if we are in a tagged release scenario.
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
@@ -58,27 +58,27 @@ jobs:
echo ${{ github.ref }} > changedetectionio/tag.txt
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Login to GitHub Container Registry
uses: docker/login-action@v1
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Docker Hub Container Registry
uses: docker/login-action@v1
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v3
with:
install: true
version: latest
@@ -88,7 +88,7 @@ jobs:
- name: Build and push :dev
id: docker_build
if: ${{ github.ref }} == "refs/heads/master"
uses: docker/build-push-action@v2
uses: docker/build-push-action@v5
with:
context: ./
file: ./Dockerfile
@@ -105,7 +105,7 @@ jobs:
- name: Build and push :tag
id: docker_build_tag_release
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/build-push-action@v2
uses: docker/build-push-action@v5
with:
context: ./
file: ./Dockerfile
@@ -125,7 +125,7 @@ jobs:
run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}
- name: Cache Docker layers
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
+6 -6
View File
@@ -24,22 +24,22 @@ jobs:
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
# Just test that the build works, some libraries won't compile on ARM/rPi etc
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v3
with:
install: true
version: latest
@@ -49,7 +49,7 @@ jobs:
# Check we can still build under alpine/musl
- name: Test that the docker containers can build (musl via alpine check)
id: docker_build_musl
uses: docker/build-push-action@v2
uses: docker/build-push-action@v5
with:
context: ./
file: ./.github/test/Dockerfile-alpine
@@ -57,7 +57,7 @@ jobs:
- name: Test that the docker containers can build
id: docker_build
uses: docker/build-push-action@v2
uses: docker/build-push-action@v5
# https://github.com/docker/build-push-action#customizing
with:
context: ./
+2 -2
View File
@@ -7,11 +7,11 @@ jobs:
test-application:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
# Mainly just for link/flake8
- name: Set up Python 3.10
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: '3.10'
+2 -2
View File
@@ -11,10 +11,10 @@ jobs:
test-pip-build-basics:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
+7 -9
View File
@@ -38,7 +38,9 @@ from flask_paginate import Pagination, get_page_parameter
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.45.1'
__version__ = '0.45.2'
from changedetectionio.store import BASE_URL_NOT_SET_TEXT
datastore = None
@@ -356,12 +358,10 @@ def changedetection_app(config=None, datastore_o=None):
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
# Description is the page you watch, link takes you to the diff JS UI page
# Dict val base_url will get overriden with the env var if it is set.
ext_base_url = datastore.data['settings']['application'].get('base_url')
if ext_base_url:
# Go with overriden value
diff_link = {'href': "{}{}".format(ext_base_url, url_for('diff_history_page', uuid=watch['uuid'], _external=False))}
else:
diff_link = {'href': url_for('diff_history_page', uuid=watch['uuid'], _external=True)}
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
# Because we are called via whatever web server, flask should figure out the right path (
diff_link = {'href': url_for('diff_history_page', uuid=watch['uuid'], _external=True)}
fe.link(link=diff_link)
@@ -714,7 +714,6 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("edit.html",
available_processors=processors.available_processors(),
browser_steps_config=browser_step_ui_config,
current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
@@ -804,7 +803,6 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("settings.html",
form=form,
current_base_url = datastore.data['settings']['application']['base_url'],
hide_remove_pass=os.getenv("SALTED_PASS", False),
api_key=datastore.data['settings']['application'].get('api_access_token'),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
@@ -57,9 +57,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Got empty reply with code {e.status_code} - Access denied"})
else:
status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': f"Empty reply with code {e.status_code}, needs chrome?"})
except content_fetcher.ReplyWithContentButNoText as e:
txt = f"Got reply but with no content - Status code {e.status_code} - It's possible that the filters were found, but contained no usable text (or contained only an image)."
status.update({'status': 'ERROR', 'text': txt})
except Exception as e:
status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': 'Error: '+str(e)})
status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': 'Error: '+type(e).__name__+str(e)})
else:
status.update({'status': 'OK', 'length': len(contents), 'text': ''})
+7 -5
View File
@@ -77,11 +77,13 @@ class ScreenshotUnavailable(Exception):
class ReplyWithContentButNoText(Exception):
def __init__(self, status_code, url, screenshot=None):
def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''):
# Set this so we can use it in other parts of the app
self.status_code = status_code
self.url = url
self.screenshot = screenshot
self.has_filters = has_filters
self.html_content = html_content
return
@@ -343,8 +345,8 @@ class base_html_playwright(Fetcher):
'req_headers': request_headers,
'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
'url': url,
'user_agent': request_headers.get('User-Agent', 'Mozilla/5.0'),
'proxy_username': self.proxy.get('username','') if self.proxy else False,
'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
'proxy_username': self.proxy.get('username', '') if self.proxy else False,
'proxy_password': self.proxy.get('password', '') if self.proxy else False,
'no_cache_list': [
'twitter',
@@ -443,7 +445,7 @@ class base_html_playwright(Fetcher):
# Set user agent to prevent Cloudflare from blocking the browser
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
context = browser.new_context(
user_agent=request_headers.get('User-Agent', 'Mozilla/5.0'),
user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True,
@@ -684,7 +686,7 @@ class html_requests(Fetcher):
is_binary=False):
# Make requests use a more modern looking user-agent
if not 'User-Agent' in request_headers:
if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None):
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
+8 -2
View File
@@ -229,16 +229,19 @@ class ValidateJinja2Template(object):
def __call__(self, form, field):
from changedetectionio import notification
from jinja2 import Environment, BaseLoader, TemplateSyntaxError
from jinja2 import Environment, BaseLoader, TemplateSyntaxError, UndefinedError
from jinja2.meta import find_undeclared_variables
try:
jinja2_env = Environment(loader=BaseLoader)
jinja2_env.globals.update(notification.valid_tokens)
rendered = jinja2_env.from_string(field.data).render()
except TemplateSyntaxError as e:
raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
except UndefinedError as e:
raise ValidationError(f"A variable or function is not defined: {e}") from e
ast = jinja2_env.parse(field.data)
undefined = ", ".join(find_undeclared_variables(ast))
@@ -502,7 +505,10 @@ class globalSettingsRequestForm(Form):
class globalSettingsApplicationForm(commonSettingsForm):
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
base_url = StringField('Base URL', validators=[validators.Optional()])
base_url = StringField('Notification base URL override',
validators=[validators.Optional()],
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
)
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
+21 -13
View File
@@ -10,6 +10,7 @@ import re
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
# 'price' , 'lowPrice', 'highPrice' are usually under here
# all of those may or may not appear on different websites
LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"
@@ -17,7 +18,23 @@ LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"
class JSONNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "(?i)foobar" type configuration
def perl_style_slash_enclosed_regex_to_options(regex):
res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE)
if res:
flags = res.group(2) if res.group(2) else 'i'
regex = f"(?{flags}){res.group(1)}"
else:
# Fall back to just ignorecase as an option
regex = f"(?i){regex}"
return regex
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
soup = BeautifulSoup(html_content, "html.parser")
@@ -195,23 +212,14 @@ def strip_ignore_text(content, wordlist, mode="content"):
output = []
ignore_text = []
ignore_regex = []
ignored_line_numbers = []
for k in wordlist:
# Is it a regex?
x = re.search('^\/(.*)\/(.*)', k.strip())
if x:
# Starts with / but doesn't look like a regex
p = x.group(1)
try:
# @Todo python regex options can go before the regex str, but not really many of the options apply on a per-line basis
ignore_regex.append(re.compile(rf"{p}", re.IGNORECASE))
except Exception as e:
# Badly formed regex, treat as text
ignore_text.append(k.strip())
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
if res:
ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k)))
else:
# Had a / but doesn't work as regex
ignore_text.append(k.strip())
for line in content.splitlines():
+3 -7
View File
@@ -208,15 +208,11 @@ def create_notification_parameters(n_object, datastore):
watch_tag = ''
# Create URLs to customise the notification with
base_url = datastore.data['settings']['application']['base_url']
# active_base_url - set in store.py data property
base_url = datastore.data['settings']['application'].get('active_base_url')
watch_url = n_object['watch_url']
# Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
# like 'Join', so it's always best to atleast set something obvious so that they are not broken.
if base_url == '':
base_url = "<base-url-env-var-not-set>"
diff_url = "{}/diff/{}".format(base_url, uuid)
preview_url = "{}/preview/{}".format(base_url, uuid)
@@ -226,7 +222,7 @@ def create_notification_parameters(n_object, datastore):
# Valid_tokens also used as a field validator
tokens.update(
{
'base_url': base_url if base_url is not None else '',
'base_url': base_url,
'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else '',
'diff': n_object.get('diff', ''), # Null default in the case we use a test
'diff_added': n_object.get('diff_added', ''), # Null default in the case we use a test
+3 -22
View File
@@ -18,26 +18,7 @@ class difference_detection_processor():
def available_processors():
import importlib
import pkgutil
from . import restock_diff, text_json_diff
processors = [('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)]
discovered_plugins = {
name: importlib.import_module(name)
for finder, name, ispkg
in pkgutil.iter_modules()
if name.startswith('changedetectionio-plugin-')
}
try:
for name, plugin in discovered_plugins.items():
if hasattr(plugin, 'processors'):
for machine_name, desc in plugin.processors.items():
processors.append((machine_name, desc))
except Exception as e:
print (f"Problem fetching one or more plugins")
return processors
x=[('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)]
# @todo Make this smarter with introspection of sorts.
return x
+37 -37
View File
@@ -11,17 +11,19 @@ from changedetectionio import content_fetcher, html_tools
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from copy import deepcopy
from . import difference_detection_processor
from ..html_tools import PERL_STYLE_REGEX
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Webpage Text/HTML, JSON and PDF changes'
name = 'Webpage Text/HTML, JSON and PDF changes'
description = 'Detects all text changes where possible'
class FilterNotFoundInResponse(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
class PDFToHTMLToolNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
@@ -37,19 +39,6 @@ class perform_site_check(difference_detection_processor):
super().__init__(*args, **kwargs)
self.datastore = datastore
# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "foobar(?i)" type configuration
def forward_slash_enclosed_regex_to_options(self, regex):
res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE)
if res:
regex = res.group(1)
regex += '(?{})'.format(res.group(2))
else:
regex += '(?{})'.format('i')
return regex
def run(self, uuid, skip_when_checksum_same=True, preferred_proxy=None):
changed_detected = False
screenshot = False # as bytes
@@ -135,7 +124,8 @@ class perform_site_check(difference_detection_processor):
# requests for PDF's, images etc should be passwd the is_binary flag
is_binary = watch.is_pdf
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'), is_binary=is_binary)
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'),
is_binary=is_binary)
fetcher.quit()
self.screenshot = fetcher.screenshot
@@ -151,7 +141,6 @@ class perform_site_check(difference_detection_processor):
if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
raise content_fetcher.checksumFromPreviousCheckWasTheSame()
# Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base?
@@ -231,8 +220,6 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
is_html = False
if is_html or is_source:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
@@ -283,7 +270,6 @@ class perform_site_check(difference_detection_processor):
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
# @todo whitespace coming from missing rtrim()?
# stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
# Rewrite's the processing text based on only what diff result they want to see
@@ -293,13 +279,13 @@ class perform_site_check(difference_detection_processor):
# needs to not include (added) etc or it may get used twice
# Replace the processed text with the preferred result
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
newest_version_file_contents=stripped_text_from_html,
include_equal=False, # not the same lines
include_added=watch.get('filter_text_added', True),
include_removed=watch.get('filter_text_removed', True),
include_replaced=watch.get('filter_text_replaced', True),
line_feed_sep="\n",
include_change_type_prefix=False)
newest_version_file_contents=stripped_text_from_html,
include_equal=False, # not the same lines
include_added=watch.get('filter_text_added', True),
include_removed=watch.get('filter_text_removed', True),
include_replaced=watch.get('filter_text_replaced', True),
line_feed_sep="\n",
include_change_type_prefix=False)
watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
@@ -314,7 +300,12 @@ class perform_site_check(difference_detection_processor):
# Treat pages with no renderable text content as a change? No by default
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
raise content_fetcher.ReplyWithContentButNoText(url=url,
status_code=fetcher.get_last_status_code(),
screenshot=screenshot,
has_filters=has_filter_rule,
html_content=html_content
)
# We rely on the actual text in the html output.. many sites have random script vars etc,
# in the future we'll implement other mechanisms.
@@ -335,16 +326,25 @@ class perform_site_check(difference_detection_processor):
regex_matched_output = []
for s_re in extract_text:
# incase they specified something in '/.../x'
regex = self.forward_slash_enclosed_regex_to_options(s_re)
result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
for l in result:
if type(l) is tuple:
# @todo - some formatter option default (between groups)
regex_matched_output += list(l) + [b'\n']
else:
# @todo - some formatter option default (between each ungrouped result)
regex_matched_output += [l] + [b'\n']
for l in result:
if type(l) is tuple:
# @todo - some formatter option default (between groups)
regex_matched_output += list(l) + [b'\n']
else:
# @todo - some formatter option default (between each ungrouped result)
regex_matched_output += [l] + [b'\n']
else:
# Doesnt look like regex, just hunt for plaintext and return that which matches
# `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE)
res = r.findall(stripped_text_from_html)
if res:
for match in res:
regex_matched_output += [match] + [b'\n']
# Now we will only show what the regex matched
stripped_text_from_html = b''
+3 -1
View File
@@ -18,7 +18,9 @@ module.exports = async ({page, context}) => {
await page.setBypassCSP(true)
await page.setExtraHTTPHeaders(req_headers);
await page.setUserAgent(user_agent);
if (user_agent) {
await page.setUserAgent(user_agent);
}
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
await page.setDefaultNavigationTimeout(0);
@@ -5,14 +5,19 @@ function isItemInStock() {
'agotado',
'artikel zurzeit vergriffen',
'as soon as stock is available',
'ausverkauft', // sold out
'available for back order',
'back-order or out of stock',
'backordered',
'benachrichtigt mich', // notify me
'brak na stanie',
'brak w magazynie',
'coming soon',
'currently have any tickets for this',
'currently unavailable',
'dostępne wkrótce',
'en rupture de stock',
'ist derzeit nicht auf lager',
'item is no longer available',
'message if back in stock',
'nachricht bei',
@@ -37,6 +42,7 @@ function isItemInStock() {
'unavailable tickets',
'we do not currently have an estimate of when this product will be back in stock.',
'zur zeit nicht an lager',
'已售完',
];
+1 -1
View File
@@ -208,7 +208,7 @@ $(document).ready(function () {
console.log(x);
if (x && first_available.length) {
// @todo will it let you click shit that has a layer ontop? probably not.
if (x['tagtype'] === 'text' || x['tagtype'] === 'email' || x['tagName'] === 'textarea' || x['tagtype'] === 'password' || x['tagtype'] === 'search') {
if (x['tagtype'] === 'text' || x['tagtype'] === 'number' || x['tagtype'] === 'email' || x['tagName'] === 'textarea' || x['tagtype'] === 'password' || x['tagtype'] === 'search') {
$('select', first_available).val('Enter text in field').change();
$('input[type=text]', first_available).first().val(x['xpath']);
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
@@ -32,5 +32,10 @@ $(document).ready(function () {
window.getSelection().removeAllRanges();
});
$("#notification-token-toggle").click(function (e) {
e.preventDefault();
$('#notification-tokens-info').toggle();
});
});
@@ -42,4 +42,8 @@ $(document).ready(function () {
$('#notification_urls').val('');
e.preventDefault();
});
$("#notification-token-toggle").click(function (e) {
e.preventDefault();
$('#notification-tokens-info').toggle();
});
});
@@ -44,7 +44,7 @@
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 600px; /*@todo make this dynamic */
height: 70vh;
}
/* this is duplicate :( */
+1 -2
View File
@@ -50,8 +50,7 @@
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 600px;
/*@todo make this dynamic */ }
height: 70vh; }
/* this is duplicate :( */
#browsersteps-selector-wrapper {
+17 -19
View File
@@ -18,6 +18,9 @@ import threading
import time
import uuid as uuid_builder
# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
dictfilt = lambda x, y: dict([ (i,x[i]) for i in x if i in set(y) ])
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
@@ -126,7 +129,6 @@ class ChangeDetectionStore:
self.needs_write = True
self.scan_plugins()
# Finally start the thread that will manage periodic data saves to JSON
save_data_thread = threading.Thread(target=self.save_datastore).start()
@@ -176,12 +178,21 @@ class ChangeDetectionStore:
@property
def data(self):
# Re #152, Return env base_url if not overriden, @todo also prefer the proxy pass url
env_base_url = os.getenv('BASE_URL','')
if not self.__data['settings']['application']['base_url']:
self.__data['settings']['application']['base_url'] = env_base_url.strip('" ')
# Re #152, Return env base_url if not overriden
# Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
# like 'Join', so it's always best to atleast set something obvious so that they are not broken.
return self.__data
active_base_url = BASE_URL_NOT_SET_TEXT
if self.__data['settings']['application'].get('base_url'):
active_base_url = self.__data['settings']['application'].get('base_url')
elif os.getenv('BASE_URL'):
active_base_url = os.getenv('BASE_URL')
# I looked at various ways todo the following, but in the end just copying the dict seemed simplest/most reliable
# even given the memory tradeoff - if you know a better way.. maybe return d|self.__data.. or something
d = self.__data
d['settings']['application']['active_base_url'] = active_base_url.strip('" ')
return d
# Delete a single watch by UUID
def delete(self, uuid):
@@ -613,19 +624,6 @@ class ChangeDetectionStore:
def tag_exists_by_name(self, tag_name):
return any(v.get('title', '').lower() == tag_name.lower() for k, v in self.__data['settings']['application']['tags'].items())
def scan_plugins(self):
import importlib
import pkgutil
discovered_plugins = {
name: importlib.import_module(name)
for finder, name, ispkg
in pkgutil.iter_modules()
if name.startswith('changedetectionio-plugin-')
}
return discovered_plugins
# Run all updates
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
@@ -13,7 +13,7 @@
<div class="pure-form-message-inline">
<ul>
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) </code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
<li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>)</li>
@@ -35,18 +35,14 @@
</div>
<div class="pure-control-group">
{{ render_field(form.notification_body , rows=5, class="notification-body", placeholder=settings_application['notification_body']) }}
<span class="pure-form-message-inline">Body for all notifications</span>
</div>
<div class="pure-control-group">
<!-- unsure -->
{{ render_field(form.notification_format , class="notification-format") }}
<span class="pure-form-message-inline">Format for all notifications</span>
<span class="pure-form-message-inline">Body for all notifications &dash; You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
</span>
</div>
<div class="pure-controls">
<p class="pure-form-message-inline">
You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL.
</p>
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
</div>
<div class="pure-controls" style="display: none;" id="notification-tokens-info">
<table class="pure-table" id="token-table">
<thead>
<tr>
@@ -105,7 +101,7 @@
</tr>
<tr>
<td><code>{{ '{{current_snapshot}}' }}</code></td>
<td>The current snapshot value, useful when combined with JSON or CSS filters
<td>The current snapshot text contents value, useful when combined with JSON or CSS filters
</td>
</tr>
<tr>
@@ -115,12 +111,15 @@
</tbody>
</table>
<div class="pure-form-message-inline">
<br>
URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
<br>
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
<p>
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br>
For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
</p>
</div>
</div>
<div class="pure-control-group">
{{ render_field(form.notification_format , class="notification-format") }}
<span class="pure-form-message-inline">Format for all notifications</span>
</div>
</div>
{% endmacro %}
+3 -2
View File
@@ -378,15 +378,16 @@ Unavailable") }}
{{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
<span class="pure-form-message-inline">
<ul>
<li>Extracts text in the final output (line by line) after other filters using regular expressions;
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
<ul>
<li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
<li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li>
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
<li>Keyword example &dash; example <code>Out of stock</code></li>
<li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
</ul>
</li>
<li>One line per regular-expression/ string match</li>
<li>One line per regular-expression/string match</li>
</ul>
</span>
</div>
+7 -8
View File
@@ -62,14 +62,6 @@
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
class="m-d") }}
<span class="pure-form-message-inline">
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.pager_size) }}
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
@@ -100,6 +92,13 @@
{{ render_common_settings_form(form.application.form, emailprefix, settings_application) }}
</div>
</fieldset>
<div class="pure-control-group" id="notification-base-url">
{{ render_field(form.application.form.base_url, class="m-d") }}
<span class="pure-form-message-inline">
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notification links.<br>
Default value is the system environment variable '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
</span>
</div>
</div>
<div class="tab-pane-inner" id="fetching">
@@ -119,6 +119,9 @@
<a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
{% endif %}
{% if 'empty result or contain only an image' in watch.last_error %}
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
{% endif %}
</div>
{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
+3 -3
View File
@@ -1,6 +1,6 @@
#!/usr/bin/python3
from .util import set_original_response, set_modified_response, live_server_setup
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
from flask import url_for
from urllib.request import urlopen
from zipfile import ZipFile
@@ -19,12 +19,12 @@ def test_backup(client, live_server):
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
data={"urls": url_for('test_endpoint', _external=True)+"?somechar=őőőőőőőő"},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(3)
wait_for_all_checks(client)
res = client.get(
url_for("get_backup"),
+75 -1
View File
@@ -2,7 +2,7 @@
import time
from flask import url_for
from . util import live_server_setup
from .util import live_server_setup, wait_for_all_checks
from ..html_tools import *
@@ -176,3 +176,77 @@ def test_check_multiple_filters(client, live_server):
assert b"Blob A" in res.data # CSS was ok
assert b"Blob B" in res.data # xPath was ok
assert b"Blob C" not in res.data # Should not be included
# The filter exists, but did not contain anything useful
# Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector
# Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text
def test_filter_is_empty_help_suggestion(client, live_server):
#live_server_setup(live_server)
include_filters = "#blob-a"
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html><body>
<div id="blob-a">
<img src="something.jpg">
</div>
</body>
</html>
""")
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Goto the edit page, add our ignore text
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"include_filters": include_filters,
"url": test_url,
"tags": "",
"headers": "",
'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
res = client.get(
url_for("index"),
follow_redirects=True
)
assert b'empty result or contain only an image' in res.data
### Just an empty selector, no image
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html><body>
<div id="blob-a">
<!-- doo doo -->
</div>
</body>
</html>
""")
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(
url_for("index"),
follow_redirects=True
)
assert b'empty result or contain only an image' not in res.data
assert b'but contained no usable text' in res.data
+25 -19
View File
@@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup
from .util import live_server_setup, wait_for_all_checks
from ..html_tools import *
@@ -55,6 +55,8 @@ def set_multiline_response():
</p>
<div>aaand something lines</div>
<br>
<div>and this should be</div>
</body>
</html>
"""
@@ -66,11 +68,10 @@ def set_multiline_response():
def test_setup(client, live_server):
live_server_setup(live_server)
def test_check_filter_multiline(client, live_server):
#live_server_setup(live_server)
set_multiline_response()
# Add our URL to the import page
@@ -82,14 +83,15 @@ def test_check_filter_multiline(client, live_server):
)
assert b"1 Imported" in res.data
time.sleep(3)
wait_for_all_checks(client)
# Goto the edit page, add our ignore text
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"include_filters": '',
'extract_text': '/something.+?6 billion.+?lines/si',
# Test a regex and a plaintext
'extract_text': '/something.+?6 billion.+?lines/si\r\nand this should be',
"url": test_url,
"tags": "",
"headers": "",
@@ -99,13 +101,19 @@ def test_check_filter_multiline(client, live_server):
)
assert b"Updated watch." in res.data
time.sleep(3)
wait_for_all_checks(client)
res = client.get(url_for("index"))
# Issue 1828
assert b'not at the start of the expression' not in res.data
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# Plaintext that doesnt look like a regex should match also
assert b'and this should be' in res.data
assert b'<div class="">Something' in res.data
assert b'<div class="">across 6 billion multiple' in res.data
@@ -115,14 +123,11 @@ def test_check_filter_multiline(client, live_server):
assert b'aaand something lines' not in res.data
def test_check_filter_and_regex_extract(client, live_server):
sleep_time_for_fetch_thread = 3
include_filters = ".changetext"
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
@@ -132,19 +137,15 @@ def test_check_filter_and_regex_extract(client, live_server):
)
assert b"1 Imported" in res.data
time.sleep(1)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# Goto the edit page, add our ignore text
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"include_filters": include_filters,
'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
'extract_text': '/\d+ online/\r\n/\d+ guests/\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i\r\n/issue1828.+?2022/i',
"url": test_url,
"tags": "",
"headers": "",
@@ -155,8 +156,13 @@ def test_check_filter_and_regex_extract(client, live_server):
assert b"Updated watch." in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
res = client.get(url_for("index"))
#issue 1828
assert b'not at the start of the expression' not in res.data
# Make a change
set_modified_response()
@@ -164,7 +170,7 @@ def test_check_filter_and_regex_extract(client, live_server):
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# It should have 'unviewed' still
# Because it should be looking at only that 'sametext' id
+5 -10
View File
@@ -2,7 +2,7 @@
import time
from flask import url_for
from . util import live_server_setup
from .util import live_server_setup, wait_for_all_checks
def set_original_ignore_response():
@@ -26,13 +26,8 @@ def test_trigger_regex_functionality(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_ignore_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
@@ -43,7 +38,7 @@ def test_trigger_regex_functionality(client, live_server):
assert b"1 Imported" in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# It should report nothing found (just a new one shouldnt have anything)
res = client.get(url_for("index"))
@@ -57,7 +52,7 @@ def test_trigger_regex_functionality(client, live_server):
"fetch_backend": "html_requests"},
follow_redirects=True
)
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# so that we set the state to 'unviewed' after all the edits
client.get(url_for("diff_history_page", uuid="first"))
@@ -65,7 +60,7 @@ def test_trigger_regex_functionality(client, live_server):
f.write("some new noise")
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# It should report nothing found (nothing should match the regex)
res = client.get(url_for("index"))
@@ -75,7 +70,7 @@ def test_trigger_regex_functionality(client, live_server):
f.write("regex test123<br>\nsomething 123")
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
+17 -21
View File
@@ -1,11 +1,9 @@
import importlib
import os
import pkgutil
import queue
import threading
import queue
import time
from changedetectionio import content_fetcher
from changedetectionio import content_fetcher, html_tools
from .processors.text_json_diff import FilterNotFoundInResponse
from .processors.restock_diff import UnableToExtractRestockData
@@ -231,28 +229,13 @@ class update_worker(threading.Thread):
now = time.time()
try:
processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
processor = self.datastore.data['watching'][uuid].get('processor','text_json_diff')
# @todo some way to switch by name
if processor == 'restock_diff':
update_handler = restock_diff.perform_site_check(datastore=self.datastore)
else:
# Used as a default and also by some tests
discovered_plugins = {
name: importlib.import_module(name)
for finder, name, ispkg
in pkgutil.iter_modules()
if name.startswith('changedetectionio-plugin-')
}
for module_name, plugin in discovered_plugins.items():
if hasattr(plugin, 'processors'):
for machine_name, desc in plugin.processors:
if machine_name == processor:
module = importlib.import_module(f"{module_name}.processors.{plugin}")
update_handler = module.perform_site_check(datastore=self.datastore)
#processors.append((machine_name, desc))
update_handler = text_json_diff.perform_site_check(datastore=self.datastore)
changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
@@ -268,7 +251,20 @@ class update_worker(threading.Thread):
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
# Backend (not filters) gave zero output
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
extra_help = ""
if e.has_filters:
# Maybe it contains an image? offer a more helpful link
has_img = html_tools.include_filters(include_filters='img',
html_content=e.html_content)
if has_img:
extra_help = ", it's possible that the filters you have give an empty result or contain only an image."
else:
extra_help = ", it's possible that the filters were found, but contained no usable text."
self.datastore.update_watch(uuid=uuid, update_obj={
'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
})
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
process_changedetection_results = False