mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-12 12:36:48 +00:00
Compare commits
15 Commits
ui-mobile-
...
extra-filt
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
515b1bc87f | ||
|
|
ea87b301d8 | ||
|
|
5108201f0b | ||
|
|
7289e4e193 | ||
|
|
34e684eb37 | ||
|
|
f032a1b1b3 | ||
|
|
0506c01c07 | ||
|
|
09aae40c4a | ||
|
|
9270d4053b | ||
|
|
160c267e9f | ||
|
|
97f47e7b3b | ||
|
|
7a496e3e15 | ||
|
|
2c564d5c3f | ||
|
|
59b8971a96 | ||
|
|
801791f904 |
@@ -89,13 +89,11 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
|
||||
footer
|
||||
nav
|
||||
.stockticker
|
||||
//*[contains(text(), 'Advertisement')]") }}
|
||||
.stockticker") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
||||
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
||||
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
||||
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
|
||||
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
|
||||
@@ -1432,7 +1432,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
)
|
||||
# Use the last loaded HTML as the input
|
||||
update_handler.fetcher.content = decompressed_data
|
||||
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
|
||||
try:
|
||||
changed_detected, update_obj, contents, text_after_filter = update_handler.run_changedetection(
|
||||
watch=tmp_watch,
|
||||
|
||||
@@ -469,7 +469,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
|
||||
|
||||
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
||||
|
||||
extract_text = StringListField('Extract text', [ValidateListRegex()])
|
||||
|
||||
@@ -578,7 +578,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
|
||||
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
||||
ignore_whitespace = BooleanField('Ignore whitespace')
|
||||
password = SaltyPasswordField()
|
||||
pager_size = IntegerField('Pager size',
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from typing import List
|
||||
from lxml import etree
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -58,26 +57,11 @@ def subtractive_css_selector(css_selector, html_content):
|
||||
item.decompose()
|
||||
return str(soup)
|
||||
|
||||
def subtractive_xpath_selector(xpath_selector, html_content):
|
||||
html_tree = etree.HTML(html_content)
|
||||
elements_to_remove = html_tree.xpath(xpath_selector)
|
||||
|
||||
for element in elements_to_remove:
|
||||
element.getparent().remove(element)
|
||||
|
||||
modified_html = etree.tostring(html_tree, method="html").decode("utf-8")
|
||||
return modified_html
|
||||
|
||||
def element_removal(selectors: List[str], html_content):
|
||||
"""Removes elements that match a list of CSS or xPath selectors."""
|
||||
modified_html = html_content
|
||||
for selector in selectors:
|
||||
if selector.startswith(('xpath:', 'xpath1:', '//')):
|
||||
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
|
||||
modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
|
||||
else:
|
||||
modified_html = subtractive_css_selector(selector, modified_html)
|
||||
return modified_html
|
||||
"""Joins individual filters into one css filter."""
|
||||
selector = ",".join(selectors)
|
||||
return subtractive_css_selector(selector, html_content)
|
||||
|
||||
def elementpath_tostring(obj):
|
||||
"""
|
||||
|
||||
@@ -18,7 +18,6 @@ class watch_base(dict):
|
||||
'check_count': 0,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'content-type': None,
|
||||
'date_created': None,
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'extract_title_as_title': False,
|
||||
@@ -60,9 +59,8 @@ class watch_base(dict):
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
|
||||
'time_between_check_use_default': True,
|
||||
'title': None,
|
||||
'track_ldjson_price_data': None,
|
||||
'trim_text_whitespace': False,
|
||||
'remove_duplicate_lines': False,
|
||||
'track_ldjson_price_data': None,
|
||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'url': '',
|
||||
'uuid': str(uuid.uuid4()),
|
||||
|
||||
@@ -143,6 +143,8 @@ class perform_site_check(difference_detection_processor):
|
||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
||||
import hashlib
|
||||
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from functools import partial
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
@@ -156,20 +158,6 @@ class perform_site_check(difference_detection_processor):
|
||||
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
|
||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
from ...html_tools import html_to_text
|
||||
text = html_to_text(self.fetcher.content)
|
||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
if not len(text):
|
||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
raise ReplyWithContentButNoText(url=watch.link,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
html_content=self.fetcher.content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Which restock settings to compare against?
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
|
||||
@@ -184,7 +172,11 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
itemprop_availability = {}
|
||||
try:
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments
|
||||
# anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
|
||||
itemprop_availability = future.result()
|
||||
except MoreThanOnePriceFound as e:
|
||||
# Add the real data
|
||||
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
|
||||
@@ -36,6 +36,8 @@ class PDFToHTMLToolNotFound(ValueError):
|
||||
class perform_site_check(difference_detection_processor):
|
||||
|
||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from functools import partial
|
||||
|
||||
changed_detected = False
|
||||
html_content = ""
|
||||
@@ -172,20 +174,30 @@ class perform_site_check(difference_detection_processor):
|
||||
for filter_rule in include_filters_rule:
|
||||
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
|
||||
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss)
|
||||
is_rss=is_rss))
|
||||
html_content += future.result()
|
||||
|
||||
elif filter_rule.startswith('xpath1:'):
|
||||
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss)
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss))
|
||||
html_content += future.result()
|
||||
else:
|
||||
html_content += html_tools.include_filters(include_filters=filter_rule,
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url)
|
||||
append_pretty_line_formatting=not watch.is_source_type_url))
|
||||
html_content += future.result()
|
||||
|
||||
if not html_content.strip():
|
||||
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
|
||||
@@ -198,23 +210,13 @@ class perform_site_check(difference_detection_processor):
|
||||
else:
|
||||
# extract text
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=is_rss) # 1874 activate the <title workaround hack
|
||||
|
||||
|
||||
if watch.get('trim_text_whitespace'):
|
||||
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
|
||||
|
||||
if watch.get('remove_duplicate_lines'):
|
||||
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
|
||||
|
||||
if watch.get('sort_text_alphabetically'):
|
||||
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
|
||||
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
|
||||
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
|
||||
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
|
||||
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=is_rss)) #1874 activate the <title workaround hack
|
||||
stripped_text_from_html = future.result()
|
||||
|
||||
# Re #340 - return the content before the 'ignore text' was applied
|
||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
||||
@@ -296,7 +298,7 @@ class perform_site_check(difference_detection_processor):
|
||||
for match in res:
|
||||
regex_matched_output += [match] + [b'\n']
|
||||
|
||||
##########################################################
|
||||
# Now we will only show what the regex matched
|
||||
stripped_text_from_html = b''
|
||||
text_content_before_ignored_filter = b''
|
||||
if regex_matched_output:
|
||||
@@ -305,6 +307,18 @@ class perform_site_check(difference_detection_processor):
|
||||
text_content_before_ignored_filter = stripped_text_from_html
|
||||
|
||||
|
||||
if watch.get('sort_text_alphabetically') and stripped_text_from_html:
|
||||
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
|
||||
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
|
||||
stripped_text_from_html = stripped_text_from_html.replace(b'\n\n', b'\n')
|
||||
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.decode('utf-8').splitlines(), key=lambda x: x.lower())).encode('utf-8')
|
||||
|
||||
#
|
||||
if watch.get('trim_text_whitespace') and stripped_text_from_html:
|
||||
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.decode('utf-8').splitlines()).encode('utf-8')
|
||||
#
|
||||
if watch.get('remove_duplicate_lines') and stripped_text_from_html:
|
||||
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.decode('utf-8').splitlines())).encode('utf-8')
|
||||
|
||||
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||
|
||||
@@ -16,31 +16,25 @@ echo "---------------------------------- SOCKS5 -------------------"
|
||||
docker run --network changedet-network \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
|
||||
--rm \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
--hostname cdio \
|
||||
-e "SOCKSTEST=proxiesjson" \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
|
||||
# SOCKS5 related - by manually entering in UI
|
||||
docker run --network changedet-network \
|
||||
--rm \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
--hostname cdio \
|
||||
-e "SOCKSTEST=manual" \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py'
|
||||
|
||||
# SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
|
||||
docker run --network changedet-network \
|
||||
-e "SOCKSTEST=manual-playwright" \
|
||||
--hostname cdio \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
|
||||
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
|
||||
--rm \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
|
||||
echo "socks5 server logs"
|
||||
docker logs socks5proxy
|
||||
|
||||
@@ -33,11 +33,6 @@ function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
|
||||
})(jQuery);
|
||||
|
||||
function request_textpreview_update() {
|
||||
if (!$('body').hasClass('preview-text-enabled')) {
|
||||
console.error("Preview text was requested but body tag was not setup")
|
||||
return
|
||||
}
|
||||
|
||||
const data = {};
|
||||
$('textarea:visible, input:visible').each(function () {
|
||||
const $element = $(this); // Cache the jQuery object for the current element
|
||||
@@ -79,18 +74,18 @@ $(document).ready(function () {
|
||||
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
|
||||
$("#text-preview-inner").css('max-height', (vh-300)+"px");
|
||||
|
||||
// Realtime preview of 'Filters & Text' setup
|
||||
var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
|
||||
|
||||
$("#activate-text-preview").click(function (e) {
|
||||
$(this).fadeOut();
|
||||
$('body').toggleClass('preview-text-enabled')
|
||||
request_textpreview_update();
|
||||
|
||||
const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
|
||||
$("#text-preview-refresh")[method]('click', debounced_request_textpreview_update);
|
||||
$('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update);
|
||||
$('input:visible')[method]('keyup blur change', debounced_request_textpreview_update);
|
||||
$("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update);
|
||||
request_textpreview_update();
|
||||
$("#text-preview-refresh").click(function (e) {
|
||||
request_textpreview_update();
|
||||
});
|
||||
$('textarea:visible, input:visible').on('keyup keypress blur change click', function (e) {
|
||||
request_textpreview_update();
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
@@ -40,39 +40,15 @@
|
||||
}
|
||||
}
|
||||
|
||||
#browser-steps-fieldlist {
|
||||
height: 100%;
|
||||
overflow-y: scroll;
|
||||
}
|
||||
|
||||
#browser-steps .flex-wrapper {
|
||||
font-size: 80%;
|
||||
|
||||
@media screen and (min-width: 800px) {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
#browser-steps-fieldlist {
|
||||
flex-grow: 0; /* Don't allow it to grow */
|
||||
flex-shrink: 0; /* Don't allow it to shrink */
|
||||
flex-basis: auto; /* Base width is determined by the content */
|
||||
max-width: 400px; /* Set a max width to prevent overflow */
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll;
|
||||
}
|
||||
|
||||
|
||||
#browser-steps-ui {
|
||||
flex-grow: 1; /* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1; /* Allow it to shrink if needed */
|
||||
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
|
||||
}
|
||||
}
|
||||
|
||||
#browser-steps-ui {
|
||||
background-color: #eee;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
#browser-steps-field-list {
|
||||
text-align: center;
|
||||
}
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
}
|
||||
|
||||
/* this is duplicate :( */
|
||||
|
||||
@@ -46,36 +46,14 @@
|
||||
#browser_steps li > label {
|
||||
display: none; }
|
||||
|
||||
#browser-steps-fieldlist {
|
||||
height: 100%;
|
||||
overflow-y: scroll; }
|
||||
|
||||
#browser-steps .flex-wrapper {
|
||||
font-size: 80%; }
|
||||
@media screen and (min-width: 800px) {
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh; }
|
||||
#browser-steps .flex-wrapper #browser-steps-fieldlist {
|
||||
flex-grow: 0;
|
||||
/* Don't allow it to grow */
|
||||
flex-shrink: 0;
|
||||
/* Don't allow it to shrink */
|
||||
flex-basis: auto;
|
||||
/* Base width is determined by the content */
|
||||
max-width: 400px;
|
||||
/* Set a max width to prevent overflow */
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll; }
|
||||
#browser-steps .flex-wrapper #browser-steps-ui {
|
||||
flex-grow: 1;
|
||||
/* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1;
|
||||
/* Allow it to shrink if needed */
|
||||
flex-basis: 0;
|
||||
/* Start with 0 base width so it stretches as much as possible */ } }
|
||||
#browser-steps .flex-wrapper #browser-steps-ui {
|
||||
background-color: #eee;
|
||||
border-radius: 5px; }
|
||||
#browser-steps .flex-wrapper #browser-steps-field-list {
|
||||
text-align: center; }
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh; }
|
||||
|
||||
/* this is duplicate :( */
|
||||
#browsersteps-selector-wrapper {
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
|
||||
</p>
|
||||
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
|
||||
<ul style="display: none" id="advanced-help-notifications">
|
||||
<ul style="display: none" id="advanced-help-notifications">
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
|
||||
|
||||
@@ -50,7 +50,7 @@
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#stats">Stats</a></li>
|
||||
@@ -200,7 +200,7 @@ User-Agent: wonderbra 1.0") }}
|
||||
<div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
|
||||
<div class="flex-wrapper" >
|
||||
|
||||
<div id="browser-steps-ui" class="noselect">
|
||||
<div id="browser-steps-ui" class="noselect" style="width: 100%; background-color: #eee; border-radius: 5px;">
|
||||
|
||||
<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
|
||||
<span class="loader" >
|
||||
@@ -215,7 +215,7 @@ User-Agent: wonderbra 1.0") }}
|
||||
<canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<div id="browser-steps-fieldlist" >
|
||||
<div id="browser-steps-fieldlist" style="padding-left: 1em; width: 350px; font-size: 80%;" >
|
||||
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
{{ render_field(form.browser_steps) }}
|
||||
</div>
|
||||
@@ -313,13 +313,12 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
|
||||
footer
|
||||
nav
|
||||
.stockticker
|
||||
//*[contains(text(), 'Advertisement')]") }}
|
||||
.stockticker") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
||||
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
||||
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
||||
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
|
||||
<li> Don't paste HTML here, use only CSS selectors </li>
|
||||
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
@@ -398,7 +397,7 @@ Unavailable") }}
|
||||
</fieldset>
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
|
||||
{{ render_field(form.extract_text, rows=5, placeholder="Example: /\d+ online/") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
|
||||
|
||||
@@ -155,13 +155,11 @@
|
||||
{{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header
|
||||
footer
|
||||
nav
|
||||
.stockticker
|
||||
//*[contains(text(), 'Advertisement')]") }}
|
||||
.stockticker") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
||||
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
||||
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
||||
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
|
||||
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
|
||||
@@ -1,27 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
|
||||
|
||||
|
||||
def set_response():
|
||||
import time
|
||||
data = f"""<html>
|
||||
<body>
|
||||
<h1>Awesome, you made it</h1>
|
||||
yeah the socks request worked
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def test_socks5(client, live_server, measure_memory_usage):
|
||||
live_server_setup(live_server)
|
||||
set_response()
|
||||
|
||||
# Setup a proxy
|
||||
res = client.post(
|
||||
@@ -39,10 +24,7 @@ def test_socks5(client, live_server, measure_memory_usage):
|
||||
|
||||
assert b"Settings updated." in res.data
|
||||
|
||||
# Because the socks server should connect back to us
|
||||
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
|
||||
test_url = test_url.replace('localhost.localdomain', 'cdio')
|
||||
test_url = test_url.replace('localhost', 'cdio')
|
||||
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
|
||||
|
||||
res = client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
@@ -78,4 +60,4 @@ def test_socks5(client, live_server, measure_memory_usage):
|
||||
)
|
||||
|
||||
# Should see the proper string
|
||||
assert "Awesome, you made it".encode('utf-8') in res.data
|
||||
assert "+0200:".encode('utf-8') in res.data
|
||||
|
||||
@@ -1,32 +1,16 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
|
||||
|
||||
|
||||
def set_response():
|
||||
import time
|
||||
data = f"""<html>
|
||||
<body>
|
||||
<h1>Awesome, you made it</h1>
|
||||
yeah the socks request worked
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
time.sleep(1)
|
||||
|
||||
# should be proxies.json mounted from run_proxy_tests.sh already
|
||||
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
|
||||
def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
|
||||
live_server_setup(live_server)
|
||||
set_response()
|
||||
# Because the socks server should connect back to us
|
||||
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
|
||||
test_url = test_url.replace('localhost.localdomain', 'cdio')
|
||||
test_url = test_url.replace('localhost', 'cdio')
|
||||
|
||||
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
|
||||
|
||||
res = client.get(url_for("settings_page"))
|
||||
assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data
|
||||
@@ -65,4 +49,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
|
||||
)
|
||||
|
||||
# Should see the proper string
|
||||
assert "Awesome, you made it".encode('utf-8') in res.data
|
||||
assert "+0200:".encode('utf-8') in res.data
|
||||
|
||||
@@ -87,9 +87,6 @@ def test_element_removal_output():
|
||||
Some initial text<br>
|
||||
<p>across multiple lines</p>
|
||||
<div id="changetext">Some text that changes</div>
|
||||
<div>Some text should be matched by xPath // selector</div>
|
||||
<div>Some text should be matched by xPath selector</div>
|
||||
<div>Some text should be matched by xPath1 selector</div>
|
||||
</body>
|
||||
<footer>
|
||||
<p>Footer</p>
|
||||
@@ -97,16 +94,7 @@ def test_element_removal_output():
|
||||
</html>
|
||||
"""
|
||||
html_blob = element_removal(
|
||||
[
|
||||
"header",
|
||||
"footer",
|
||||
"nav",
|
||||
"#changetext",
|
||||
"//*[contains(text(), 'xPath // selector')]",
|
||||
"xpath://*[contains(text(), 'xPath selector')]",
|
||||
"xpath1://*[contains(text(), 'xPath1 selector')]"
|
||||
],
|
||||
html_content=content
|
||||
["header", "footer", "nav", "#changetext"], html_content=content
|
||||
)
|
||||
text = get_text(html_blob)
|
||||
assert (
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -38,11 +38,6 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# Content type recording worked
|
||||
uuid = extract_UUID_from_client(client)
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['content-type'] == "text/html"
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
|
||||
@@ -11,8 +11,6 @@ def set_original_ignore_response():
|
||||
<p>Some initial text</p>
|
||||
<p>Which is across multiple lines</p>
|
||||
<p>So let's see what happens.</p>
|
||||
<p> So let's see what happens. <br> </p>
|
||||
<p>A - sortable line</p>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
@@ -166,52 +164,5 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
|
||||
assert res.data.find(b'A uppercase') < res.data.find(b'Z last')
|
||||
assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines')
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
|
||||
def test_extra_filters(client, live_server, measure_memory_usage):
|
||||
#live_server_setup(live_server)
|
||||
|
||||
set_original_ignore_response()
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"remove_duplicate_lines": "y",
|
||||
"trim_text_whitespace": "y",
|
||||
"sort_text_alphabetically": "", # leave this OFF for testing
|
||||
"url": test_url,
|
||||
"fetch_backend": "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
# Trigger a check
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first")
|
||||
)
|
||||
|
||||
assert res.data.count(b"see what happens.") == 1
|
||||
|
||||
# still should remain unsorted ('A - sortable line') stays at the end
|
||||
assert res.data.find(b'A - sortable line') > res.data.find(b'Which is across multiple lines')
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
@@ -491,8 +491,6 @@ class update_worker(threading.Thread):
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
|
||||
# Mark that we never had any failures
|
||||
if not watch.get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
|
||||
@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
|
||||
# jq not available on Windows so must be installed manually
|
||||
|
||||
# Notification library
|
||||
apprise==1.9.0
|
||||
apprise~=1.8.1
|
||||
|
||||
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
||||
# and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
|
||||
|
||||
Reference in New Issue
Block a user