Compare commits

...

21 Commits

Author SHA1 Message Date
dgtlmoon
4beab19f45 UI tweaks 2024-09-28 10:08:23 +02:00
dgtlmoon
0081f1437a debouncing inputs 2024-09-28 10:04:11 +02:00
dgtlmoon
7dc9d37bb8 Merge branch 'master' into text-filter-preview 2024-09-27 20:30:57 +02:00
dgtlmoon
ddfbef6db3 [test] Use local data instead of reaching out to changedetection when testing (#2660) 2024-09-27 20:30:19 +02:00
dgtlmoon
e173954cdd Restock monitor - Only try to process restock information (like scraping for "out of stock" keywords) if the page was actually rendered correctly. (#2645) 2024-09-20 09:19:57 +02:00
dgtlmoon
67ff4e696c remove duplicate 2024-09-18 16:41:11 +02:00
dgtlmoon
5c8c442a2e Merge branch 'master' into text-filter-preview 2024-09-18 16:32:11 +02:00
dgtlmoon
e830fb2320 Text filters - Adding filters "Trim whitespace" and "Remove duplicate lines" 2024-09-18 15:45:44 +02:00
dgtlmoon
c6589ee1b4 Browser Steps - UI - Use a better flexbox layout 2024-09-18 11:26:10 +02:00
Michael McMillan
dc936a2e8a Filters - Add support for also removing HTML elements using XPath selectors (#2632) 2024-09-17 22:43:04 +02:00
dgtlmoon
8c1527c1ad Update AppRise notification library to 1.9.0 (#2624) 2024-09-17 19:06:17 +02:00
dgtlmoon
c0cc9a9f56 Merge branch 'master' into text-filter-preview 2024-09-17 18:32:55 +02:00
dgtlmoon
e44761c981 Merge branch 'master' into text-filter-preview 2024-09-11 11:32:20 +02:00
dgtlmoon
09aae40c4a tweak style 2024-09-09 22:03:59 +02:00
dgtlmoon
9270d4053b smarter check? 2024-09-09 20:34:43 +02:00
dgtlmoon
160c267e9f add elay 2024-09-09 18:09:03 +02:00
dgtlmoon
97f47e7b3b Merge branch 'master' into text-filter-preview 2024-09-09 17:28:58 +02:00
dgtlmoon
7a496e3e15 tweak error messages 2024-09-09 11:35:28 +02:00
dgtlmoon
2c564d5c3f Abort existing requests so it doesnt train-wreck 2024-09-07 15:47:54 +02:00
dgtlmoon
59b8971a96 test and label tweaks 2024-09-07 15:42:58 +02:00
dgtlmoon
801791f904 live preview of text filters 2024-09-06 22:53:28 +02:00
26 changed files with 475 additions and 60 deletions

View File

@@ -89,11 +89,13 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>

View File

@@ -1,6 +1,8 @@
#!/usr/bin/env python3
import datetime
import importlib
import flask_login
import locale
import os
@@ -10,7 +12,9 @@ import threading
import time
import timeago
from .content_fetchers.exceptions import ReplyWithContentButNoText
from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
from .processors.text_json_diff.processor import FilterNotFoundInResponse
from .safe_jinja import render as jinja_render
from changedetectionio.strtobool import strtobool
from copy import deepcopy
@@ -1396,6 +1400,57 @@ def changedetection_app(config=None, datastore_o=None):
# Return a 500 error
abort(500)
@app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
@login_optionally_required
def watch_get_preview_rendered(uuid):
    '''Return the text a watch would produce with the filter settings currently in the Edit form.

    The text/JSON diff processor is re-run against the most recent stored HTML
    snapshot of the watch (the browser/fetcher is not called), using a
    throw-away copy of the watch with the posted form values spliced in.

    :param uuid: UUID of the watch being edited.
    :return: The filtered text, or a short human-readable message when there
             is nothing to preview or the filters could not be applied.
    '''
    now = time.time()
    import brotli
    from . import forms

    text_after_filter = ''
    decompressed_data = None
    # Work on a copy so that previewing never modifies the stored watch
    tmp_watch = deepcopy(datastore.data['watching'].get(uuid))

    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
        # Splice in the temporary stuff from the form.
        # This route only accepts POST, so request.form is always the submitted payload.
        form = forms.processor_text_json_diff_form(formdata=request.form, data=request.form)

        # Only update vars that came in via the AJAX post
        posted_values = {k: v for k, v in form.data.items() if k in request.form}
        tmp_watch.update(posted_values)

        latest_filename = next(reversed(tmp_watch.history))
        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
        try:
            # The snapshot name always ends in .br, so it is always brotli-compressed
            with open(html_fname, 'rb') as f:
                decompressed_data = brotli.decompress(f.read()).decode('utf-8')
        except OSError as e:
            # Only the directory was checked above; the snapshot itself may be missing or unreadable.
            # Report it in the preview pane instead of failing the request with a 500.
            logger.warning(f"Preview: could not read HTML snapshot for watch {uuid}: {e}")
            text_after_filter = "No stored HTML snapshot is available to preview yet"

    if decompressed_data is not None:
        # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
        processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
        update_handler = processor_module.perform_site_check(datastore=datastore,
                                                             watch_uuid=uuid  # probably not needed anymore anyway?
                                                             )
        # Use the last loaded HTML as the input
        update_handler.fetcher.content = decompressed_data
        try:
            _changed_detected, _update_obj, _contents, text_after_filter = update_handler.run_changedetection(
                watch=tmp_watch,
                skip_when_checksum_same=False,
            )
        except FilterNotFoundInResponse as e:
            text_after_filter = f"Filter not found in HTML: {e}"
        except ReplyWithContentButNoText:
            text_after_filter = "Filter found but no text (empty result)"
        except Exception as e:
            # Boundary of an interactive preview: show whatever went wrong to the user
            text_after_filter = f"Error: {e}"

    # The processor result may be bytes while the messages above are str; both support .strip()
    text_after_filter = text_after_filter.strip()
    if not text_after_filter:
        text_after_filter = 'Empty content'

    logger.trace(f"Parsed in {time.time()-now:.3f}s")
    return text_after_filter
@app.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required
def form_quick_watch_add():

View File

@@ -469,7 +469,7 @@ class processor_text_json_diff_form(commonSettingsForm):
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
extract_text = StringListField('Extract text', [ValidateListRegex()])
@@ -480,8 +480,10 @@ class processor_text_json_diff_form(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
@@ -576,7 +578,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
ignore_whitespace = BooleanField('Ignore whitespace')
password = SaltyPasswordField()
pager_size = IntegerField('Pager size',

View File

@@ -1,4 +1,5 @@
from typing import List
from lxml import etree
import json
import re
@@ -57,11 +58,26 @@ def subtractive_css_selector(css_selector, html_content):
item.decompose()
return str(soup)
def subtractive_xpath_selector(xpath_selector, html_content):
    """Remove every element matched by an XPath expression and return the resulting HTML.

    :param xpath_selector: XPath expression selecting the elements to drop.
    :param html_content: HTML document as a string.
    :return: The serialized HTML with the matched elements removed.
    """
    html_tree = etree.HTML(html_content)
    matches = html_tree.xpath(xpath_selector)

    # XPath expressions such as count(...) or string(...) return a scalar rather than a node list
    if not isinstance(matches, list):
        matches = []

    for element in matches:
        # Attribute/text results (e.g. //a/@href) are strings, not elements, and cannot be removed
        if not etree.iselement(element):
            continue

        parent = element.getparent()
        if parent is None:
            # The root element has no parent to detach it from
            continue

        # lxml keeps the text that follows an element in that element's .tail and
        # discards it together with the element, so hand it to the neighbour first.
        if element.tail:
            previous = element.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + element.tail
            else:
                parent.text = (parent.text or '') + element.tail

        parent.remove(element)

    return etree.tostring(html_tree, method="html").decode("utf-8")
def element_removal(selectors: List[str], html_content):
"""Joins individual filters into one css filter."""
selector = ",".join(selectors)
return subtractive_css_selector(selector, html_content)
"""Removes elements that match a list of CSS or xPath selectors."""
modified_html = html_content
for selector in selectors:
if selector.startswith(('xpath:', 'xpath1:', '//')):
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
else:
modified_html = subtractive_css_selector(selector, modified_html)
return modified_html
def elementpath_tostring(obj):
"""

View File

@@ -60,6 +60,8 @@ class watch_base(dict):
'time_between_check_use_default': True,
'title': None,
'track_ldjson_price_data': None,
'trim_text_whitespace': False,
'remove_duplicate_lines': False,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',
'uuid': str(uuid.uuid4()),

View File

@@ -1,4 +1,6 @@
from abc import abstractmethod
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.strtobool import strtobool
from copy import deepcopy
@@ -23,10 +25,11 @@ class difference_detection_processor():
super().__init__(*args, **kwargs)
self.datastore = datastore
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
# Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher()
def call_browser(self):
from requests.structures import CaseInsensitiveDict
from changedetectionio.content_fetchers.exceptions import EmptyReply
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
@@ -159,7 +162,7 @@ class difference_detection_processor():
some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
changed_detected = False
return changed_detected, update_obj, ''.encode('utf-8')
return changed_detected, update_obj, ''.encode('utf-8'), b''
def find_sub_packages(package_name):

View File

@@ -158,6 +158,20 @@ class perform_site_check(difference_detection_processor):
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
from ...html_tools import html_to_text
text = html_to_text(self.fetcher.content)
logger.debug(f"Length of text after conversion: {len(text)}")
if not len(text):
from ...content_fetchers.exceptions import ReplyWithContentButNoText
raise ReplyWithContentButNoText(url=watch.link,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
html_content=self.fetcher.content,
xpath_data=self.fetcher.xpath_data
)
# Which restock settings to compare against?
restock_settings = watch.get('restock_settings', {})
@@ -290,4 +304,4 @@ class perform_site_check(difference_detection_processor):
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
return changed_detected, update_obj, snapshot_content.encode('utf-8').strip(), b''

View File

@@ -218,11 +218,19 @@ class perform_site_check(difference_detection_processor):
is_rss=is_rss)) #1874 activate the <title workaround hack
stripped_text_from_html = future.result()
if watch.get('sort_text_alphabetically') and stripped_text_from_html:
if watch.get('trim_text_whitespace'):
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
if watch.get('remove_duplicate_lines'):
stripped_text_from_html = '\n'.join(dict.fromkeys(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
if watch.get('sort_text_alphabetically'):
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
@@ -250,7 +258,7 @@ class perform_site_check(difference_detection_processor):
# We had some content, but no differences were found
# Store our new file as the MD5 so it will trigger in the future
c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest()
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8'), stripped_text_from_html.encode('utf-8')
else:
stripped_text_from_html = rendered_diff
@@ -304,7 +312,7 @@ class perform_site_check(difference_detection_processor):
for match in res:
regex_matched_output += [match] + [b'\n']
# Now we will only show what the regex matched
##########################################################
stripped_text_from_html = b''
text_content_before_ignored_filter = b''
if regex_matched_output:
@@ -312,6 +320,8 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html = b''.join(regex_matched_output)
text_content_before_ignored_filter = stripped_text_from_html
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
@@ -371,4 +381,4 @@ class perform_site_check(difference_detection_processor):
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter
return changed_detected, update_obj, text_content_before_ignored_filter, stripped_text_from_html

View File

@@ -16,25 +16,31 @@ echo "---------------------------------- SOCKS5 -------------------"
docker run --network changedet-network \
-v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
--rm \
-e "FLASK_SERVER_NAME=cdio" \
--hostname cdio \
-e "SOCKSTEST=proxiesjson" \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
# SOCKS5 related - by manually entering in UI
docker run --network changedet-network \
--rm \
-e "FLASK_SERVER_NAME=cdio" \
--hostname cdio \
-e "SOCKSTEST=manual" \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'
# SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
docker run --network changedet-network \
-e "SOCKSTEST=manual-playwright" \
--hostname cdio \
-e "FLASK_SERVER_NAME=cdio" \
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
--rm \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
echo "socks5 server logs"
docker logs socks5proxy

View File

@@ -12,6 +12,54 @@ function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
checkbox.addEventListener('change', updateOpacity);
}
(function ($) {
    // Most recently started jqXHR for each namespace
    const pendingByNamespace = {};

    /**
     * Like $.ajax(), but only one request per `options.namespace` is kept alive:
     * starting a new one first calls abort() on the previous one for that namespace.
     * Requests without a namespace share the 'default' one.
     * Returns the jqXHR of the request that was just started.
     */
    $.abortiveSingularAjax = function (options) {
        const key = options.namespace || 'default';
        const previous = pendingByNamespace[key];

        if (previous) {
            previous.abort();
        }

        const current = $.ajax(options);
        pendingByNamespace[key] = current;
        return current;
    };
})(jQuery);
/**
 * Post the values of the currently visible inputs to the preview endpoint and
 * show the returned text in the preview pane.
 *
 * Does nothing unless <body> carries the `preview-text-enabled` class. An older
 * request that is still running is aborted via $.abortiveSingularAjax.
 */
function request_textpreview_update() {
    if (!$('body').hasClass('preview-text-enabled')) {
        return;
    }

    const data = {};
    $('textarea:visible, input:visible').each(function () {
        const $element = $(this);
        const name = $element.attr('name');

        // Inputs without a name would otherwise be posted under the key "undefined"
        if (!name) {
            return;
        }

        // Only the selected radio of a group carries the value; an unselected
        // sibling must not overwrite it.
        if ($element.is(':radio') && !$element.is(':checked')) {
            return;
        }

        // An unticked checkbox is still posted (as an empty value) so the server
        // sees the field and can treat it as switched off.
        if ($element.is(':checkbox') && !$element.is(':checked')) {
            data[name] = '';
            return;
        }

        data[name] = $element.val();
    });

    $.abortiveSingularAjax({
        type: "POST",
        url: preview_text_edit_filters_url,
        data: data,
        namespace: 'watchEdit'
    }).done(function (data) {
        $('#filters-and-triggers #text-preview-inner').text(data);
    }).fail(function (error) {
        if (error.statusText === 'abort') {
            // Expected whenever a newer request replaced this one
            console.log('Request was aborted due to a new request being fired.');
        } else {
            $('#filters-and-triggers #text-preview-inner').text('There was an error communicating with the server.');
        }
    });
}
$(document).ready(function () {
$('#notification-setting-reset-to-default').click(function (e) {
$('#notification_title').val('');
@@ -27,5 +75,23 @@ $(document).ready(function () {
toggleOpacity('#time_between_check_use_default', '#time_between_check', false);
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
$("#text-preview-inner").css('max-height', (vh-300)+"px");
var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
$("#activate-text-preview").click(function (e) {
$(this).fadeOut();
$('body').toggleClass('preview-text-enabled')
request_textpreview_update();
$("#text-preview-refresh").click(function (e) {
request_textpreview_update();
});
$('textarea:visible').on('keyup blur', debounced_request_textpreview_update);
$('input:visible').on('keyup blur change', debounced_request_textpreview_update);
$("#filters-and-triggers-tab").on('click', debounced_request_textpreview_update);
});
});

View File

@@ -40,15 +40,29 @@
}
}
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll;
}
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 70vh;
font-size: 80%;
#browser-steps-ui {
flex-grow: 1; /* Allow it to grow and fill the available space */
flex-shrink: 1; /* Allow it to shrink if needed */
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
background-color: #eee;
border-radius: 5px;
}
#browser-steps-fieldlist {
flex-grow: 0; /* Don't allow it to grow */
flex-shrink: 0; /* Don't allow it to shrink */
flex-basis: auto; /* Base width is determined by the content */
max-width: 400px; /* Set a max width to prevent overflow */
padding-left: 1rem;
overflow-y: scroll;
}
}
/* this is duplicate :( */

View File

@@ -0,0 +1,45 @@
// Layout for the text-filter preview pane.
// Everything inside this block applies only while <body> has the .preview-text-enabled class.
body.preview-text-enabled {
#filters-and-triggers > div {
display: flex; /* Establishes Flexbox layout */
gap: 20px; /* Adds space between the columns */
// NOTE(review): sticky offsets are normally computed against the nearest scrolling ancestor - confirm this is needed
position: relative; /* Ensures the sticky positioning is relative to this parent */
}
/* layout of the page */
#edit-text-filter, #text-preview {
flex: 1; /* Each column takes an equal amount of available space */
align-self: flex-start; /* Aligns both columns to the start, allowing each to maintain its content height */
}
// Hide the pro-tips block while the preview is shown
#edit-text-filter {
#pro-tips {
display: none;
}
}
// Preview column: stays in view while scrolling; !important is used to win over other display rules on this element
#text-preview {
position: sticky;
top: 25px;
display: block !important;
}
/* actual preview area */
#text-preview-inner {
background: var(--color-grey-900);
border: 1px solid var(--color-grey-600);
padding: 1rem;
color: #333;
font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
font-size: 12px;
overflow-x: scroll;
white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
}
}
// Styled regardless of the body class; absolutely positioned against the right edge
#activate-text-preview {
right: 0;
position: absolute;
z-index: 0;
box-shadow: 1px 1px 4px var(--color-shadow-jump);
}

View File

@@ -12,6 +12,7 @@
@import "parts/_darkmode";
@import "parts/_menu";
@import "parts/_love";
@import "parts/preview_text_filter";
body {
color: var(--color-text);

View File

@@ -46,14 +46,31 @@
#browser_steps li > label {
display: none; }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll; }
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 70vh; }
height: 70vh;
font-size: 80%; }
#browser-steps .flex-wrapper #browser-steps-ui {
flex-grow: 1;
/* Allow it to grow and fill the available space */
flex-shrink: 1;
/* Allow it to shrink if needed */
flex-basis: 0;
/* Start with 0 base width so it stretches as much as possible */
background-color: #eee;
border-radius: 5px; }
#browser-steps .flex-wrapper #browser-steps-fieldlist {
flex-grow: 0;
/* Don't allow it to grow */
flex-shrink: 0;
/* Don't allow it to shrink */
flex-basis: auto;
/* Base width is determined by the content */
max-width: 400px;
/* Set a max width to prevent overflow */
padding-left: 1rem;
overflow-y: scroll; }
/* this is duplicate :( */
#browsersteps-selector-wrapper {
@@ -411,6 +428,47 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {
fill: #ff0000 !important;
transition: all ease 0.3s !important; }
body.preview-text-enabled {
/* layout of the page */
/* actual preview area */ }
body.preview-text-enabled #filters-and-triggers > div {
display: flex;
/* Establishes Flexbox layout */
gap: 20px;
/* Adds space between the columns */
position: relative;
/* Ensures the sticky positioning is relative to this parent */ }
body.preview-text-enabled #edit-text-filter, body.preview-text-enabled #text-preview {
flex: 1;
/* Each column takes an equal amount of available space */
align-self: flex-start;
/* Aligns the right column to the start, allowing it to maintain its content height */ }
body.preview-text-enabled #edit-text-filter #pro-tips {
display: none; }
body.preview-text-enabled #text-preview {
position: sticky;
top: 25px;
display: block !important; }
body.preview-text-enabled #text-preview-inner {
background: var(--color-grey-900);
border: 1px solid var(--color-grey-600);
padding: 1rem;
color: #333;
font-family: "Courier New", Courier, monospace;
/* Sets the font to a monospace type */
font-size: 12px;
overflow-x: scroll;
white-space: pre-wrap;
/* Preserves whitespace and line breaks like <pre> */
overflow-wrap: break-word;
/* Allows long words to break and wrap to the next line */ }
#activate-text-preview {
right: 0;
position: absolute;
z-index: 0;
box-shadow: 1px 1px 4px var(--color-shadow-jump); }
body {
color: var(--color-text);
background: var(--color-background-page);
@@ -1194,11 +1252,9 @@ ul {
color: #fff;
opacity: 0.7; }
.restock-label svg {
vertical-align: middle; }
#chrome-extension-link {
padding: 9px;
border: 1px solid var(--color-grey-800);

View File

@@ -15,7 +15,7 @@
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
</p>
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul style="display: none" id="advanced-help-notifications">
<ul style="display: none" id="advanced-help-notifications">
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>

View File

@@ -33,7 +33,7 @@
<script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
</head>
<body>
<body class="">
<div class="header">
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
{% if has_password and not current_user.is_authenticated %}

View File

@@ -50,7 +50,7 @@
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
{% endif %}
<li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#stats">Stats</a></li>
@@ -200,7 +200,7 @@ User-Agent: wonderbra 1.0") }}
<div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
<div class="flex-wrapper" >
<div id="browser-steps-ui" class="noselect" style="width: 100%; background-color: #eee; border-radius: 5px;">
<div id="browser-steps-ui" class="noselect">
<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
<span class="loader" >
@@ -215,7 +215,7 @@ User-Agent: wonderbra 1.0") }}
<canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
</div>
</div>
<div id="browser-steps-fieldlist" style="padding-left: 1em; width: 350px; font-size: 80%;" >
<div id="browser-steps-fieldlist" >
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
{{ render_field(form.browser_steps) }}
</div>
@@ -254,7 +254,10 @@ User-Agent: wonderbra 1.0") }}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner" id="filters-and-triggers">
<div class="pure-control-group">
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">Activate preview</span>
<div>
<div id="edit-text-filter">
<div class="pure-control-group" id="pro-tips">
<strong>Pro-tips:</strong><br>
<ul>
<li>
@@ -310,12 +313,13 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Don't paste HTML here, use only CSS selectors </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
@@ -330,14 +334,21 @@ nav
<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.remove_duplicate_lines) }}
<span class="pure-form-message-inline">Remove duplicate lines of text</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.sort_text_alphabetically) }}
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
{{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset>
<fieldset>
<div class="pure-control-group">
@@ -407,7 +418,19 @@ Unavailable") }}
</fieldset>
</div>
</div>
{% endif %}
<div id="text-preview" style="display: none;" >
<script>
const preview_text_edit_filters_url="{{url_for('watch_get_preview_rendered', uuid=uuid)}}";
</script>
<span><strong>Preview of the text that is used for changedetection after all filters run.</strong></span><br>
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
<p>
<div id="text-preview-inner"></div>
</p>
</div>
</div>
</div>
{% endif %}
{# rendered sub Template #}
{% if extra_form_content %}
<div class="tab-pane-inner" id="extras_tab">

View File

@@ -155,11 +155,13 @@
{{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>

View File

@@ -1,12 +1,27 @@
#!/usr/bin/env python3
import os
import time
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def set_response():
    """Write a small fixed HTML page to test-datastore/endpoint-content.txt, then pause for a second.

    Presumably this is the content the local `test_endpoint` route serves - confirm against the test utilities.
    """
    import time

    page = """<html>
<body>
<h1>Awesome, you made it</h1>
yeah the socks request worked
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as handle:
        handle.write(page)

    time.sleep(1)
def test_socks5(client, live_server, measure_memory_usage):
live_server_setup(live_server)
set_response()
# Setup a proxy
res = client.post(
@@ -24,7 +39,10 @@ def test_socks5(client, live_server, measure_memory_usage):
assert b"Settings updated." in res.data
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
# Because the socks server should connect back to us
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
test_url = test_url.replace('localhost.localdomain', 'cdio')
test_url = test_url.replace('localhost', 'cdio')
res = client.post(
url_for("form_quick_watch_add"),
@@ -60,4 +78,4 @@ def test_socks5(client, live_server, measure_memory_usage):
)
# Should see the proper string
assert "+0200:".encode('utf-8') in res.data
assert "Awesome, you made it".encode('utf-8') in res.data

View File

@@ -1,16 +1,32 @@
#!/usr/bin/env python3
import os
import time
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def set_response():
    """Write the canned HTML page that the local test endpoint serves.

    The page is written to ``test-datastore/endpoint-content.txt`` and
    contains the marker text "Awesome, you made it", which the SOCKS5
    proxies.json test looks for in the fetched result.
    """
    import time

    # Plain string literal: there are no placeholders, so no f-prefix is needed.
    data = """<html>
<body>
<h1>Awesome, you made it</h1>
yeah the socks request worked
</body>
</html>
"""
    # Explicit encoding so the fixture does not depend on the locale default.
    with open("test-datastore/endpoint-content.txt", "w", encoding="utf-8") as f:
        f.write(data)

    # NOTE(review): one-second pause kept from the original; presumably it
    # gives the test server time to see the new file - confirm.
    time.sleep(1)
# proxies.json should already be mounted by run_proxy_tests.sh, e.g.:
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
live_server_setup(live_server)
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
set_response()
# Because the socks server should connect back to us
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
test_url = test_url.replace('localhost.localdomain', 'cdio')
test_url = test_url.replace('localhost', 'cdio')
res = client.get(url_for("settings_page"))
assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data
@@ -49,4 +65,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
)
# Should see the proper string
assert "+0200:".encode('utf-8') in res.data
assert "Awesome, you made it".encode('utf-8') in res.data

View File

@@ -87,6 +87,9 @@ def test_element_removal_output():
Some initial text<br>
<p>across multiple lines</p>
<div id="changetext">Some text that changes</div>
<div>Some text should be matched by xPath // selector</div>
<div>Some text should be matched by xPath selector</div>
<div>Some text should be matched by xPath1 selector</div>
</body>
<footer>
<p>Footer</p>
@@ -94,7 +97,16 @@ def test_element_removal_output():
</html>
"""
html_blob = element_removal(
["header", "footer", "nav", "#changetext"], html_content=content
[
"header",
"footer",
"nav",
"#changetext",
"//*[contains(text(), 'xPath // selector')]",
"xpath://*[contains(text(), 'xPath selector')]",
"xpath1://*[contains(text(), 'xPath1 selector')]"
],
html_content=content
)
text = get_text(html_blob)
assert (

View File

@@ -116,9 +116,11 @@ def run_filter_test(client, live_server, content_filter):
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data
assert not os.path.isfile("test-datastore/notification.txt")
time.sleep(1)
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
time.sleep(2)
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)

View File

@@ -11,6 +11,8 @@ def set_original_ignore_response():
<p>Some initial text</p>
<p>Which is across multiple lines</p>
<p>So let's see what happens.</p>
<p>&nbsp; So let's see what happens. <br> </p>
<p>A - sortable line</p>
</body>
</html>
"""
@@ -164,5 +166,52 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
assert res.data.find(b'A uppercase') < res.data.find(b'Z last')
assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines')
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_extra_filters(client, live_server, measure_memory_usage):
    """Check the "remove duplicate lines" and "trim whitespace" text filters.

    Imports the test endpoint as a watch, enables ``remove_duplicate_lines``
    and ``trim_text_whitespace`` (leaving alphabetical sorting off), triggers
    a re-check, and then asserts on the preview page that:

    * the text "see what happens." appears exactly once, and
    * 'A - sortable line' still comes after 'Which is across multiple lines',
      i.e. the lines were not sorted.
    """
    # NOTE(review): live_server_setup() is deliberately left disabled here;
    # presumably an earlier test in this module already set the server up - confirm.
    #live_server_setup(live_server)

    set_original_ignore_response()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Edit the watch to switch on the extra text filters under test
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"remove_duplicate_lines": "y",
              "trim_text_whitespace": "y",
              "sort_text_alphabetically": "",  # leave this OFF for testing
              "url": test_url,
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    res = client.get(
        url_for("preview_page", uuid="first")
    )

    # The duplicated line must appear only once in the preview
    assert res.data.count(b"see what happens.") == 1

    # Sorting is off, so 'A - sortable line' must still come after the earlier line
    assert res.data.find(b'A - sortable line') > res.data.find(b'Which is across multiple lines')

    # Clean up: delete all watches
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

View File

@@ -78,6 +78,7 @@ def set_more_modified_response():
def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
#@todo - could check the apprise object directly instead of looking for this file
from os.path import isfile
for i in range(1, 20):
time.sleep(1)

View File

@@ -278,7 +278,7 @@ class update_worker(threading.Thread):
update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection(
changed_detected, update_obj, contents, content_after_filters = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=skip_when_same_checksum,
)

View File

@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
# jq not available on Windows so must be installed manually
# Notification library
apprise~=1.8.1
apprise==1.9.0
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
# and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible