Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2026-01-05 23:00:36 +00:00)
Compare commits
4 Commits
text-filte
...
ui-mobile-
| Author | SHA1 | Date | |
|---|---|---|---|
| | 7c7f6ac182 | | |
| | 5b34aece96 | | |
| | 1b625dc18a | | |
| | 367afc81e9 | | |
@@ -1432,6 +1432,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
)
|
||||
# Use the last loaded HTML as the input
|
||||
update_handler.fetcher.content = decompressed_data
|
||||
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
|
||||
try:
|
||||
changed_detected, update_obj, contents, text_after_filter = update_handler.run_changedetection(
|
||||
watch=tmp_watch,
|
||||
|
||||
@@ -18,6 +18,7 @@ class watch_base(dict):
|
||||
'check_count': 0,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'content-type': None,
|
||||
'date_created': None,
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'extract_title_as_title': False,
|
||||
|
||||
@@ -143,8 +143,6 @@ class perform_site_check(difference_detection_processor):
|
||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
||||
import hashlib
|
||||
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from functools import partial
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
@@ -186,11 +184,7 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
itemprop_availability = {}
|
||||
try:
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments
|
||||
# anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
|
||||
itemprop_availability = future.result()
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
except MoreThanOnePriceFound as e:
|
||||
# Add the real data
|
||||
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
|
||||
@@ -36,8 +36,6 @@ class PDFToHTMLToolNotFound(ValueError):
|
||||
class perform_site_check(difference_detection_processor):
|
||||
|
||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from functools import partial
|
||||
|
||||
changed_detected = False
|
||||
html_content = ""
|
||||
@@ -174,30 +172,20 @@ class perform_site_check(difference_detection_processor):
|
||||
for filter_rule in include_filters_rule:
|
||||
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss))
|
||||
html_content += future.result()
|
||||
is_rss=is_rss)
|
||||
|
||||
elif filter_rule.startswith('xpath1:'):
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss))
|
||||
html_content += future.result()
|
||||
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss)
|
||||
else:
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
|
||||
html_content += html_tools.include_filters(include_filters=filter_rule,
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url))
|
||||
html_content += future.result()
|
||||
append_pretty_line_formatting=not watch.is_source_type_url)
|
||||
|
||||
if not html_content.strip():
|
||||
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
|
||||
@@ -210,20 +198,16 @@ class perform_site_check(difference_detection_processor):
|
||||
else:
|
||||
# extract text
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=is_rss)) #1874 activate the <title workaround hack
|
||||
stripped_text_from_html = future.result()
|
||||
stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=is_rss) # 1874 activate the <title workaround hack
|
||||
|
||||
|
||||
if watch.get('trim_text_whitespace'):
|
||||
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
|
||||
|
||||
if watch.get('remove_duplicate_lines'):
|
||||
stripped_text_from_html = '\n'.join(dict.fromkeys(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
|
||||
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
|
||||
|
||||
if watch.get('sort_text_alphabetically'):
|
||||
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
|
||||
|
||||
@@ -34,6 +34,7 @@ function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
|
||||
|
||||
function request_textpreview_update() {
|
||||
if (!$('body').hasClass('preview-text-enabled')) {
|
||||
console.error("Preview text was requested but body tag was not setup")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -77,20 +78,19 @@ $(document).ready(function () {
|
||||
|
||||
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
|
||||
$("#text-preview-inner").css('max-height', (vh-300)+"px");
|
||||
|
||||
// Realtime preview of 'Filters & Text' setup
|
||||
var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
|
||||
|
||||
$("#activate-text-preview").click(function (e) {
|
||||
$(this).fadeOut();
|
||||
$('body').toggleClass('preview-text-enabled')
|
||||
|
||||
request_textpreview_update();
|
||||
|
||||
$("#text-preview-refresh").click(function (e) {
|
||||
request_textpreview_update();
|
||||
});
|
||||
$('textarea:visible').on('keyup blur', debounced_request_textpreview_update);
|
||||
$('input:visible').on('keyup blur change', debounced_request_textpreview_update);
|
||||
$("#filters-and-triggers-tab").on('click', debounced_request_textpreview_update);
|
||||
const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
|
||||
$("#text-preview-refresh")[method]('click', debounced_request_textpreview_update);
|
||||
$('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update);
|
||||
$('input:visible')[method]('keyup blur change', debounced_request_textpreview_update);
|
||||
$("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update);
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
@@ -42,26 +42,36 @@
|
||||
|
||||
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
font-size: 80%;
|
||||
#browser-steps-ui {
|
||||
flex-grow: 1; /* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1; /* Allow it to shrink if needed */
|
||||
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
|
||||
background-color: #eee;
|
||||
border-radius: 5px;
|
||||
|
||||
@media screen and (min-width: 800px) {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
#browser-steps-fieldlist {
|
||||
flex-grow: 0; /* Don't allow it to grow */
|
||||
flex-shrink: 0; /* Don't allow it to shrink */
|
||||
flex-basis: auto; /* Base width is determined by the content */
|
||||
max-width: 400px; /* Set a max width to prevent overflow */
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll;
|
||||
}
|
||||
|
||||
|
||||
#browser-steps-ui {
|
||||
flex-grow: 1; /* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1; /* Allow it to shrink if needed */
|
||||
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
|
||||
}
|
||||
}
|
||||
|
||||
#browser-steps-fieldlist {
|
||||
flex-grow: 0; /* Don't allow it to grow */
|
||||
flex-shrink: 0; /* Don't allow it to shrink */
|
||||
flex-basis: auto; /* Base width is determined by the content */
|
||||
max-width: 400px; /* Set a max width to prevent overflow */
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll;
|
||||
#browser-steps-ui {
|
||||
background-color: #eee;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
#browser-steps-field-list {
|
||||
text-align: center;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,30 +47,35 @@
|
||||
display: none; }
|
||||
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
font-size: 80%; }
|
||||
@media screen and (min-width: 800px) {
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh; }
|
||||
#browser-steps .flex-wrapper #browser-steps-fieldlist {
|
||||
flex-grow: 0;
|
||||
/* Don't allow it to grow */
|
||||
flex-shrink: 0;
|
||||
/* Don't allow it to shrink */
|
||||
flex-basis: auto;
|
||||
/* Base width is determined by the content */
|
||||
max-width: 400px;
|
||||
/* Set a max width to prevent overflow */
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll; }
|
||||
#browser-steps .flex-wrapper #browser-steps-ui {
|
||||
flex-grow: 1;
|
||||
/* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1;
|
||||
/* Allow it to shrink if needed */
|
||||
flex-basis: 0;
|
||||
/* Start with 0 base width so it stretches as much as possible */ } }
|
||||
#browser-steps .flex-wrapper #browser-steps-ui {
|
||||
flex-grow: 1;
|
||||
/* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1;
|
||||
/* Allow it to shrink if needed */
|
||||
flex-basis: 0;
|
||||
/* Start with 0 base width so it stretches as much as possible */
|
||||
background-color: #eee;
|
||||
border-radius: 5px; }
|
||||
#browser-steps .flex-wrapper #browser-steps-fieldlist {
|
||||
flex-grow: 0;
|
||||
/* Don't allow it to grow */
|
||||
flex-shrink: 0;
|
||||
/* Don't allow it to shrink */
|
||||
flex-basis: auto;
|
||||
/* Base width is determined by the content */
|
||||
max-width: 400px;
|
||||
/* Set a max width to prevent overflow */
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll; }
|
||||
#browser-steps .flex-wrapper #browser-steps-field-list {
|
||||
text-align: center; }
|
||||
|
||||
/* this is duplicate :( */
|
||||
#browsersteps-selector-wrapper {
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -38,6 +38,11 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# Content type recording worked
|
||||
uuid = extract_UUID_from_client(client)
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['content-type'] == "text/html"
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
|
||||
@@ -491,6 +491,8 @@ class update_worker(threading.Thread):
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
|
||||
# Mark that we never had any failures
|
||||
if not watch.get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
|
||||
Reference in New Issue
Block a user