Compare commits

...

59 Commits

Author SHA1 Message Date
dgtlmoon
3afccbe9c9 Lock versions 2022-05-23 23:31:15 +02:00
dgtlmoon
6af778aea4 handle redraw 2022-05-23 23:30:03 +02:00
dgtlmoon
63459d1504 Include playwright 2022-05-23 23:29:55 +02:00
dgtlmoon
73e27c6b24 Adding test of fetchers 2022-05-23 23:17:20 +02:00
dgtlmoon
de1739dd09 test improvements 2022-05-23 23:16:38 +02:00
dgtlmoon
766a7ea746 Adding tests for fetchers 2022-05-23 23:07:13 +02:00
dgtlmoon
c379035480 minor tweaks 2022-05-23 23:07:08 +02:00
dgtlmoon
a737a653fa Screenshot now available in the visual selector 2022-05-23 22:26:12 +02:00
dgtlmoon
4cbdb92074 update text 2022-05-23 22:25:22 +02:00
dgtlmoon
c29058dcaf remove from selenium 2022-05-23 17:42:52 +02:00
dgtlmoon
875319f910 fix overbaked solutio 2022-05-23 17:41:33 +02:00
dgtlmoon
7be8f9296d Check that we are ready 2022-05-23 17:36:03 +02:00
dgtlmoon
6af88062a8 misc cleanups 2022-05-23 17:14:20 +02:00
dgtlmoon
e5568cf744 UI improvments 2022-05-23 16:13:37 +02:00
dgtlmoon
d82cec5446 Handle tabbing better 2022-05-23 15:50:26 +02:00
dgtlmoon
5eecf138c0 Merge branch 'master' into 550-visual-selector 2022-05-23 14:48:44 +02:00
dgtlmoon
13aabd48db more tweaks 2022-05-22 22:07:45 +02:00
dgtlmoon
a05f8f2bf2 filter error checking 2022-05-22 19:56:49 +02:00
dgtlmoon
1594853ce5 WIP 2022-05-22 19:03:20 +02:00
dgtlmoon
49139e779a Re #616 - adding extra test 2022-05-21 22:07:27 +02:00
dgtlmoon
5e7324b0b8 Set nice error when content found but no text found 2022-05-18 09:38:33 +02:00
dgtlmoon
8b038374f9 Merge branch 'master' into 550-visual-selector 2022-05-18 09:32:17 +02:00
dgtlmoon
695fcc4566 Merge branch 'master' into 550-visual-selector 2022-05-10 17:58:33 +02:00
dgtlmoon
d7c5a53315 skip hidden, prefix xpath 2022-05-10 17:55:46 +02:00
dgtlmoon
573e92c5e5 Merge branch 'master' into 550-visual-selector 2022-05-05 11:00:54 +02:00
dgtlmoon
57ba77e287 wip 2022-05-02 20:29:54 +02:00
dgtlmoon
22e9d96739 fix webdriver 2022-05-02 18:36:43 +02:00
dgtlmoon
6f35c2eec9 fix scaling 2022-05-02 18:09:44 +02:00
dgtlmoon
9c8894a875 wip 2022-05-02 17:15:02 +02:00
dgtlmoon
371c1c974a Merge branch 'playwright' into 550-visual-selector 2022-05-02 00:09:27 +02:00
dgtlmoon
22c1a63167 Merge branch 'master' into 550-visual-selector 2022-05-01 23:57:55 +02:00
dgtlmoon
8ce75f40d9 Merge branch 'playwright' of https://github.com/weeix/changedetection.io into playwright 2022-05-01 19:51:49 +02:00
dgtlmoon
5f3251a3e1 screenshot fixesxy 2022-05-01 19:45:10 +02:00
dgtlmoon
703922c369 Oops 2022-05-01 19:40:05 +02:00
dgtlmoon
22dda97a65 minor cleanups 2022-05-01 19:37:47 +02:00
dgtlmoon
8134242b38 Make system prefer to use Playwright if PLAYWRIGHT_DRIVER_URL is set 2022-05-01 19:34:08 +02:00
dgtlmoon
dc8f20d104 no needed 2022-05-01 19:33:43 +02:00
dgtlmoon
704452322a Merge branch 'master' into playwright 2022-05-01 15:53:14 +02:00
dgtlmoon
588820d2fe tweaks 2022-05-01 15:51:56 +02:00
dgtlmoon
12aa77ee35 just re-use the existing page fetch 2022-04-30 17:32:56 +02:00
dgtlmoon
a086991b54 add limit 2022-04-29 10:12:01 +02:00
dgtlmoon
ac236ee88c fix text bumping 2022-04-29 10:07:46 +02:00
dgtlmoon
26c56c3fc4 more tweaks 2022-04-29 09:35:44 +02:00
dgtlmoon
a7e6cc5c62 misc tweaks 2022-04-28 20:48:45 +02:00
dgtlmoon
245fea07ac set size on load 2022-04-28 19:33:27 +02:00
dgtlmoon
013ae339e0 oops 2022-04-28 19:28:02 +02:00
dgtlmoon
8eccbaa050 misc tweaks 2022-04-28 19:20:03 +02:00
dgtlmoon
71d007a6aa WIP 2022-04-28 19:04:27 +02:00
dgtlmoon
a038cfe046 WIP 2022-04-28 18:53:49 +02:00
dgtlmoon
d819d37463 WIP 2022-04-28 18:10:53 +02:00
dgtlmoon
ea4a8ed580 WIP 2022-04-28 16:54:15 +02:00
dgtlmoon
eef98c6adc WIP 2022-04-28 14:20:18 +02:00
dgtlmoon
4b7774db29 POC 2022-04-26 09:56:06 +02:00
Wee
1be1cee04d Comment out playwright-chrome service 2022-04-18 00:47:09 +07:00
Wee
c990db2bd5 Replace Playwright server with a pre-built image 2022-04-17 23:27:17 +07:00
Wee
25a7fd050f Hide the Playwright option for unsupported devices 2022-04-17 23:18:43 +07:00
Wee
f71545a4b0 Allow the Playwright installation to fail
Excluded Playwright from requirements.txt to
prevent arm/v6 and arm/v7 builds from failing.
2022-04-17 23:18:43 +07:00
dgtlmoon
d87a8cc661 Add new fetch method: Playwright Chromium (Selenium/WebDriver alternative)
Co-authored-by: Wee Sritippho <weeaix@gmail.com>
2022-03-22 00:35:37 +01:00
dgtlmoon
0d114f2adc Add new fetch method: Playwright Chromium (Selenium/WebDriver alternative)
Co-authored-by: Wee Sritippho <weeaix@gmail.com>
2022-03-22 00:17:12 +01:00
22 changed files with 2202 additions and 47 deletions

View File

@@ -626,6 +626,12 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and not form.validate():
flash("An error occurred, please see below.", "error")
visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
output = render_template("edit.html",
uuid=uuid,
watch=datastore.data['watching'][uuid],
@@ -633,7 +639,9 @@ def changedetection_app(config=None, datastore_o=None):
has_empty_checktime=using_default_check_time,
using_global_webdriver_wait=default['webdriver_delay'] is None,
current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False)
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled
)
return output
@@ -976,10 +984,9 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
def static_content(group, filename):
from flask import make_response
if group == 'screenshot':
from flask import make_response
# Could be sensitive, follow password requirements
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
abort(403)
@@ -998,6 +1005,26 @@ def changedetection_app(config=None, datastore_o=None):
except FileNotFoundError:
abort(404)
if group == 'visual_selector_data':
# Could be sensitive, follow password requirements
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
abort(403)
# These files should be in our subdirectory
try:
# set nocache, set content-type
watch_dir = datastore_o.datastore_path + "/" + filename
response = make_response(send_from_directory(filename="elements.json", directory=watch_dir, path=watch_dir + "/elements.json"))
response.headers['Content-type'] = 'application/json'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = 0
return response
except FileNotFoundError:
abort(404)
# These files should be in our subdirectory
try:
return send_from_directory("static/{}".format(group), path=filename)
@@ -1150,7 +1177,6 @@ def changedetection_app(config=None, datastore_o=None):
# paste in etc
return redirect(url_for('index'))
# @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()

View File

@@ -27,6 +27,117 @@ class Fetcher():
status_code = None
content = None
headers = None
fetcher_description = "No description"
# JavaScript that is wrapped in an async arrow function and evaluated inside the
# fetched page. It returns {'size_pos': [...], 'browser_width': ..., 'browser_height': ...}
# where every size_pos entry carries a selector ("xpath" key, either a CSS chain
# or an XPath) plus the element's bounding box.
# The script reads a page-global `css_filter` string which must be defined
# before this is evaluated (an empty string means "no current filter").
xpath_element_js = """
// Include the getXpath script directly, easier than fetching
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});

const findUpTag = (el) => {
  // All state is local so nothing leaks into (or clobbers) the page's own globals
  let r = el;
  let chained_css = [];
  let depth = 0;

  // Strategy 1: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
  while (r.parentNode) {
    if (depth == 5) {
      break;
    }
    if ('' !== r.id) {
      chained_css.unshift("#" + r.id);
      const final_selector = chained_css.join('>');
      // Be sure there is only one, some sites have multiples of the same ID tag :-(
      if (window.document.querySelectorAll(final_selector).length == 1) {
        return final_selector;
      }
      return null;
    } else {
      chained_css.unshift(r.tagName.toLowerCase());
    }
    r = r.parentNode;
    depth += 1;
  }
  return null;
}

// @todo - if it's SVG or IMG, go into image diff mode
var elements = window.document.querySelectorAll("div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary");
var size_pos = [];
// after page fetch, inject this JS
// build a map of all elements and their positions (maybe that only include text?)
var bbox;
for (var i = 0; i < elements.length; i++) {
  bbox = elements[i].getBoundingClientRect();

  // forget really small ones
  if (bbox['width'] < 20 && bbox['height'] < 20) {
    continue;
  }

  // skip hidden elements before doing any selector work for them
  if (window.getComputedStyle(elements[i]).visibility === "hidden") {
    continue;
  }

  // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
  // it should not traverse when we know we can anchor off just an ID one level up etc..
  // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match

  // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
  var xpath_result = false;
  try {
    var d = findUpTag(elements[i]);
    if (d) {
      xpath_result = d;
    }
  } catch (e) {
    // an ID that is not a valid CSS selector makes querySelectorAll throw,
    // fall through to the plain XPath below
  }

  // You could swap it and default to getXpath and then try the smarter one
  // default back to the less intelligent one
  if (!xpath_result) {
    xpath_result = getXPath(elements[i]);
  }

  size_pos.push({
    xpath: xpath_result,
    width: Math.round(bbox['width']),
    height: Math.round(bbox['height']),
    left: Math.floor(bbox['left']),
    top: Math.floor(bbox['top']),
    childCount: elements[i].childElementCount
  });
}

// inject the current one set in the css_filter, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated.
if (css_filter.length) {
  var q = null;
  try {
    // is it xpath? (either a bare path or one carrying the 'xpath:' prefix)
    if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) {
      q = document.evaluate(css_filter.replace(/^xpath:/, ''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
    } else {
      q = document.querySelector(css_filter);
    }
  } catch (e) {
    // not a selector/expression this page can evaluate - nothing to add
    q = null;
  }

  // the filter may no longer match anything, or match a non-element node
  if (q && typeof q.getBoundingClientRect === 'function') {
    bbox = q.getBoundingClientRect();
    if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
      size_pos.push({
        xpath: css_filter,
        width: bbox['width'],
        height: bbox['height'],
        left: bbox['left'],
        top: bbox['top'],
        childCount: q.childElementCount
      });
    }
  }
}
// https://stackoverflow.com/questions/1145850/how-to-get-height-of-entire-document-with-javascript
return {'size_pos':size_pos, 'browser_width': window.innerWidth, 'browser_height':document.body.scrollHeight};
"""
xpath_data = None
# Will be needed in the future by the VisualSelector, always get this where possible.
screenshot = False
fetcher_description = "No description"
@@ -47,7 +158,8 @@ class Fetcher():
request_headers,
request_body,
request_method,
ignore_status_codes=False):
ignore_status_codes=False,
current_css_filter=None):
# Should set self.error, self.status_code and self.content
pass
@@ -128,7 +240,8 @@ class base_html_playwright(Fetcher):
request_headers,
request_body,
request_method,
ignore_status_codes=False):
ignore_status_codes=False,
current_css_filter=None):
from playwright.sync_api import sync_playwright
import playwright._impl._api_types
@@ -148,8 +261,8 @@ class base_html_playwright(Fetcher):
proxy=self.proxy
)
page = context.new_page()
page.set_viewport_size({"width": 1280, "height": 1024})
try:
# Bug - never set viewport size BEFORE page.goto
response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
# Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
@@ -166,14 +279,27 @@ class base_html_playwright(Fetcher):
if len(page.content().strip()) == 0:
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
page.set_viewport_size({"width": 1280, "height": 1024})
# Bugish - Let the page redraw/reflow
page.set_viewport_size({"width": 1280, "height": 1024})
self.status_code = response.status
self.content = page.content()
self.headers = response.all_headers()
# Hand the watch's current filter to the page as a real argument instead of
# formatting it into JavaScript source: a filter containing a quote
# (e.g. div[class='price']) would otherwise terminate the string literal and
# break - or inject code into - the evaluated script.
# An unset filter is passed as an empty string; the element script checks
# css_filter.length before using it.
page.evaluate(
    "(current_filter) => { window.css_filter = current_filter; }",
    str(current_css_filter) if current_css_filter is not None else ""
)
# xpath_element_js reads the global css_filter set above and returns the
# element/position map used by the Visual Selector
self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# JPEG is better here because the screenshots can be very very large
page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=90)
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=92)
context.close()
browser.close()
@@ -225,7 +351,8 @@ class base_html_webdriver(Fetcher):
request_headers,
request_body,
request_method,
ignore_status_codes=False):
ignore_status_codes=False,
current_css_filter=None):
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@@ -245,6 +372,10 @@ class base_html_webdriver(Fetcher):
self.quit()
raise
self.driver.set_window_size(1280, 1024)
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
self.screenshot = self.driver.get_screenshot_as_png()
# @todo - how to check this? is it possible?
self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
@@ -254,8 +385,6 @@ class base_html_webdriver(Fetcher):
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = self.driver.page_source
self.headers = {}
self.screenshot = self.driver.get_screenshot_as_png()
self.quit()
# Does the connection to the webdriver work? run a test connection.
def is_ready(self):
@@ -292,7 +421,8 @@ class html_requests(Fetcher):
request_headers,
request_body,
request_method,
ignore_status_codes=False):
ignore_status_codes=False,
current_css_filter=None):
proxies={}

View File

@@ -94,6 +94,7 @@ class perform_site_check():
# If the klass doesnt exist, just use a default
klass = getattr(content_fetcher, "html_requests")
proxy_args = self.set_proxy_from_list(watch)
fetcher = klass(proxy_override=proxy_args)
@@ -104,7 +105,8 @@ class perform_site_check():
elif system_webdriver_delay is not None:
fetcher.render_extract_delay = system_webdriver_delay
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter'])
fetcher.quit()
# Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base?
@@ -236,4 +238,4 @@ class perform_site_check():
if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data

View File

@@ -22,3 +22,26 @@ echo "RUNNING WITH BASE_URL SET"
export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py
# Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content
echo "TESTING WEBDRIVER FETCH > SELENIUM/WEBDRIVER..."
docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome-debug:3.141.59
# takes a while to spin up
sleep 5
export WEBDRIVER_URL=http://localhost:4444/wd/hub
pytest tests/fetchers/test_content.py
unset WEBDRIVER_URL
docker kill $$-test_selenium
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
pip3 install playwright~=1.22
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
# takes a while to spin up
sleep 5
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
pytest tests/fetchers/test_content.py
unset PLAYWRIGHT_DRIVER_URL
docker kill $$-test_browserless

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@@ -0,0 +1,56 @@
/**
 * debounce
 * @param {integer} milliseconds How long to wait after the most recent call
 *                  before the original function is finally invoked.
 * @param {object} context Optional value to use as "this" for the original
 *                  function; defaults to the "this" of the wrapper call.
 * @return {function} A wrapper that postpones the original function until the
 *                  given number of milliseconds has passed without a new call.
 *                  Only the arguments of the last call are used.
 */
Function.prototype.debounce = function (milliseconds, context) {
    var wrapped = this;
    var pending = null;

    return function () {
        var receiver = context || this;
        var latestArgs = arguments;

        // A newer call replaces whatever was still waiting
        if (pending) {
            clearTimeout(pending);
        }

        pending = setTimeout(function () {
            wrapped.apply(receiver, latestArgs);
            pending = null;
        }, milliseconds);
    };
};
/**
 * throttle
 * @param {integer} milliseconds Minimum number of milliseconds that must pass
 *                  between two invocations of the original function.
 * @param {object} context Optional value to use as "this" for the original
 *                  function; defaults to the "this" of the wrapper call.
 * @return {function} A wrapper that invokes the original function immediately
 *                  on the first call and then silently drops calls until the
 *                  given number of milliseconds has elapsed.
 */
Function.prototype.throttle = function (milliseconds, context) {
    var wrapped = this;
    var lastRun = null;

    return function () {
        var receiver = context || this;
        var timestamp = Date.now();
        var due = !lastRun || timestamp - lastRun >= milliseconds;

        // Calls that arrive too soon are dropped, not queued
        if (!due) {
            return;
        }

        lastRun = timestamp;
        wrapped.apply(receiver, arguments);
    };
};

View File

@@ -0,0 +1,219 @@
// Horrible proof of concept code :)
// yes - this is really a hack, if you are a front-ender and want to help, please get in touch!
//
// Visual Selector: draws the element boxes reported by the backend on a canvas
// laid over the last screenshot, so a filter can be chosen by pointing at it.
// Hovering highlights the deepest box under the pointer, a click locks it in
// and writes its selector into #css_filter, a second click (or Escape / the
// "clear" button) unlocks it again.
// Relies on the globals screenshot_url and watch_visual_selector_data_url
// (defined in the edit.html template) and on Function.prototype.debounce.
$(document).ready(function () {
    // Namespace for handlers that get (re)bound every time the data is loaded,
    // so re-opening the tab replaces them instead of stacking duplicates
    var EVENT_NS = '.visualselector';

    // index into selector_data['size_pos'] of the hovered/locked element
    var current_selected_i = null;
    // true while a selection is locked in by a click
    var state_clicked = false;
    var c;
    // greyed out fill context
    var xctx;
    // redline highlight context (same canvas, so the same 2d context as xctx)
    var ctx;
    var current_default_xpath = '';
    var x_scale = 1;
    var y_scale = 1;
    var selector_image;
    var selector_image_rect;
    var selector_data;

    // Wipe the overlay; a no-op until the canvas has been set up
    function clear_canvas() {
        if (c && ctx) {
            ctx.clearRect(0, 0, c.width, c.height);
        }
    }

    // Show the selector text. Uses .text() because the selector is built from
    // IDs/tags of the fetched page and must never be parsed as HTML here.
    function set_current_selected_text(s) {
        $("#selector-current-xpath span").first().text(s);
    }

    // Grey out everything except the given element box and outline it
    function draw_locked_selection(sel) {
        xctx.fillStyle = 'rgba(205,205,205,0.95)';
        xctx.strokeStyle = 'rgba(225,0,0,0.9)';
        xctx.lineWidth = 3;
        xctx.fillRect(0, 0, c.width, c.height);
        // Clear out what only should be seen (make a clear/clean spot)
        xctx.clearRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
        xctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
    }

    // Toggle the lock: unlock if something is locked, otherwise lock the
    // currently hovered element (if any) and store its selector
    function highlight_current_selected_i() {
        if (state_clicked) {
            state_clicked = false;
            clear_canvas();
            return;
        }
        if (!selector_data) {
            return;
        }
        var sel = selector_data['size_pos'][current_selected_i];
        if (!sel) {
            // click landed where no element box was found
            return;
        }
        // NOTE(review): the original tested `sel[0] == '/'` (always undefined on
        // this object) before adding an 'xpath:' prefix, so the prefix was never
        // applied. The raw selector is kept; confirm whether a prefix is wanted.
        $("#css_filter").val(sel.xpath);
        draw_locked_selection(sel);
        state_clicked = true;
        set_current_selected_text(sel.xpath);
    }

    function set_scale() {
        // some things to check if the scaling doesnt work
        // - that the widths/sizes really are about the actual screen size cat elements.json |grep -o width......|sort|uniq
        selector_image = $("img#selector-background")[0];
        selector_image_rect = selector_image.getBoundingClientRect();

        // make the canvas the same size as the image
        $('#selector-canvas').attr('height', selector_image_rect.height);
        $('#selector-canvas').attr('width', selector_image_rect.width);
        $('#selector-wrapper').attr('width', selector_image_rect.width);

        x_scale = selector_image_rect.width / selector_data['browser_width'];
        y_scale = selector_image_rect.height / selector_image.naturalHeight;

        ctx.strokeStyle = 'rgba(255,0,0, 0.9)';
        ctx.fillStyle = 'rgba(255,0,0, 0.1)';
        ctx.lineWidth = 3;
        console.log("scaling set x: " + x_scale + " by y:" + y_scale);
        $("#selector-current-xpath").css('max-width', selector_image_rect.width);
    }

    // Hover handling: highlight the deepest element box under the pointer
    function on_canvas_mousemove(e) {
        if (state_clicked || !selector_data) {
            return;
        }
        ctx.clearRect(0, 0, c.width, c.height);
        current_selected_i = null;
        ctx.fillStyle = 'rgba(205,0,0,0.35)';

        // Reverse order - the most specific one should be deeper/"laster"
        // Basically, find the most 'deepest'
        for (var i = selector_data['size_pos'].length; i !== 0; i--) {
            var sel = selector_data['size_pos'][i - 1];
            var box_left = sel.left * x_scale;
            var box_top = sel.top * y_scale;
            var box_width = sel.width * x_scale;
            var box_height = sel.height * y_scale;

            // If we are in a bounding-box (X uses x_scale, Y uses y_scale)
            if (e.offsetY > box_top && e.offsetY < box_top + box_height
                &&
                e.offsetX > box_left && e.offsetX < box_left + box_width
            ) {
                // FOUND ONE
                set_current_selected_text(sel.xpath);
                ctx.strokeRect(box_left, box_top, box_width, box_height);
                ctx.fillRect(box_left, box_top, box_width, box_height);

                // no need to keep digging
                // @todo or, O to go out/up, I to go in
                // or double click to go up/out the selector?
                current_selected_i = i - 1;
                break;
            }
        }
    }

    function reflow_selector() {
        // Keep the overlay aligned when the layout changes; a locked selection
        // is redrawn (resizing the canvas wipes it) rather than toggled off
        $(window).off('resize' + EVENT_NS).on('resize' + EVENT_NS, function () {
            set_scale();
            if (state_clicked) {
                var locked = selector_data['size_pos'][current_selected_i];
                if (locked) {
                    draw_locked_selection(locked);
                }
            }
        });

        console.log(selector_data['size_pos'].length + " selectors found");

        // highlight the default one if we can find it in the xPath list
        // or the xpath matches the default one
        var found = false;
        if (current_default_xpath.length) {
            for (var i = selector_data['size_pos'].length; i !== 0; i--) {
                if (selector_data['size_pos'][i - 1].xpath == current_default_xpath) {
                    console.log("highlighting " + current_default_xpath);
                    current_selected_i = i - 1;
                    highlight_current_selected_i();
                    found = true;
                    break;
                }
            }
            if (!found) {
                alert("unfortunately your existing CSS/xPath Filter was no longer found!");
            }
        }

        // Replace (not add to) the canvas handlers from any earlier load
        $('#selector-canvas')
            .off(EVENT_NS)
            .on('mousemove' + EVENT_NS, on_canvas_mousemove.debounce(5))
            .on('mousedown' + EVENT_NS, function () {
                highlight_current_selected_i();
            });
    }

    function fetch_data() {
        // Image is ready
        $('.fetching-update-notice').html("Fetching element data..");

        $.ajax({
            url: watch_visual_selector_data_url,
            context: document.body
        }).done(function (data) {
            $('.fetching-update-notice').html("Rendering..");
            selector_data = data;
            console.log("Reported browser width from backend: " + data['browser_width']);
            state_clicked = false;
            set_scale();
            reflow_selector();
            $('.fetching-update-notice').fadeOut();
        });
    }

    function bootstrap_visualselector() {
        // bootstrap it, this will trigger everything else
        $("img#selector-background").off('load').on('load', function () {
            console.log("Loaded background...");
            c = document.getElementById("selector-canvas");
            // greyed out fill context
            xctx = c.getContext("2d");
            // redline highlight context
            ctx = c.getContext("2d");
            current_default_xpath = $("#css_filter").val() || '';
            fetch_data();
            $('#selector-canvas').off("mousemove");
            // screenshot_url defined in the edit.html template
        }).attr("src", screenshot_url);
    }

    $('#visualselector-tab').click(function () {
        bootstrap_visualselector();
    });

    $(document).on('keydown', function (event) {
        if ($("img#selector-background").is(":visible")) {
            if (event.key == "Escape") {
                state_clicked = false;
                clear_canvas();
            }
        }
    });

    // Handle clearing button/link - bound unconditionally so it also works when
    // the page was opened on another tab and the selector is shown later
    $('#clear-selector').on('click', function (event) {
        if (!state_clicked) {
            alert('Oops, Nothing selected!');
        }
        state_clicked = false;
        clear_canvas();
    });

    // For when the page loads: only fetch the (potentially large) screenshot
    // straight away if the Visual Selector tab is the one being shown
    if (window.location.hash !== '#visualselector') {
        $("img#selector-background").attr('src', '');
        return;
    }
    bootstrap_visualselector();
});

View File

@@ -4,6 +4,7 @@ $(function () {
$(this).closest('.unviewed').removeClass('unviewed');
});
$('.with-share-link > *').click(function () {
$("#copied-clipboard").remove();
@@ -20,5 +21,6 @@ $(function () {
$(this).remove();
});
});
});

View File

@@ -338,7 +338,8 @@ footer {
padding-top: 110px; }
div.tabs.collapsable ul li {
display: block;
border-radius: 0px; }
border-radius: 0px;
margin-right: 0px; }
input[type='text'] {
width: 100%; }
/*
@@ -429,6 +430,15 @@ and also iPads specifically.
.tab-pane-inner:target {
display: block; }
#beta-logo {
height: 50px;
right: -3px;
top: -3px;
position: absolute; }
#selector-header {
padding-bottom: 1em; }
.edit-form {
min-width: 70%;
/* so it cant overflow */
@@ -454,6 +464,24 @@ ul {
.time-check-widget tr input[type="number"] {
width: 5em; }
#selector-wrapper {
height: 600px;
overflow-y: scroll;
position: relative; }
#selector-wrapper > img {
position: absolute;
z-index: 4;
max-width: 100%; }
#selector-wrapper > canvas {
position: relative;
z-index: 5;
max-width: 100%; }
#selector-wrapper > canvas:hover {
cursor: pointer; }
#selector-current-xpath {
font-size: 80%; }
#webdriver-override-options input[type="number"] {
width: 5em; }

View File

@@ -469,6 +469,7 @@ footer {
div.tabs.collapsable ul li {
display: block;
border-radius: 0px;
margin-right: 0px;
}
input[type='text'] {
@@ -613,6 +614,18 @@ $form-edge-padding: 20px;
padding: 0px;
}
#beta-logo {
height: 50px;
// looks better when it's hanging off a little
right: -3px;
top: -3px;
position: absolute;
}
#selector-header {
padding-bottom: 1em;
}
.edit-form {
min-width: 70%;
/* so it cant overflow */
@@ -649,6 +662,30 @@ ul {
}
}
#selector-wrapper {
height: 600px;
overflow-y: scroll;
position: relative;
//width: 100%;
> img {
position: absolute;
z-index: 4;
max-width: 100%;
}
>canvas {
position: relative;
z-index: 5;
max-width: 100%;
&:hover {
cursor: pointer;
}
}
}
#selector-current-xpath {
font-size: 80%;
}
#webdriver-override-options {
input[type="number"] {
width: 5em;

View File

@@ -372,6 +372,15 @@ class ChangeDetectionStore:
return False
def visualselector_data_is_ready(self, watch_uuid):
    """Tell whether the Visual Selector has everything it needs for a watch.

    Looks inside ``<datastore_path>/<watch_uuid>`` for both
    ``last-screenshot.png`` and ``elements.json``.

    :param watch_uuid: UUID (directory name) of the watch to check.
    :return: True only when both files exist as regular files, else False.
    """
    output_path = "{}/{}".format(self.datastore_path, watch_uuid)
    required_files = ("last-screenshot.png", "elements.json")
    return all(path.isfile("{}/{}".format(output_path, name)) for name in required_files)
# Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, watch_uuid, screenshot: bytes):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
@@ -380,6 +389,14 @@ class ChangeDetectionStore:
f.write(screenshot)
f.close()
def save_xpath_data(self, watch_uuid, data):
    """Store the element/position map of a watch as ``elements.json``.

    The file is written to ``<datastore_path>/<watch_uuid>/elements.json``;
    that directory must already exist.

    :param watch_uuid: UUID (directory name) of the watch.
    :param data: JSON-serialisable structure to store.
    :raises TypeError: if ``data`` cannot be serialised to JSON.
    """
    output_path = "{}/{}".format(self.datastore_path, watch_uuid)
    fname = "{}/elements.json".format(output_path)
    # Serialise before opening: opening with 'w' truncates the file, so doing
    # it first would wipe the previous data whenever serialisation fails
    serialised = json.dumps(data)
    with open(fname, 'w') as f:
        f.write(serialised)
def sync_to_json(self):
logging.info("Saving JSON..")
print("Saving JSON..")

View File

@@ -39,9 +39,6 @@
<div class="tabs">
<ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li>
{% if screenshot %}
<li class="tab"><a href="#screenshot">Current screenshot</a></li>
{% endif %}
</ul>
</div>
@@ -63,18 +60,6 @@
</table>
Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a>
</div>
{% if screenshot %}
<div class="tab-pane-inner" id="screenshot">
<p>
<i>For now, only the most recent screenshot is saved and displayed.</i></br>
<strong>Note: No changedetection is performed on the image yet, but we are working on that in an upcoming release.</strong>
</p>
<img src="{{url_for('static_content', group='screenshot', filename=uuid)}}">
</div>
{% endif %}
</div>

View File

@@ -5,12 +5,18 @@
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script>
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
{% if emailprefix %}
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
{% endif %}
</script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
<div class="edit-form monospaced-textarea">
@@ -18,6 +24,7 @@
<ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li>
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Selector</a></li>
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
<li class="tab"><a href="#notifications">Notifications</a></li>
</ul>
@@ -194,6 +201,46 @@ nav
</fieldset>
</div>
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
<img id="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
<fieldset>
<div class="pure-control-group">
{% if visualselector_enabled %}
{% if visualselector_data_is_ready %}
<div id="selector-header">
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
<i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
</div>
<div id="selector-wrapper">
<!-- request the screenshot and get the element offset info ready -->
<!-- use img src ready load to know everything is ready to map out -->
<!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
<img id="selector-background" />
<canvas id="selector-canvas"></canvas>
</div>
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
<span class="pure-form-message-inline">
<p><span style="font-weight: bold">Beta!</span> The Visual Selector is new and there may be minor bugs, please report pages that dont work, help us to improve this software!</p>
</span>
{% else %}
<span class="pure-form-message-inline">Screenshot and element data is not available or not yet ready.</span>
{% endif %}
{% else %}
<span class="pure-form-message-inline">
<p>Sorry, this functionality only works with Playwright/Chrome enabled watches.</p>
<p>Enable the Playwright Chrome fetcher, or alternatively try our <a href="https://lemonade.changedetection.io/start">very affordable subscription based service</a>.</p>
<p>This is because Selenium/WebDriver can not extract full page screenshots reliably.</p>
</span>
{% endif %}
</div>
</fieldset>
</div>
<div id="actions">
<div class="pure-control-group">

View File

@@ -10,9 +10,6 @@
<div class="tabs">
<ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li>
{% if screenshot %}
<li class="tab"><a href="#screenshot">Current screenshot</a></li>
{% endif %}
</ul>
</div>
@@ -31,16 +28,5 @@
</tbody>
</table>
</div>
{% if screenshot %}
<div class="tab-pane-inner" id="screenshot">
<p>
<i>For now, only the most recent screenshot is saved and displayed.</i></br>
<strong>Note: No changedetection is performed on the image yet, but we are working on that in an upcoming release.</strong>
</p>
<img src="{{url_for('static_content', group='screenshot', filename=uuid)}}">
</div>
{% endif %}
</div>
{% endblock %}

View File

@@ -3,6 +3,7 @@
{% from '_helpers.jinja' import render_simple_field %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
<div class="box">
<form class="pure-form" action="{{ url_for('form_watch_add') }}" method="POST" id="new-watch-form">

View File

@@ -0,0 +1,2 @@
"""Tests for the app."""

View File

@@ -0,0 +1,3 @@
#!/usr/bin/python3
from .. import conftest

View File

@@ -0,0 +1,48 @@
#!/usr/bin/python3
import time
from flask import url_for
from ..util import live_server_setup
import logging
def test_fetch_webdriver_content(client, live_server):
    """Check that a watch fetched with the ``html_webdriver`` backend shows the page text.

    The test switches the application fetch backend to ``html_webdriver``
    through the settings page, imports one known test URL, polls the index
    page until it no longer reports 'Checking now', and finally asserts that
    the preview page contains the expected text.

    Args:
        client: HTTP test client used to issue the requests
            (presumably the Flask test client fixture -- confirm in conftest).
        live_server: server object handed to ``live_server_setup``
            (presumably the live-server fixture -- confirm in conftest).
    """
    log = logging.getLogger()
    max_attempts = 20   # upper bound on index-page polls
    poll_interval = 3   # seconds to wait between polls

    live_server_setup(live_server)

    #####################
    res = client.post(
        url_for("settings_page"),
        data={"application-empty_pages_are_a_change": "",
              "requests-time_between_check-minutes": 180,
              'application-fetch_backend': "html_webdriver"},
        follow_redirects=True
    )
    assert b"Settings updated." in res.data

    # Add our URL to the import page
    res = client.post(
        url_for("import_page"),
        data={"urls": "https://changedetection.io/ci-test.html"},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Give the watch a moment to be picked up before we start polling
    time.sleep(3)

    for _ in range(max_attempts):
        res = client.get(url_for("index"))
        if b'Checking now' not in res.data:
            break
        log.info("Waiting for check to not say 'Checking now'..")
        time.sleep(poll_interval)
    else:
        # Polling ran out without seeing the check finish; make that visible
        # in the log so a failing assertion below is easier to diagnose.
        log.warning(
            "Index still says 'Checking now' after %d attempts, continuing anyway",
            max_attempts,
        )

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    log.info("Looking for correct fetched HTML (text) from server")

    assert b'cool it works' in res.data

View File

@@ -121,7 +121,7 @@ def test_trigger_functionality(client, live_server):
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Just to be sure.. set a regular modified change..
# Now set the content which contains the trigger text
time.sleep(sleep_time_for_fetch_thread)
set_modified_with_trigger_text_response()
@@ -130,6 +130,12 @@ def test_trigger_functionality(client, live_server):
res = client.get(url_for("index"))
assert b'unviewed' in res.data
# https://github.com/dgtlmoon/changedetection.io/issues/616
# Apparently the actual snapshot that contains the trigger never shows
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'foobar123' in res.data
# Check the preview/highlighter, we should be able to see what we triggered on, but it should be highlighted
res = client.get(url_for("preview_page", uuid="first"))
# We should be able to see what we ignored

View File

@@ -40,10 +40,11 @@ class update_worker(threading.Thread):
contents = ""
screenshot = False
update_obj= {}
xpath_data = False
now = time.time()
try:
changed_detected, update_obj, contents, screenshot = update_handler.run(uuid)
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -55,6 +56,7 @@ class update_worker(threading.Thread):
except content_fetcher.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
pass
except content_fetcher.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
@@ -148,6 +150,9 @@ class update_worker(threading.Thread):
# Always save the screenshot if it's available
if screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
if xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
self.current_uuid = None # Done
self.q.task_done()

1532
f Normal file

File diff suppressed because one or more lines are too long