Compare commits

..

5 Commits

13 changed files with 98 additions and 36 deletions

View File

@@ -13,6 +13,8 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Some common stuff here that can be moved to a base class # Some common stuff here that can be moved to a base class
# (set_proxy_from_list) # (set_proxy_from_list)
class perform_site_check(): class perform_site_check():
screenshot = None
xpath_data = None
def __init__(self, *args, datastore, **kwargs): def __init__(self, *args, datastore, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@@ -127,6 +129,9 @@ class perform_site_check():
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter']) fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
fetcher.quit() fetcher.quit()
self.screenshot = fetcher.screenshot
self.xpath_data = fetcher.xpath_data
# Fetching complete, now filters # Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base? # @todo move to class / maybe inside of fetcher abstract base?
@@ -312,4 +317,4 @@ class perform_site_check():
if not watch.get('previous_md5'): if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5 watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data return changed_detected, update_obj, text_content_before_ignored_filter

View File

@@ -384,7 +384,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace') ignore_whitespace = BooleanField('Ignore whitespace')
real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?')
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False) empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)

View File

@@ -42,7 +42,6 @@ class model(dict):
'notification_title': default_notification_title, 'notification_title': default_notification_title,
'notification_body': default_notification_body, 'notification_body': default_notification_body,
'notification_format': default_notification_format, 'notification_format': default_notification_format,
'real_browser_save_screenshot': True,
'schema_version' : 0, 'schema_version' : 0,
'webdriver_delay': None # Extra delay in seconds before extracting text 'webdriver_delay': None # Extra delay in seconds before extracting text
} }

View File

@@ -83,6 +83,12 @@ class model(dict):
return False return False
def ensure_data_dir_exists(self):
    """Create this watch's data directory if it does not exist yet.

    The directory is the datastore path joined with this watch's ``uuid``.
    Calling this when the directory is already present is a no-op.

    Raises:
        FileExistsError: if the target path exists but is not a directory.
        OSError: any other failure reported by ``os.mkdir`` (for example a
            missing parent directory or insufficient permissions).
    """
    target_path = os.path.join(self.__datastore_path, self['uuid'])
    try:
        # Try first instead of checking isdir() beforehand: a separate
        # check-then-create can race with another caller creating the same
        # directory in between the two steps.
        os.mkdir(target_path)
    except FileExistsError:
        # An existing directory is fine; anything else occupying the path
        # (e.g. a regular file) is still an error, as it was before.
        if not os.path.isdir(target_path):
            raise
    else:
        print("> Creating data dir {}".format(target_path))
@property @property
def label(self): def label(self):
# Used for sorting # Used for sorting
@@ -149,9 +155,7 @@ class model(dict):
output_path = "{}/{}".format(self.__datastore_path, self['uuid']) output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
# Incase the operator deleted it, check and create. self.ensure_data_dir_exists()
if not os.path.isdir(output_path):
os.mkdir(output_path)
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
logging.debug("Saving history text {}".format(snapshot_fname)) logging.debug("Saving history text {}".format(snapshot_fname))

View File

@@ -38,13 +38,14 @@ docker kill $$-test_selenium
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..." echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt # Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
pip3 install playwright~=1.22 pip3 install playwright~=1.24
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
# takes a while to spin up # takes a while to spin up
sleep 5 sleep 5
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
pytest tests/fetchers/test_content.py pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py pytest tests/test_errorhandling.py
pytest tests/visualselector/test_fetch_data.py
unset PLAYWRIGHT_DRIVER_URL unset PLAYWRIGHT_DRIVER_URL
docker kill $$-test_browserless docker kill $$-test_browserless

View File

@@ -8,7 +8,7 @@ import threading
import time import time
import uuid as uuid_builder import uuid as uuid_builder
from copy import deepcopy from copy import deepcopy
from os import mkdir, path, unlink from os import path, unlink
from threading import Lock from threading import Lock
import re import re
import requests import requests
@@ -324,12 +324,7 @@ class ChangeDetectionStore:
new_watch.update(apply_extras) new_watch.update(apply_extras)
self.__data['watching'][new_uuid]=new_watch self.__data['watching'][new_uuid]=new_watch
# Get the directory ready self.__data['watching'][new_uuid].ensure_data_dir_exists()
output_path = "{}/{}".format(self.datastore_path, new_uuid)
try:
mkdir(output_path)
except FileExistsError:
print(output_path, "already exists.")
if write_to_disk_now: if write_to_disk_now:
self.sync_to_json() self.sync_to_json()
@@ -352,6 +347,8 @@ class ChangeDetectionStore:
else: else:
target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png") target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png")
self.data['watching'][watch_uuid].ensure_data_dir_exists()
with open(target_path, 'wb') as f: with open(target_path, 'wb') as f:
f.write(screenshot) f.write(screenshot)
f.close() f.close()
@@ -366,9 +363,9 @@ class ChangeDetectionStore:
def save_xpath_data(self, watch_uuid, data, as_error=False): def save_xpath_data(self, watch_uuid, data, as_error=False):
if as_error: if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json") target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
with open(target_path, 'w') as f: with open(target_path, 'w') as f:
f.write(json.dumps(data)) f.write(json.dumps(data))

View File

@@ -69,12 +69,6 @@
{{ render_checkbox_field(form.application.form.extract_title_as_title) }} {{ render_checkbox_field(form.application.form.extract_title_as_title) }}
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span> <span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
</div> </div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
<span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
</div>
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span> <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>

View File

@@ -2,7 +2,7 @@
import time import time
from flask import url_for from flask import url_for
from ..util import live_server_setup from ..util import live_server_setup, wait_for_all_checks
import logging import logging
@@ -29,14 +29,8 @@ def test_fetch_webdriver_content(client, live_server):
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(3) time.sleep(3)
attempt = 0
while attempt < 20: wait_for_all_checks(client)
res = client.get(url_for("index"))
if not b'Checking now' in res.data:
break
logging.getLogger().info("Waiting for check to not say 'Checking now'..")
time.sleep(3)
attempt += 1
res = client.get( res = client.get(

View File

@@ -2,6 +2,8 @@
from flask import make_response, request from flask import make_response, request
from flask import url_for from flask import url_for
import logging
import time
def set_original_response(): def set_original_response():
test_return_data = """<html> test_return_data = """<html>
@@ -68,6 +70,31 @@ def extract_api_key_from_UI(client):
api_key = m.group(1) api_key = m.group(1)
return api_key.strip() return api_key.strip()
# kinda funky, but works for now
def extract_UUID_from_client(client):
    """Return the first watch UUID found on the index page.

    Requests the ``index`` endpoint with the given test client and extracts
    whatever sits between ``edit/`` and the next double quote in the
    response body.

    Raises:
        AssertionError: if the response contains no ``edit/...`` fragment.
    """
    import re

    res = client.get(
        url_for("index"),
    )
    # Search the decoded body rather than the repr() of the bytes object,
    # so escape sequences in the repr cannot leak into the match.
    body = res.data.decode('utf-8', errors='replace')
    m = re.search(r'edit/(.+?)"', body)
    if m is None:
        # Fail with a readable message instead of AttributeError on None.
        raise AssertionError("Could not find an 'edit/<uuid>' link on the index page")
    return m.group(1).strip()
def wait_for_all_checks(client):
    """Poll the index page until it no longer contains 'Checking now'.

    Sleeps one second before each request and gives up after 60 attempts.
    Giving up is not an error (the function simply returns), but a warning
    is logged so a later assertion failure can be traced back to checks
    that never finished.
    """
    log = logging.getLogger()
    for attempt in range(60):
        time.sleep(1)
        res = client.get(url_for("index"))
        if b'Checking now' not in res.data:
            break
        log.info("Waiting for watch-list to not say 'Checking now'.. %s", attempt)
    else:
        # Loop ran out without a break: the page still says 'Checking now'.
        log.warning("Gave up waiting for watch-list to not say 'Checking now' after 60 attempts")
def live_server_setup(live_server): def live_server_setup(live_server):
@live_server.app.route('/test-endpoint') @live_server.app.route('/test-endpoint')
@@ -133,3 +160,4 @@ def live_server_setup(live_server):
return ret return ret
live_server.start() live_server.start()

View File

@@ -0,0 +1,2 @@
"""Tests for the app."""

View File

@@ -0,0 +1,3 @@
#!/usr/bin/python3
from .. import conftest

View File

@@ -0,0 +1,35 @@
#!/usr/bin/python3
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
def test_visual_selector_content_ready(client, live_server):
    """Check that a screenshot and elements.json are saved even when the CSS filter matches nothing.

    Adds a watch in paused state, saves it with a non-matching ``css_filter``
    and the ``html_webdriver`` backend, waits for the check to finish, then
    asserts both files exist under the watch's directory in ``test-datastore``.
    """
    import os

    assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

    live_server_setup(live_server)
    time.sleep(1)

    # An external URL is used because the docker container is too difficult
    # to set up to connect back to the pytest socket; something we control
    # would probably be better.
    test_url = 'https://news.ycombinator.com'

    # Step 1: quick-add the watch via the "Edit > Watch" button (paused).
    quick_add_form = {"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'}
    res = client.post(url_for("form_quick_watch_add"), data=quick_add_form, follow_redirects=True)
    assert b"Watch added in Paused state, saving will unpause" in res.data

    # Step 2: save the edit form with a filter that matches nothing; this unpauses the watch.
    edit_form = {
        "css_filter": ".does-not-exist",
        "url": test_url,
        "tag": "",
        "headers": "",
        'fetch_backend': "html_webdriver",
    }
    res = client.post(
        url_for("edit_page", uuid="first", unpause_on_save=1),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"unpaused" in res.data

    # Step 3: let the check run to completion.
    time.sleep(1)
    wait_for_all_checks(client)

    # Step 4: both files should have been written for this watch.
    uuid = extract_UUID_from_client(client)
    watch_dir = os.path.join('test-datastore', uuid)
    assert os.path.isfile(os.path.join(watch_dir, 'last-screenshot.png')), "last-screenshot.png should exist"
    assert os.path.isfile(os.path.join(watch_dir, 'elements.json')), "xpath elements.json data should exist"

View File

@@ -142,7 +142,7 @@ class update_worker(threading.Thread):
now = time.time() now = time.time()
try: try:
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid) changed_detected, update_obj, contents = update_handler.run(uuid)
# Re #342 # Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc # We then convert/.decode('utf-8') for the notification etc
@@ -222,6 +222,7 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
except content_fetcher.PageUnloadable as e: except content_fetcher.PageUnloadable as e:
# @todo connection-refused ?
err_text = "Page request from server didnt respond correctly" err_text = "Page request from server didnt respond correctly"
if e.screenshot: if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
@@ -280,10 +281,10 @@ class update_worker(threading.Thread):
'last_checked': round(time.time())}) 'last_checked': round(time.time())})
# Always save the screenshot if it's available # Always save the screenshot if it's available
if screenshot: if update_handler.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
if xpath_data: if update_handler.xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data) self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
self.current_uuid = None # Done self.current_uuid = None # Done