Compare commits

...

7 Commits

Author SHA1 Message Date
dgtlmoon
dc96a5ff69 WIP 2022-12-19 15:02:41 +01:00
dgtlmoon
7a1d2d924e Dark mode - system setting var is not required (its cookie based) 2022-12-19 14:13:57 +01:00
dgtlmoon
c3731cf055 0.40.0.3 2022-12-19 12:41:52 +01:00
dgtlmoon
a287e5a86c Visual Selector - Select smallest/most precise element first, better filtering of zero size elements 2022-12-19 12:33:31 +01:00
dgtlmoon
235535c327 Fetching - Check the most overdue watch first (#1242) 2022-12-17 15:40:57 +01:00
dgtlmoon
44dc62da2d Overview list - Checkbox action "Recheck" 2022-12-16 18:35:09 +01:00
dgtlmoon
0c380c170f Playwright - Better error reporting and re-try fetch on fail once (#1238) 2022-12-16 18:06:14 +01:00
9 changed files with 107 additions and 27 deletions

View File

@@ -36,7 +36,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.40.0.2'
__version__ = '0.40.0.3'
datastore = None
@@ -755,8 +755,11 @@ def changedetection_app(config=None, datastore_o=None):
@login_required
def import_page():
remaining_urls = []
from changedetectionio import forms
form = forms.importForm(request.form)
if request.method == 'POST':
from .importer import import_url_list, import_distill_io_json
from .importer import import_url_list, import_distill_io_json, import_changedetection_io_zip
# URL List import
if request.values.get('urls') and len(request.values.get('urls').strip()):
@@ -779,10 +782,20 @@ def changedetection_app(config=None, datastore_o=None):
for uuid in d_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
if request.files.get("backup_zip_file"):
if not form.validate():
flash("An error occurred, please see below.", "error")
else:
d_importer = import_changedetection_io_zip()
d_importer.run(data=None, flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids:
# Queue without priority, we will examine their own rule to find out if it should be checked
update_q.put(queuedWatchMetaData.PrioritizedItem(item={'uuid': uuid, 'skip_when_checksum_same': True}))
# Could be some remaining, or we could be on GET
output = render_template("import.html",
form=form,
import_url_list_remaining="\n".join(remaining_urls),
original_distill_json=''
)
@@ -1260,6 +1273,14 @@ def changedetection_app(config=None, datastore_o=None):
datastore.data['watching'][uuid.strip()]['notification_muted'] = False
flash("{} watches un-muted".format(len(uuids)))
elif (op == 'recheck'):
for uuid in uuids:
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
flash("{} watches un-muted".format(len(uuids)))
elif (op == 'notification-default'):
from changedetectionio.notification import (
default_notification_format_for_watch
@@ -1441,7 +1462,11 @@ def ticker_thread_check_time_launch_checks():
watch_uuid_list = []
while True:
try:
watch_uuid_list = datastore.data['watching'].keys()
# Get a list of watches sorted by last_checked, [1] because it gets passed a tuple
# This is so we examine the most over-due first
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked',0)):
watch_uuid_list.append(k[0])
except RuntimeError as e:
# RuntimeError: dictionary changed size during iteration
time.sleep(0.1)

View File

@@ -42,7 +42,7 @@ class BrowserStepsStepTimout(Exception):
class PageUnloadable(Exception):
def __init__(self, status_code, url, screenshot=False, message=False):
def __init__(self, status_code, url, message, screenshot=False):
# Set this so we can use it in other parts of the app
self.status_code = status_code
self.url = url
@@ -299,23 +299,34 @@ class base_html_playwright(Fetcher):
if len(request_headers):
context.set_extra_http_headers(request_headers)
try:
self.page.set_default_navigation_timeout(90000)
self.page.set_default_timeout(90000)
# Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
# Bug - never set viewport size BEFORE page.goto
# Waits for the next navigation. Using Python context manager
# prevents a race condition between clicking and waiting for a navigation.
response = self.page.goto(url, wait_until='commit')
# Goto page
try:
# Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors'
response = self.page.goto(url, wait_until='commit')
except playwright._impl._api_types.Error as e:
# Retry once - https://github.com/browserless/chrome/issues/2485
# Sometimes errors related to invalid cert's and other can be random
print ("Content Fetcher > retrying request got error - ", str(e))
time.sleep(1)
response = self.page.goto(url, wait_until='commit')
except Exception as e:
print ("Content Fetcher > Other exception when page.goto", str(e))
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e))
# Execute any browser steps
try:
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
self.page.wait_for_timeout(extra_wait * 1000)
@@ -328,17 +339,15 @@ class base_html_playwright(Fetcher):
# This can be ok, we will try to grab what we could retrieve
pass
except Exception as e:
print ("other exception when page.goto")
print (str(e))
print ("Content Fetcher > Other exception when executing custom JS code", str(e))
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None)
raise PageUnloadable(url=url, status_code=None, message=str(e))
if response is None:
context.close()
browser.close()
print ("response object was none")
print ("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
@@ -357,7 +366,7 @@ class base_html_playwright(Fetcher):
if len(self.page.content().strip()) == 0:
context.close()
browser.close()
print ("Content was empty")
print ("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
@@ -502,7 +511,7 @@ class base_html_webdriver(Fetcher):
try:
self.driver.quit()
except Exception as e:
print("Exception in chrome shutdown/quit" + str(e))
print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py!

View File

@@ -3,6 +3,7 @@ import re
from wtforms import (
BooleanField,
FileField,
Form,
IntegerField,
RadioField,
@@ -425,6 +426,14 @@ class watchForm(commonSettingsForm):
result = False
return result
# datastore.data['settings']['requests']..
class importForm(Form):
    """Form for the import page's backup upload.

    Declares a single file field, ``backup_zip_file``.
    """

    backup_zip_file = FileField("File")

    def validate_backup_zip_file(form, field):
        """Inline validator: reject an upload whose filename does not end in ``.zip``.

        An empty field is accepted, since the other import tabs submit the same form.

        NOTE(review): ``field.data`` is only populated with the upload when the form
        is constructed from data that includes the request's files - confirm against
        the caller.

        :raises ValidationError: when a file is supplied and its name is not ``*.zip``.
        """
        if not field.data:
            return

        # Local import so this block does not depend on the module's import list
        from wtforms.validators import ValidationError

        # Uploaded file objects expose `.filename`; fall back to the raw value otherwise
        filename = getattr(field.data, 'filename', None) or str(field.data)
        if not filename.lower().endswith('.zip'):
            raise ValidationError("Backup file must be a .zip file")
# datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form):

View File

@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod
from flask import request, url_for, current_app
import time
import validators
@@ -20,6 +21,26 @@ class Importer():
datastore):
pass
class import_changedetection_io_zip(Importer):
    """Importer for an uploaded changedetection.io backup ZIP.

    Work in progress: the archive is opened and its member names are walked,
    but nothing is restored yet.
    """

    def run(self,
            data,
            flash,
            datastore,
            ):
        """Open the uploaded ``backup_zip_file`` and enumerate its members.

        :param data: Unused; should be None, the upload is read from ``request.files``.
        :param flash: Callable used to report an error message to the user.
        :param datastore: Unused for now.
        """
        import io
        import zipfile

        upload = request.files["backup_zip_file"]
        try:
            # ZipFile needs a seekable file object, so buffer the upload in memory
            with zipfile.ZipFile(io.BytesIO(upload.read()), 'r') as zf:
                for member_name in zf.namelist():
                    # TODO: restore the watch data held in each member (not implemented yet)
                    pass
        except zipfile.BadZipFile:
            # An invalid upload should be reported to the user, not raised as a server error
            flash("Unable to read the uploaded file, is it a valid ZIP backup?", "error")
class import_url_list(Importer):
"""

View File

@@ -27,7 +27,6 @@ class model(dict):
'base_url' : None,
'extract_title_as_title': False,
'empty_pages_are_a_change': False,
'css_dark_mode': False,
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum

View File

@@ -1,3 +1,6 @@
// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
// All rights reserved.
// @file Scrape the page looking for elements of concern (%ELEMENTS%)
// http://matatk.agrip.org.uk/tests/position-and-width/
// https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
@@ -89,8 +92,8 @@ for (var i = 0; i < elements.length; i++) {
continue
}
// Forget really small ones
if (bbox['width'] < 10 && bbox['height'] < 10) {
// Skip really small ones, and where width or height ==0
if (bbox['width'] * bbox['height'] < 100) {
continue;
}
@@ -146,7 +149,6 @@ for (var i = 0; i < elements.length; i++) {
}
// Inject the current one set in the include_filters, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated.
if (include_filters.length) {
@@ -205,5 +207,9 @@ if (include_filters.length) {
}
}
// Sort the elements by area, smallest first, so the smallest element matching a given area is found first
// and we don't select the wrapping element by mistake and become unable to select what we want.
// A numeric difference is used so elements of equal area compare as equal (a consistent comparator).
size_pos.sort((a, b) => a.width * a.height - b.width * b.height);
// Window.width required for proper scaling in the frontend
return {'size_pos': size_pos, 'browser_width': window.innerWidth};

View File

@@ -1,4 +1,5 @@
// Horrible proof of concept code :)
// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
// All rights reserved.
// yes - this is really a hack, if you are a front-ender and want to help, please get in touch!
$(document).ready(function () {
@@ -177,9 +178,10 @@ $(document).ready(function () {
// Basically, find the most 'deepest'
var found = 0;
ctx.fillStyle = 'rgba(205,0,0,0.35)';
for (var i = selector_data['size_pos'].length; i !== 0; i--) {
// Will be sorted by smallest width*height first
for (var i = 0; i <= selector_data['size_pos'].length; i++) {
// draw all of them? let them choose somehow?
var sel = selector_data['size_pos'][i - 1];
var sel = selector_data['size_pos'][i];
// If we are in a bounding-box
if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale
&&
@@ -195,7 +197,7 @@ $(document).ready(function () {
// no need to keep digging
// @todo or, O to go out/up, I to go in
// or double click to go up/out the selector?
current_selected_i = i - 1;
current_selected_i = i;
found += 1;
break;
}

View File

@@ -1,5 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.jinja' import render_field %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="edit-form monospaced-textarea">
@@ -7,11 +8,12 @@
<ul>
<li class="tab" id=""><a href="#url-list">URL List</a></li>
<li class="tab"><a href="#distill-io">Distill.io</a></li>
<li class="tab"><a href="#changedetection-io">Changedetection.io</a></li>
</ul>
</div>
<div class="box-wrap inner">
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST" enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="tab-pane-inner" id="url-list">
<fieldset class="pure-group">
@@ -77,6 +79,12 @@
" rows="25">{{ original_distill_json }}</textarea>
</fieldset>
</div>
<div class="tab-pane-inner" id="changedetection-io">
Upload your changedetection.io backup ZIP here<br>
<fieldset class="pure-group">
{{ render_field(form.backup_zip_file) }}
</fieldset>
</div>
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
</form>

View File

@@ -32,6 +32,7 @@
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="mute">Mute</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unmute">UnMute</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button>
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button>
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
</div>