Compare commits

..

1 Commits

Author SHA1 Message Date
dgtlmoon
5454b0fc75 Closes #1224 2022-12-14 00:02:51 +01:00
16 changed files with 347 additions and 332 deletions

View File

@@ -10,7 +10,6 @@ import threading
import time import time
import timeago import timeago
from changedetectionio import queuedWatchMetaData
from copy import deepcopy from copy import deepcopy
from distutils.util import strtobool from distutils.util import strtobool
from feedgen.feed import FeedGenerator from feedgen.feed import FeedGenerator
@@ -405,6 +404,7 @@ def changedetection_app(config=None, datastore_o=None):
sorted_watches.append(watch) sorted_watches.append(watch)
existing_tags = datastore.get_all_tags() existing_tags = datastore.get_all_tags()
form = forms.quickWatchForm(request.form) form = forms.quickWatchForm(request.form)
output = render_template("watch-overview.html", output = render_template("watch-overview.html",
form=form, form=form,
@@ -416,7 +416,7 @@ def changedetection_app(config=None, datastore_o=None):
# Don't link to hosting when we're on the hosting environment # Don't link to hosting when we're on the hosting environment
hosted_sticky=os.getenv("SALTED_PASS", False) == False, hosted_sticky=os.getenv("SALTED_PASS", False) == False,
guid=datastore.data['app_guid'], guid=datastore.data['app_guid'],
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue]) queued_uuids=[uuid for p,uuid in update_q.queue])
if session.get('share-link'): if session.get('share-link'):
@@ -596,16 +596,25 @@ def changedetection_app(config=None, datastore_o=None):
using_default_check_time = False using_default_check_time = False
break break
# Use the default if it's the same as system-wide. # Use the default if its the same as system wide
if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']: if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
extra_update_obj['fetch_backend'] = None extra_update_obj['fetch_backend'] = None
# Ignore text # Ignore text
form_ignore_text = form.ignore_text.data form_ignore_text = form.ignore_text.data
datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form_ignore_text:
if len(datastore.data['watching'][uuid].history):
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
if len(datastore.data['watching'][uuid].history):
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
# Be sure proxy value is None # Be sure proxy value is None
if datastore.proxy_list is not None and form.data['proxy'] == '': if datastore.proxy_list is not None and form.data['proxy'] == '':
extra_update_obj['proxy'] = None extra_update_obj['proxy'] = None
@@ -623,7 +632,7 @@ def changedetection_app(config=None, datastore_o=None):
datastore.needs_write_urgent = True datastore.needs_write_urgent = True
# Queue the watch for immediate recheck, with a higher priority # Queue the watch for immediate recheck, with a higher priority
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) update_q.put((1, uuid))
# Diff page [edit] link should go back to diff page # Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff': if request.args.get("next") and request.args.get("next") == 'diff':
@@ -764,7 +773,7 @@ def changedetection_app(config=None, datastore_o=None):
importer = import_url_list() importer = import_url_list()
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore) importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
for uuid in importer.new_uuids: for uuid in importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) update_q.put((1, uuid))
if len(importer.remaining_data) == 0: if len(importer.remaining_data) == 0:
return redirect(url_for('index')) return redirect(url_for('index'))
@@ -777,7 +786,7 @@ def changedetection_app(config=None, datastore_o=None):
d_importer = import_distill_io_json() d_importer = import_distill_io_json()
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids: for uuid in d_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) update_q.put((1, uuid))
@@ -1142,7 +1151,7 @@ def changedetection_app(config=None, datastore_o=None):
if not add_paused and new_uuid: if not add_paused and new_uuid:
# Straight into the queue. # Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) update_q.put((1, new_uuid))
flash("Watch added.") flash("Watch added.")
if add_paused: if add_paused:
@@ -1179,7 +1188,7 @@ def changedetection_app(config=None, datastore_o=None):
uuid = list(datastore.data['watching'].keys()).pop() uuid = list(datastore.data['watching'].keys()).pop()
new_uuid = datastore.clone(uuid) new_uuid = datastore.clone(uuid)
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) update_q.put((5, new_uuid))
flash('Cloned.') flash('Cloned.')
return redirect(url_for('index')) return redirect(url_for('index'))
@@ -1187,7 +1196,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/api/checknow", methods=['GET']) @app.route("/api/checknow", methods=['GET'])
@login_required @login_required
def form_watch_checknow(): def form_watch_checknow():
# Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
tag = request.args.get('tag') tag = request.args.get('tag')
uuid = request.args.get('uuid') uuid = request.args.get('uuid')
i = 0 i = 0
@@ -1196,9 +1205,11 @@ def changedetection_app(config=None, datastore_o=None):
for t in running_update_threads: for t in running_update_threads:
running_uuids.append(t.current_uuid) running_uuids.append(t.current_uuid)
# @todo check thread is running and skip
if uuid: if uuid:
if uuid not in running_uuids: if uuid not in running_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) update_q.put((1, uuid))
i = 1 i = 1
elif tag != None: elif tag != None:
@@ -1206,14 +1217,14 @@ def changedetection_app(config=None, datastore_o=None):
for watch_uuid, watch in datastore.data['watching'].items(): for watch_uuid, watch in datastore.data['watching'].items():
if (tag != None and tag in watch['tag']): if (tag != None and tag in watch['tag']):
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) update_q.put((1, watch_uuid))
i += 1 i += 1
else: else:
# No tag, no uuid, add everything. # No tag, no uuid, add everything.
for watch_uuid, watch in datastore.data['watching'].items(): for watch_uuid, watch in datastore.data['watching'].items():
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) update_q.put((1, watch_uuid))
i += 1 i += 1
flash("{} watches are queued for rechecking.".format(i)) flash("{} watches are queued for rechecking.".format(i))
return redirect(url_for('index', tag=tag)) return redirect(url_for('index', tag=tag))
@@ -1333,7 +1344,7 @@ def changedetection_app(config=None, datastore_o=None):
app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps') app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps')
import changedetectionio.blueprint.price_data_follower as price_data_follower import changedetectionio.blueprint.price_data_follower as price_data_follower
app.register_blueprint(price_data_follower.construct_blueprint(datastore, update_q), url_prefix='/price_data_follower') app.register_blueprint(price_data_follower.construct_blueprint(datastore), url_prefix='/price_data_follower')
# @todo handle ctrl break # @todo handle ctrl break
@@ -1481,7 +1492,7 @@ def ticker_thread_check_time_launch_checks():
seconds_since_last_recheck = now - watch['last_checked'] seconds_since_last_recheck = now - watch['last_checked']
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]: if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
# Proxies can be set to have a limit on seconds between which they can be called # Proxies can be set to have a limit on seconds between which they can be called
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid) watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
@@ -1512,9 +1523,8 @@ def ticker_thread_check_time_launch_checks():
priority, priority,
watch.jitter_seconds, watch.jitter_seconds,
now - watch['last_checked'])) now - watch['last_checked']))
# Into the queue with you # Into the queue with you
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True})) update_q.put((priority, uuid))
# Reset for next time # Reset for next time
watch.jitter_seconds = 0 watch.jitter_seconds = 0

View File

@@ -1,4 +1,3 @@
from changedetectionio import queuedWatchMetaData
from flask_restful import abort, Resource from flask_restful import abort, Resource
from flask import request, make_response from flask import request, make_response
import validators import validators
@@ -25,7 +24,7 @@ class Watch(Resource):
abort(404, message='No watch exists with the UUID of {}'.format(uuid)) abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if request.args.get('recheck'): if request.args.get('recheck'):
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) self.update_q.put((1, uuid))
return "OK", 200 return "OK", 200
# Return without history, get that via another API call # Return without history, get that via another API call
@@ -101,7 +100,7 @@ class CreateWatch(Resource):
extras = {'title': json_data['title'].strip()} if json_data.get('title') else {} extras = {'title': json_data['title'].strip()} if json_data.get('title') else {}
new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras) new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras)
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) self.update_q.put((1, new_uuid))
return {'uuid': new_uuid}, 201 return {'uuid': new_uuid}, 201
# Return concise list of available watches and some very basic info # Return concise list of available watches and some very basic info
@@ -119,7 +118,7 @@ class CreateWatch(Resource):
if request.args.get('recheck_all'): if request.args.get('recheck_all'):
for uuid in self.datastore.data['watching'].keys(): for uuid in self.datastore.data['watching'].keys():
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) self.update_q.put((1, uuid))
return {'status': "OK"}, 200 return {'status': "OK"}, 200
return list, 200 return list, 200

View File

@@ -26,7 +26,7 @@ browser_step_ui_config = {'Choose one': '0 0',
# 'Extract text and use as filter': '1 0', # 'Extract text and use as filter': '1 0',
'Goto site': '0 0', 'Goto site': '0 0',
'Press Enter': '0 0', 'Press Enter': '0 0',
'Select by label': '1 1', 'Select option': '1 1',
'Scroll down': '0 0', 'Scroll down': '0 0',
'Uncheck checkbox': '1 0', 'Uncheck checkbox': '1 0',
'Wait for seconds': '0 1', 'Wait for seconds': '0 1',
@@ -75,13 +75,15 @@ class steppable_browser_interface():
def action_goto_url(self, url, optional_value): def action_goto_url(self, url, optional_value):
# self.page.set_viewport_size({"width": 1280, "height": 5000}) # self.page.set_viewport_size({"width": 1280, "height": 5000})
now = time.time() now = time.time()
response = self.page.goto(url, timeout=0, wait_until='commit') response = self.page.goto(url, timeout=0, wait_until='domcontentloaded')
print("Time to goto URL", time.time() - now)
# Wait_until = commit # Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading. # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors' # This seemed to solve nearly all 'TimeoutErrors'
print("Time to goto URL ", time.time() - now) extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
self.page.wait_for_timeout(extra_wait * 1000)
def action_click_element_containing_text(self, selector=None, value=''): def action_click_element_containing_text(self, selector=None, value=''):
if not len(value.strip()): if not len(value.strip()):
@@ -234,7 +236,7 @@ class browsersteps_live_ui(steppable_browser_interface):
self.page.evaluate("var include_filters=''") self.page.evaluate("var include_filters=''")
# Go find the interactive elements # Go find the interactive elements
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers? # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span,select'
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements) xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
# So the JS will find the smallest one first # So the JS will find the smallest one first

View File

@@ -3,13 +3,11 @@ from distutils.util import strtobool
from flask import Blueprint, flash, redirect, url_for from flask import Blueprint, flash, redirect, url_for
from flask_login import login_required from flask_login import login_required
from changedetectionio.store import ChangeDetectionStore from changedetectionio.store import ChangeDetectionStore
from changedetectionio import queuedWatchMetaData
from queue import PriorityQueue
PRICE_DATA_TRACK_ACCEPT = 'accepted' PRICE_DATA_TRACK_ACCEPT = 'accepted'
PRICE_DATA_TRACK_REJECT = 'rejected' PRICE_DATA_TRACK_REJECT = 'rejected'
def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue): def construct_blueprint(datastore: ChangeDetectionStore):
price_data_follower_blueprint = Blueprint('price_data_follower', __name__) price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
@@ -17,7 +15,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET']) @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
def accept(uuid): def accept(uuid):
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
return redirect(url_for("form_watch_checknow", uuid=uuid)) return redirect(url_for("form_watch_checknow", uuid=uuid))

View File

@@ -23,9 +23,6 @@ class Non200ErrorCodeReceived(Exception):
self.page_text = html_tools.html_to_text(page_html) self.page_text = html_tools.html_to_text(page_html)
return return
class checksumFromPreviousCheckWasTheSame(Exception):
def __init__(self):
return
class JSActionExceptions(Exception): class JSActionExceptions(Exception):
def __init__(self, status_code, url, screenshot, message=''): def __init__(self, status_code, url, screenshot, message=''):
@@ -42,7 +39,7 @@ class BrowserStepsStepTimout(Exception):
class PageUnloadable(Exception): class PageUnloadable(Exception):
def __init__(self, status_code, url, message, screenshot=False): def __init__(self, status_code, url, screenshot=False, message=False):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
@@ -289,8 +286,6 @@ class base_html_playwright(Fetcher):
proxy=self.proxy, proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others # This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True, bypass_csp=True,
# Can't think why we need the service workers for our use case?
service_workers='block',
# Should never be needed # Should never be needed
accept_downloads=False accept_downloads=False
) )
@@ -299,34 +294,24 @@ class base_html_playwright(Fetcher):
if len(request_headers): if len(request_headers):
context.set_extra_http_headers(request_headers) context.set_extra_http_headers(request_headers)
try:
self.page.set_default_navigation_timeout(90000) self.page.set_default_navigation_timeout(90000)
self.page.set_default_timeout(90000) self.page.set_default_timeout(90000)
# Listen for all console events and handle errors # Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
# Goto page # Bug - never set viewport size BEFORE page.goto
try:
# Waits for the next navigation. Using Python context manager
# prevents a race condition between clicking and waiting for a navigation.
with self.page.expect_navigation():
response = self.page.goto(url, wait_until='load')
# Wait_until = commit # Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading. # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors' # This seemed to solve nearly all 'TimeoutErrors'
response = self.page.goto(url, wait_until='commit')
except playwright._impl._api_types.Error as e:
# Retry once - https://github.com/browserless/chrome/issues/2485
# Sometimes errors related to invalid cert's and other can be random
print ("Content Fetcher > retrying request got error - ", str(e))
time.sleep(1)
response = self.page.goto(url, wait_until='commit')
except Exception as e:
print ("Content Fetcher > Other exception when page.goto", str(e))
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e))
# Execute any browser steps
try:
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
self.page.wait_for_timeout(extra_wait * 1000) self.page.wait_for_timeout(extra_wait * 1000)
@@ -339,15 +324,17 @@ class base_html_playwright(Fetcher):
# This can be ok, we will try to grab what we could retrieve # This can be ok, we will try to grab what we could retrieve
pass pass
except Exception as e: except Exception as e:
print ("Content Fetcher > Other exception when executing custom JS code", str(e)) print ("other exception when page.goto")
print (str(e))
context.close() context.close()
browser.close() browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e)) raise PageUnloadable(url=url, status_code=None)
if response is None: if response is None:
context.close() context.close()
browser.close() browser.close()
print ("Content Fetcher > Response object was none") print ("response object was none")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page # Bug 2(?) Set the viewport size AFTER loading the page
@@ -366,7 +353,7 @@ class base_html_playwright(Fetcher):
if len(self.page.content().strip()) == 0: if len(self.page.content().strip()) == 0:
context.close() context.close()
browser.close() browser.close()
print ("Content Fetcher > Content was empty") print ("Content was empty")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page # Bug 2(?) Set the viewport size AFTER loading the page
@@ -511,7 +498,7 @@ class base_html_webdriver(Fetcher):
try: try:
self.driver.quit() self.driver.quit()
except Exception as e: except Exception as e:
print("Content Fetcher > Exception in chrome shutdown/quit" + str(e)) print("Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py! # "html_requests" is listed as the default fetcher in store.py!

View File

@@ -1,5 +1,4 @@
import hashlib import hashlib
import json
import logging import logging
import os import os
import re import re
@@ -7,7 +6,6 @@ import urllib3
from changedetectionio import content_fetcher, html_tools from changedetectionio import content_fetcher, html_tools
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from copy import deepcopy
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -40,7 +38,8 @@ class perform_site_check():
return regex return regex
def run(self, uuid, skip_when_checksum_same=True): def run(self, uuid):
from copy import deepcopy
changed_detected = False changed_detected = False
screenshot = False # as bytes screenshot = False # as bytes
stripped_text_from_html = "" stripped_text_from_html = ""
@@ -123,14 +122,6 @@ class perform_site_check():
self.screenshot = fetcher.screenshot self.screenshot = fetcher.screenshot
self.xpath_data = fetcher.xpath_data self.xpath_data = fetcher.xpath_data
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
if skip_when_checksum_same:
if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
raise content_fetcher.checksumFromPreviousCheckWasTheSame()
# Fetching complete, now filters # Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base? # @todo move to class / maybe inside of fetcher abstract base?
@@ -168,14 +159,6 @@ class perform_site_check():
include_filters_rule.append("json:$") include_filters_rule.append("json:$")
has_filter_rule = True has_filter_rule = True
if is_json:
# Sort the JSON so we dont get false alerts when the content is just re-ordered
try:
fetcher.content = json.dumps(json.loads(fetcher.content), sort_keys=True)
except Exception as e:
# Might have just been a snippet, or otherwise bad JSON, continue
pass
if has_filter_rule: if has_filter_rule:
json_filter_prefixes = ['json:', 'jq:'] json_filter_prefixes = ['json:', 'jq:']
for filter in include_filters_rule: for filter in include_filters_rule:
@@ -183,8 +166,6 @@ class perform_site_check():
stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter) stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
is_html = False is_html = False
if is_html or is_source: if is_html or is_source:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text

View File

@@ -14,52 +14,51 @@ from changedetectionio.notification import (
class model(dict): class model(dict):
__newest_history_key = None __newest_history_key = None
__history_n = 0 __history_n=0
__base_config = { __base_config = {
# 'history': {}, # Dict of timestamp and output stripped filename (removed) #'history': {}, # Dict of timestamp and output stripped filename (removed)
# 'newest_history_key': 0, (removed, taken from history.txt index) #'newest_history_key': 0, (removed, taken from history.txt index)
'body': None, 'body': None,
'check_unique_lines': False, # On change-detected, compare against all history if its something new 'check_unique_lines': False, # On change-detected, compare against all history if its something new
'check_count': 0, 'check_count': 0,
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'extract_text': [], # Extract text by regex after filters 'extract_text': [], # Extract text by regex after filters
'extract_title_as_title': False, 'extract_title_as_title': False,
'fetch_backend': None, 'fetch_backend': None,
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'has_ldjson_price_data': None, 'has_ldjson_price_data': None,
'track_ldjson_price_data': None, 'track_ldjson_price_data': None,
'headers': {}, # Extra headers to send 'headers': {}, # Extra headers to send
'ignore_text': [], # List of text to ignore when calculating the comparison checksum 'ignore_text': [], # List of text to ignore when calculating the comparison checksum
'include_filters': [], 'include_filters': [],
'last_checked': 0, 'last_checked': 0,
'last_error': False, 'last_error': False,
'last_viewed': 0, # history key value of the last viewed via the [diff] link 'last_viewed': 0, # history key value of the last viewed via the [diff] link
'method': 'GET', 'method': 'GET',
# Custom notification content # Custom notification content
'notification_body': None, 'notification_body': None,
'notification_format': default_notification_format_for_watch, 'notification_format': default_notification_format_for_watch,
'notification_muted': False, 'notification_muted': False,
'notification_title': None, 'notification_title': None,
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL 'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'paused': False, 'paused': False,
'previous_md5': False, 'previous_md5': False,
'previous_md5_before_filters': False, # Used for skipping changedetection entirely 'proxy': None, # Preferred proxy connection
'proxy': None, # Preferred proxy connection 'subtractive_selectors': [],
'subtractive_selectors': [], 'tag': None,
'tag': None, 'text_should_not_be_present': [], # Text that should not present
'text_should_not_be_present': [], # Text that should not present # Re #110, so then if this is set to None, we know to use the default value instead
# Re #110, so then if this is set to None, we know to use the default value instead # Requires setting to None on submit if it's the same as the default
# Requires setting to None on submit if it's the same as the default # Should be all None by default, so we use the system default in this case.
# Should be all None by default, so we use the system default in this case. 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, 'title': None,
'title': None, 'trigger_text': [], # List of text or regex to wait for until a change is detected
'trigger_text': [], # List of text or regex to wait for until a change is detected 'url': None,
'url': None, 'uuid': str(uuid.uuid4()),
'uuid': str(uuid.uuid4()), 'webdriver_delay': None,
'webdriver_delay': None, 'webdriver_js_execute_code': None, # Run before change-detection
'webdriver_js_execute_code': None, # Run before change-detection }
}
jitter_seconds = 0 jitter_seconds = 0
def __init__(self, *arg, **kw): def __init__(self, *arg, **kw):

View File

@@ -1,10 +0,0 @@
from dataclasses import dataclass, field
from typing import Any
# So that we can queue some metadata in `item`
# https://docs.python.org/3/library/queue.html#queue.PriorityQueue
#
@dataclass(order=True)
class PrioritizedItem:
priority: int
item: Any=field(compare=False)

View File

@@ -6,6 +6,10 @@
// will automatically force a scroll somewhere, so include the position offset // will automatically force a scroll somewhere, so include the position offset
// Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing // Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
function setModalChoiceFromList(items, element) {
}
var scroll_y=+document.documentElement.scrollTop || document.body.scrollTop var scroll_y=+document.documentElement.scrollTop || document.body.scrollTop
// Include the getXpath script directly, easier than fetching // Include the getXpath script directly, easier than fetching
@@ -133,7 +137,7 @@ for (var i = 0; i < elements.length; i++) {
} }
// @todo Possible to ONLY list where it's clickable to save JSON xfer size // @todo Possible to ONLY list where it's clickable to save JSON xfer size
size_pos.push({ var n = {
xpath: xpath_result, xpath: xpath_result,
width: Math.round(bbox['width']), width: Math.round(bbox['width']),
height: Math.round(bbox['height']), height: Math.round(bbox['height']),
@@ -142,7 +146,16 @@ for (var i = 0; i < elements.length; i++) {
tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '', tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '',
tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : '', tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : '',
isClickable: (elements[i].onclick) || window.getComputedStyle(elements[i]).cursor == "pointer" isClickable: (elements[i].onclick) || window.getComputedStyle(elements[i]).cursor == "pointer"
}); }
if (n['tagName'] == 'select') {
n['options']=[]
for (const v of elements[i].options) {
n['options'].push(v.text)
}
}
size_pos.push(n);
} }
@@ -174,23 +187,10 @@ if (include_filters.length) {
} }
if (q) { if (q) {
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. bbox = q.getBoundingClientRect();
if (q.hasOwnProperty('getBoundingClientRect')) { console.log("xpath_element_scraper: Got filter element, scroll from top was "+scroll_y)
bbox = q.getBoundingClientRect(); } else {
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) console.log("xpath_element_scraper: filter element "+f+" was not found");
} else {
try {
// Try and see we can find its ownerElement
bbox = q.ownerElement.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) {
console.log("xpath_element_scraper: error looking up ownerElement")
}
}
}
if(!q) {
console.log("xpath_element_scraper: filter element " + f + " was not found");
} }
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {

View File

@@ -214,7 +214,12 @@ $(document).ready(function () {
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); $('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
found_something = true; found_something = true;
} else { } else {
if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') { if (x['tagName'] === 'select') {
$('select', first_available).val('Select option').change();
$('input[type=text]', first_available).first().val(x['xpath']);
$('#myModal').show();
found_something = true;
} else if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') {
$('select', first_available).val('Click element').change(); $('select', first_available).val('Click element').change();
$('input[type=text]', first_available).first().val(x['xpath']); $('input[type=text]', first_available).first().val(x['xpath']);
found_something = true; found_something = true;

View File

@@ -0,0 +1,37 @@
/* The Modal (background) */
.modal {
display: none; /* Hidden by default */
position: fixed; /* Stay in place */
z-index: 1; /* Sit on top */
left: 0;
top: 0;
width: 100%; /* Full width */
height: 100%; /* Full height */
overflow: auto; /* Enable scroll if needed */
background-color: rgb(0,0,0); /* Fallback color */
background-color: rgba(0,0,0,0.4); /* Black w/ opacity */
}
/* Modal Content/Box */
.modal-content {
background-color: #fefefe;
margin: 15% auto; /* 15% from the top and centered */
padding: 20px;
border: 1px solid #888;
max-width: 80%; /* Could be more or less, depending on screen size */
}
/* The Close Button */
.close {
color: #aaa;
float: right;
font-size: 28px;
font-weight: bold;
}
.close:hover,
.close:focus {
color: black;
text-decoration: none;
cursor: pointer;
}

View File

@@ -2,10 +2,11 @@
* -- BASE STYLES -- * -- BASE STYLES --
*/ */
@import "parts/_variables";
@import "parts/_spinners";
@import "parts/_browser-steps";
@import "parts/_arrows"; @import "parts/_arrows";
@import "parts/_browser-steps";
@import "parts/_modal";
@import "parts/_spinners";
@import "parts/_variables";
body { body {
color: var(--color-text); color: var(--color-text);

View File

@@ -1,6 +1,174 @@
/* /*
* -- BASE STYLES -- * -- BASE STYLES --
*/ */
.arrow {
border: solid #1b98f8;
border-width: 0 2px 2px 0;
display: inline-block;
padding: 3px; }
.arrow.right {
transform: rotate(-45deg);
-webkit-transform: rotate(-45deg); }
.arrow.left {
transform: rotate(135deg);
-webkit-transform: rotate(135deg); }
.arrow.up, .arrow.asc {
transform: rotate(-135deg);
-webkit-transform: rotate(-135deg); }
.arrow.down, .arrow.desc {
transform: rotate(45deg);
-webkit-transform: rotate(45deg); }
#browser_steps {
/* convert rows to horizontal cells */ }
#browser_steps th {
display: none; }
#browser_steps li {
list-style: decimal;
padding: 5px; }
#browser_steps li:not(:first-child):hover {
opacity: 1.0; }
#browser_steps li .control {
padding-left: 5px;
padding-right: 5px; }
#browser_steps li .control a {
font-size: 70%; }
#browser_steps li.empty {
padding: 0px;
opacity: 0.35; }
#browser_steps li.empty .control {
display: none; }
#browser_steps li:hover {
background: #eee; }
#browser_steps li > label {
display: none; }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll; }
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 600px;
/*@todo make this dynamic */ }
/* this is duplicate :( */
#browsersteps-selector-wrapper {
height: 100%;
width: 100%;
overflow-y: scroll;
position: relative;
/* nice tall skinny one */ }
#browsersteps-selector-wrapper > img {
position: absolute;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas {
position: relative;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas:hover {
cursor: pointer; }
#browsersteps-selector-wrapper .loader {
position: absolute;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
margin-left: -40px;
z-index: 100;
max-width: 350px;
text-align: center; }
#browsersteps-selector-wrapper .spinner, #browsersteps-selector-wrapper .spinner:after {
width: 80px;
height: 80px;
font-size: 3px; }
#browsersteps-selector-wrapper #browsersteps-click-start {
color: var(--color-grey-400); }
#browsersteps-selector-wrapper #browsersteps-click-start:hover {
cursor: pointer; }
/* The Modal (background) */
.modal {
display: none;
/* Hidden by default */
position: fixed;
/* Stay in place */
z-index: 1;
/* Sit on top */
left: 0;
top: 0;
width: 100%;
/* Full width */
height: 100%;
/* Full height */
overflow: auto;
/* Enable scroll if needed */
background-color: black;
/* Fallback color */
background-color: rgba(0, 0, 0, 0.4);
/* Black w/ opacity */ }
/* Modal Content/Box */
.modal-content {
background-color: #fefefe;
margin: 15% auto;
/* 15% from the top and centered */
padding: 20px;
border: 1px solid #888;
max-width: 80%;
/* Could be more or less, depending on screen size */ }
/* The Close Button */
.close {
color: #aaa;
float: right;
font-size: 28px;
font-weight: bold; }
.close:hover,
.close:focus {
color: black;
text-decoration: none;
cursor: pointer; }
/* spinner */
.spinner,
.spinner:after {
border-radius: 50%;
width: 10px;
height: 10px; }
.spinner {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38, 104, 237, 0.2);
border-right: 1.1em solid rgba(38, 104, 237, 0.2);
border-bottom: 1.1em solid rgba(38, 104, 237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear; }
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
/** /**
* CSS custom properties (aka variables). * CSS custom properties (aka variables).
*/ */
@@ -138,130 +306,6 @@ html[data-darkmode="true"] {
html[data-darkmode="true"] .watch-table .unviewed.error { html[data-darkmode="true"] .watch-table .unviewed.error {
color: var(--color-watch-table-error); } color: var(--color-watch-table-error); }
/* spinner */
.spinner,
.spinner:after {
border-radius: 50%;
width: 10px;
height: 10px; }
.spinner {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38, 104, 237, 0.2);
border-right: 1.1em solid rgba(38, 104, 237, 0.2);
border-bottom: 1.1em solid rgba(38, 104, 237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear; }
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
#browser_steps {
/* convert rows to horizontal cells */ }
#browser_steps th {
display: none; }
#browser_steps li {
list-style: decimal;
padding: 5px; }
#browser_steps li:not(:first-child):hover {
opacity: 1.0; }
#browser_steps li .control {
padding-left: 5px;
padding-right: 5px; }
#browser_steps li .control a {
font-size: 70%; }
#browser_steps li.empty {
padding: 0px;
opacity: 0.35; }
#browser_steps li.empty .control {
display: none; }
#browser_steps li:hover {
background: #eee; }
#browser_steps li > label {
display: none; }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll; }
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 600px;
/*@todo make this dynamic */ }
/* this is duplicate :( */
#browsersteps-selector-wrapper {
height: 100%;
width: 100%;
overflow-y: scroll;
position: relative;
/* nice tall skinny one */ }
#browsersteps-selector-wrapper > img {
position: absolute;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas {
position: relative;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas:hover {
cursor: pointer; }
#browsersteps-selector-wrapper .loader {
position: absolute;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
margin-left: -40px;
z-index: 100;
max-width: 350px;
text-align: center; }
#browsersteps-selector-wrapper .spinner, #browsersteps-selector-wrapper .spinner:after {
width: 80px;
height: 80px;
font-size: 3px; }
#browsersteps-selector-wrapper #browsersteps-click-start {
color: var(--color-grey-400); }
#browsersteps-selector-wrapper #browsersteps-click-start:hover {
cursor: pointer; }
.arrow {
border: solid #1b98f8;
border-width: 0 2px 2px 0;
display: inline-block;
padding: 3px; }
.arrow.right {
transform: rotate(-45deg);
-webkit-transform: rotate(-45deg); }
.arrow.left {
transform: rotate(135deg);
-webkit-transform: rotate(135deg); }
.arrow.up, .arrow.asc {
transform: rotate(-135deg);
-webkit-transform: rotate(-135deg); }
.arrow.down, .arrow.desc {
transform: rotate(45deg);
-webkit-transform: rotate(45deg); }
body { body {
color: var(--color-text); color: var(--color-text);
background: var(--color-background-page); } background: var(--color-background-page); }

View File

@@ -34,6 +34,17 @@
</head> </head>
<body> <body>
<div id="myModal" class="modal" style="display: none;">
<!-- Modal content -->
<div class="modal-content">
<span class="close">&times;</span>
<div class="inner">
xxx
</div>
</div>
</div>
<div class="header"> <div class="header">
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu"> <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
{% if has_password and not current_user.is_authenticated %} {% if has_password and not current_user.is_authenticated %}

View File

@@ -394,48 +394,6 @@ def check_json_ext_filter(json_filter, client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_ignore_json_order(client, live_server):
# A change in order shouldn't trigger a notification
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write('{"hello" : 123, "world": 123}')
# Add our URL to the import page
test_url = url_for('test_endpoint', content_type="application/json", _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(2)
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write('{"world" : 123, "hello": 123}')
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(2)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Just to be sure it still works
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write('{"world" : 123, "hello": 124}')
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(2)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_jsonpath_ext_filter(client, live_server): def test_check_jsonpath_ext_filter(client, live_server):
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)

View File

@@ -4,7 +4,6 @@ import queue
import time import time
from changedetectionio import content_fetcher from changedetectionio import content_fetcher
from changedetectionio import queuedWatchMetaData
from changedetectionio.fetch_site_status import FilterNotFoundInResponse from changedetectionio.fetch_site_status import FilterNotFoundInResponse
# A single update worker # A single update worker
@@ -158,12 +157,11 @@ class update_worker(threading.Thread):
while not self.app.config.exit.is_set(): while not self.app.config.exit.is_set():
try: try:
queued_item_data = self.q.get(block=False) priority, uuid = self.q.get(block=False)
except queue.Empty: except queue.Empty:
pass pass
else: else:
uuid = queued_item_data.item.get('uuid')
self.current_uuid = uuid self.current_uuid = uuid
if uuid in list(self.datastore.data['watching'].keys()): if uuid in list(self.datastore.data['watching'].keys()):
@@ -173,11 +171,11 @@ class update_worker(threading.Thread):
update_obj= {} update_obj= {}
xpath_data = False xpath_data = False
process_changedetection_results = True process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])) print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
now = time.time() now = time.time()
try: try:
changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same')) changed_detected, update_obj, contents = update_handler.run(uuid)
# Re #342 # Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc # We then convert/.decode('utf-8') for the notification etc
@@ -243,10 +241,6 @@ class update_worker(threading.Thread):
process_changedetection_results = True process_changedetection_results = True
except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
# Yes fine, so nothing todo
pass
except content_fetcher.BrowserStepsStepTimout as e: except content_fetcher.BrowserStepsStepTimout as e:
if not self.datastore.data['watching'].get(uuid): if not self.datastore.data['watching'].get(uuid):