Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-16 22:46:09 +00:00)

Compare commits — 1 commit, branches `threading-...` vs `compose-im...`

| Author | SHA1 | Date |
|---|---|---|
|  | 7afeebd4ef |  |
@@ -3,16 +3,14 @@

-## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS.
+## Self-Hosted, Open Source, Change Monitoring of Web Pages

-_Know when web pages change! Stay ontop of new information! get notifications when important website content changes_
+_Know when web pages change! Stay ontop of new information!_

 Live your data-life *pro-actively* instead of *re-actively*.

 Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)

-[[ Discord ]](https://discord.com/channels/1000806276256780309/1000806276873334816) [[ YouTube ]](https://www.youtube.com/channel/UCbS09q1TRf0o4N2t-WA3emQ) [[ LinkedIn ]](https://www.linkedin.com/company/changedetection-io/)

 [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
@@ -35,7 +33,6 @@ Free, Open-source web page monitoring, notification and change detection. Don't
 - New software releases, security advisories when you're not on their mailing list.
 - Festivals with changes
 - Realestate listing changes
-- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
 - COVID related news from government websites
 - University/organisation news from their website
 - Detect and monitor changes in JSON API responses
@@ -121,7 +118,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
 ## Filters

 XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.

-(We support LXML `re:test`, `re:math` and `re:replace`.)
+(We support LXML re:test, re:math and re:replace.)
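To make the `re:test` support concrete: lxml exposes the EXSLT regular-expression functions under the `http://exslt.org/regular-expressions` namespace, which is the same namespace the project's XPath filter registers. A minimal stand-alone sketch (the HTML snippet and the price pattern are invented for illustration, not taken from the project):

```python
# Minimal sketch, not project code: filter elements with an EXSLT regex in XPath.
from lxml import etree, html

doc = html.fromstring("<div><p class='price'>$10.99</p><p>out of stock</p></div>")
ns = {'re': 'http://exslt.org/regular-expressions'}

# Keep only <p> elements whose text looks like a price.
for node in doc.xpath(r"//p[re:test(text(), '\$\d+\.\d{2}')]", namespaces=ns):
    print(etree.tostring(node, pretty_print=True).decode('utf-8'))
```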
 ## Notifications
@@ -44,7 +44,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.39.16'
+__version__ = '0.39.15'

 datastore = None
@@ -105,10 +105,9 @@ def init_app_secret(datastore_path):
 # running or something similar.
 @app.template_filter('format_last_checked_time')
 def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):

 # Worker thread tells us which UUID it is currently processing.
-for t in threading.enumerate():
-if t.name == 'update_worker' and t.current_uuid == watch_obj['uuid']:
+for t in running_update_threads:
+if t.current_uuid == watch_obj['uuid']:
 return '<span class="loader"></span><span> Checking now</span>'

 if watch_obj['last_checked'] == 0:
@@ -299,7 +298,7 @@ def changedetection_app(config=None, datastore_o=None):
 # Sort by last_changed and add the uuid which is usually the key..
 sorted_watches = []

-# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
+# @todo needs a .itemsWithTag() or something
 for uuid, watch in datastore.data['watching'].items():

 if limit_tag != None:
@@ -362,7 +361,7 @@ def changedetection_app(config=None, datastore_o=None):
 fe.pubDate(dt)

 response = make_response(fg.rss_str())
-response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
+response.headers.set('Content-Type', 'application/rss+xml')
 return response

 @app.route("/", methods=['GET'])
@@ -404,6 +403,8 @@ def changedetection_app(config=None, datastore_o=None):
 watch['uuid'] = uuid
 sorted_watches.append(watch)

+sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)

 existing_tags = datastore.get_all_tags()

 form = forms.quickWatchForm(request.form)
@@ -432,9 +433,7 @@ def changedetection_app(config=None, datastore_o=None):
 def ajax_callback_send_notification_test():

 import apprise
-from .apprise_asset import asset
-apobj = apprise.Apprise(asset=asset)
+apobj = apprise.Apprise()

 # validate URLS
 if not len(request.form['notification_urls'].strip()):
@@ -460,38 +459,37 @@ def changedetection_app(config=None, datastore_o=None):
 return 'OK'

-@app.route("/clear_history/<string:uuid>", methods=['GET'])
+@app.route("/scrub/<string:uuid>", methods=['GET'])
 @login_required
-def clear_watch_history(uuid):
+def scrub_watch(uuid):
 try:
-datastore.clear_watch_history(uuid)
+datastore.scrub_watch(uuid)
 except KeyError:
 flash('Watch not found', 'error')
 else:
-flash("Cleared snapshot history for watch {}".format(uuid))
+flash("Scrubbed watch {}".format(uuid))

 return redirect(url_for('index'))

-@app.route("/clear_history", methods=['GET', 'POST'])
+@app.route("/scrub", methods=['GET', 'POST'])
 @login_required
-def clear_all_history():
+def scrub_page():

 if request.method == 'POST':
 confirmtext = request.form.get('confirmtext')

-if confirmtext == 'clear':
+if confirmtext == 'scrub':
 changes_removed = 0
 for uuid in datastore.data['watching'].keys():
-datastore.clear_watch_history(uuid)
-#TODO: KeyError not checked, as it is above
+datastore.scrub_watch(uuid)

-flash("Cleared snapshot history for all watches")
+flash("Cleared all snapshot history")
 else:
 flash('Incorrect confirmation text.', 'error')

 return redirect(url_for('index'))

-output = render_template("clear_all_history.html")
+output = render_template("scrub.html")
 return output
@@ -658,8 +656,7 @@ def changedetection_app(config=None, datastore_o=None):
 current_base_url=datastore.data['settings']['application']['base_url'],
 emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
 visualselector_data_is_ready=visualselector_data_is_ready,
-visualselector_enabled=visualselector_enabled,
-playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
+visualselector_enabled=visualselector_enabled
 )

 return output
@@ -835,7 +832,7 @@ def changedetection_app(config=None, datastore_o=None):
 newest=newest_version_file_contents,
 previous=previous_version_file_contents,
 extra_stylesheets=extra_stylesheets,
-versions=dates[:-1], # All except current/last
+versions=dates[1:],
 uuid=uuid,
 newest_version_timestamp=dates[-1],
 current_previous_version=str(previous_version),
@@ -859,7 +856,7 @@ def changedetection_app(config=None, datastore_o=None):
 uuid = list(datastore.data['watching'].keys()).pop()

 # Normally you would never reach this, because the 'preview' button is not available when there's no history
-# However they may try to clear snapshots and reload the page
+# However they may try to scrub and reload the page
 if datastore.data['watching'][uuid].history_n == 0:
 flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
 return redirect(url_for('index'))
@@ -1214,7 +1211,6 @@ def changedetection_app(config=None, datastore_o=None):

 # @todo handle ctrl break
 ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
-threading.Thread(target=ticker_thread_job_queue_processor).start()

 threading.Thread(target=notification_runner).start()
@@ -1290,63 +1286,25 @@ def notification_runner():
 # Trim the log length
 notification_debug_log = notification_debug_log[-100:]

-# Check the queue, when a job exists, start a fresh thread of update_worker
-def ticker_thread_job_queue_processor():
-from changedetectionio import update_worker
-n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
-while not app.config.exit.is_set():
-time.sleep(0.3)
-# Check that some threads are free
-running = 0
-for t in threading.enumerate():
-if t.name == 'update_worker':
-running += 1
-if running >= n_workers:
-continue
-try:
-uuid = update_q.get(block=False)
-except queue.Empty:
-# Go back to waiting for exit and/or another entry from the queue
-continue
-print ("Starting a thread fetch")
-try:
-# Launch the update_worker thread that will handle picking items off a queue and sending them off
-# in the event that playwright or others have a memory leak, this should clean it up better than gc.collect()
-# (By letting it exit entirely)
-update_worker.update_worker(update_q, notification_q, app, datastore, uuid).start()
-except Exception as e:
-print ("Error launching update_worker for UUID {}.".format(uuid))
-print (str(e))
-print ("Running now {}", running)

 # Thread runner to check every minute, look for new watches to feed into the Queue.
 def ticker_thread_check_time_launch_checks():
 import random
+from changedetectionio import update_worker

 recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
 print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)

-# Can go in its own function
-# Always maintain the minimum number of threads, each thread will terminate when it has processed exactly 1 queued watch
-# This is to be totally sure that they don't leak memory
 # Spin up Workers that do the fetching
 # Can be overriden by ENV or use the default settings
+n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
+for _ in range(n_workers):
+new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
+running_update_threads.append(new_worker)
+new_worker.start()

 while not app.config.exit.is_set():

-# Update our list of watches by UUID that are currently fetching data, used in the UI
+# Get a list of watches by UUID that are currently fetching data
 running_uuids = []
 for t in running_update_threads:
 if t.current_uuid:
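For readers comparing the two sides of this hunk: the removed `ticker_thread_job_queue_processor` drains the queue by spawning a short-lived `update_worker` thread per fetched UUID, while the added lines start a fixed pool of long-lived workers once and reuse them. A self-contained sketch of the pooled model (class and variable names here are illustrative, not the project's `update_worker`):

```python
import queue
import threading

update_q = queue.Queue()

class Worker(threading.Thread):
    """Long-lived worker: keeps pulling UUIDs off the queue instead of
    being created fresh for every job."""
    def __init__(self, q):
        super().__init__(daemon=True)
        self.q = q
        self.current_uuid = None   # exposed so a UI could show "Checking now"

    def run(self):
        while True:
            uuid = self.q.get()
            if uuid is None:       # sentinel -> shut the worker down
                break
            self.current_uuid = uuid
            try:
                print("checking", uuid)   # a real worker would fetch and diff here
            finally:
                self.current_uuid = None
                self.q.task_done()

# Spin up a fixed pool once, instead of one thread per queued job.
running_update_threads = [Worker(update_q) for _ in range(4)]
for w in running_update_threads:
    w.start()

for item in ("uuid-1", "uuid-2", "uuid-3"):
    update_q.put(item)
update_q.join()                    # wait until every queued item was processed
for _ in running_update_threads:
    update_q.put(None)
```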
|
|||||||
@@ -1,11 +0,0 @@
-import apprise
-
-# Create our AppriseAsset and populate it with some of our new values:
-# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
-asset = apprise.AppriseAsset(
-image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
-)
-
-asset.app_id = "changedetection.io"
-asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection"
-asset.app_url = "https://changedetection.io"
@@ -35,7 +35,7 @@ def main():
 create_datastore_dir = False

 for opt, arg in opts:
-# if opt == '--clear-all-history':
+# if opt == '--purge':
 # Remove history, the actual files you need to delete manually.
 # for uuid, watch in datastore.data['watching'].items():
 # watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
|
|||||||
@@ -46,7 +46,6 @@ class Fetcher():
|
|||||||
headers = None
|
headers = None
|
||||||
|
|
||||||
fetcher_description = "No description"
|
fetcher_description = "No description"
|
||||||
webdriver_js_execute_code = None
|
|
||||||
xpath_element_js = """
|
xpath_element_js = """
|
||||||
// Include the getXpath script directly, easier than fetching
|
// Include the getXpath script directly, easier than fetching
|
||||||
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
|
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
|
||||||
@@ -176,6 +175,7 @@ class Fetcher():
|
|||||||
|
|
||||||
# Will be needed in the future by the VisualSelector, always get this where possible.
|
# Will be needed in the future by the VisualSelector, always get this where possible.
|
||||||
screenshot = False
|
screenshot = False
|
||||||
|
fetcher_description = "No description"
|
||||||
system_http_proxy = os.getenv('HTTP_PROXY')
|
system_http_proxy = os.getenv('HTTP_PROXY')
|
||||||
system_https_proxy = os.getenv('HTTPS_PROXY')
|
system_https_proxy = os.getenv('HTTPS_PROXY')
|
||||||
|
|
||||||
@@ -301,17 +301,11 @@ class base_html_playwright(Fetcher):
|
|||||||
accept_downloads=False
|
accept_downloads=False
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(request_headers):
|
|
||||||
context.set_extra_http_headers(request_headers)
|
|
||||||
|
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
try:
|
try:
|
||||||
page.set_default_navigation_timeout(90000)
|
page.set_default_navigation_timeout(90000)
|
||||||
page.set_default_timeout(90000)
|
page.set_default_timeout(90000)
|
||||||
|
|
||||||
# Listen for all console events and handle errors
|
|
||||||
page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
|
||||||
|
|
||||||
# Bug - never set viewport size BEFORE page.goto
|
# Bug - never set viewport size BEFORE page.goto
|
||||||
|
|
||||||
# Waits for the next navigation. Using Python context manager
|
# Waits for the next navigation. Using Python context manager
|
||||||
@@ -319,9 +313,6 @@ class base_html_playwright(Fetcher):
|
|||||||
with page.expect_navigation():
|
with page.expect_navigation():
|
||||||
response = page.goto(url, wait_until='load')
|
response = page.goto(url, wait_until='load')
|
||||||
|
|
||||||
if self.webdriver_js_execute_code is not None:
|
|
||||||
page.evaluate(self.webdriver_js_execute_code)
|
|
||||||
|
|
||||||
except playwright._impl._api_types.TimeoutError as e:
|
except playwright._impl._api_types.TimeoutError as e:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
@@ -453,12 +444,6 @@ class base_html_webdriver(Fetcher):
|
|||||||
|
|
||||||
self.driver.set_window_size(1280, 1024)
|
self.driver.set_window_size(1280, 1024)
|
||||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||||
|
|
||||||
if self.webdriver_js_execute_code is not None:
|
|
||||||
self.driver.execute_script(self.webdriver_js_execute_code)
|
|
||||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
|
||||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
|
||||||
|
|
||||||
self.screenshot = self.driver.get_screenshot_as_png()
|
self.screenshot = self.driver.get_screenshot_as_png()
|
||||||
|
|
||||||
# @todo - how to check this? is it possible?
|
# @todo - how to check this? is it possible?
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
 import hashlib
-import logging
 import os
 import re
 import time
@@ -11,7 +10,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|||||||
|
|
||||||
|
|
||||||
# Some common stuff here that can be moved to a base class
|
# Some common stuff here that can be moved to a base class
|
||||||
# (set_proxy_from_list)
|
|
||||||
class perform_site_check():
|
class perform_site_check():
|
||||||
|
|
||||||
def __init__(self, *args, datastore, **kwargs):
|
def __init__(self, *args, datastore, **kwargs):
|
||||||
@@ -46,20 +44,6 @@ class perform_site_check():
 return proxy_args

-# Doesn't look like python supports forward slash auto enclosure in re.findall
-# So convert it to inline flag "foobar(?i)" type configuration
-def forward_slash_enclosed_regex_to_options(self, regex):
-res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE)
-if res:
-regex = res.group(1)
-regex += '(?{})'.format(res.group(2))
-else:
-regex += '(?{})'.format('i')
-return regex

 def run(self, uuid):
 timestamp = int(time.time()) # used for storage etc too
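The removed helper exists because `re.findall` has no notion of `/pattern/flags` syntax, so a slash-enclosed expression has to be rewritten using Python's inline flags. A stand-alone sketch of the same conversion (note: recent Python versions require global inline flags at the start of the pattern, so this sketch places them there rather than appending them as the removed code did):

```python
import re

def slash_regex_to_inline_flags(expr):
    # "/foobar/i" -> "(?i)foobar"; plain strings get a case-insensitive default.
    m = re.search(r'^/(.*?)/(\w+)$', expr)
    if m:
        return '(?{}){}'.format(m.group(2), m.group(1))
    return '(?i)' + expr

print(re.findall(slash_regex_to_inline_flags('/price/i'), 'The PRICE is 10'))  # ['PRICE']
```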
@@ -121,9 +105,6 @@ class perform_site_check():
 elif system_webdriver_delay is not None:
 fetcher.render_extract_delay = system_webdriver_delay

-if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
-fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']

 fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter'])
 fetcher.quit()
@@ -165,9 +146,7 @@ class perform_site_check():
 is_html = False

 if is_html or is_source:

 # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
-fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
 html_content = fetcher.content

 # If not JSON, and if it's not text/plain..
@@ -230,27 +209,15 @@ class perform_site_check():
 if len(extract_text) > 0:
 regex_matched_output = []
 for s_re in extract_text:
-# incase they specified something in '/.../x'
-regex = self.forward_slash_enclosed_regex_to_options(s_re)
-result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
-for l in result:
-if type(l) is tuple:
-#@todo - some formatter option default (between groups)
-regex_matched_output += list(l) + [b'\n']
-else:
-# @todo - some formatter option default (between each ungrouped result)
-regex_matched_output += [l] + [b'\n']
-# Now we will only show what the regex matched
-stripped_text_from_html = b''
-text_content_before_ignored_filter = b''
+result = re.findall(s_re.encode('utf8'), stripped_text_from_html,
+flags=re.MULTILINE | re.DOTALL | re.LOCALE)
+if result:
+regex_matched_output.append(result[0])

 if regex_matched_output:
-# @todo some formatter for presentation?
-stripped_text_from_html = b''.join(regex_matched_output)
+stripped_text_from_html = b'\n'.join(regex_matched_output)
 text_content_before_ignored_filter = stripped_text_from_html

 # Re #133 - if we should strip whitespaces from triggering the change detected comparison
 if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
 fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
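One detail worth spelling out in the removed branch: when an extract_text pattern contains more than one capture group, `re.findall` returns tuples instead of strings, which is why the removed code special-cases `type(l) is tuple` and splices the parts back together before hashing. A compact illustration (the sample text and patterns are made up):

```python
import re

text = b"price: 10 EUR\nprice: 12 USD\n"

# One capture group: findall returns a list of bytes objects.
print(re.findall(rb'price: (\d+)', text))            # [b'10', b'12']

# Two capture groups: findall returns a list of tuples.
matches = re.findall(rb'price: (\d+) (\w+)', text)   # [(b'10', b'EUR'), (b'12', b'USD')]
regex_matched_output = []
for m in matches:
    if isinstance(m, tuple):
        regex_matched_output += list(m) + [b'\n']
    else:
        regex_matched_output += [m, b'\n']
print(b''.join(regex_matched_output))                # b'10EUR\n12USD\n'
```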
@@ -288,6 +255,9 @@ class perform_site_check():
 # Looks like something changed, but did it match all the rules?
 if blocked:
 changed_detected = False
+else:
+update_obj["last_changed"] = timestamp

 # Extract title as title
 if is_html:

@@ -295,16 +265,6 @@ class perform_site_check():
 if not watch['title'] or not len(watch['title']):
 update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)

-if changed_detected:
-if watch.get('check_unique_lines', False):
-has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
-# One or more lines? unsure?
-if not has_unique_lines:
-logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
-changed_detected = False
-else:
-logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))

 # Always record the new checksum
 update_obj["previous_md5"] = fetched_md5
|
|||||||
@@ -340,17 +340,12 @@ class watchForm(commonSettingsForm):
|
|||||||
body = TextAreaField('Request body', [validators.Optional()])
|
body = TextAreaField('Request body', [validators.Optional()])
|
||||||
method = SelectField('Request method', choices=valid_method, default=default_method)
|
method = SelectField('Request method', choices=valid_method, default=default_method)
|
||||||
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
|
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
|
||||||
check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
|
|
||||||
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||||
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
|
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
|
||||||
|
|
||||||
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
|
|
||||||
|
|
||||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
|
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
proxy = RadioField('Proxy')
|
proxy = RadioField('Proxy')
|
||||||
filter_failure_notification_send = BooleanField(
|
|
||||||
'Send a notification when the filter can no longer be found on the page', default=False)
|
|
||||||
|
|
||||||
def validate(self, **kwargs):
|
def validate(self, **kwargs):
|
||||||
if not super().validate():
|
if not super().validate():
|
||||||
@@ -389,11 +384,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||||||
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
|
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
|
||||||
password = SaltyPasswordField()
|
password = SaltyPasswordField()
|
||||||
|
|
||||||
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
|
|
||||||
render_kw={"style": "width: 5em;"},
|
|
||||||
validators=[validators.NumberRange(min=0,
|
|
||||||
message="Should contain zero or more attempts")])
|
|
||||||
|
|
||||||
|
|
||||||
class globalSettingsForm(Form):
|
class globalSettingsForm(Form):
|
||||||
# Define these as FormFields/"sub forms", this way it matches the JSON storage
|
# Define these as FormFields/"sub forms", this way it matches the JSON storage
|
||||||
|
|||||||
@@ -1,27 +1,23 @@
 import json
+import re
 from typing import List

 from bs4 import BeautifulSoup
 from jsonpath_ng.ext import parse
 import re
+from inscriptis import get_text
+from inscriptis.model.config import ParserConfig

-class FilterNotFoundInResponse(ValueError):
-def __init__(self, msg):
-ValueError.__init__(self, msg)

 class JSONNotFound(ValueError):
 def __init__(self, msg):
 ValueError.__init__(self, msg)

 # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
 def css_filter(css_filter, html_content):
 soup = BeautifulSoup(html_content, "html.parser")
 html_block = ""
-r = soup.select(css_filter, separator="")
-if len(html_content) > 0 and len(r) == 0:
-raise FilterNotFoundInResponse(css_filter)
-for item in r:
+for item in soup.select(css_filter, separator=""):
 html_block += str(item)

 return html_block + "\n"
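For reference, the `css_filter` shown in this hunk boils down to BeautifulSoup's `select()`; the left-hand side additionally raises `FilterNotFoundInResponse` when the selector matches nothing. A simplified stand-alone version (using a plain `ValueError` in place of the project's exception class, and an invented HTML sample):

```python
from bs4 import BeautifulSoup

def css_filter(selector, html_content):
    # Return only the fragments of the page that match the CSS selector.
    soup = BeautifulSoup(html_content, "html.parser")
    matches = soup.select(selector)
    if html_content and not matches:
        raise ValueError("CSS filter not found in response: {}".format(selector))
    return "".join(str(item) for item in matches) + "\n"

page = "<div><span class='price'>10.99</span><span>noise</span></div>"
print(css_filter("span.price", page))   # <span class="price">10.99</span>
```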
@@ -46,12 +42,8 @@ def xpath_filter(xpath_filter, html_content):
 tree = html.fromstring(bytes(html_content, encoding='utf-8'))
 html_block = ""

-r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
-if len(html_content) > 0 and len(r) == 0:
-raise FilterNotFoundInResponse(xpath_filter)
-for item in r:
-html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "<br/>"
+for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}):
+html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"<br/>"

 return html_block
@@ -181,17 +173,10 @@ def strip_ignore_text(content, wordlist, mode="content"):

 def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
-import multiprocessing
-from inscriptis.model.config import ParserConfig

 """Converts html string to a string with just the text. If ignoring
 rendering anchor tag content is enable, anchor tag content are also
 included in the text

-@NOTE: HORRIBLE LXML INDUCED MEMORY LEAK WORKAROUND HERE
-https://www.reddit.com/r/Python/comments/j0gl8t/psa_pythonlxml_memory_leaks_and_a_solution/

 :param html_content: string with html content
 :param render_anchor_tag_content: boolean flag indicating whether to extract
 hyperlinks (the anchor tag content) together with text. This refers to the
@@ -212,33 +197,8 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
 else:
 parser_config = None

-def parse_function(html_content, parser_config, results_queue):
-from inscriptis import get_text
 # get text and annotations via inscriptis
 text_content = get_text(html_content, config=parser_config)
-results_queue.put(text_content)
-
-results_queue = multiprocessing.Queue()
-parse_process = multiprocessing.Process(target=parse_function, args=(html_content, parser_config, results_queue))
-parse_process.daemon = True
-parse_process.start()
-text_content = results_queue.get() # blocks until results are available
-parse_process.terminate()

 return text_content

-def workarounds_for_obfuscations(content):
-"""
-Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
-This could go into its own Pip package in the future, for faster updates
-"""
-# HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
-# https://github.com/weblyzard/inscriptis/issues/45
-if not content:
-return content
-content = re.sub('<!--\s+-->', '', content)
-return content
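The removed subprocess lines are a memory-leak workaround rather than a parsing feature: the actual inscriptis `get_text()` call is pushed into a throw-away child process so that any memory leaked by lxml dies with the process. A generic sketch of that pattern, with a trivial stand-in for the parser call (nothing here is project code):

```python
import multiprocessing

def _parse(html_content, results_queue):
    # Stand-in for inscriptis.get_text(); anything it leaks stays in this process.
    results_queue.put(html_content.upper())

def parse_in_subprocess(html_content):
    results_queue = multiprocessing.Queue()
    p = multiprocessing.Process(target=_parse, args=(html_content, results_queue))
    p.daemon = True
    p.start()
    text = results_queue.get()   # blocks until the child posts its result
    p.join()                     # child exits, taking any leaked memory with it
    return text

if __name__ == "__main__":
    print(parse_in_subprocess("<p>hello</p>"))
```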
|
|||||||
@@ -1,28 +1,30 @@
|
|||||||
from os import getenv
|
import collections
|
||||||
|
import os
|
||||||
|
|
||||||
|
import uuid as uuid_builder
|
||||||
|
|
||||||
from changedetectionio.notification import (
|
from changedetectionio.notification import (
|
||||||
default_notification_body,
|
default_notification_body,
|
||||||
default_notification_format,
|
default_notification_format,
|
||||||
default_notification_title,
|
default_notification_title,
|
||||||
)
|
)
|
||||||
|
|
||||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
|
||||||
|
|
||||||
class model(dict):
|
class model(dict):
|
||||||
base_config = {
|
base_config = {
|
||||||
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
||||||
'watching': {},
|
'watching': {},
|
||||||
'settings': {
|
'settings': {
|
||||||
'headers': {
|
'headers': {
|
||||||
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
||||||
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
||||||
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
||||||
},
|
},
|
||||||
'requests': {
|
'requests': {
|
||||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
'timeout': 15, # Default 15 seconds
|
||||||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||||
'jitter_seconds': 0,
|
'jitter_seconds': 0,
|
||||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
'workers': 10, # Number of threads, lower is better for slow connections
|
||||||
'proxy': None # Preferred proxy connection
|
'proxy': None # Preferred proxy connection
|
||||||
},
|
},
|
||||||
'application': {
|
'application': {
|
||||||
@@ -31,8 +33,7 @@ class model(dict):
|
|||||||
'base_url' : None,
|
'base_url' : None,
|
||||||
'extract_title_as_title': False,
|
'extract_title_as_title': False,
|
||||||
'empty_pages_are_a_change': False,
|
'empty_pages_are_a_change': False,
|
||||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
|
||||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||||
'global_subtractive_selectors': [],
|
'global_subtractive_selectors': [],
|
||||||
'ignore_whitespace': True,
|
'ignore_whitespace': True,
|
||||||
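The pattern that distinguishes the two columns in the settings hunks above is simply `getenv()` with a shipped fallback, cast to the right type, so that defaults such as the request timeout, worker count, user agent and fetch backend can be overridden per deployment. In isolation (the demo script name in the comment is hypothetical):

```python
from os import getenv

# Environment variable wins if set, otherwise the shipped default is used.
timeout = int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45"))
workers = int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10"))
backend = getenv("DEFAULT_FETCH_BACKEND", "html_requests")

print(timeout, workers, backend)
# e.g. DEFAULT_SETTINGS_REQUESTS_TIMEOUT=90 python settings_demo.py -> 90 10 html_requests
```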
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import uuid as uuid_builder
|
import uuid as uuid_builder
|
||||||
from distutils.util import strtobool
|
|
||||||
|
|
||||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
|
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
|
||||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
|
||||||
|
|
||||||
from changedetectionio.notification import (
|
from changedetectionio.notification import (
|
||||||
default_notification_body,
|
default_notification_body,
|
||||||
@@ -42,20 +40,16 @@ class model(dict):
|
|||||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||||
'text_should_not_be_present': [], # Text that should not present
|
'text_should_not_be_present': [], # Text that should not present
|
||||||
'fetch_backend': None,
|
'fetch_backend': None,
|
||||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
|
||||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
|
||||||
'extract_title_as_title': False,
|
'extract_title_as_title': False,
|
||||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
|
||||||
'proxy': None, # Preferred proxy connection
|
'proxy': None, # Preferred proxy connection
|
||||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||||
# Requires setting to None on submit if it's the same as the default
|
# Requires setting to None on submit if it's the same as the default
|
||||||
# Should be all None by default, so we use the system default in this case.
|
# Should be all None by default, so we use the system default in this case.
|
||||||
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
|
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
|
||||||
'webdriver_delay': None,
|
'webdriver_delay': None
|
||||||
'webdriver_js_execute_code': None, # Run before change-detection
|
|
||||||
}
|
}
|
||||||
jitter_seconds = 0
|
jitter_seconds = 0
|
||||||
|
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||||
def __init__(self, *arg, **kw):
|
def __init__(self, *arg, **kw):
|
||||||
import uuid
|
import uuid
|
||||||
self.update(self.__base_config)
|
self.update(self.__base_config)
|
||||||
@@ -164,21 +158,8 @@ class model(dict):

 def threshold_seconds(self):
 seconds = 0
-for m, n in mtable.items():
+for m, n in self.mtable.items():
 x = self.get('time_between_check', {}).get(m, None)
 if x:
 seconds += x * n
 return seconds

-# Iterate over all history texts and see if something new exists
-def lines_contain_something_unique_compared_to_history(self, lines=[]):
-local_lines = [l.decode('utf-8').strip().lower() for l in lines]
-# Compare each lines (set) against each history text file (set) looking for something new..
-for k, v in self.history.items():
-alist = [line.decode('utf-8').strip().lower() for line in open(v, 'rb')]
-res = set(alist) != set(local_lines)
-if res:
-return True
-return False
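The removed `lines_contain_something_unique_compared_to_history()` is, at its core, a set comparison between the freshly fetched lines and each stored snapshot. A stripped-down sketch using in-memory lists instead of the project's history files:

```python
def lines_contain_something_unique(new_lines, history_snapshots):
    """Mirror of the removed check: normalise every line, then report True as
    soon as any snapshot's set of lines differs from the new set."""
    current = {l.strip().lower() for l in new_lines}
    for snapshot in history_snapshots:
        if {l.strip().lower() for l in snapshot} != current:
            return True
    return False

history = [["Price: 10", "In stock"], ["price: 10", "in stock"]]
print(lines_contain_something_unique(["PRICE: 10", "IN STOCK"], history))  # False
print(lines_contain_something_unique(["price: 11", "in stock"], history))  # True
```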
|
|||||||
@@ -34,6 +34,7 @@ def process_notification(n_object, datastore):
|
|||||||
valid_notification_formats[default_notification_format],
|
valid_notification_formats[default_notification_format],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Insert variables into the notification content
|
# Insert variables into the notification content
|
||||||
notification_parameters = create_notification_parameters(n_object, datastore)
|
notification_parameters = create_notification_parameters(n_object, datastore)
|
||||||
|
|
||||||
@@ -48,9 +49,9 @@ def process_notification(n_object, datastore):
|
|||||||
# raise it as an exception
|
# raise it as an exception
|
||||||
apobjs=[]
|
apobjs=[]
|
||||||
sent_objs=[]
|
sent_objs=[]
|
||||||
from .apprise_asset import asset
|
|
||||||
for url in n_object['notification_urls']:
|
for url in n_object['notification_urls']:
|
||||||
apobj = apprise.Apprise(debug=True, asset=asset)
|
|
||||||
|
apobj = apprise.Apprise(debug=True)
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
if len(url):
|
if len(url):
|
||||||
print(">> Process Notification: AppRise notifying {}".format(url))
|
print(">> Process Notification: AppRise notifying {}".format(url))
|
||||||
@@ -63,7 +64,7 @@ def process_notification(n_object, datastore):
|
|||||||
|
|
||||||
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
|
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
|
||||||
k = '?' if not '?' in url else '&'
|
k = '?' if not '?' in url else '&'
|
||||||
if not 'avatar_url' in url and not url.startswith('mail'):
|
if not 'avatar_url' in url:
|
||||||
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
|
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
|
||||||
|
|
||||||
if url.startswith('tgram://'):
|
if url.startswith('tgram://'):
|
||||||
@@ -78,21 +79,13 @@ def process_notification(n_object, datastore):
|
|||||||
n_title = n_title[0:payload_max_size]
|
n_title = n_title[0:payload_max_size]
|
||||||
n_body = n_body[0:body_limit]
|
n_body = n_body[0:body_limit]
|
||||||
|
|
||||||
elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks'):
|
elif url.startswith('discord://'):
|
||||||
# real limit is 2000, but minus some for extra metadata
|
# real limit is 2000, but minus some for extra metadata
|
||||||
payload_max_size = 1700
|
payload_max_size = 1700
|
||||||
body_limit = max(0, payload_max_size - len(n_title))
|
body_limit = max(0, payload_max_size - len(n_title))
|
||||||
n_title = n_title[0:payload_max_size]
|
n_title = n_title[0:payload_max_size]
|
||||||
n_body = n_body[0:body_limit]
|
n_body = n_body[0:body_limit]
|
||||||
|
|
||||||
elif url.startswith('mailto'):
|
|
||||||
# Apprise will default to HTML, so we need to override it
|
|
||||||
# So that whats' generated in n_body is in line with what is going to be sent.
|
|
||||||
# https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
|
|
||||||
if not 'format=' in url and (n_format == 'text' or n_format == 'markdown'):
|
|
||||||
prefix = '?' if not '?' in url else '&'
|
|
||||||
url = "{}{}format={}".format(url, prefix, n_format)
|
|
||||||
|
|
||||||
apobj.add(url)
|
apobj.add(url)
|
||||||
|
|
||||||
apobj.notify(
|
apobj.notify(
|
||||||
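Both columns of this hunk apply the same length budget before handing the message to Apprise: the title is capped at the service's payload limit and the body gets whatever space remains. The arithmetic, using the 1700-character Discord limit quoted in the hunk (the sample title and body are made up):

```python
payload_max_size = 1700                       # discord:// limit used above
n_title = "Change detected: " + "x" * 100
n_body = "y" * 5000

body_limit = max(0, payload_max_size - len(n_title))
n_title = n_title[0:payload_max_size]         # the title itself may not exceed the cap
n_body = n_body[0:body_limit]

print(len(n_title), len(n_body))              # 117 1583 -> together still <= 1700
```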
|
|||||||
@@ -1,20 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<svg
-width="18"
-height="19.92"
-viewBox="0 0 18 19.92"
-version="1.1"
-id="svg6"
-xmlns="http://www.w3.org/2000/svg"
-xmlns:svg="http://www.w3.org/2000/svg">
-<defs
-id="defs10" />
-<path
-d="M -3,-2 H 21 V 22 H -3 Z"
-fill="none"
-id="path2" />
-<path
-d="m 15,14.08 c -0.76,0 -1.44,0.3 -1.96,0.77 L 5.91,10.7 C 5.96,10.47 6,10.24 6,10 6,9.76 5.96,9.53 5.91,9.3 L 12.96,5.19 C 13.5,5.69 14.21,6 15,6 16.66,6 18,4.66 18,3 18,1.34 16.66,0 15,0 c -1.66,0 -3,1.34 -3,3 0,0.24 0.04,0.47 0.09,0.7 L 5.04,7.81 C 4.5,7.31 3.79,7 3,7 1.34,7 0,8.34 0,10 c 0,1.66 1.34,3 3,3 0.79,0 1.5,-0.31 2.04,-0.81 l 7.12,4.16 c -0.05,0.21 -0.08,0.43 -0.08,0.65 0,1.61 1.31,2.92 2.92,2.92 1.61,0 2.92,-1.31 2.92,-2.92 0,-1.61 -1.31,-2.92 -2.92,-2.92 z"
-id="path4"
-style="fill:#ffffff;fill-opacity:1" />
-</svg>

(Deleted SVG file — before: 892 B)
@@ -1,30 +1,13 @@
|
|||||||
$(document).ready(function() {
|
$(document).ready(function() {
|
||||||
function toggle() {
|
function toggle() {
|
||||||
if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
|
if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') {
|
||||||
if(playwright_enabled) {
|
|
||||||
// playwright supports headers, so hide everything else
|
|
||||||
// See #664
|
|
||||||
$('#requests-override-options #request-method').hide();
|
|
||||||
$('#requests-override-options #request-body').hide();
|
|
||||||
|
|
||||||
// @todo connect this one up
|
|
||||||
$('#ignore-status-codes-option').hide();
|
|
||||||
} else {
|
|
||||||
// selenium/webdriver doesnt support anything afaik, hide it all
|
|
||||||
$('#requests-override-options').hide();
|
$('#requests-override-options').hide();
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
$('#webdriver-override-options').show();
|
$('#webdriver-override-options').show();
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
$('#requests-override-options').show();
|
$('#requests-override-options').show();
|
||||||
$('#requests-override-options *:hidden').show();
|
|
||||||
$('#webdriver-override-options').hide();
|
$('#webdriver-override-options').hide();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$('input[name="fetch_backend"]').click(function (e) {
|
$('input[name="fetch_backend"]').click(function (e) {
|
||||||
toggle();
|
toggle();
|
||||||
});
|
});
|
||||||
|
|||||||
changedetectionio/static/styles/.gitignore (vendored, 2 lines changed)
@@ -1,3 +1 @@
 node_modules
-package-lock.json

changedetectionio/static/styles/package-lock.json (generated, 3719 lines)
File diff suppressed because it is too large
@@ -158,7 +158,8 @@ class ChangeDetectionStore:
 @property
 def threshold_seconds(self):
 seconds = 0
-for m, n in Watch.mtable.items():
+mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
+for m, n in mtable.items():
 x = self.__data['settings']['requests']['time_between_check'].get(m)
 if x:
 seconds += x * n
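Both versions of `threshold_seconds` compute the recheck interval the same way — multiply each configured unit by its length in seconds — and differ only in where the multiplier table lives (the Watch class attribute versus a local dict). In isolation:

```python
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}

def threshold_seconds(time_between_check):
    seconds = 0
    for unit, per_unit in mtable.items():
        value = time_between_check.get(unit)
        if value:
            seconds += value * per_unit
    return seconds

# "every 1 day 3 hours" -> 86400 + 3 * 3600 = 97200 seconds
print(threshold_seconds({'weeks': None, 'days': 1, 'hours': 3, 'minutes': None, 'seconds': None}))
```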
@@ -249,7 +250,7 @@ class ChangeDetectionStore:
 return self.data['watching'][uuid].get(val)

 # Remove a watchs data but keep the entry (URL etc)
-def clear_watch_history(self, uuid):
+def scrub_watch(self, uuid):
 import pathlib

 self.__data['watching'][uuid].update(
@@ -297,8 +298,7 @@ class ChangeDetectionStore:
|
|||||||
'ignore_text', 'css_filter',
|
'ignore_text', 'css_filter',
|
||||||
'subtractive_selectors', 'trigger_text',
|
'subtractive_selectors', 'trigger_text',
|
||||||
'extract_title_as_title', 'extract_text',
|
'extract_title_as_title', 'extract_text',
|
||||||
'text_should_not_be_present',
|
'text_should_not_be_present']:
|
||||||
'webdriver_js_execute_code']:
|
|
||||||
if res.get(k):
|
if res.get(k):
|
||||||
apply_extras[k] = res[k]
|
apply_extras[k] = res[k]
|
||||||
|
|
||||||
@@ -518,11 +518,3 @@ class ChangeDetectionStore:
 # But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
 # In the distant future we can remove this entirely
 self.data['watching'][uuid]['history'] = {}

-# We incorrectly stored last_changed when there was not a change, and then confused the output list table
-def update_3(self):
-for uuid, watch in self.data['watching'].items():
-# Be sure it's recalculated
-p = watch.history
-if watch.history_n < 2:
-watch['last_changed'] = 0
|||||||
@@ -22,7 +22,7 @@
 {% if versions|length >= 1 %}
 <label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
 <select id="diff-version" name="previous_version">
-{% for version in versions|reverse %}
+{% for version in versions %}
 <option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
 {{version}}
 </option>
|
|||||||
@@ -7,7 +7,6 @@
|
|||||||
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
|
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
|
||||||
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
|
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
|
||||||
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||||
const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %};
|
|
||||||
|
|
||||||
{% if emailprefix %}
|
{% if emailprefix %}
|
||||||
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
|
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
|
||||||
@@ -25,7 +24,7 @@
|
|||||||
<ul>
|
<ul>
|
||||||
<li class="tab" id="default-tab"><a href="#general">General</a></li>
|
<li class="tab" id="default-tab"><a href="#general">General</a></li>
|
||||||
<li class="tab"><a href="#request">Request</a></li>
|
<li class="tab"><a href="#request">Request</a></li>
|
||||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Selector</a></li>
|
||||||
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
@@ -62,12 +61,6 @@
 <div class="pure-control-group">
 {{ render_checkbox_field(form.extract_title_as_title) }}
 </div>
-<div class="pure-control-group">
-{{ render_checkbox_field(form.filter_failure_notification_send) }}
-<span class="pure-form-message-inline">
-Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
-</span>
-</div>
 </fieldset>
 </div>

@@ -88,39 +81,33 @@
 </div>
 {% endif %}
 <fieldset id="webdriver-override-options">
-<div class="pure-control-group">
-{{ render_field(form.webdriver_delay) }}
 <div class="pure-form-message-inline">
 <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
 <br/>
 This will wait <i>n</i> seconds before extracting the text.
-{% if using_global_webdriver_wait %}
-<br/><strong>Using the current global default settings</strong>
-{% endif %}
-</div>
 </div>
 <div class="pure-control-group">
-{{ render_field(form.webdriver_js_execute_code) }}
+{{ render_field(form.webdriver_delay) }}
+</div>
+{% if using_global_webdriver_wait %}
 <div class="pure-form-message-inline">
-Run this code before performing change detection, handy for filling in fields and other actions <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">More help and examples here</a>
-</div>
+<strong>Using the current global default settings</strong>
 </div>
+{% endif %}
 </fieldset>
 <fieldset class="pure-group" id="requests-override-options">
-{% if not playwright_enabled %}
 <div class="pure-form-message-inline">
 <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
 </div>
-{% endif %}
-<div class="pure-control-group" id="request-method">
+<div class="pure-control-group">
 {{ render_field(form.method) }}
 </div>
-<div class="pure-control-group" id="request-headers">
+<div class="pure-control-group">
 {{ render_field(form.headers, rows=5, placeholder="Example
 Cookie: foobar
 User-Agent: wonderbra 1.0") }}
 </div>
-<div class="pure-control-group" id="request-body">
+<div class="pure-control-group">
 {{ render_field(form.body, rows=5, placeholder="Example
 {
 \"name\":\"John\",

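The hunk above rearranges the "wait *n* seconds" and "run JavaScript before the check" controls. As a reference for what those two options amount to, here is a minimal sketch using Playwright's sync API; the URL, the JS snippet and the 5-second delay are illustrative placeholders, not values taken from this project:

```python
# Rough sketch of "webdriver_js_execute_code" + "webdriver_delay" behaviour,
# using Playwright's sync API. All concrete values are placeholders.
from playwright.sync_api import sync_playwright

def fetch_rendered_text(url, js_before_check=None, wait_seconds=5):
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        if js_before_check:
            # e.g. click a "load more" button or fill a form field first
            page.evaluate(js_before_check)
        # Give the page time to finish rendering before extracting the text
        page.wait_for_timeout(wait_seconds * 1000)
        text = page.inner_text("body")
        browser.close()
        return text

# fetch_rendered_text("https://example.com", "document.querySelector('button')?.click()")
```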
@@ -128,7 +115,7 @@ User-Agent: wonderbra 1.0") }}
 \"car\":null
 }") }}
 </div>
-<div id="ignore-status-codes-option">
+<div>
 {{ render_checkbox_field(form.ignore_status_codes) }}
 </div>
 </fieldset>

@@ -156,12 +143,6 @@ User-Agent: wonderbra 1.0") }}
 </li>
 </ul>
 </div>
-<fieldset>
-<div class="pure-control-group">
-{{ render_checkbox_field(form.check_unique_lines) }}
-<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
-</div>
-</fieldset>
 <div class="pure-control-group">
 {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
 class="m-d") }}

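The removed `check_unique_lines` option described above only triggers when a genuinely new line appears, comparing the latest snapshot against every line seen in the watch history. A minimal sketch of that idea — the data structures are illustrative, not the project's real storage:

```python
# Compare the lines of the latest snapshot against all lines ever seen for
# this watch; re-ordered content produces no "new" lines and is ignored.
def has_new_lines(latest_snapshot: str, history_snapshots: list[str]) -> bool:
    seen = set()
    for old in history_snapshots:
        seen.update(line.strip() for line in old.splitlines())
    new_lines = {line.strip() for line in latest_snapshot.splitlines()} - seen
    return bool(new_lines)

# has_new_lines("b\na", ["a\nb"])      -> False (just re-ordered)
# has_new_lines("a\nb\nc", ["a\nb"])   -> True  (line "c" is new)
```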
@@ -196,7 +177,7 @@ nav
 <span class="pure-form-message-inline">
 <ul>
 <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
-<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
+<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
 <li>Changing this will affect the comparison checksum which may trigger an alert</li>
 <li>Use the preview/show current tab to see ignores</li>
 </ul>

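The ignore rules listed above work per line: a rule wrapped in forward slashes is treated as a regular expression, anything else as a plain substring. A rough sketch of that behaviour, purely illustrative and not the project's actual parser:

```python
# Drop any line that matches an ignore rule before the change checksum is built.
import re

def filter_ignored(lines, ignore_rules):
    kept = []
    for line in lines:
        ignored = False
        for rule in ignore_rules:
            if rule.startswith('/') and rule.endswith('/') and len(rule) > 2:
                if re.search(rule[1:-1], line):          # /regex/ style rule
                    ignored = True
            elif rule in line:                           # plain substring rule
                ignored = True
        if not ignored:
            kept.append(line)
    return kept

# filter_ignored(["Price: 10", "Last updated 12:01"], [r"/updated \d+:\d+/"])
# -> ["Price: 10"]
```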
@@ -239,15 +220,8 @@ Unavailable") }}
 {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
 <span class="pure-form-message-inline">
 <ul>
-<li>Extracts text in the final output (line by line) after other filters using regular expressions;
-<ul>
-<li>Regular expression ‐ example <code>/reports.+?2022/i</code></li>
-<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
-<li>Keyword example ‐ example <code>Out of stock</code></li>
-<li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
-</ul>
-</li>
-<li>One line per regular-expression/ string match</li>
+<li>Extracts text in the final output after other filters using regular expressions, for example <code>\d+ online</code></li>
+<li>One line per regular-expression.</li>
 </ul>
 </span>
 </div>

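The removed help text above describes the extract-text rules: one pattern per line, `/.../flags` patterns are regular expressions (a capturing group keeps only the group), anything else is a keyword match. A small sketch of that scheme with Python's `re`; it is illustrative, not the project's implementation:

```python
import re

def extract_lines(text, patterns):
    results = []
    for pattern in patterns:
        m = re.match(r'^/(.+)/([a-z]*)$', pattern)
        if m:
            body, flag_chars = m.groups()
            flags = re.I if 'i' in flag_chars else 0
            flags |= re.S if 's' in flag_chars else 0
            for hit in re.finditer(body, text, flags):
                # A capturing group returns just that group, e.g. /reports.+?(\d+)/i
                results.append(hit.group(1) if hit.groups() else hit.group(0))
        else:
            # Plain keyword: keep whole lines containing it
            results += [line for line in text.splitlines() if pattern in line]
    return results

# extract_lines("reports for 2022\nOut of stock", [r'/reports.+?(\d+)/i', 'Out of stock'])
# -> ['2022', 'Out of stock']
```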
@@ -256,7 +230,7 @@ Unavailable") }}

 <div class="tab-pane-inner visual-selector-ui" id="visualselector">
 <img id="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
-<strong>Pro-tip:</strong> This tool is only for limiting which elements will be included on a change-detection, not for interacting with browser directly.
 <fieldset>
 <div class="pure-control-group">
 {% if visualselector_enabled %}

@@ -301,8 +275,8 @@ Unavailable") }}

 <a href="{{url_for('form_delete', uuid=uuid)}}"
 class="pure-button button-small button-error ">Delete</a>
-<a href="{{url_for('clear_watch_history', uuid=uuid)}}"
-class="pure-button button-small button-error ">Clear History</a>
+<a href="{{url_for('scrub_watch', uuid=uuid)}}"
+class="pure-button button-small button-error ">Scrub</a>
 <a href="{{url_for('form_clone', uuid=uuid)}}"
 class="pure-button button-small ">Create Copy</a>
 </div>

@@ -3,22 +3,22 @@
 {% block content %}
 <div class="edit-form">
 <div class="box-wrap inner">
-<form class="pure-form pure-form-stacked" action="{{url_for('clear_all_history')}}" method="POST">
+<form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST">
 <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
 <fieldset>
 <div class="pure-control-group">
-This will remove version history (snapshots) for ALL watches, but keep your list of URLs! <br/>
+This will remove ALL version snapshots/data, but keep your list of URLs. <br/>
 You may like to use the <strong>BACKUP</strong> link first.<br/>
 </div>
 <br/>
 <div class="pure-control-group">
 <label for="confirmtext">Confirmation text</label>
 <input type="text" id="confirmtext" required="" name="confirmtext" value="" size="10"/>
-<span class="pure-form-message-inline">Type in the word <strong>clear</strong> to confirm that you understand.</span>
+<span class="pure-form-message-inline">Type in the word <strong>scrub</strong> to confirm that you understand!</span>
 </div>
 <br/>
 <div class="pure-control-group">
-<button type="submit" class="pure-button pure-button-primary">Clear History!</button>
+<button type="submit" class="pure-button pure-button-primary">Scrub!</button>
 </div>
 <br/>
 <div class="pure-control-group">

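The form above asks the user to type a confirmation word before destroying snapshot history. A minimal sketch of the server side of that pattern with Flask; the route, datastore and messages are illustrative stand-ins, not the project's actual handler:

```python
from flask import Flask, request, redirect, flash

app = Flask(__name__)
app.secret_key = "change-me"
snapshots = {"watch-uuid-1": ["old snapshot", "new snapshot"]}  # stand-in history store

@app.route("/")
def index():
    return "ok"

@app.route("/clear-all-history", methods=["POST"])
def clear_all_history():
    # Require the word shown in the form before wiping anything
    if request.form.get("confirmtext", "").strip().lower() != "clear":
        flash("Incorrect confirmation text.")
        return redirect("/")
    for uuid in snapshots:
        snapshots[uuid] = []          # drop history, keep the watch itself
    flash("Cleared snapshot history for all watches.")
    return redirect("/")
```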
@@ -36,13 +36,7 @@
 {{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
 <span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
 </div>
-<div class="pure-control-group">
-{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
-<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
-<br/>
-Set to <strong>0</strong> to disable
-</span>
-</div>
 <div class="pure-control-group">
 {% if not hide_remove_pass %}
 {% if current_user.is_authenticated %}

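The removed `filter_failure_notification_threshold_attempts` field drives a "notify after N consecutive filter failures" counter, with 0 disabling the feature. A rough sketch of that logic under assumed names (the watch dict, `notify` callback and field names here are illustrative):

```python
# Bump a per-watch failure counter when the CSS/XPath filter is missing,
# reset it on success, and fire a notification once it reaches the threshold.
def note_filter_result(watch: dict, filter_found: bool, threshold: int, notify) -> None:
    if filter_found:
        watch["consecutive_filter_failures"] = 0
        return
    watch["consecutive_filter_failures"] = watch.get("consecutive_filter_failures", 0) + 1
    if threshold > 0 and watch["consecutive_filter_failures"] >= threshold:
        notify(f"Filter missing {threshold} times in a row for {watch['url']}")
        watch["consecutive_filter_failures"] = 0

# w = {"url": "https://example.com"}
# for _ in range(6):
#     note_filter_result(w, filter_found=False, threshold=6, notify=print)
```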
@@ -154,7 +148,7 @@ nav
 <ul>
 <li>Note: This is applied globally in addition to the per-watch rules.</li>
 <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
-<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
+<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
 <li>Changing this will affect the comparison checksum which may trigger an alert</li>
 <li>Use the preview/show current tab to see ignores</li>
 </ul>

@@ -179,7 +173,7 @@ nav
 <div class="pure-control-group">
 {{ render_button(form.save_button) }}
 <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
-<a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
+<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a>
 </div>

 </div>

@@ -14,7 +14,7 @@
 {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
 <button type="submit" class="pure-button pure-button-primary">Watch</button>
 </fieldset>
-<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
+<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
 </form>
 <div>
 <a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>

@@ -40,7 +40,7 @@
 <tbody>


-{% for watch in watches|sort(attribute='last_changed', reverse=True) %}
+{% for watch in watches %}
 <tr id="{{ watch.uuid }}"
 class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
 {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}

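The template change above is only a sort over the watch list. The equivalent in plain Python, with a made-up watch structure: most recently changed first, never-changed watches (`last_changed == 0`) fall to the bottom.

```python
watches = [
    {"url": "https://example.com/a", "last_changed": 0},
    {"url": "https://example.com/b", "last_changed": 1650000000},
    {"url": "https://example.com/c", "last_changed": 1660000000},
]
for watch in sorted(watches, key=lambda w: w["last_changed"], reverse=True):
    print(watch["url"], watch["last_changed"])
```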
@@ -68,7 +68,7 @@
 {% endif %}
 </td>
 <td class="last-checked">{{watch|format_last_checked_time|safe}}</td>
-<td class="last-changed">{% if watch.history_n >=2 and watch.last_changed >0 %}
+<td class="last-changed">{% if watch.history_n >=2 and watch.last_changed %}
 {{watch.last_changed|format_timestamp_timeago}}
 {% else %}
 Not yet

@@ -95,8 +95,6 @@ def test_api_simple(client, live_server):
     assert watch_uuid in json.loads(res.data).keys()
     before_recheck_info = json.loads(res.data)[watch_uuid]
     assert before_recheck_info['last_checked'] != 0
-    #705 `last_changed` should be zero on the first check
-    assert before_recheck_info['last_changed'] == 0
     assert before_recheck_info['title'] == 'My test URL'

     set_modified_response()

@@ -15,7 +15,7 @@ def set_original_response():
     </br>
     So let's see what happens. </br>
     <div id="sametext">Some text thats the same</div>
-    <div class="changetext">Some text that will change</div>
+    <div id="changetext">Some text that will change</div>
     </body>
     </html>
     """

@@ -33,8 +33,7 @@ def set_modified_response():
     </br>
     So let's see what happens. </br>
     <div id="sametext">Some text thats the same</div>
-    <div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/> 2000 online )</div>
-    <div class="changetext">SomeCase insensitive 3456</div>
+    <div id="changetext">Some text that did change ( 1000 online <br/> 80 guests)</div>
     </body>
     </html>
     """

@@ -45,78 +44,11 @@ def set_modified_response():
     return None


-def set_multiline_response():
-    test_return_data = """<html>
-     <body>
-
-     <p>Something <br/>
-     across 6 billion multiple<br/>
-     lines
-     </p>
-
-     <div>aaand something lines</div>
-     </body>
-     </html>
-    """
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write(test_return_data)
-
-    return None
-
-
-def test_setup(client, live_server):
-
-    live_server_setup(live_server)
-
-def test_check_filter_multiline(client, live_server):
-
-    set_multiline_response()
-
-    # Add our URL to the import page
-    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
-
-    time.sleep(3)
-
-    # Goto the edit page, add our ignore text
-    # Add our URL to the import page
-    res = client.post(
-        url_for("edit_page", uuid="first"),
-        data={"css_filter": '',
-              'extract_text': '/something.+?6 billion.+?lines/si',
-              "url": test_url,
-              "tag": "",
-              "headers": "",
-              'fetch_backend': "html_requests"
-              },
-        follow_redirects=True
-    )
-
-    assert b"Updated watch." in res.data
-    time.sleep(3)
-
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-
-    assert b'<div class="">Something' in res.data
-    assert b'<div class="">across 6 billion multiple' in res.data
-    assert b'<div class="">lines' in res.data
-
-    # but the last one, which also says 'lines' shouldnt be here (non-greedy match checking)
-    assert b'aaand something lines' not in res.data
-
 def test_check_filter_and_regex_extract(client, live_server):
     sleep_time_for_fetch_thread = 3
-    css_filter = ".changetext"
+    live_server_setup(live_server)
+    css_filter = "#changetext"

     set_original_response()

@@ -132,7 +64,6 @@ def test_check_filter_and_regex_extract(client, live_server):
     )
     assert b"1 Imported" in res.data

-    time.sleep(1)
     # Trigger a check
     client.get(url_for("form_watch_checknow"), follow_redirects=True)

@@ -144,7 +75,7 @@ def test_check_filter_and_regex_extract(client, live_server):
     res = client.post(
         url_for("edit_page", uuid="first"),
         data={"css_filter": css_filter,
-              'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
+              'extract_text': '\d+ online\n\d+ guests',
               "url": test_url,
               "tag": "",
               "headers": "",

@@ -155,6 +86,15 @@ def test_check_filter_and_regex_extract(client, live_server):

     assert b"Updated watch." in res.data

+    # Check it saved
+    res = client.get(
+        url_for("edit_page", uuid="first"),
+    )
+    assert b'\d+ online' in res.data
+
+    # Trigger a check
+    # client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)

@@ -180,19 +120,8 @@ def test_check_filter_and_regex_extract(client, live_server):
     # Class will be blank for now because the frontend didnt apply the diff
     assert b'<div class="">1000 online' in res.data

-    # All regex matching should be here
-    assert b'<div class="">2000 online' in res.data
-
     # Both regexs should be here
     assert b'<div class="">80 guests' in res.data

-    # Regex with flag handling should be here
-    assert b'<div class="">SomeCase insensitive 3456' in res.data
-
-    # Singular group from /somecase insensitive (345\d)/i
-    assert b'<div class="">3456' in res.data
-
-    # Regex with multiline flag handling should be here
-
     # Should not be here
     assert b'Some text that did change' not in res.data

@@ -1,134 +0,0 @@
|
|||||||
import os
|
|
||||||
import time
|
|
||||||
import re
|
|
||||||
from flask import url_for
|
|
||||||
from .util import set_original_response, live_server_setup
|
|
||||||
from changedetectionio.model import App
|
|
||||||
|
|
||||||
|
|
||||||
def set_response_with_filter():
|
|
||||||
test_return_data = """<html>
|
|
||||||
<body>
|
|
||||||
Some initial text</br>
|
|
||||||
<p>Which is across multiple lines</p>
|
|
||||||
</br>
|
|
||||||
So let's see what happens. </br>
|
|
||||||
<div id="nope-doesnt-exist">Some text thats the same</div>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
|
|
||||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
|
||||||
f.write(test_return_data)
|
|
||||||
return None
|
|
||||||
|
|
||||||
def run_filter_test(client, content_filter):
|
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
# Add our URL to the import page
|
|
||||||
test_url = url_for('test_endpoint', _external=True)
|
|
||||||
res = client.post(
|
|
||||||
url_for("form_watch_add"),
|
|
||||||
data={"url": test_url, "tag": ''},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert b"Watch added" in res.data
|
|
||||||
|
|
||||||
# Give the thread time to pick up the first version
|
|
||||||
time.sleep(3)
|
|
||||||
|
|
||||||
# Goto the edit page, add our ignore text
|
|
||||||
# Add our URL to the import page
|
|
||||||
url = url_for('test_notification_endpoint', _external=True)
|
|
||||||
notification_url = url.replace('http', 'json')
|
|
||||||
|
|
||||||
print(">>>> Notification URL: " + notification_url)
|
|
||||||
|
|
||||||
# Just a regular notification setting, this will be used by the special 'filter not found' notification
|
|
||||||
notification_form_data = {"notification_urls": notification_url,
|
|
||||||
"notification_title": "New ChangeDetection.io Notification - {watch_url}",
|
|
||||||
"notification_body": "BASE URL: {base_url}\n"
|
|
||||||
"Watch URL: {watch_url}\n"
|
|
||||||
"Watch UUID: {watch_uuid}\n"
|
|
||||||
"Watch title: {watch_title}\n"
|
|
||||||
"Watch tag: {watch_tag}\n"
|
|
||||||
"Preview: {preview_url}\n"
|
|
||||||
"Diff URL: {diff_url}\n"
|
|
||||||
"Snapshot: {current_snapshot}\n"
|
|
||||||
"Diff: {diff}\n"
|
|
||||||
"Diff Full: {diff_full}\n"
|
|
||||||
":-)",
|
|
||||||
"notification_format": "Text"}
|
|
||||||
|
|
||||||
notification_form_data.update({
|
|
||||||
"url": test_url,
|
|
||||||
"tag": "my tag",
|
|
||||||
"title": "my title",
|
|
||||||
"headers": "",
|
|
||||||
"css_filter": content_filter,
|
|
||||||
"fetch_backend": "html_requests"})
|
|
||||||
|
|
||||||
res = client.post(
|
|
||||||
url_for("edit_page", uuid="first"),
|
|
||||||
data=notification_form_data,
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert b"Updated watch." in res.data
|
|
||||||
time.sleep(3)
|
|
||||||
|
|
||||||
# Now the notification should not exist, because we didnt reach the threshold
|
|
||||||
assert not os.path.isfile("test-datastore/notification.txt")
|
|
||||||
|
|
||||||
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
|
|
||||||
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
time.sleep(3)
|
|
||||||
|
|
||||||
# We should see something in the frontend
|
|
||||||
assert b'Did the page change its layout' in res.data
|
|
||||||
|
|
||||||
# Now it should exist and contain our "filter not found" alert
|
|
||||||
assert os.path.isfile("test-datastore/notification.txt")
|
|
||||||
notification = False
|
|
||||||
with open("test-datastore/notification.txt", 'r') as f:
|
|
||||||
notification = f.read()
|
|
||||||
assert 'CSS/xPath filter was not present in the page' in notification
|
|
||||||
assert content_filter.replace('"', '\\"') in notification
|
|
||||||
|
|
||||||
# Remove it and prove that it doesnt trigger when not expected
|
|
||||||
os.unlink("test-datastore/notification.txt")
|
|
||||||
set_response_with_filter()
|
|
||||||
|
|
||||||
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
time.sleep(3)
|
|
||||||
|
|
||||||
# It should have sent a notification, but..
|
|
||||||
assert os.path.isfile("test-datastore/notification.txt")
|
|
||||||
# but it should not contain the info about the failed filter
|
|
||||||
with open("test-datastore/notification.txt", 'r') as f:
|
|
||||||
notification = f.read()
|
|
||||||
assert not 'CSS/xPath filter was not present in the page' in notification
|
|
||||||
|
|
||||||
# cleanup for the next
|
|
||||||
client.get(
|
|
||||||
url_for("form_delete", uuid="all"),
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
os.unlink("test-datastore/notification.txt")
|
|
||||||
|
|
||||||
|
|
||||||
def test_setup(live_server):
|
|
||||||
live_server_setup(live_server)
|
|
||||||
|
|
||||||
def test_check_css_filter_failure_notification(client, live_server):
|
|
||||||
set_original_response()
|
|
||||||
time.sleep(1)
|
|
||||||
run_filter_test(client, '#nope-doesnt-exist')
|
|
||||||
|
|
||||||
def test_check_xpath_filter_failure_notification(client, live_server):
|
|
||||||
set_original_response()
|
|
||||||
time.sleep(1)
|
|
||||||
run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
|
|
||||||
|
|
||||||
@@ -1,43 +0,0 @@
-#!/usr/bin/python3
-
-import time
-from flask import url_for
-from .util import live_server_setup
-
-
-def set_original_ignore_response():
-    test_return_data = """<html>
-     <body>
-     <span>The price is</span><span>$<!-- -->90<!-- -->.<!-- -->74</span>
-     </body>
-     </html>
-
-    """
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write(test_return_data)
-
-
-def test_obfuscations(client, live_server):
-    set_original_ignore_response()
-    live_server_setup(live_server)
-    time.sleep(1)
-    # Add our URL to the import page
-    test_url = url_for('test_endpoint', _external=True)
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-    assert b"1 Imported" in res.data
-
-    # Give the thread time to pick it up
-    time.sleep(3)
-
-    # Check HTML conversion detected and workd
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert b'$90.74' in res.data

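The removed test above feeds the checker a price split up by HTML comments (`$<!-- -->90<!-- -->.<!-- -->74`). A small sketch of why plain-text conversion still sees `$90.74`: comments never become text data. This uses only the standard library; the real project has its own HTML-to-text pipeline.

```python
from html.parser import HTMLParser

class TextOnly(HTMLParser):
    def __init__(self):
        super().__init__()
        self.chunks = []
    def handle_data(self, data):
        # Only character data arrives here; HTML comments are routed elsewhere
        self.chunks.append(data)

parser = TextOnly()
parser.feed("<span>The price is</span><span>$<!-- -->90<!-- -->.<!-- -->74</span>")
print("".join(parser.chunks))   # -> "The price is$90.74"
```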
@@ -1,104 +0,0 @@
|
|||||||
#!/usr/bin/python3
|
|
||||||
|
|
||||||
import time
|
|
||||||
from flask import url_for
|
|
||||||
from .util import live_server_setup
|
|
||||||
|
|
||||||
|
|
||||||
def set_original_ignore_response():
|
|
||||||
test_return_data = """<html>
|
|
||||||
<body>
|
|
||||||
<p>Some initial text</p>
|
|
||||||
<p>Which is across multiple lines</p>
|
|
||||||
<p>So let's see what happens.</p>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
|
|
||||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
|
||||||
f.write(test_return_data)
|
|
||||||
|
|
||||||
|
|
||||||
# The same but just re-ordered the text
|
|
||||||
def set_modified_swapped_lines():
|
|
||||||
# Re-ordered and with some whitespacing, should get stripped() too.
|
|
||||||
test_return_data = """<html>
|
|
||||||
<body>
|
|
||||||
<p>Some initial text</p>
|
|
||||||
<p> So let's see what happens.</p>
|
|
||||||
<p> Which is across multiple lines</p>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
|
|
||||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
|
||||||
f.write(test_return_data)
|
|
||||||
|
|
||||||
|
|
||||||
def set_modified_with_trigger_text_response():
|
|
||||||
test_return_data = """<html>
|
|
||||||
<body>
|
|
||||||
<p>Some initial text</p>
|
|
||||||
<p>So let's see what happens.</p>
|
|
||||||
<p>and a new line!</p>
|
|
||||||
<p>Which is across multiple lines</p>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
|
|
||||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
|
||||||
f.write(test_return_data)
|
|
||||||
|
|
||||||
|
|
||||||
def test_unique_lines_functionality(client, live_server):
|
|
||||||
live_server_setup(live_server)
|
|
||||||
|
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
set_original_ignore_response()
|
|
||||||
# Give the endpoint time to spin up
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
# Add our URL to the import page
|
|
||||||
test_url = url_for('test_endpoint', _external=True)
|
|
||||||
res = client.post(
|
|
||||||
url_for("import_page"),
|
|
||||||
data={"urls": test_url},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert b"1 Imported" in res.data
|
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
|
||||||
# Add our URL to the import page
|
|
||||||
res = client.post(
|
|
||||||
url_for("edit_page", uuid="first"),
|
|
||||||
data={"check_unique_lines": "y",
|
|
||||||
"url": test_url,
|
|
||||||
"fetch_backend": "html_requests"},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert b"Updated watch." in res.data
|
|
||||||
assert b'unviewed' not in res.data
|
|
||||||
|
|
||||||
# Make a change
|
|
||||||
set_modified_swapped_lines()
|
|
||||||
|
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
|
||||||
# It should report nothing found (no new 'unviewed' class)
|
|
||||||
res = client.get(url_for("index"))
|
|
||||||
assert b'unviewed' not in res.data
|
|
||||||
|
|
||||||
|
|
||||||
# Now set the content which contains the new text and re-ordered existing text
|
|
||||||
set_modified_with_trigger_text_response()
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
res = client.get(url_for("index"))
|
|
||||||
assert b'unviewed' in res.data
|
|
||||||
|
|
||||||
@@ -3,74 +3,38 @@ import queue
 import time

 from changedetectionio import content_fetcher
-from changedetectionio.html_tools import FilterNotFoundInResponse

 # A single update worker
 #
-#
+# Requests for checking on a single site(watch) from a queue of watches
+# (another process inserts watches into the queue that are time-ready for checking)


 class update_worker(threading.Thread):
     current_uuid = None

-    def __init__(self, q, notification_q, app, datastore, uuid, *args, **kwargs):
+    def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
         self.q = q

         self.app = app
         self.notification_q = notification_q
         self.datastore = datastore
-        self.current_uuid = uuid
         super().__init__(*args, **kwargs)
-        self.name = "update_worker"

-    def send_filter_failure_notification(self, uuid):
-
-        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
-        watch = self.datastore.data['watching'].get(uuid, False)
-
-        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
-                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
-                        watch['css_filter'],
-                        threshold),
-                    'notification_format': 'text'}
-
-        if len(watch['notification_urls']):
-            n_object['notification_urls'] = watch['notification_urls']
-
-        elif len(self.datastore.data['settings']['application']['notification_urls']):
-            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
-
-        # Only prepare to notify if the rules above matched
-        if 'notification_urls' in n_object:
-            n_object.update({
-                'watch_url': watch['url'],
-                'uuid': uuid
-            })
-            self.notification_q.put(n_object)
-            print("Sent filter not found notification for {}".format(uuid))
-
-    # Pick one job off the list, process it threaded, exist
     def run(self):
-        # Go talk to the website
-        self.perform_site_update()
-
-        self.current_uuid = None  # Done
-        self.q.task_done()
-
-        # Let the thread die after processing 1
-        # We will launch nice juicy fresh threads every time to prevent memory leaks in complex runner code (playwright etc)
-        print ("EXITING THREAD!")
-        self.app.config.exit.wait(1)
-        return
-
-
-    def perform_site_update(self):
-
         from changedetectionio import fetch_site_status

-        if not self.current_uuid in list(self.datastore.data['watching'].keys()):
-            return
+        update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)

+        while not self.app.config.exit.is_set():

+            try:
+                uuid = self.q.get(block=False)
+            except queue.Empty:
+                pass

+            else:
+                self.current_uuid = uuid

+                if uuid in list(self.datastore.data['watching'].keys()):

                     changed_detected = False
                     contents = ""

@@ -79,56 +43,44 @@ class update_worker(threading.Thread):
                     xpath_data = False
                     now = time.time()

-                    update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
                     try:
-                        changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(self.current_uuid)
+                        changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
                         # Re #342
                         # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                         # We then convert/.decode('utf-8') for the notification etc
                         if not isinstance(contents, (bytes, bytearray)):
                             raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
                     except PermissionError as e:
-                        self.app.logger.error("File permission error updating", self.current_uuid, str(e))
+                        self.app.logger.error("File permission error updating", uuid, str(e))
                     except content_fetcher.ReplyWithContentButNoText as e:
                         # Totally fine, it's by choice - just continue on, nothing more to care about
                         # Page had elements/content but no renderable text
-                        self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': "Got HTML content but no text found."})
-                    except FilterNotFoundInResponse as e:
-                        err_text = "Filter '{}' not found - Did the page change its layout?".format(str(e))
-                        c = 0
-                        if self.datastore.data['watching'].get(self.current_uuid, False):
-                            c = self.datastore.data['watching'][self.current_uuid].get('consecutive_filter_failures', 5)
-                            c += 1
-
-                        # Send notification if we reached the threshold?
-                        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
-                        print("Filter for {} not found, consecutive_filter_failures: {}".format(self.current_uuid, c))
-                        if threshold >0 and c >= threshold:
-                            self.send_filter_failure_notification(self.current_uuid)
-                            c = 0
-
-                        self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
-                                                                                        'consecutive_filter_failures': c})
+                        if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'):
+                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"})
+                        else:
+                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
+                        pass
                     except content_fetcher.EmptyReply as e:
                         # Some kind of custom to-str handler in the exception handler that does this?
                         err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
-                        self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                            'last_check_status': e.status_code})
                     except content_fetcher.ScreenshotUnavailable as e:
                         err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
-                        self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                            'last_check_status': e.status_code})
                     except content_fetcher.PageUnloadable as e:
                         err_text = "Page request from server didnt respond correctly"
-                        self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                            'last_check_status': e.status_code})

                     except Exception as e:
-                        self.app.logger.error("Exception reached processing watch UUID: %s - %s", self.current_uuid, str(e))
-                        self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': str(e)})
+                        self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

                     else:
                         try:
-                            watch = self.datastore.data['watching'][self.current_uuid]
+                            watch = self.datastore.data['watching'][uuid]
                             fname = ""  # Saved history text filename

                             # For the FIRST time we check a site, or a change detected, save the snapshot.

@@ -137,19 +89,16 @@ class update_worker(threading.Thread):
                             fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time())))

                             # Generally update anything interesting returned
-                            update_obj['consecutive_filter_failures'] = 0
-                            self.datastore.update_watch(uuid=self.current_uuid, update_obj=update_obj)
+                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

                             # A change was detected
                             if changed_detected:
                                 n_object = {}
-                                print (">> Change detected in UUID {} - {}".format(self.current_uuid, watch['url']))
+                                print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))

                                 # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
                                 if watch.history_n >= 2:
-                                    # Atleast 2, means there really was a change
-                                    self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_changed': round(now)})
+                                    print(">> Change detected in UUID {} - {}".format(uuid, watch['url']))

                                     watch_history = watch.history
                                     dates = list(watch_history.keys())
                                     # Theoretically it's possible that this could be just 1 long,

@@ -160,9 +109,10 @@ class update_worker(threading.Thread):
                                     )
                                     prev_fname = watch_history[dates[-2]]


                                     # Did it have any notification alerts to hit?
                                     if len(watch['notification_urls']):
-                                        print(">>> Notifications queued for UUID from watch {}".format(self.current_uuid))
+                                        print(">>> Notifications queued for UUID from watch {}".format(uuid))
                                         n_object['notification_urls'] = watch['notification_urls']
                                         n_object['notification_title'] = watch['notification_title']
                                         n_object['notification_body'] = watch['notification_body']

@@ -170,7 +120,7 @@ class update_worker(threading.Thread):

                                     # No? maybe theres a global setting, queue them all
                                     elif len(self.datastore.data['settings']['application']['notification_urls']):
-                                        print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(self.current_uuid))
+                                        print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid))
                                         n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
                                         n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title']
                                         n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body']

@@ -189,7 +139,7 @@ class update_worker(threading.Thread):
                                     from changedetectionio import diff
                                     n_object.update({
                                         'watch_url': watch['url'],
-                                        'uuid': self.current_uuid,
+                                        'uuid': uuid,
                                         'current_snapshot': contents.decode('utf-8'),
                                         'diff': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
                                         'diff_full': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)

@@ -200,18 +150,24 @@ class update_worker(threading.Thread):
                         except Exception as e:
                             # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
                             print("!!!! Exception in update_worker !!!\n", e)
-                            self.app.logger.error("Exception reached processing watch UUID: %s - %s", self.current_uuid, str(e))
-                            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': str(e)})
+                            self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
+                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

                         finally:
                             # Always record that we atleast tried
-                            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'fetch_time': round(time.time() - now, 3),
+                            self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
                                                                'last_checked': round(time.time())})

                             # Always save the screenshot if it's available
                             if screenshot:
-                                self.datastore.save_screenshot(watch_uuid=self.current_uuid, screenshot=screenshot)
+                                self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
                             if xpath_data:
-                                self.datastore.save_xpath_data(watch_uuid=self.current_uuid, data=xpath_data)
+                                self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)


+                self.current_uuid = None  # Done
+                self.q.task_done()

+            # Give the CPU time to interrupt
+            time.sleep(0.1)

+        self.app.config.exit.wait(1)

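The hunks above are the substance of this compare: `update_worker` changes between a long-lived loop that polls a shared queue (right-hand side) and a short-lived thread that handles exactly one watch and then exits (left-hand side). A stripped-down sketch of the two models; the queue names and the `check_watch` function are placeholders, not the project's code:

```python
import queue
import threading

def check_watch(uuid):
    print("checking", uuid)

# One-shot worker: spawn a fresh thread per queued job and let it die afterwards,
# which keeps memory from long browser-fetcher runs from accumulating.
class OneShotWorker(threading.Thread):
    def __init__(self, uuid):
        super().__init__()
        self.uuid = uuid
    def run(self):
        check_watch(self.uuid)

# Long-lived worker: keep pulling UUIDs from a shared queue until shutdown.
def loop_worker(q: queue.Queue, stop: threading.Event):
    while not stop.is_set():
        try:
            uuid = q.get(block=False)
        except queue.Empty:
            stop.wait(1)
        else:
            check_watch(uuid)
            q.task_done()
```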
@@ -24,7 +24,7 @@ services:
 # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
 #
 # Alternative Playwright URL, do not use "'s or 's!
-# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true
+# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/
 #
 # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
 #

@@ -78,6 +78,9 @@ services:
 # - SCREEN_HEIGHT=1024
 # - SCREEN_DEPTH=16
 # - ENABLE_DEBUGGER=false
+# - SCREEN_WIDTH=1280
+# - SCREEN_HEIGHT=1024
+# - SCREEN_DEPTH=16
 # - PREBOOT_CHROME=true
 # - CONNECTION_TIMEOUT=300000
 # - MAX_CONCURRENT_SESSIONS=10

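The commented-out environment variables in the compose file above switch the checker over to a remote browser. A sketch of how such a setting is typically consumed on the application side; only the `PLAYWRIGHT_DRIVER_URL` variable name comes from the compose file, the fetcher names are placeholders and this is not the project's actual selection logic:

```python
import os

def pick_fetcher():
    # Prefer a remote Playwright/Chrome endpoint when one is configured,
    # otherwise fall back to a plain HTTP fetch.
    driver_url = os.getenv("PLAYWRIGHT_DRIVER_URL")
    if driver_url:
        return {"fetcher": "playwright", "endpoint": driver_url}
    return {"fetcher": "plain-http", "endpoint": None}

# PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/ python app.py
# -> {'fetcher': 'playwright', 'endpoint': 'ws://playwright-chrome:3000/'}
```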