Compare commits

..

36 Commits

Author SHA1 Message Date
dgtlmoon
f3b815715b Shows which items are already in the queue, disables adding to the queue if already in the recheck queue 2022-04-21 12:44:10 +02:00
dgtlmoon
206ded4201 Notifications - Signal API support, Ntfy support, hotmail, matrix, Gotify API fixes 2022-04-20 23:13:55 +02:00
dgtlmoon
9e71f2aa35 Discord:// notification size limit - also includes the notification title 2022-04-20 17:00:21 +02:00
dgtlmoon
f9594aeffb Fix spelling errors 2022-04-20 09:51:53 +02:00
dgtlmoon
b4e1353376 Update README.md 2022-04-19 23:56:11 +02:00
dgtlmoon
5b670c38d3 Update README.md 2022-04-19 23:48:21 +02:00
dgtlmoon
2a9fb12451 Import speed improvements, and adding an import URL batch size of 5,000 to stop accidental CPU overload (#549) 2022-04-19 23:15:32 +02:00
dgtlmoon
6c3c5dc28a Ability to set the default fetch mode via the DEFAULT_FETCH_BACKEND variable 2022-04-19 23:15:00 +02:00
dgtlmoon
8f062bfec9 Refactor form handling (#548) 2022-04-19 21:43:07 +02:00
dgtlmoon
380c512cc2 Adding support for change detection of HTML source-code via "source:https://website.com" prefix (#540) 2022-04-12 17:36:29 +02:00
dgtlmoon
d7ed7c44ed Re-label the quick-add widget placeholder 'tag' to 'watch group' 2022-04-12 10:55:43 +02:00
dgtlmoon
34a87c0f41 HTTP Fetcher code improvements 2022-04-12 08:36:08 +02:00
dgtlmoon
4074fe53f1 Adding RSS metadata auto-discovery 2022-04-12 07:35:47 +02:00
Tristan Hill
44d599d0d1 Upgrade WTforms form handler to v3 (#523) 2022-04-09 19:50:56 +02:00
dgtlmoon
615fe9290a 0.39.12 2022-04-09 14:16:30 +02:00
dgtlmoon
2cc6955bc3 Miscellaneous settings form visual improvements (#535) 2022-04-09 12:15:34 +02:00
dgtlmoon
9809af142d Option to render links as [Some Text ](/link), adds the ability to change-detect on hyperlink changes 2022-04-09 10:35:14 +02:00
dgtlmoon
1890881977 Specify our Discord avatar_url as default avatar_url 2022-04-08 18:35:59 +02:00
dgtlmoon
9fc2fe85d5 Minor git updates 2022-04-08 17:21:42 +02:00
dgtlmoon
bb3c546838 Fix screenshot tab name 2022-04-08 17:08:06 +02:00
dgtlmoon
165f794595 Discord:// notifications should be cut to 2000 chars or Discord will not process them. (#531 + #323) 2022-04-08 16:32:04 +02:00
dgtlmoon
a440eece9e Make long reports in the notification error log easier to read 2022-04-08 14:12:42 +02:00
dgtlmoon
34c83f0e7c [Add email] button in notification settings with a prefix set from NOTIFICATION_MAIL_BUTTON_PREFIX env variable when defined. (#528) 2022-04-07 18:18:23 +02:00
dgtlmoon
f6e518497a Update README.md 2022-04-07 14:59:31 +02:00
dgtlmoon
63e91a3d66 Skip processing a watch into the RSS feed if there's not enough data to examine (fixes Internal Server Error when accessing the RSS feed) (#521) 2022-04-05 20:31:31 +02:00
dgtlmoon
3034d047c2 Introduce an AJAX button for sending test notifications instead of the checkbox (#519) 2022-04-05 18:04:26 +02:00
dgtlmoon
2620818ba7 Make text tab always available at default 2022-04-02 14:55:40 +02:00
dgtlmoon
9fe4f95990 When fetching a snapshot via Chrome, make the most recent screenshot available on the Diff and Preview pages (#516) 2022-04-02 14:49:32 +02:00
dgtlmoon
ffd2a89d60 Remove 'unviewed' status in watch table when Diff link clicked (#514) 2022-03-31 11:01:07 +02:00
dgtlmoon
8f40f19328 RSS feed CDATA should contain difference output 2022-03-30 10:51:10 +02:00
dgtlmoon
082634f851 Fix - {diff} and {diff_full} notifications tokens were not always including the full output 2022-03-29 19:18:26 +02:00
dgtlmoon
334010025f Update README.md 2022-03-26 14:02:56 +01:00
dgtlmoon
81aa8fa16b Update README.md 2022-03-26 09:56:56 +01:00
dgtlmoon
c79d6824e3 Minor UI cleanups (mobile tabs, font sizing) (#503) 2022-03-25 23:37:28 +01:00
zznidar
946377d2be Fix typo in Filters & Triggers settings. (#495) 2022-03-23 23:18:04 +01:00
zznidar
5db9a30ad4 Add autofocus attribute to password login field (#496) 2022-03-23 23:17:47 +01:00
50 changed files with 1506 additions and 690 deletions

View File

@@ -9,18 +9,15 @@ _Know when web pages change! Stay ontop of new information!_
Live your data-life *pro-actively* instead of *re-actively*.
Open source web page monitoring, notification and change detection.
Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
**Get your own private instance now! Let us host it for you!**
[![Deploy to Lemonade](https://lemonade.changedetection.io/static/images/lemonade.svg)](https://lemonade.changedetection.io/start)
[_Let us host your own private instance - We accept PayPal and Bitcoin, Support the further development of changedetection.io!_](https://lemonade.changedetection.io/start)
[**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
@@ -39,13 +36,14 @@ Open source web page monitoring, notification and change detection.
- COVID related news from government websites
- University/organisation news from their website
- Detect and monitor changes in JSON API responses
- API monitoring and alerting
- JSON API monitoring and alerting
- Changes in legal and other documents
- Trigger API calls via notifications when text appears on a website
- Glue together APIs using the JSON filter and JSON notifications
- Create RSS feeds based on changes in web content
- Monitor HTML source code for unexpected changes, strengthen your PCI compliance
- You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver!</a>_
## Screenshots
@@ -70,6 +68,10 @@ Docker standalone
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
```
### Windows
See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
### Python Pip
Check out our pypi page https://pypi.org/project/changedetection.io/
@@ -163,9 +165,6 @@ See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configura
Raspberry Pi and linux/arm/v6 linux/arm/v7 arm64 devices are supported! See the wiki for [details](https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver)
## Windows support?
YES! See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
## Support us

View File

@@ -39,7 +39,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
__version__ = '0.39.11'
__version__ = '0.39.12'
datastore = None
@@ -94,16 +94,6 @@ def init_app_secret(datastore_path):
return secret
# Remember python is by reference
# populate_form in wtfors didnt work for me. (try using a setattr() obj type on datastore.watch?)
def populate_form_from_watch(form, watch):
for i in form.__dict__.keys():
if i[0] != '_':
p = getattr(form, i)
if hasattr(p, 'data') and i in watch:
setattr(p, "data", watch[i])
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
# running or something similar.
@app.template_filter('format_last_checked_time')
@@ -272,7 +262,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/rss", methods=['GET'])
@login_required
def rss():
from . import diff
limit_tag = request.args.get('tag')
# Sort by last_changed and add the uuid which is usually the key..
@@ -301,6 +291,19 @@ def changedetection_app(config=None, datastore_o=None):
fg.link(href='https://changedetection.io')
for watch in sorted_watches:
dates = list(watch['history'].keys())
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
if len(dates) < 2:
continue
# Convert to int, sort and back to str again
# @todo replace datastore getter that does this automatically
dates = [int(i) for i in dates]
dates.sort(reverse=True)
dates = [str(i) for i in dates]
prev_fname = watch['history'][dates[1]]
if not watch['viewed']:
# Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
@@ -316,12 +319,16 @@ def changedetection_app(config=None, datastore_o=None):
diff_link = {'href': "{}{}".format(base_url, url_for('diff_history_page', uuid=watch['uuid']))}
# @todo use title if it exists
fe.link(link=diff_link)
fe.title(title=watch['url'])
# @todo in the future <description><![CDATA[<html><body>Any code html is valid.</body></html>]]></description>
fe.description(description=watch['url'])
# @todo watch should be a getter - watch.get('title') (internally if URL else..)
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
fe.title(title=watch_title)
latest_fname = watch['history'][dates[0]]
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
fe.description(description="<![CDATA[<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff))
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch['newest_history_key']))
@@ -335,6 +342,8 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/", methods=['GET'])
@login_required
def index():
from changedetectionio import forms
limit_tag = request.args.get('tag')
pause_uuid = request.args.get('pause')
@@ -371,7 +380,6 @@ def changedetection_app(config=None, datastore_o=None):
existing_tags = datastore.get_all_tags()
from changedetectionio import forms
form = forms.quickWatchForm(request.form)
output = render_template("watch-overview.html",
@@ -383,10 +391,43 @@ def changedetection_app(config=None, datastore_o=None):
has_unviewed=datastore.data['has_unviewed'],
# Don't link to hosting when we're on the hosting environment
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
guid=datastore.data['app_guid'])
guid=datastore.data['app_guid'],
queued_uuids=update_q.queue)
return output
# AJAX endpoint for sending a test
@app.route("/notification/send-test", methods=['POST'])
@login_required
def ajax_callback_send_notification_test():
import apprise
apobj = apprise.Apprise()
# validate URLS
if not len(request.form['notification_urls'].strip()):
return make_response({'error': 'No Notification URLs set'}, 400)
for server_url in request.form['notification_urls'].splitlines():
if len(server_url.strip()):
if not apobj.add(server_url):
message = '{} is not a valid AppRise URL.'.format(server_url)
return make_response({'error': message}, 400)
try:
n_object = {'watch_url': request.form['window_url'],
'notification_urls': request.form['notification_urls'].splitlines(),
'notification_title': request.form['notification_title'].strip(),
'notification_body': request.form['notification_body'].strip(),
'notification_format': request.form['notification_format'].strip()
}
notification_q.put(n_object)
except Exception as e:
return make_response({'error': str(e)}, 400)
return 'OK'
@app.route("/scrub", methods=['GET', 'POST'])
@login_required
def scrub_page():
@@ -472,49 +513,46 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
def edit_page(uuid):
from changedetectionio import forms
form = forms.watchForm(request.form)
# More for testing, possible to return the first/only
if not datastore.data['watching'].keys():
flash("No watches to edit", "error")
return redirect(url_for('index'))
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
if not uuid in datastore.data['watching']:
flash("No watch with the UUID %s found." % (uuid), "error")
return redirect(url_for('index'))
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
data=datastore.data['watching'][uuid]
)
if request.method == 'GET':
if not uuid in datastore.data['watching']:
flash("No watch with the UUID %s found." % (uuid), "error")
return redirect(url_for('index'))
populate_form_from_watch(form, datastore.data['watching'][uuid])
# Set some defaults that refer to the main config when None, we do the same in POST,
# probably there should be a nice little handler for this.
if datastore.data['watching'][uuid]['fetch_backend'] is None:
form.fetch_backend.data = datastore.data['settings']['application']['fetch_backend']
if datastore.data['watching'][uuid]['minutes_between_check'] is None:
form.minutes_between_check.data = datastore.data['settings']['requests']['minutes_between_check']
if request.method == 'POST' and form.validate():
# Re #110, if they submit the same as the default value, set it to None, so we continue to follow the default
if form.minutes_between_check.data == datastore.data['settings']['requests']['minutes_between_check']:
form.minutes_between_check.data = None
if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
form.fetch_backend.data = None
update_obj = {'url': form.url.data.strip(),
'minutes_between_check': form.minutes_between_check.data,
'tag': form.tag.data.strip(),
'title': form.title.data.strip(),
'headers': form.headers.data,
'body': form.body.data,
'method': form.method.data,
'ignore_status_codes': form.ignore_status_codes.data,
'fetch_backend': form.fetch_backend.data,
'trigger_text': form.trigger_text.data,
'notification_title': form.notification_title.data,
'notification_body': form.notification_body.data,
'notification_format': form.notification_format.data,
'extract_title_as_title': form.extract_title_as_title.data,
}
extra_update_obj = {}
# Notification URLs
datastore.data['watching'][uuid]['notification_urls'] = form.notification_urls.data
@@ -526,18 +564,15 @@ def changedetection_app(config=None, datastore_o=None):
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form_ignore_text:
if len(datastore.data['watching'][uuid]['history']):
update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
datastore.data['watching'][uuid]['css_filter'] = form.css_filter.data.strip()
datastore.data['watching'][uuid]['subtractive_selectors'] = form.subtractive_selectors.data
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
if len(datastore.data['watching'][uuid]['history']):
update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
datastore.data['watching'][uuid].update(update_obj)
datastore.data['watching'][uuid].update(form.data)
datastore.data['watching'][uuid].update(extra_update_obj)
flash("Updated watch.")
@@ -548,20 +583,6 @@ def changedetection_app(config=None, datastore_o=None):
# Queue the watch for immediate recheck
update_q.put(uuid)
if form.trigger_check.data:
if len(form.notification_urls.data):
n_object = {'watch_url': form.url.data.strip(),
'notification_urls': form.notification_urls.data,
'notification_title': form.notification_title.data,
'notification_body': form.notification_body.data,
'notification_format': form.notification_format.data,
'uuid': uuid
}
notification_q.put(n_object)
flash('Test notification queued.')
else:
flash('No notification URLs set, cannot send test.', 'error')
# Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff' and not form.save_and_preview_button.data:
return redirect(url_for('diff_history_page', uuid=uuid))
@@ -576,18 +597,14 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and not form.validate():
flash("An error occurred, please see below.", "error")
# Re #110 offer the default minutes
using_default_minutes = False
if form.minutes_between_check.data == None:
form.minutes_between_check.data = datastore.data['settings']['requests']['minutes_between_check']
using_default_minutes = True
has_empty_checktime = datastore.data['watching'][uuid].has_empty_checktime
output = render_template("edit.html",
uuid=uuid,
watch=datastore.data['watching'][uuid],
form=form,
using_default_minutes=using_default_minutes,
current_base_url = datastore.data['settings']['application']['base_url']
has_empty_checktime=has_empty_checktime,
current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False)
)
return output
@@ -595,75 +612,45 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/settings", methods=['GET', "POST"])
@login_required
def settings_page():
from changedetectionio import content_fetcher, forms
form = forms.globalSettingsForm(request.form)
# Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None,
data=datastore.data['settings']
)
if request.method == 'GET':
form.minutes_between_check.data = int(datastore.data['settings']['requests']['minutes_between_check'])
form.notification_urls.data = datastore.data['settings']['application']['notification_urls']
form.global_subtractive_selectors.data = datastore.data['settings']['application']['global_subtractive_selectors']
form.global_ignore_text.data = datastore.data['settings']['application']['global_ignore_text']
form.ignore_whitespace.data = datastore.data['settings']['application']['ignore_whitespace']
form.extract_title_as_title.data = datastore.data['settings']['application']['extract_title_as_title']
form.fetch_backend.data = datastore.data['settings']['application']['fetch_backend']
form.notification_title.data = datastore.data['settings']['application']['notification_title']
form.notification_body.data = datastore.data['settings']['application']['notification_body']
form.notification_format.data = datastore.data['settings']['application']['notification_format']
form.base_url.data = datastore.data['settings']['application']['base_url']
if request.method == 'POST' and form.data.get('removepassword_button') == True:
if request.method == 'POST':
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
if not os.getenv("SALTED_PASS", False):
datastore.data['settings']['application']['password'] = False
flash("Password protection removed.", 'notice')
flask_login.logout_user()
return redirect(url_for('settings_page'))
if form.application.form.data.get('removepassword_button', False):
# SALTED_PASS means the password is "locked" to what we set in the Env var
if not os.getenv("SALTED_PASS", False):
datastore.remove_password()
flash("Password protection removed.", 'notice')
flask_login.logout_user()
return redirect(url_for('settings_page'))
if request.method == 'POST' and form.validate():
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
datastore.data['settings']['requests']['minutes_between_check'] = form.minutes_between_check.data
datastore.data['settings']['application']['extract_title_as_title'] = form.extract_title_as_title.data
datastore.data['settings']['application']['fetch_backend'] = form.fetch_backend.data
datastore.data['settings']['application']['notification_title'] = form.notification_title.data
datastore.data['settings']['application']['notification_body'] = form.notification_body.data
datastore.data['settings']['application']['notification_format'] = form.notification_format.data
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
datastore.data['settings']['application']['base_url'] = form.base_url.data
datastore.data['settings']['application']['global_subtractive_selectors'] = form.global_subtractive_selectors.data
datastore.data['settings']['application']['global_ignore_text'] = form.global_ignore_text.data
datastore.data['settings']['application']['ignore_whitespace'] = form.ignore_whitespace.data
if form.validate():
datastore.data['settings']['application'].update(form.data['application'])
datastore.data['settings']['requests'].update(form.data['requests'])
datastore.needs_write = True
if form.trigger_check.data:
if len(form.notification_urls.data):
n_object = {'watch_url': "Test from changedetection.io!",
'notification_urls': form.notification_urls.data,
'notification_title': form.notification_title.data,
'notification_body': form.notification_body.data,
'notification_format': form.notification_format.data,
}
notification_q.put(n_object)
flash('Test notification queued.')
else:
flash('No notification URLs set, cannot send test.', 'error')
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
datastore.needs_write = True
flash("Password protection enabled.", 'notice')
flask_login.logout_user()
return redirect(url_for('index'))
if not os.getenv("SALTED_PASS", False) and form.password.encrypted_password:
datastore.data['settings']['application']['password'] = form.password.encrypted_password
flash("Password protection enabled.", 'notice')
flask_login.logout_user()
return redirect(url_for('index'))
flash("Settings updated.")
datastore.needs_write = True
flash("Settings updated.")
if request.method == 'POST' and not form.validate():
flash("An error occurred, please see below.", "error")
else:
flash("An error occurred, please see below.", "error")
output = render_template("settings.html",
form=form,
current_base_url = datastore.data['settings']['application']['base_url'],
hide_remove_pass=os.getenv("SALTED_PASS", False))
hide_remove_pass=os.getenv("SALTED_PASS", False),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False))
return output
@@ -676,13 +663,19 @@ def changedetection_app(config=None, datastore_o=None):
good = 0
if request.method == 'POST':
now=time.time()
urls = request.values.get('urls').split("\n")
if (len(urls) > 5000):
flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
for url in urls:
url = url.strip()
url, *tags = url.split(" ")
# Flask wtform validators wont work with basic auth, use validators package
if len(url) and validators.url(url):
new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags))
# Up to 5000 per batch so we dont flood the server
if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags), write_to_disk_now=False)
# Straight into the queue.
update_q.put(new_uuid)
good += 1
@@ -690,7 +683,8 @@ def changedetection_app(config=None, datastore_o=None):
if len(url):
remaining_urls.append(url)
flash("{} Imported, {} Skipped.".format(good, len(remaining_urls)))
flash("{} Imported in {:.2f}s, {} Skipped.".format(good, time.time()-now,len(remaining_urls)))
datastore.needs_write = True
if len(remaining_urls) == 0:
# Looking good, redirect to index.
@@ -763,6 +757,9 @@ def changedetection_app(config=None, datastore_o=None):
except Exception as e:
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
screenshot_url = datastore.get_screenshot(uuid)
output = render_template("diff.html", watch_a=watch,
newest=newest_version_file_contents,
previous=previous_version_file_contents,
@@ -773,7 +770,8 @@ def changedetection_app(config=None, datastore_o=None):
current_previous_version=str(previous_version),
current_diff_url=watch['url'],
extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
left_sticky=True)
left_sticky=True,
screenshot=screenshot_url)
return output
@@ -833,15 +831,17 @@ def changedetection_app(config=None, datastore_o=None):
else:
content.append({'line': "No history found", 'classes': ''})
screenshot_url = datastore.get_screenshot(uuid)
output = render_template("preview.html",
content=content,
extra_stylesheets=extra_stylesheets,
ignored_line_numbers=ignored_line_numbers,
triggered_line_numbers=trigger_line_numbers,
current_diff_url=watch['url'],
screenshot=screenshot_url,
watch=watch,
uuid=uuid)
return output
@app.route("/settings/notification-logs", methods=['GET'])
@@ -954,6 +954,28 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
def static_content(group, filename):
if group == 'screenshot':
from flask import make_response
# Could be sensitive, follow password requirements
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
abort(403)
# These files should be in our subdirectory
try:
# set nocache, set content-type
watch_dir = datastore_o.datastore_path + "/" + filename
response = make_response(send_from_directory(filename="last-screenshot.png", directory=watch_dir, path=watch_dir + "/last-screenshot.png"))
response.headers['Content-type'] = 'image/png'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = 0
return response
except FileNotFoundError:
abort(404)
# These files should be in our subdirectory
try:
return send_from_directory("static/{}".format(group), path=filename)
@@ -984,10 +1006,14 @@ def changedetection_app(config=None, datastore_o=None):
flash("Error")
return redirect(url_for('index'))
@app.route("/api/delete", methods=['GET'])
@login_required
def api_delete():
uuid = request.args.get('uuid')
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
datastore.delete(uuid)
flash('Deleted.')
@@ -1113,8 +1139,6 @@ def notification_runner():
notification_debug_log = notification_debug_log[-100:]
# Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():
from changedetectionio import update_worker
@@ -1151,7 +1175,9 @@ def ticker_thread_check_time_launch_checks():
# Check for watches outside of the time threshold to put in the thread queue.
now = time.time()
max_system_wide = int(copied_datastore.data['settings']['requests']['minutes_between_check']) * 60
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
recheck_time_system_seconds = int(copied_datastore.data['settings']['requests']['minutes_between_check']) * 60
for uuid, watch in copied_datastore.data['watching'].items():
@@ -1160,18 +1186,14 @@ def ticker_thread_check_time_launch_checks():
continue
# If they supplied an individual entry minutes to threshold.
watch_minutes_between_check = watch.get('minutes_between_check', None)
if watch_minutes_between_check is not None:
# Cast to int just incase
max_time = int(watch_minutes_between_check) * 60
threshold = now
if watch.threshold_seconds:
threshold -= watch.threshold_seconds
else:
# Default system wide.
max_time = max_system_wide
threshold = now - max_time
threshold -= recheck_time_system_seconds
# Yeah, put it in the queue, it's more than time
if watch['last_checked'] <= threshold:
if watch['last_checked'] <= max(threshold, recheck_time_minimum_seconds):
if not uuid in running_uuids and uuid not in update_q.queue:
update_q.put(uuid)
@@ -1179,4 +1201,4 @@ def ticker_thread_check_time_launch_checks():
time.sleep(3)
# Should be low so we can break this out in testing
app.config.exit.wait(1)
app.config.exit.wait(1)

View File

@@ -42,6 +42,14 @@ class Fetcher():
# Should set self.error, self.status_code and self.content
pass
@abstractmethod
def quit(self):
return
@abstractmethod
def screenshot(self):
return
@abstractmethod
def get_last_status_code(self):
return self.status_code
@@ -116,16 +124,16 @@ class html_webdriver(Fetcher):
# request_body, request_method unused for now, until some magic in the future happens.
# check env for WEBDRIVER_URL
driver = webdriver.Remote(
self.driver = webdriver.Remote(
command_executor=self.command_executor,
desired_capabilities=DesiredCapabilities.CHROME,
proxy=self.proxy)
try:
driver.get(url)
self.driver.get(url)
except WebDriverException as e:
# Be sure we close the session window
driver.quit()
self.quit()
raise
# @todo - how to check this? is it possible?
@@ -135,26 +143,33 @@ class html_webdriver(Fetcher):
# @todo - dom wait loaded?
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
self.content = driver.page_source
self.content = self.driver.page_source
self.headers = {}
driver.quit()
def screenshot(self):
return self.driver.get_screenshot_as_png()
# Does the connection to the webdriver work? run a test connection.
def is_ready(self):
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.common.exceptions import WebDriverException
driver = webdriver.Remote(
self.driver = webdriver.Remote(
command_executor=self.command_executor,
desired_capabilities=DesiredCapabilities.CHROME)
# driver.quit() seems to cause better exceptions
driver.quit()
self.quit()
return True
def quit(self):
if self.driver:
try:
self.driver.quit()
except Exception as e:
print("Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py!
class html_requests(Fetcher):
fetcher_description = "Basic fast Plaintext/HTTP Client"

View File

@@ -2,22 +2,31 @@
import difflib
def same_slicer(l, a, b):
if a == b:
return [l[a]]
else:
return l[a:b]
# like .compare but a little different output
def customSequenceMatcher(before, after, include_equal=False):
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)
# @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if include_equal and tag == 'equal':
g = before[alo:ahi]
yield g
elif tag == 'delete':
g = "(removed) {}".format(before[alo])
g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
yield g
elif tag == 'replace':
g = ["(changed) {}".format(before[alo]), "(-> into) {}".format(after[blo])]
g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
g += ["(into ) " + i for i in same_slicer(after, blo, bhi)]
yield g
elif tag == 'insert':
g = "(added) {}".format(after[blo])
g = ["(added ) " + i for i in same_slicer(after, blo, bhi)]
yield g
# only_differences - only return info about the differences, no context

View File

@@ -4,7 +4,6 @@ import re
import time
import urllib3
from inscriptis import get_text
from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -21,6 +20,7 @@ class perform_site_check():
timestamp = int(time.time()) # used for storage etc too
changed_detected = False
screenshot = False # as bytes
stripped_text_from_html = ""
watch = self.datastore.data['watching'][uuid]
@@ -46,130 +46,155 @@ class perform_site_check():
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
# @todo check the failures are really handled how we expect
timeout = self.datastore.data['settings']['requests']['timeout']
url = self.datastore.get_val(uuid, 'url')
request_body = self.datastore.get_val(uuid, 'body')
request_method = self.datastore.get_val(uuid, 'method')
ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes')
# source: support
is_source = False
if url.startswith('source:'):
url = url.replace('source:', '')
is_source = True
# Pluggable content fetcher
prefer_backend = watch['fetch_backend']
if hasattr(content_fetcher, prefer_backend):
klass = getattr(content_fetcher, prefer_backend)
else:
timeout = self.datastore.data['settings']['requests']['timeout']
url = self.datastore.get_val(uuid, 'url')
request_body = self.datastore.get_val(uuid, 'body')
request_method = self.datastore.get_val(uuid, 'method')
ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes')
# If the klass doesnt exist, just use a default
klass = getattr(content_fetcher, "html_requests")
# Pluggable content fetcher
prefer_backend = watch['fetch_backend']
if hasattr(content_fetcher, prefer_backend):
klass = getattr(content_fetcher, prefer_backend)
fetcher = klass()
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
# Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base?
# @note: I feel like the following should be in a more obvious chain system
# - Check filter text
# - Is the checksum different?
# - Do we convert to JSON?
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
# return content().textfilter().jsonextract().checksumcompare() ?
is_json = 'application/json' in fetcher.headers.get('Content-Type', '')
is_html = not is_json
# source: support, basically treat it as plaintext
if is_source:
is_html = False
is_json = False
css_filter_rule = watch['css_filter']
subtractive_selectors = watch.get(
"subtractive_selectors", []
) + self.datastore.data["settings"]["application"].get(
"global_subtractive_selectors", []
)
has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())
if is_json and not has_filter_rule:
css_filter_rule = "json:$"
has_filter_rule = True
if has_filter_rule:
if 'json:' in css_filter_rule:
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
is_html = False
if is_html or is_source:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content = fetcher.content
# If not JSON, and if it's not text/plain..
if 'text/plain' in fetcher.headers.get('Content-Type', '').lower():
# Don't run get_text or xpath/css filters on plaintext
stripped_text_from_html = html_content
else:
# If the klass doesnt exist, just use a default
klass = getattr(content_fetcher, "html_requests")
# Then we assume HTML
if has_filter_rule:
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
if css_filter_rule[0] == '/':
html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule, html_content=fetcher.content)
else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
if has_subtractive_selectors:
html_content = html_tools.element_removal(subtractive_selectors, html_content)
if not is_source:
# extract text
stripped_text_from_html = \
html_tools.html_to_text(
html_content,
render_anchor_tag_content=self.datastore.data["settings"][
"application"].get(
"render_anchor_tag_content", False)
)
fetcher = klass()
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
# Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base?
# @note: I feel like the following should be in a more obvious chain system
# - Check filter text
# - Is the checksum different?
# - Do we convert to JSON?
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
# return content().textfilter().jsonextract().checksumcompare() ?
is_json = 'application/json' in fetcher.headers.get('Content-Type', '')
is_html = not is_json
css_filter_rule = watch['css_filter']
subtractive_selectors = watch.get(
"subtractive_selectors", []
) + self.datastore.data["settings"]["application"].get(
"global_subtractive_selectors", []
)
has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())
if is_json and not has_filter_rule:
css_filter_rule = "json:$"
has_filter_rule = True
if has_filter_rule:
if 'json:' in css_filter_rule:
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
is_html = False
if is_html:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content = fetcher.content
# If not JSON, and if it's not text/plain..
if 'text/plain' in fetcher.headers.get('Content-Type', '').lower():
# Don't run get_text or xpath/css filters on plaintext
elif is_source:
stripped_text_from_html = html_content
else:
# Then we assume HTML
if has_filter_rule:
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
if css_filter_rule[0] == '/':
html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule, html_content=fetcher.content)
else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
if has_subtractive_selectors:
html_content = html_tools.element_removal(subtractive_selectors, html_content)
# get_text() via inscriptis
stripped_text_from_html = get_text(html_content)
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
# We rely on the actual text in the html output.. many sites have random script vars etc,
# in the future we'll implement other mechanisms.
update_obj["last_check_status"] = fetcher.get_last_status_code()
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
# If there's text to skip
# @todo we could abstract out the get_text() to handle this cleaner
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
if len(text_to_ignore):
stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
else:
stripped_text_from_html = stripped_text_from_html.encode('utf8')
# We rely on the actual text in the html output.. many sites have random script vars etc,
# in the future we'll implement other mechanisms.
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
else:
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
update_obj["last_check_status"] = fetcher.get_last_status_code()
# On the first run of a site, watch['previous_md5'] will be an empty string, set it the current one.
if not len(watch['previous_md5']):
watch['previous_md5'] = fetched_md5
update_obj["previous_md5"] = fetched_md5
# If there's text to skip
# @todo we could abstract out the get_text() to handle this cleaner
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
if len(text_to_ignore):
stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
else:
stripped_text_from_html = stripped_text_from_html.encode('utf8')
blocked_by_not_found_trigger_text = False
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
else:
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
if len(watch['trigger_text']):
# Yeah, lets block first until something matches
blocked_by_not_found_trigger_text = True
# Filter and trigger works the same, so reuse it
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['trigger_text'],
mode="line numbers")
if result:
blocked_by_not_found_trigger_text = False
# On the first run of a site, watch['previous_md5'] will be an empty string, set it the current one.
if not len(watch['previous_md5']):
watch['previous_md5'] = fetched_md5
update_obj["previous_md5"] = fetched_md5
blocked_by_not_found_trigger_text = False
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
changed_detected = True
update_obj["previous_md5"] = fetched_md5
update_obj["last_changed"] = timestamp
if len(watch['trigger_text']):
# Yeah, lets block first until something matches
blocked_by_not_found_trigger_text = True
# Filter and trigger works the same, so reuse it
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['trigger_text'],
mode="line numbers")
if result:
blocked_by_not_found_trigger_text = False
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
changed_detected = True
update_obj["previous_md5"] = fetched_md5
update_obj["last_changed"] = timestamp
# Extract title as title
if is_html:
if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
# Extract title as title
if is_html:
if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
if self.datastore.data['settings']['application'].get('real_browser_save_screenshot', True):
screenshot = fetcher.screenshot()
return changed_detected, update_obj, text_content_before_ignored_filter
fetcher.quit()
return changed_detected, update_obj, text_content_before_ignored_filter, screenshot

View File

@@ -15,7 +15,6 @@ from wtforms import (
validators,
widgets,
)
from wtforms.fields import html5
from wtforms.validators import ValidationError
from changedetectionio import content_fetcher
@@ -26,6 +25,8 @@ from changedetectionio.notification import (
valid_notification_formats,
)
from wtforms.fields import FormField
valid_method = {
'GET',
'POST',
@@ -122,7 +123,6 @@ class ValidateContentFetcherIsReady(object):
def __call__(self, form, field):
import urllib3.exceptions
from changedetectionio import content_fetcher
# Better would be a radiohandler that keeps a reference to each class
@@ -231,7 +231,7 @@ class ValidateListRegex(object):
except re.error:
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (line))
class ValidateCSSJSONXPATHInput(object):
"""
Filter validation
@@ -293,39 +293,39 @@ class ValidateCSSJSONXPATHInput(object):
# Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything?
class quickWatchForm(Form):
# https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
# `require_tld` = False is needed even for the test harness "http://localhost:5005.." to run
url = html5.URLField('URL', validators=[validateURL()])
url = fields.URLField('URL', validators=[validateURL()])
tag = StringField('Group tag', [validators.Optional(), validators.Length(max=35)])
# Common to a single watch and the global settings
class commonSettingsForm(Form):
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateNotificationBodyAndTitleWhenURLisSet(), ValidateAppRiseServers()])
notification_title = StringField('Notification Title', default=default_notification_title, validators=[validators.Optional(), ValidateTokensList()])
notification_body = TextAreaField('Notification Body', default=default_notification_body, validators=[validators.Optional(), ValidateTokensList()])
notification_format = SelectField('Notification Format', choices=valid_notification_formats.keys(), default=default_notification_format)
trigger_check = BooleanField('Send test notification on save')
fetch_backend = RadioField(u'Fetch Method', choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
notification_urls = StringListField('Notification URL list', validators=[validators.Optional(), ValidateNotificationBodyAndTitleWhenURLisSet(), ValidateAppRiseServers()])
notification_title = StringField('Notification title', default=default_notification_title, validators=[validators.Optional(), ValidateTokensList()])
notification_body = TextAreaField('Notification body', default=default_notification_body, validators=[validators.Optional(), ValidateTokensList()])
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys(), default=default_notification_format)
fetch_backend = RadioField(u'Fetch method', choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
class watchForm(commonSettingsForm):
url = html5.URLField('URL', validators=[validateURL()])
tag = StringField('Group tag', [validators.Optional(), validators.Length(max=35)])
url = fields.URLField('URL', validators=[validateURL()])
tag = StringField('Group tag', [validators.Optional(), validators.Length(max=35)], default='')
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
minutes_between_check = fields.IntegerField('Maximum time in minutes until recheck',
[validators.Optional(), validators.NumberRange(min=1)])
css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()])
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
title = StringField('Title')
ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
headers = StringDictKeyValue('Request Headers')
body = TextAreaField('Request Body', [validators.Optional()])
method = SelectField('Request Method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore Status Codes (process non-2xx status codes as normal)', default=False)
css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
title = StringField('Title', default='')
ignore_text = StringListField('Ignore text', [ValidateListRegex()])
headers = StringDictKeyValue('Request headers')
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -344,15 +344,31 @@ class watchForm(commonSettingsForm):
return result
class globalSettingsForm(commonSettingsForm):
password = SaltyPasswordField()
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
# datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form):
minutes_between_check = fields.IntegerField('Maximum time in minutes until recheck',
[validators.NumberRange(min=1)])
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title')
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
base_url = StringField('Base URL', validators=[validators.Optional()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace')
real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?')
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
password = SaltyPasswordField()
class globalSettingsForm(Form):
# Define these as FormFields/"sub forms", this way it matches the JSON storage
# datastore.data['settings']['application']..
# datastore.data['settings']['requests']..
requests = FormField(globalSettingsRequestForm)
application = FormField(globalSettingsApplicationForm)
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})

View File

@@ -4,6 +4,9 @@ from typing import List
from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
import re
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
class JSONNotFound(ValueError):
@@ -25,12 +28,12 @@ def subtractive_css_selector(css_selector, html_content):
item.decompose()
return str(soup)
def element_removal(selectors: List[str], html_content):
"""Joins individual filters into one css filter."""
selector = ",".join(selectors)
return subtractive_css_selector(selector, html_content)
# Return str Utf-8 of matched rules
def xpath_filter(xpath_filter, html_content):
@@ -167,3 +170,35 @@ def strip_ignore_text(content, wordlist, mode="content"):
return ignored_line_numbers
return "\n".encode('utf8').join(output)
def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
"""Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also
included in the text
:param html_content: string with html content
:param render_anchor_tag_content: boolean flag indicating whether to extract
hyperlinks (the anchor tag content) together with text. This refers to the
'href' inside 'a' tags.
Anchor tag content is rendered in the following manner:
'[ text ](anchor tag content)'
:return: extracted text from the HTML
"""
# if anchor tag content flag is set to True define a config for
# extracting this content
if render_anchor_tag_content:
parser_config = ParserConfig(
annotation_rules={"a": ["hyperlink"]}, display_links=True
)
# otherwise set config to None
else:
parser_config = None
# get text and annotations via inscriptis
text_content = get_text(html_content, config=parser_config)
return text_content

View File

@@ -0,0 +1,49 @@
import collections
import os
import uuid as uuid_builder
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
default_notification_title,
)
class model(dict):
def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw)
self.update({
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
'watching': {},
'settings': {
'headers': {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
},
'requests': {
'timeout': 15, # Default 15 seconds
# Default 3 hours
'minutes_between_check': 3 * 60, # Default 3 hours
'workers': 10 # Number of threads, lower is better for slow connections
},
'application': {
'password': False,
'base_url' : None,
'extract_title_as_title': False,
'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
'global_subtractive_selectors': [],
'ignore_whitespace': False,
'render_anchor_tag_content': False,
'notification_urls': [], # Apprise URL list
# Custom notification content
'notification_title': default_notification_title,
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'real_browser_save_screenshot': True,
'schema_version' : 0
}
}
})

View File

@@ -0,0 +1,62 @@
import os
import uuid as uuid_builder
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 5))
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
default_notification_title,
)
class model(dict):
def __init__(self, *arg, **kw):
self.update({
'url': None,
'tag': None,
'last_checked': 0,
'last_changed': 0,
'paused': False,
'last_viewed': 0, # history key value of the last viewed via the [diff] link
'newest_history_key': "",
'title': None,
'previous_md5': "",
'uuid': str(uuid_builder.uuid4()),
'headers': {}, # Extra headers to send
'body': None,
'method': 'GET',
'history': {}, # Dict of timestamp and output stripped filename
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
# Custom notification content
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'notification_title': default_notification_title,
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'css_filter': "",
'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected
'fetch_backend': None,
'extract_title_as_title': False,
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
# Should be all None by default, so we use the system default in this case.
'minutes_between_check': None
})
# goes at the end so we update the default object with the initialiser
super(model, self).__init__(*arg, **kw)
@property
def has_empty_checktime(self):
if self.get('minutes_between_check', None):
return False
return True
@property
def threshold_seconds(self):
sec = self.get('minutes_between_check', None)
if sec:
sec = sec * 60
return sec

View File

View File

@@ -26,13 +26,6 @@ default_notification_title = 'ChangeDetection.io Notification - {watch_url}'
def process_notification(n_object, datastore):
apobj = apprise.Apprise(debug=True)
for url in n_object['notification_urls']:
url = url.strip()
print (">> Process Notification: AppRise notifying {}".format(url))
apobj.add(url)
# Get the notification body from datastore
n_body = n_object.get('notification_body', default_notification_body)
n_title = n_object.get('notification_title', default_notification_title)
@@ -54,19 +47,49 @@ def process_notification(n_object, datastore):
# https://github.com/caronc/apprise/wiki/Development_LogCapture
# Anything higher than or equal to WARNING (which covers things like Connection errors)
# raise it as an exception
apobjs=[]
for url in n_object['notification_urls']:
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
apobj.notify(
body=n_body,
title=n_title,
body_format=n_format)
apobj = apprise.Apprise(debug=True)
url = url.strip()
if len(url):
print(">> Process Notification: AppRise notifying {}".format(url))
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :(
# 2000 bytes minus -
# 200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
# Length of URL - Incase they specify a longer custom avatar_url
# Returns empty string if nothing found, multi-line string otherwise
log_value = logs.getvalue()
if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
raise Exception(log_value)
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
k = '?' if not '?' in url else '&'
if not 'avatar_url' in url:
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
payload_max_size = 1700
# Trim everything to a max of 1700 total chars (leave some for padding, control etc)
# Incase n_title > 1700
# basically trim back the body until the total size fits our threshold
# and trim off the n_title to fit always.
body_limit = max(0, payload_max_size - len(n_title))
body = n_body[0:body_limit] if 'discord://' in url else n_body
apobj.add(url)
apobj.notify(
title=n_title[0:payload_max_size],
body=body,
body_format=n_format)
apobj.clear()
# Incase it needs to exist in memory for a while after to process(?)
apobjs.append(apobj)
# Returns empty string if nothing found, multi-line string otherwise
log_value = logs.getvalue()
if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
raise Exception(log_value)
# Notification title + body content parameters get created here.
def create_notification_parameters(n_object, datastore):

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,53 @@
$(document).ready(function() {
$('#add-email-helper').click(function (e) {
e.preventDefault();
email = prompt("Destination email");
if(email) {
var n = $("#notification_urls");
var p=email_notification_prefix;
$(n).val( $.trim( $(n).val() )+"\n"+email_notification_prefix+email );
}
});
$('#send-test-notification').click(function (e) {
e.preventDefault();
// this can be global
var csrftoken = $('input[name=csrf_token]').val();
$.ajaxSetup({
beforeSend: function(xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
data = {
window_url : window.location.href,
notification_urls : $('#notification_urls').val(),
notification_title : $('#notification_title').val(),
notification_body : $('#notification_body').val(),
notification_format : $('#notification_format').val(),
}
for (key in data) {
if (!data[key].length) {
alert(key+" is empty, cannot send test.")
return;
}
}
$.ajax({
type: "POST",
url: notification_base_url,
data : data
}).done(function(data){
console.log(data);
alert('Sent');
}).fail(function(data){
console.log(data);
alert('Error: '+data.responseJSON.error);
})
});
});

View File

@@ -1,6 +1,11 @@
// Rewrite this is a plugin.. is all this JS really 'worth it?'
if(!window.location.hash) {
var tab=document.querySelectorAll("#default-tab a");
tab[0].click();
}
window.addEventListener('hashchange', function() {
var tabs = document.getElementsByClassName('active');
while (tabs[0]) {
@@ -21,7 +26,6 @@ if (!has_errors.length) {
focus_error_tab();
}
function set_active_tab() {
var tab=document.querySelectorAll("a[href='"+location.hash+"']");
if (tab.length) {

View File

@@ -0,0 +1,6 @@
$(function () {
// Remove unviewed status when normally clicked
$('.diff-link').click(function () {
$(this).closest('.unviewed').removeClass('unviewed');
});
});

View File

@@ -1,7 +1,8 @@
#diff-ui {
background: #fff;
padding: 2em;
margin: 1em;
margin-left: 1em;
margin-right: 1em;
border-radius: 5px;
font-size: 11px; }
#diff-ui table {
@@ -70,3 +71,8 @@ td#diff-col div {
/* ignored and triggered? make it obvious error */
.ignored.triggered {
background-color: #ff0000; }
.tab-pane-inner#screenshot {
text-align: center; }
.tab-pane-inner#screenshot img {
max-width: 99%; }

View File

@@ -2,7 +2,8 @@
background: #fff;
padding: 2em;
margin: 1em;
margin-left: 1em;
margin-right: 1em;
border-radius: 5px;
font-size: 11px;
@@ -85,4 +86,11 @@ td#diff-col div {
/* ignored and triggered? make it obvious error */
.ignored.triggered {
background-color: #ff0000;
}
.tab-pane-inner#screenshot {
text-align: center;
img {
max-width: 99%;
}
}

View File

@@ -31,7 +31,7 @@ a.github-link {
section.content {
padding-top: 5em;
padding-bottom: 5em;
padding-bottom: 1em;
flex-direction: column;
display: flex;
align-items: center;
@@ -182,9 +182,15 @@ body:after, body:before {
#notification-customisation {
border: 1px solid #ccc;
padding: 1rem;
padding: 0.5rem;
border-radius: 5px; }
#notification-error-log {
border: 1px solid #ccc;
padding: 1rem;
border-radius: 5px;
overflow-wrap: break-word; }
#token-table.pure-table td, #token-table.pure-table th {
font-size: 80%; }
@@ -244,7 +250,7 @@ footer {
.sticky-tab {
position: absolute;
top: 60px;
font-size: 8px;
font-size: 65%;
background: #fff;
padding: 10px; }
.sticky-tab#left-sticky {
@@ -279,6 +285,11 @@ footer {
padding-bottom: 1em; }
.pure-form .pure-control-group div, .pure-form .pure-group div, .pure-form .pure-controls div {
margin: 0px; }
.pure-form .pure-control-group .checkbox > *, .pure-form .pure-group .checkbox > *, .pure-form .pure-controls .checkbox > * {
display: inline;
vertical-align: middle; }
.pure-form .pure-control-group .checkbox > label, .pure-form .pure-group .checkbox > label, .pure-form .pure-controls .checkbox > label {
padding-left: 5px; }
.pure-form .error input {
background-color: #ffebeb; }
.pure-form ul.errors {
@@ -310,14 +321,23 @@ footer {
#nav-menu {
overflow-x: scroll; } }
/*
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 800px) {
div.sticky-tab#hosted-sticky {
top: 60px;
left: 0px;
right: auto; }
section.content {
padding-top: 110px; }
div.tabs.collapsable ul li {
display: block;
border-radius: 0px; }
input[type='text'] {
width: 100%; }
/*
Max width before this PARTICULAR table gets nasty
This query will take effect for any screen smaller than 760px
and also iPads specifically.
*/
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
input[type='text'] {
width: 100%; }
.watch-table {
/* Force table to not be like tables anymore */
/* Force table to not be like tables anymore */
@@ -394,14 +414,17 @@ and also iPads specifically.
padding: 20px;
border-radius: 5px; }
.tab-pane-inner {
padding: 0px; }
.tab-pane-inner:not(:target) {
display: none; }
.tab-pane-inner:target {
display: block; }
.edit-form {
min-width: 70%; }
.edit-form .tab-pane-inner {
padding: 0px; }
.edit-form .tab-pane-inner:not(:target) {
display: none; }
.edit-form .tab-pane-inner:target {
display: block; }
min-width: 70%;
/* so it cant overflow */
max-width: 95%; }
.edit-form .box-wrap {
position: relative; }
.edit-form .inner {

View File

@@ -35,7 +35,7 @@ a.github-link {
section.content {
padding-top: 5em;
padding-bottom: 5em;
padding-bottom: 1em;
flex-direction: column;
display: flex;
align-items: center;
@@ -241,10 +241,16 @@ body:after, body:before {
#notification-customisation {
border: 1px solid #ccc;
padding: 1rem;
padding: 0.5rem;
border-radius: 5px;
}
#notification-error-log {
border: 1px solid #ccc;
padding: 1rem;
border-radius: 5px;
overflow-wrap: break-word;
}
#token-table {
&.pure-table td, &.pure-table th {
@@ -322,7 +328,7 @@ footer {
.sticky-tab {
position: absolute;
top: 60px;
font-size: 8px;
font-size: 65%;
background: #fff;
padding: 10px;
&#left-sticky {
@@ -369,6 +375,15 @@ footer {
div {
margin: 0px;
}
.checkbox {
> * {
display: inline;
vertical-align: middle;
}
> label {
padding-left: 5px;
}
}
}
/* The input fields with errors */
.error {
@@ -422,18 +437,35 @@ footer {
}
}
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 800px) {
div.sticky-tab#hosted-sticky {
top: 60px;
left: 0px;
right: auto;
}
section.content {
padding-top: 110px;
}
// Make the tabs easier to hit, they will be all nice and horizontal
div.tabs.collapsable ul li {
display: block;
border-radius: 0px;
}
input[type='text'] {
width: 100%;
}
/*
Max width before this PARTICULAR table gets nasty
This query will take effect for any screen smaller than 760px
and also iPads specifically.
*/
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
input[type='text'] {
width: 100%;
}
.watch-table {
/* Force table to not be like tables anymore */
thead, tbody, th, td, tr {
@@ -556,10 +588,7 @@ $form-edge-padding: 20px;
}
}
.edit-form {
min-width: 70%;
.tab-pane-inner {
.tab-pane-inner {
&:not(:target) {
display: none;
}
@@ -568,7 +597,12 @@ $form-edge-padding: 20px;
}
// doesnt need padding because theres another row of buttons/activity
padding: 0px;
}
}
.edit-form {
min-width: 70%;
/* so it cant overflow */
max-width: 95%;
.box-wrap {
position: relative;
}

View File

@@ -8,11 +8,7 @@ from copy import deepcopy
from os import mkdir, path, unlink
from threading import Lock
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
default_notification_title,
)
from changedetectionio.model import Watch, App
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
@@ -29,69 +25,10 @@ class ChangeDetectionStore:
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
self.stop_thread = False
self.__data = {
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
'watching': {},
'settings': {
'headers': {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
},
'requests': {
'timeout': 15, # Default 15 seconds
'minutes_between_check': 3 * 60, # Default 3 hours
'workers': 10 # Number of threads, lower is better for slow connections
},
'application': {
'password': False,
'base_url' : None,
'extract_title_as_title': False,
'fetch_backend': 'html_requests',
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
'global_subtractive_selectors': [],
'ignore_whitespace': False,
'notification_urls': [], # Apprise URL list
# Custom notification content
'notification_title': default_notification_title,
'notification_body': default_notification_body,
'notification_format': default_notification_format,
}
}
}
self.__data = App.model()
# Base definition for all watchers
self.generic_definition = {
'url': None,
'tag': None,
'last_checked': 0,
'last_changed': 0,
'paused': False,
'last_viewed': 0, # history key value of the last viewed via the [diff] link
'newest_history_key': "",
'title': None,
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
'minutes_between_check': None,
'previous_md5': "",
'uuid': str(uuid_builder.uuid4()),
'headers': {}, # Extra headers to send
'body': None,
'method': 'GET',
'history': {}, # Dict of timestamp and output stripped filename
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
# Custom notification content
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'notification_title': default_notification_title,
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'css_filter': "",
'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected
'fetch_backend': None,
'extract_title_as_title': False
}
self.generic_definition = Watch.model()
if path.isfile('changedetectionio/source.txt'):
with open('changedetectionio/source.txt') as f:
@@ -188,6 +125,10 @@ class ChangeDetectionStore:
self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
self.needs_write = True
def remove_password(self):
self.__data['settings']['application']['password'] = False
self.needs_write = True
def update_watch(self, uuid, update_obj):
with self.lock:
@@ -331,15 +272,14 @@ class ChangeDetectionStore:
self.needs_write = True
return changes_removed
def add_watch(self, url, tag="", extras=None):
def add_watch(self, url, tag="", extras=None, write_to_disk_now=True):
if extras is None:
extras = {}
with self.lock:
# @todo use a common generic version of this
new_uuid = str(uuid_builder.uuid4())
_blank = deepcopy(self.generic_definition)
_blank.update({
new_watch = Watch.model({
'url': url,
'tag': tag
})
@@ -350,9 +290,8 @@ class ChangeDetectionStore:
if k in apply_extras:
del apply_extras[k]
_blank.update(apply_extras)
self.data['watching'][new_uuid] = _blank
new_watch.update(apply_extras)
self.__data['watching'][new_uuid]=new_watch
# Get the directory ready
output_path = "{}/{}".format(self.datastore_path, new_uuid)
@@ -361,7 +300,8 @@ class ChangeDetectionStore:
except FileExistsError:
print(output_path, "already exists.")
self.sync_to_json()
if write_to_disk_now:
self.sync_to_json()
return new_uuid
# Save some text file to the appropriate path and bump the history
@@ -381,6 +321,22 @@ class ChangeDetectionStore:
return fname
def get_screenshot(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
fname = "{}/last-screenshot.png".format(output_path)
if path.isfile(fname):
return fname
return False
# Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, watch_uuid, screenshot: bytes):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
fname = "{}/last-screenshot.png".format(output_path)
with open(fname, 'wb') as f:
f.write(screenshot)
f.close()
def sync_to_json(self):
logging.info("Saving JSON..")

View File

@@ -1,7 +1,7 @@
{% from '_helpers.jinja' import render_field %}
{% macro render_common_settings_form(form, current_base_url) %}
{% macro render_common_settings_form(form, current_base_url, emailprefix) %}
<div class="pure-control-group">
{{ render_field(form.notification_urls, rows=5, placeholder="Examples:
@@ -13,13 +13,19 @@
<div class="pure-form-message-inline">
<ul>
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<li><code>discord://</code> will silently fail if the total message length is more than 2000 chars.</li>
<li><code>discord://</code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li>Go here for <a href="{{url_for('notification_logs')}}">Notification debug logs</a></li>
<li>Go here for <a href="{{url_for('notification_logs')}}">notification debug logs</a></li>
</ul>
</div>
<br/>
<a id="send-test-notification" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Send test notification</a>
{% if emailprefix %}
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a>
{% endif %}
</div>
<div id="notification-customisation">
<div id="notification-customisation" class="pure-control-group">
<div class="pure-control-group">
{{ render_field(form.notification_title, class="m-d") }}
<span class="pure-form-message-inline">Title for all notifications</span>
@@ -93,7 +99,4 @@
</span>
</div>
</div>
<div class="pure-control-group">
{{ render_field(form.trigger_check) }}
</div>
{% endmacro %}

View File

@@ -1,3 +1,30 @@
{% macro render_field(field) %}
<div {% if field.errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
<div {% if field.errors %} class="error" {% endif %}>{{ field.label }}</div>
{% if field.errors %}
<ul class=errors>
{% for error in field.errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
</div>
{% endmacro %}
{% macro render_checkbox_field(field) %}
<div class="checkbox {% if field.errors %} error {% endif %}">
{{ field(**kwargs)|safe }} {{ field.label }}
{% if field.errors %}
<ul class=errors>
{% for error in field.errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
</div>
{% endmacro %}
{% macro render_field(field) %}
<div {% if field.errors %} class="error" {% endif %}>{{ field.label }}</div>
<div {% if field.errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}

View File

@@ -5,6 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="Self hosted website change detection.">
<title>Change Detection{{extra_title}}</title>
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag %}- {{active_tag}}{% endif %}" href="{{ url_for('rss', tag=active_tag , token=app_rss_token)}}" />
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='pure-min.css')}}">
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='styles.css')}}">
{% if extra_stylesheets %}
@@ -17,6 +18,8 @@
background-image: url({{url_for('static_content', group='images', filename='gradient-border.png')}});
}
</style>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
</head>
<body>

View File

@@ -1,7 +1,6 @@
{% extends 'base.html' %}
{% block content %}
<div id="settings">
<h1>Differences</h1>
<form class="pure-form " action="" method="GET">
@@ -35,21 +34,45 @@
<div id="diff-jump">
<a onclick="next_diff();">Jump</a>
</div>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="tabs">
<ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li>
{% if screenshot %}
<li class="tab"><a href="#screenshot">Current screenshot</a></li>
{% endif %}
</ul>
</div>
<div id="diff-ui">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
<table>
<tbody>
<tr>
<!-- just proof of concept copied straight from github.com/kpdecker/jsdiff -->
<td id="a" style="display: none;">{{previous}}</td>
<td id="b" style="display: none;">{{newest}}</td>
<td id="diff-col">
<span id="result"></span>
</td>
</tr>
</tbody>
</table>
Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a>
<div class="tab-pane-inner" id="text">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.
</div>
<table>
<tbody>
<tr>
<!-- just proof of concept copied straight from github.com/kpdecker/jsdiff -->
<td id="a" style="display: none;">{{previous}}</td>
<td id="b" style="display: none;">{{newest}}</td>
<td id="diff-col">
<span id="result"></span>
</td>
</tr>
</tbody>
</table>
Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a>
</div>
{% if screenshot %}
<div class="tab-pane-inner" id="screenshot">
<p>
<i>For now, only the most recent screenshot is saved and displayed.</i>
</p>
<img src="{{url_for('static_content', group='screenshot', filename=uuid)}}">
</div>
{% endif %}
</div>

View File

@@ -1,13 +1,19 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.jinja' import render_field %}
{% from '_helpers.jinja' import render_button %}
{% from '_helpers.jinja' import render_field, render_checkbox_field, render_button %}
{% from '_common_fields.jinja' import render_common_settings_form %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script>
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
{% if emailprefix %}
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
{% endif %}
</script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
<div class="edit-form monospaced-textarea">
<div class="tabs">
<div class="tabs collapsable">
<ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li>
@@ -36,7 +42,7 @@
</div>
<div class="pure-control-group">
{{ render_field(form.minutes_between_check) }}
{% if using_default_minutes %}
{% if has_empty_checktime %}
<span class="pure-form-message-inline">Currently using the <a
href="{{ url_for('settings_page', uuid=uuid) }}">default global settings</a>, change to another value if you want to be specific.</span>
{% else %}
@@ -45,7 +51,7 @@
{% endif %}
</div>
<div class="pure-control-group">
{{ render_field(form.extract_title_as_title) }}
{{ render_checkbox_field(form.extract_title_as_title) }}
</div>
</fieldset>
</div>
@@ -82,7 +88,7 @@ User-Agent: wonderbra 1.0") }}
}") }}
</div>
<div>
{{ render_field(form.ignore_status_codes) }}
{{ render_checkbox_field(form.ignore_status_codes) }}
</div>
</fieldset>
<br/>
@@ -92,7 +98,7 @@ User-Agent: wonderbra 1.0") }}
<strong>Note: <i>These settings override the global settings for this watch.</i></strong>
<fieldset>
<div class="field-group">
{{ render_common_settings_form(form, current_base_url) }}
{{ render_common_settings_form(form, current_base_url, emailprefix) }}
</div>
</fieldset>
</div>
@@ -162,7 +168,7 @@ nav
<ul>
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>Each line is process separately (think of each line as "OR")</li>
<li>Each line is processed separately (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>

View File

@@ -9,7 +9,7 @@
<div class="pure-control-group">
<label for="password">Password</label>
<input type="password" id="password" required="" name="password" value=""
size="15"/>
size="15" autofocus />
<input type="hidden" id="email" name="email" value="defaultuser@changedetection.io" />
</div>
<div class="pure-control-group">

View File

@@ -5,7 +5,7 @@
<div class="inner">
<h4 style="margin-top: 0px;">The following issues were detected when sending notifications</h4>
<div id="notification-customisation">
<div id="notification-error-log">
<ul style="font-size: 80%; margin:0px; padding: 0 0 0 7px">
{% for log in logs|reverse %}
<li>{{log}}</li>

View File

@@ -6,18 +6,40 @@
<h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1>
</div>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="tabs">
<ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li>
{% if screenshot %}
<li class="tab"><a href="#screenshot">Current screenshot</a></li>
{% endif %}
</ul>
</div>
<div id="diff-ui">
<span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
<table>
<tbody>
<tr>
<td id="diff-col">
<div class="tab-pane-inner" id="text">
<span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
<table>
<tbody>
<tr>
<td id="diff-col">
{% for row in content %}
<div class="{{row.classes}}">{{row.line}}</div>
{% endfor %}
</td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
</table>
</div>
{% if screenshot %}
<div class="tab-pane-inner" id="screenshot">
<p>
<i>For now, only the most recent screenshot is saved and displayed.</i>
</p>
<img src="{{url_for('static_content', group='screenshot', filename=uuid)}}">
</div>
{% endif %}
</div>
{% endblock %}

View File

@@ -1,14 +1,20 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.jinja' import render_field, render_button %}
{% from '_helpers.jinja' import render_field, render_checkbox_field, render_button %}
{% from '_common_fields.jinja' import render_common_settings_form %}
<script>
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
{% if emailprefix %}
const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
{% endif %}
</script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='settings.js')}}" defer></script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
<div class="edit-form">
<div class="tabs">
<div class="tabs collapsable">
<ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li>
<li class="tab"><a href="#notifications">Notifications</a></li>
@@ -22,15 +28,15 @@
<div class="tab-pane-inner" id="general">
<fieldset>
<div class="pure-control-group">
{{ render_field(form.minutes_between_check) }}
{{ render_field(form.requests.form.minutes_between_check) }}
<span class="pure-form-message-inline">Default time for all watches, when the watch does not have a specific time setting.</span>
</div>
<div class="pure-control-group">
{% if not hide_remove_pass %}
{% if current_user.is_authenticated %}
{{ render_button(form.removepassword_button) }}
{{ render_button(form.application.form.removepassword_button) }}
{% else %}
{{ render_field(form.password) }}
{{ render_field(form.application.form.password) }}
<span class="pure-form-message-inline">Password protection for your changedetection.io application.</span>
{% endif %}
{% else %}
@@ -38,7 +44,7 @@
{% endif %}
</div>
<div class="pure-control-group">
{{ render_field(form.base_url, placeholder="http://yoursite.com:5000/",
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
class="m-d") }}
<span class="pure-form-message-inline">
Base URL used for the {base_url} token in notifications and RSS links.<br/>Default value is the ENV var 'BASE_URL' (Currently "{{current_base_url}}"),
@@ -47,25 +53,29 @@
</div>
<div class="pure-control-group">
{{ render_field(form.extract_title_as_title) }}
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
<span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
</div>
</fieldset>
</div>
<div class="tab-pane-inner" id="notifications">
<fieldset>
<div class="field-group">
{{ render_common_settings_form(form, current_base_url) }}
{{ render_common_settings_form(form.application.form, current_base_url, emailprefix) }}
</div>
</fieldset>
<a href="{{url_for('notification_logs')}}">Notification debug logs</a>
</div>
<div class="tab-pane-inner" id="fetching">
<div class="pure-control-group">
{{ render_field(form.fetch_backend) }}
{{ render_field(form.application.form.fetch_backend) }}
<span class="pure-form-message-inline">
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
@@ -77,14 +87,20 @@
<div class="tab-pane-inner" id="filters">
<fieldset class="pure-group">
{{ render_field(form.ignore_whitespace) }}
{{ render_checkbox_field(form.application.form.ignore_whitespace) }}
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br/>
<i>Note:</i> Changing this will change the status of your existing watches, possibily trigger alerts etc.
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
<span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
<br/>
<i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.global_subtractive_selectors, rows=5, placeholder="header
{{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
@@ -96,7 +112,7 @@ nav
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
{{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br/>

View File

@@ -1,7 +1,8 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.jinja' import render_simple_field %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
<div class="box">
<form class="pure-form" action="{{ url_for('api_watch_add') }}" method="POST" id="new-watch-form">
@@ -9,7 +10,7 @@
<fieldset>
<legend>Add a new change detection watch</legend>
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="tag") }}
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
<button type="submit" class="pure-button pure-button-primary">Watch</button>
</fieldset>
<!-- add extra stuff, like do a http POST and send headers -->
@@ -45,12 +46,13 @@
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
{% if watch.newest_history_key| int > watch.last_viewed| int %}unviewed{% endif %}">
{% if watch.newest_history_key| int > watch.last_viewed| int %}unviewed{% endif %}
{% if watch.uuid in queued_uuids %}queued{% endif %}">
<td class="inline">{{ loop.index }}</td>
<td class="inline paused-state state-{{watch.paused}}"><a href="{{url_for('index', pause=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause" title="Pause"/></a></td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.url }}"></a>
<a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
@@ -71,11 +73,11 @@
{% endif %}
</td>
<td>
<a href="{{ url_for('api_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
class="pure-button button-small pure-button-primary">Recheck</a>
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('api_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
class="recheck pure-button button-small pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
<a href="{{ url_for('edit_page', uuid=watch.uuid)}}" class="pure-button button-small pure-button-primary">Edit</a>
{% if watch.history|length >= 2 %}
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Diff</a>
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
{% else %}
{% if watch.history|length == 1 %}
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>

View File

@@ -16,6 +16,7 @@ def cleanup(datastore_path):
# Unlink test output files
files = ['output.txt',
'url-watches.json',
'secret.txt',
'notification.txt',
'count.txt',
'endpoint-content.txt'

View File

@@ -1,5 +1,5 @@
from flask import url_for
from . util import live_server_setup
def test_check_access_control(app, client):
# Still doesnt work, but this is closer.
@@ -12,9 +12,9 @@ def test_check_access_control(app, client):
# Enable password check.
res = c.post(
url_for("settings_page"),
data={"password": "foobar",
"minutes_between_check": 180,
'fetch_backend': "html_requests"},
data={"application-password": "foobar",
"requests-minutes_between_check": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
@@ -49,74 +49,31 @@ def test_check_access_control(app, client):
assert b"minutes_between_check" in res.data
assert b"fetch_backend" in res.data
##################################################
# Remove password button, and check that it worked
##################################################
res = c.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"tag": "",
"headers": "",
"fetch_backend": "html_webdriver",
"removepassword_button": "Remove password"
"requests-minutes_between_check": 180,
"application-fetch_backend": "html_webdriver",
"application-removepassword_button": "Remove password"
},
follow_redirects=True,
)
assert b"Password protection removed." in res.data
assert b"LOG OUT" not in res.data
# There was a bug where saving the settings form would submit a blank password
def test_check_access_control_no_blank_password(app, client):
# Still doesnt work, but this is closer.
with app.test_client() as c:
# Check we dont have any password protection enabled yet.
res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data
# Enable password check.
############################################################
# Be sure a blank password doesnt setup password protection
############################################################
res = c.post(
url_for("settings_page"),
data={"password": "",
"minutes_between_check": 180,
'fetch_backend': "html_requests"},
data={"application-password": "",
"requests-minutes_between_check": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." not in res.data
assert b"Login" not in res.data
assert b"Password protection enabled" not in res.data
# There was a bug where saving the settings form would submit a blank password
def test_check_access_no_remote_access_to_remove_password(app, client):
# Still doesnt work, but this is closer.
with app.test_client() as c:
# Check we dont have any password protection enabled yet.
res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data
# Enable password check.
res = c.post(
url_for("settings_page"),
data={"password": "password",
"minutes_between_check": 180,
'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." in res.data
assert b"Login" in res.data
res = c.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"tag": "",
"headers": "",
"fetch_backend": "html_webdriver",
"removepassword_button": "Remove password"
},
follow_redirects=True,
)
assert b"Password protection removed." not in res.data
res = c.get(url_for("index"),
follow_redirects=True)
assert b"watch-table-wrapper" not in res.data

View File

@@ -26,7 +26,8 @@ def test_snapshot_api_detects_change(client, live_server):
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
test_url = url_for('test_endpoint', content_type="text/plain",
_external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},

View File

@@ -50,6 +50,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
#####################
# Check HTML conversion detected and workd
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# Check this class does not appear (that we didnt see the actual source)
assert b'foobar-detection' not in res.data
# Make a change
set_modified_response()
@@ -70,6 +78,11 @@ def test_check_basic_change_detection_functionality(client, live_server):
res = client.get(url_for("rss"))
expected_url = url_for('test_endpoint', _external=True)
assert b'<rss' in res.data
# re #16 should have the diff in here too
assert b'(into ) which has this one new line' in res.data
assert b'CDATA' in res.data
assert expected_url.encode('utf-8') in res.data
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
@@ -96,7 +109,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
# Enable auto pickup of <title> in settings
res = client.post(
url_for("settings_page"),
data={"extract_title_as_title": "1", "minutes_between_check": 180, 'fetch_backend': "html_requests"},
data={"application-extract_title_as_title": "1", "requests-minutes_between_check": 180, 'application-fetch_backend': "html_requests"},
follow_redirects=True
)

View File

@@ -0,0 +1,38 @@
#!/usr/bin/python3
"""Test suite for the method to extract text from an html string"""
from ..html_tools import html_to_text
def test_html_to_text_func():
test_html = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
<a href="/first_link"> More Text </a>
</br>
So let's see what happens. </br>
<a href="second_link.com"> Even More Text </a>
</body>
</html>
"""
# extract text, with 'render_anchor_tag_content' set to False
text_content = html_to_text(test_html, render_anchor_tag_content=False)
no_links_text = \
"Some initial text\n\nWhich is across multiple " \
"lines\n\nMore Text So let's see what happens. Even More Text"
# check that no links are in the extracted text
assert text_content == no_links_text
# extract text, with 'render_anchor_tag_content' set to True
text_content = html_to_text(test_html, render_anchor_tag_content=True)
links_text = \
"Some initial text\n\nWhich is across multiple lines\n\n[ More Text " \
"](/first_link) So let's see what happens. [ Even More Text ]" \
"(second_link.com)"
# check that links are present in the extracted text
assert text_content == links_text

View File

@@ -196,9 +196,9 @@ def test_check_global_ignore_text_functionality(client, live_server):
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"global_ignore_text": ignore_text,
'fetch_backend': "html_requests"
"requests-minutes_between_check": 180,
"application-global_ignore_text": ignore_text,
'application-fetch_backend': "html_requests"
},
follow_redirects=True
)

View File

@@ -0,0 +1,219 @@
#!/usr/bin/python3
"""Test suite for the render/not render anchor tag content functionality"""
import time
from flask import url_for
from .util import live_server_setup
def test_setup(live_server):
live_server_setup(live_server)
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
<a href="/original_link"> Some More Text </a>
</br>
So let's see what happens. </br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
# Should be the same as set_original_ignore_response() but with a different
# link
def set_modified_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
<a href="/modified_link"> Some More Text </a>
</br>
So let's see what happens. </br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_render_anchor_tag_content_true(client, live_server):
"""Testing that the link changes are detected when
render_anchor_tag_content setting is set to true"""
sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up
time.sleep(1)
# set original html text
set_original_ignore_response()
# Goto the settings page, choose not to ignore links
res = client.post(
url_for("settings_page"),
data={
"requests-minutes_between_check": 180,
"application-render_anchor_tag_content": "true",
"application-fetch_backend": "html_requests",
},
follow_redirects=True,
)
assert b"Settings updated." in res.data
# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
res = client.post(
url_for("import_page"), data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# set a new html text with a modified link
set_modified_ignore_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# check that the anchor tag content is rendered
res = client.get(url_for("preview_page", uuid="first"))
assert '(/modified_link)' in res.data.decode()
# since the link has changed, and we chose to render anchor tag content,
# we should detect a change (new 'unviewed' class)
res = client.get(url_for("index"))
assert b"unviewed" in res.data
assert b"/test-endpoint" in res.data
# Cleanup everything
res = client.get(url_for("api_delete", uuid="all"),
follow_redirects=True)
assert b'Deleted' in res.data
def test_render_anchor_tag_content_false(client, live_server):
"""Testing that anchor tag content changes are ignored when
render_anchor_tag_content setting is set to false"""
sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up
time.sleep(1)
# set the original html text
set_original_ignore_response()
# Goto the settings page, choose to ignore hyperlinks
res = client.post(
url_for("settings_page"),
data={
"requests-minutes_between_check": 180,
"application-render_anchor_tag_content": "false",
"application-fetch_backend": "html_requests",
},
follow_redirects=True,
)
assert b"Settings updated." in res.data
# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
res = client.post(
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# set a new html text, with a modified link
set_modified_ignore_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# check that the anchor tag content is not rendered
res = client.get(url_for("preview_page", uuid="first"))
assert '(/modified_link)' not in res.data.decode()
# even though the link has changed, we shouldn't detect a change since
# we selected to not render anchor tag content (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b"unviewed" not in res.data
assert b"/test-endpoint" in res.data
# Cleanup everything
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_render_anchor_tag_content_default(client, live_server):
"""Testing that anchor tag content changes are ignored when the
render_anchor_tag_content setting is not explicitly selected"""
sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up
time.sleep(1)
# set the original html text
set_original_ignore_response()
# Goto the settings page, not passing the render_anchor_tag_content setting
res = client.post(
url_for("settings_page"),
data={
"requests-minutes_between_check": 180,
"application-fetch_backend": "html_requests",
},
follow_redirects=True,
)
assert b"Settings updated." in res.data
# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
res = client.post(
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# set a new html text, with a modified link
set_modified_ignore_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# check that the anchor tag content is not rendered
res = client.get(url_for("preview_page", uuid="first"))
assert '(/modified_link)' not in res.data.decode()
# even though the link has changed, we shouldn't detect a change since
# we did not select the setting and the default behaviour is to not
# render anchor tag content (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b"unviewed" not in res.data
assert b"/test-endpoint" in res.data
# Cleanup everything
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -51,9 +51,9 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server):
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"ignore_status_codes": "y",
'fetch_backend': "html_requests"
"requests-minutes_between_check": 180,
"application-ignore_status_codes": "y",
'application-fetch_backend': "html_requests"
},
follow_redirects=True
)

View File

@@ -61,9 +61,9 @@ def test_check_ignore_whitespace(client, live_server):
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"ignore_whitespace": "y",
'fetch_backend': "html_requests"
"requests-minutes_between_check": 180,
"application-ignore_whitespace": "y",
"application-fetch_backend": "html_requests"
},
follow_redirects=True
)

View File

@@ -2,15 +2,17 @@ import os
import time
import re
from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup
from . util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup
import logging
from changedetectionio.notification import default_notification_body, default_notification_title
def test_setup(live_server):
live_server_setup(live_server)
# Hard to just add more live server URLs when one test is already running (I think)
# So we add our test here (was in a different file)
def test_check_notification(client, live_server):
live_server_setup(live_server)
set_original_response()
# Give the endpoint time to spin up
@@ -49,84 +51,76 @@ def test_check_notification(client, live_server):
notification_url = url.replace('http', 'json')
print (">>>> Notification URL: "+notification_url)
notification_form_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {watch_url}",
"notification_body": "BASE URL: {base_url}\n"
"Watch URL: {watch_url}\n"
"Watch UUID: {watch_uuid}\n"
"Watch title: {watch_title}\n"
"Watch tag: {watch_tag}\n"
"Preview: {preview_url}\n"
"Diff URL: {diff_url}\n"
"Snapshot: {current_snapshot}\n"
"Diff: {diff}\n"
"Diff Full: {diff_full}\n"
":-)",
"notification_format": "Text"}
notification_form_data.update({
"url": test_url,
"tag": "my tag",
"title": "my title",
"headers": "",
"fetch_backend": "html_requests"})
res = client.post(
url_for("edit_page", uuid="first"),
data={"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {watch_url}",
"notification_body": "BASE URL: {base_url}\n"
"Watch URL: {watch_url}\n"
"Watch UUID: {watch_uuid}\n"
"Watch title: {watch_title}\n"
"Watch tag: {watch_tag}\n"
"Preview: {preview_url}\n"
"Diff URL: {diff_url}\n"
"Snapshot: {current_snapshot}\n"
"Diff: {diff}\n"
"Diff Full: {diff_full}\n"
":-)",
"notification_format": "Text",
"url": test_url,
"tag": "my tag",
"title": "my title",
"headers": "",
"fetch_backend": "html_requests",
"trigger_check": "y"},
data=notification_form_data,
follow_redirects=True
)
assert b"Updated watch." in res.data
assert b"Test notification queued" in res.data
# Hit the edit page, be sure that we saved it
# Re #242 - wasnt saving?
res = client.get(
url_for("edit_page", uuid="first"))
assert bytes(notification_url.encode('utf-8')) in res.data
# Re #242 - wasnt saving?
assert bytes("New ChangeDetection.io Notification".encode('utf-8')) in res.data
# Because we hit 'send test notification on save'
## Now recheck, and it should have sent the notification
time.sleep(3)
set_modified_response()
notification_submission = None
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
time.sleep(3)
# Verify what was sent as a notification, this file should exist
with open("test-datastore/notification.txt", "r") as f:
notification_submission = f.read()
# Did we see the URL that had a change, in the notification?
assert test_url in notification_submission
os.unlink("test-datastore/notification.txt")
set_modified_response()
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(3)
# Did the front end see it?
res = client.get(
url_for("index"))
assert bytes("just now".encode('utf-8')) in res.data
notification_submission=None
# Verify what was sent as a notification
with open("test-datastore/notification.txt", "r") as f:
notification_submission = f.read()
# Did we see the URL that had a change, in the notification?
assert test_url in notification_submission
# Did we see the URL that had a change, in the notification?
# Diff was correctly executed
assert test_url in notification_submission
assert ':-)' in notification_submission
assert "Diff Full: Some initial text" in notification_submission
assert "Diff: (changed) Which is across multiple lines" in notification_submission
assert "(-> into) which has this one new line" in notification_submission
assert "(into ) which has this one new line" in notification_submission
# Re #342 - check for accidental python byte encoding of non-utf8/string
assert "b'" not in notification_submission
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
assert "Watch title: my title" in notification_submission
assert "Watch tag: my tag" in notification_submission
assert "diff/" in notification_submission
assert "preview/" in notification_submission
assert ":-)" in notification_submission
assert "New ChangeDetection.io Notification - {}".format(test_url) in notification_submission
if env_base_url:
# Re #65 - did we see our BASE_URl ?
@@ -135,50 +129,17 @@ def test_check_notification(client, live_server):
else:
logging.debug(">>> Skipping BASE_URL check")
## Now configure something clever, we go into custom config (non-default) mode, this is returned by the endpoint
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(";jasdhflkjadshf kjhsdfkjl ahslkjf haslkjd hfaklsj hf\njl;asdhfkasj stuff we will detect\n")
res = client.post(
url_for("settings_page"),
data={"notification_title": "New ChangeDetection.io Notification - {watch_url}",
"notification_urls": "json://foobar.com", #Re #143 should not see that it sent without [test checkbox]
"minutes_between_check": 180,
"fetch_backend": "html_requests",
},
follow_redirects=True
)
assert b"Settings updated." in res.data
# Re #143 - should not see this if we didnt hit the test box
assert b"Test notification queued" not in res.data
# Trigger a check
# This should insert the {current_snapshot}
set_more_modified_response()
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(3)
# Did the front end see it?
res = client.get(
url_for("index"))
assert bytes("just now".encode('utf-8')) in res.data
# Verify what was sent as a notification, this file should exist
with open("test-datastore/notification.txt", "r") as f:
notification_submission = f.read()
print ("Notification submission was:", notification_submission)
# Re #342 - check for accidental python byte encoding of non-utf8/string
assert "b'" not in notification_submission
assert "Ohh yeah awesome" in notification_submission
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
assert "Watch title: my title" in notification_submission
assert "Watch tag: my tag" in notification_submission
assert "diff/" in notification_submission
assert "preview/" in notification_submission
assert ":-)" in notification_submission
assert "New ChangeDetection.io Notification - {}".format(test_url) in notification_submission
# This should insert the {current_snapshot}
assert "stuff we will detect" in notification_submission
# Prove that "content constantly being marked as Changed with no Updating causes notification" is not a thing
# https://github.com/dgtlmoon/changedetection.io/discussions/192
@@ -186,33 +147,39 @@ def test_check_notification(client, live_server):
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
time.sleep(3)
time.sleep(1)
client.get(url_for("api_watch_checknow"), follow_redirects=True)
time.sleep(3)
time.sleep(1)
client.get(url_for("api_watch_checknow"), follow_redirects=True)
time.sleep(3)
time.sleep(1)
assert os.path.exists("test-datastore/notification.txt") == False
# Now adding a wrong token should give us an error
res = client.post(
url_for("settings_page"),
data={"notification_title": "New ChangeDetection.io Notification - {watch_url}",
"notification_body": "Rubbish: {rubbish}\n",
"notification_format": "Text",
"notification_urls": "json://foobar.com",
"minutes_between_check": 180,
"fetch_backend": "html_requests"
},
# cleanup for the next
client.get(
url_for("api_delete", uuid="first"),
follow_redirects=True
)
assert bytes("is not a valid token".encode('utf-8')) in res.data
def test_notification_validation(client, live_server):
#live_server_setup(live_server)
time.sleep(3)
# re #242 - when you edited an existing new entry, it would not correctly show the notification settings
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("api_watch_add"),
data={"url": test_url, "tag": 'nice one'},
follow_redirects=True
)
with open("xxx.bin", "wb") as f:
f.write(res.data)
assert b"Watch added" in res.data
# Re #360 some validation
res = client.post(
url_for("edit_page", uuid="first"),
data={"notification_urls": notification_url,
data={"notification_urls": 'json://localhost/foobar',
"notification_title": "",
"notification_body": "",
"notification_format": "Text",
@@ -220,8 +187,28 @@ def test_check_notification(client, live_server):
"tag": "my tag",
"title": "my title",
"headers": "",
"fetch_backend": "html_requests",
"trigger_check": "y"},
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Notification Body and Title is required when a Notification URL is used" in res.data
# Now adding a wrong token should give us an error
res = client.post(
url_for("settings_page"),
data={"application-notification_title": "New ChangeDetection.io Notification - {watch_url}",
"application-notification_body": "Rubbish: {rubbish}\n",
"application-notification_format": "Text",
"application-notification_urls": "json://localhost/foobar",
"requests-minutes_between_check": 180,
"fetch_backend": "html_requests"
},
follow_redirects=True
)
assert bytes("is not a valid token".encode('utf-8')) in res.data
# cleanup for the next
client.get(
url_for("api_delete", uuid="first"),
follow_redirects=True
)

View File

@@ -84,9 +84,25 @@ def test_body_in_request(client, live_server):
)
assert b"1 Imported" in res.data
body_value = 'Test Body Value'
time.sleep(3)
# Add a properly formatted body with a proper method
# add the first 'version'
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": test_url,
"tag": "",
"method": "POST",
"fetch_backend": "html_requests",
"body": "something something"},
follow_redirects=True
)
assert b"Updated watch." in res.data
time.sleep(3)
# Now the change which should trigger a change
body_value = 'Test Body Value'
res = client.post(
url_for("edit_page", uuid="first"),
data={

View File

@@ -0,0 +1,95 @@
#!/usr/bin/python3
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup
sleep_time_for_fetch_thread = 3
def test_setup(live_server):
live_server_setup(live_server)
def test_check_basic_change_detection_functionality_source(client, live_server):
set_original_response()
test_url = 'source:'+url_for('test_endpoint', _external=True)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
#####################
# Check HTML conversion detected and workd
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# Check this class DOES appear (that we didnt see the actual source)
assert b'foobar-detection' in res.data
# Make a change
set_modified_response()
# Force recheck
res = client.get(url_for("api_watch_checknow"), follow_redirects=True)
assert b'1 watches are queued for rechecking.' in res.data
time.sleep(5)
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data
res = client.get(
url_for("diff_history_page", uuid="first"),
follow_redirects=True
)
assert b'&lt;title&gt;modified head title' in res.data
def test_check_ignore_elements(client, live_server):
set_original_response()
time.sleep(2)
test_url = 'source:'+url_for('test_endpoint', _external=True)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
#####################
# We want <span> and <p> ONLY, but ignore span with .foobar-detection
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
follow_redirects=True
)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'foobar-detection' not in res.data
assert b'&lt;br' not in res.data
assert b'&lt;p' in res.data

View File

@@ -56,8 +56,8 @@ def test_check_recheck_global_setting(client, live_server):
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 1566,
'fetch_backend': "html_requests"
"requests-minutes_between_check": 1566,
'application-fetch_backend': "html_requests"
},
follow_redirects=True
)
@@ -88,8 +88,8 @@ def test_check_recheck_global_setting(client, live_server):
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 222,
'fetch_backend': "html_requests"
"requests-minutes_between_check": 222,
'application-fetch_backend': "html_requests"
},
follow_redirects=True
)
@@ -124,8 +124,8 @@ def test_check_recheck_global_setting(client, live_server):
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 666,
'fetch_backend': "html_requests"
"requests-minutes_between_check": 666,
'application-fetch_backend': "html_requests"
},
follow_redirects=True
)

View File

@@ -0,0 +1,3 @@
After twenty years, as cursed as I may be
ok
and insure that I'm one of those computer nerds.

View File

@@ -2,5 +2,6 @@ After twenty years, as cursed as I may be
for having learned computerese,
I continue to examine bits, bytes and words
xok
next-x-ok
and insure that I'm one of those computer nerds.
and something new

View File

@@ -12,12 +12,19 @@ from changedetectionio import diff
class TestDiffBuilder(unittest.TestCase):
def test_expected_diff_output(self):
base_dir=os.path.dirname(__file__)
output = diff.render_diff(base_dir+"/test-content/before.txt", base_dir+"/test-content/after.txt")
base_dir = os.path.dirname(__file__)
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
output = output.split("\n")
self.assertIn("(changed) ok", output)
self.assertIn("(-> into) xok", output)
self.assertIn("(added) and something new", output)
self.assertIn('(changed) ok', output)
self.assertIn('(into ) xok', output)
self.assertIn('(into ) next-x-ok', output)
self.assertIn('(added ) and something new', output)
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
output = output.split("\n")
self.assertIn('(removed) for having learned computerese,', output)
self.assertIn('(removed) I continue to examine bits, bytes and words', output)
# @todo test blocks of changed, blocks of added, blocks of removed

View File

@@ -10,6 +10,7 @@ def set_original_response():
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<span class="foobar-detection" style='display:none'></span>
</body>
</html>
"""
@@ -35,6 +36,24 @@ def set_modified_response():
return None
def set_more_modified_response():
test_return_data = """<html>
<head><title>modified head title</title></head>
<body>
Some initial text</br>
<p>which has this one new line</p>
</br>
So let's see what happens. </br>
Ohh yeah awesome<br/>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
return None
def live_server_setup(live_server):
@@ -66,6 +85,7 @@ def live_server_setup(live_server):
# Just return the body in the request
@live_server.app.route('/test-body', methods=['POST', 'GET'])
def test_body():
print ("TEST-BODY GOT", request.data, "returning")
return request.data
# Just return the verb in the request
@@ -82,7 +102,7 @@ def live_server_setup(live_server):
if data != None:
f.write(data)
print("\n>> Test notification endpoint was hit.\n")
print("\n>> Test notification endpoint was hit.\n", data)
return "Text was set"

View File

@@ -2,6 +2,7 @@ import threading
import queue
import time
from changedetectionio import content_fetcher
# A single update worker
#
# Requests for checking on a single site(watch) from a queue of watches
@@ -32,17 +33,17 @@ class update_worker(threading.Thread):
else:
self.current_uuid = uuid
from changedetectionio import content_fetcher
if uuid in list(self.datastore.data['watching'].keys()):
changed_detected = False
contents = ""
screenshot = False
update_obj= {}
now = time.time()
try:
changed_detected, update_obj, contents = update_handler.run(uuid)
changed_detected, update_obj, contents, screenshot = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -140,6 +141,9 @@ class update_worker(threading.Thread):
# Always record that we atleast tried
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time())})
# Always save the screenshot if it's available
if screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
self.current_uuid = None # Done
self.q.task_done()

View File

@@ -13,11 +13,11 @@ requests[socks] ~= 2.26
urllib3 > 1.26
chardet > 2.3.0
wtforms ~= 2.3.3
wtforms ~= 3.0
jsonpath-ng ~= 1.5.3
# Notification library
apprise ~= 0.9.7
apprise ~= 0.9.8.1
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt
@@ -35,3 +35,8 @@ lxml
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
selenium ~= 4.1.0
# https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849
# ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security'
# need to revisit flask login versions
werkzeug ~= 2.0.0