mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-25 02:46:16 +00:00
Compare commits
70 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
19dcbc2f08 | ||
|
|
c59838a6e4 | ||
|
|
0a8c339535 | ||
|
|
cd5b703037 | ||
|
|
90642742bd | ||
|
|
96221598e7 | ||
|
|
98623de38c | ||
|
|
33985dbd9d | ||
|
|
a3a5ca78bf | ||
|
|
3fcbbb3fbf | ||
|
|
70252b24f9 | ||
|
|
0a08616c87 | ||
|
|
beebba487c | ||
|
|
cbeafcbaa0 | ||
|
|
e200cd3289 | ||
|
|
22c7a1a88d | ||
|
|
63eea2d6db | ||
|
|
3e9a110671 | ||
|
|
22bc8fabd1 | ||
|
|
9030070b3d | ||
|
|
fca7bb8583 | ||
|
|
3c175bfc4a | ||
|
|
fd5475ba38 | ||
|
|
b0c5dbd88e | ||
|
|
1718e2e86f | ||
|
|
b46a7fc4b1 | ||
|
|
4770ebb2ea | ||
|
|
d4db082c01 | ||
|
|
c8607ae8bb | ||
|
|
b361a61d18 | ||
|
|
87f4347fe5 | ||
|
|
93ee65fe53 | ||
|
|
9f964b6d3f | ||
|
|
426b09b7e1 | ||
|
|
ec98415c4d | ||
|
|
47e5a7cf09 | ||
|
|
d07cf53a07 | ||
|
|
b9f73a6240 | ||
|
|
5e31ae86d0 | ||
|
|
ef2dd44e7e | ||
|
|
07f41782c0 | ||
|
|
d93926a8b6 | ||
|
|
7072858814 | ||
|
|
cd5c05e72a | ||
|
|
3034d17c06 | ||
|
|
3b2c8d356a | ||
|
|
711853a149 | ||
|
|
5669ae70cc | ||
|
|
084dcde410 | ||
|
|
37b070f5a0 | ||
|
|
3952f3a207 | ||
|
|
0c3d5e55ab | ||
|
|
6a102374c6 | ||
|
|
bbd99c9aa9 | ||
|
|
26c9a6e0fc | ||
|
|
c4197a5045 | ||
|
|
f1c2ece32f | ||
|
|
704b8daa6d | ||
|
|
9ec820fa97 | ||
|
|
e7e3eb36c0 | ||
|
|
801b50cb5b | ||
|
|
eecc620386 | ||
|
|
25b565d9ba | ||
|
|
7b4ed2429d | ||
|
|
4e0fb33580 | ||
|
|
4931e757b9 | ||
|
|
3e934e8f8c | ||
|
|
118814912f | ||
|
|
4013e34899 | ||
|
|
b58cf76445 |
37
.github/workflows/python-app.yml
vendored
Normal file
37
.github/workflows/python-app.yml
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a single version of Python
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: changedetection.io
|
||||
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.9
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
cd backend; pytest
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -3,3 +3,5 @@ __pycache__
|
||||
*.pyc
|
||||
datastore/url-watches.json
|
||||
datastore/*
|
||||
__pycache__
|
||||
.pytest_cache
|
||||
|
||||
19
Dockerfile
19
Dockerfile
@@ -2,16 +2,27 @@ FROM python:3.8-slim
|
||||
COPY requirements.txt /tmp/requirements.txt
|
||||
RUN pip3 install -r /tmp/requirements.txt
|
||||
|
||||
COPY backend /app
|
||||
|
||||
RUN [ ! -d "/app" ] && mkdir /app
|
||||
RUN [ ! -d "/datastore" ] && mkdir /datastore
|
||||
|
||||
# The actual flask app
|
||||
COPY backend /app/backend
|
||||
|
||||
# The eventlet server wrapper
|
||||
COPY changedetection.py /app/changedetection.py
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN [ ! -d "/datastore" ] && mkdir /datastore
|
||||
# Attempt to store the triggered commit
|
||||
ARG SOURCE_COMMIT
|
||||
ARG SOURCE_BRANCH
|
||||
RUN echo "commit: $SOURCE_COMMIT branch: $SOURCE_BRANCH" >/source.txt
|
||||
|
||||
CMD [ "python", "./backend.py" ]
|
||||
CMD [ "python", "./changedetection.py" , "-d", "/datastore"]
|
||||
|
||||
|
||||
|
||||
|
||||
201
LICENSE
Normal file
201
LICENSE
Normal file
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
30
README.md
30
README.md
@@ -1,4 +1,8 @@
|
||||
# changedetection.io
|
||||

|
||||
<a href="https://hub.docker.com/r/dgtlmoon/changedetection.io" target="_blank" title="Docker Pulls">
|
||||
<img src="https://img.shields.io/docker/pulls/dgtlmoon/changedetection.io" alt="Docker Pulls"/>
|
||||
</a>
|
||||
|
||||
## Self-hosted change monitoring of web pages.
|
||||
|
||||
@@ -11,23 +15,39 @@ Know when ...
|
||||
|
||||
- Government department updates (changes are often only on their websites)
|
||||
- Local government news (changes are often only on their websites)
|
||||
- New software releases
|
||||
- New software releases, security advisories when you're not on their mailing list.
|
||||
- Festivals with changes
|
||||
- Realestate listing changes
|
||||
|
||||
|
||||
Get monitoring now! super simple, one command!
|
||||
**Get monitoring now! super simple, one command!**
|
||||
|
||||
```
|
||||
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore dgtlmoon/changedetection.io
|
||||
```bash
|
||||
docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
|
||||
```
|
||||
|
||||
Now visit http://127.0.0.1:5000 , You should now be able to access the UI.
|
||||
|
||||
#### Updating to latest version
|
||||
|
||||
Highly recommended :)
|
||||
|
||||

|
||||
```bash
|
||||
docker pull dgtlmoon/changedetection.io
|
||||
docker kill $(docker ps -a|grep changedetection.io|awk '{print $1}')
|
||||
docker rm $(docker ps -a|grep changedetection.io|awk '{print $1}')
|
||||
docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
|
||||
```
|
||||
|
||||
### Screenshots
|
||||
|
||||
Application running.
|
||||
|
||||

|
||||
|
||||
Examining differences in content.
|
||||
|
||||

|
||||
|
||||
### Future plans
|
||||
|
||||
|
||||
1
backend/README-pytest.md
Normal file
1
backend/README-pytest.md
Normal file
@@ -0,0 +1 @@
|
||||
Note: run `pytest` from this directory.
|
||||
487
backend/__init__.py
Normal file
487
backend/__init__.py
Normal file
@@ -0,0 +1,487 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
|
||||
# @todo logging
|
||||
# @todo sort by last_changed
|
||||
# @todo extra options for url like , verify=False etc.
|
||||
# @todo enable https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl as option?
|
||||
# @todo maybe a button to reset all 'last-changed'.. so you can see it clearly when something happens since your last visit
|
||||
# @todo option for interval day/6 hour/etc
|
||||
# @todo on change detected, config for calling some API
|
||||
# @todo make tables responsive!
|
||||
# @todo fetch title into json
|
||||
# https://distill.io/features
|
||||
# proxy per check
|
||||
# - flask_cors, itsdangerous,MarkupSafe
|
||||
|
||||
import time
|
||||
import os
|
||||
import timeago
|
||||
|
||||
import threading
|
||||
import queue
|
||||
|
||||
from flask import Flask, render_template, request, send_file, send_from_directory, abort, redirect, url_for
|
||||
|
||||
datastore = None
|
||||
|
||||
# Local
|
||||
running_update_threads = []
|
||||
ticker_thread = None
|
||||
|
||||
messages = []
|
||||
extra_stylesheets = []
|
||||
|
||||
update_q = queue.Queue()
|
||||
|
||||
app = Flask(__name__, static_url_path="/var/www/change-detection/backen/static")
|
||||
|
||||
# Stop browser caching of assets
|
||||
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
|
||||
|
||||
app.config['STOP_THREADS'] = False
|
||||
|
||||
# Disables caching of the templates
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = True
|
||||
|
||||
|
||||
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
|
||||
# running or something similar.
|
||||
@app.template_filter('format_last_checked_time')
|
||||
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
# Worker thread tells us which UUID it is currently processing.
|
||||
for t in running_update_threads:
|
||||
if t.current_uuid == watch_obj['uuid']:
|
||||
return "Checking now.."
|
||||
|
||||
if watch_obj['last_checked'] == 0:
|
||||
return 'Not yet'
|
||||
|
||||
return timeago.format(int(watch_obj['last_checked']), time.time())
|
||||
|
||||
|
||||
# @app.context_processor
|
||||
# def timeago():
|
||||
# def _timeago(lower_time, now):
|
||||
# return timeago.format(lower_time, now)
|
||||
# return dict(timeago=_timeago)
|
||||
|
||||
@app.template_filter('format_timestamp_timeago')
|
||||
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
|
||||
return timeago.format(timestamp, time.time())
|
||||
# return timeago.format(timestamp, time.time())
|
||||
# return datetime.datetime.utcfromtimestamp(timestamp).strftime(format)
|
||||
|
||||
|
||||
def changedetection_app(config=None, datastore_o=None):
|
||||
global datastore
|
||||
datastore = datastore_o
|
||||
# Hmm
|
||||
app.config.update(dict(DEBUG=True))
|
||||
app.config.update(config or {})
|
||||
|
||||
# Setup cors headers to allow all domains
|
||||
# https://flask-cors.readthedocs.io/en/latest/
|
||||
# CORS(app)
|
||||
|
||||
# https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39
|
||||
# You can divide up the stuff like this
|
||||
|
||||
@app.route("/", methods=['GET'])
|
||||
def index():
|
||||
global messages
|
||||
|
||||
limit_tag = request.args.get('tag')
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
sorted_watches = []
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
|
||||
if limit_tag != None:
|
||||
# Support for comma separated list of tags.
|
||||
for tag_in_watch in watch['tag'].split(','):
|
||||
tag_in_watch = tag_in_watch.strip()
|
||||
if tag_in_watch == limit_tag:
|
||||
watch['uuid'] = uuid
|
||||
sorted_watches.append(watch)
|
||||
|
||||
else:
|
||||
watch['uuid'] = uuid
|
||||
sorted_watches.append(watch)
|
||||
|
||||
sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)
|
||||
|
||||
existing_tags = datastore.get_all_tags()
|
||||
output = render_template("watch-overview.html",
|
||||
watches=sorted_watches,
|
||||
messages=messages,
|
||||
tags=existing_tags,
|
||||
active_tag=limit_tag)
|
||||
|
||||
# Show messages but once.
|
||||
messages = []
|
||||
return output
|
||||
|
||||
@app.route("/scrub", methods=['GET', 'POST'])
|
||||
def scrub_page():
|
||||
from pathlib import Path
|
||||
|
||||
global messages
|
||||
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext == 'scrub':
|
||||
|
||||
for txt_file_path in Path(app.config['datastore_path']).rglob('*.txt'):
|
||||
os.unlink(txt_file_path)
|
||||
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
watch['last_checked'] = 0
|
||||
watch['last_changed'] = 0
|
||||
watch['previous_md5'] = None
|
||||
watch['history'] = {}
|
||||
|
||||
datastore.needs_write = True
|
||||
messages.append({'class': 'ok', 'message': 'Cleaned all version history.'})
|
||||
else:
|
||||
messages.append({'class': 'error', 'message': 'Wrong confirm text.'})
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
return render_template("scrub.html")
|
||||
|
||||
@app.route("/edit", methods=['GET', 'POST'])
|
||||
def edit_page():
|
||||
global messages
|
||||
import validators
|
||||
|
||||
if request.method == 'POST':
|
||||
uuid = request.args.get('uuid')
|
||||
|
||||
url = request.form.get('url').strip()
|
||||
tag = request.form.get('tag').strip()
|
||||
|
||||
form_headers = request.form.get('headers').strip().split("\n")
|
||||
extra_headers = {}
|
||||
if form_headers:
|
||||
for header in form_headers:
|
||||
if len(header):
|
||||
parts = header.split(':', 1)
|
||||
extra_headers.update({parts[0].strip(): parts[1].strip()})
|
||||
|
||||
validators.url(url) # @todo switch to prop/attr/observer
|
||||
datastore.data['watching'][uuid].update({'url': url,
|
||||
'tag': tag,
|
||||
'headers': extra_headers})
|
||||
datastore.needs_write = True
|
||||
|
||||
messages.append({'class': 'ok', 'message': 'Updated watch.'})
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
else:
|
||||
|
||||
uuid = request.args.get('uuid')
|
||||
output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], messages=messages)
|
||||
|
||||
return output
|
||||
|
||||
@app.route("/settings", methods=['GET', "POST"])
|
||||
def settings_page():
|
||||
global messages
|
||||
if request.method == 'POST':
|
||||
try:
|
||||
minutes = int(request.values.get('minutes').strip())
|
||||
except ValueError:
|
||||
messages.append({'class': 'error', 'message': "Invalid value given, use an integer."})
|
||||
|
||||
else:
|
||||
if minutes >= 5:
|
||||
datastore.data['settings']['requests']['minutes_between_check'] = minutes
|
||||
datastore.needs_write = True
|
||||
|
||||
messages.append({'class': 'ok', 'message': "Updated"})
|
||||
else:
|
||||
messages.append(
|
||||
{'class': 'error', 'message': "Must be atleast 5 minutes."})
|
||||
|
||||
output = render_template("settings.html", messages=messages,
|
||||
minutes=datastore.data['settings']['requests']['minutes_between_check'])
|
||||
messages = []
|
||||
|
||||
return output
|
||||
|
||||
@app.route("/import", methods=['GET', "POST"])
|
||||
def import_page():
|
||||
import validators
|
||||
global messages
|
||||
remaining_urls = []
|
||||
|
||||
good = 0
|
||||
|
||||
if request.method == 'POST':
|
||||
urls = request.values.get('urls').split("\n")
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if len(url) and validators.url(url):
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag="")
|
||||
# Straight into the queue.
|
||||
update_q.put(new_uuid)
|
||||
good += 1
|
||||
else:
|
||||
if len(url):
|
||||
remaining_urls.append(url)
|
||||
|
||||
messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))})
|
||||
|
||||
if len(remaining_urls) == 0:
|
||||
return redirect(url_for('index'))
|
||||
else:
|
||||
output = render_template("import.html",
|
||||
messages=messages,
|
||||
remaining="\n".join(remaining_urls)
|
||||
)
|
||||
messages = []
|
||||
return output
|
||||
|
||||
@app.route("/diff/<string:uuid>", methods=['GET'])
|
||||
def diff_history_page(uuid):
|
||||
global messages
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid= list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
|
||||
extra_stylesheets = ['/static/css/diff.css']
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
dates = list(watch['history'].keys())
|
||||
# Convert to int, sort and back to str again
|
||||
dates = [int(i) for i in dates]
|
||||
dates.sort(reverse=True)
|
||||
dates = [str(i) for i in dates]
|
||||
|
||||
|
||||
if len(dates) < 2:
|
||||
messages.append({'class': 'error', 'message': "Not enough saved change detection snapshots to produce a report."})
|
||||
return redirect(url_for('index'))
|
||||
|
||||
# Save the current newest history as the most recently viewed
|
||||
datastore.set_last_viewed(uuid, dates[0])
|
||||
|
||||
newest_file = watch['history'][dates[0]]
|
||||
with open(newest_file, 'r') as f:
|
||||
newest_version_file_contents = f.read()
|
||||
|
||||
previous_version = request.args.get('previous_version')
|
||||
|
||||
try:
|
||||
previous_file = watch['history'][previous_version]
|
||||
except KeyError:
|
||||
# Not present, use a default value, the second one in the sorted list.
|
||||
previous_file = watch['history'][dates[1]]
|
||||
|
||||
with open(previous_file, 'r') as f:
|
||||
previous_version_file_contents = f.read()
|
||||
|
||||
output = render_template("diff.html", watch_a=watch,
|
||||
messages=messages,
|
||||
newest=newest_version_file_contents,
|
||||
previous=previous_version_file_contents,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
versions=dates[1:],
|
||||
newest_version_timestamp=dates[0],
|
||||
current_previous_version=str(previous_version),
|
||||
current_diff_url=watch['url'])
|
||||
|
||||
return output
|
||||
|
||||
@app.route("/favicon.ico", methods=['GET'])
|
||||
def favicon():
|
||||
return send_from_directory("/app/static/images", filename="favicon.ico")
|
||||
|
||||
# We're good but backups are even better!
|
||||
@app.route("/backup", methods=['GET'])
|
||||
def get_backup():
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
# create a ZipFile object
|
||||
backupname = "changedetection-backup-{}.zip".format(int(time.time()))
|
||||
|
||||
# We only care about UUIDS from the current index file
|
||||
uuids = list(datastore.data['watching'].keys())
|
||||
|
||||
with zipfile.ZipFile(os.path.join(app.config['datastore_path'], backupname), 'w',
|
||||
compression=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=6) as zipObj:
|
||||
|
||||
# Be sure we're written fresh
|
||||
datastore.sync_to_json()
|
||||
|
||||
# Add the index
|
||||
zipObj.write(os.path.join(app.config['datastore_path'], "url-watches.json"))
|
||||
# Add any snapshot data we find
|
||||
for txt_file_path in Path(app.config['datastore_path']).rglob('*.txt'):
|
||||
parent_p = txt_file_path.parent
|
||||
if parent_p.name in uuids:
|
||||
zipObj.write(txt_file_path)
|
||||
|
||||
return send_file(os.path.join(app.config['datastore_path'], backupname),
|
||||
as_attachment=True,
|
||||
mimetype="application/zip",
|
||||
attachment_filename=backupname)
|
||||
|
||||
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
|
||||
def static_content(group, filename):
|
||||
# These files should be in our subdirectory
|
||||
full_path = os.path.realpath(__file__)
|
||||
p = os.path.dirname(full_path)
|
||||
|
||||
try:
|
||||
return send_from_directory("{}/static/{}".format(p, group), filename=filename)
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
@app.route("/api/add", methods=['POST'])
|
||||
def api_watch_add():
|
||||
global messages
|
||||
|
||||
# @todo add_watch should throw a custom Exception for validation etc
|
||||
new_uuid = datastore.add_watch(url=request.form.get('url').strip(), tag=request.form.get('tag').strip())
|
||||
# Straight into the queue.
|
||||
update_q.put(new_uuid)
|
||||
|
||||
messages.append({'class': 'ok', 'message': 'Watch added.'})
|
||||
return redirect(url_for('index'))
|
||||
|
||||
@app.route("/api/delete", methods=['GET'])
|
||||
def api_delete():
|
||||
global messages
|
||||
uuid = request.args.get('uuid')
|
||||
datastore.delete(uuid)
|
||||
messages.append({'class': 'ok', 'message': 'Deleted.'})
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
@app.route("/api/checknow", methods=['GET'])
|
||||
def api_watch_checknow():
|
||||
|
||||
global messages
|
||||
|
||||
tag = request.args.get('tag')
|
||||
uuid = request.args.get('uuid')
|
||||
i = 0
|
||||
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
running_uuids.append(t.current_uuid)
|
||||
|
||||
# @todo check thread is running and skip
|
||||
|
||||
if uuid:
|
||||
if uuid not in running_uuids:
|
||||
update_q.put(uuid)
|
||||
i = 1
|
||||
|
||||
elif tag != None:
|
||||
# Items that have this current tag
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if (tag != None and tag in watch['tag']):
|
||||
i += 1
|
||||
if watch_uuid not in running_uuids:
|
||||
update_q.put(watch_uuid)
|
||||
else:
|
||||
# No tag, no uuid, add everything.
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
i += 1
|
||||
if watch_uuid not in running_uuids:
|
||||
update_q.put(watch_uuid)
|
||||
|
||||
messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)})
|
||||
return redirect(url_for('index', tag=tag))
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
||||
|
||||
return app
|
||||
|
||||
|
||||
# Requests for checking on the site use a pool of thread Workers managed by a Queue.
|
||||
class Worker(threading.Thread):
|
||||
current_uuid = None
|
||||
|
||||
def __init__(self, q, *args, **kwargs):
|
||||
self.q = q
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def run(self):
|
||||
from backend import fetch_site_status
|
||||
|
||||
update_handler = fetch_site_status.perform_site_check(datastore=datastore)
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
uuid = self.q.get(block=True, timeout=1)
|
||||
except queue.Empty:
|
||||
# We have a chance to kill this thread that needs to monitor for new jobs..
|
||||
# Delays here would be caused by a current response object pending
|
||||
# @todo switch to threaded response handler
|
||||
if app.config['STOP_THREADS']:
|
||||
return
|
||||
else:
|
||||
self.current_uuid = uuid
|
||||
|
||||
if uuid in list(datastore.data['watching'].keys()):
|
||||
|
||||
try:
|
||||
changed_detected, result, contents = update_handler.run(uuid)
|
||||
|
||||
except PermissionError as s:
|
||||
app.logger.error("File permission error updating", uuid, str(s))
|
||||
else:
|
||||
if result:
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj=result)
|
||||
if changed_detected:
|
||||
# A change was detected
|
||||
datastore.save_history_text(uuid=uuid, contents=contents, result_obj=result)
|
||||
|
||||
|
||||
self.current_uuid = None # Done
|
||||
self.q.task_done()
|
||||
|
||||
|
||||
# Thread runner to check every minute, look for new watches to feed into the Queue.
|
||||
def ticker_thread_check_time_launch_checks():
|
||||
# Spin up Workers.
|
||||
for _ in range(datastore.data['settings']['requests']['workers']):
|
||||
new_worker = Worker(update_q)
|
||||
running_update_threads.append(new_worker)
|
||||
new_worker.start()
|
||||
|
||||
# Every minute check for new UUIDs to follow up on
|
||||
while True:
|
||||
|
||||
if app.config['STOP_THREADS']:
|
||||
return
|
||||
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
running_uuids.append(t.current_uuid)
|
||||
|
||||
# Look at the dataset, find a stale watch to process
|
||||
minutes = datastore.data['settings']['requests']['minutes_between_check']
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
if watch['last_checked'] <= time.time() - (minutes * 60):
|
||||
|
||||
# @todo maybe update_q.queue is enough?
|
||||
if not uuid in running_uuids and uuid not in update_q.queue:
|
||||
update_q.put(uuid)
|
||||
|
||||
# Should be low so we can break this out in testing
|
||||
time.sleep(1)
|
||||
@@ -1,365 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
|
||||
# @todo logging
|
||||
# @todo sort by last_changed
|
||||
# @todo extra options for url like , verify=False etc.
|
||||
# @todo enable https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl as option?
|
||||
# @todo maybe a button to reset all 'last-changed'.. so you can see it clearly when something happens since your last visit
|
||||
# @todo option for interval day/6 hour/etc
|
||||
# @todo on change detected, config for calling some API
|
||||
# @todo make tables responsive!
|
||||
# @todo fetch title into json
|
||||
# https://distill.io/features
|
||||
# proxy per check
|
||||
#i
|
||||
import json
|
||||
import eventlet
|
||||
import eventlet.wsgi
|
||||
|
||||
import time
|
||||
import os
|
||||
import getopt
|
||||
import sys
|
||||
import datetime
|
||||
import timeago
|
||||
|
||||
import threading
|
||||
|
||||
from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for
|
||||
|
||||
# Local
|
||||
import store
|
||||
import fetch_site_status
|
||||
|
||||
ticker_thread = None
|
||||
|
||||
datastore = store.ChangeDetectionStore()
|
||||
messages = []
|
||||
extra_stylesheets = []
|
||||
running_update_threads = {}
|
||||
|
||||
app = Flask(__name__, static_url_path='/static')
|
||||
app.config['STATIC_RESOURCES'] = "/app/static"
|
||||
|
||||
# app.config['SECRET_KEY'] = 'secret!'
|
||||
|
||||
# Disables caching of the templates
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = True
|
||||
|
||||
|
||||
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
|
||||
# running or something similar.
|
||||
@app.template_filter('format_last_checked_time')
|
||||
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
global running_update_threads
|
||||
if watch_obj['uuid'] in running_update_threads:
|
||||
if running_update_threads[watch_obj['uuid']].is_alive():
|
||||
return "Checking now.."
|
||||
|
||||
if watch_obj['last_checked'] == 0:
|
||||
return 'Not yet'
|
||||
|
||||
return timeago.format(int(watch_obj['last_checked']), time.time())
|
||||
|
||||
|
||||
# @app.context_processor
|
||||
# def timeago():
|
||||
# def _timeago(lower_time, now):
|
||||
# return timeago.format(lower_time, now)
|
||||
# return dict(timeago=_timeago)
|
||||
|
||||
@app.template_filter('format_timestamp_timeago')
|
||||
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
|
||||
if timestamp == 0:
|
||||
return 'Not yet'
|
||||
return timeago.format(timestamp, time.time())
|
||||
# return timeago.format(timestamp, time.time())
|
||||
# return datetime.datetime.utcfromtimestamp(timestamp).strftime(format)
|
||||
|
||||
|
||||
@app.route("/", methods=['GET'])
|
||||
def main_page():
|
||||
global messages
|
||||
|
||||
limit_tag = request.args.get('tag')
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
sorted_watches = []
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
if limit_tag != None:
|
||||
# Support for comma separated list of tags.
|
||||
for tag_in_watch in watch['tag'].split(','):
|
||||
tag_in_watch = tag_in_watch.strip()
|
||||
if tag_in_watch == limit_tag:
|
||||
watch['uuid'] = uuid
|
||||
sorted_watches.append(watch)
|
||||
|
||||
else:
|
||||
watch['uuid'] = uuid
|
||||
sorted_watches.append(watch)
|
||||
|
||||
sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)
|
||||
|
||||
existing_tags = datastore.get_all_tags()
|
||||
output = render_template("watch-overview.html",
|
||||
watches=sorted_watches,
|
||||
messages=messages,
|
||||
tags=existing_tags,
|
||||
active_tag=limit_tag)
|
||||
|
||||
# Show messages but once.
|
||||
messages = []
|
||||
return output
|
||||
|
||||
|
||||
@app.route("/edit", methods=['GET'])
|
||||
def edit_page():
|
||||
global messages
|
||||
|
||||
uuid = request.args.get('uuid')
|
||||
output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], messages=messages)
|
||||
return output
|
||||
|
||||
|
||||
@app.route("/settings", methods=['GET', "POST"])
|
||||
def settings_page():
|
||||
global messages
|
||||
if request.method == 'POST':
|
||||
try:
|
||||
minutes = int(request.values.get('minutes').strip())
|
||||
except ValueError:
|
||||
messages.append({'class': 'error', 'message': "Invalid value given, use an integer."})
|
||||
|
||||
else:
|
||||
if minutes >= 5 and minutes <= 600:
|
||||
datastore.data['settings']['requests']['minutes_between_check'] = minutes
|
||||
datastore.needs_write = True
|
||||
|
||||
messages.append({'class': 'ok', 'message': "Updated"})
|
||||
else:
|
||||
messages.append({'class': 'error', 'message': "Must be equal to or greater than 5 and less than 600 minutes"})
|
||||
|
||||
output = render_template("settings.html", messages=messages, minutes=datastore.data['settings']['requests']['minutes_between_check'])
|
||||
messages =[]
|
||||
|
||||
return output
|
||||
|
||||
@app.route("/import", methods=['GET', "POST"])
|
||||
def import_page():
|
||||
import validators
|
||||
global messages
|
||||
remaining_urls=[]
|
||||
|
||||
good = 0
|
||||
|
||||
if request.method == 'POST':
|
||||
urls = request.values.get('urls').split("\n")
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if len(url) and validators.url(url):
|
||||
datastore.add_watch(url=url.strip(), tag="")
|
||||
good += 1
|
||||
else:
|
||||
if len(url):
|
||||
remaining_urls.append(url)
|
||||
|
||||
messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))})
|
||||
|
||||
launch_checks()
|
||||
|
||||
output = render_template("import.html",
|
||||
messages=messages,
|
||||
remaining="\n".join(remaining_urls)
|
||||
)
|
||||
messages = []
|
||||
return output
|
||||
|
||||
|
||||
@app.route("/diff/<string:uuid>", methods=['GET'])
|
||||
def diff_history_page(uuid):
|
||||
global messages
|
||||
global extra_stylesheets
|
||||
extra_stylesheets.append('/static/css/diff.css')
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
dates = list(watch['history'].keys())
|
||||
dates = [int(i) for i in dates]
|
||||
dates.sort(reverse=True)
|
||||
|
||||
left_file_contents = right_file_contents = ""
|
||||
l_file = watch['history'][str(dates[-1])]
|
||||
with open(l_file, 'r') as f:
|
||||
left_file_contents = f.read()
|
||||
|
||||
r_file = watch['history'][str(dates[-2])]
|
||||
with open(r_file, 'r') as f:
|
||||
right_file_contents = f.read()
|
||||
|
||||
output = render_template("diff.html", watch_a=watch, messages=messages, left=left_file_contents,
|
||||
right=right_file_contents, extra_stylesheets=extra_stylesheets)
|
||||
return output
|
||||
|
||||
@app.route("/favicon.ico", methods=['GET'])
|
||||
def favicon():
|
||||
return send_from_directory("/app/static/images", filename="favicon.ico")
|
||||
|
||||
|
||||
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
|
||||
def static_content(group, filename):
|
||||
try:
|
||||
return send_from_directory("/app/static/{}".format(group), filename=filename)
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
|
||||
@app.route("/api/add", methods=['POST'])
|
||||
def api_watch_add():
|
||||
global messages
|
||||
|
||||
# @todo add_watch should throw a custom Exception for validation etc
|
||||
datastore.add_watch(url=request.form.get('url').strip(), tag=request.form.get('tag').strip())
|
||||
messages.append({'class': 'ok', 'message': 'Watch added.'})
|
||||
launch_checks()
|
||||
return redirect(url_for('main_page'))
|
||||
|
||||
|
||||
@app.route("/api/delete", methods=['GET'])
|
||||
def api_delete():
|
||||
global messages
|
||||
uuid = request.args.get('uuid')
|
||||
datastore.delete(uuid)
|
||||
messages.append({'class': 'ok', 'message': 'Deleted.'})
|
||||
|
||||
return redirect(url_for('main_page'))
|
||||
|
||||
|
||||
@app.route("/api/update", methods=['POST'])
|
||||
def api_update():
|
||||
global messages
|
||||
import validators
|
||||
|
||||
uuid = request.args.get('uuid')
|
||||
|
||||
url = request.form.get('url').strip()
|
||||
tag = request.form.get('tag').strip()
|
||||
|
||||
form_headers = request.form.get('headers').strip().split("\n")
|
||||
extra_headers = {}
|
||||
if form_headers:
|
||||
for header in form_headers:
|
||||
if len(header):
|
||||
parts = header.split(':', 1)
|
||||
extra_headers.update({parts[0].strip(): parts[1].strip()})
|
||||
|
||||
|
||||
|
||||
validators.url(url) #@todo switch to prop/attr/observer
|
||||
datastore.data['watching'][uuid].update({'url': url,
|
||||
'tag': tag,
|
||||
'headers':extra_headers})
|
||||
datastore.needs_write = True
|
||||
|
||||
messages.append({'class': 'ok', 'message': 'Updated watch.'})
|
||||
|
||||
return redirect(url_for('main_page'))
|
||||
|
||||
@app.route("/api/checknow", methods=['GET'])
|
||||
def api_watch_checknow():
|
||||
global messages
|
||||
|
||||
uuid = request.args.get('uuid')
|
||||
|
||||
running_update_threads[uuid] = fetch_site_status.perform_site_check(uuid=uuid,
|
||||
datastore=datastore)
|
||||
running_update_threads[uuid].start()
|
||||
|
||||
return redirect(url_for('main_page'))
|
||||
|
||||
|
||||
@app.route("/api/recheckall", methods=['GET'])
|
||||
def api_watch_recheckall():
|
||||
import fetch_site_status
|
||||
|
||||
global running_update_threads
|
||||
i = 0
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
i = i + 1
|
||||
|
||||
running_update_threads[watch['uuid']] = fetch_site_status.perform_site_check(uuid=uuid,
|
||||
datastore=datastore)
|
||||
running_update_threads[watch['uuid']].start()
|
||||
|
||||
return "{} triggered recheck of {} watches.".format(i, len(datastore.data['watching']))
|
||||
|
||||
|
||||
# Can be used whenever, launch threads that need launching to update the stored information
|
||||
def launch_checks():
|
||||
import fetch_site_status
|
||||
global running_update_threads
|
||||
|
||||
|
||||
minutes = datastore.data['settings']['requests']['minutes_between_check']
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
|
||||
|
||||
if watch['last_checked'] <= time.time() - (minutes * 60):
|
||||
running_update_threads[watch['uuid']] = fetch_site_status.perform_site_check(uuid=uuid,
|
||||
datastore=datastore)
|
||||
running_update_threads[watch['uuid']].start()
|
||||
|
||||
|
||||
# Thread runner to check every minute
|
||||
def ticker_thread_check_time_launch_checks():
|
||||
while True:
|
||||
launch_checks()
|
||||
time.sleep(60)
|
||||
|
||||
# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
|
||||
# by just running periodically in one thread.
|
||||
def save_datastore():
|
||||
while True:
|
||||
if datastore.needs_write:
|
||||
datastore.sync_to_json()
|
||||
time.sleep(5)
|
||||
|
||||
def main(argv):
|
||||
ssl_mode = False
|
||||
port = 5000
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(argv, "sp:", "purge")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -p [port]')
|
||||
sys.exit(2)
|
||||
|
||||
for opt, arg in opts:
|
||||
if opt == '--purge':
|
||||
# Remove history, the actual files you need to delete manually.
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
|
||||
|
||||
if opt == '-s':
|
||||
ssl_mode = True
|
||||
|
||||
if opt == '-p':
|
||||
port = arg
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
||||
save_data_thread = threading.Thread(target=save_datastore).start()
|
||||
|
||||
# @todo finalise SSL config, but this should get you in the right direction if you need it.
|
||||
if ssl_mode:
|
||||
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)),
|
||||
certfile='cert.pem',
|
||||
keyfile='privkey.pem',
|
||||
server_side=True), app)
|
||||
|
||||
else:
|
||||
eventlet.wsgi.server(eventlet.listen(('', port)), app)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
||||
@@ -3,9 +3,10 @@ FROM python:3.8-slim
|
||||
# https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
COPY requirements.txt /tmp/requirements.txt
|
||||
RUN pip3 install -r /tmp/requirements.txt
|
||||
# Should be mounted from docker-compose-development.yml
|
||||
RUN pip3 install -r /requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN [ ! -d "/datastore" ] && mkdir /datastore
|
||||
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
aiohttp
|
||||
async-timeout
|
||||
chardet==2.3.0
|
||||
multidict
|
||||
python-engineio
|
||||
six==1.10.0
|
||||
yarl
|
||||
flask
|
||||
|
||||
eventlet
|
||||
requests
|
||||
validators
|
||||
|
||||
bleach==3.2.1
|
||||
html5lib==0.9999999 # via bleach
|
||||
timeago
|
||||
html2text
|
||||
|
||||
# @notes
|
||||
# - Dont install socketio, it interferes with flask_socketio
|
||||
@@ -1,9 +1,7 @@
|
||||
import time
|
||||
import sys
|
||||
|
||||
print ("Sleep loop, you should run your script from the console")
|
||||
|
||||
while True:
|
||||
# Wait for 5 seconds
|
||||
|
||||
time.sleep(2)
|
||||
@@ -1,62 +1,37 @@
|
||||
from threading import Thread
|
||||
import time
|
||||
import requests
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import html2text
|
||||
# Not needed due to inscriptis being way better.
|
||||
#from urlextract import URLExtract
|
||||
from inscriptis import get_text
|
||||
|
||||
# Hmm Polymorphism datastore, thread, etc
|
||||
class perform_site_check(Thread):
|
||||
def __init__(self, *args, uuid=False, datastore, **kwargs):
|
||||
|
||||
# Some common stuff here that can be moved to a base class
|
||||
class perform_site_check():
|
||||
|
||||
def __init__(self, *args, datastore, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.timestamp = int(time.time()) # used for storage etc too
|
||||
self.uuid = uuid
|
||||
self.datastore = datastore
|
||||
self.url = datastore.get_val(uuid, 'url')
|
||||
self.current_md5 = datastore.get_val(uuid, 'previous_md5')
|
||||
self.output_path = "/datastore/{}".format(self.uuid)
|
||||
|
||||
def save_firefox_screenshot(self, uuid, output):
|
||||
# @todo call selenium or whatever
|
||||
return
|
||||
def run(self, uuid):
|
||||
timestamp = int(time.time()) # used for storage etc too
|
||||
stripped_text_from_html = False
|
||||
changed_detected = False
|
||||
|
||||
def ensure_output_path(self):
|
||||
update_obj = {'previous_md5': self.datastore.data['watching'][uuid]['previous_md5'],
|
||||
'history': {},
|
||||
"last_checked": timestamp
|
||||
}
|
||||
|
||||
try:
|
||||
os.stat(self.output_path)
|
||||
except:
|
||||
os.mkdir(self.output_path)
|
||||
|
||||
def save_response_html_output(self, output):
|
||||
# @todo maybe record a history.json, [timestamp, md5, filename]
|
||||
with open("{}/{}.txt".format(self.output_path, self.timestamp), 'w') as f:
|
||||
f.write(output)
|
||||
f.close()
|
||||
|
||||
def save_response_stripped_output(self, output):
|
||||
fname = "{}/{}.stripped.txt".format(self.output_path, self.timestamp)
|
||||
with open(fname, 'w') as f:
|
||||
f.write(output)
|
||||
f.close()
|
||||
|
||||
return fname
|
||||
|
||||
def run(self):
|
||||
|
||||
extra_headers = self.datastore.get_val(self.uuid, 'headers')
|
||||
extra_headers = self.datastore.get_val(uuid, 'headers')
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
request_headers = self.datastore.data['settings']['headers'].copy()
|
||||
request_headers = self.datastore.data['settings']['headers']
|
||||
request_headers.update(extra_headers)
|
||||
|
||||
print("Checking", self.url)
|
||||
#print(request_headers)
|
||||
|
||||
self.ensure_output_path()
|
||||
# https://github.com/psf/requests/issues/4525
|
||||
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
|
||||
# do this by accident.
|
||||
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||
|
||||
try:
|
||||
timeout = self.datastore.data['settings']['requests']['timeout']
|
||||
@@ -65,70 +40,52 @@ class perform_site_check(Thread):
|
||||
timeout = 15
|
||||
|
||||
try:
|
||||
r = requests.get(self.url,
|
||||
url = self.datastore.get_val(uuid, 'url')
|
||||
|
||||
r = requests.get(url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
verify=False)
|
||||
|
||||
stripped_text_from_html = get_text(r.text)
|
||||
|
||||
|
||||
# @todo This should be a config option.
|
||||
# Many websites include junk in the links, trackers, etc.. Since we are really a service all about text changes..
|
||||
|
||||
# inscriptis handles this much cleaner, probably not needed..
|
||||
# extractor = URLExtract()
|
||||
# urls = extractor.find_urls(stripped_text_from_html)
|
||||
# Remove the urls, longest first so that we dont end up chewing up bigger links with parts of smaller ones.
|
||||
# if urls:
|
||||
# urls.sort(key=len, reverse=True)
|
||||
# for url in urls:
|
||||
# # Sometimes URLExtract will consider something like 'foobar.com' as a link when that was just text.
|
||||
# if "://" in url:
|
||||
# # print ("Stripping link", url)
|
||||
# stripped_text_from_html = stripped_text_from_html.replace(url, '')
|
||||
|
||||
|
||||
|
||||
# Usually from networkIO/requests level
|
||||
except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
|
||||
self.datastore.update_watch(self.uuid, 'last_error', str(e))
|
||||
update_obj["last_error"] = str(e)
|
||||
|
||||
print(str(e))
|
||||
|
||||
except requests.exceptions.MissingSchema:
|
||||
print("Skipping {} due to missing schema/bad url".format(self.uuid))
|
||||
print("Skipping {} due to missing schema/bad url".format(uuid))
|
||||
|
||||
# Usually from html2text level
|
||||
except UnicodeDecodeError as e:
|
||||
self.datastore.update_watch(self.uuid, 'last_error', str(e))
|
||||
|
||||
update_obj["last_error"] = str(e)
|
||||
print(str(e))
|
||||
# figure out how to deal with this cleaner..
|
||||
# 'utf-8' codec can't decode byte 0xe9 in position 480: invalid continuation byte
|
||||
|
||||
else:
|
||||
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
||||
# in the future we'll implement other mechanisms.
|
||||
|
||||
# We rely on the actual text in the html output.. many sites have random script vars etc
|
||||
self.datastore.update_watch(self.uuid, 'last_error', False)
|
||||
self.datastore.update_watch(self.uuid, 'last_check_status', r.status_code)
|
||||
update_obj["last_check_status"] = r.status_code
|
||||
update_obj["last_error"] = False
|
||||
|
||||
if not len(r.text):
|
||||
update_obj["last_error"] = "Empty reply"
|
||||
|
||||
fetched_md5 = hashlib.md5(stripped_text_from_html.encode('utf-8')).hexdigest()
|
||||
|
||||
if self.current_md5 != fetched_md5:
|
||||
# could be None or False depending on JSON type
|
||||
if self.datastore.data['watching'][uuid]['previous_md5'] != fetched_md5:
|
||||
changed_detected = True
|
||||
|
||||
# Dont confuse people by putting last-changed, when it actually just changed from nothing..
|
||||
if self.datastore.get_val(self.uuid, 'previous_md5') is not None:
|
||||
self.datastore.update_watch(self.uuid, 'last_changed', self.timestamp)
|
||||
# Don't confuse people by updating as last-changed, when it actually just changed from None..
|
||||
if self.datastore.get_val(uuid, 'previous_md5'):
|
||||
update_obj["last_changed"] = timestamp
|
||||
|
||||
self.datastore.update_watch(self.uuid, 'previous_md5', fetched_md5)
|
||||
self.save_response_html_output(r.text)
|
||||
output_filepath = self.save_response_stripped_output(stripped_text_from_html)
|
||||
update_obj["previous_md5"] = fetched_md5
|
||||
|
||||
# Update history with the stripped text for future reference, this will also mean we save the first
|
||||
# attempt because 'self.current_md5 != fetched_md5' (current_md5 will be None when not run)
|
||||
# need to learn more about attr/setters/getters
|
||||
history = self.datastore.get_val(self.uuid, 'history')
|
||||
history.update(dict([(str(self.timestamp), output_filepath)]))
|
||||
self.datastore.update_watch(self.uuid, 'history', history)
|
||||
|
||||
self.datastore.update_watch(self.uuid, 'last_checked', int(time.time()))
|
||||
pass
|
||||
return changed_detected, update_obj, stripped_text_from_html
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
|
||||
from flask import make_response
|
||||
from functools import wraps, update_wrapper
|
||||
from datetime import datetime
|
||||
|
||||
def nocache(view):
|
||||
@wraps(view)
|
||||
def no_cache(*args, **kwargs):
|
||||
response = make_response(view(*args, **kwargs))
|
||||
response.headers['hmm'] = datetime.now()
|
||||
|
||||
return response
|
||||
|
||||
return update_wrapper(no_cache, view)
|
||||
2
backend/pytest.ini
Normal file
2
backend/pytest.ini
Normal file
@@ -0,0 +1,2 @@
|
||||
[pytest]
|
||||
addopts = --no-start-live-server --live-server-port=5005
|
||||
@@ -1,20 +0,0 @@
|
||||
aiohttp
|
||||
async-timeout
|
||||
chardet==2.3.0
|
||||
multidict
|
||||
python-engineio
|
||||
six==1.10.0
|
||||
yarl
|
||||
flask
|
||||
|
||||
eventlet
|
||||
requests
|
||||
validators
|
||||
|
||||
bleach==3.2.1
|
||||
html5lib==0.9999999 # via bleach
|
||||
timeago
|
||||
html2text
|
||||
|
||||
# @notes
|
||||
# - Dont install socketio, it interferes with flask_socketio
|
||||
@@ -32,12 +32,17 @@ ins {
|
||||
}
|
||||
|
||||
#settings {
|
||||
|
||||
|
||||
line-height: 2em;
|
||||
background: rgba(0,0,0,.05);
|
||||
padding: 1em;
|
||||
border-radius: 10px;
|
||||
margin-bottom: 1em;
|
||||
color: #fff;
|
||||
font-size: 80%;
|
||||
}
|
||||
#settings label {
|
||||
margin-left: 1em;
|
||||
display: inline-block;
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
.source {
|
||||
@@ -53,7 +58,7 @@ ins {
|
||||
}
|
||||
|
||||
#diff-ui {
|
||||
background: #fff;
|
||||
background: #fff;
|
||||
padding: 2em;
|
||||
margin: 1em;
|
||||
border-radius: 5px;
|
||||
|
||||
@@ -48,7 +48,13 @@ section.content {
|
||||
/* table related */
|
||||
.watch-table {
|
||||
width: 100%;
|
||||
|
||||
}
|
||||
|
||||
.watch-table tr.unviewed {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.watch-tag-list {
|
||||
color: #e70069;
|
||||
white-space: nowrap;
|
||||
@@ -77,12 +83,21 @@ section.content {
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.watch-table .title-col a[target="_blank"]::after {
|
||||
.watch-table .title-col a[target="_blank"]::after, .current-diff-url::after {
|
||||
content: url();
|
||||
margin: 0 3px 0 5px;
|
||||
}
|
||||
|
||||
/* hotovo */
|
||||
#check-all-button {
|
||||
text-align:right;
|
||||
}
|
||||
|
||||
#check-all-button a {
|
||||
border-top-left-radius: initial;
|
||||
border-top-right-radius: initial;
|
||||
border-bottom-left-radius: 5px;
|
||||
border-bottom-right-radius: 5px;
|
||||
}
|
||||
|
||||
|
||||
body:after {
|
||||
|
||||
173
backend/store.py
173
backend/store.py
@@ -1,50 +1,69 @@
|
||||
import json
|
||||
import uuid as uuid_builder
|
||||
import validators
|
||||
import os.path
|
||||
from os import path
|
||||
from threading import Lock
|
||||
|
||||
from copy import deepcopy
|
||||
|
||||
import logging
|
||||
import time
|
||||
import threading
|
||||
|
||||
|
||||
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
|
||||
# Open a github issue if you know something :)
|
||||
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
|
||||
class ChangeDetectionStore:
|
||||
lock = Lock()
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, datastore_path="/datastore", include_default_watches=True):
|
||||
self.needs_write = False
|
||||
|
||||
self.datastore_path = datastore_path
|
||||
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
|
||||
self.stop_thread = False
|
||||
self.__data = {
|
||||
'note' : "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
||||
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
||||
'watching': {},
|
||||
'tag': "0.25",
|
||||
'settings': {
|
||||
'headers': {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
||||
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
||||
},
|
||||
'requests': {
|
||||
'timeout': 15, # Default 15 seconds
|
||||
'minutes_between_check': 3 * 60 # Default 3 hours
|
||||
'timeout': 15, # Default 15 seconds
|
||||
'minutes_between_check': 3 * 60, # Default 3 hours
|
||||
'workers': 10 # Number of threads, lower is better for slow connections
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Base definition for all watchers
|
||||
self.generic_definition = {
|
||||
'url': None,
|
||||
'tag': None,
|
||||
'last_checked': 0,
|
||||
'last_changed': 0,
|
||||
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
||||
'newest_history_key': "",
|
||||
'title': None,
|
||||
'previous_md5': None,
|
||||
'previous_md5': "",
|
||||
'uuid': str(uuid_builder.uuid4()),
|
||||
'headers' : {}, # Extra headers to send
|
||||
'history' : {} # Dict of timestamp and output stripped filename
|
||||
'headers': {}, # Extra headers to send
|
||||
'history': {} # Dict of timestamp and output stripped filename
|
||||
}
|
||||
|
||||
if path.isfile('/source.txt'):
|
||||
with open('/source.txt') as f:
|
||||
# Should be set in Dockerfile to look for /source.txt , this will give us the git commit #
|
||||
# So when someone gives us a backup file to examine, we know exactly what code they were running.
|
||||
self.__data['build_sha'] = f.read()
|
||||
|
||||
try:
|
||||
with open('/datastore/url-watches.json') as json_file:
|
||||
with open(self.json_store_path) as json_file:
|
||||
from_disk = json.load(json_file)
|
||||
|
||||
# @todo isnt there a way todo this dict.update recursively?
|
||||
@@ -59,54 +78,86 @@ class ChangeDetectionStore:
|
||||
if 'requests' in from_disk['settings']:
|
||||
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
|
||||
|
||||
|
||||
# Reinitialise each `watching` with our generic_definition in the case that we add a new var in the future.
|
||||
# @todo pretty sure theres a python we todo this with an abstracted(?) object!
|
||||
i = 0
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
_blank = self.generic_definition.copy()
|
||||
_blank = deepcopy(self.generic_definition)
|
||||
_blank.update(watch)
|
||||
self.__data['watching'].update({uuid: _blank})
|
||||
print("Watching:", uuid, _blank['url'])
|
||||
self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
|
||||
print("Watching:", uuid, self.__data['watching'][uuid]['url'])
|
||||
|
||||
# First time ran, doesnt exist.
|
||||
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
||||
print("Creating JSON store")
|
||||
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
|
||||
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
|
||||
self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
|
||||
self.add_watch(url='https://changedetection.io', tag='Tech news')
|
||||
if include_default_watches:
|
||||
print("Creating JSON store at", self.datastore_path)
|
||||
|
||||
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
|
||||
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
|
||||
self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
|
||||
self.add_watch(url='https://changedetection.io', tag='Tech news')
|
||||
|
||||
# self.entryVariable.get()
|
||||
def update_watch(self, uuid, val, var):
|
||||
# Finally start the thread that will manage periodic data saves to JSON
|
||||
save_data_thread = threading.Thread(target=self.save_datastore).start()
|
||||
|
||||
self.__data['watching'][uuid].update({val: var})
|
||||
# Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
|
||||
def get_newest_history_key(self, uuid):
|
||||
if len(self.__data['watching'][uuid]['history']) == 1:
|
||||
return 0
|
||||
|
||||
dates = list(self.__data['watching'][uuid]['history'].keys())
|
||||
# Convert to int, sort and back to str again
|
||||
dates = [int(i) for i in dates]
|
||||
dates.sort(reverse=True)
|
||||
if len(dates):
|
||||
# always keyed as str
|
||||
return str(dates[0])
|
||||
|
||||
return 0
|
||||
|
||||
def set_last_viewed(self, uuid, timestamp):
|
||||
self.data['watching'][uuid].update({'last_viewed': str(timestamp)})
|
||||
self.needs_write = True
|
||||
|
||||
def update_watch(self, uuid, update_obj):
|
||||
|
||||
with self.lock:
|
||||
|
||||
# In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
|
||||
for dict_key, d in self.generic_definition.items():
|
||||
if isinstance(d, dict):
|
||||
if update_obj is not None and dict_key in update_obj:
|
||||
self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
|
||||
del (update_obj[dict_key])
|
||||
|
||||
self.__data['watching'][uuid].update(update_obj)
|
||||
self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
|
||||
|
||||
self.needs_write = True
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
|
||||
return self.__data
|
||||
|
||||
def get_all_tags(self):
|
||||
tags=[]
|
||||
tags = []
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
|
||||
# Support for comma separated list of tags.
|
||||
for tag in watch['tag'].split(','):
|
||||
tag = tag.strip()
|
||||
if not tag in tags:
|
||||
if tag not in tags:
|
||||
tags.append(tag)
|
||||
|
||||
tags.sort()
|
||||
return tags
|
||||
|
||||
def delete(self, uuid):
|
||||
# Probably their should be dict...
|
||||
del(self.__data['watching'][uuid])
|
||||
self.needs_write = True
|
||||
|
||||
with self.lock:
|
||||
del (self.__data['watching'][uuid])
|
||||
self.needs_write = True
|
||||
|
||||
def url_exists(self, url):
|
||||
|
||||
@@ -122,26 +173,62 @@ class ChangeDetectionStore:
|
||||
return self.data['watching'][uuid].get(val)
|
||||
|
||||
def add_watch(self, url, tag):
|
||||
with self.lock:
|
||||
# @todo use a common generic version of this
|
||||
new_uuid = str(uuid_builder.uuid4())
|
||||
_blank = deepcopy(self.generic_definition)
|
||||
_blank.update({
|
||||
'url': url,
|
||||
'tag': tag,
|
||||
'uuid': new_uuid
|
||||
})
|
||||
|
||||
# @todo deal with exception
|
||||
validators.url(url)
|
||||
self.data['watching'][new_uuid] = _blank
|
||||
|
||||
# @todo use a common generic version of this
|
||||
# Get the directory ready
|
||||
output_path = "{}/{}".format(self.datastore_path, new_uuid)
|
||||
try:
|
||||
os.mkdir(output_path)
|
||||
except FileExistsError:
|
||||
print(output_path, "already exists.")
|
||||
|
||||
_blank = self.generic_definition.copy()
|
||||
_blank.update({
|
||||
'url': url,
|
||||
'tag': tag,
|
||||
'uuid': str(uuid_builder.uuid4())
|
||||
})
|
||||
self.sync_to_json()
|
||||
return new_uuid
|
||||
|
||||
self.data['watching'].update({_blank['uuid']: _blank})
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
def save_history_text(self, uuid, result_obj, contents):
|
||||
|
||||
output_path = "{}/{}".format(self.datastore_path, uuid)
|
||||
fname = "{}/{}-{}.stripped.txt".format(output_path, result_obj['previous_md5'], str(time.time()))
|
||||
with open(fname, 'w') as f:
|
||||
f.write(contents)
|
||||
f.close()
|
||||
|
||||
# Update history with the stripped text for future reference, this will also mean we save the first
|
||||
# Should always be keyed by string(timestamp)
|
||||
self.update_watch(uuid, {"history": {str(result_obj["last_checked"]): fname}})
|
||||
|
||||
return fname
|
||||
|
||||
def sync_to_json(self):
|
||||
print ("Saving index")
|
||||
with open('/datastore/url-watches.json', 'w') as json_file:
|
||||
json.dump(self.data, json_file, indent=4)
|
||||
print("Saving..")
|
||||
with open(self.json_store_path, 'w') as json_file:
|
||||
json.dump(self.__data, json_file, indent=4)
|
||||
logging.info("Re-saved index")
|
||||
|
||||
self.needs_write = False
|
||||
|
||||
# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
|
||||
# by just running periodically in one thread, according to python, dict updates are threadsafe.
|
||||
def save_datastore(self):
|
||||
|
||||
while True:
|
||||
if self.stop_thread:
|
||||
print("Shutting down datastore thread")
|
||||
return
|
||||
if self.needs_write:
|
||||
self.sync_to_json()
|
||||
time.sleep(1)
|
||||
|
||||
# body of the constructor
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<meta name="description" content="Self hosted website change detection.">
|
||||
<title>Change Detection</title>
|
||||
<link rel="stylesheet" href="/static/css/pure-min.css">
|
||||
<link rel="stylesheet" href="/static/css/styles.css">
|
||||
<link rel="stylesheet" href="/static/css/styles.css?ver=1000">
|
||||
{% if extra_stylesheets %}
|
||||
{% for m in extra_stylesheets %}
|
||||
<link rel="stylesheet" href="{{ m }}">
|
||||
<link rel="stylesheet" href="{{ m }}?ver=1000">
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
</head>
|
||||
@@ -18,9 +18,15 @@
|
||||
<div class="header">
|
||||
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed">
|
||||
<a class="pure-menu-heading" href="/"><strong>Change</strong>Detection.io</a>
|
||||
{% if current_diff_url %}
|
||||
<a class=current-diff-url href="{{ current_diff_url }}"><span style="max-width: 30%; overflow: hidden;">{{ current_diff_url }}</a>
|
||||
{% endif %}
|
||||
|
||||
<ul class="pure-menu-list">
|
||||
|
||||
<li class="pure-menu-item">
|
||||
<a href="/backup" class="pure-menu-link">BACKUP</a>
|
||||
</li>
|
||||
<li class="pure-menu-item">
|
||||
<a href="/import" class="pure-menu-link">IMPORT</a>
|
||||
</li>
|
||||
|
||||
@@ -2,34 +2,58 @@
|
||||
|
||||
{% block content %}
|
||||
|
||||
<div id="diff-ui">
|
||||
<div id="settings">
|
||||
<h1>Differences</h1>
|
||||
<form class="pure-form " action="" method="GET">
|
||||
<fieldset>
|
||||
|
||||
<label for="diffWords" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffWords" value="diffWords" /> Words</label>
|
||||
<label for="diffLines" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffLines" value="diffLines" checked=""/> Lines</label>
|
||||
|
||||
<label for="diffChars" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffChars" value="diffChars"/> Chars</label>
|
||||
|
||||
{% if versions|length >= 1 %}
|
||||
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
|
||||
<select id="diff-version" name="previous_version">
|
||||
{% for version in versions %}
|
||||
<option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
|
||||
{{version}}
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<button type="submit" class="pure-button pure-button-primary">Go</button>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
</form>
|
||||
<del>Removed text</del>
|
||||
<ins>Inserted Text</ins>
|
||||
|
||||
<div id="settings">
|
||||
<h3>Diff</h3>
|
||||
<label><input type="radio" name="diff_type" value="diffChars"> Chars</label>
|
||||
<label><input type="radio" name="diff_type" value="diffWords" > Words</label>
|
||||
<label><input type="radio" name="diff_type" value="diffLines" checked=""> Lines</label>
|
||||
</div>
|
||||
|
||||
<div id="diff-ui">
|
||||
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<!-- just proof of concept copied straight from github.com/kpdecker/jsdiff -->
|
||||
<td id="a" style="display: none;">{{left}}</td>
|
||||
<td id="b" style="display: none;">{{right}}</td>
|
||||
<!-- just proof of concept copied straight from github.com/kpdecker/jsdiff -->
|
||||
<td id="a" style="display: none;">{{previous}}</td>
|
||||
<td id="b" style="display: none;">{{newest}}</td>
|
||||
<td>
|
||||
<pre id="result"></pre>
|
||||
<span id="result"></span>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff" >github.com/kpdecker/jsdiff</a>
|
||||
Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a>
|
||||
|
||||
</div>
|
||||
|
||||
<script src="/static/js/diff.js"></script>
|
||||
<script defer="">
|
||||
|
||||
|
||||
var a = document.getElementById('a');
|
||||
var b = document.getElementById('b');
|
||||
var result = document.getElementById('result');
|
||||
@@ -63,6 +87,23 @@ function changed() {
|
||||
}
|
||||
|
||||
window.onload = function() {
|
||||
|
||||
|
||||
/* Convert what is options from UTC time.time() to local browser time */
|
||||
var diffList=document.getElementById("diff-version");
|
||||
if (typeof(diffList) != 'undefined' && diffList != null) {
|
||||
for (var option of diffList.options) {
|
||||
var dateObject = new Date(option.value*1000);
|
||||
option.label=dateObject.toLocaleString();
|
||||
}
|
||||
}
|
||||
|
||||
/* Set current version date as local time in the browser also */
|
||||
var current_v = document.getElementById("current-v-date");
|
||||
var dateObject = new Date({{ newest_version_timestamp }}*1000);
|
||||
current_v.innerHTML=dateObject.toLocaleString();
|
||||
|
||||
|
||||
onDiffTypeChange(document.querySelector('#settings [name="diff_type"]:checked'));
|
||||
changed();
|
||||
};
|
||||
@@ -89,10 +130,11 @@ for (var i = 0; i < radio.length; i++) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{% endblock %}
|
||||
@@ -4,7 +4,7 @@
|
||||
<div class="edit-form">
|
||||
|
||||
|
||||
<form class="pure-form pure-form-stacked" action="/api/update?uuid={{uuid}}" method="POST">
|
||||
<form class="pure-form pure-form-stacked" action="/edit?uuid={{uuid}}" method="POST">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
<label for="url">URL</label>
|
||||
|
||||
43
backend/templates/scrub.html
Normal file
43
backend/templates/scrub.html
Normal file
@@ -0,0 +1,43 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block content %}
|
||||
<div class="edit-form">
|
||||
|
||||
|
||||
<form class="pure-form pure-form-stacked" action="/scrub" method="POST">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
This will remove all version snapshots/data, but keep your list of URLs. <br/>
|
||||
You may like to use the <strong>BACKUP</strong> link first.<br/>
|
||||
|
||||
Type in the word <strong>scrub</strong> to confirm that you understand!
|
||||
<br/>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
<br/>
|
||||
<label for="confirmtext">Confirm</label><br/>
|
||||
<input type="text" id="confirmtext" required="" name="confirmtext" value="" size="10"/>
|
||||
<br/>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="pure-control-group">
|
||||
<button type="submit" class="pure-button pure-button-primary">Scrub!</button>
|
||||
</div>
|
||||
<br/>
|
||||
<div class="pure-control-group">
|
||||
<a href="/" class="pure-button button-small button-cancel">Cancel</a>
|
||||
</div>
|
||||
|
||||
|
||||
</fieldset>
|
||||
</form>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
@@ -14,13 +14,15 @@
|
||||
</div>
|
||||
|
||||
|
||||
<br/>
|
||||
<div class="pure-control-group">
|
||||
<button type="submit" class="pure-button pure-button-primary">Save</button>
|
||||
</div>
|
||||
<br/>
|
||||
|
||||
<div class="pure-control-group">
|
||||
<a href="/" class="pure-button button-small button-cancel">Cancel</a>
|
||||
<a href="/" class="pure-button button-small button-cancel">Back</a>
|
||||
<a href="/scrub" class="pure-button button-small button-cancel">Reset all version data</a>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
@@ -17,16 +17,15 @@
|
||||
<div>
|
||||
|
||||
{% for tag in tags %}
|
||||
{% if tag == "" %}
|
||||
<a href="/" class="pure-button button-tag {{'active' if active_tag == tag }}">All</a>
|
||||
{% else %}
|
||||
<a href="/?tag={{ tag}}" class="pure-button button-tag {{'active' if active_tag == tag }}">{{ tag }}</a>
|
||||
{% endif %}
|
||||
{% if tag == "" %}
|
||||
<a href="/" class="pure-button button-tag {{'active' if active_tag == tag }}">All</a>
|
||||
{% else %}
|
||||
<a href="/?tag={{ tag}}" class="pure-button button-tag {{'active' if active_tag == tag }}">{{ tag }}</a>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<div id="watch-table-wrapper">
|
||||
|
||||
<table class="pure-table pure-table-striped watch-table">
|
||||
<thead>
|
||||
<tr>
|
||||
@@ -42,7 +41,9 @@
|
||||
|
||||
{% for watch in watches %}
|
||||
<tr id="{{ watch.uuid }}"
|
||||
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}">
|
||||
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
|
||||
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
||||
{% if watch.newest_history_key| int > watch.last_viewed| int %}unviewed{% endif %}">
|
||||
<td>{{ loop.index }}</td>
|
||||
<td class="title-col">{{watch.title if watch.title is not none else watch.url}}
|
||||
<a class="external" target=_blank href="{{ watch.url }}"></a>
|
||||
@@ -54,8 +55,15 @@
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>{{watch|format_last_checked_time}}</td>
|
||||
<td>{{watch.last_changed|format_timestamp_timeago}}</td>
|
||||
<td><a href="/api/checknow?uuid={{ watch.uuid}}" class="pure-button button-small pure-button-primary">Recheck</a>
|
||||
<td>{% if watch.history|length >= 2 and watch.last_changed %}
|
||||
{{watch.last_changed|format_timestamp_timeago}}
|
||||
{% else %}
|
||||
Not yet
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
<a href="/api/checknow?uuid={{ watch.uuid}}{% if request.args.get('tag') %}&tag={{request.args.get('tag')}}{% endif %}"
|
||||
class="pure-button button-small pure-button-primary">Recheck</a>
|
||||
<a href="/edit?uuid={{ watch.uuid}}" class="pure-button button-small pure-button-primary">Edit</a>
|
||||
{% if watch.history|length >= 2 %}
|
||||
<a href="/diff/{{ watch.uuid}}" class="pure-button button-small pure-button-primary">Diff</a>
|
||||
@@ -67,6 +75,11 @@
|
||||
|
||||
</tbody>
|
||||
</table>
|
||||
<div id="check-all-button">
|
||||
|
||||
<a href="/api/checknow{% if active_tag%}?tag={{active_tag}}{%endif%}" class="pure-button button-tag ">Recheck
|
||||
all {% if active_tag%}in "{{active_tag}}"{%endif%}</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
2
backend/tests/__init__.py
Normal file
2
backend/tests/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Tests for the app."""
|
||||
|
||||
43
backend/tests/conftest.py
Normal file
43
backend/tests/conftest.py
Normal file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import pytest
|
||||
from backend import changedetection_app
|
||||
from backend import store
|
||||
import os
|
||||
|
||||
|
||||
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
|
||||
|
||||
# Much better boilerplate than the docs
|
||||
# https://www.python-boilerplate.com/py3+flask+pytest/
|
||||
|
||||
global app
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def app(request):
|
||||
"""Create application for the tests."""
|
||||
|
||||
datastore_path = "./test-datastore"
|
||||
|
||||
try:
|
||||
os.mkdir(datastore_path)
|
||||
except FileExistsError:
|
||||
pass
|
||||
|
||||
try:
|
||||
os.unlink("{}/url-watches.json".format(datastore_path))
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
def teardown():
|
||||
datastore.stop_thread = True
|
||||
app.config['STOP_THREADS'] = True
|
||||
|
||||
request.addfinalizer(teardown)
|
||||
|
||||
return app
|
||||
117
backend/tests/test_backend.py
Normal file
117
backend/tests/test_backend.py
Normal file
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from urllib.request import urlopen
|
||||
|
||||
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
"""
|
||||
|
||||
with open("test-datastore/output.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
|
||||
def set_modified_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
<p>which has this one new line</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
"""
|
||||
|
||||
with open("test-datastore/output.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
|
||||
def test_check_basic_change_detection_functionality(client, live_server):
|
||||
sleep_time_for_fetch_thread = 5
|
||||
|
||||
@live_server.app.route('/test-endpoint')
|
||||
def test_endpoint():
|
||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||
with open("test-datastore/output.txt", "r") as f:
|
||||
return f.read()
|
||||
|
||||
set_original_response()
|
||||
|
||||
live_server.start()
|
||||
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": url_for('test_endpoint', _external=True)},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# Do this a few times.. ensures we dont accidently set the status
|
||||
for n in range(3):
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# It should report nothing found (no new 'unviewed' class)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' not in res.data
|
||||
assert b'test-endpoint' in res.data
|
||||
|
||||
#####################
|
||||
|
||||
# Make a change
|
||||
set_modified_response()
|
||||
|
||||
res = urlopen(url_for('test_endpoint', _external=True))
|
||||
assert b'which has this one new line' in res.read()
|
||||
|
||||
# Force recheck
|
||||
res = client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
assert b'1 watches are rechecking.' in res.data
|
||||
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# Now something should be ready, indicated by having a 'unviewed' class
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
|
||||
res = client.get(url_for("diff_history_page", uuid="first") )
|
||||
assert b'Compare newest' in res.data
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
# Do this a few times.. ensures we dont accidently set the status
|
||||
for n in range(3):
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# It should report nothing found (no new 'unviewed' class)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' not in res.data
|
||||
assert b'test-endpoint' in res.data
|
||||
|
||||
|
||||
set_original_response()
|
||||
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
65
changedetection.py
Normal file
65
changedetection.py
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
# Launch as a eventlet.wsgi server instance.
|
||||
|
||||
import getopt
|
||||
import sys
|
||||
|
||||
import eventlet
|
||||
import eventlet.wsgi
|
||||
import backend
|
||||
|
||||
from backend import store
|
||||
|
||||
|
||||
def main(argv):
|
||||
ssl_mode = False
|
||||
port = 5000
|
||||
datastore_path = "./datastore"
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(argv, "sd:p:", "purge")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -p [port] -d [datastore path]')
|
||||
sys.exit(2)
|
||||
|
||||
for opt, arg in opts:
|
||||
# if opt == '--purge':
|
||||
# Remove history, the actual files you need to delete manually.
|
||||
# for uuid, watch in datastore.data['watching'].items():
|
||||
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
|
||||
|
||||
if opt == '-s':
|
||||
ssl_mode = True
|
||||
|
||||
if opt == '-p':
|
||||
port = int(arg)
|
||||
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
|
||||
|
||||
# threads can read from disk every x seconds right?
|
||||
# front end can just save
|
||||
# We just need to know which threads are looking at which UUIDs
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'])
|
||||
app = backend.changedetection_app(app_config, datastore)
|
||||
|
||||
if ssl_mode:
|
||||
# @todo finalise SSL config, but this should get you in the right direction if you need it.
|
||||
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)),
|
||||
certfile='cert.pem',
|
||||
keyfile='privkey.pem',
|
||||
server_side=True), app)
|
||||
|
||||
else:
|
||||
eventlet.wsgi.server(eventlet.listen(('', port)), app)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
||||
@@ -1,2 +0,0 @@
|
||||
Empty dir, please keep, this is used to store your data!
|
||||
|
||||
@@ -10,6 +10,7 @@ services:
|
||||
container_name: changedetection.io-dev
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- ./requirements.txt:/requirements.txt # Normally COPY'ed in the Dockerfile
|
||||
- ./datastore:/datastore
|
||||
|
||||
ports:
|
||||
|
||||
@@ -7,6 +7,9 @@ six==1.10.0
|
||||
yarl
|
||||
flask
|
||||
|
||||
pytest
|
||||
pytest-flask # for live_server
|
||||
|
||||
eventlet
|
||||
requests
|
||||
validators
|
||||
|
||||
BIN
screenshot-diff.png
Normal file
BIN
screenshot-diff.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 115 KiB |
BIN
screenshot.png
BIN
screenshot.png
Binary file not shown.
|
Before Width: | Height: | Size: 297 KiB After Width: | Height: | Size: 217 KiB |
Reference in New Issue
Block a user