Mirror of https://github.com/dgtlmoon/changedetection.io.git
Synced 2025-11-05 00:56:06 +00:00

Compare commits: 79 commits, 0.23 ... diff-strea
Commit SHA1s:
14d88c249e, 0fa443c3f2, 2280e6d497, 822f3e6d20, 35546c331c, 982a0d7781, c5c3e8c6c2, ff1b19cdb8, df96b8d76c, 89134b5b6c, b31bf34890, 5b2fda1a6e, fb38b06eae, e0578acca2, 187523d8d6, b0975694c8, b1fb47e689, a82e9243a6, e3e36b3cef, cd6465f844, 30d53c353f, 47fcb8b4f8, 0ec9edb971, f1da8f96b6, 8bc7b5be40, 022826493b, 092f77f066, 013cbcabd4, 66be95ecc6, efe0356f37, ec1ac300af, 468184bc3a, 0855017dca, ae0f640ff4, cd6629ac2d, 3c3ca7944b, b0fb52017c, fc6fba377a, 7ea39ada7c, e98ea37342, e20577df15, 19dcbc2f08, c59838a6e4, 0a8c339535, cd5b703037, 90642742bd, 96221598e7, 98623de38c, 33985dbd9d, a3a5ca78bf, 3fcbbb3fbf, 70252b24f9, 0a08616c87, beebba487c, cbeafcbaa0, e200cd3289, 22c7a1a88d, 63eea2d6db, 3e9a110671, 22bc8fabd1, 9030070b3d, fca7bb8583, 3c175bfc4a, fd5475ba38, b0c5dbd88e, 1718e2e86f, b46a7fc4b1, 4770ebb2ea, d4db082c01, c8607ae8bb, b361a61d18, 87f4347fe5, 93ee65fe53, 9f964b6d3f, 426b09b7e1, ec98415c4d, 47e5a7cf09, d07cf53a07, b9f73a6240
33  .github/workflows/python-app.yml  (vendored, new file)
@@ -0,0 +1,33 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: changedetection.io

on: [push, pull_request]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.9
      uses: actions/setup-python@v2
      with:
        python-version: 3.9
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        cd backend; pytest
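The workflow gates on two things: a hard flake8 pass for syntax errors and undefined names (E9, F63, F7, F82) and a pytest run from the backend/ directory; the second flake8 pass is advisory only. A small sketch of reproducing the same gate locally, assuming flake8 and pytest are installed (the helper name is ours, not part of the repo):

```python
# run_ci_checks.py - hypothetical helper mirroring the CI gate above.
import subprocess

def run_ci_checks():
    # Hard gate: syntax errors / undefined names fail the build (same selectors as CI).
    subprocess.run(["flake8", ".", "--count", "--select=E9,F63,F7,F82",
                    "--show-source", "--statistics"], check=True)
    # Advisory pass: --exit-zero reports style issues without failing.
    subprocess.run(["flake8", ".", "--count", "--exit-zero", "--max-complexity=10",
                    "--max-line-length=127", "--statistics"], check=True)
    # Tests run from backend/, matching "cd backend; pytest".
    subprocess.run(["pytest"], cwd="backend", check=True)

if __name__ == "__main__":
    run_ci_checks()
```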
4  .gitignore  (vendored)
@@ -2,4 +2,6 @@ __pycache__
.idea
*.pyc
datastore/url-watches.json
datastore/*
__pycache__
.pytest_cache
18  Dockerfile
@@ -2,23 +2,27 @@ FROM python:3.8-slim
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install -r /tmp/requirements.txt

COPY backend /app

RUN [ ! -d "/app" ] && mkdir /app
RUN [ ! -d "/datastore" ] && mkdir /datastore

# The actual flask app
COPY backend /app/backend

# The eventlet server wrapper
COPY changedetection.py /app/changedetection.py

WORKDIR /app

# https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
ENV PYTHONUNBUFFERED=1

# Attempt to store the triggered commit
ARG SOURCE_COMMIT
ARG SOURCE_BRANCH
RUN echo "commit: $SOURCE_COMMIT branch: $SOURCE_BRANCH" >/source.txt

RUN [ ! -d "/datastore" ] && mkdir /datastore

CMD [ "python", "./backend.py" ]
CMD [ "python", "./changedetection.py" , "-d", "/datastore"]
201  LICENSE  (new file)
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
15  README.md
@@ -1,9 +1,18 @@
# changedetection.io

<a href="https://hub.docker.com/r/dgtlmoon/changedetection.io" target="_blank" title="Change detection docker hub">
<img src="https://img.shields.io/docker/pulls/dgtlmoon/changedetection.io" alt="Docker Pulls"/>
</a>
<a href="https://hub.docker.com/r/dgtlmoon/changedetection.io" target="_blank" title="Change detection docker hub">
<img src="https://img.shields.io/docker/v/dgtlmoon/changedetection.io/0.27" alt="Change detection latest tag version"/>
</a>

## Self-hosted change monitoring of web pages.

_Know when web pages change! Stay on top of new information!_

#### Example use cases

@@ -11,7 +20,7 @@ Know when ...

- Government department updates (changes are often only on their websites)
- Local government news (changes are often only on their websites)
- New software releases
- New software releases, security advisories when you're not on their mailing list.
- Festivals with changes
- Real estate listing changes

@@ -37,10 +46,6 @@ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/dat

### Screenshots

Application running.

Examining differences in content.
1  backend/README-pytest.md  (new file)
@@ -0,0 +1 @@
Note: run `pytest` from this directory.
668  backend/__init__.py  (new file)
@@ -0,0 +1,668 @@
#!/usr/bin/python3


# @todo logging
# @todo extra options for url like , verify=False etc.
# @todo enable https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl as option?
# @todo option for interval day/6 hour/etc
# @todo on change detected, config for calling some API
# @todo make tables responsive!
# @todo fetch title into json
# https://distill.io/features
# proxy per check
# - flask_cors, itsdangerous,MarkupSafe

import time
import os
import timeago

import threading
from threading import Event

import queue

from flask import Flask, render_template, request, send_file, send_from_directory, abort, redirect, url_for

from feedgen.feed import FeedGenerator
from flask import make_response
import datetime
import pytz

datastore = None

# Local
running_update_threads = []
ticker_thread = None

messages = []
extra_stylesheets = []

update_q = queue.Queue()

app = Flask(__name__, static_url_path="/var/www/change-detection/backen/static")

# Stop browser caching of assets
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0

app.config.exit = Event()

app.config['NEW_VERSION_AVAILABLE'] = False

# Disables caching of the templates
app.config['TEMPLATES_AUTO_RELOAD'] = True


# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
# running or something similar.
@app.template_filter('format_last_checked_time')
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
    # Worker thread tells us which UUID it is currently processing.
    for t in running_update_threads:
        if t.current_uuid == watch_obj['uuid']:
            return "Checking now.."

    if watch_obj['last_checked'] == 0:
        return 'Not yet'

    return timeago.format(int(watch_obj['last_checked']), time.time())


# @app.context_processor
# def timeago():
#     def _timeago(lower_time, now):
#         return timeago.format(lower_time, now)
#     return dict(timeago=_timeago)

@app.template_filter('format_timestamp_timeago')
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
    return timeago.format(timestamp, time.time())
    # return timeago.format(timestamp, time.time())
    # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format)


def changedetection_app(config=None, datastore_o=None):
    global datastore
    datastore = datastore_o

    app.config.update(dict(DEBUG=True))
    app.config.update(config or {})

    # Setup cors headers to allow all domains
    # https://flask-cors.readthedocs.io/en/latest/
    # CORS(app)

    # https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39
    # You can divide up the stuff like this

    @app.route("/", methods=['GET'])
    def index():
        global messages

        limit_tag = request.args.get('tag')
        rss = request.args.get('rss')
        mode = request.args.get('mode')

        # Sort by last_changed and add the uuid which is usually the key..
        sorted_watches = []
        for uuid, watch in datastore.data['watching'].items():

            if limit_tag != None:
                # Support for comma separated list of tags.
                for tag_in_watch in watch['tag'].split(','):
                    tag_in_watch = tag_in_watch.strip()
                    if tag_in_watch == limit_tag:
                        watch['uuid'] = uuid
                        sorted_watches.append(watch)

            else:
                watch['uuid'] = uuid
                sorted_watches.append(watch)

        sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)

        existing_tags = datastore.get_all_tags()

        if mode == 'stream':
            import difflib

            import pprint
            streams = []

            extra_stylesheets = ['/static/css/diff.css']
            for watch in sorted_watches:
                if not watch['viewed']:

                    # get last two date keys
                    dates = list(watch['history'].keys())
                    # Convert to int, sort and back to str again
                    dates = [int(i) for i in dates]
                    dates.sort(reverse=True)
                    dates = [str(i) for i in dates]
                    print ("OK", watch['uuid'])

                    if len(dates) < 2:
                        print ("Skipping", watch['url'])
                        continue
                    else:
                        try:
                            path = datastore.data['watching'][watch['uuid']]['history'][str(dates[1])]
                            with open(path, encoding='utf-8') as file:
                                txt1 = [line.rstrip() for line in file.readlines()]

                            path = datastore.data['watching'][watch['uuid']]['history'][str(dates[0])]
                            with open(path, encoding='utf-8') as file:
                                txt2 = [line.rstrip() for line in file.readlines()]
                        except FileNotFoundError:
                            print ("Skipping", watch['url'])
                            continue

                    df = list(difflib.unified_diff(txt1, txt2, n=1))
                    diff_entry = []
                    for line in df:
                        if line[0] == '-' or line[0] == '+':
                            diff_entry.append(line)

                    # pprint(df)
                    # s = pprint.pformat(df)
                    streams.append(diff_entry)

            print ("###########", len(streams))

            output = render_template("watch-diff-stream.html",
                                     streams=streams,
                                     extra_stylesheets=extra_stylesheets
                                     )
            return output

        if rss:
            fg = FeedGenerator()
            fg.title('changedetection.io')
            fg.description('Feed description')
            fg.link(href='https://changedetection.io')

            for watch in sorted_watches:
                if not watch['viewed']:
                    fe = fg.add_entry()
                    fe.title(watch['url'])
                    fe.link(href=watch['url'])
                    fe.description(watch['url'])
                    fe.guid(watch['uuid'], permalink=False)
                    dt = datetime.datetime.fromtimestamp(int(watch['newest_history_key']))
                    dt = dt.replace(tzinfo=pytz.UTC)
                    fe.pubDate(dt)

            response = make_response(fg.rss_str())
            response.headers.set('Content-Type', 'application/rss+xml')
            return response

        else:
            # table = render_template('watch-table.html', watches=sorted_watches)
            output = render_template("watch-table.html",
                                     watches=sorted_watches,
                                     messages=messages,
                                     tags=existing_tags,
                                     active_tag=limit_tag,
                                     has_unviewed=datastore.data['has_unviewed'])

            # Show messages but once.
            messages = []

            return output
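The stream mode above boils each unviewed watch down to the added/removed lines between its two newest snapshots. A standalone sketch of that difflib call, with made-up sample data (unlike the route's filter, this one also drops the `---`/`+++` file-header lines, which start with the same characters):

```python
import difflib

old = ["price: 10", "in stock", "shipping: free"]
new = ["price: 12", "in stock", "shipping: free"]

# n=1 keeps one line of context around each change, as in the route above.
df = list(difflib.unified_diff(old, new, n=1))

# Keep only additions/removals; skip the "---"/"+++" header lines.
diff_entry = [line for line in df
              if line[:1] in ('-', '+') and not line.startswith(('---', '+++'))]

print(diff_entry)  # ['-price: 10', '+price: 12']
```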
@app.route("/scrub", methods=['GET', 'POST'])
|
||||
def scrub_page():
|
||||
from pathlib import Path
|
||||
|
||||
global messages
|
||||
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext == 'scrub':
|
||||
|
||||
for txt_file_path in Path(app.config['datastore_path']).rglob('*.txt'):
|
||||
os.unlink(txt_file_path)
|
||||
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
watch['last_checked'] = 0
|
||||
watch['last_changed'] = 0
|
||||
watch['previous_md5'] = None
|
||||
watch['history'] = {}
|
||||
|
||||
datastore.needs_write = True
|
||||
messages.append({'class': 'ok', 'message': 'Cleaned all version history.'})
|
||||
else:
|
||||
messages.append({'class': 'error', 'message': 'Wrong confirm text.'})
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
return render_template("scrub.html")
|
||||
|
||||
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
|
||||
# the excluded text.
|
||||
def get_current_checksum_include_ignore_text(uuid):
|
||||
|
||||
import hashlib
|
||||
from backend import fetch_site_status
|
||||
|
||||
# Get the most recent one
|
||||
newest_history_key = datastore.get_val(uuid, 'newest_history_key')
|
||||
|
||||
# 0 means that theres only one, so that there should be no 'unviewed' history availabe
|
||||
if newest_history_key == 0:
|
||||
newest_history_key = list(datastore.data['watching'][uuid]['history'].keys())[0]
|
||||
|
||||
if newest_history_key:
|
||||
with open(datastore.data['watching'][uuid]['history'][newest_history_key],
|
||||
encoding='utf-8') as file:
|
||||
raw_content = file.read()
|
||||
|
||||
handler = fetch_site_status.perform_site_check(datastore=datastore)
|
||||
stripped_content = handler.strip_ignore_text(raw_content,
|
||||
datastore.data['watching'][uuid]['ignore_text'])
|
||||
|
||||
checksum = hashlib.md5(stripped_content).hexdigest()
|
||||
return checksum
|
||||
|
||||
return datastore.data['watching'][uuid]['previous_md5']
|
||||
|
||||
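In other words: the stored checksum is the md5 of the newest snapshot *after* the per-watch ignore rules are applied, so saving new ignore rules does not by itself trigger a false "changed" on the next fetch. The same idea as a self-contained sketch (the helper name and sample data are ours):

```python
import hashlib

def checksum_ignoring(text: str, ignore_substrings) -> str:
    # Mirror strip_ignore_text: drop blank lines and any line containing an ignored substring.
    kept = [line for line in text.splitlines()
            if line.strip() and not any(s in line for s in ignore_substrings)]
    return hashlib.md5("\n".join(kept).encode('utf8')).hexdigest()

# The ad banner changes on every fetch, but the checksum stays stable:
assert checksum_ignoring("price: 10\nad #123\n", ["ad #"]) == \
       checksum_ignoring("price: 10\nad #456\n", ["ad #"])
```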
@app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
||||
def edit_page(uuid):
|
||||
global messages
|
||||
import validators
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if request.method == 'POST':
|
||||
|
||||
url = request.form.get('url').strip()
|
||||
tag = request.form.get('tag').strip()
|
||||
|
||||
# Extra headers
|
||||
form_headers = request.form.get('headers').strip().split("\n")
|
||||
extra_headers = {}
|
||||
if form_headers:
|
||||
for header in form_headers:
|
||||
if len(header):
|
||||
parts = header.split(':', 1)
|
||||
if len(parts) == 2:
|
||||
extra_headers.update({parts[0].strip(): parts[1].strip()})
|
||||
|
||||
update_obj = {'url': url,
|
||||
'tag': tag,
|
||||
'headers': extra_headers
|
||||
}
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = request.form.get('ignore-text').strip()
|
||||
ignore_text = []
|
||||
if len(form_ignore_text):
|
||||
for text in form_ignore_text.split("\n"):
|
||||
text = text.strip()
|
||||
if len(text):
|
||||
ignore_text.append(text)
|
||||
|
||||
datastore.data['watching'][uuid]['ignore_text'] = ignore_text
|
||||
|
||||
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
|
||||
if len(datastore.data['watching'][uuid]['history']):
|
||||
update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
|
||||
|
||||
validators.url(url) # @todo switch to prop/attr/observer
|
||||
datastore.data['watching'][uuid].update(update_obj)
|
||||
datastore.needs_write = True
|
||||
|
||||
messages.append({'class': 'ok', 'message': 'Updated watch.'})
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
else:
|
||||
output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], messages=messages)
|
||||
|
||||
return output
|
||||
|
||||
@app.route("/settings", methods=['GET', "POST"])
|
||||
def settings_page():
|
||||
global messages
|
||||
if request.method == 'POST':
|
||||
try:
|
||||
minutes = int(request.values.get('minutes').strip())
|
||||
except ValueError:
|
||||
messages.append({'class': 'error', 'message': "Invalid value given, use an integer."})
|
||||
|
||||
else:
|
||||
if minutes >= 5:
|
||||
datastore.data['settings']['requests']['minutes_between_check'] = minutes
|
||||
datastore.needs_write = True
|
||||
|
||||
messages.append({'class': 'ok', 'message': "Updated"})
|
||||
else:
|
||||
messages.append(
|
||||
{'class': 'error', 'message': "Must be atleast 5 minutes."})
|
||||
|
||||
output = render_template("settings.html", messages=messages,
|
||||
minutes=datastore.data['settings']['requests']['minutes_between_check'])
|
||||
messages = []
|
||||
|
||||
return output
|
||||
|
||||
@app.route("/import", methods=['GET', "POST"])
|
||||
def import_page():
|
||||
import validators
|
||||
global messages
|
||||
remaining_urls = []
|
||||
|
||||
good = 0
|
||||
|
||||
if request.method == 'POST':
|
||||
urls = request.values.get('urls').split("\n")
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if len(url) and validators.url(url):
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag="")
|
||||
# Straight into the queue.
|
||||
update_q.put(new_uuid)
|
||||
good += 1
|
||||
else:
|
||||
if len(url):
|
||||
remaining_urls.append(url)
|
||||
|
||||
messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))})
|
||||
|
||||
if len(remaining_urls) == 0:
|
||||
# Looking good, redirect to index.
|
||||
return redirect(url_for('index'))
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
output = render_template("import.html",
|
||||
messages=messages,
|
||||
remaining="\n".join(remaining_urls)
|
||||
)
|
||||
messages = []
|
||||
|
||||
return output
|
||||
|
||||
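The import route keeps the lines that fail validation, so the form can be re-rendered with only the problem URLs left in the textarea. The parsing step in isolation (the function name is ours; `validators` is the same third-party package the route imports):

```python
import validators

def split_urls(blob: str):
    good, remaining = [], []
    for url in (u.strip() for u in blob.split("\n")):
        if url and validators.url(url):
            good.append(url)       # these get datastore.add_watch() and are queued
        elif url:
            remaining.append(url)  # shown back to the user for correction
    return good, remaining

print(split_urls("https://example.com\nnot a url\n"))
# (['https://example.com'], ['not a url'])
```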
    # Clear all statuses, so we do not see the 'unviewed' class
    @app.route("/api/mark-all-viewed", methods=['GET'])
    def mark_all_viewed():

        # Save the current newest history as the most recently viewed
        for watch_uuid, watch in datastore.data['watching'].items():
            datastore.set_last_viewed(watch_uuid, watch['newest_history_key'])

        messages.append({'class': 'ok', 'message': "Cleared all statuses."})
        return redirect(url_for('index'))

    @app.route("/diff/<string:uuid>", methods=['GET'])
    def diff_history_page(uuid):
        global messages

        # More for testing, possible to return the first/only
        if uuid == 'first':
            uuid = list(datastore.data['watching'].keys()).pop()

        extra_stylesheets = ['/static/css/diff.css']
        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            messages.append({'class': 'error', 'message': "No history found for the specified link, bad link?"})
            return redirect(url_for('index'))

        dates = list(watch['history'].keys())
        # Convert to int, sort and back to str again
        dates = [int(i) for i in dates]
        dates.sort(reverse=True)
        dates = [str(i) for i in dates]

        if len(dates) < 2:
            messages.append(
                {'class': 'error', 'message': "Not enough saved change detection snapshots to produce a report."})
            return redirect(url_for('index'))

        # Save the current newest history as the most recently viewed
        datastore.set_last_viewed(uuid, dates[0])

        newest_file = watch['history'][dates[0]]
        with open(newest_file, 'r') as f:
            newest_version_file_contents = f.read()

        previous_version = request.args.get('previous_version')

        try:
            previous_file = watch['history'][previous_version]
        except KeyError:
            # Not present, use a default value, the second one in the sorted list.
            previous_file = watch['history'][dates[1]]

        with open(previous_file, 'r') as f:
            previous_version_file_contents = f.read()

        output = render_template("diff.html", watch_a=watch,
                                 messages=messages,
                                 newest=newest_version_file_contents,
                                 previous=previous_version_file_contents,
                                 extra_stylesheets=extra_stylesheets,
                                 versions=dates[1:],
                                 newest_version_timestamp=dates[0],
                                 current_previous_version=str(previous_version),
                                 current_diff_url=watch['url'])

        return output

    @app.route("/favicon.ico", methods=['GET'])
    def favicon():
        return send_from_directory("/app/static/images", filename="favicon.ico")

    # We're good but backups are even better!
    @app.route("/backup", methods=['GET'])
    def get_backup():
        import zipfile
        from pathlib import Path

        # create a ZipFile object
        backupname = "changedetection-backup-{}.zip".format(int(time.time()))

        # We only care about UUIDS from the current index file
        uuids = list(datastore.data['watching'].keys())

        with zipfile.ZipFile(os.path.join(app.config['datastore_path'], backupname), 'w',
                             compression=zipfile.ZIP_DEFLATED,
                             compresslevel=6) as zipObj:

            # Be sure we're written fresh
            datastore.sync_to_json()

            # Add the index
            zipObj.write(os.path.join(app.config['datastore_path'], "url-watches.json"))
            # Add any snapshot data we find
            for txt_file_path in Path(app.config['datastore_path']).rglob('*.txt'):
                parent_p = txt_file_path.parent
                if parent_p.name in uuids:
                    zipObj.write(txt_file_path)

        return send_file(os.path.join(app.config['datastore_path'], backupname),
                         as_attachment=True,
                         mimetype="application/zip",
                         attachment_filename=backupname)
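The backup route relies on the datastore layout: url-watches.json sits at the top level, and snapshot text files live in a per-watch directory named after the watch UUID, which is why a *.txt file is included only when its parent directory name is a known UUID. A compact sketch of that selection (the function name and sample UUIDs are illustrative):

```python
from pathlib import Path

uuids = {"uuid-a", "uuid-b"}  # in the route: keys of datastore.data['watching']

def backup_members(datastore_path: str):
    # The JSON index is always included.
    yield Path(datastore_path) / "url-watches.json"
    # Snapshots are included only if their parent dir is a known watch UUID,
    # so orphaned snapshot directories are silently skipped.
    for txt in Path(datastore_path).rglob('*.txt'):
        if txt.parent.name in uuids:
            yield txt
```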
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
|
||||
def static_content(group, filename):
|
||||
# These files should be in our subdirectory
|
||||
full_path = os.path.realpath(__file__)
|
||||
p = os.path.dirname(full_path)
|
||||
|
||||
try:
|
||||
return send_from_directory("{}/static/{}".format(p, group), filename=filename)
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
@app.route("/api/add", methods=['POST'])
|
||||
def api_watch_add():
|
||||
global messages
|
||||
|
||||
# @todo add_watch should throw a custom Exception for validation etc
|
||||
new_uuid = datastore.add_watch(url=request.form.get('url').strip(), tag=request.form.get('tag').strip())
|
||||
# Straight into the queue.
|
||||
update_q.put(new_uuid)
|
||||
|
||||
messages.append({'class': 'ok', 'message': 'Watch added.'})
|
||||
return redirect(url_for('index'))
|
||||
|
||||
@app.route("/api/delete", methods=['GET'])
|
||||
def api_delete():
|
||||
global messages
|
||||
uuid = request.args.get('uuid')
|
||||
datastore.delete(uuid)
|
||||
messages.append({'class': 'ok', 'message': 'Deleted.'})
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
@app.route("/api/checknow", methods=['GET'])
|
||||
def api_watch_checknow():
|
||||
|
||||
global messages
|
||||
|
||||
tag = request.args.get('tag')
|
||||
uuid = request.args.get('uuid')
|
||||
i = 0
|
||||
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
running_uuids.append(t.current_uuid)
|
||||
|
||||
# @todo check thread is running and skip
|
||||
|
||||
if uuid:
|
||||
if uuid not in running_uuids:
|
||||
update_q.put(uuid)
|
||||
i = 1
|
||||
|
||||
elif tag != None:
|
||||
# Items that have this current tag
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if (tag != None and tag in watch['tag']):
|
||||
i += 1
|
||||
if watch_uuid not in running_uuids:
|
||||
update_q.put(watch_uuid)
|
||||
else:
|
||||
# No tag, no uuid, add everything.
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
i += 1
|
||||
if watch_uuid not in running_uuids:
|
||||
update_q.put(watch_uuid)
|
||||
|
||||
messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)})
|
||||
return redirect(url_for('index', tag=tag))
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
||||
|
||||
# Check for new release version
|
||||
threading.Thread(target=check_for_new_version).start()
|
||||
return app
|
||||
|
||||
|
||||
# Check for new version and anonymous stats
|
||||
def check_for_new_version():
|
||||
import requests
|
||||
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': datastore.data['version_tag'],
|
||||
'app_guid': datastore.data['app_guid']},
|
||||
|
||||
verify=False)
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
if "new_version" in r.text:
|
||||
app.config['NEW_VERSION_AVAILABLE'] = True
|
||||
except:
|
||||
pass
|
||||
|
||||
# Check daily
|
||||
app.config.exit.wait(86400)
|
||||
|
||||
|
||||
# Requests for checking on the site use a pool of thread Workers managed by a Queue.
class Worker(threading.Thread):
    current_uuid = None

    def __init__(self, q, *args, **kwargs):
        self.q = q
        super().__init__(*args, **kwargs)

    def run(self):
        from backend import fetch_site_status

        update_handler = fetch_site_status.perform_site_check(datastore=datastore)

        while not app.config.exit.is_set():

            try:
                uuid = self.q.get(block=False)
            except queue.Empty:
                pass

            else:
                self.current_uuid = uuid

                if uuid in list(datastore.data['watching'].keys()):

                    try:
                        changed_detected, result, contents = update_handler.run(uuid)

                    except PermissionError as s:
                        app.logger.error("File permission error updating", uuid, str(s))
                    else:
                        if result:

                            datastore.update_watch(uuid=uuid, update_obj=result)
                            if changed_detected:
                                # A change was detected
                                datastore.save_history_text(uuid=uuid, contents=contents, result_obj=result)

                self.current_uuid = None  # Done
                self.q.task_done()

            app.config.exit.wait(1)
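The Worker pattern above: N threads share one Queue of watch UUIDs, poll it non-blocking, and check the shared exit Event between polls so shutdown never blocks on an empty queue. The same shape reduced to a runnable toy (all names here are ours):

```python
import queue
import threading

q = queue.Queue()
exit_event = threading.Event()

def worker():
    while not exit_event.is_set():
        try:
            uuid = q.get(block=False)
        except queue.Empty:
            pass
        else:
            print("checking", uuid)   # stand-in for update_handler.run(uuid)
            q.task_done()
        exit_event.wait(1)            # poll interval, also the max shutdown latency

threads = [threading.Thread(target=worker) for _ in range(3)]
for t in threads:
    t.start()
for u in ("uuid-a", "uuid-b", "uuid-c"):
    q.put(u)
q.join()          # wait until every queued item is task_done()
exit_event.set()  # then let the workers fall out of their loops
for t in threads:
    t.join()
```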
# Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():
    # Spin up Workers.
    for _ in range(datastore.data['settings']['requests']['workers']):
        new_worker = Worker(update_q)
        running_update_threads.append(new_worker)
        new_worker.start()

    while not app.config.exit.is_set():
        running_uuids = []
        for t in running_update_threads:
            running_uuids.append(t.current_uuid)

        # Look at the dataset, find a stale watch to process

        # Every minute check for new UUIDs to follow up on, should be inside the loop in case it changes.
        minutes = datastore.data['settings']['requests']['minutes_between_check']

        threshold = time.time() - (minutes * 60)
        for uuid, watch in datastore.data['watching'].items():
            if watch['last_checked'] <= threshold:
                if not uuid in running_uuids and uuid not in update_q.queue:
                    update_q.put(uuid)

        # Should be low so we can break this out in testing
        app.config.exit.wait(1)
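The ticker's scheduling rule, isolated: a watch is due when its last check is older than the configured interval, and it is queued only if no worker currently holds it and it is not already waiting (the `uuid not in update_q.queue` test peeks at the deque inside Queue). A sketch under those assumptions (the function name is ours):

```python
import time

def due_for_recheck(watch, minutes_between_check, running_uuids, queued_uuids, uuid):
    threshold = time.time() - minutes_between_check * 60
    return (watch['last_checked'] <= threshold
            and uuid not in running_uuids    # no worker is already on it
            and uuid not in queued_uuids)    # and it is not already queued

print(due_for_recheck({'last_checked': 0}, 5, [], [], 'uuid-a'))  # True: never checked
```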
@@ -1,489 +0,0 @@
#!/usr/bin/python3


# @todo logging
# @todo sort by last_changed
# @todo extra options for url like , verify=False etc.
# @todo enable https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl as option?
# @todo maybe a button to reset all 'last-changed'.. so you can see it clearly when something happens since your last visit
# @todo option for interval day/6 hour/etc
# @todo on change detected, config for calling some API
# @todo make tables responsive!
# @todo fetch title into json
# https://distill.io/features
# proxy per check
# i
import json
import eventlet
import eventlet.wsgi

import time
import os
import getopt
import sys
import datetime
import timeago

import threading
import queue


from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for


# Local
import store
running_update_threads = []
ticker_thread = None

datastore = store.ChangeDetectionStore()
messages = []
extra_stylesheets = []

update_q = queue.Queue()


app = Flask(__name__, static_url_path='/static')
app.config['STATIC_RESOURCES'] = "/app/static"
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0

# app.config['SECRET_KEY'] = 'secret!'

# Disables caching of the templates
app.config['TEMPLATES_AUTO_RELOAD'] = True


# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
# running or something similar.
@app.template_filter('format_last_checked_time')
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
    # Worker thread tells us which UUID it is currently processing.
    for t in running_update_threads:
        if t.current_uuid == watch_obj['uuid']:
            return "Checking now.."

    if watch_obj['last_checked'] == 0:
        return 'Not yet'

    return timeago.format(int(watch_obj['last_checked']), time.time())


# @app.context_processor
# def timeago():
#     def _timeago(lower_time, now):
#         return timeago.format(lower_time, now)
#     return dict(timeago=_timeago)

@app.template_filter('format_timestamp_timeago')
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
    if timestamp == 0:
        return 'Not yet'
    return timeago.format(timestamp, time.time())
    # return timeago.format(timestamp, time.time())
    # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format)


@app.route("/", methods=['GET'])
def main_page():
    global messages

    limit_tag = request.args.get('tag')

    # Sort by last_changed and add the uuid which is usually the key..
    sorted_watches = []
    for uuid, watch in datastore.data['watching'].items():
        if limit_tag != None:
            # Support for comma separated list of tags.
            for tag_in_watch in watch['tag'].split(','):
                tag_in_watch = tag_in_watch.strip()
                if tag_in_watch == limit_tag:
                    watch['uuid'] = uuid
                    sorted_watches.append(watch)

        else:
            watch['uuid'] = uuid
            sorted_watches.append(watch)

    sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)

    existing_tags = datastore.get_all_tags()
    output = render_template("watch-overview.html",
                             watches=sorted_watches,
                             messages=messages,
                             tags=existing_tags,
                             active_tag=limit_tag)

    # Show messages but once.
    messages = []
    return output


@app.route("/scrub", methods=['GET', 'POST'])
def scrub_page():
    from pathlib import Path

    global messages

    if request.method == 'POST':
        confirmtext = request.form.get('confirmtext')

        if confirmtext == 'scrub':

            for txt_file_path in Path('/datastore').rglob('*.txt'):
                os.unlink(txt_file_path)

            for uuid, watch in datastore.data['watching'].items():
                watch['last_checked'] = 0
                watch['last_changed'] = 0
                watch['previous_md5'] = None
                watch['history'] = {}

            datastore.needs_write = True
            messages.append({'class': 'ok', 'message': 'Cleaned all version history.'})
        else:
            messages.append({'class': 'error', 'message': 'Wrong confirm text.'})

        return redirect(url_for('main_page'))

    return render_template("scrub.html")


@app.route("/edit", methods=['GET', 'POST'])
def edit_page():
    global messages
    import validators

    if request.method == 'POST':
        uuid = request.args.get('uuid')

        url = request.form.get('url').strip()
        tag = request.form.get('tag').strip()

        form_headers = request.form.get('headers').strip().split("\n")
        extra_headers = {}
        if form_headers:
            for header in form_headers:
                if len(header):
                    parts = header.split(':', 1)
                    extra_headers.update({parts[0].strip(): parts[1].strip()})

        validators.url(url)  # @todo switch to prop/attr/observer
        datastore.data['watching'][uuid].update({'url': url,
                                                 'tag': tag,
                                                 'headers': extra_headers})
        datastore.needs_write = True

        messages.append({'class': 'ok', 'message': 'Updated watch.'})

        return redirect(url_for('main_page'))

    else:

        uuid = request.args.get('uuid')
        output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], messages=messages)

        return output


@app.route("/settings", methods=['GET', "POST"])
def settings_page():
    global messages
    if request.method == 'POST':
        try:
            minutes = int(request.values.get('minutes').strip())
        except ValueError:
            messages.append({'class': 'error', 'message': "Invalid value given, use an integer."})

        else:
            if minutes >= 5 and minutes <= 600:
                datastore.data['settings']['requests']['minutes_between_check'] = minutes
                datastore.needs_write = True

                messages.append({'class': 'ok', 'message': "Updated"})
            else:
                messages.append({'class': 'error', 'message': "Must be equal to or greater than 5 and less than 600 minutes"})

    output = render_template("settings.html", messages=messages, minutes=datastore.data['settings']['requests']['minutes_between_check'])
    messages = []

    return output


@app.route("/import", methods=['GET', "POST"])
def import_page():
    import validators
    global messages
    remaining_urls = []

    good = 0

    if request.method == 'POST':
        urls = request.values.get('urls').split("\n")
        for url in urls:
            url = url.strip()
            if len(url) and validators.url(url):
                datastore.add_watch(url=url.strip(), tag="")
                good += 1
            else:
                if len(url):
                    remaining_urls.append(url)

        messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))})

    output = render_template("import.html",
                             messages=messages,
                             remaining="\n".join(remaining_urls)
                             )
    messages = []
    return output


@app.route("/diff/<string:uuid>", methods=['GET'])
def diff_history_page(uuid):
    global messages

    extra_stylesheets = ['/static/css/diff.css']

    watch = datastore.data['watching'][uuid]

    dates = list(watch['history'].keys())
    # Convert to int, sort and back to str again
    dates = [int(i) for i in dates]
    dates.sort(reverse=True)
    dates = [str(i) for i in dates]

    newest_file = watch['history'][dates[0]]
    with open(newest_file, 'r') as f:
        newest_version_file_contents = f.read()

    previous_version = request.args.get('previous_version')

    try:
        previous_file = watch['history'][previous_version]
    except KeyError:
        # Not present, use a default value, the second one in the sorted list.
        previous_file = watch['history'][dates[1]]

    with open(previous_file, 'r') as f:
        previous_version_file_contents = f.read()

    output = render_template("diff.html", watch_a=watch,
                             messages=messages,
                             newest=newest_version_file_contents,
                             previous=previous_version_file_contents,
                             extra_stylesheets=extra_stylesheets,
                             versions=dates[1:],
                             newest_version_timestamp=dates[0],
                             current_previous_version=str(previous_version),
                             current_diff_url=watch['url'])

    return output


@app.route("/favicon.ico", methods=['GET'])
def favicon():
    return send_from_directory("/app/static/images", filename="favicon.ico")


# We're good but backups are even better!
@app.route("/backup", methods=['GET'])
def get_backup():
    import zipfile
    from pathlib import Path
    import zlib

    # create a ZipFile object
    backupname = "changedetection-backup-{}.zip".format(int(time.time()))

    # We only care about UUIDS from the current index file
    uuids = list(datastore.data['watching'].keys())

    with zipfile.ZipFile(os.path.join("/datastore", backupname), 'w', compression=zipfile.ZIP_DEFLATED,
                         compresslevel=6) as zipObj:

        # Be sure we're written fresh
        datastore.sync_to_json()

        # Add the index
        zipObj.write(os.path.join("/datastore", "url-watches.json"))
        # Add any snapshot data we find
        for txt_file_path in Path('/datastore').rglob('*.txt'):
            parent_p = txt_file_path.parent
            if parent_p.name in uuids:
                zipObj.write(txt_file_path)

    return send_file(os.path.join("/datastore", backupname),
                     as_attachment=True,
                     mimetype="application/zip",
                     attachment_filename=backupname)


# A few self sanity checks, mostly for developer/bug check
@app.route("/self-check", methods=['GET'])
def selfcheck():
    output = "All fine"
    # In earlier versions before a single threaded write of the JSON store, sometimes histories could get mixed.
    # Could also maybe affect people who manually fiddle with their JSON store?
    for uuid, watch in datastore.data['watching'].items():
        for timestamp, path in watch['history'].items():
            # Each history snapshot should include a full path, which contains the {uuid}
            if not uuid in path:
                output = "Something weird in {}, suspected incorrect snapshot path.".format(uuid)

    return output


@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
def static_content(group, filename):
    try:
        return send_from_directory("/app/static/{}".format(group), filename=filename)
    except FileNotFoundError:
        abort(404)


@app.route("/api/add", methods=['POST'])
def api_watch_add():
    global messages

    # @todo add_watch should throw a custom Exception for validation etc
    new_uuid = datastore.add_watch(url=request.form.get('url').strip(), tag=request.form.get('tag').strip())
    # Straight into the queue.
    update_q.put(new_uuid)

    messages.append({'class': 'ok', 'message': 'Watch added.'})
    return redirect(url_for('main_page'))


@app.route("/api/delete", methods=['GET'])
def api_delete():
    global messages
    uuid = request.args.get('uuid')
    datastore.delete(uuid)
    messages.append({'class': 'ok', 'message': 'Deleted.'})

    return redirect(url_for('main_page'))


@app.route("/api/checknow", methods=['GET'])
def api_watch_checknow():
    global messages

    tag = request.args.get('tag')
    uuid = request.args.get('uuid')
    i = 0

    if uuid:
        update_q.put(uuid)
        i = 1

    elif tag != None:
        for watch_uuid, watch in datastore.data['watching'].items():
            if (tag != None and tag in watch['tag']):
                i += 1
                update_q.put(watch_uuid)
    else:
        # No tag, no uuid, add everything.
        for watch_uuid, watch in datastore.data['watching'].items():
            i += 1
            update_q.put(watch_uuid)

    messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)})
    return redirect(url_for('main_page', tag=tag))


# Requests for checking on the site use a pool of thread Workers managed by a Queue.
class Worker(threading.Thread):

    current_uuid = None

    def __init__(self, q, *args, **kwargs):
        self.q = q
        super().__init__(*args, **kwargs)

    def run(self):
        import fetch_site_status

        try:
            while True:
                uuid = self.q.get()  # Blocking
                self.current_uuid = uuid

                if uuid in list(datastore.data['watching'].keys()):
                    update_handler = fetch_site_status.perform_site_check(uuid=uuid, datastore=datastore)
                    datastore.update_watch(uuid=uuid, update_obj=update_handler.update_data)

                self.current_uuid = None  # Done
                self.q.task_done()

        except KeyboardInterrupt:
            return


# Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():

    # Spin up Workers.
    for _ in range(datastore.data['settings']['requests']['workers']):
        new_worker = Worker(update_q)
        running_update_threads.append(new_worker)
        new_worker.start()

    # Every minute check for new UUIDs to follow up on
    while True:
        minutes = datastore.data['settings']['requests']['minutes_between_check']
        for uuid, watch in datastore.data['watching'].items():
            if watch['last_checked'] <= time.time() - (minutes * 60):
                update_q.put(uuid)

        time.sleep(60)


# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
# by just running periodically in one thread, according to python, dict updates are threadsafe.
def save_datastore():
    try:
        while True:
            if datastore.needs_write:
                datastore.sync_to_json()
            time.sleep(5)

    except KeyboardInterrupt:
        return


def main(argv):
    ssl_mode = False
    port = 5000

    try:
        opts, args = getopt.getopt(argv, "sp:", "purge")
    except getopt.GetoptError:
        print('backend.py -s SSL enable -p [port]')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '--purge':
            # Remove history, the actual files you need to delete manually.
            for uuid, watch in datastore.data['watching'].items():
                watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})

        if opt == '-s':
            ssl_mode = True

        if opt == '-p':
            port = arg

    # @todo handle ctrl break
    ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()

    save_data_thread = threading.Thread(target=save_datastore).start()

    # @todo finalise SSL config, but this should get you in the right direction if you need it.
    if ssl_mode:
        eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)),
                                               certfile='cert.pem',
                                               keyfile='privkey.pem',
                                               server_side=True), app)

    else:
        eventlet.wsgi.server(eventlet.listen(('', port)), app)


if __name__ == '__main__':
    main(sys.argv[1:])
@@ -3,9 +3,7 @@ FROM python:3.8-slim
# https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
ENV PYTHONUNBUFFERED=1

# Should be mounted from docker-compose-development.yml
RUN pip3 install -r /requirements.txt

WORKDIR /app

RUN [ ! -d "/datastore" ] && mkdir /datastore
@@ -1,9 +1,7 @@
import time
import sys

print ("Sleep loop, you should run your script from the console")

while True:
    # Wait for 5 seconds

    time.sleep(2)
    time.sleep(2)
@@ -1,67 +1,49 @@
import time
import requests
import hashlib
import os
import re
from inscriptis import get_text
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Some common stuff here that can be moved to a base class
class perform_site_check():

    # New state that is set after a check
    # Return value dict
    update_obj = {}

    def __init__(self, *args, uuid=False, datastore, **kwargs):
    def __init__(self, *args, datastore, **kwargs):
        super().__init__(*args, **kwargs)
        self.timestamp = int(time.time())  # used for storage etc too
        self.uuid = uuid
        self.datastore = datastore
        self.url = datastore.get_val(uuid, 'url')
        self.current_md5 = datastore.get_val(uuid, 'previous_md5')
        self.output_path = "/datastore/{}".format(self.uuid)

        self.ensure_output_path()
        self.run()
    def strip_ignore_text(self, content, list_ignore_text):
        ignore = []
        for k in list_ignore_text:
            ignore.append(k.encode('utf8'))

    # Current state of what needs to be updated
    @property
    def update_data(self):
        return self.update_obj
        output = []
        for line in content.splitlines():
            line = line.encode('utf8')

    def save_firefox_screenshot(self, uuid, output):
        # @todo call selenium or whatever
        return
            # Always ignore blank lines in this mode. (when this function gets called)
            if len(line.strip()):
                if not any(skip_text in line for skip_text in ignore):
                    output.append(line)

    def ensure_output_path(self):
        return "\n".encode('utf8').join(output)

        try:
            os.stat(self.output_path)
        except:
            os.mkdir(self.output_path)

    def save_response_html_output(self, output):

        # @todo Saving the original HTML can be very large, better to set as an option, these files could be important to some.
        with open("{}/{}.html".format(self.output_path, self.timestamp), 'w') as f:
            f.write(output)
            f.close()
    def run(self, uuid):
        timestamp = int(time.time())  # used for storage etc too
        stripped_text_from_html = False
        changed_detected = False

    def save_response_stripped_output(self, output):
        fname = "{}/{}.stripped.txt".format(self.output_path, self.timestamp)
        with open(fname, 'w') as f:
            f.write(output)
            f.close()
        update_obj = {'previous_md5': self.datastore.data['watching'][uuid]['previous_md5'],
                      'history': {},
                      "last_checked": timestamp
                      }

        return fname

    def run(self):

        extra_headers = self.datastore.get_val(self.uuid, 'headers')
        extra_headers = self.datastore.get_val(uuid, 'headers')

        # Tweak the base config with the per-watch ones
        request_headers = self.datastore.data['settings']['headers'].copy()
        request_headers = self.datastore.data['settings']['headers']
        request_headers.update(extra_headers)

        # https://github.com/psf/requests/issues/4525
@@ -77,28 +59,28 @@ class perform_site_check():
        timeout = 15

        try:
            r = requests.get(self.url,
            url = self.datastore.get_val(uuid, 'url')

            r = requests.get(url,
                             headers=request_headers,
                             timeout=timeout,
                             verify=False)

            stripped_text_from_html = get_text(r.text)

        # Usually from networkIO/requests level
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            self.update_obj["last_error"] = str(e)
            update_obj["last_error"] = str(e)

            print(str(e))

        except requests.exceptions.MissingSchema:
            print("Skipping {} due to missing schema/bad url".format(self.uuid))
            print("Skipping {} due to missing schema/bad url".format(uuid))

        # Usually from html2text level
        except UnicodeDecodeError as e:

            self.update_obj["last_error"] = str(e)
            update_obj["last_error"] = str(e)
            print(str(e))
            # figure out how to deal with this cleaner..
            # 'utf-8' codec can't decode byte 0xe9 in position 480: invalid continuation byte
@@ -107,26 +89,30 @@ class perform_site_check():
            # We rely on the actual text in the html output.. many sites have random script vars etc,
            # in the future we'll implement other mechanisms.

            self.update_obj["last_check_status"] = r.status_code
            self.update_obj["last_error"] = False
            update_obj["last_check_status"] = r.status_code
            update_obj["last_error"] = False

            fetched_md5 = hashlib.md5(stripped_text_from_html.encode('utf-8')).hexdigest()
            if not len(r.text):
                update_obj["last_error"] = "Empty reply"

            # If there's text to skip
            # @todo we could abstract out the get_text() to handle this cleaner
            if len(self.datastore.data['watching'][uuid]['ignore_text']):
                content = self.strip_ignore_text(stripped_text_from_html,
                                                 self.datastore.data['watching'][uuid]['ignore_text'])
            else:
                content = stripped_text_from_html.encode('utf8')

            if self.current_md5 != fetched_md5:
            fetched_md5 = hashlib.md5(content).hexdigest()

            # could be None or False depending on JSON type
            if self.datastore.data['watching'][uuid]['previous_md5'] != fetched_md5:
                changed_detected = True

                # Don't confuse people by updating as last-changed, when it actually just changed from None..
                if self.datastore.get_val(self.uuid, 'previous_md5') is not None:
                    self.update_obj["last_changed"] = self.timestamp
                if self.datastore.get_val(uuid, 'previous_md5'):
                    update_obj["last_changed"] = timestamp

                self.update_obj["previous_md5"] = fetched_md5

                self.save_response_html_output(r.text)
                output_filepath = self.save_response_stripped_output(stripped_text_from_html)

                # Update history with the stripped text for future reference, this will also mean we save the first
                timestamp = str(self.timestamp)
                self.update_obj.update({"history": {timestamp: output_filepath}})

        self.update_obj["last_checked"] = self.timestamp
                update_obj["previous_md5"] = fetched_md5

        return changed_detected, update_obj, stripped_text_from_html
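The core of the new run() above is: filter out ignored lines, then compare an MD5 of what remains against the previously stored hash. A condensed sketch of that logic, with made-up page text for illustration:

    import hashlib

    def strip_ignore_text(content, ignore):
        # Keep only non-blank lines that contain none of the ignore strings
        output = [line.encode('utf8') for line in content.splitlines()
                  if line.strip() and not any(skip in line for skip in ignore)]
        return b"\n".join(output)

    previous_md5 = None
    page_text = "price: 10\nlast visited: today"

    content = strip_ignore_text(page_text, ["last visited"])
    fetched_md5 = hashlib.md5(content).hexdigest()
    # The very first check also trips this; run() only sets last_changed
    # when a previous hash already existed.
    changed_detected = previous_md5 != fetched_md5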
@@ -1,14 +0,0 @@

from flask import make_response
from functools import wraps, update_wrapper
from datetime import datetime

def nocache(view):
    @wraps(view)
    def no_cache(*args, **kwargs):
        response = make_response(view(*args, **kwargs))
        response.headers['hmm'] = datetime.now()

        return response

    return update_wrapper(no_cache, view)
12 backend/pytest.ini Normal file
@@ -0,0 +1,12 @@
[pytest]
addopts = --no-start-live-server --live-server-port=5005
#testpaths = tests pytest_invenio
#live_server_scope = session

filterwarnings =
    ignore::DeprecationWarning:urllib3.*:

; logging options
log_cli = 1
log_cli_level = DEBUG
log_cli_format = %(asctime)s %(name)s: %(levelname)s %(message)s
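The --no-start-live-server option matters here: with pytest-flask, it means the live server only binds (on --live-server-port) when a test explicitly calls live_server.start(), so the test can attach extra routes first. A minimal sketch of that flow, assuming an `app` fixture like the conftest.py below provides:

    from flask import url_for

    def test_attach_route_then_start(live_server):
        @live_server.app.route('/hello')   # register a route before starting
        def hello():
            return "hi"

        live_server.start()                # now bind to --live-server-port
        assert url_for('hello', _external=True).endswith('/hello')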
@@ -48,7 +48,13 @@ section.content {
/* table related */
.watch-table {
    width: 100%;

}

.watch-table tr.unviewed {
    font-weight: bold;
}

.watch-tag-list {
    color: #e70069;
    white-space: nowrap;
@@ -82,11 +88,16 @@ section.content {
    margin: 0 3px 0 5px;
}

#check-all-button {
    text-align:right;
#post-list-buttons {
    text-align: right;
    padding: 0px;
    margin: 0px;
}
#post-list-buttons li {
    display: inline-block;
}

#check-all-button a {
#post-list-buttons a {
    border-top-left-radius: initial;
    border-top-right-radius: initial;
    border-bottom-left-radius: 5px;
@@ -208,3 +219,55 @@ body:after, body:before {
    color: #fff;
}

#diff-col {
    padding-left:40px;
}
#diff-jump {
    position: fixed;
    left: 0px;
    top: 80px;
    background: #fff;
    padding: 10px;
    border-top-right-radius: 5px;
    border-bottom-right-radius: 5px;
    box-shadow: 5px 0 5px -2px #888;
}

#diff-jump a {
    color: #1b98f8;
    cursor: grabbing;
    -moz-user-select: none;
    -webkit-user-select: none;
    -ms-user-select:none;
    user-select:none;
    -o-user-select:none;
}

footer {
    padding: 10px;
    background: #fff;
    color: #444;
    text-align: center;
}

#feed-icon {
    vertical-align: middle;
}

#version {
    position: absolute;
    top: 80px;
    right: 0px;
    font-size: 8px;
    background: #fff;
    padding: 10px;
}

#new-version-text a{
    color: #e07171;
}

#diff-stream {
    font-size: 10px;
    white-space: pre-wrap;
}
18 backend/static/images/Generic_Feed-icon.svg Normal file
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg"
     id="RSSicon"
     viewBox="0 0 8 8" width="256" height="256">

    <title>RSS feed icon</title>

    <style type="text/css">
        .button {stroke: none; fill: orange;}
        .symbol {stroke: none; fill: white;}
    </style>

    <rect class="button" width="8" height="8" rx="1.5" />
    <circle class="symbol" cx="2" cy="6" r="1" />
    <path class="symbol" d="m 1,4 a 3,3 0 0 1 3,3 h 1 a 4,4 0 0 0 -4,-4 z" />
    <path class="symbol" d="m 1,2 a 5,5 0 0 1 5,5 h 1 a 6,6 0 0 0 -6,-6 z" />

</svg>
199 backend/store.py
@@ -1,9 +1,14 @@
import json
import uuid as uuid_builder
import validators
import os.path
from os import path
from threading import Lock, Thread
from threading import Lock

from copy import deepcopy

import logging
import time
import threading


# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
@@ -12,13 +17,15 @@ from threading import Lock, Thread
class ChangeDetectionStore:
    lock = Lock()

    def __init__(self):
    def __init__(self, datastore_path="/datastore", include_default_watches=True):
        self.needs_write = False
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        self.stop_thread = False

        self.__data = {
            'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
            'watching': {},
            'tag': "0.23",
            'settings': {
                'headers': {
                    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
@@ -40,11 +47,14 @@ class ChangeDetectionStore:
            'tag': None,
            'last_checked': 0,
            'last_changed': 0,
            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
            'newest_history_key': "",
            'title': None,
            'previous_md5': None,
            'previous_md5': "",
            'uuid': str(uuid_builder.uuid4()),
            'headers': {},  # Extra headers to send
            'history': {}  # Dict of timestamp and output stripped filename
            'history': {},  # Dict of timestamp and output stripped filename
            'ignore_text': []  # List of text to ignore when calculating the comparison checksum
        }

        if path.isfile('/source.txt'):
@@ -54,7 +64,8 @@ class ChangeDetectionStore:
                self.__data['build_sha'] = f.read()

        try:
            with open('/datastore/url-watches.json') as json_file:
            # @todo retest with ", encoding='utf-8'"
            with open(self.json_store_path) as json_file:
                from_disk = json.load(json_file)

                # @todo isnt there a way todo this dict.update recursively?
@@ -62,6 +73,9 @@ class ChangeDetectionStore:
                if 'watching' in from_disk:
                    self.__data['watching'].update(from_disk['watching'])

                if 'app_guid' in from_disk:
                    self.__data['app_guid'] = from_disk['app_guid']

                if 'settings' in from_disk:
                    if 'headers' in from_disk['settings']:
                        self.__data['settings']['headers'].update(from_disk['settings']['headers'])
@@ -71,39 +85,85 @@ class ChangeDetectionStore:

                # Reinitialise each `watching` with our generic_definition in the case that we add a new var in the future.
                # @todo pretty sure theres a python we todo this with an abstracted(?) object!
                i = 0
                for uuid, watch in self.data['watching'].items():
                    _blank = self.generic_definition.copy()
                for uuid, watch in self.__data['watching'].items():
                    _blank = deepcopy(self.generic_definition)
                    _blank.update(watch)
                    self.__data['watching'].update({uuid: _blank})
                    print("Watching:", uuid, _blank['url'])
                    self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
                    print("Watching:", uuid, self.__data['watching'][uuid]['url'])

        # First time ran, doesnt exist.
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            print("Creating JSON store")
            self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
            self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
            self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
            self.add_watch(url='https://changedetection.io', tag='Tech news')
            if include_default_watches:
                print("Creating JSON store at", self.datastore_path)

                self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
                self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
                self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
                self.add_watch(url='https://changedetection.io', tag='Tech news')


        self.__data['version_tag'] = "0.27"

        if not 'app_guid' in self.__data:
            self.__data['app_guid'] = str(uuid_builder.uuid4())

        self.needs_write = True

        # Finally start the thread that will manage periodic data saves to JSON
        save_data_thread = threading.Thread(target=self.save_datastore).start()

    # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
    def get_newest_history_key(self, uuid):
        if len(self.__data['watching'][uuid]['history']) == 1:
            return 0

        dates = list(self.__data['watching'][uuid]['history'].keys())
        # Convert to int, sort and back to str again
        dates = [int(i) for i in dates]
        dates.sort(reverse=True)
        if len(dates):
            # always keyed as str
            return str(dates[0])

        return 0

    def set_last_viewed(self, uuid, timestamp):
        self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
        self.needs_write = True

    def update_watch(self, uuid, update_obj):

        self.lock.acquire()
        with self.lock:

            # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
            for dict_key, d in self.generic_definition.items():
                if isinstance(d, dict) and dict_key in update_obj:
                    self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
                    del(update_obj[dict_key])
            # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
            for dict_key, d in self.generic_definition.items():
                if isinstance(d, dict):
                    if update_obj is not None and dict_key in update_obj:
                        self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
                        del (update_obj[dict_key])

            # Update with the remaining values
            self.__data['watching'][uuid].update(update_obj)
            self.__data['watching'][uuid].update(update_obj)
            self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)

            self.needs_write = True
        self.lock.release()

    @property
    def data(self):

        has_unviewed = False

        for uuid, v in self.__data['watching'].items():
            self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
            if int(v['newest_history_key']) <= int(v['last_viewed']):
                self.__data['watching'][uuid]['viewed'] = True

            else:
                self.__data['watching'][uuid]['viewed'] = False
                has_unviewed = True

        self.__data['has_unviewed'] = has_unviewed

        return self.__data

    def get_all_tags(self):
@@ -113,18 +173,20 @@ class ChangeDetectionStore:
            # Support for comma separated list of tags.
            for tag in watch['tag'].split(','):
                tag = tag.strip()
                if not tag in tags:
                if tag not in tags:
                    tags.append(tag)

        tags.sort()
        return tags

    def delete(self, uuid):
        with self.lock:
            if uuid == 'all':
                self.__data['watching'] = {}
            else:
                del (self.__data['watching'][uuid])

        self.lock.acquire()
        del (self.__data['watching'][uuid])
        self.needs_write = True
        self.lock.release()
            self.needs_write = True

    def url_exists(self, url):

@@ -140,31 +202,68 @@ class ChangeDetectionStore:
        return self.data['watching'][uuid].get(val)

    def add_watch(self, url, tag):
        self.lock.acquire()
        print("Adding", url, tag)
        # # @todo deal with exception
        # validators.url(url)
        with self.lock:
            # @todo use a common generic version of this
            new_uuid = str(uuid_builder.uuid4())
            _blank = deepcopy(self.generic_definition)
            _blank.update({
                'url': url,
                'tag': tag,
                'uuid': new_uuid
            })

        # @todo use a common generic version of this
        new_uuid = str(uuid_builder.uuid4())
        _blank = self.generic_definition.copy()
        _blank.update({
            'url': url,
            'tag': tag,
            'uuid': new_uuid
        })
            self.data['watching'][new_uuid] = _blank

        self.data['watching'][new_uuid] = _blank
        self.needs_write = True
        self.lock.release()
        # Get the directory ready
        output_path = "{}/{}".format(self.datastore_path, new_uuid)
        try:
            os.mkdir(output_path)
        except FileExistsError:
            print(output_path, "already exists.")

        self.sync_to_json()
        return new_uuid

    # Save some text file to the appropriate path and bump the history
    # result_obj from fetch_site_status.run()
    def save_history_text(self, uuid, result_obj, contents):

        output_path = "{}/{}".format(self.datastore_path, uuid)
        try:
            os.mkdir(output_path)
        except FileExistsError:
            pass

        output_path = "{}/{}".format(self.datastore_path, uuid)
        fname = "{}/{}-{}.stripped.txt".format(output_path, result_obj['previous_md5'], str(time.time()))
        with open(fname, 'w') as f:
            f.write(contents)
            f.close()

        # Update history with the stripped text for future reference, this will also mean we save the first
        # Should always be keyed by string(timestamp)
        self.update_watch(uuid, {"history": {str(result_obj["last_checked"]): fname}})

        return fname

    def sync_to_json(self):
        print("Saving index")
        self.lock.acquire()
        with open('/datastore/url-watches.json', 'w') as json_file:
            json.dump(self.data, json_file, indent=4)
        print("Saving..")
        with open(self.json_store_path, 'w') as json_file:
            json.dump(self.__data, json_file, indent=4)
            logging.info("Re-saved index")

        self.needs_write = False
        self.lock.release()

    # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
    # by just running periodically in one thread, according to python, dict updates are threadsafe.
    def save_datastore(self):

        while True:
            if self.stop_thread:
                print("Shutting down datastore thread")
                return
            if self.needs_write:
                self.sync_to_json()
            time.sleep(1)

    # body of the constructor
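The comment in update_watch() about the |= operator is the key design point: a shallow merge replaces a nested dict wholesale, which would throw away earlier history entries. A small sketch (made-up watch data) showing the pitfall and the per-key merge the store does instead:

    from copy import deepcopy

    watch = {'history': {'100': 'a.txt'}, 'last_checked': 100}
    update_obj = {'history': {'200': 'b.txt'}, 'last_checked': 200}

    shallow = deepcopy(watch)
    shallow.update(update_obj)          # 'history' is replaced, the '100' entry is lost
    assert shallow['history'] == {'200': 'b.txt'}

    merged = deepcopy(watch)
    for key, value in list(update_obj.items()):
        if isinstance(merged.get(key), dict):
            merged[key].update(value)   # merge nested dicts key-by-key instead
        else:
            merged[key] = value
    assert merged['history'] == {'100': 'a.txt', '200': 'b.txt'}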
@@ -19,7 +19,11 @@
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed">
    <a class="pure-menu-heading" href="/"><strong>Change</strong>Detection.io</a>
    {% if current_diff_url %}
    <a class=current-diff-url href="{{ current_diff_url }}"><span style="max-width: 30%; overflow: hidden;">{{ current_diff_url }}</a>
    <a class=current-diff-url href="{{ current_diff_url }}"><span style="max-width: 30%; overflow: hidden;">{{ current_diff_url }}</span></a>
    {% else %}
    {% if new_version_available %}
    <span id="new-version-text" class="pure-menu-heading"><a href="https://github.com/dgtlmoon/changedetection.io">A new version is available</a></span>
    {% endif %}
    {% endif %}

    <ul class="pure-menu-list">
@@ -34,7 +38,8 @@
        <a href="/settings" class="pure-menu-link">SETTINGS</a>
    </li>
    <li class="pure-menu-item"><a class="github-link" href="https://github.com/dgtlmoon/changedetection.io">
        <svg class="octicon octicon-mark-github v-align-middle" height="32" viewBox="0 0 16 16" version="1.1"
        <svg class="octicon octicon-mark-github v-align-middle" height="32" viewBox="0 0 16 16"
             version="1.1"
             width="32" aria-hidden="true">
            <path fill-rule="evenodd"
                  d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"></path>
@@ -47,7 +52,7 @@
    </ul>
</div>
</div>

<div id="version">v{{ version }}</div>
<section class="content">
    <header>
        {% block header %}{% endblock %}
@@ -66,5 +71,6 @@

{% endblock %}
</section>

</body>
</html>
@@ -32,6 +32,9 @@
    <ins>Inserted Text</ins>
</div>

<div id="diff-jump">
    <a onclick="next_diff();">Jump</a>
</div>
<div id="diff-ui">

    <table>
@@ -40,7 +43,7 @@
        <!-- just proof of concept copied straight from github.com/kpdecker/jsdiff -->
        <td id="a" style="display: none;">{{previous}}</td>
        <td id="b" style="display: none;">{{newest}}</td>
        <td>
        <td id="diff-col">
            <span id="result"></span>
        </td>
    </tr>
@@ -72,9 +75,12 @@ function changed() {
    var node;
    if (diff[i].removed) {
        node = document.createElement('del');
        node.classList.add("change");
        node.appendChild(document.createTextNode(diff[i].value));

    } else if (diff[i].added) {
        node = document.createElement('ins');
        node.classList.add("change");
        node.appendChild(document.createTextNode(diff[i].value));
    } else {
        node = document.createTextNode(diff[i].value);
@@ -131,7 +137,26 @@ for (var i = 0; i < radio.length; i++) {
}


var inputs = document.getElementsByClassName('change');
inputs.current=0;

function next_diff() {

    var element = inputs[inputs.current];
    var headerOffset = 80;
    var elementPosition = element.getBoundingClientRect().top;
    var offsetPosition = elementPosition - headerOffset + window.scrollY;

    window.scrollTo({
        top: offsetPosition,
        behavior: "smooth"
    });

    inputs.current++;
    if(inputs.current >= inputs.length) {
        inputs.current=0;
    }
}


</script>
@@ -4,7 +4,7 @@
<div class="edit-form">


    <form class="pure-form pure-form-stacked" action="/edit?uuid={{uuid}}" method="POST">
    <form class="pure-form pure-form-stacked" action="/edit/{{uuid}}" method="POST">
        <fieldset>
            <div class="pure-control-group">
                <label for="url">URL</label>
@@ -18,10 +18,26 @@
                <span class="pure-form-message-inline">Grouping tags, can be a comma separated list.</span>
            </div>

            <!-- @todo: move to tabs --->
            <fieldset class="pure-group">
                <label for="ignore-text">Ignore text</label>

                <textarea id="ignore-text" name="ignore-text" class="pure-input-1-2" placeholder=""
                          style="width: 100%;
                          font-family:monospace;
                          white-space: pre;
                          overflow-wrap: normal;
                          overflow-x: scroll;" rows="5">{% for value in watch.ignore_text %}{{ value }}
{% endfor %}</textarea>
                <span class="pure-form-message-inline">Each line will be processed separately as an ignore rule.</span>

            </fieldset>

            <!-- @todo: move to tabs --->
            <fieldset class="pure-group">
                <label for="headers">Extra request headers</label>

                <textarea id=headers name="headers" class="pure-input-1-2" placeholder="Example
                <textarea id="headers" name="headers" class="pure-input-1-2" placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0"
                          style="width: 100%;
@@ -33,6 +49,8 @@ User-Agent: wonderbra 1.0"
                <br/>

            </fieldset>


            <div class="pure-control-group">
                <button type="submit" class="pure-button pure-button-primary">Save</button>
            </div>
12 backend/templates/watch-diff-stream.html Normal file
@@ -0,0 +1,12 @@
{% extends 'watch-overview.html' %}
{% block innercontent %}
Entries: {{ streams|length }}

<div id="diff-stream" class="edit-form">
    {% for item in streams %}
    {{ loop.index }}
    {% for diff in item %}{% if diff[0] =='+' %}<ins>{{ diff }}</ins>{% endif %}{% if diff[0] =='-' %}<del>{{ diff }}</del>{% endif %}
    {% endfor %}
    {% endfor %}
</div>
{% endblock %}
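The template above renders any diff line whose first character is '+' as <ins> and '-' as <del>. A hedged sketch of producing that kind of stream on the Python side with difflib, whose ndiff() emits exactly such prefixed lines (illustrative page text, not the app's actual view code):

    import difflib

    previous = "price: 10\nin stock".splitlines()
    newest = "price: 12\nin stock".splitlines()

    stream = [line for line in difflib.ndiff(previous, newest)
              if line.startswith(('+', '-'))]
    # e.g. ['- price: 10', '+ price: 12'], one entry per changed line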
@@ -15,61 +15,18 @@
    <!-- user/pass r = requests.get('https://api.github.com/user', auth=('user', 'pass')) -->
</form>
<div>

    <a href="/" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
    {% for tag in tags %}
    {% if tag == "" %}
    <a href="/" class="pure-button button-tag {{'active' if active_tag == tag }}">All</a>
    {% else %}
    <a href="/?tag={{ tag}}" class="pure-button button-tag {{'active' if active_tag == tag }}">{{ tag }}</a>
    {% if tag != "" %}
    <a href="/?tag={{ tag}}" class="pure-button button-tag {{'active' if active_tag == tag }}">{{ tag }}</a>
    {% endif %}
    {% endfor %}
</div>

<div id="watch-table-wrapper">
    <table class="pure-table pure-table-striped watch-table">
        <thead>
        <tr>
            <th>#</th>
            <th></th>
            <th>Last Checked</th>
            <th>Last Changed</th>
            <th></th>
        </tr>
        </thead>
        <tbody>
    {% block innercontent %}


    {% for watch in watches %}
        <tr id="{{ watch.uuid }}"
            class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}">
            <td>{{ loop.index }}</td>
            <td class="title-col">{{watch.title if watch.title is not none else watch.url}}
                <a class="external" target=_blank href="{{ watch.url }}"></a>
                {% if watch.last_error is defined and watch.last_error != False %}
                <div class="fetch-error">{{ watch.last_error }}</div>
                {% endif %}
                {% if not active_tag %}
                <span class="watch-tag-list">{{ watch.tag}}</span>
                {% endif %}
            </td>
            <td>{{watch|format_last_checked_time}}</td>
            <td>{{watch.last_changed|format_timestamp_timeago}}</td>
            <td><a href="/api/checknow?uuid={{ watch.uuid}}{% if request.args.get('tag') %}&tag={{request.args.get('tag')}}{% endif %}" class="pure-button button-small pure-button-primary">Recheck</a>
                <a href="/edit?uuid={{ watch.uuid}}" class="pure-button button-small pure-button-primary">Edit</a>
                {% if watch.history|length >= 2 %}
                <a href="/diff/{{ watch.uuid}}" class="pure-button button-small pure-button-primary">Diff</a>
                {% endif %}
            </td>
        </tr>
    {% endfor %}


        </tbody>
    </table>
    <div id="check-all-button">

        <a href="/api/checknow{% if active_tag%}?tag={{active_tag}}{%endif%}" class="pure-button button-tag " >Recheck all {% if active_tag%}in "{{active_tag}}"{%endif%}</a>
    </div>
    {% endblock %}
</div>
</div>
{% endblock %}
64 backend/templates/watch-table.html Normal file
@@ -0,0 +1,64 @@
{% extends 'watch-overview.html' %}
{% block innercontent %}
<table class="pure-table pure-table-striped watch-table">
    <thead>
    <tr>
        <th>#</th>
        <th></th>
        <th>Last Checked</th>
        <th>Last Changed</th>
        <th></th>
    </tr>
    </thead>
    <tbody>


    {% for watch in watches %}
    <tr id="{{ watch.uuid }}"
        class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
        {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
        {% if watch.newest_history_key| int > watch.last_viewed| int %}unviewed{% endif %}">
        <td>{{ loop.index }}</td>
        <td class="title-col">{{watch.title if watch.title is not none else watch.url}}
            <a class="external" target=_blank href="{{ watch.url }}"></a>
            {% if watch.last_error is defined and watch.last_error != False %}
            <div class="fetch-error">{{ watch.last_error }}</div>
            {% endif %}
            {% if not active_tag %}
            <span class="watch-tag-list">{{ watch.tag}}</span>
            {% endif %}
        </td>
        <td>{{watch|format_last_checked_time}}</td>
        <td>{% if watch.history|length >= 2 and watch.last_changed %}
            {{watch.last_changed|format_timestamp_timeago}}
            {% else %}
            Not yet
            {% endif %}
        </td>
        <td>
            <a href="/api/checknow?uuid={{ watch.uuid}}{% if request.args.get('tag') %}&tag={{request.args.get('tag')}}{% endif %}"
               class="pure-button button-small pure-button-primary">Recheck</a>
            <a href="/edit/{{ watch.uuid}}" class="pure-button button-small pure-button-primary">Edit</a>
            {% if watch.history|length >= 2 %}
            <a href="/diff/{{ watch.uuid}}" class="pure-button button-small pure-button-primary">Diff</a>
            {% endif %}
        </td>
    </tr>
    {% endfor %}
    </tbody>
</table>
<ul id="post-list-buttons">
    {% if has_unviewed %}
    <li>
        <a href="/api/mark-all-viewed" class="pure-button button-tag ">Mark all viewed</a>
    </li>
    {% endif %}
    <li>
        <a href="/api/checknow{% if active_tag%}?tag={{active_tag}}{%endif%}" class="pure-button button-tag ">Recheck
            all {% if active_tag%}in "{{active_tag}}"{%endif%}</a>
    </li>
    <li>
        <a href="{{ url_for('index', tag=active_tag , rss=true)}}"><img id="feed-icon" src="/static/images/Generic_Feed-icon.svg" height="15px"></a>
    </li>
</ul>
{% endblock %}
2 backend/tests/__init__.py Normal file
@@ -0,0 +1,2 @@
"""Tests for the app."""
48 backend/tests/conftest.py Normal file
@@ -0,0 +1,48 @@
#!/usr/bin/python3

import pytest
from backend import changedetection_app
from backend import store
import os


# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
# Much better boilerplate than the docs
# https://www.python-boilerplate.com/py3+flask+pytest/

global app

@pytest.fixture(scope='session')
def app(request):
    """Create application for the tests."""
    datastore_path = "./test-datastore"

    try:
        os.mkdir(datastore_path)
    except FileExistsError:
        pass

    try:
        os.unlink("{}/url-watches.json".format(datastore_path))
    except FileNotFoundError:
        pass

    app_config = {'datastore_path': datastore_path}
    datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
    app = changedetection_app(app_config, datastore)
    app.config['STOP_THREADS'] = True

    def teardown():
        datastore.stop_thread = True
        app.config.exit.set()
        try:
            os.unlink("{}/url-watches.json".format(datastore_path))
        except FileNotFoundError:
            # This is fine in the case of a failure.
            pass

        assert 1 == 1

    request.addfinalizer(teardown)
    yield app
123 backend/tests/test_backend.py Normal file
@@ -0,0 +1,123 @@
#!/usr/bin/python3

import time
from flask import url_for
from urllib.request import urlopen
import pytest

sleep_time_for_fetch_thread = 3


def test_setup_liveserver(live_server):
    @live_server.app.route('/test-endpoint')
    def test_endpoint():
        # Tried using a global var here but didn't seem to work, so reading from a file instead.
        with open("test-datastore/output.txt", "r") as f:
            return f.read()

    live_server.start()

    assert 1 == 1


def set_original_response():
    test_return_data = """<html>
    <body>
    Some initial text</br>
    <p>Which is across multiple lines</p>
    </br>
    So let's see what happens. </br>
    </body>
    </html>
    """

    with open("test-datastore/output.txt", "w") as f:
        f.write(test_return_data)


def set_modified_response():
    test_return_data = """<html>
    <body>
    Some initial text</br>
    <p>which has this one new line</p>
    </br>
    So let's see what happens. </br>
    </body>
    </html>
    """

    with open("test-datastore/output.txt", "w") as f:
        f.write(test_return_data)


def test_check_basic_change_detection_functionality(client, live_server):
    set_original_response()

    # Add our URL to the import page
    res = client.post(
        url_for("import_page"),
        data={"urls": url_for('test_endpoint', _external=True)},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    time.sleep(sleep_time_for_fetch_thread)

    # Do this a few times.. ensures we dont accidently set the status
    for n in range(3):
        client.get(url_for("api_watch_checknow"), follow_redirects=True)

        # Give the thread time to pick it up
        time.sleep(sleep_time_for_fetch_thread)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'test-endpoint' in res.data

    #####################

    # Make a change
    set_modified_response()

    res = urlopen(url_for('test_endpoint', _external=True))
    assert b'which has this one new line' in res.read()

    # Force recheck
    res = client.get(url_for("api_watch_checknow"), follow_redirects=True)
    assert b'1 watches are rechecking.' in res.data

    time.sleep(sleep_time_for_fetch_thread)

    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

    # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
    res = client.get(url_for("diff_history_page", uuid="first"))
    assert b'Compare newest' in res.data

    time.sleep(2)

    # Do this a few times.. ensures we dont accidently set the status
    for n in range(2):
        client.get(url_for("api_watch_checknow"), follow_redirects=True)

        # Give the thread time to pick it up
        time.sleep(sleep_time_for_fetch_thread)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'test-endpoint' in res.data

    set_original_response()

    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

    # Cleanup everything
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
152 backend/tests/test_ignore_text.py Normal file
@@ -0,0 +1,152 @@
#!/usr/bin/python3

import time
from flask import url_for
from urllib.request import urlopen
import pytest


# Unit test of the stripper
# Always we are dealing in utf-8
def test_strip_text_func():
    from backend import fetch_site_status

    test_content = """
    Some content
    is listed here

    but sometimes we want to remove the lines.

    but not always."""

    ignore_lines = ["sometimes"]

    fetcher = fetch_site_status.perform_site_check(datastore=False)
    stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines)

    assert b"sometimes" not in stripped_content
    assert b"Some content" in stripped_content


def set_original_ignore_response():
    test_return_data = """<html>
    <body>
    Some initial text</br>
    <p>Which is across multiple lines</p>
    </br>
    So let's see what happens. </br>
    </body>
    </html>

    """

    with open("test-datastore/output.txt", "w") as f:
        f.write(test_return_data)


def set_modified_original_ignore_response():
    test_return_data = """<html>
    <body>
    Some NEW nice initial text</br>
    <p>Which is across multiple lines</p>
    </br>
    So let's see what happens. </br>
    </body>
    </html>

    """

    with open("test-datastore/output.txt", "w") as f:
        f.write(test_return_data)


# Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text
def set_modified_ignore_response():
    test_return_data = """<html>
    <body>
    Some initial text</br>
    <p>Which is across multiple lines</p>
    <P>ZZZZZ</P>
    </br>
    So let's see what happens. </br>
    </body>
    </html>

    """

    with open("test-datastore/output.txt", "w") as f:
        f.write(test_return_data)


def test_check_ignore_text_functionality(client, live_server):
    sleep_time_for_fetch_thread = 3

    ignore_text = "XXXXX\nYYYYY\nZZZZZ"
    set_original_ignore_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Trigger a check
    client.get(url_for("api_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"ignore-text": ignore_text, "url": test_url, "tag": "", "headers": ""},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Check it saved
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(ignore_text.encode('utf-8')) in res.data

    # Trigger a check
    client.get(url_for("api_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data

    # Make a change
    set_modified_ignore_response()

    # Trigger a check
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data

    # Just to be sure.. set a regular modified change..
    set_modified_original_ignore_response()
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
73 changedetection.py Normal file
@@ -0,0 +1,73 @@
#!/usr/bin/python3

# Launch as a eventlet.wsgi server instance.

import getopt
import sys

import eventlet
import eventlet.wsgi
import backend

from backend import store


def main(argv):
    ssl_mode = False
    port = 5000
    datastore_path = "./datastore"

    try:
        opts, args = getopt.getopt(argv, "sd:p:", "purge")
    except getopt.GetoptError:
        print('backend.py -s SSL enable -p [port] -d [datastore path]')
        sys.exit(2)

    for opt, arg in opts:
        # if opt == '--purge':
        # Remove history, the actual files you need to delete manually.
        # for uuid, watch in datastore.data['watching'].items():
        #     watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})

        if opt == '-s':
            ssl_mode = True

        if opt == '-p':
            port = int(arg)

        if opt == '-d':
            datastore_path = arg


    # threads can read from disk every x seconds right?
    # front end can just save
    # We just need to know which threads are looking at which UUIDs

    # isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
    app_config = {'datastore_path': datastore_path}

    datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'])
    app = backend.changedetection_app(app_config, datastore)

    @app.context_processor
    def inject_version():
        return dict(version=datastore.data['version_tag'])

    @app.context_processor
    def inject_new_version_available():
        return dict(new_version_available=app.config['NEW_VERSION_AVAILABLE'])

    if ssl_mode:
        # @todo finalise SSL config, but this should get you in the right direction if you need it.
        eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)),
                                               certfile='cert.pem',
                                               keyfile='privkey.pem',
                                               server_side=True), app)

    else:
        eventlet.wsgi.server(eventlet.listen(('', port)), app)


if __name__ == '__main__':
    main(sys.argv[1:])
@@ -1,2 +0,0 @@
Empty dir, please keep, this is used to store your data!
@@ -9,7 +9,7 @@ services:
    image: dgtlmoon/changedetection.io:dev
    container_name: changedetection.io-dev
    volumes:
      - ./backend:/app
      - .:/app
      - ./requirements.txt:/requirements.txt  # Normally COPY'ed in the Dockerfile
      - ./datastore:/datastore
@@ -1,21 +1,13 @@
aiohttp
async-timeout
chardet==2.3.0
multidict
python-engineio
six==1.10.0
yarl
flask

eventlet
flask~= 1.0
pytest ~=6.2
pytest-flask ~=1.1
eventlet ~= 0.30
requests
validators

bleach==3.2.1
html5lib==0.9999999 # via bleach
timeago
html2text
inscriptis

# @notes
# - Dont install socketio, it interferes with flask_socketio
timeago ~=1.0
inscriptis ~= 1.1
feedgen ~= 0.9
pytz
urllib3
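feedgen is new in this list, presumably backing the RSS link added to the watch table above. A minimal hedged sketch (hypothetical titles and URLs) of building a feed with it:

    from feedgen.feed import FeedGenerator

    fg = FeedGenerator()
    fg.title('changedetection.io watched pages')
    fg.link(href='http://localhost:5000', rel='alternate')
    fg.description('Pages that changed since you last viewed them')

    fe = fg.add_entry()
    fe.title('Example watch changed')
    fe.link(href='http://localhost:5000/diff/some-uuid')
    fe.description('Detected a change')

    rss_bytes = fg.rss_str(pretty=True)  # serialize the RSS 2.0 XML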
BIN screenshot.png (binary file not shown; 217 KiB before, 213 KiB after)