Merge branch 'master' into history-preview-ignore-text-highlighting

This commit is contained in:
dgtlmoon
2025-11-13 15:29:39 +01:00
23 changed files with 407 additions and 102 deletions
+17 -6
View File
@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.50.40'
__version__ = '0.50.43'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -74,6 +74,12 @@ def main():
datastore_path = None
do_cleanup = False
# Optional URL to watch since start
default_url = None
# Set a default logger level
logger_level = 'DEBUG'
include_default_watches = True
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
port = int(os.environ.get('PORT', 5000))
ssl_mode = False
@@ -87,15 +93,13 @@ def main():
datastore_path = os.path.join(os.getcwd(), "../datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port")
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
sys.exit(2)
create_datastore_dir = False
# Set a default logger level
logger_level = 'DEBUG'
# Set a logger level via shell env variable
# Used: Dockerfile for CICD
# To set logger level for pytest, see the app function in tests/conftest.py
@@ -116,6 +120,10 @@ def main():
if opt == '-d':
datastore_path = arg
if opt == '-u':
default_url = arg
include_default_watches = False
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
@@ -172,13 +180,16 @@ def main():
sys.exit(2)
try:
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__, include_default_watches=include_default_watches)
except JSONDecodeError as e:
# Dont' start if the JSON DB looks corrupt
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
logger.critical(str(e))
return
if default_url:
datastore.add_watch(url = default_url)
app = changedetection_app(app_config, datastore)
# Get the SocketIO instance from the Flask app (created in flask_app.py)
+1 -1
View File
@@ -41,7 +41,7 @@ def get_openapi_spec():
# Possibly for pip3 packages
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
with open(spec_path, 'r') as f:
with open(spec_path, 'r', encoding='utf-8') as f:
spec_dict = yaml.safe_load(f)
_openapi_spec = OpenAPI.from_dict(spec_dict)
return _openapi_spec
+17 -1
View File
@@ -1 +1,17 @@
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
from copy import deepcopy
from loguru import logger
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio.notification import valid_notification_formats
RSS_CONTENT_FORMAT_DEFAULT = 'text'
# Some stuff not related
RSS_FORMAT_TYPES = deepcopy(valid_notification_formats)
if RSS_FORMAT_TYPES.get('markdown'):
del RSS_FORMAT_TYPES['markdown']
if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH):
del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH]
if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT):
logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}")
+106 -14
View File
@@ -37,6 +37,51 @@ def clean_entry_content(content):
def construct_blueprint(datastore: ChangeDetectionStore):
rss_blueprint = Blueprint('rss', __name__)
# Helper function to generate GUID for RSS entries
def generate_watch_guid(watch):
    """Build the per-event GUID for a watch's RSS entry.

    Joins the watch UUID with its last-changed timestamp so that every
    change event yields a distinct GUID (Re #239 — GUIDs must be unique
    per event, not per watch).
    """
    return "{}/{}".format(watch['uuid'], watch.last_changed)
# Helper function to generate diff content for a watch
def generate_watch_diff_content(watch, dates, rss_content_format):
    """
    Generate HTML diff content for a watch given its history dates.

    :param watch: the watch object (dict-style access plus history helpers)
    :param dates: ordered history timestamps; the last two entries are diffed
    :param rss_content_format: requested output format (e.g. 'text', 'html', 'htmlcolor')
    :return: tuple of (content, watch_label) ready for RSS/display
    """
    from changedetectionio import diff

    # Same logic as watch-overview.html
    if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
        watch_label = watch.label
    else:
        watch_label = watch.get('url')

    try:
        html_diff = diff.render_diff(
            previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]),
            newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]),
            include_equal=False
        )
        # Fix: use the format the caller passed in instead of re-reading it from
        # datastore settings — the parameter was previously ignored here (callers
        # pass the same settings value, so behavior is unchanged for them).
        url, html_diff, n_title = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=rss_content_format)
    except FileNotFoundError as e:
        # Fix: the title was missing its closing quote in this message.
        html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')}' not found."

    # @note: We use <pre> because nearly all RSS readers render only HTML (Thunderbird for example cant do just plaintext)
    rss_template = "<pre>{{watch_label}} had a change.\n\n{{html_diff}}\n</pre>"
    if 'html' in rss_content_format:
        rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_label}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"

    content = jinja_render(template_str=rss_template, watch_label=watch_label, html_diff=html_diff, watch_url=watch.link)

    # Out of range chars could also break feedgen
    if scan_invalid_chars_in_rss(content):
        content = clean_entry_content(content)

    return content, watch_label
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
# to some earlier blueprint rerouting work, it should goto feed.
@rss_blueprint.route("/", methods=['GET'])
@@ -51,6 +96,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
@@ -81,10 +128,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
fg.description('Feed description')
fg.link(href='https://changedetection.io')
html_colour_enable = False
if datastore.data['settings']['application'].get('rss_content_format') == 'html':
html_colour_enable = True
for watch in sorted_watches:
dates = list(watch.history.keys())
@@ -95,7 +138,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if not watch.viewed:
# Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
guid = generate_watch_guid(watch)
fe = fg.add_entry()
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
@@ -109,11 +152,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
fe.link(link=diff_link)
# Same logic as watch-overview.html
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
watch_label = watch.label
else:
watch_label = watch.get('url')
content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format)
fe.title(title=watch_label)
try:
@@ -122,10 +161,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]),
include_equal=False,
)
requested_output_format = 'htmlcolor' if html_colour_enable else 'html'
html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)
html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=rss_content_format)
except FileNotFoundError as e:
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
@@ -140,6 +176,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if scan_invalid_chars_in_rss(content):
content = clean_entry_content(content)
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
@@ -151,4 +188,59 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
return response
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
def rss_single_watch(uuid):
    """
    Display the most recent change for a single watch as RSS feed.

    Returns RSS XML with a single entry showing the diff between the last two snapshots.
    Responds 403 on a bad/missing token, 404 for an unknown UUID, and 400 when the
    watch has fewer than two history snapshots (nothing to diff yet).
    """
    # Always requires token set
    app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
    rss_url_token = request.args.get('token')
    rss_content_format = datastore.data['settings']['application'].get('rss_content_format')

    if rss_url_token != app_rss_token:
        return "Access denied, bad token", 403

    # Get the watch by UUID
    watch = datastore.data['watching'].get(uuid)
    if not watch:
        return f"Watch with UUID {uuid} not found", 404

    # Check if watch has at least 2 history snapshots
    dates = list(watch.history.keys())
    if len(dates) < 2:
        return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400

    # Add uuid to watch for proper functioning
    # (helpers below read watch['uuid'] — presumably it isn't stored on the entity itself)
    watch['uuid'] = uuid

    # Generate the diff content using the shared helper function
    content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format)

    # Create RSS feed with single entry
    fg = FeedGenerator()
    fg.title(f'changedetection.io - {watch.label}')
    fg.description('Changes')
    fg.link(href='https://changedetection.io')

    # Add single entry for this watch; GUID is unique per change event
    guid = generate_watch_guid(watch)
    fe = fg.add_entry()

    # Include a link to the diff page
    diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
    fe.link(link=diff_link)
    fe.title(title=watch_label)
    fe.content(content=content, type='CDATA')
    fe.guid(guid, permalink=False)

    # Publication date comes from the newest history key, pinned to UTC
    dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
    dt = dt.replace(tzinfo=pytz.UTC)
    fe.pubDate(dt)

    response = make_response(fg.rss_str())
    response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
    return response
return rss_blueprint
@@ -86,7 +86,7 @@
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
<span class="pure-form-message-inline">When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
</div>
</div>
</fieldset>
+6 -1
View File
@@ -236,7 +236,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Import the global plugin system
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
template_args = {
'available_processors': processors.available_processors(),
'available_timezones': sorted(available_timezones()),
@@ -252,6 +252,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
'jq_support': jq_support,
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
'app_rss_token': app_rss_token,
'rss_uuid_feed' : {
'label': watch.label,
'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token)
},
'settings_application': datastore.data['settings']['application'],
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
@@ -477,6 +477,7 @@ Math: {{ 1 + 1 }}") }}
class="pure-button button-error">Clear History</a>{% endif %}
<a href="{{url_for('ui.form_clone', uuid=uuid)}}"
class="pure-button">Clone &amp; Edit</a>
<a href="{{ url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token)}}"><img alt="RSS Feed for this watch" style="padding: .5em 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
</div>
</div>
</form>
@@ -139,7 +139,7 @@ class fetcher(Fetcher):
content = await self.page.content()
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
logger.debug(f"Saving step HTML to {destination}")
with open(destination, 'w') as f:
with open(destination, 'w', encoding='utf-8') as f:
f.write(content)
async def run(self,
+2 -2
View File
@@ -101,12 +101,12 @@ def init_app_secret(datastore_path):
path = os.path.join(datastore_path, "secret.txt")
try:
with open(path, "r") as f:
with open(path, "r", encoding='utf-8') as f:
secret = f.read()
except FileNotFoundError:
import secrets
with open(path, "w") as f:
with open(path, "w", encoding='utf-8') as f:
secret = secrets.token_hex(32)
f.write(secret)
+1 -1
View File
@@ -1000,7 +1000,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
validators=[validators.NumberRange(min=0,
message="Should be atleast zero (disabled)")])
rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES)
rss_content_format = SelectField('RSS Content format', choices=list(RSS_FORMAT_TYPES.items()))
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
+3 -3
View File
@@ -1,7 +1,7 @@
from os import getenv
from copy import deepcopy
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
from changedetectionio.notification import (
default_notification_body,
@@ -54,7 +54,7 @@ class model(dict):
'password': False,
'render_anchor_tag_content': False,
'rss_access_token': None,
'rss_content_format': RSS_FORMAT_TYPES[0][0],
'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT,
'rss_hide_muted_watches': True,
'rss_reader_mode': False,
'scheduler_timezone_default': None, # Default IANA timezone name
@@ -81,7 +81,7 @@ class model(dict):
def parse_headers_from_text_file(filepath):
headers = {}
with open(filepath, 'r') as f:
with open(filepath, 'r', encoding='utf-8') as f:
for l in f.readlines():
l = l.strip()
if not l.startswith('#') and ':' in l:
+2 -2
View File
@@ -188,7 +188,7 @@ class model(watch_base):
fname = os.path.join(self.watch_data_dir, "history.txt")
if os.path.isfile(fname):
logger.debug(f"Reading watch history index for {self.get('uuid')}")
with open(fname, "r") as f:
with open(fname, "r", encoding='utf-8') as f:
for i in f.readlines():
if ',' in i:
k, v = i.strip().split(',', 2)
@@ -594,7 +594,7 @@ class model(watch_base):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r') as f:
with open(fname, 'r', encoding='utf-8') as f:
return f.read()
return False
@@ -205,6 +205,8 @@ def replace_placemarkers_in_text(text, url, requested_output_format):
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
logger.debug(f"Applying markup in '{requested_output_format}' mode")
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :(
# 2000 bytes minus -
@@ -279,7 +279,7 @@ class ContentProcessor:
# Sort JSON to avoid false alerts from reordering
try:
content = json.dumps(json.loads(content), sort_keys=True, indent=4)
content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False)
except Exception:
# Might be malformed JSON, continue anyway
pass
+53
View File
@@ -11,6 +11,56 @@ set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Since there's no curl installed, let's roll with python3
# Poll http://localhost:<port> until the response body contains the sanity
# marker ('est-url-is-sanity'), retrying up to 6 times with a 1s delay.
# Returns 0 when the marker is found, 1 otherwise.
check_sanity() {
    local port="$1"
    if [ -z "$port" ]; then
        echo "Usage: check_sanity <port>" >&2
        return 1
    fi
    python3 - "$port" <<'PYCODE'
import sys, time, urllib.request, urllib.error, socket
port = sys.argv[1]
url = f'http://localhost:{port}'
ok = False
for _ in range(6):  # --retry 6
    try:
        r = urllib.request.urlopen(url, timeout=3).read().decode()
        if 'est-url-is-sanity' in r:
            ok = True
            break
    except (urllib.error.URLError, ConnectionRefusedError, socket.error):
        pass
    # Fix: previously the sleep only happened on connection errors, so a
    # successful response without the marker caused a tight no-delay retry loop.
    time.sleep(1)
sys.exit(0 if ok else 1)
PYCODE
}
data_sanity_test () {
    # Restart data sanity test:
    # 1) start changedetection with a default watch URL (-u) on a throwaway datastore,
    # 2) kill it, 3) restart it WITHOUT -u against the same datastore,
    # 4) verify the watch survived the restart (persisted to disk).
    cd ..
    TMPDIR=$(mktemp -d)
    # Random port in 5000-6500 — NOTE(review): not guaranteed free; a collision
    # would make this check fail spuriously.
    PORT_N=$((5000 + RANDOM % (6501 - 5000)))
    ./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
    PID=$!
    # Fixed waits — assumes startup + first persist completes within 5s
    sleep 5
    kill $PID
    sleep 2
    ./changedetection.py -p $PORT_N -d $TMPDIR &
    PID=$!
    sleep 5
    # On a restart the URL should still be there
    # NOTE(review): on failure, 'exit 1' leaves this background server running.
    check_sanity $PORT_N || exit 1
    kill $PID
    cd $OLDPWD
    # datastore looks alright, continue
}
data_sanity_test
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load tests/test_*.py
@@ -41,3 +91,6 @@ FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
# Check file:// will pickup a file when enabled
echo "Hello world" > /tmp/test-file.txt
ALLOW_FILE_URI=yes pytest tests/test_security.py
+102 -53
View File
@@ -6,6 +6,7 @@ from flask import (
flash
)
from .blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
from .html_tools import TRANSLATE_WHITESPACE_TABLE
from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from copy import deepcopy, copy
@@ -44,7 +45,7 @@ class ChangeDetectionStore:
lock = Lock()
# For general updates/writes that can wait a few seconds
needs_write = False
datastore_path = None
# For when we edit, we should write to disk
needs_write_urgent = False
@@ -54,18 +55,30 @@ class ChangeDetectionStore:
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
# Should only be active for docker
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
self.datastore_path = datastore_path
self.needs_write = False
self.start_time = time.time()
self.stop_thread = False
self.save_version_copy_json_db(version_tag)
self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag)
def save_version_copy_json_db(self, version_tag):
    """Take a one-off, per-version backup copy of url-watches.json.

    The copy is only made when the DB file exists and no backup for this
    version tag has been written yet, so it runs at most once per upgrade.
    """
    import re
    # Make the tag filesystem-safe: collapse every non-digit run to '-'
    # (e.g. '0.50.43' -> '0-50-43')
    safe_tag = re.sub(r'\D+', '-', version_tag)
    source = os.path.join(self.datastore_path, "url-watches.json")
    backup = os.path.join(self.datastore_path, f"url-watches-{safe_tag}.json")

    # Guard clauses: already backed up for this version, or nothing to back up
    if os.path.isfile(backup) or not os.path.isfile(source):
        return

    from shutil import copyfile
    logger.info(f"Backing up JSON DB due to new version to '{backup}'.")
    copyfile(source, backup)
def reload_state(self, datastore_path, include_default_watches, version_tag):
logger.info(f"Datastore path is '{datastore_path}'")
self.__data = App.model()
self.datastore_path = datastore_path
self.json_store_path = os.path.join(self.datastore_path, "url-watches.json")
# Base definition for all watchers
# deepcopy part of #569 - not sure why its needed exactly
@@ -78,37 +91,46 @@ class ChangeDetectionStore:
self.__data['build_sha'] = f.read()
try:
# @todo retest with ", encoding='utf-8'"
with open(self.json_store_path) as json_file:
from_disk = json.load(json_file)
if HAS_ORJSON:
# orjson.loads() expects UTF-8 encoded bytes #3611
with open(self.json_store_path, 'rb') as json_file:
from_disk = orjson.loads(json_file.read())
else:
with open(self.json_store_path, encoding='utf-8') as json_file:
from_disk = json.load(json_file)
# @todo isnt there a way todo this dict.update recursively?
# Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
if 'watching' in from_disk:
self.__data['watching'].update(from_disk['watching'])
if not from_disk:
# No FileNotFound exception was thrown but somehow the JSON was empty - abort for safety.
logger.critical(f"JSON DB existed but was empty on load - empty JSON file? '{self.json_store_path}' Aborting")
raise Exception('JSON DB existed but was empty on load - Aborting')
if 'app_guid' in from_disk:
self.__data['app_guid'] = from_disk['app_guid']
# @todo isnt there a way todo this dict.update recursively?
# Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
if 'watching' in from_disk:
self.__data['watching'].update(from_disk['watching'])
if 'settings' in from_disk:
if 'headers' in from_disk['settings']:
self.__data['settings']['headers'].update(from_disk['settings']['headers'])
if 'app_guid' in from_disk:
self.__data['app_guid'] = from_disk['app_guid']
if 'requests' in from_disk['settings']:
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
if 'settings' in from_disk:
if 'headers' in from_disk['settings']:
self.__data['settings']['headers'].update(from_disk['settings']['headers'])
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
if 'requests' in from_disk['settings']:
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
# Convert each existing watch back to the Watch.model object
for uuid, watch in self.__data['watching'].items():
self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
logger.info(f"Watching: {uuid} {watch['url']}")
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
# And for Tags also, should be Restock type because it has extra settings
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff')
logger.info(f"Tag: {uuid} {tag['title']}")
# Convert each existing watch back to the Watch.model object
for uuid, watch in self.__data['watching'].items():
self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
logger.info(f"Watching: {uuid} {watch['url']}")
# And for Tags also, should be Restock type because it has extra settings
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff')
logger.info(f"Tag: {uuid} {tag['title']}")
# First time ran, Create the datastore.
except (FileNotFoundError):
@@ -435,12 +457,13 @@ class ChangeDetectionStore:
# system was out of memory, out of RAM etc
if HAS_ORJSON:
# Use orjson for faster serialization
# orjson.dumps() always returns UTF-8 encoded bytes #3611
with open(self.json_store_path+".tmp", 'wb') as json_file:
json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2))
else:
# Fallback to standard json module
with open(self.json_store_path+".tmp", 'w') as json_file:
json.dump(data, json_file, indent=2)
with open(self.json_store_path+".tmp", 'w', encoding='utf-8') as json_file:
json.dump(data, json_file, indent=2, ensure_ascii=False)
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
@@ -502,8 +525,13 @@ class ChangeDetectionStore:
# Load from external config file
if path.isfile(proxy_list_file):
with open(os.path.join(self.datastore_path, "proxies.json")) as f:
proxy_list = json.load(f)
if HAS_ORJSON:
# orjson.loads() expects UTF-8 encoded bytes #3611
with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f:
proxy_list = orjson.loads(f.read())
else:
with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f:
proxy_list = json.load(f)
# Mapping from UI config if available
extras = self.data['settings']['requests'].get('extra_proxies')
@@ -748,6 +776,28 @@ class ChangeDetectionStore:
return updates_available
def add_notification_url(self, notification_url):
    """Add a notification URL to application settings if not already present.

    Returns the URL in both cases (already present, or newly appended).
    Thread-safe: uses a check / re-check-under-lock pattern.
    """
    logger.debug(f">>> Adding new notification_url - '{notification_url}'")

    # Fast pre-check without taking the lock
    notification_urls = self.data['settings']['application'].get('notification_urls', [])
    if notification_url in notification_urls:
        return notification_url

    with self.lock:
        # Re-check under the lock in case another thread added it meanwhile
        notification_urls = self.__data['settings']['application'].get('notification_urls', [])
        if notification_url in notification_urls:
            return notification_url

        # Append and update the datastore
        notification_urls.append(notification_url)
        self.__data['settings']['application']['notification_urls'] = notification_urls
        # Mark the datastore dirty so the background writer persists the change
        self.needs_write = True

    return notification_url
# Run all updates
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
@@ -760,7 +810,16 @@ class ChangeDetectionStore:
logger.critical(f"Applying update_{update_n}")
# Wont exist on fresh installs
if os.path.exists(self.json_store_path):
shutil.copyfile(self.json_store_path, os.path.join(self.datastore_path, f"url-watches-before-{update_n}.json"))
i = 0
while True:
i+=1
dest = os.path.join(self.datastore_path, f"url-watches-before-{update_n}-{i}.json")
if not os.path.exists(dest):
logger.debug(f"Copying url-watches.json DB to '{dest}' backup.")
shutil.copyfile(self.json_store_path, dest)
break
else:
logger.warning(f"Backup of url-watches.json '{dest}', DB already exists, trying {i+1}.. ")
try:
update_method = getattr(self, f"update_{update_n}")()
@@ -1051,25 +1110,15 @@ class ChangeDetectionStore:
formats['markdown'] = 'Markdown'
re_run(formats)
def add_notification_url(self, notification_url):
logger.debug(f">>> Adding new notification_url - '{notification_url}'")
notification_urls = self.data['settings']['application'].get('notification_urls', [])
if notification_url in notification_urls:
return notification_url
with self.lock:
notification_urls = self.__data['settings']['application'].get('notification_urls', [])
if notification_url in notification_urls:
return notification_url
# Append and update the datastore
notification_urls.append(notification_url)
self.__data['settings']['application']['notification_urls'] = notification_urls
self.needs_write = True
return notification_url
# RSS types should be inline with the same names as notification types
def update_24(self):
    """Migrate legacy 'rss_content_format' values onto the current naming.

    Unset or text-ish values (e.g. 'plaintext', 'plain text') become the
    default; html-ish values become 'htmlcolor'; anything unrecognised
    falls back safely to the default.
    """
    app_settings = self.data['settings']['application']
    current = app_settings.get('rss_content_format')

    if current and 'text' not in current and 'html' in current:
        migrated = 'htmlcolor'
    else:
        # Unset, text-ish, or unknown -> safe fallback to text
        migrated = RSS_CONTENT_FORMAT_DEFAULT

    app_settings['rss_content_format'] = migrated
+7 -2
View File
@@ -8,8 +8,13 @@
<meta name="robots" content="noindex">
<title>Change Detection{{extra_title}}</title>
{% if app_rss_token %}
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss.feed', tag=active_tag_uuid , token=app_rss_token)}}" >
{% endif %}
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token, _external=True )}}" >
{% if rss_uuid_feed %}
<link rel="alternate" type="application/rss+xml" title="Feed » {{ rss_uuid_feed['label'] }}" href="{{ rss_uuid_feed['url'] }}" >
{%- endif -%}
{%- endif -%}
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='pure-min.css')}}" >
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='styles.css')}}?v={{ get_css_version() }}" >
{% if extra_stylesheets %}
+1 -2
View File
@@ -77,10 +77,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b'<rss' in res.data
# re #16 should have the diff in here too
assert b'(into) which has this one new line' in res.data
assert b'which has this one new line' in res.data
assert b'CDATA' in res.data
assert expected_url.encode('utf-8') in res.data
#
# Following the 'diff' link, it should no longer display as 'has-unread-changes' even after we recheck it a few times
res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid))
@@ -40,7 +40,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
json_obj = None
with open(json_db_file, 'r') as f:
with open(json_db_file, 'r', encoding='utf-8') as f:
json_obj = json.load(f)
# assert the right amount of watches was found in the JSON
@@ -76,7 +76,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
with open(json_db_file, 'r') as f:
with open(json_db_file, 'r', encoding='utf-8') as f:
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
@@ -442,13 +442,12 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast
snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
assert b'&#34;hello&#34;: 123,' in res.data # properly html escaped in the front end
import json
data = json.loads(snapshot_contents)
keys = list(data.keys())
# Should be correctly formatted and sorted, ("world" goes to end)
assert snapshot_contents == """{
"hello": 123,
"world": 123
}"""
assert keys == ["hello", "world"]
delete_all_watches(client)
def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path):
+2 -2
View File
@@ -142,7 +142,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
watches_with_body = 0
with open(os.path.join(datastore_path, 'url-watches.json')) as f:
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
app_struct = json.load(f)
for uuid in app_struct['watching']:
if app_struct['watching'][uuid]['body']==body_value:
@@ -225,7 +225,7 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_
wait_for_all_checks(client)
watches_with_method = 0
with open(os.path.join(datastore_path, 'url-watches.json')) as f:
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
app_struct = json.load(f)
for uuid in app_struct['watching']:
if app_struct['watching'][uuid]['method'] == 'PATCH':
+73
View File
@@ -4,6 +4,8 @@ import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
extract_UUID_from_client, delete_all_watches
from loguru import logger
from ..blueprint.rss import RSS_FORMAT_TYPES
def set_original_cdata_xml(datastore_path):
@@ -238,6 +240,77 @@ def test_rss_bad_chars_breaking(client, live_server, measure_memory_usage, datas
#assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n == 2
def test_rss_single_watch_feed(client, live_server, measure_memory_usage, datastore_path):
    """Exercise the /rss/watch/<uuid> single-watch feed endpoint.

    Covers the 400 path (not enough history), the 200 path, and that every
    configurable RSS content format renders the expected markup.
    """
    app_rss_token = live_server.app.config['DATASTORE'].data['settings']['application'].get('rss_access_token')
    rss_content_format = live_server.app.config['DATASTORE'].data['settings']['application'].get('rss_content_format')
    set_original_response(datastore_path=datastore_path)

    test_url = url_for('test_endpoint', _external=True)
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Only one snapshot exists -> nothing to diff yet, endpoint must 400
    res = client.get(
        url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token),
        follow_redirects=False
    )
    assert res.status_code == 400
    assert b'not have enough history' in res.data

    set_modified_response(datastore_path=datastore_path)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.get(
        url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token),
        follow_redirects=False
    )
    assert res.status_code == 200

    import xml.etree.ElementTree as ET
    root = ET.fromstring(res.data)

    def check_formatting(expected_type, content, url):
        # One branch per configurable RSS content format
        logger.debug(f"Checking formatting type {expected_type}")
        if expected_type == 'text':
            assert '<p>' not in content
            assert 'body' not in content
            # Fix: these two previously asserted bare string literals (always
            # truthy, so they tested nothing) — now check against the content.
            assert '(changed) Which is across multiple lines\n' in content
            # Because it picked up <title> as watch_title in default template
            assert 'modified head title had a change.' in content
        elif expected_type == 'html':
            assert '<p>' in content
            assert '<body>' in content
            assert '<p>(changed) Which is across multiple lines<br>' in content
            # Fix: was a bare (always-truthy) f-string assert
            assert f'href="{url}">modified head title had a change.</a>' in content
        elif expected_type == 'htmlcolor':
            assert '<body>' in content
            assert ' role="note" aria-label="Changed text" title="Changed text">Which is across multiple lines</span>' in content
            # Fix: was a bare (always-truthy) f-string assert
            assert f'href="{url}">modified head title had a change.</a>' in content
        else:
            raise Exception(f"Unknown type {expected_type}")

    item = root.findall('.//item')[0].findtext('description')
    check_formatting(expected_type=rss_content_format, content=item, url=test_url)

    # Now the default one is over, lets try all the others
    for k in list(RSS_FORMAT_TYPES.keys()):
        res = client.post(
            url_for("settings.settings_page"),
            data={"application-rss_content_format": k},
            follow_redirects=True
        )
        assert b'Settings updated' in res.data

        res = client.get(
            url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token),
            follow_redirects=False
        )
        assert res.status_code == 200
        root = ET.fromstring(res.data)
        item = root.findall('.//item')[0].findtext('description')
        check_formatting(expected_type=k, content=item, url=test_url)
+1 -1
View File
@@ -33,7 +33,7 @@ wtforms~=3.2
jsonpath-ng~=1.7.0
# Fast JSON serialization for better performance
orjson~=3.10
orjson~=3.11
# dnspython - Used by paho-mqtt for MQTT broker resolution
# Version pin removed since eventlet (which required the specific 2.6.1 pin) has been eliminated