Compare commits

...

26 Commits

Author SHA1 Message Date
dgtlmoon
e9c65147c8 Migrate watches from main DB file to a subfile 2024-12-30 15:01:30 +01:00
dgtlmoon
2a6c707d17 test tweak 2024-12-30 13:45:06 +01:00
dgtlmoon
13e682666f test fix 2024-12-30 13:41:15 +01:00
dgtlmoon
748ed79314 fix zombie watch usage 2024-12-30 13:23:08 +01:00
dgtlmoon
0084812300 make writes more safe 2024-12-30 13:03:26 +01:00
dgtlmoon
ff9e164fc2 adjust test 2024-12-30 13:03:08 +01:00
dgtlmoon
339a881d61 Save each watch to its own dir 2024-12-30 12:42:35 +01:00
dgtlmoon
8604ac7e56 bump unit tests of watch/store object 2024-12-30 12:42:23 +01:00
dgtlmoon
281f637068 Small refactor, internalise the datastruct 2024-12-30 10:57:30 +01:00
dgtlmoon
2d37f94c5f share should use custom encoder 2024-12-29 19:24:03 +01:00
dgtlmoon
05c46f3f25 Handle model type 2024-12-29 17:28:10 +01:00
dgtlmoon
9f73432540 deepcopy not needed 2024-12-29 16:39:21 +01:00
dgtlmoon
061693b117 tweak API reference 2024-12-29 16:38:46 +01:00
dgtlmoon
78e875b70d Abstract data from the model 2024-12-29 16:07:12 +01:00
Florian Kretschmer
d67d396b88 Builder/Docker - Remove PUID and PGID ( they were not used ) (#2852) 2024-12-27 13:03:36 +01:00
MoshiMoshi0
05f54f0ce6 UI - Fix diff not starting from last viewed snapshot (#2744) (#2856) 2024-12-27 13:03:10 +01:00
dgtlmoon
6adf10597e 0.48.05 2024-12-27 11:24:56 +01:00
dgtlmoon
4419bc0e61 Fixing test for CVE-2024-56509 (#2864) 2024-12-27 11:09:52 +01:00
dgtlmoon
f7e9846c9b CVE-2024-56509 - Stricter file protocol checking pre-check ( Improper Input Validation Leading to LFR/Path Traversal when fetching file:.. ) 2024-12-27 09:26:28 +01:00
dgtlmoon
5dea5e1def 0.48.04 2024-12-16 21:50:53 +01:00
dgtlmoon
0fade0a473 Windows was sometimes missing timezone data (#2845 #2826) 2024-12-16 21:50:28 +01:00
dgtlmoon
121e9c20e0 0.48.03 2024-12-16 16:14:03 +01:00
dgtlmoon
12cec2d541 0.48.02 2024-12-16 16:10:47 +01:00
dgtlmoon
d52e6e8e11 Notifications - "Send test" was not always following "System default notification format" (#2844) 2024-12-16 15:50:07 +01:00
dgtlmoon
bae1a89b75 Notifications - Default notification format (for new installs) now "HTML color" (#2843) 2024-12-16 14:55:10 +01:00
dgtlmoon
e49711f449 Notification - HTML Color format notification colors should be same as UI, {{diff_full}} token should also get HTML colors ( #2842 #2554 ) 2024-12-16 14:46:39 +01:00
23 changed files with 297 additions and 169 deletions

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.48.01'
__version__ = '0.48.05'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -12,10 +12,10 @@ import copy
# See docs/README.md for rebuilding the docs/apidoc information
from . import api_schema
from ..model import watch_base
from ..model import WatchBase
# Build a JSON Schema at least partially based on our Watch model
watch_base_config = watch_base()
watch_base_config = WatchBase()
schema = api_schema.build_watch_json_schema(watch_base_config)
schema_create_watch = copy.deepcopy(schema)
@@ -52,8 +52,8 @@ class Watch(Resource):
@apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch
@apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
"""
from copy import deepcopy
watch = deepcopy(self.datastore.data['watching'].get(uuid))
watch = self.datastore.data['watching'].get(uuid)
if not watch:
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
@@ -75,10 +75,11 @@ class Watch(Resource):
# Return without history, get that via another API call
# Properties are not returned as a JSON, so add the required props manually
watch['history_n'] = watch.history_n
watch['last_changed'] = watch.last_changed
watch['viewed'] = watch.viewed
return watch
result = watch.as_dict()
result['history_n'] = watch.history_n
result['last_changed'] = watch.last_changed
result['viewed'] = watch.viewed
return result
@auth.check_token
def delete(self, uuid):

View File

@@ -41,6 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
now = time.time()
try:
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
# @todo can now just pass the watch here?
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid
)

View File

@@ -1,6 +1,9 @@
import difflib
from typing import List, Iterator, Union
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
    """Return ``lst[start:end]``, or the single element at ``start`` when ``start == end``.

    Args:
        lst: The lines to slice.
        start: First index of the range.
        end: End index of the range (exclusive).

    Returns:
        The requested slice. For an empty range (``start == end``) a
        one-element list holding ``lst[start]`` is returned, or an empty list
        when ``start`` does not address an element (for example when ``lst``
        is empty) instead of raising ``IndexError``.
    """
    if start != end:
        return lst[start:end]
    try:
        # Plain indexing keeps the original handling of negative indexes.
        return [lst[start]]
    except IndexError:
        # Empty range at/after the end of the list: nothing to return.
        return []
@@ -33,24 +36,26 @@ def customSequenceMatcher(
"""
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if include_equal and tag == 'equal':
yield before[alo:ahi]
elif include_removed and tag == 'delete':
if html_colour:
yield [f'<span style="background-color: #ffcecb;">{line}</span>' for line in same_slicer(before, alo, ahi)]
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)]
else:
yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi)
elif include_replaced and tag == 'replace':
if html_colour:
yield [f'<span style="background-color: #ffcecb;">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
[f'<span style="background-color: #dafbe1;">{line}</span>' for line in same_slicer(after, blo, bhi)]
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
[f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
else:
yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \
[f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
elif include_added and tag == 'insert':
if html_colour:
yield [f'<span style="background-color: #dafbe1;">{line}</span>' for line in same_slicer(after, blo, bhi)]
yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
else:
yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi)

View File

@@ -43,6 +43,7 @@ from loguru import logger
from changedetectionio import html_tools, __version__
from changedetectionio import queuedWatchMetaData
from changedetectionio.api import api_v1
from .store import CustomEncoder
from .time_handler import is_within_schedule
datastore = None
@@ -800,7 +801,7 @@ def changedetection_app(config=None, datastore_o=None):
# Recast it if need be to right data Watch handler
watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
datastore.data['watching'][uuid] = watch_class(__datastore=datastore_o, default=datastore.data['watching'][uuid])
flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
@@ -1613,7 +1614,7 @@ def changedetection_app(config=None, datastore_o=None):
watch['ignore_text'] += datastore.data['settings']['application']['global_ignore_text']
watch['subtractive_selectors'] += datastore.data['settings']['application']['global_subtractive_selectors']
watch_json = json.dumps(watch)
watch_json = json.dumps(watch, cls=CustomEncoder)
try:
r = requests.request(method="POST",

View File

@@ -10,7 +10,7 @@ _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
class model(dict):
base_config = {
__base_config = {
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
'watching': {},
'settings': {
@@ -60,7 +60,7 @@ class model(dict):
def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw)
self.update(self.base_config)
self.update(self.__base_config)
def parse_headers_from_text_file(filepath):

View File

@@ -1,8 +1,8 @@
from changedetectionio.model import watch_base
from changedetectionio.model import WatchBase
class model(watch_base):
class model(WatchBase):
def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw)

View File

@@ -1,17 +1,17 @@
from changedetectionio.strtobool import strtobool
from changedetectionio.safe_jinja import render as jinja_render
from . import watch_base
import os
import re
from pathlib import Path
from loguru import logger
from . import WatchBase
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
WATCH_DB_JSON_FILENAME = 'watch.json'
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -32,15 +32,20 @@ def is_safe_url(test_url):
return True
class model(watch_base):
__newest_history_key = None
__history_n = 0
jitter_seconds = 0
class model(WatchBase):
__datastore = None
__datastore_checksum = None
__history_n = 0
__newest_history_key = None
jitter_seconds = 0
def __init__(self, *arg, **kw):
self.__datastore_path = kw.get('datastore_path')
if kw.get('datastore_path'):
del kw['datastore_path']
if not kw.get('__datastore'):
logger.critical('No __datastore reference was set!')
self.__datastore = kw.get('__datastore')
super(model, self).__init__(*arg, **kw)
if kw.get('default'):
self.update(kw['default'])
@@ -179,7 +184,7 @@ class model(watch_base):
tmp_history = {}
# In the case we are only using the watch for processing without history
if not self.watch_data_dir:
if not self.__datastore or not self.watch_data_dir:
return []
# Read the history file as a dict
@@ -247,37 +252,32 @@ class model(watch_base):
bump = self.history
return self.__newest_history_key
# Given an arbitrary timestamp, find the closest next key
# For example, last_viewed = 1000 so it should return the next 1001 timestamp
#
# used for the [diff] button so it can preset a smarter from_version
# Given an arbitrary timestamp, find the best history key for the [diff] button so it can preset a smarter from_version
@property
def get_next_snapshot_key_to_last_viewed(self):
def get_from_version_based_on_last_viewed(self):
"""Unfortunately for now timestamp is stored as string key"""
keys = list(self.history.keys())
if not keys:
return None
if len(keys) == 1:
return keys[0]
last_viewed = int(self.get('last_viewed'))
prev_k = keys[0]
sorted_keys = sorted(keys, key=lambda x: int(x))
sorted_keys.reverse()
# When the 'last viewed' timestamp is greater than the newest snapshot, return second last
if last_viewed > int(sorted_keys[0]):
# When the 'last viewed' timestamp is greater than or equal the newest snapshot, return second newest
if last_viewed >= int(sorted_keys[0]):
return sorted_keys[1]
# When the 'last viewed' timestamp is between snapshots, return the older snapshot
for newer, older in list(zip(sorted_keys[0:], sorted_keys[1:])):
if last_viewed < int(newer) and last_viewed >= int(older):
return older
for k in sorted_keys:
if int(k) < last_viewed:
if prev_k == sorted_keys[0]:
# Return the second last one so we dont recommend the same version compares itself
return sorted_keys[1]
return prev_k
prev_k = k
return keys[0]
# When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
return sorted_keys[-1]
def get_history_snapshot(self, timestamp):
import brotli
@@ -424,7 +424,7 @@ class model(watch_base):
@property
def watch_data_dir(self):
# The base dir of the watch data
return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
return os.path.join(self.__datastore.datastore_path, self['uuid']) if self.__datastore.datastore_path else None
def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error"""
@@ -529,6 +529,22 @@ class model(watch_base):
# None is set
return False
def save_data(self):
    """Persist this watch's settings to its own JSON file.

    The data from ``as_dict()`` is written to
    ``<watch_data_dir>/<WATCH_DB_JSON_FILENAME>``. It is first written to a
    ``.tmp`` sibling and then moved into place with ``os.replace`` so a full
    disk or other write error does not corrupt an existing file (hopefully).

    Failures are logged and swallowed (best effort), so one bad watch does
    not stop the caller from saving the others.
    """
    import json
    # @todo dict change?

    data_dir = self.watch_data_dir
    if not data_dir:
        # Without a data directory there is nowhere to write to
        logger.critical(f"Not saving watch {self.get('uuid')} - no watch data directory is available")
        return

    dest = os.path.join(data_dir, WATCH_DB_JSON_FILENAME)
    tmp_dest = dest + '.tmp'
    logger.debug(f"Saving watch {dest}")
    try:
        # Same preparation save_error_text() does before writing into the watch dir
        # (presumably creates the directory when missing - confirm against its definition)
        self.ensure_data_dir_exists()
        with open(tmp_dest, 'w') as json_file:
            json.dump(self.as_dict(), json_file, indent=2)
        os.replace(tmp_dest, dest)
    except Exception as e:
        logger.critical(f"Exception saving watch JSON {dest} - {e}")
        # Don't leave a half-written temp file behind
        try:
            os.unlink(tmp_dest)
        except OSError:
            pass
def save_error_text(self, contents):
self.ensure_data_dir_exists()
target_path = os.path.join(self.watch_data_dir, "last-error.txt")

View File

@@ -1,13 +1,14 @@
import os
import uuid
from collections.abc import MutableMapping
from changedetectionio import strtobool
from changedetectionio.notification import default_notification_format_for_watch
class watch_base(dict):
def __init__(self, *arg, **kw):
self.update({
class WatchBase(MutableMapping):
__data_checksum = None
def __init__(self, *args, **kwargs):
self.__internal_dict = {
# Custom notification content
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
@@ -127,9 +128,37 @@ class watch_base(dict):
'uuid': str(uuid.uuid4()),
'webdriver_delay': None,
'webdriver_js_execute_code': None, # Run before change-detection
})
}
super(watch_base, self).__init__(*arg, **kw)
# Update with any provided arguments
self.update(*args, **kwargs)
if self.get('default'):
del self['default']
del self['default']
# Implement abstract methods required by MutableMapping
def __getitem__(self, key):
# Mapping read access: look the key up in the private backing dict.
# A missing key raises KeyError from the dict lookup itself.
return self.__internal_dict[key]
def __setitem__(self, key, value):
# Mapping write access. The special key '__datastore' is kept out of the
# backing dict and stored as an attribute instead; every other key goes
# into the backing dict.
# NOTE(review): inside this class `self.__datastore` is name-mangled to
# `_WatchBase__datastore`, so a subclass reading its own `self.__datastore`
# sees a different attribute - confirm that is intended.
if key == '__datastore':
self.__datastore = value
else:
self.__internal_dict[key] = value
def __delitem__(self, key):
# Mapping delete access: remove the key from the private backing dict.
# A missing key raises KeyError from the dict itself.
del self.__internal_dict[key]
def __iter__(self):
# Iterate over the keys of the private backing dict.
return iter(self.__internal_dict)
def __len__(self):
# Number of keys currently held in the private backing dict.
return len(self.__internal_dict)
# Optional: Implement additional methods for convenience
def __repr__(self):
# Debug representation: the concrete class name followed by the backing dict.
return f"{self.__class__.__name__}({self.__internal_dict})"
def as_dict(self):
    """Return the watch's stored key/values as a plain ``dict``.

    A shallow copy is returned, so adding or removing top-level keys on the
    result (for example extra computed fields added before returning an API
    response) does not change the data held by this object. Nested values
    are still shared with the original.

    Returns:
        dict: A new dict with the same keys and values as the backing dict.
    """
    return dict(self.__internal_dict)

View File

@@ -23,7 +23,7 @@ valid_tokens = {
}
default_notification_format_for_watch = 'System default'
default_notification_format = 'Text'
default_notification_format = 'HTML Color'
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'

View File

@@ -33,8 +33,8 @@ class difference_detection_processor():
url = self.watch.link
# Protect against file://, file:/ access, check the real "link" without any meta "source:" etc prepended.
if re.search(r'^file:/', url.strip(), re.IGNORECASE):
# Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
if re.search(r'^file:', url.strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
raise Exception(
"file:// type access is denied for security reasons."

View File

@@ -1,3 +1,5 @@
import glob
from changedetectionio.strtobool import strtobool
from flask import (
@@ -5,8 +7,8 @@ from flask import (
)
from .html_tools import TRANSLATE_WHITESPACE_TABLE
from . model import App, Watch
from copy import deepcopy, copy
from .model import App, Watch, WatchBase
from copy import deepcopy
from os import path, unlink
from threading import Lock
import json
@@ -18,6 +20,7 @@ import time
import uuid as uuid_builder
from loguru import logger
from .model.Watch import WATCH_DB_JSON_FILENAME
from .processors import get_custom_watch_obj_for_processor
from .processors.restock_diff import Restock
@@ -26,6 +29,13 @@ BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
dictfilt = lambda x, y: dict([ (i,x[i]) for i in x if i in set(y) ])
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that can serialise ``WatchBase`` objects.

    ``WatchBase`` instances are encoded via their ``as_dict()`` data;
    anything else falls through to the standard encoder, which raises
    ``TypeError`` for unsupported types.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        Only the type is checked, not truthiness, so a ``WatchBase`` that
        currently holds no keys is still encoded rather than rejected.
        """
        if isinstance(obj, WatchBase):
            return obj.as_dict()
        # Add more custom type handlers here
        return super().default(obj)
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
@@ -42,16 +52,14 @@ class ChangeDetectionStore:
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
# Should only be active for docker
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
from os.path import join
self.__data = App.model()
self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
self.json_store_path = join(self.datastore_path, 'url-watches.json')
logger.info(f"Datastore path is '{self.json_store_path}'")
self.needs_write = False
self.start_time = time.time()
self.stop_thread = False
# Base definition for all watchers
# deepcopy part of #569 - not sure why its needed exactly
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
if path.isfile('changedetectionio/source.txt'):
with open('changedetectionio/source.txt') as f:
@@ -65,10 +73,6 @@ class ChangeDetectionStore:
from_disk = json.load(json_file)
# @todo isnt there a way todo this dict.update recursively?
# Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
if 'watching' in from_disk:
self.__data['watching'].update(from_disk['watching'])
if 'app_guid' in from_disk:
self.__data['app_guid'] = from_disk['app_guid']
@@ -82,10 +86,7 @@ class ChangeDetectionStore:
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
# Convert each existing watch back to the Watch.model object
for uuid, watch in self.__data['watching'].items():
self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
logger.info(f"Watching: {uuid} {watch['url']}")
self.scan_and_load_watches()
# And for Tags also, should be Restock type because it has extra settings
for uuid, tag in self.__data['settings']['application']['tags'].items():
@@ -158,9 +159,29 @@ class ChangeDetectionStore:
if entity.get('uuid') != 'text_json_diff':
logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")
entity = watch_class(datastore_path=self.datastore_path, default=entity)
entity = watch_class(__datastore=self, default=entity)
return entity
def scan_and_load_watches(self):
    """Load every ``<datastore_path>/<uuid>/<WATCH_DB_JSON_FILENAME>`` into ``watching``.

    Only direct sub-directories of the datastore path are scanned. The name
    of the directory holding the file is used as the watch UUID, and any
    ``uuid`` stored inside the JSON is discarded, so a watch can be moved to
    another UUID simply by renaming its directory.

    Files that cannot be read or decoded are logged and skipped; they do not
    stop the remaining watches from loading.
    """
    # @todo move to some other function so we can trigger a rescan in a thread
    # Escape the base path so glob metacharacters in it ("[", "*", "?") are taken literally
    pattern = os.path.join(glob.escape(self.datastore_path), '*', WATCH_DB_JSON_FILENAME)
    for file_path in glob.glob(pattern):
        try:
            with open(file_path, 'r') as json_file:
                data = json.load(json_file)

            # So that we can always move it to another UUID by renaming the dir
            uuid = os.path.basename(os.path.dirname(file_path))
            data.pop('uuid', None)

            self.__data['watching'][uuid] = self.rehydrate_entity(uuid, data)

        except json.JSONDecodeError as e:
            logger.error(f"Error decoding JSON in file {file_path}: {e}")
        except Exception as e:
            logger.critical(f"Exception decoding JSON in file {file_path}: {e}")
def set_last_viewed(self, uuid, timestamp):
logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
@@ -177,13 +198,15 @@ class ChangeDetectionStore:
return
with self.lock:
# deepcopy part of #569 - not sure why its needed exactly
# self.generic_definition = deepcopy(Watch.model(default={}))
# In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
for dict_key, d in self.generic_definition.items():
if isinstance(d, dict):
if update_obj is not None and dict_key in update_obj:
self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
del (update_obj[dict_key])
# # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
# for dict_key, d in self.generic_definition.items():
# if isinstance(d, dict):
# if update_obj is not None and dict_key in update_obj:
# self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
# del (update_obj[dict_key])
self.__data['watching'][uuid].update(update_obj)
self.needs_write = True
@@ -346,7 +369,7 @@ class ChangeDetectionStore:
# If the processor also has its own Watch implementation
watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
new_watch = watch_class(datastore_path=self.datastore_path, url=url)
new_watch = watch_class(__datastore=self, url=url)
new_uuid = new_watch.get('uuid')
@@ -383,7 +406,8 @@ class ChangeDetectionStore:
def sync_to_json(self):
logger.info("Saving JSON..")
try:
data = deepcopy(self.__data)
data = {key: deepcopy(value) for key, value in self.__data.items() if key != 'watching'}
except RuntimeError as e:
# Try again in 15 seconds
time.sleep(15)
@@ -397,11 +421,15 @@ class ChangeDetectionStore:
# This is a fairly basic strategy to deal with the case that the file is corrupted,
# system was out of memory, out of RAM etc
with open(self.json_store_path+".tmp", 'w') as json_file:
json.dump(data, json_file, indent=4)
json.dump(data, json_file, indent=2, cls=CustomEncoder)
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
# Write each watch to the disk (data in their own subdir) if it changed
for watch_uuid, watch in self.__data['watching'].items():
watch.save_data()
self.needs_write = False
self.needs_write_urgent = False
@@ -924,3 +952,25 @@ class ChangeDetectionStore:
f_d.write(zlib.compress(f_j.read()))
os.unlink(json_path)
# Move each 'watching' from a big JSON file to their own datafile in their data subdirectory
def update_20(self):
    """Schema update: move each watch out of the main JSON into its own file.

    Every entry under ``watching`` in the main JSON store is written to
    ``<datastore_path>/<uuid>/<WATCH_DB_JSON_FILENAME>`` (temp file first,
    then ``os.replace``). Afterwards the in-memory ``watching`` dict is reset
    and re-populated from the files on disk.

    A watch whose file could not be written is logged and then kept in
    memory (rehydrated from the data in the main JSON), so it is not
    dropped by the reset/rescan and a later save can retry it.
    """
    with open(self.json_store_path) as json_file:
        data = json.load(json_file)

    failed = {}
    for uuid, watch in (data.get('watching') or {}).items():
        watch_data_dir = os.path.join(self.datastore_path, uuid)
        dest = os.path.join(watch_data_dir, WATCH_DB_JSON_FILENAME)

        try:
            if not os.path.isdir(watch_data_dir):
                logger.debug(f"> Creating data dir {watch_data_dir}")
                os.mkdir(watch_data_dir)
            with open(dest + '.tmp', 'w') as watch_file:
                json.dump(watch, watch_file, indent=2)
            os.replace(dest + '.tmp', dest)
            logger.info(f"Saved watch to {dest}")
        except Exception as e:
            logger.critical(f"Exception saving watch JSON {dest} - {e}")
            failed[uuid] = watch

    self.data['watching'] = {}
    self.scan_and_load_watches()

    # Anything that could not be written (and so was not found by the rescan) stays loaded
    for uuid, watch in failed.items():
        if uuid not in self.data['watching']:
            self.data['watching'][uuid] = self.rehydrate_entity(uuid, watch)

View File

@@ -191,7 +191,7 @@
{% if watch.history_n >= 2 %}
{% if is_unviewed %}
<a href="{{ url_for('diff_history_page', uuid=watch.uuid, from_version=watch.get_next_snapshot_key_to_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
<a href="{{ url_for('diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
{% else %}
<a href="{{ url_for('diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
{% endif %}

View File

@@ -113,7 +113,8 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
res = client.post(
url_for("settings_page"),
data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
"application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了',
# triggered_text will contain multiple lines
"application-notification_body": 'triggered text was -{{triggered_text}}- ### 网站监测 内容更新了 ####',
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_urls": test_notification_url,
"application-minutes_between_check": 180,
@@ -171,7 +172,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
with open("test-datastore/notification.txt", 'rb') as f:
response = f.read()
assert b'-Oh yes please-' in response
assert b'-Oh yes please' in response
assert '网站监测 内容更新了'.encode('utf-8') in response
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -8,11 +8,19 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from urllib.parse import urlparse, parse_qs
from ..model.Watch import WATCH_DB_JSON_FILENAME
def test_consistent_history(client, live_server, measure_memory_usage):
live_server_setup(live_server)
import glob
r = range(1, 30)
# In case some exist from a previous test
for f in glob.glob(f"{live_server.app.config['DATASTORE'].datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True):
os.unlink(f)
for one in r:
test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
res = client.post(
@@ -44,11 +52,17 @@ def test_consistent_history(client, live_server, measure_memory_usage):
with open(json_db_file, 'r') as f:
json_obj = json.load(f)
found_db_jsons = glob.glob(f"{live_server.app.config['DATASTORE'].datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True)
# assert the right amount of watches was found in the JSON
assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON"
assert len(found_db_jsons) == len(r), "Correct number of watches was found in the JSON"
# each one should have a history.txt containing just one line
for w in json_obj['watching'].keys():
for json_db_file in found_db_jsons:
directory_path = os.path.dirname(json_db_file)
w = os.path.basename(directory_path)
history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
@@ -58,22 +72,21 @@ def test_consistent_history(client, live_server, measure_memory_usage):
assert len(tmp_history) == 1, "History.txt should contain 1 line"
# Should be four files: the html.br snapshot, history.txt, watch.json and the extracted text snapshot
files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,
w))
files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,w))
# Find the snapshot one
for fname in files_in_watch_dir:
if fname != 'history.txt' and 'html' not in fname:
# contents should match what we requested as content returned from the test url
with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
contents = snapshot_f.read()
watch_url = json_obj['watching'][w]['url']
u = urlparse(watch_url)
q = parse_qs(u[4])
assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"
# for fname in files_in_watch_dir:
# if fname != 'history.txt' and 'html' not in fname and fname != WATCH_DB_JSON_FILENAME:
# # contents should match what we requested as content returned from the test url
# with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
# contents = snapshot_f.read()
# watch_url = json_obj['watching'][w]['url']
# u = urlparse(watch_url)
# q = parse_qs(u[4])
# assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
assert len(files_in_watch_dir) == 4, "Should be just four files in the dir, html.br snapshot, history.txt, watch.json and the extracted text snapshot"
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')

View File

@@ -442,9 +442,9 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
assert b"Error: You must have atleast one watch configured for 'test notification' to work" in res.data
def test_html_color_notifications(client, live_server, measure_memory_usage):
def _test_color_notifications(client, notification_body_token):
#live_server_setup(live_server)
from changedetectionio.diff import ADDED_STYLE, REMOVED_STYLE
set_original_response()
@@ -461,7 +461,7 @@ def test_html_color_notifications(client, live_server, measure_memory_usage):
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
"application-notification_body": '{{diff}}',
"application-notification_body": notification_body_token,
"application-notification_format": "HTML Color",
"application-notification_urls": test_notification_url,
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
@@ -492,7 +492,7 @@ def test_html_color_notifications(client, live_server, measure_memory_usage):
with open("test-datastore/notification.txt", 'r') as f:
x = f.read()
assert '<span style="background-color: #ffcecb;">Which is across multiple lines' in x
assert f'<span style="{REMOVED_STYLE}">Which is across multiple lines' in x
client.get(
@@ -500,3 +500,9 @@ def test_html_color_notifications(client, live_server, measure_memory_usage):
follow_redirects=True
)
def test_html_color_notifications(client, live_server, measure_memory_usage):
# Runs the shared colour-notification check once per notification body token,
# so both '{{diff}}' and '{{diff_full}}' are exercised.
#live_server_setup(live_server)
_test_color_notifications(client, '{{diff}}')
_test_color_notifications(client, '{{diff_full}}')

View File

@@ -2,7 +2,7 @@ import json
import os
import time
from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def test_setup(live_server):
live_server_setup(live_server)
@@ -74,8 +74,8 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
# Re #137 - It should have only one set of headers entered
watches_with_headers = 0
for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
if (len(watch['headers'])):
watches_with_headers += 1
if (len(watch['headers'])):
watches_with_headers += 1
assert watches_with_headers == 1
# 'server' http header was automatically recorded
@@ -156,11 +156,10 @@ def test_body_in_request(client, live_server, measure_memory_usage):
assert b"1 Imported" in res.data
watches_with_body = 0
with open('test-datastore/url-watches.json') as f:
app_struct = json.load(f)
for uuid in app_struct['watching']:
if app_struct['watching'][uuid]['body']==body_value:
watches_with_body += 1
for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
if watch['body'] == body_value:
watches_with_body += 1
# Should be only one with body set
assert watches_with_body==1
@@ -244,11 +243,9 @@ def test_method_in_request(client, live_server, measure_memory_usage):
wait_for_all_checks(client)
watches_with_method = 0
with open('test-datastore/url-watches.json') as f:
app_struct = json.load(f)
for uuid in app_struct['watching']:
if app_struct['watching'][uuid]['method'] == 'PATCH':
watches_with_method += 1
for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
if watch['method'] == 'PATCH':
watches_with_method += 1
# Should be only one with method set to PATCH
assert watches_with_method == 1

View File

@@ -1,9 +1,7 @@
import os
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time
from .util import live_server_setup, wait_for_all_checks
from .. import strtobool
@@ -61,54 +59,44 @@ def test_bad_access(client, live_server, measure_memory_usage):
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
def test_file_slashslash_access(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
def _runner_test_various_file_slash(client, file_uri):
test_file_path = os.path.abspath(__file__)
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": f"file://{test_file_path}", "tags": ''},
data={"url": file_uri, "tags": ''},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(url_for("index"))
substrings = [b"URLs with hostname components are not permitted", b"No connection adapters were found for"]
# If it is enabled at test time
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
if file_uri.startswith('file:///'):
# This one should be the fully qualified path to the file, so the preview should return the contents of this file
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'_runner_test_various_file_slash' in res.data
else:
# This will give some error from requests or if it went to chrome, will give some other error :-)
assert any(s in res.data for s in substrings)
assert b"test_file_slashslash_access" in res.data
else:
# Default should be here
assert b'file:// type access is denied for security reasons.' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_file_slash_access(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# file: is NOT permitted by default, so it will be caught by ALLOW_FILE_URI check
test_file_path = os.path.abspath(__file__)
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": f"file:/{test_file_path}", "tags": ''},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(url_for("index"))
# If it is enabled at test time
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
# So it should permit it, but it should fall back to the 'requests' library giving an error
# (but means it gets passed to playwright etc)
assert b"URLs with hostname components are not permitted" in res.data
else:
# Default should be here
assert b'file:// type access is denied for security reasons.' in res.data
_runner_test_various_file_slash(client, file_uri=f"file://{test_file_path}")
_runner_test_various_file_slash(client, file_uri=f"file:/{test_file_path}")
_runner_test_various_file_slash(client, file_uri=f"file:{test_file_path}") # CVE-2024-56509
def test_xss(client, live_server, measure_memory_usage):
#live_server_setup(live_server)

View File

@@ -6,6 +6,7 @@
import unittest
import os
from changedetectionio import store
from changedetectionio.model import Watch
# mostly
@@ -13,10 +14,10 @@ class TestDiffBuilder(unittest.TestCase):
def test_watch_get_suggested_from_diff_timestamp(self):
import uuid as uuid_builder
watch = Watch.model(datastore_path='/tmp', default={})
datastore = store.ChangeDetectionStore(datastore_path='/tmp')
watch = Watch.model(__datastore=datastore, default={})
watch.ensure_data_dir_exists()
watch['last_viewed'] = 110
# Contents are always returned from the browser/requests/etc. as str, which in Python is a sequence of Unicode code points (not bytes)
watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
@@ -25,31 +26,43 @@ class TestDiffBuilder(unittest.TestCase):
watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))
p = watch.get_from_version_based_on_last_viewed
assert p == "100", "Correct 'last viewed' timestamp was detected"
p = watch.get_next_snapshot_key_to_last_viewed
assert p == "112", "Correct last-viewed timestamp was detected"
watch['last_viewed'] = 110
p = watch.get_from_version_based_on_last_viewed
assert p == "109", "Correct 'last viewed' timestamp was detected"
# When there is only one step of difference from the end of the list, it should return second-last change
watch['last_viewed'] = 116
p = watch.get_next_snapshot_key_to_last_viewed
assert p == "115", "Correct 'second last' last-viewed timestamp was detected when using the last timestamp"
p = watch.get_from_version_based_on_last_viewed
assert p == "115", "Correct 'last viewed' timestamp was detected"
watch['last_viewed'] = 99
p = watch.get_next_snapshot_key_to_last_viewed
assert p == "100"
p = watch.get_from_version_based_on_last_viewed
assert p == "100", "When the 'last viewed' timestamp is less than the oldest snapshot, return oldest"
watch['last_viewed'] = 200
p = watch.get_next_snapshot_key_to_last_viewed
assert p == "115", "When the 'last viewed' timestamp is greater than the newest snapshot, return second last "
p = watch.get_from_version_based_on_last_viewed
assert p == "115", "When the 'last viewed' timestamp is greater than the newest snapshot, return second newest"
watch['last_viewed'] = 109
p = watch.get_next_snapshot_key_to_last_viewed
p = watch.get_from_version_based_on_last_viewed
assert p == "109", "Correct when its the same time"
# new empty one
watch = Watch.model(datastore_path='/tmp', default={})
p = watch.get_next_snapshot_key_to_last_viewed
watch = Watch.model(__datastore=datastore, default={})
p = watch.get_from_version_based_on_last_viewed
assert p == None, "None when no history available"
watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
p = watch.get_from_version_based_on_last_viewed
assert p == "100", "Correct with only one history snapshot"
watch['last_viewed'] = 200
p = watch.get_from_version_based_on_last_viewed
assert p == "100", "Correct with only one history snapshot"
datastore.stop_thread = True
if __name__ == '__main__':
unittest.main()

View File

@@ -80,6 +80,7 @@ def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
#@todo - could check the apprise object directly instead of looking for this file
from os.path import isfile
for i in range(1, 20):
time.sleep(1)
if isfile("test-datastore/notification.txt"):

View File

@@ -28,6 +28,8 @@ class update_worker(threading.Thread):
def queue_notification_for_watch(self, notification_q, n_object, watch):
from changedetectionio import diff
from changedetectionio.notification import default_notification_format_for_watch
dates = []
trigger_text = ''
@@ -44,6 +46,10 @@ class update_worker(threading.Thread):
else:
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
# If we ended up here with "System default"
if n_object.get('notification_format') == default_notification_format_for_watch:
n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')
html_colour_enable = False
# HTML needs linebreak, but MarkDown and Text can use a linefeed
if n_object.get('notification_format') == 'HTML':
@@ -77,7 +83,7 @@ class update_worker(threading.Thread):
'current_snapshot': snapshot_contents,
'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
'notification_timestamp': now,
@@ -276,6 +282,7 @@ class update_worker(threading.Thread):
print(f"Processor module '{processor}' not found.")
raise e
# Can pass just the watch here?
update_handler = processor_module.perform_site_check(datastore=self.datastore,
watch_uuid=uuid
)

View File

@@ -12,9 +12,6 @@ services:
# environment:
# Default listening port, can also be changed with the -p option
# - PORT=5000
# - PUID=1000
# - PGID=1000
#
# Log levels are in descending order. (TRACE is the most detailed one)
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL

View File

@@ -95,3 +95,5 @@ babel
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3
# Scheduler - Windows seemed to miss a lot of default timezone info (even "UTC" !)
tzdata