Compare commits

...

14 Commits

Author    SHA1        Message                                         Date
dgtlmoon  e9c65147c8  Migrate watches from main DB file to a subfile  2024-12-30 15:01:30 +01:00
dgtlmoon  2a6c707d17  test tweak                                      2024-12-30 13:45:06 +01:00
dgtlmoon  13e682666f  test fix                                        2024-12-30 13:41:15 +01:00
dgtlmoon  748ed79314  fix zombie watch usage                          2024-12-30 13:23:08 +01:00
dgtlmoon  0084812300  make writes more safe                           2024-12-30 13:03:26 +01:00
dgtlmoon  ff9e164fc2  adjust test                                     2024-12-30 13:03:08 +01:00
dgtlmoon  339a881d61  Save each watch to its own dir                  2024-12-30 12:42:35 +01:00
dgtlmoon  8604ac7e56  bump unit tests of watch/store object           2024-12-30 12:42:23 +01:00
dgtlmoon  281f637068  Small refactor, internalise the datastruct      2024-12-30 10:57:30 +01:00
dgtlmoon  2d37f94c5f  share should use custom encoder                 2024-12-29 19:24:03 +01:00
dgtlmoon  05c46f3f25  Handle model type                               2024-12-29 17:28:10 +01:00
dgtlmoon  9f73432540  deepcopy not needed                             2024-12-29 16:39:21 +01:00
dgtlmoon  061693b117  tweak API reference                             2024-12-29 16:38:46 +01:00
dgtlmoon  78e875b70d  Abstract data from the model                    2024-12-29 16:07:12 +01:00
13 changed files with 203 additions and 85 deletions

View File

@@ -12,10 +12,10 @@ import copy

 # See docs/README.md for rebuilding the docs/apidoc information
 from . import api_schema
-from ..model import watch_base
+from ..model import WatchBase

 # Build a JSON Schema atleast partially based on our Watch model
-watch_base_config = watch_base()
+watch_base_config = WatchBase()
 schema = api_schema.build_watch_json_schema(watch_base_config)

 schema_create_watch = copy.deepcopy(schema)
@@ -52,8 +52,8 @@ class Watch(Resource):
         @apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch
         @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
         """
-        from copy import deepcopy
-        watch = deepcopy(self.datastore.data['watching'].get(uuid))
+        watch = self.datastore.data['watching'].get(uuid)
         if not watch:
             abort(404, message='No watch exists with the UUID of {}'.format(uuid))
@@ -75,10 +75,11 @@ class Watch(Resource):
         # Return without history, get that via another API call
         # Properties are not returned as a JSON, so add the required props manually
-        watch['history_n'] = watch.history_n
-        watch['last_changed'] = watch.last_changed
-        watch['viewed'] = watch.viewed
-        return watch
+        result = watch.as_dict()
+        result['history_n'] = watch.history_n
+        result['last_changed'] = watch.last_changed
+        result['viewed'] = watch.viewed
+        return result

     @auth.check_token
     def delete(self, uuid):
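Note: the handler now returns a plain dict built via as_dict() because @property values such as history_n live on the class, not in the mapping data, so they are invisible to JSON serialization and have to be copied in by hand. A minimal sketch of the pitfall (class name hypothetical, not from this codebase):

    import json

    class WatchLike(dict):
        @property
        def history_n(self):
            return 42  # computed on demand, never stored in the mapping

    w = WatchLike(url='https://example.com')
    print(json.dumps(w))                  # {"url": "https://example.com"} - no history_n
    out = dict(w, history_n=w.history_n)  # copy the computed value in explicitly
    print(json.dumps(out))                # now includes history_n

Dropping the deepcopy is safe here because the handler no longer mutates the shared watch object; it assembles a fresh result dict instead.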

View File

@@ -41,6 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
     now = time.time()
     try:
         processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+        # @todo can now just pass the watch here?
         update_handler = processor_module.perform_site_check(datastore=datastore,
                                                              watch_uuid=uuid
                                                              )

View File

@@ -43,6 +43,7 @@ from loguru import logger
 from changedetectionio import html_tools, __version__
 from changedetectionio import queuedWatchMetaData
 from changedetectionio.api import api_v1
+from .store import CustomEncoder
 from .time_handler import is_within_schedule

 datastore = None
@@ -800,7 +801,7 @@ def changedetection_app(config=None, datastore_o=None):
             # Recast it if need be to right data Watch handler
             watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
-            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
+            datastore.data['watching'][uuid] = watch_class(__datastore=datastore_o, default=datastore.data['watching'][uuid])
             flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")

         # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
@@ -1613,7 +1614,7 @@ def changedetection_app(config=None, datastore_o=None):
     watch['ignore_text'] += datastore.data['settings']['application']['global_ignore_text']
     watch['subtractive_selectors'] += datastore.data['settings']['application']['global_subtractive_selectors']

-    watch_json = json.dumps(watch)
+    watch_json = json.dumps(watch, cls=CustomEncoder)

     try:
         r = requests.request(method="POST",

View File

@@ -10,7 +10,7 @@ _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
 DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'

 class model(dict):
-    base_config = {
+    __base_config = {
         'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
         'watching': {},
         'settings': {
@@ -60,7 +60,7 @@ class model(dict):
     def __init__(self, *arg, **kw):
         super(model, self).__init__(*arg, **kw)
-        self.update(self.base_config)
+        self.update(self.__base_config)

 def parse_headers_from_text_file(filepath):

View File

@@ -1,8 +1,8 @@
-from changedetectionio.model import watch_base
+from changedetectionio.model import WatchBase

-class model(watch_base):
+class model(WatchBase):

     def __init__(self, *arg, **kw):
         super(model, self).__init__(*arg, **kw)

View File

@@ -1,17 +1,17 @@
 from changedetectionio.strtobool import strtobool
 from changedetectionio.safe_jinja import render as jinja_render
-from . import watch_base
 import os
 import re
 from pathlib import Path
 from loguru import logger

+from . import WatchBase
 from ..html_tools import TRANSLATE_WHITESPACE_TABLE

 # Allowable protocols, protects against javascript: etc
 # file:// is further checked by ALLOW_FILE_URI
 SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
+WATCH_DB_JSON_FILENAME = 'watch.json'
 minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
 mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -32,15 +32,20 @@ def is_safe_url(test_url):
     return True

-class model(watch_base):
-    __newest_history_key = None
-    __history_n = 0
-    jitter_seconds = 0
+class model(WatchBase):
+    __datastore = None
+    __datastore_checksum = None
+
+    __history_n = 0
+    __newest_history_key = None
+    jitter_seconds = 0

     def __init__(self, *arg, **kw):
-        self.__datastore_path = kw.get('datastore_path')
-        if kw.get('datastore_path'):
-            del kw['datastore_path']
+        if not kw.get('__datastore'):
+            logger.critical('No __datastore reference was set!')
+
+        self.__datastore = kw.get('__datastore')
+
         super(model, self).__init__(*arg, **kw)
         if kw.get('default'):
             self.update(kw['default'])
@@ -179,7 +184,7 @@ class model(watch_base):
         tmp_history = {}

         # In the case we are only using the watch for processing without history
-        if not self.watch_data_dir:
+        if not self.__datastore or not self.watch_data_dir:
             return []

         # Read the history file as a dict
@@ -419,7 +424,7 @@ class model(watch_base):
     @property
     def watch_data_dir(self):
         # The base dir of the watch data
-        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
+        return os.path.join(self.__datastore.datastore_path, self['uuid']) if self.__datastore.datastore_path else None

     def get_error_text(self):
         """Return the text saved from a previous request that resulted in a non-200 error"""
@@ -524,6 +529,22 @@ class model(watch_base):
             # None is set
         return False

+    def save_data(self):
+        import json
+        # @todo dict change?
+        # Save it to a temp file first so that if the disk is full or other error it wont corrupt (hopefully).
+        dest = os.path.join(self.watch_data_dir, WATCH_DB_JSON_FILENAME)
+        logger.debug(f"Saving watch {dest}")
+        try:
+            with open(dest + '.tmp', 'w') as json_file:
+                json.dump(self.as_dict(), json_file, indent=2)
+            os.replace(dest + '.tmp', dest)
+        except Exception as e:
+            logger.critical(f"Exception saving watch JSON {dest} - {e}")
+
     def save_error_text(self, contents):
         self.ensure_data_dir_exists()
         target_path = os.path.join(self.watch_data_dir, "last-error.txt")
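Note: save_data() follows the write-to-temp-then-swap pattern: the JSON lands in watch.json.tmp first and is then moved over the real file with os.replace(), which is atomic on both POSIX and Windows (Python 3.3+), so a crash or a full disk cannot leave a half-written watch.json behind. The same pattern in isolation (function name illustrative):

    import json
    import os

    def atomic_json_write(dest, payload):
        tmp = dest + '.tmp'
        # If the disk fills or the process dies here, 'dest' is untouched.
        with open(tmp, 'w') as f:
            json.dump(payload, f, indent=2)
        # Atomic swap: readers see either the old file or the new one, never a mix.
        os.replace(tmp, dest)

    atomic_json_write('watch.json', {'url': 'https://example.com'})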

View File

@@ -1,13 +1,14 @@
 import os
 import uuid
+from collections.abc import MutableMapping

 from changedetectionio import strtobool
 from changedetectionio.notification import default_notification_format_for_watch

-class watch_base(dict):
+class WatchBase(MutableMapping):
+    __data_checksum = None

-    def __init__(self, *arg, **kw):
-        self.update({
+    def __init__(self, *args, **kwargs):
+        self.__internal_dict = {
@@ -127,9 +128,37 @@ class watch_base(dict):
             'uuid': str(uuid.uuid4()),
             'webdriver_delay': None,
             'webdriver_js_execute_code': None,  # Run before change-detection
-        })
-        super(watch_base, self).__init__(*arg, **kw)
+        }
+
+        # Update with any provided arguments
+        self.update(*args, **kwargs)

         if self.get('default'):
             del self['default']
+
+    # Implement abstract methods required by MutableMapping
+    def __getitem__(self, key):
+        return self.__internal_dict[key]
+
+    def __setitem__(self, key, value):
+        if key == '__datastore':
+            self.__datastore = value
+        else:
+            self.__internal_dict[key] = value
+
+    def __delitem__(self, key):
+        del self.__internal_dict[key]
+
+    def __iter__(self):
+        return iter(self.__internal_dict)
+
+    def __len__(self):
+        return len(self.__internal_dict)
+
+    # Optional: Implement additional methods for convenience
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self.__internal_dict})"
+
+    def as_dict(self):
+        return self.__internal_dict
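Note: moving the base class from dict to MutableMapping means only the five abstract methods above (__getitem__, __setitem__, __delitem__, __iter__, __len__) need implementing; the ABC mixins then derive get(), update(), keys(), items(), pop() and friends, all routed through the overridable __setitem__. A stripped-down sketch of the same idea:

    from collections.abc import MutableMapping

    class DictLike(MutableMapping):
        def __init__(self, *args, **kwargs):
            self._d = {}
            self.update(*args, **kwargs)  # update() is a MutableMapping mixin

        def __getitem__(self, key): return self._d[key]
        def __setitem__(self, key, value): self._d[key] = value
        def __delitem__(self, key): del self._d[key]
        def __iter__(self): return iter(self._d)
        def __len__(self): return len(self._d)

    d = DictLike(url='https://example.com')
    d['paused'] = False
    print(d.get('missing', 'fallback'), len(d), list(d))

The trade-off is that a MutableMapping is no longer a real dict, so json.dump() cannot serialize it natively - which is exactly what the CustomEncoder added to store.py below compensates for.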

View File

@@ -1,3 +1,5 @@
import glob
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from flask import ( from flask import (
@@ -5,8 +7,8 @@ from flask import (
) )
from .html_tools import TRANSLATE_WHITESPACE_TABLE from .html_tools import TRANSLATE_WHITESPACE_TABLE
from . model import App, Watch from .model import App, Watch, WatchBase
from copy import deepcopy, copy from copy import deepcopy
from os import path, unlink from os import path, unlink
from threading import Lock from threading import Lock
import json import json
@@ -18,6 +20,7 @@ import time
 import uuid as uuid_builder
 from loguru import logger

+from .model.Watch import WATCH_DB_JSON_FILENAME
 from .processors import get_custom_watch_obj_for_processor
 from .processors.restock_diff import Restock
@@ -26,6 +29,13 @@ BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'

 dictfilt = lambda x, y: dict([ (i,x[i]) for i in x if i in set(y) ])

+class CustomEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if obj and isinstance(obj, WatchBase):
+            return obj.as_dict()
+        # Add more custom type handlers here
+        return super().default(obj)
+
 # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
 # Open a github issue if you know something :)
 # https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
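Note: JSONEncoder.default() is only invoked for objects the encoder cannot handle natively, so this hook transparently unwraps any WatchBase found anywhere in the structure being dumped. A self-contained sketch of the mechanism (names illustrative):

    import json

    class Wrapped:
        def __init__(self, **kw):
            self._d = dict(kw)
        def as_dict(self):
            return self._d

    class SketchEncoder(json.JSONEncoder):
        def default(self, obj):
            # Only reached for types json does not know how to serialize
            if isinstance(obj, Wrapped):
                return obj.as_dict()
            return super().default(obj)

    print(json.dumps({'watch': Wrapped(url='https://example.com')}, cls=SketchEncoder))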
@@ -42,16 +52,14 @@ class ChangeDetectionStore:

     def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
         # Should only be active for docker
         # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
+        from os.path import join
         self.__data = App.model()
         self.datastore_path = datastore_path
-        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
+        self.json_store_path = join(self.datastore_path, 'url-watches.json')
         logger.info(f"Datastore path is '{self.json_store_path}'")
         self.needs_write = False
         self.start_time = time.time()
         self.stop_thread = False
-        # Base definition for all watchers
-        # deepcopy part of #569 - not sure why its needed exactly
-        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))

         if path.isfile('changedetectionio/source.txt'):
             with open('changedetectionio/source.txt') as f:
@@ -65,10 +73,6 @@ class ChangeDetectionStore:
                 from_disk = json.load(json_file)

                 # @todo isnt there a way todo this dict.update recursively?
-                # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
-                if 'watching' in from_disk:
-                    self.__data['watching'].update(from_disk['watching'])
-
                 if 'app_guid' in from_disk:
                     self.__data['app_guid'] = from_disk['app_guid']
@@ -82,10 +86,7 @@ class ChangeDetectionStore:
                 if 'application' in from_disk['settings']:
                     self.__data['settings']['application'].update(from_disk['settings']['application'])

-            # Convert each existing watch back to the Watch.model object
-            for uuid, watch in self.__data['watching'].items():
-                self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
-                logger.info(f"Watching: {uuid} {watch['url']}")
+            self.scan_and_load_watches()

         # And for Tags also, should be Restock type because it has extra settings
         for uuid, tag in self.__data['settings']['application']['tags'].items():
@@ -158,9 +159,29 @@ class ChangeDetectionStore:
         if entity.get('uuid') != 'text_json_diff':
             logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")

-        entity = watch_class(datastore_path=self.datastore_path, default=entity)
+        entity = watch_class(__datastore=self, default=entity)

         return entity

+    def scan_and_load_watches(self):
+        # Use glob to find all occurrences of 'watch.json' in subdirectories
+        # @todo move to some other function so we can trigger a rescan in a thread
+        for file_path in glob.glob(f"{self.datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True):
+            try:
+                with open(file_path, 'r') as json_file:
+                    data = json.load(json_file)
+
+                    # So that we can always move it to another UUID by renaming the dir
+                    directory_path = os.path.dirname(file_path)
+                    uuid = os.path.basename(directory_path)
+                    if data.get('uuid'):
+                        del data['uuid']
+
+                    self.__data['watching'][uuid] = self.rehydrate_entity(uuid, data)
+            except json.JSONDecodeError as e:
+                logger.error(f"Error decoding JSON in file {file_path}: {e}")
+            except Exception as e:
+                logger.critical(f"Exception decoding JSON in file {file_path}: {e}")
+
     def set_last_viewed(self, uuid, timestamp):
         logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
         self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
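Note: in scan_and_load_watches() the directory name, not the JSON contents, is the source of truth for the watch UUID - a watch can be re-keyed just by renaming its folder, and any stale 'uuid' key inside the file is discarded. Deriving the UUID from the path comes down to (path below is illustrative):

    import os

    file_path = '/datastore/2bc7ba4c-49e0-4a5f-b7a6-1b3c5d7e9f01/watch.json'
    uuid = os.path.basename(os.path.dirname(file_path))
    print(uuid)  # the directory name is the authoritative UUID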
@@ -177,13 +198,15 @@ class ChangeDetectionStore:
             return

         with self.lock:
+            # deepcopy part of #569 - not sure why its needed exactly
+            # self.generic_definition = deepcopy(Watch.model(default={}))

-            # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
-            for dict_key, d in self.generic_definition.items():
-                if isinstance(d, dict):
-                    if update_obj is not None and dict_key in update_obj:
-                        self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
-                        del (update_obj[dict_key])
+            # # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
+            # for dict_key, d in self.generic_definition.items():
+            #     if isinstance(d, dict):
+            #         if update_obj is not None and dict_key in update_obj:
+            #             self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
+            #             del (update_obj[dict_key])

             self.__data['watching'][uuid].update(update_obj)
             self.needs_write = True
@@ -346,7 +369,7 @@ class ChangeDetectionStore:
         # If the processor also has its own Watch implementation
         watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
-        new_watch = watch_class(datastore_path=self.datastore_path, url=url)
+        new_watch = watch_class(__datastore=self, url=url)

         new_uuid = new_watch.get('uuid')
@@ -383,7 +406,8 @@ class ChangeDetectionStore:
     def sync_to_json(self):
         logger.info("Saving JSON..")
         try:
-            data = deepcopy(self.__data)
+            data = {key: deepcopy(value) for key, value in self.__data.items() if key != 'watching'}
         except RuntimeError as e:
             # Try again in 15 seconds
             time.sleep(15)
@@ -397,11 +421,15 @@ class ChangeDetectionStore:
                 # This is a fairly basic strategy to deal with the case that the file is corrupted,
                 # system was out of memory, out of RAM etc
                 with open(self.json_store_path+".tmp", 'w') as json_file:
-                    json.dump(data, json_file, indent=4)
+                    json.dump(data, json_file, indent=2, cls=CustomEncoder)
                 os.replace(self.json_store_path+".tmp", self.json_store_path)
             except Exception as e:
                 logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")

+        # Write each watch to the disk (data in their own subdir) if it changed
+        for watch_uuid, watch in self.__data['watching'].items():
+            watch.save_data()
+
         self.needs_write = False
         self.needs_write_urgent = False
@@ -924,3 +952,25 @@ class ChangeDetectionStore:
                     f_d.write(zlib.compress(f_j.read()))
                     os.unlink(json_path)

+    # Move each 'watching' from a big JSON file to their own datafile in their data subdirectory
+    def update_20(self):
+        with open(self.json_store_path) as json_file:
+            data = json.load(json_file)
+            if data.get('watching'):
+                for uuid, watch in data['watching'].items():
+                    watch_data_dir = os.path.join(self.datastore_path, uuid)
+                    dest = os.path.join(watch_data_dir, WATCH_DB_JSON_FILENAME)
+                    try:
+                        if not os.path.isdir(watch_data_dir):
+                            logger.debug(f"> Creating data dir {watch_data_dir}")
+                            os.mkdir(watch_data_dir)
+                        with open(dest + '.tmp', 'w') as json_file:
+                            json.dump(watch, json_file, indent=2)
+                        os.replace(dest + '.tmp', dest)
+                        logger.info(f"Saved watch to {dest}")
+                    except Exception as e:
+                        logger.critical(f"Exception saving watch JSON {dest} - {e}")
+
+        self.data['watching'] = {}
+        self.scan_and_load_watches()
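Note: together with the sync_to_json() change above, update_20 completes the split: url-watches.json keeps only settings, tags and app metadata, while each watch's configuration moves in beside its snapshots. Assuming the default datastore path, the resulting layout is roughly:

    /datastore/url-watches.json    # app settings, no 'watching' key anymore
    /datastore/<uuid>/watch.json   # per-watch config (WATCH_DB_JSON_FILENAME)
    /datastore/<uuid>/history.txt  # snapshot index for that watch
    /datastore/<uuid>/...          # snapshot and error-text files

Emptying self.data['watching'] and then calling scan_and_load_watches() re-populates the in-memory store from the freshly written per-watch files, so the migration path is exercised immediately.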

View File

@@ -8,11 +8,19 @@ from flask import url_for
 from .util import live_server_setup, wait_for_all_checks
 from urllib.parse import urlparse, parse_qs

+from ..model.Watch import WATCH_DB_JSON_FILENAME
+

 def test_consistent_history(client, live_server, measure_memory_usage):
     live_server_setup(live_server)
+    import glob
     r = range(1, 30)

+    # incase some exist from a previous test
+    for f in glob.glob(f"{live_server.app.config['DATASTORE'].datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True):
+        os.unlink(f)
+
     for one in r:
         test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
         res = client.post(
@@ -44,11 +52,17 @@ def test_consistent_history(client, live_server, measure_memory_usage):
     with open(json_db_file, 'r') as f:
         json_obj = json.load(f)

+    found_db_jsons = glob.glob(f"{live_server.app.config['DATASTORE'].datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True)
+
     # assert the right amount of watches was found in the JSON
-    assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON"
+    assert len(found_db_jsons) == len(r), "Correct number of watches was found in the JSON"

     # each one should have a history.txt containing just one line
-    for w in json_obj['watching'].keys():
+    for json_db_file in found_db_jsons:
+        directory_path = os.path.dirname(json_db_file)
+        w = os.path.basename(directory_path)
+
         history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
         assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
@@ -58,22 +72,21 @@ def test_consistent_history(client, live_server, measure_memory_usage):
         assert len(tmp_history) == 1, "History.txt should contain 1 line"

     # Should be two files,. the history.txt , and the snapshot.txt
-    files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,
-                                                 w))
+    files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w))

     # Find the snapshot one
-    for fname in files_in_watch_dir:
-        if fname != 'history.txt' and 'html' not in fname:
-            # contents should match what we requested as content returned from the test url
-            with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
-                contents = snapshot_f.read()
-                watch_url = json_obj['watching'][w]['url']
-                u = urlparse(watch_url)
-                q = parse_qs(u[4])
-                assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"
+    # for fname in files_in_watch_dir:
+    #     if fname != 'history.txt' and 'html' not in fname and fname != WATCH_DB_JSON_FILENAME:
+    #         # contents should match what we requested as content returned from the test url
+    #         with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
+    #             contents = snapshot_f.read()
+    #             watch_url = json_obj['watching'][w]['url']
+    #             u = urlparse(watch_url)
+    #             q = parse_qs(u[4])
+    #             assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"

-    assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
+    assert len(files_in_watch_dir) == 4, "Should be just four files in the dir, html.br snapshot, history.txt, watch.json and the extracted text snapshot"

     json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')

View File

@@ -2,7 +2,7 @@ import json
 import os
 import time
 from flask import url_for
-from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client

 def test_setup(live_server):
     live_server_setup(live_server)
@@ -74,8 +74,8 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
     # Re #137 - It should have only one set of headers entered
     watches_with_headers = 0
     for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
         if (len(watch['headers'])):
             watches_with_headers += 1
     assert watches_with_headers == 1

     # 'server' http header was automatically recorded
@@ -156,11 +156,10 @@ def test_body_in_request(client, live_server, measure_memory_usage):
     assert b"1 Imported" in res.data

     watches_with_body = 0
-    with open('test-datastore/url-watches.json') as f:
-        app_struct = json.load(f)
-        for uuid in app_struct['watching']:
-            if app_struct['watching'][uuid]['body']==body_value:
-                watches_with_body += 1
+    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
+        if watch['body'] == body_value:
+            watches_with_body += 1

     # Should be only one with body set
     assert watches_with_body==1
@@ -244,11 +243,9 @@ def test_method_in_request(client, live_server, measure_memory_usage):
     wait_for_all_checks(client)

     watches_with_method = 0
-    with open('test-datastore/url-watches.json') as f:
-        app_struct = json.load(f)
-        for uuid in app_struct['watching']:
-            if app_struct['watching'][uuid]['method'] == 'PATCH':
-                watches_with_method += 1
+    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
+        if watch['method'] == 'PATCH':
+            watches_with_method += 1

     # Should be only one with method set to PATCH
     assert watches_with_method == 1

View File

@@ -6,6 +6,7 @@
 import unittest
 import os

+from changedetectionio import store
 from changedetectionio.model import Watch

 # mostly
@@ -13,7 +14,8 @@ class TestDiffBuilder(unittest.TestCase):
     def test_watch_get_suggested_from_diff_timestamp(self):
         import uuid as uuid_builder

-        watch = Watch.model(datastore_path='/tmp', default={})
+        datastore = store.ChangeDetectionStore(datastore_path='/tmp')
+        watch = Watch.model(__datastore=datastore, default={})

         watch.ensure_data_dir_exists()
@@ -49,7 +51,7 @@ class TestDiffBuilder(unittest.TestCase):
         assert p == "109", "Correct when its the same time"

         # new empty one
-        watch = Watch.model(datastore_path='/tmp', default={})
+        watch = Watch.model(__datastore=datastore, default={})
         p = watch.get_from_version_based_on_last_viewed
         assert p == None, "None when no history available"
@@ -61,5 +63,6 @@ class TestDiffBuilder(unittest.TestCase):
         p = watch.get_from_version_based_on_last_viewed
         assert p == "100", "Correct with only one history snapshot"

+        datastore.stop_thread = True

 if __name__ == '__main__':
     unittest.main()

View File

@@ -80,6 +80,7 @@ def wait_for_notification_endpoint_output():
     '''Apprise can take a few seconds to fire'''
     #@todo - could check the apprise object directly instead of looking for this file
+    from os.path import isfile
     for i in range(1, 20):
         time.sleep(1)
         if isfile("test-datastore/notification.txt"):

View File

@@ -282,6 +282,7 @@ class update_worker(threading.Thread):
                     print(f"Processor module '{processor}' not found.")
                     raise e

+                # Can pass just the watch here?
                 update_handler = processor_module.perform_site_check(datastore=self.datastore,
                                                                      watch_uuid=uuid
                                                                      )