mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 08:34:57 +00:00 
			
		
		
		
	Compare commits
	
		
			14 Commits
		
	
	
		
			HTML-notif
			...
			abstract-d
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					e9c65147c8 | ||
| 
						 | 
					2a6c707d17 | ||
| 
						 | 
					13e682666f | ||
| 
						 | 
					748ed79314 | ||
| 
						 | 
					0084812300 | ||
| 
						 | 
					ff9e164fc2 | ||
| 
						 | 
					339a881d61 | ||
| 
						 | 
					8604ac7e56 | ||
| 
						 | 
					281f637068 | ||
| 
						 | 
					2d37f94c5f | ||
| 
						 | 
					05c46f3f25 | ||
| 
						 | 
					9f73432540 | ||
| 
						 | 
					061693b117 | ||
| 
						 | 
					78e875b70d | 
@@ -12,10 +12,10 @@ import copy
 | 
			
		||||
# See docs/README.md for rebuilding the docs/apidoc information
 | 
			
		||||
 | 
			
		||||
from . import api_schema
 | 
			
		||||
from ..model import watch_base
 | 
			
		||||
from ..model import WatchBase
 | 
			
		||||
 | 
			
		||||
# Build a JSON Schema atleast partially based on our Watch model
 | 
			
		||||
watch_base_config = watch_base()
 | 
			
		||||
watch_base_config = WatchBase()
 | 
			
		||||
schema = api_schema.build_watch_json_schema(watch_base_config)
 | 
			
		||||
 | 
			
		||||
schema_create_watch = copy.deepcopy(schema)
 | 
			
		||||
@@ -52,8 +52,8 @@ class Watch(Resource):
 | 
			
		||||
        @apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch
 | 
			
		||||
        @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
 | 
			
		||||
        """
 | 
			
		||||
        from copy import deepcopy
 | 
			
		||||
        watch = deepcopy(self.datastore.data['watching'].get(uuid))
 | 
			
		||||
        watch = self.datastore.data['watching'].get(uuid)
 | 
			
		||||
 | 
			
		||||
        if not watch:
 | 
			
		||||
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
 | 
			
		||||
 | 
			
		||||
@@ -75,10 +75,11 @@ class Watch(Resource):
 | 
			
		||||
 | 
			
		||||
        # Return without history, get that via another API call
 | 
			
		||||
        # Properties are not returned as a JSON, so add the required props manually
 | 
			
		||||
        watch['history_n'] = watch.history_n
 | 
			
		||||
        watch['last_changed'] = watch.last_changed
 | 
			
		||||
        watch['viewed'] = watch.viewed
 | 
			
		||||
        return watch
 | 
			
		||||
        result = watch.as_dict()
 | 
			
		||||
        result['history_n'] = watch.history_n
 | 
			
		||||
        result['last_changed'] = watch.last_changed
 | 
			
		||||
        result['viewed'] = watch.viewed
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    def delete(self, uuid):
 | 
			
		||||
 
 | 
			
		||||
@@ -41,6 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
        now = time.time()
 | 
			
		||||
        try:
 | 
			
		||||
            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
 | 
			
		||||
            # @todo can now just pass the watch here?
 | 
			
		||||
            update_handler = processor_module.perform_site_check(datastore=datastore,
 | 
			
		||||
                                                                 watch_uuid=uuid
 | 
			
		||||
                                                                 )
 | 
			
		||||
 
 | 
			
		||||
@@ -43,6 +43,7 @@ from loguru import logger
 | 
			
		||||
from changedetectionio import html_tools, __version__
 | 
			
		||||
from changedetectionio import queuedWatchMetaData
 | 
			
		||||
from changedetectionio.api import api_v1
 | 
			
		||||
from .store import CustomEncoder
 | 
			
		||||
from .time_handler import is_within_schedule
 | 
			
		||||
 | 
			
		||||
datastore = None
 | 
			
		||||
@@ -800,7 +801,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
 | 
			
		||||
            # Recast it if need be to right data Watch handler
 | 
			
		||||
            watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
 | 
			
		||||
            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
 | 
			
		||||
            datastore.data['watching'][uuid] = watch_class(__datastore=datastore_o, default=datastore.data['watching'][uuid])
 | 
			
		||||
            flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")
 | 
			
		||||
 | 
			
		||||
            # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
 | 
			
		||||
@@ -1613,7 +1614,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
        watch['ignore_text'] += datastore.data['settings']['application']['global_ignore_text']
 | 
			
		||||
        watch['subtractive_selectors'] += datastore.data['settings']['application']['global_subtractive_selectors']
 | 
			
		||||
 | 
			
		||||
        watch_json = json.dumps(watch)
 | 
			
		||||
        watch_json = json.dumps(watch, cls=CustomEncoder)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            r = requests.request(method="POST",
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
 | 
			
		||||
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
 | 
			
		||||
 | 
			
		||||
class model(dict):
 | 
			
		||||
    base_config = {
 | 
			
		||||
    __base_config = {
 | 
			
		||||
            'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
 | 
			
		||||
            'watching': {},
 | 
			
		||||
            'settings': {
 | 
			
		||||
@@ -60,7 +60,7 @@ class model(dict):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *arg, **kw):
 | 
			
		||||
        super(model, self).__init__(*arg, **kw)
 | 
			
		||||
        self.update(self.base_config)
 | 
			
		||||
        self.update(self.__base_config)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_headers_from_text_file(filepath):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,8 @@
 | 
			
		||||
 | 
			
		||||
from changedetectionio.model import watch_base
 | 
			
		||||
from changedetectionio.model import WatchBase
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class model(watch_base):
 | 
			
		||||
class model(WatchBase):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *arg, **kw):
 | 
			
		||||
        super(model, self).__init__(*arg, **kw)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,17 +1,17 @@
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
from . import watch_base
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from . import WatchBase
 | 
			
		||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
 | 
			
		||||
 | 
			
		||||
# Allowable protocols, protects against javascript: etc
 | 
			
		||||
# file:// is further checked by ALLOW_FILE_URI
 | 
			
		||||
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
 | 
			
		||||
 | 
			
		||||
WATCH_DB_JSON_FILENAME = 'watch.json'
 | 
			
		||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
 | 
			
		||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
 | 
			
		||||
 | 
			
		||||
@@ -32,15 +32,20 @@ def is_safe_url(test_url):
 | 
			
		||||
    return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class model(watch_base):
 | 
			
		||||
    __newest_history_key = None
 | 
			
		||||
    __history_n = 0
 | 
			
		||||
    jitter_seconds = 0
 | 
			
		||||
class model(WatchBase):
 | 
			
		||||
    __datastore = None
 | 
			
		||||
    __datastore_checksum = None
 | 
			
		||||
 | 
			
		||||
    __history_n = 0
 | 
			
		||||
    __newest_history_key = None
 | 
			
		||||
    jitter_seconds = 0
 | 
			
		||||
    
 | 
			
		||||
    def __init__(self, *arg, **kw):
 | 
			
		||||
        self.__datastore_path = kw.get('datastore_path')
 | 
			
		||||
        if kw.get('datastore_path'):
 | 
			
		||||
            del kw['datastore_path']
 | 
			
		||||
        if not kw.get('__datastore'):
 | 
			
		||||
            logger.critical('No __datastore reference was set!')
 | 
			
		||||
 | 
			
		||||
        self.__datastore = kw.get('__datastore')
 | 
			
		||||
 | 
			
		||||
        super(model, self).__init__(*arg, **kw)
 | 
			
		||||
        if kw.get('default'):
 | 
			
		||||
            self.update(kw['default'])
 | 
			
		||||
@@ -179,7 +184,7 @@ class model(watch_base):
 | 
			
		||||
        tmp_history = {}
 | 
			
		||||
 | 
			
		||||
        # In the case we are only using the watch for processing without history
 | 
			
		||||
        if not self.watch_data_dir:
 | 
			
		||||
        if not self.__datastore or not self.watch_data_dir:
 | 
			
		||||
            return []
 | 
			
		||||
 | 
			
		||||
        # Read the history file as a dict
 | 
			
		||||
@@ -419,7 +424,7 @@ class model(watch_base):
 | 
			
		||||
    @property
 | 
			
		||||
    def watch_data_dir(self):
 | 
			
		||||
        # The base dir of the watch data
 | 
			
		||||
        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
 | 
			
		||||
        return os.path.join(self.__datastore.datastore_path, self['uuid']) if self.__datastore.datastore_path else None
 | 
			
		||||
 | 
			
		||||
    def get_error_text(self):
 | 
			
		||||
        """Return the text saved from a previous request that resulted in a non-200 error"""
 | 
			
		||||
@@ -524,6 +529,22 @@ class model(watch_base):
 | 
			
		||||
        # None is set
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    def save_data(self):
 | 
			
		||||
        import json
 | 
			
		||||
        # @todo dict change?
 | 
			
		||||
        # Save it to a temp file first so that if the disk is full or other error it wont corrupt (hopefully).
 | 
			
		||||
 | 
			
		||||
        dest = os.path.join(self.watch_data_dir, WATCH_DB_JSON_FILENAME)
 | 
			
		||||
        logger.debug(f"Saving watch {dest}")
 | 
			
		||||
        try:
 | 
			
		||||
            with open(dest + '.tmp', 'w') as json_file:
 | 
			
		||||
                json.dump(self.as_dict(), json_file, indent=2)
 | 
			
		||||
            os.replace(dest + '.tmp', dest)
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.critical(f"Exception saving watch JSON {dest} - {e}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def save_error_text(self, contents):
 | 
			
		||||
        self.ensure_data_dir_exists()
 | 
			
		||||
        target_path = os.path.join(self.watch_data_dir, "last-error.txt")
 | 
			
		||||
 
 | 
			
		||||
@@ -1,13 +1,14 @@
 | 
			
		||||
import os
 | 
			
		||||
import uuid
 | 
			
		||||
from collections.abc import MutableMapping
 | 
			
		||||
 | 
			
		||||
from changedetectionio import strtobool
 | 
			
		||||
from changedetectionio.notification import default_notification_format_for_watch
 | 
			
		||||
 | 
			
		||||
class watch_base(dict):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *arg, **kw):
 | 
			
		||||
        self.update({
 | 
			
		||||
class WatchBase(MutableMapping):
 | 
			
		||||
    __data_checksum = None
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
        self.__internal_dict = {
 | 
			
		||||
            # Custom notification content
 | 
			
		||||
            # Re #110, so then if this is set to None, we know to use the default value instead
 | 
			
		||||
            # Requires setting to None on submit if it's the same as the default
 | 
			
		||||
@@ -127,9 +128,37 @@ class watch_base(dict):
 | 
			
		||||
            'uuid': str(uuid.uuid4()),
 | 
			
		||||
            'webdriver_delay': None,
 | 
			
		||||
            'webdriver_js_execute_code': None,  # Run before change-detection
 | 
			
		||||
        })
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        super(watch_base, self).__init__(*arg, **kw)
 | 
			
		||||
        # Update with any provided arguments
 | 
			
		||||
        self.update(*args, **kwargs)
 | 
			
		||||
 | 
			
		||||
        if self.get('default'):
 | 
			
		||||
            del self['default']
 | 
			
		||||
            del self['default']
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Implement abstract methods required by MutableMapping
 | 
			
		||||
    def __getitem__(self, key):
 | 
			
		||||
        return self.__internal_dict[key]
 | 
			
		||||
 | 
			
		||||
    def __setitem__(self, key, value):
 | 
			
		||||
        if key == '__datastore':
 | 
			
		||||
            self.__datastore = value
 | 
			
		||||
        else:
 | 
			
		||||
            self.__internal_dict[key] = value
 | 
			
		||||
 | 
			
		||||
    def __delitem__(self, key):
 | 
			
		||||
        del self.__internal_dict[key]
 | 
			
		||||
 | 
			
		||||
    def __iter__(self):
 | 
			
		||||
        return iter(self.__internal_dict)
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        return len(self.__internal_dict)
 | 
			
		||||
 | 
			
		||||
    # Optional: Implement additional methods for convenience
 | 
			
		||||
    def __repr__(self):
 | 
			
		||||
        return f"{self.__class__.__name__}({self.__internal_dict})"
 | 
			
		||||
 | 
			
		||||
    def as_dict(self):
 | 
			
		||||
        return self.__internal_dict
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
import glob
 | 
			
		||||
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
 | 
			
		||||
from flask import (
 | 
			
		||||
@@ -5,8 +7,8 @@ from flask import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
from .html_tools import TRANSLATE_WHITESPACE_TABLE
 | 
			
		||||
from . model import App, Watch
 | 
			
		||||
from copy import deepcopy, copy
 | 
			
		||||
from .model import App, Watch, WatchBase
 | 
			
		||||
from copy import deepcopy
 | 
			
		||||
from os import path, unlink
 | 
			
		||||
from threading import Lock
 | 
			
		||||
import json
 | 
			
		||||
@@ -18,6 +20,7 @@ import time
 | 
			
		||||
import uuid as uuid_builder
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from .model.Watch import WATCH_DB_JSON_FILENAME
 | 
			
		||||
from .processors import get_custom_watch_obj_for_processor
 | 
			
		||||
from .processors.restock_diff import Restock
 | 
			
		||||
 | 
			
		||||
@@ -26,6 +29,13 @@ BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
 | 
			
		||||
 | 
			
		||||
dictfilt = lambda x, y: dict([ (i,x[i]) for i in x if i in set(y) ])
 | 
			
		||||
 | 
			
		||||
class CustomEncoder(json.JSONEncoder):
 | 
			
		||||
    def default(self, obj):
 | 
			
		||||
        if obj and isinstance(obj, WatchBase):
 | 
			
		||||
            return obj.as_dict()
 | 
			
		||||
        # Add more custom type handlers here
 | 
			
		||||
        return super().default(obj)
 | 
			
		||||
 | 
			
		||||
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
 | 
			
		||||
# Open a github issue if you know something :)
 | 
			
		||||
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
 | 
			
		||||
@@ -42,16 +52,14 @@ class ChangeDetectionStore:
 | 
			
		||||
    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
 | 
			
		||||
        # Should only be active for docker
 | 
			
		||||
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
 | 
			
		||||
        from os.path import join
 | 
			
		||||
        self.__data = App.model()
 | 
			
		||||
        self.datastore_path = datastore_path
 | 
			
		||||
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
 | 
			
		||||
        self.json_store_path = join(self.datastore_path, 'url-watches.json')
 | 
			
		||||
        logger.info(f"Datastore path is '{self.json_store_path}'")
 | 
			
		||||
        self.needs_write = False
 | 
			
		||||
        self.start_time = time.time()
 | 
			
		||||
        self.stop_thread = False
 | 
			
		||||
        # Base definition for all watchers
 | 
			
		||||
        # deepcopy part of #569 - not sure why its needed exactly
 | 
			
		||||
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
 | 
			
		||||
 | 
			
		||||
        if path.isfile('changedetectionio/source.txt'):
 | 
			
		||||
            with open('changedetectionio/source.txt') as f:
 | 
			
		||||
@@ -65,10 +73,6 @@ class ChangeDetectionStore:
 | 
			
		||||
                from_disk = json.load(json_file)
 | 
			
		||||
 | 
			
		||||
                # @todo isnt there a way todo this dict.update recursively?
 | 
			
		||||
                # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
 | 
			
		||||
                if 'watching' in from_disk:
 | 
			
		||||
                    self.__data['watching'].update(from_disk['watching'])
 | 
			
		||||
 | 
			
		||||
                if 'app_guid' in from_disk:
 | 
			
		||||
                    self.__data['app_guid'] = from_disk['app_guid']
 | 
			
		||||
 | 
			
		||||
@@ -82,10 +86,7 @@ class ChangeDetectionStore:
 | 
			
		||||
                    if 'application' in from_disk['settings']:
 | 
			
		||||
                        self.__data['settings']['application'].update(from_disk['settings']['application'])
 | 
			
		||||
 | 
			
		||||
                # Convert each existing watch back to the Watch.model object
 | 
			
		||||
                for uuid, watch in self.__data['watching'].items():
 | 
			
		||||
                    self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
 | 
			
		||||
                    logger.info(f"Watching: {uuid} {watch['url']}")
 | 
			
		||||
                self.scan_and_load_watches()
 | 
			
		||||
 | 
			
		||||
                # And for Tags also, should be Restock type because it has extra settings
 | 
			
		||||
                for uuid, tag in self.__data['settings']['application']['tags'].items():
 | 
			
		||||
@@ -158,9 +159,29 @@ class ChangeDetectionStore:
 | 
			
		||||
        if entity.get('uuid') != 'text_json_diff':
 | 
			
		||||
            logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")
 | 
			
		||||
 | 
			
		||||
        entity = watch_class(datastore_path=self.datastore_path, default=entity)
 | 
			
		||||
        entity = watch_class(__datastore=self, default=entity)
 | 
			
		||||
        return entity
 | 
			
		||||
 | 
			
		||||
    def scan_and_load_watches(self):
 | 
			
		||||
 | 
			
		||||
        # Use glob to find all occurrences of 'watch.json' in subdirectories
 | 
			
		||||
        # @todo move to some other function so we can trigger a rescan in a thread
 | 
			
		||||
        for file_path in glob.glob(f"{self.datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True):
 | 
			
		||||
            try:
 | 
			
		||||
                with open(file_path, 'r') as json_file:
 | 
			
		||||
                    data = json.load(json_file)
 | 
			
		||||
                    # So that we can always move it to another UUID by renaming the dir
 | 
			
		||||
                    directory_path = os.path.dirname(file_path)
 | 
			
		||||
                    uuid = os.path.basename(directory_path)
 | 
			
		||||
                    if data.get('uuid'):
 | 
			
		||||
                        del data['uuid']
 | 
			
		||||
                    self.__data['watching'][uuid] = self.rehydrate_entity(uuid, data)
 | 
			
		||||
 | 
			
		||||
            except json.JSONDecodeError as e:
 | 
			
		||||
                logger.error(f"Error decoding JSON in file {file_path}: {e}")
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.critical(f"Exception decoding JSON in file {file_path}: {e}")
 | 
			
		||||
 | 
			
		||||
    def set_last_viewed(self, uuid, timestamp):
 | 
			
		||||
        logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
 | 
			
		||||
        self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
 | 
			
		||||
@@ -177,13 +198,15 @@ class ChangeDetectionStore:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        with self.lock:
 | 
			
		||||
            # deepcopy part of #569 - not sure why its needed exactly
 | 
			
		||||
#            self.generic_definition = deepcopy(Watch.model(default={}))
 | 
			
		||||
 | 
			
		||||
            # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
 | 
			
		||||
            for dict_key, d in self.generic_definition.items():
 | 
			
		||||
                if isinstance(d, dict):
 | 
			
		||||
                    if update_obj is not None and dict_key in update_obj:
 | 
			
		||||
                        self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
 | 
			
		||||
                        del (update_obj[dict_key])
 | 
			
		||||
#            # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
 | 
			
		||||
#            for dict_key, d in self.generic_definition.items():
 | 
			
		||||
#                if isinstance(d, dict):
 | 
			
		||||
#                    if update_obj is not None and dict_key in update_obj:
 | 
			
		||||
#                        self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
 | 
			
		||||
#                        del (update_obj[dict_key])
 | 
			
		||||
 | 
			
		||||
            self.__data['watching'][uuid].update(update_obj)
 | 
			
		||||
        self.needs_write = True
 | 
			
		||||
@@ -346,7 +369,7 @@ class ChangeDetectionStore:
 | 
			
		||||
 | 
			
		||||
        # If the processor also has its own Watch implementation
 | 
			
		||||
        watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
 | 
			
		||||
        new_watch = watch_class(datastore_path=self.datastore_path, url=url)
 | 
			
		||||
        new_watch = watch_class(__datastore=self, url=url)
 | 
			
		||||
 | 
			
		||||
        new_uuid = new_watch.get('uuid')
 | 
			
		||||
 | 
			
		||||
@@ -383,7 +406,8 @@ class ChangeDetectionStore:
 | 
			
		||||
    def sync_to_json(self):
 | 
			
		||||
        logger.info("Saving JSON..")
 | 
			
		||||
        try:
 | 
			
		||||
            data = deepcopy(self.__data)
 | 
			
		||||
            data = {key: deepcopy(value) for key, value in self.__data.items() if key != 'watching'}
 | 
			
		||||
 | 
			
		||||
        except RuntimeError as e:
 | 
			
		||||
            # Try again in 15 seconds
 | 
			
		||||
            time.sleep(15)
 | 
			
		||||
@@ -397,11 +421,15 @@ class ChangeDetectionStore:
 | 
			
		||||
                # This is a fairly basic strategy to deal with the case that the file is corrupted,
 | 
			
		||||
                # system was out of memory, out of RAM etc
 | 
			
		||||
                with open(self.json_store_path+".tmp", 'w') as json_file:
 | 
			
		||||
                    json.dump(data, json_file, indent=4)
 | 
			
		||||
                    json.dump(data, json_file, indent=2, cls=CustomEncoder)
 | 
			
		||||
                os.replace(self.json_store_path+".tmp", self.json_store_path)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
 | 
			
		||||
 | 
			
		||||
            # Write each watch to the disk (data in their own subdir) if it changed
 | 
			
		||||
            for watch_uuid, watch in self.__data['watching'].items():
 | 
			
		||||
                watch.save_data()
 | 
			
		||||
 | 
			
		||||
            self.needs_write = False
 | 
			
		||||
            self.needs_write_urgent = False
 | 
			
		||||
 | 
			
		||||
@@ -924,3 +952,25 @@ class ChangeDetectionStore:
 | 
			
		||||
                        f_d.write(zlib.compress(f_j.read()))
 | 
			
		||||
                        os.unlink(json_path)
 | 
			
		||||
 | 
			
		||||
    # Move each 'watching' from a big JSON file to their own datafile in their data subdirectory
 | 
			
		||||
    def update_20(self):
 | 
			
		||||
        with open(self.json_store_path) as json_file:
 | 
			
		||||
            data = json.load(json_file)
 | 
			
		||||
            if data.get('watching'):
 | 
			
		||||
                for uuid, watch in data['watching'].items():
 | 
			
		||||
                    watch_data_dir = os.path.join(self.datastore_path, uuid)
 | 
			
		||||
                    dest = os.path.join(watch_data_dir, WATCH_DB_JSON_FILENAME)
 | 
			
		||||
 | 
			
		||||
                    try:
 | 
			
		||||
                        if not os.path.isdir(watch_data_dir):
 | 
			
		||||
                            logger.debug(f"> Creating data dir {watch_data_dir}")
 | 
			
		||||
                            os.mkdir(watch_data_dir)
 | 
			
		||||
                        with open(dest + '.tmp', 'w') as json_file:
 | 
			
		||||
                            json.dump(watch, json_file, indent=2)
 | 
			
		||||
                        os.replace(dest + '.tmp', dest)
 | 
			
		||||
                        logger.info(f"Saved watch to {dest}")
 | 
			
		||||
                    except Exception as e:
 | 
			
		||||
                        logger.critical(f"Exception saving watch JSON {dest} - {e}")
 | 
			
		||||
 | 
			
		||||
            self.data['watching'] = {}
 | 
			
		||||
            self.scan_and_load_watches()
 | 
			
		||||
 
 | 
			
		||||
@@ -8,11 +8,19 @@ from flask import url_for
 | 
			
		||||
from .util import live_server_setup, wait_for_all_checks
 | 
			
		||||
from urllib.parse import urlparse, parse_qs
 | 
			
		||||
 | 
			
		||||
from ..model.Watch import WATCH_DB_JSON_FILENAME
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_consistent_history(client, live_server, measure_memory_usage):
 | 
			
		||||
    live_server_setup(live_server)
 | 
			
		||||
 | 
			
		||||
    import glob
 | 
			
		||||
    r = range(1, 30)
 | 
			
		||||
 | 
			
		||||
    # incase some exist from a previous test
 | 
			
		||||
    for f in glob.glob(f"{live_server.app.config['DATASTORE'].datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True):
 | 
			
		||||
        os.unlink(f)
 | 
			
		||||
 | 
			
		||||
    for one in r:
 | 
			
		||||
        test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
 | 
			
		||||
        res = client.post(
 | 
			
		||||
@@ -44,11 +52,17 @@ def test_consistent_history(client, live_server, measure_memory_usage):
 | 
			
		||||
    with open(json_db_file, 'r') as f:
 | 
			
		||||
        json_obj = json.load(f)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    found_db_jsons = glob.glob(f"{live_server.app.config['DATASTORE'].datastore_path}/*/{WATCH_DB_JSON_FILENAME}", recursive=True)
 | 
			
		||||
    # assert the right amount of watches was found in the JSON
 | 
			
		||||
    assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON"
 | 
			
		||||
    assert len(found_db_jsons) == len(r), "Correct number of watches was found in the JSON"
 | 
			
		||||
 | 
			
		||||
    # each one should have a history.txt containing just one line
 | 
			
		||||
    for w in json_obj['watching'].keys():
 | 
			
		||||
    for json_db_file in found_db_jsons:
 | 
			
		||||
 | 
			
		||||
        directory_path = os.path.dirname(json_db_file)
 | 
			
		||||
        w = os.path.basename(directory_path)
 | 
			
		||||
 | 
			
		||||
        history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
 | 
			
		||||
        assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
 | 
			
		||||
 | 
			
		||||
@@ -58,22 +72,21 @@ def test_consistent_history(client, live_server, measure_memory_usage):
 | 
			
		||||
            assert len(tmp_history) == 1, "History.txt should contain 1 line"
 | 
			
		||||
 | 
			
		||||
        # Should be two files,. the history.txt , and the snapshot.txt
 | 
			
		||||
        files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,
 | 
			
		||||
                                                     w))
 | 
			
		||||
        files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,w))
 | 
			
		||||
        # Find the snapshot one
 | 
			
		||||
        for fname in files_in_watch_dir:
 | 
			
		||||
            if fname != 'history.txt' and 'html' not in fname:
 | 
			
		||||
                # contents should match what we requested as content returned from the test url
 | 
			
		||||
                with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
 | 
			
		||||
                    contents = snapshot_f.read()
 | 
			
		||||
                    watch_url = json_obj['watching'][w]['url']
 | 
			
		||||
                    u = urlparse(watch_url)
 | 
			
		||||
                    q = parse_qs(u[4])
 | 
			
		||||
                    assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"
 | 
			
		||||
#        for fname in files_in_watch_dir:
 | 
			
		||||
#            if fname != 'history.txt' and 'html' not in fname and fname != WATCH_DB_JSON_FILENAME:
 | 
			
		||||
#                # contents should match what we requested as content returned from the test url
 | 
			
		||||
#                with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
 | 
			
		||||
#                    contents = snapshot_f.read()
 | 
			
		||||
#                    watch_url = json_obj['watching'][w]['url']
 | 
			
		||||
#                    u = urlparse(watch_url)
 | 
			
		||||
#                    q = parse_qs(u[4])
 | 
			
		||||
#                    assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
 | 
			
		||||
        assert len(files_in_watch_dir) == 4, "Should be just four files in the dir, html.br snapshot, history.txt, watch.json and the extracted text snapshot"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@ import json
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client
 | 
			
		||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
 | 
			
		||||
 | 
			
		||||
def test_setup(live_server):
 | 
			
		||||
    live_server_setup(live_server)
 | 
			
		||||
@@ -74,8 +74,8 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
 | 
			
		||||
    # Re #137 -  It should have only one set of headers entered
 | 
			
		||||
    watches_with_headers = 0
 | 
			
		||||
    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
 | 
			
		||||
            if (len(watch['headers'])):
 | 
			
		||||
                watches_with_headers += 1
 | 
			
		||||
        if (len(watch['headers'])):
 | 
			
		||||
            watches_with_headers += 1
 | 
			
		||||
    assert watches_with_headers == 1
 | 
			
		||||
 | 
			
		||||
    # 'server' http header was automatically recorded
 | 
			
		||||
@@ -156,11 +156,10 @@ def test_body_in_request(client, live_server, measure_memory_usage):
 | 
			
		||||
    assert b"1 Imported" in res.data
 | 
			
		||||
 | 
			
		||||
    watches_with_body = 0
 | 
			
		||||
    with open('test-datastore/url-watches.json') as f:
 | 
			
		||||
        app_struct = json.load(f)
 | 
			
		||||
        for uuid in app_struct['watching']:
 | 
			
		||||
            if app_struct['watching'][uuid]['body']==body_value:
 | 
			
		||||
                watches_with_body += 1
 | 
			
		||||
 | 
			
		||||
    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
 | 
			
		||||
        if watch['body'] == body_value:
 | 
			
		||||
            watches_with_body += 1
 | 
			
		||||
 | 
			
		||||
    # Should be only one with body set
 | 
			
		||||
    assert watches_with_body==1
 | 
			
		||||
@@ -244,11 +243,9 @@ def test_method_in_request(client, live_server, measure_memory_usage):
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    watches_with_method = 0
 | 
			
		||||
    with open('test-datastore/url-watches.json') as f:
 | 
			
		||||
        app_struct = json.load(f)
 | 
			
		||||
        for uuid in app_struct['watching']:
 | 
			
		||||
            if app_struct['watching'][uuid]['method'] == 'PATCH':
 | 
			
		||||
                watches_with_method += 1
 | 
			
		||||
    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
 | 
			
		||||
        if watch['method'] == 'PATCH':
 | 
			
		||||
            watches_with_method += 1
 | 
			
		||||
 | 
			
		||||
    # Should be only one with method set to PATCH
 | 
			
		||||
    assert watches_with_method == 1
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@
 | 
			
		||||
import unittest
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
from changedetectionio import store
 | 
			
		||||
from changedetectionio.model import Watch
 | 
			
		||||
 | 
			
		||||
# mostly
 | 
			
		||||
@@ -13,7 +14,8 @@ class TestDiffBuilder(unittest.TestCase):
 | 
			
		||||
 | 
			
		||||
    def test_watch_get_suggested_from_diff_timestamp(self):
 | 
			
		||||
        import uuid as uuid_builder
 | 
			
		||||
        watch = Watch.model(datastore_path='/tmp', default={})
 | 
			
		||||
        datastore = store.ChangeDetectionStore(datastore_path='/tmp')
 | 
			
		||||
        watch = Watch.model(__datastore=datastore, default={})
 | 
			
		||||
        watch.ensure_data_dir_exists()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -49,7 +51,7 @@ class TestDiffBuilder(unittest.TestCase):
 | 
			
		||||
        assert p == "109", "Correct when its the same time"
 | 
			
		||||
 | 
			
		||||
        # new empty one
 | 
			
		||||
        watch = Watch.model(datastore_path='/tmp', default={})
 | 
			
		||||
        watch = Watch.model(__datastore=datastore, default={})
 | 
			
		||||
        p = watch.get_from_version_based_on_last_viewed
 | 
			
		||||
        assert p == None, "None when no history available"
 | 
			
		||||
 | 
			
		||||
@@ -61,5 +63,6 @@ class TestDiffBuilder(unittest.TestCase):
 | 
			
		||||
        p = watch.get_from_version_based_on_last_viewed
 | 
			
		||||
        assert p == "100", "Correct with only one history snapshot"
 | 
			
		||||
 | 
			
		||||
        datastore.stop_thread = True
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -80,6 +80,7 @@ def wait_for_notification_endpoint_output():
 | 
			
		||||
    '''Apprise can take a few seconds to fire'''
 | 
			
		||||
    #@todo - could check the apprise object directly instead of looking for this file
 | 
			
		||||
    from os.path import isfile
 | 
			
		||||
 | 
			
		||||
    for i in range(1, 20):
 | 
			
		||||
        time.sleep(1)
 | 
			
		||||
        if isfile("test-datastore/notification.txt"):
 | 
			
		||||
 
 | 
			
		||||
@@ -282,6 +282,7 @@ class update_worker(threading.Thread):
 | 
			
		||||
                            print(f"Processor module '{processor}' not found.")
 | 
			
		||||
                            raise e
 | 
			
		||||
 | 
			
		||||
                        # Can pass just the watch here?
 | 
			
		||||
                        update_handler = processor_module.perform_site_check(datastore=self.datastore,
 | 
			
		||||
                                                                             watch_uuid=uuid
 | 
			
		||||
                                                                             )
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user