diff --git a/Dockerfile b/Dockerfile
index e44eace7..109305ee 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,7 +18,7 @@ RUN echo "commit: $SOURCE_COMMIT branch: $SOURCE_BRANCH" >/source.txt
 
 RUN [ ! -d "/datastore" ] && mkdir /datastore
 
-CMD [ "python", "./backend.py" ]
+CMD [ "python", "./backend.py" , "-d", "/datastore"]
diff --git a/backend.py b/backend.py
index ef002d9d..fb3dfd80 100644
--- a/backend.py
+++ b/backend.py
@@ -9,6 +9,9 @@ import eventlet
 import eventlet.wsgi
 import backend
+from backend import store
+
+
 
 def main(argv):
     ssl_mode = False
     port = 5000
@@ -17,14 +20,14 @@ def main(argv):
     try:
         opts, args = getopt.getopt(argv, "sd:p:", "purge")
     except getopt.GetoptError:
-        print('backend.py -s SSL enable -p [port]')
+        print('backend.py -s SSL enable -p [port] -d [datastore path]')
         sys.exit(2)
 
     for opt, arg in opts:
-#        if opt == '--purge':
-            # Remove history, the actual files you need to delete manually.
-#            for uuid, watch in datastore.data['watching'].items():
-#                watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
+        # if opt == '--purge':
+        # Remove history, the actual files you need to delete manually.
+        # for uuid, watch in datastore.data['watching'].items():
+        # watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
 
         if opt == '-s':
             ssl_mode = True
@@ -36,19 +39,21 @@ def main(argv):
 
             datastore_path = arg
 
-    # @todo finalise SSL config, but this should get you in the right direction if you need it.
+    # Kinda weird to tell them both where `datastore_path` is right..
+    app_config = {'datastore_path': datastore_path}
+    datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'])
+    app = backend.changedetection_app(app_config, datastore)
 
-    app = backend.changedetection_app({'datastore_path':datastore_path})
     if ssl_mode:
+        # @todo finalise SSL config, but this should get you in the right direction if you need it.
         eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)),
                                                certfile='cert.pem',
                                                keyfile='privkey.pem',
                                                server_side=True), app)
 
     else:
-        eventlet.wsgi.server(eventlet.listen(('', port)), backend.changedetection_app())
+        eventlet.wsgi.server(eventlet.listen(('', port)), app)
+
 
 if __name__ == '__main__':
-    main(sys.argv)
-
-#print (__name__)
\ No newline at end of file
+    main(sys.argv[1:])
diff --git a/backend/__init__.py b/backend/__init__.py
index 4a60aff4..2d8ce9c8 100644
--- a/backend/__init__.py
+++ b/backend/__init__.py
@@ -24,10 +24,9 @@ import queue
 
 from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for
 
+datastore=None
 # Local
-
-from backend import store
 
 running_update_threads = []
 ticker_thread = None
@@ -75,13 +74,14 @@ def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
 #        return timeago.format(timestamp, time.time())
 #    return datetime.datetime.utcfromtimestamp(timestamp).strftime(format)
 
-def changedetection_app(config=None):
+def changedetection_app(config=None, datastore_o=None):
+    global datastore
+    datastore = datastore_o
 
     # Hmm
     app.config.update(dict(DEBUG=True))
     app.config.update(config or {})
 
-    datastore = store.ChangeDetectionStore(datastore_path=app.config['datastore_path'])
 
     # Setup cors headers to allow all domains
     # https://flask-cors.readthedocs.io/en/latest/
@@ -454,37 +454,38 @@ class Worker(threading.Thread):
 
             self.q.task_done()
 
-    # Thread runner to check every minute, look for new watches to feed into the Queue.
-    def ticker_thread_check_time_launch_checks():
+# Thread runner to check every minute, look for new watches to feed into the Queue.
+def ticker_thread_check_time_launch_checks():
 
-        # Spin up Workers.
-        for _ in range(datastore.data['settings']['requests']['workers']):
-            new_worker = Worker(update_q)
-            running_update_threads.append(new_worker)
-            new_worker.start()
+    # Spin up Workers.
+    for _ in range(datastore.data['settings']['requests']['workers']):
+        print ("...")
+        new_worker = Worker(update_q)
+        running_update_threads.append(new_worker)
+        new_worker.start()
 
-        # Every minute check for new UUIDs to follow up on
-        while True:
-            minutes = datastore.data['settings']['requests']['minutes_between_check']
-            for uuid, watch in datastore.data['watching'].items():
-                if watch['last_checked'] <= time.time() - (minutes * 60):
-                    update_q.put(uuid)
+    # Every minute check for new UUIDs to follow up on
+    while True:
+        minutes = datastore.data['settings']['requests']['minutes_between_check']
+        for uuid, watch in datastore.data['watching'].items():
+            if watch['last_checked'] <= time.time() - (minutes * 60):
+                update_q.put(uuid)
 
-            if app.config['STOP_THREADS']:
-                return
-            time.sleep(1)
+        if app.config['STOP_THREADS']:
+            return
+        time.sleep(1)
 
-    # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
-    # by just running periodically in one thread, according to python, dict updates are threadsafe.
-    def save_datastore():
+# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
+# by just running periodically in one thread, according to python, dict updates are threadsafe.
+def save_datastore():
 
-        global stop_threads
+    global stop_threads
 
-        while True:
-            if stop_threads:
-                return
-            if datastore.needs_write:
-                datastore.sync_to_json()
-            time.sleep(1)
+    while True:
+        if app.config['STOP_THREADS']:
+            return
+        if datastore.needs_write:
+            datastore.sync_to_json()
+        time.sleep(1)
diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py
index 83647c4c..2ed85c05 100644
--- a/backend/fetch_site_status.py
+++ b/backend/fetch_site_status.py
@@ -20,7 +20,10 @@ class perform_site_check():
         return
 
     def ensure_output_path(self):
-        os.mkdir(self.output_path)
+        try:
+            os.mkdir(self.output_path)
+        except FileExistsError:
+            print (self.output_path, "already exists.")
 
     def save_response_stripped_output(self, output, fname):
 
@@ -40,7 +43,7 @@ class perform_site_check():
             "last_checked": timestamp
         }
 
-        self.output_path = "/datastore/{}".format(uuid)
+        self.output_path = "{}/{}".format(self.datastore.datastore_path,uuid)
         self.ensure_output_path()
 
         extra_headers = self.datastore.get_val(uuid, 'headers')
diff --git a/backend/store.py b/backend/store.py
index cc3c0976..36f9eebd 100644
--- a/backend/store.py
+++ b/backend/store.py
@@ -7,6 +7,7 @@ from threading import Lock, Thread
 
 from copy import deepcopy
 
+
 # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
 # Open a github issue if you know something :)
 # https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
@@ -42,7 +43,7 @@ class ChangeDetectionStore:
             'tag': None,
             'last_checked': 0,
             'last_changed': 0,
-            'last_viewed': 0, # history key value of the last viewed via the [diff] link
+            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
             'newest_history_key': "",
             'title': None,
             'previous_md5': "",
@@ -58,7 +59,7 @@ class ChangeDetectionStore:
                 self.__data['build_sha'] = f.read()
 
         try:
-            with open('/datastore/url-watches.json') as json_file:
+            with open("{}/url-watches.json".format(self.datastore_path)) as json_file:
                 from_disk = json.load(json_file)
 
                 # @todo isnt there a way todo this dict.update recursively?
@@ -85,7 +86,7 @@ class ChangeDetectionStore:
 
         # First time ran, doesnt exist.
         except (FileNotFoundError, json.decoder.JSONDecodeError):
-            print("Creating JSON store")
+            print("Creating JSON store at", self.datastore_path)
             self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
             self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
             self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
@@ -106,9 +107,6 @@ class ChangeDetectionStore:
 
         return 0
 
-
-
-
     def set_last_viewed(self, uuid, timestamp):
         self.data['watching'][uuid].update({'last_viewed': str(timestamp)})
         self.needs_write = True
@@ -122,7 +120,7 @@ class ChangeDetectionStore:
             if isinstance(d, dict):
                 if update_obj is not None and dict_key in update_obj:
                     self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
-                    del(update_obj[dict_key])
+                    del (update_obj[dict_key])
 
             self.__data['watching'][uuid].update(update_obj)
             self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
@@ -167,7 +165,6 @@ class ChangeDetectionStore:
 
     def add_watch(self, url, tag):
         with self.lock:
-
             # @todo use a common generic version of this
             new_uuid = str(uuid_builder.uuid4())
             _blank = deepcopy(self.generic_definition)
@@ -185,8 +182,7 @@ class ChangeDetectionStore:
 
     def sync_to_json(self):
-
-        with open('/datastore/url-watches.json', 'w') as json_file:
+        with open("{}/url-watches.json".format(self.datastore_path), 'w') as json_file:
             json.dump(self.__data, json_file, indent=4)
             print("Re-saved index")
diff --git a/backend/test_backend.py b/backend/test_backend.py
index 3909d0f6..7ac1936f 100644
--- a/backend/test_backend.py
+++ b/backend/test_backend.py
@@ -2,7 +2,8 @@ import pytest
 
 import backend
-
+from backend import store
+import os
 
 # https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
 # Much better boilerplate than the docs
@@ -10,7 +11,20 @@ import backend
 
 @pytest.fixture
 def app(request):
-    app = backend.changedetection_app({'datastore_path':'./datastorexxx'})
+
+
+    datastore_path ="./test-datastore"
+    try:
+        os.mkdir(datastore_path)
+    except FileExistsError:
+        pass
+
+    # Kinda weird to tell them both where `datastore_path` is right..
+    app_config = {'datastore_path': datastore_path}
+    datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'])
+    app = backend.changedetection_app(app_config, datastore)
+
+
     app.debug = True
 
     def teardown():