Compare commits

...

2 Commits

Author SHA1 Message Date
dgtlmoon
2e59f2a115 WIP 2022-05-13 12:05:52 +02:00
dgtlmoon
8735b73746 Proof of concept 2022-05-13 11:00:06 +02:00
3 changed files with 61 additions and 20 deletions

View File

@@ -932,6 +932,9 @@ def changedetection_app(config=None, datastore_o=None):
# Add the flask app secret
zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")
# Add the sqlite3 db
zipObj.write(os.path.join(datastore_o.datastore_path, "watch.db"), arcname="watch.db")
# Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip.
for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'):
parent_p = txt_file_path.parent

View File

@@ -27,7 +27,8 @@ class model(dict):
'headers': {}, # Extra headers to send
'body': None,
'method': 'GET',
'history': {}, # Dict of timestamp and output stripped filename
# now stored in a sqlite3 db to reduce memory usage
#'history': {}, # Dict of timestamp and output stripped filename
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
# Custom notification content
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)

View File

@@ -12,9 +12,11 @@ from os import mkdir, path, unlink
from threading import Lock
import re
import requests
import sqlite3
from . model import App, Watch
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
@@ -32,6 +34,11 @@ class ChangeDetectionStore:
self.needs_write = False
self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
self.datastore_path = datastore_path
#@todo - check for better options
self.__history_db_connection = sqlite3.connect("{}/watch.db".format(self.datastore_path))
self.proxy_list = None
self.stop_thread = False
@@ -70,6 +77,9 @@ class ChangeDetectionStore:
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
# Bump the update version by running updates
self.run_updates()
# Reinitialise each `watching` with our generic_definition in the case that we add a new var in the future.
# @todo pretty sure there's a Pythonic way to do this with an abstract(?) base object!
for uuid, watch in self.__data['watching'].items():
@@ -79,6 +89,7 @@ class ChangeDetectionStore:
self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
print("Watching:", uuid, self.__data['watching'][uuid]['url'])
# First time ran, doesn't exist.
except (FileNotFoundError, json.decoder.JSONDecodeError):
if include_default_watches:
@@ -111,7 +122,6 @@ class ChangeDetectionStore:
secret = secrets.token_hex(16)
self.__data['settings']['application']['rss_access_token'] = secret
# Proxy list support - available as a selection in settings when text file is imported
# CSV list
# "name, address", or just "name"
@@ -119,8 +129,6 @@ class ChangeDetectionStore:
if path.isfile(proxy_list_file):
self.import_proxy_list(proxy_list_file)
# Bump the update version by running updates
self.run_updates()
self.needs_write = True
@@ -129,19 +137,20 @@ class ChangeDetectionStore:
def get_newest_history_key(self, uuid):
    """Return the timestamp of the newest snapshot for a watch.

    History is now stored in the sqlite3 ``watch_history`` table (see the
    ``update_3`` migration) rather than the in-memory JSON structure.

    :param uuid: Watch UUID to look up.
    :return: Newest timestamp as ``str`` (history was always keyed as str),
             or ``0`` when the watch has one record or fewer — a single
             snapshot is only the baseline, not a detected change.
    """
    cur = self.__history_db_connection.cursor()
    count_row = cur.execute(
        "SELECT COUNT(*) FROM watch_history WHERE watch_uuid = :uuid",
        {"uuid": uuid}).fetchone()
    # One record (or none) means nothing has changed yet.
    if not count_row or count_row[0] <= 1:
        return 0
    newest = cur.execute(
        "SELECT MAX(timestamp) FROM watch_history WHERE watch_uuid = :uuid",
        {"uuid": uuid}).fetchone()
    # Keep the historical contract of the JSON store: always keyed as str.
    return str(newest[0])
def __refresh_history_max_timestamp(self):
    """Return the newest snapshot timestamp for every watch in one query.

    One ``GROUP BY`` aggregate is much faster than issuing a separate
    ``MAX(timestamp)`` query per watch (as ``get_newest_history_key`` does).

    :return: dict mapping watch_uuid -> max timestamp (int).
    """
    cur = self.__history_db_connection.cursor()
    rows = cur.execute(
        "SELECT watch_uuid, MAX(timestamp) FROM watch_history GROUP BY watch_uuid"
    ).fetchall()
    return {watch_uuid: newest for watch_uuid, newest in rows}
def set_last_viewed(self, uuid, timestamp):
    """Record *timestamp* as the moment this watch was last viewed."""
    watch = self.data['watching'][uuid]
    watch['last_viewed'] = int(timestamp)
@@ -186,13 +195,13 @@ class ChangeDetectionStore:
def data(self):
has_unviewed = False
for uuid, v in self.__data['watching'].items():
self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
if int(v['newest_history_key']) <= int(v['last_viewed']):
self.__data['watching'][uuid]['viewed'] = True
# self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
# if int(v['newest_history_key']) <= int(v['last_viewed']):
# self.__data['watching'][uuid]['viewed'] = True
else:
self.__data['watching'][uuid]['viewed'] = False
has_unviewed = True
# else:
# self.__data['watching'][uuid]['viewed'] = False
# has_unviewed = True
# #106 - Be sure this is None on empty string, False, None, etc
# Default var for fetch_backend
@@ -495,3 +504,31 @@ class ChangeDetectionStore:
# Only upgrade individual watch time if it was set
if watch.get('minutes_between_check', False):
self.data['watching'][uuid]['time_between_check']['minutes'] = watch['minutes_between_check']
def update_3(self):
    """Migrate storage of history data to SQLite.

    - No need to store the history list in memory and re-write it every time
    - Memory usage grows badly with large lists of watches with long histories
    - Data about 'last changed' is still stored in the main JSON struct
    - We don't really need this data until we query against it (like for
      listing other available snapshots in the diff page etc)
    """
    if not self.__history_db_connection:
        return
    # Create the table and its lookup indexes (idempotent on re-run).
    self.__history_db_connection.execute(
        "CREATE TABLE IF NOT EXISTS watch_history(id INTEGER PRIMARY KEY, watch_uuid VARCHAR(36), timestamp INT, path TEXT, snapshot_type VARCHAR(10))")
    self.__history_db_connection.execute(
        "CREATE INDEX IF NOT EXISTS `uuid` ON `watch_history` (`watch_uuid`)")
    self.__history_db_connection.execute(
        "CREATE INDEX IF NOT EXISTS `uuid_timestamp` ON `watch_history` (`watch_uuid`, `timestamp`)")
    # Collect every watch's rows first, then do a single executemany() and a
    # single commit - far faster than inserting/committing per watch.
    rows = []
    for uuid, watch in self.data['watching'].items():
        for d, p in watch.get('history', {}).items():
            # Used to be keyed as str, normalise to int now too.
            rows.append((uuid, int(d), p, 'text'))
        # The in-memory history dict is no longer the source of truth;
        # pop() (not del) so watches without a history key don't raise.
        watch.pop('history', None)
    if rows:
        self.__history_db_connection.executemany(
            "INSERT INTO watch_history (watch_uuid, timestamp, path, snapshot_type) VALUES (?,?,?,?)",
            rows)
    self.__history_db_connection.commit()