diff --git a/doc/self-hosters/config.md b/doc/self-hosters/config.md index e12d84a6e..3f84f5aef 100644 --- a/doc/self-hosters/config.md +++ b/doc/self-hosters/config.md @@ -67,12 +67,6 @@ something like the following (updated 2025-02-26): "dynamo" :{"path":"./puter-ddb"}, "thumbnails": { "engine": "http" - }, - "file-cache": { - "disk_limit": 5368709120, - "disk_max_size": 204800, - "precache_size": 209715200, - "path": "./file-cache" } }, "cookie_name": "...", @@ -87,4 +81,4 @@ something like the following (updated 2025-02-26): ## Root-Level Parameters - **domain** - origin for Puter. Do **not** include URL schema (the 'http(s)://' portion) -- \ No newline at end of file +- diff --git a/extensions/puterfs/PuterFSProvider.js b/extensions/puterfs/PuterFSProvider.js index 4dd8fc910..0bb7cbc44 100644 --- a/extensions/puterfs/PuterFSProvider.js +++ b/extensions/puterfs/PuterFSProvider.js @@ -40,9 +40,6 @@ const svc_acl = extension.import('service:acl'); const svc_size = extension.import('service:sizeService'); const svc_resource = extension.import('service:resourceService'); -// Not sure where these really belong yet -const svc_fileCache = extension.import('service:file-cache'); - // TODO: depending on mountpoint service will not be necessary // once the storage provider is moved to this extension const svc_mountpoint = extension.import('service:mountpoint'); @@ -648,12 +645,12 @@ export default class PuterFSProvider { async directory_has_name ({ parent, name }) { const uid = await parent.get('uid'); - /* eslint-disable */ + let check_dupe = await db.read( 'SELECT `id` FROM `fsentries` WHERE `parent_uid` = ? AND name = ? LIMIT 1', [uid, name], ); - /* eslint-enable */ + return !!check_dupe[0]; } @@ -847,12 +844,8 @@ export default class PuterFSProvider { svc_resource.free(uid); })(); - const cachePromise = (async () => { - await svc_fileCache.invalidate(node); - })(); - (async () => { - await Promise.all([entryOpPromise, cachePromise]); + await entryOpPromise; svc_event.emit('fs.write.file', { node, context, @@ -866,8 +859,6 @@ export default class PuterFSProvider { db, user: actor.type.user, node, uid, message, ts, }); - await cachePromise; - return node; } diff --git a/src/backend/doc/log_config.md b/src/backend/doc/log_config.md index 72f50ff41..04385e114 100644 --- a/src/backend/doc/log_config.md +++ b/src/backend/doc/log_config.md @@ -6,18 +6,16 @@ The configuration file can define an array parameter called `logging`. This configures the visibility of specific logs in core areas based on which string flags are present. -For example, the following configuration will cause FileCacheService to -log information about cache hits and misses: +For example, the following configuration enables HTTP request logs: ```json { - "logging": ['file-cache'] + "logging": ['http'] } ``` Sometimes "enabling" a log means moving its log level from `debug` to `info`. #### Available logging flags: -- `file-cache`: file cache hits and misses - `http`: http requests - `fsentries-not-found`: information about files that were stat'd but weren't there diff --git a/src/backend/src/CoreModule.js b/src/backend/src/CoreModule.js index 4b48acdd1..c5622a69a 100644 --- a/src/backend/src/CoreModule.js +++ b/src/backend/src/CoreModule.js @@ -388,8 +388,6 @@ const install = async ({ context, services, app, useapi, modapi }) => { const { PermissionShortcutService } = require('./services/auth/PermissionShortcutService'); services.registerService('permission-shortcut', PermissionShortcutService); - const { FileCacheService } = require('./services/file-cache/FileCacheService'); - services.registerService('file-cache', FileCacheService); }; const install_legacy = async ({ services }) => { diff --git a/src/backend/src/boot/default_config.js b/src/backend/src/boot/default_config.js index 6ecffcf0b..0cbc17eea 100644 --- a/src/backend/src/boot/default_config.js +++ b/src/backend/src/boot/default_config.js @@ -37,12 +37,5 @@ module.exports = { thumbnails: { engine: 'purejs', }, - 'file-cache': { - disk_limit: 16384, - disk_max_size: 16384, - precache_size: 16384, - path: './file-cache', - - }, }, }; diff --git a/src/backend/src/filesystem/ll_operations/ll_read.js b/src/backend/src/filesystem/ll_operations/ll_read.js index b86afd391..f1f1055b0 100644 --- a/src/backend/src/filesystem/ll_operations/ll_read.js +++ b/src/backend/src/filesystem/ll_operations/ll_read.js @@ -18,12 +18,10 @@ */ const APIError = require('../../api/APIError'); const { get_user } = require('../../helpers'); -const { MemoryFSProvider } = require('../../modules/puterfs/customfs/MemoryFSProvider'); const { UserActorType } = require('../../services/auth/Actor'); const { Actor } = require('../../services/auth/Actor'); const { DB_WRITE } = require('../../services/database/consts'); const { Context } = require('../../util/context'); -const { buffer_to_stream } = require('../../util/streamutil'); const { TYPE_SYMLINK, TYPE_DIRECTORY } = require('../FSNodeContext'); const { LLFilesystemOperation } = require('./definitions'); @@ -48,7 +46,6 @@ class LLRead extends LLFilesystemOperation { const aclService = Context.get('services').get('acl'); const db = Context.get('services') .get('database').get(DB_WRITE, 'filesystem'); - const fileCacheService = Context.get('services').get('file-cache'); // validate input if ( ! await fsNode.exists() ) { @@ -96,20 +93,6 @@ class LLRead extends LLFilesystemOperation { /** @type {import("../../services/MeteringService/MeteringService").MeteringService} */ const meteringService = Context.get('services').get('meteringService').meteringService; - // check file cache - const maybe_buffer = await fileCacheService.try_get(fsNode); // TODO DS: do we need those cache hit logs? - if ( maybe_buffer ) { - // Meter cached egress - // return cached stream - if ( has_range && (length || offset) ) { - meteringService.incrementUsage(chargedActor, 'filesystem:cached-egress:bytes', length); - return buffer_to_stream(maybe_buffer.slice(offset, offset + length)); - } - meteringService.incrementUsage(chargedActor, 'filesystem:cached-egress:bytes', await fsNode.get('size')); - return buffer_to_stream(maybe_buffer); - } - - // if no cache attempt reading from storageProvider (s3) const svc_mountpoint = Context.get('services').get('mountpoint'); const provider = await svc_mountpoint.get_provider(fsNode.selector); // const storage = svc_mountpoint.get_storage(provider.constructor.name); @@ -156,17 +139,6 @@ class LLRead extends LLFilesystemOperation { })(); meteringService.incrementUsage(chargedActor, 'filesystem:egress:bytes', size); - // cache if whole file read - if ( ! has_range ) { - // only cache for non-memoryfs providers - if ( ! (fsNode.provider instanceof MemoryFSProvider) ) { - const res = await fileCacheService.maybe_store(fsNode, stream); - if ( res.stream ) { - // return with split cached stream - return res.stream; - } - } - } return stream; } } diff --git a/src/backend/src/services/file-cache/FileCacheService.js b/src/backend/src/services/file-cache/FileCacheService.js deleted file mode 100644 index 4b37f4cbd..000000000 --- a/src/backend/src/services/file-cache/FileCacheService.js +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (C) 2024-present Puter Technologies Inc. - * - * This file is part of Puter. - * - * Puter is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ -const { FileTracker } = require('./FileTracker'); -const { pausing_tee } = require('../../util/streamutil'); -const putility = require('@heyputer/putility'); -const { EWMA } = require('../../util/opmath'); - -const crypto = require('crypto'); -const BaseService = require('../BaseService'); - -/** -* @class FileCacheService -* @extends AdvancedBase -* @description - -* The FileCacheService class manages a cache for file storage and retrieval in the Puter system. -* This service provides functionalities to: -* - Cache files either in memory (precache) or on disk. -* - Track file usage with FileTracker instances to manage cache eviction policies. -* - Ensure files are stored within configured limits for both disk and memory usage. -* - Provide methods for initializing the cache, storing, retrieving, and invalidating cached files. -* - Register commands for managing and inspecting the cache status. -* -* @property {Object} MODULES - Static property containing module dependencies. -* @property {number} disk_limit - The maximum size allowed for disk storage of cached files. -* @property {number} disk_max_size - The maximum size of a file that can be cached on disk. -* @property {number} precache_size - The size limit for memory (precache) storage. -* @property {string} path - The directory path where cached files are stored on disk. -* @property {number} ttl - Time-to-live for cached files, after which they are considered for eviction. -* @property {Map} precache - A Map to hold files in memory. -* @property {Map} uid_to_tracker - A Map to track each file with its FileTracker instance. -*/ -class FileCacheService extends BaseService { - static MODULES = { - fs: require('fs'), - path_: require('path'), - }; - - _construct () { - this.disk_limit = this.config.disk_limit; - this.disk_max_size = this.config.disk_max_size; - this.precache_size = this.config.precache_size; - this.path = this.config.path; - - this.ttl = this.config.ttl || (60 * 1000); - - this.precache = new Map(); - this.uid_to_tracker = new Map(); - - this.cache_hit_rate = new EWMA({ - initial: 0.5, - alpha: 0.2, - }); - - this.logging_enabled = (this.global_config.logging ?? []) - .includes('file-cache'); - } - - /** - * Retrieves the amount of precache space currently used. - * - * @returns {number} The total size in bytes of files stored in the precache. - */ - get _precache_used () { - let used = 0; - - // Iterate over file trackers in PHASE_PRECACHE - for ( const tracker of this.uid_to_tracker.values() ) { - if ( tracker.phase !== FileTracker.PHASE_PRECACHE ) continue; - used += tracker.size; - } - - return used; - } - - /** - * Calculates the total disk space used by files in the PHASE_DISK phase. - * - * @returns {number} The total size of all files currently stored on disk. - */ - get _disk_used () { - let used = 0; - - // Iterate over file trackers in PHASE_DISK - for ( const tracker of this.uid_to_tracker.values() ) { - if ( tracker.phase !== FileTracker.PHASE_DISK ) continue; - used += tracker.size; - } - - return used; - } - - /** - * Initializes the cache by ensuring the storage directory exists. - * - * @async - * @method init - * @returns {Promise} A promise that resolves when the initialization is complete. - * @throws {Error} If there's an error creating the directory. - */ - async _init () { - this._register_commands(this.services.get('commands')); - - const { fs } = this.modules; - // Ensure storage path exists - await fs.promises.mkdir(this.path, { recursive: true }); - - // Distributed cache invalidation - const svc_event = this.services.get('event'); - svc_event.on('outer.fs.write-hash', async (_, { uuid, hash }) => { - const tracker = this.uid_to_tracker.get(uuid); - if ( ! tracker ) return; - - if ( tracker.hash !== hash ) { - await this.invalidate(uuid); - } - }); - } - - /** - * Get the file path for a given file UID. - * - * @param {string} uid - The unique identifier of the file. - * @returns {string} The full path where the file is stored on disk. - */ - _get_path (uid) { - const { path_ } = this.modules; - return path_.join(this.path, uid); - } - - /** - * Attempts to retrieve a cached file. - * - * This method first checks if the file exists in the cache by its UID. - * If found, it verifies the file's age against the TTL (time-to-live). - * If the file is expired, it invalidates the cache entry. Otherwise, - * it returns the cached data or null if not found or invalidated. - * - * @param {Object} fsNode - The file system node representing the file. - * @param {Object} [opt_log] - Optional logging service to log cache hits. - * @returns {Promise} - The file data if found, or null. - */ - async try_get (fsNode, opt_log) { - const result = await this.try_get_(fsNode, opt_log); - this.cache_hit_rate.put(result ? 1 : 0); - return result; - } - async try_get_ (fsNode, opt_log) { - const tracker = this.uid_to_tracker.get(await fsNode.get('uid')); - - if ( ! tracker ) { - return null; - } - - if ( tracker.age > this.ttl ) { - await this.invalidate(fsNode); - return null; - } - - tracker.touch(); - - // If the file is in pending, that means it's currenty being read - // for cache entry, so we wait for it to be ready. - if ( tracker.phase === FileTracker.PHASE_PENDING ) { - Promise.race([ - tracker.p_ready, - new Promise(resolve => setTimeout(resolve, 2000)), - ]); - } - - // If the file is still in pending it means we waited too long; - // it's possible that reading the file failed is is delayed. - if ( tracker.phase === FileTracker.PHASE_PENDING ) { - return null; - } - - // Since we waited for the file to be ready, it's not impossible - // that it was evicted in the meantime; just very unlikely. - if ( tracker.phase === FileTracker.PHASE_GONE ) { - return null; - } - - if ( tracker.phase === FileTracker.PHASE_PRECACHE ) { - return this.precache.get(await fsNode.get('uid')); - } - - if ( tracker.phase === FileTracker.PHASE_DISK ) { - const { fs } = this.modules; - const path = this._get_path(await fsNode.get('uid')); - try { - const data = await fs.promises.readFile(path); - return data; - } catch ( e ) { - this.errors.report('file_cache:read_error', { - source: e, - trace: true, - alarm: true, - }); - } - } - - this.errors.report('file_cache:unexpected-cache-state', { - message: `Unexpected cache state: ${tracker.phase?.label}`, - trace: true, - alarm: true, - extra: { - phase: tracker.phase?.label, - }, - }); - - return null; - } - - /** - * Stores a file in the cache if it's "important enough" - * to be in the cache (i.e. wouldn't get immediately evicted). - * @param {*} fsNode - * @param {*} stream - * @returns - */ - async maybe_store (fsNode, stream) { - const size = await fsNode.get('size'); - - // If the file is too big, don't cache it - if ( size > this.disk_max_size ) { - return { cached: false }; - } - - const key = await fsNode.get('uid'); - - // If the file is already cached, don't cache it again - if ( this.uid_to_tracker.has(key) ) { - return { cached: true }; - } - - // Add file tracker - const tracker = new FileTracker({ key, size }); - this.uid_to_tracker.set(key, tracker); - tracker.p_ready = new putility.libs.promise.TeePromise(); - tracker.touch(); - - // Store binary data in memory (precache) - const data = Buffer.alloc(size); - - const [replace_stream, store_stream] = pausing_tee(stream, 2); - - (async () => { - let offset = 0; - const hash = crypto.createHash('sha256'); - for await ( const chunk of store_stream ) { - chunk.copy(data, offset); - hash.update(chunk); - offset += chunk.length; - } - - await this._precache_make_room(size); - this.precache.set(key, data); - tracker.hash = hash.digest('hex'); - tracker.phase = FileTracker.PHASE_PRECACHE; - tracker.p_ready.resolve(); - })(); - - return { cached: true, stream: replace_stream }; - } - - /** - * Invalidates a file from the cache. - * - * @param {FsNode} fsNode - The file system node to invalidate. - * @returns {Promise} A promise that resolves when the file has been invalidated. - * - * @description - * This method checks if the given file is in the cache, and if so, removes it from both - * the precache and disk storage, ensuring that any references to this file are cleaned up. - * If the file is not found in the cache, the method does nothing. - */ - async invalidate (fsNode_or_uid) { - const key = (typeof fsNode_or_uid === 'string') - ? fsNode_or_uid - : await fsNode_or_uid.get('uid'); - - if ( ! this.uid_to_tracker.has(key) ) return; - const tracker = this.uid_to_tracker.get(key); - if ( tracker.phase === FileTracker.PHASE_PRECACHE ) { - this.precache.delete(key); - } - if ( tracker.phase === FileTracker.PHASE_DISK ) { - await this._disk_evict(tracker); - } - this.uid_to_tracker.delete(key); - } - - /** - * Evicts files from precache until there's enough room for a new file. - * @param {*} size - The size of the file to be stored. - */ - async _precache_make_room (size) { - if ( this._precache_used + size > this.precache_size ) { - await this._precache_evict(this._precache_used + size - this.precache_size); - } - } - - /** - * Evicts files from precache to make room for new files. - * This method sorts all trackers by score and evicts the lowest scoring - * files in precache phase until the specified capacity is freed. - * - * @param {number} capacity_needed - The amount of capacity (in bytes) that needs to be freed in precache. - */ - async _precache_evict (capacity_needed) { - // Sort by score from tracker - const sorted = Array.from(this.uid_to_tracker.values()) - .sort((a, b) => b.score - a.score); - - let capacity = 0; - for ( const tracker of sorted ) { - if ( tracker.phase !== FileTracker.PHASE_PRECACHE ) continue; - capacity += tracker.size; - await this._maybe_promote_to_disk(tracker); - if ( capacity >= capacity_needed ) break; - } - } - - /** - * Promotes a file from precache to disk if it has a higher score than the files that would be evicted. - * - * It may seem unintuitive that going from memory to disk is called a - * "promotion". However, the in-memory cache used here is considered a - * "precache"; the idea is as soon as we prepare to write a file to disk cache - * we're very likely to access it again soon, so we keep it in memory for a - * while before writing it to disk. - * - * @param {*} tracker - The FileTracker instance representing the file to be promoted. - * @returns - */ - async _maybe_promote_to_disk (tracker) { - if ( tracker.phase !== FileTracker.PHASE_PRECACHE ) return; - - // It's important to check that the score of this file is - // higher than the combined score of the N files that - // would be evicted to make room for it. - const sorted = Array.from(this.uid_to_tracker.values()) - .sort((a, b) => b.score - a.score); - - let capacity = 0; - let score_needed = 0; - const capacity_needed = this._disk_used + tracker.size - this.disk_limit; - for ( const tracker of sorted ) { - if ( tracker.phase !== FileTracker.PHASE_DISK ) continue; - capacity += tracker.size; - score_needed += tracker.score; - if ( capacity >= capacity_needed ) break; - } - - if ( tracker.score < score_needed ) return; - - // Now we can remove the lowest scoring files - // to make room for this file. - capacity = 0; - for ( const tracker of sorted ) { - if ( tracker.phase !== FileTracker.PHASE_DISK ) continue; - capacity += tracker.size; - await this._disk_evict(tracker); - if ( capacity >= capacity_needed ) break; - } - - const { fs } = this.modules; - const path = this._get_path(tracker.key); - console.log('precache fetch key', tracker.key); - const data = this.precache.get(tracker.key); - await fs.promises.writeFile(path, data); - this.precache.delete(tracker.key); - tracker.phase = FileTracker.PHASE_DISK; - } - - /** - * Evicts a file from disk cache. - * - * @param {FileTracker} tracker - The FileTracker instance representing the file to be evicted. - * @returns {Promise} A promise that resolves when the file is evicted or if the tracker is not in the disk phase. - * - * @note This method ensures that the file is removed from the disk cache and the tracker's phase is updated to GONE. - */ - async _disk_evict (tracker) { - if ( tracker.phase !== FileTracker.PHASE_DISK ) return; - - const { fs } = this.modules; - const path = this._get_path(tracker.key); - - await fs.promises.unlink(path); - tracker.phase = FileTracker.PHASE_GONE; - this.uid_to_tracker.delete(tracker.key); - } - - _register_commands (commands) { - commands.registerCommands('fsc', [ - { - id: 'status', - handler: async (args, log) => { - const status = { - precache: { - used: this._precache_used, - max: this.precache_size, - }, - disk: { - used: this._disk_used, - max: this.disk_limit, - }, - }; - - log.log(JSON.stringify(status, null, 2)); - }, - }, - { - id: 'hitrate', - handler: async (args, log) => { - log.log(this.cache_hit_rate.get()); - }, - }, - ]); - } -} - -module.exports = { - FileCacheService, -};