feat: remove file cache service (#2464)

This commit is contained in:
Daniel Salazar
2026-02-11 12:57:10 -08:00
committed by GitHub
parent 50e2d275bc
commit afeac494ac
7 changed files with 6 additions and 502 deletions
+1 -7
View File
@@ -67,12 +67,6 @@ something like the following (updated 2025-02-26):
"dynamo" :{"path":"./puter-ddb"},
"thumbnails": {
"engine": "http"
},
"file-cache": {
"disk_limit": 5368709120,
"disk_max_size": 204800,
"precache_size": 209715200,
"path": "./file-cache"
}
},
"cookie_name": "...",
@@ -87,4 +81,4 @@ something like the following (updated 2025-02-26):
## Root-Level Parameters
- **domain** - origin for Puter. Do **not** include URL schema (the 'http(s)://' portion)
-
-
+3 -12
View File
@@ -40,9 +40,6 @@ const svc_acl = extension.import('service:acl');
const svc_size = extension.import('service:sizeService');
const svc_resource = extension.import('service:resourceService');
// Not sure where these really belong yet
const svc_fileCache = extension.import('service:file-cache');
// TODO: depending on mountpoint service will not be necessary
// once the storage provider is moved to this extension
const svc_mountpoint = extension.import('service:mountpoint');
@@ -648,12 +645,12 @@ export default class PuterFSProvider {
async directory_has_name ({ parent, name }) {
const uid = await parent.get('uid');
/* eslint-disable */
let check_dupe = await db.read(
'SELECT `id` FROM `fsentries` WHERE `parent_uid` = ? AND name = ? LIMIT 1',
[uid, name],
);
/* eslint-enable */
return !!check_dupe[0];
}
@@ -847,12 +844,8 @@ export default class PuterFSProvider {
svc_resource.free(uid);
})();
const cachePromise = (async () => {
await svc_fileCache.invalidate(node);
})();
(async () => {
await Promise.all([entryOpPromise, cachePromise]);
await entryOpPromise;
svc_event.emit('fs.write.file', {
node,
context,
@@ -866,8 +859,6 @@ export default class PuterFSProvider {
db, user: actor.type.user, node, uid, message, ts,
});
await cachePromise;
return node;
}
+2 -4
View File
@@ -6,18 +6,16 @@ The configuration file can define an array parameter called `logging`.
This configures the visibility of specific logs in core areas based on
which string flags are present.
For example, the following configuration will cause FileCacheService to
log information about cache hits and misses:
For example, the following configuration enables HTTP request logs:
```json
{
"logging": ['file-cache']
"logging": ['http']
}
```
Sometimes "enabling" a log means moving its log level from `debug` to `info`.
#### Available logging flags:
- `file-cache`: file cache hits and misses
- `http`: http requests
- `fsentries-not-found`: information about files that were stat'd but weren't there
-2
View File
@@ -388,8 +388,6 @@ const install = async ({ context, services, app, useapi, modapi }) => {
const { PermissionShortcutService } = require('./services/auth/PermissionShortcutService');
services.registerService('permission-shortcut', PermissionShortcutService);
const { FileCacheService } = require('./services/file-cache/FileCacheService');
services.registerService('file-cache', FileCacheService);
};
const install_legacy = async ({ services }) => {
-7
View File
@@ -37,12 +37,5 @@ module.exports = {
thumbnails: {
engine: 'purejs',
},
'file-cache': {
disk_limit: 16384,
disk_max_size: 16384,
precache_size: 16384,
path: './file-cache',
},
},
};
@@ -18,12 +18,10 @@
*/
const APIError = require('../../api/APIError');
const { get_user } = require('../../helpers');
const { MemoryFSProvider } = require('../../modules/puterfs/customfs/MemoryFSProvider');
const { UserActorType } = require('../../services/auth/Actor');
const { Actor } = require('../../services/auth/Actor');
const { DB_WRITE } = require('../../services/database/consts');
const { Context } = require('../../util/context');
const { buffer_to_stream } = require('../../util/streamutil');
const { TYPE_SYMLINK, TYPE_DIRECTORY } = require('../FSNodeContext');
const { LLFilesystemOperation } = require('./definitions');
@@ -48,7 +46,6 @@ class LLRead extends LLFilesystemOperation {
const aclService = Context.get('services').get('acl');
const db = Context.get('services')
.get('database').get(DB_WRITE, 'filesystem');
const fileCacheService = Context.get('services').get('file-cache');
// validate input
if ( ! await fsNode.exists() ) {
@@ -96,20 +93,6 @@ class LLRead extends LLFilesystemOperation {
/** @type {import("../../services/MeteringService/MeteringService").MeteringService} */
const meteringService = Context.get('services').get('meteringService').meteringService;
// check file cache
const maybe_buffer = await fileCacheService.try_get(fsNode); // TODO DS: do we need those cache hit logs?
if ( maybe_buffer ) {
// Meter cached egress
// return cached stream
if ( has_range && (length || offset) ) {
meteringService.incrementUsage(chargedActor, 'filesystem:cached-egress:bytes', length);
return buffer_to_stream(maybe_buffer.slice(offset, offset + length));
}
meteringService.incrementUsage(chargedActor, 'filesystem:cached-egress:bytes', await fsNode.get('size'));
return buffer_to_stream(maybe_buffer);
}
// if no cache attempt reading from storageProvider (s3)
const svc_mountpoint = Context.get('services').get('mountpoint');
const provider = await svc_mountpoint.get_provider(fsNode.selector);
// const storage = svc_mountpoint.get_storage(provider.constructor.name);
@@ -156,17 +139,6 @@ class LLRead extends LLFilesystemOperation {
})();
meteringService.incrementUsage(chargedActor, 'filesystem:egress:bytes', size);
// cache if whole file read
if ( ! has_range ) {
// only cache for non-memoryfs providers
if ( ! (fsNode.provider instanceof MemoryFSProvider) ) {
const res = await fileCacheService.maybe_store(fsNode, stream);
if ( res.stream ) {
// return with split cached stream
return res.stream;
}
}
}
return stream;
}
}
@@ -1,442 +0,0 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const { FileTracker } = require('./FileTracker');
const { pausing_tee } = require('../../util/streamutil');
const putility = require('@heyputer/putility');
const { EWMA } = require('../../util/opmath');
const crypto = require('crypto');
const BaseService = require('../BaseService');
/**
 * @class FileCacheService
 * @extends BaseService
 * @description
 * The FileCacheService class manages a cache for file storage and retrieval in the Puter system.
 * This service provides functionalities to:
 * - Cache files either in memory (precache) or on disk.
 * - Track file usage with FileTracker instances to manage cache eviction policies.
 * - Ensure files are stored within configured limits for both disk and memory usage.
 * - Provide methods for initializing the cache, storing, retrieving, and invalidating cached files.
 * - Register commands for managing and inspecting the cache status.
 *
 * @property {Object} MODULES - Static property containing module dependencies.
 * @property {number} disk_limit - The maximum total size allowed for disk storage of cached files.
 * @property {number} disk_max_size - The maximum size of a single file that can be cached on disk.
 * @property {number} precache_size - The size limit for memory (precache) storage.
 * @property {string} path - The directory path where cached files are stored on disk.
 * @property {number} ttl - Time-to-live for cached files, after which they are considered for eviction.
 * @property {Map} precache - A Map of in-memory file buffers, keyed by file UID.
 * @property {Map} uid_to_tracker - A Map from file UID to its FileTracker instance.
 */
class FileCacheService extends BaseService {
    static MODULES = {
        fs: require('fs'),
        path_: require('path'),
    };

    _construct () {
        this.disk_limit = this.config.disk_limit;
        this.disk_max_size = this.config.disk_max_size;
        this.precache_size = this.config.precache_size;
        this.path = this.config.path;
        this.ttl = this.config.ttl || (60 * 1000);

        this.precache = new Map();
        this.uid_to_tracker = new Map();

        // Exponentially-weighted moving average of cache hits (1) vs misses (0)
        this.cache_hit_rate = new EWMA({
            initial: 0.5,
            alpha: 0.2,
        });

        this.logging_enabled = (this.global_config.logging ?? [])
            .includes('file-cache');
    }

    /**
     * Retrieves the amount of precache space currently used.
     *
     * @returns {number} The total size in bytes of files stored in the precache.
     */
    get _precache_used () {
        let used = 0;

        // Iterate over file trackers in PHASE_PRECACHE
        for ( const tracker of this.uid_to_tracker.values() ) {
            if ( tracker.phase !== FileTracker.PHASE_PRECACHE ) continue;
            used += tracker.size;
        }

        return used;
    }

    /**
     * Calculates the total disk space used by files in the PHASE_DISK phase.
     *
     * @returns {number} The total size of all files currently stored on disk.
     */
    get _disk_used () {
        let used = 0;

        // Iterate over file trackers in PHASE_DISK
        for ( const tracker of this.uid_to_tracker.values() ) {
            if ( tracker.phase !== FileTracker.PHASE_DISK ) continue;
            used += tracker.size;
        }

        return used;
    }

    /**
     * Initializes the cache by ensuring the storage directory exists.
     *
     * @async
     * @method init
     * @returns {Promise<void>} A promise that resolves when the initialization is complete.
     * @throws {Error} If there's an error creating the directory.
     */
    async _init () {
        this._register_commands(this.services.get('commands'));

        const { fs } = this.modules;

        // Ensure storage path exists
        await fs.promises.mkdir(this.path, { recursive: true });

        // Distributed cache invalidation: another node wrote the file with a
        // different content hash, so our cached copy is stale.
        const svc_event = this.services.get('event');
        svc_event.on('outer.fs.write-hash', async (_, { uuid, hash }) => {
            const tracker = this.uid_to_tracker.get(uuid);
            if ( ! tracker ) return;
            if ( tracker.hash !== hash ) {
                await this.invalidate(uuid);
            }
        });
    }

    /**
     * Get the file path for a given file UID.
     *
     * @param {string} uid - The unique identifier of the file.
     * @returns {string} The full path where the file is stored on disk.
     */
    _get_path (uid) {
        const { path_ } = this.modules;
        return path_.join(this.path, uid);
    }

    /**
     * Attempts to retrieve a cached file.
     *
     * This method first checks if the file exists in the cache by its UID.
     * If found, it verifies the file's age against the TTL (time-to-live).
     * If the file is expired, it invalidates the cache entry. Otherwise,
     * it returns the cached data or null if not found or invalidated.
     *
     * @param {Object} fsNode - The file system node representing the file.
     * @param {Object} [opt_log] - Optional logging service to log cache hits.
     * @returns {Promise<Buffer|null>} - The file data if found, or null.
     */
    async try_get (fsNode, opt_log) {
        const result = await this.try_get_(fsNode, opt_log);
        this.cache_hit_rate.put(result ? 1 : 0);
        return result;
    }

    async try_get_ (fsNode, opt_log) {
        const tracker = this.uid_to_tracker.get(await fsNode.get('uid'));

        if ( ! tracker ) {
            return null;
        }

        if ( tracker.age > this.ttl ) {
            await this.invalidate(fsNode);
            return null;
        }
        tracker.touch();

        // If the file is in pending, that means it's currently being read
        // for cache entry, so we wait (bounded at 2s) for it to become ready.
        // BUGFIX: this race was previously not awaited, so the intended
        // wait never actually happened.
        if ( tracker.phase === FileTracker.PHASE_PENDING ) {
            await Promise.race([
                tracker.p_ready,
                new Promise(resolve => setTimeout(resolve, 2000)),
            ]);
        }

        // If the file is still in pending it means we waited too long;
        // it's possible that reading the file failed or is delayed.
        if ( tracker.phase === FileTracker.PHASE_PENDING ) {
            return null;
        }

        // Since we waited for the file to be ready, it's not impossible
        // that it was evicted in the meantime; just very unlikely.
        if ( tracker.phase === FileTracker.PHASE_GONE ) {
            return null;
        }

        if ( tracker.phase === FileTracker.PHASE_PRECACHE ) {
            return this.precache.get(await fsNode.get('uid'));
        }

        if ( tracker.phase === FileTracker.PHASE_DISK ) {
            const { fs } = this.modules;
            const path = this._get_path(await fsNode.get('uid'));
            try {
                const data = await fs.promises.readFile(path);
                return data;
            } catch ( e ) {
                this.errors.report('file_cache:read_error', {
                    source: e,
                    trace: true,
                    alarm: true,
                });
            }
        }

        // Reaching here means the tracker is in a phase we don't handle;
        // report it rather than failing silently.
        this.errors.report('file_cache:unexpected-cache-state', {
            message: `Unexpected cache state: ${tracker.phase?.label}`,
            trace: true,
            alarm: true,
            extra: {
                phase: tracker.phase?.label,
            },
        });

        return null;
    }

    /**
     * Stores a file in the cache if it's "important enough"
     * to be in the cache (i.e. wouldn't get immediately evicted).
     * @param {*} fsNode - The file system node to cache.
     * @param {*} stream - Readable stream of the file's contents.
     * @returns {Promise<{cached: boolean, stream?: any}>} `stream`, when
     *   present, replaces the caller's stream (the original is consumed
     *   by the cache via a pausing tee).
     */
    async maybe_store (fsNode, stream) {
        const size = await fsNode.get('size');

        // If the file is too big, don't cache it
        if ( size > this.disk_max_size ) {
            return { cached: false };
        }

        const key = await fsNode.get('uid');

        // If the file is already cached, don't cache it again
        if ( this.uid_to_tracker.has(key) ) {
            return { cached: true };
        }

        // Add file tracker
        const tracker = new FileTracker({ key, size });
        this.uid_to_tracker.set(key, tracker);
        tracker.p_ready = new putility.libs.promise.TeePromise();
        tracker.touch();

        // Store binary data in memory (precache)
        const data = Buffer.alloc(size);

        const [replace_stream, store_stream] = pausing_tee(stream, 2);

        (async () => {
            try {
                let offset = 0;
                const hash = crypto.createHash('sha256');
                for await ( const chunk of store_stream ) {
                    chunk.copy(data, offset);
                    hash.update(chunk);
                    offset += chunk.length;
                }
                await this._precache_make_room(size);
                this.precache.set(key, data);
                tracker.hash = hash.digest('hex');
                tracker.phase = FileTracker.PHASE_PRECACHE;
            } catch ( e ) {
                // BUGFIX: previously a stream/read failure raised an
                // unhandled rejection and left the tracker stuck in
                // PHASE_PENDING forever (poisoning this key's cache slot).
                tracker.phase = FileTracker.PHASE_GONE;
                this.uid_to_tracker.delete(key);
                this.errors.report('file_cache:store_error', {
                    source: e,
                    trace: true,
                });
            } finally {
                // Always release waiters blocked in try_get_.
                tracker.p_ready.resolve();
            }
        })();

        return { cached: true, stream: replace_stream };
    }

    /**
     * Invalidates a file from the cache.
     *
     * @param {FsNode|string} fsNode_or_uid - The file system node, or its UID, to invalidate.
     * @returns {Promise<void>} A promise that resolves when the file has been invalidated.
     *
     * @description
     * This method checks if the given file is in the cache, and if so, removes it from both
     * the precache and disk storage, ensuring that any references to this file are cleaned up.
     * If the file is not found in the cache, the method does nothing.
     */
    async invalidate (fsNode_or_uid) {
        const key = (typeof fsNode_or_uid === 'string')
            ? fsNode_or_uid
            : await fsNode_or_uid.get('uid');

        if ( ! this.uid_to_tracker.has(key) ) return;
        const tracker = this.uid_to_tracker.get(key);
        if ( tracker.phase === FileTracker.PHASE_PRECACHE ) {
            this.precache.delete(key);
        }
        if ( tracker.phase === FileTracker.PHASE_DISK ) {
            await this._disk_evict(tracker);
        }
        this.uid_to_tracker.delete(key);
    }

    /**
     * Evicts files from precache until there's enough room for a new file.
     * @param {number} size - The size of the file to be stored.
     */
    async _precache_make_room (size) {
        if ( this._precache_used + size > this.precache_size ) {
            await this._precache_evict(this._precache_used + size - this.precache_size);
        }
    }

    /**
     * Evicts files from precache to make room for new files.
     * This method sorts all trackers by score and evicts the lowest scoring
     * files in precache phase until the specified capacity is freed.
     *
     * @param {number} capacity_needed - The amount of capacity (in bytes) that needs to be freed in precache.
     */
    async _precache_evict (capacity_needed) {
        // Sort by score from tracker
        const sorted = Array.from(this.uid_to_tracker.values())
            .sort((a, b) => b.score - a.score);

        let capacity = 0;
        for ( const tracker of sorted ) {
            if ( tracker.phase !== FileTracker.PHASE_PRECACHE ) continue;
            capacity += tracker.size;
            await this._maybe_promote_to_disk(tracker);
            if ( capacity >= capacity_needed ) break;
        }
    }

    /**
     * Promotes a file from precache to disk if it has a higher score than the files that would be evicted.
     *
     * It may seem unintuitive that going from memory to disk is called a
     * "promotion". However, the in-memory cache used here is considered a
     * "precache"; the idea is as soon as we prepare to write a file to disk cache
     * we're very likely to access it again soon, so we keep it in memory for a
     * while before writing it to disk.
     *
     * @param {*} tracker - The FileTracker instance representing the file to be promoted.
     * @returns {Promise<void>}
     */
    async _maybe_promote_to_disk (tracker) {
        if ( tracker.phase !== FileTracker.PHASE_PRECACHE ) return;

        // It's important to check that the score of this file is
        // higher than the combined score of the N files that
        // would be evicted to make room for it.
        // (Loop variables renamed from `tracker` to `candidate` so they
        // no longer shadow the parameter.)
        const sorted = Array.from(this.uid_to_tracker.values())
            .sort((a, b) => b.score - a.score);

        let capacity = 0;
        let score_needed = 0;
        const capacity_needed = this._disk_used + tracker.size - this.disk_limit;
        for ( const candidate of sorted ) {
            if ( candidate.phase !== FileTracker.PHASE_DISK ) continue;
            capacity += candidate.size;
            score_needed += candidate.score;
            if ( capacity >= capacity_needed ) break;
        }

        if ( tracker.score < score_needed ) return;

        // Now we can remove the lowest scoring files
        // to make room for this file.
        capacity = 0;
        for ( const candidate of sorted ) {
            if ( candidate.phase !== FileTracker.PHASE_DISK ) continue;
            capacity += candidate.size;
            await this._disk_evict(candidate);
            if ( capacity >= capacity_needed ) break;
        }

        const { fs } = this.modules;
        const path = this._get_path(tracker.key);
        const data = this.precache.get(tracker.key);
        await fs.promises.writeFile(path, data);
        this.precache.delete(tracker.key);
        tracker.phase = FileTracker.PHASE_DISK;
    }

    /**
     * Evicts a file from disk cache.
     *
     * @param {FileTracker} tracker - The FileTracker instance representing the file to be evicted.
     * @returns {Promise<void>} A promise that resolves when the file is evicted or if the tracker is not in the disk phase.
     *
     * @note This method ensures that the file is removed from the disk cache and the tracker's phase is updated to GONE.
     */
    async _disk_evict (tracker) {
        if ( tracker.phase !== FileTracker.PHASE_DISK ) return;

        const { fs } = this.modules;
        const path = this._get_path(tracker.key);

        await fs.promises.unlink(path);
        tracker.phase = FileTracker.PHASE_GONE;
        this.uid_to_tracker.delete(tracker.key);
    }

    /**
     * Registers `fsc` console commands for inspecting cache usage and
     * the current cache hit rate.
     *
     * @param {*} commands - The commands service.
     */
    _register_commands (commands) {
        commands.registerCommands('fsc', [
            {
                id: 'status',
                handler: async (args, log) => {
                    const status = {
                        precache: {
                            used: this._precache_used,
                            max: this.precache_size,
                        },
                        disk: {
                            used: this._disk_used,
                            max: this.disk_limit,
                        },
                    };

                    log.log(JSON.stringify(status, null, 2));
                },
            },
            {
                id: 'hitrate',
                handler: async (args, log) => {
                    log.log(this.cache_hit_rate.get());
                },
            },
        ]);
    }
}
// Public export of this module's single service class.
module.exports = { FileCacheService };