fix: bring back puter site config logic (#3145)

This commit is contained in:
Daniel Salazar
2026-05-22 11:15:45 -07:00
committed by GitHub
parent 7438db7a90
commit bb0f56d758
3 changed files with 794 additions and 4 deletions
@@ -820,3 +820,344 @@ describe('createPuterSiteMiddleware — file serving', () => {
expect(out.statusCode).toBe(404);
});
});
// ── .puter_site_config (custom error pages) ─────────────────────────
//
// Sites can drop a `.puter_site_config` JSON file at the root of their
// hosting directory to map error codes onto custom pages — the
// canonical use case is SPA fallback (404 → /index.html with status
// 200). These tests pin the contract end-to-end through the real
// FSService so we exercise parsing, path resolution, and the loop-
// prevention guard together.
describe('createPuterSiteMiddleware — .puter_site_config', () => {
// Shared fixture: creates a fresh user, registers a randomly named
// subdomain rooted at that user's home directory, and writes `config`
// (JSON-stringified) to `.puter_site_config` in that directory.
// Returns the pieces each test needs to add files and build a request.
const setupSiteWithConfig = async (config: unknown) => {
    const owner = await makeUserWithHome();
    const homePath = `/${owner.username}`;
    // Random suffix keeps subdomains from colliding across tests.
    const sub = `cfg-${Math.random().toString(36).slice(2, 8)}`;
    const rootDir = await server.stores.fsEntry.getEntryByPath(homePath);
    await server.stores.subdomain.create({
        userId: owner.id,
        subdomain: sub,
        rootDirId: rootDir!.id,
    });
    const serialized = Buffer.from(JSON.stringify(config));
    const configPath = `${homePath}/.puter_site_config`;
    await writeFile(owner.id, configPath, serialized, 'application/json');
    return { owner, homePath, sub };
};
it('serves /index.html with status 200 on 404 when configured (SPA fallback)', async () => {
// The user's headline use case: route any unknown path through
// the SPA entrypoint so client-side routing can take over,
// while still serving HTTP 200 so search engines don't cache
// the page as a hard 404.
const { owner, homePath, sub } = await setupSiteWithConfig({
errors: {
'404': { file: '/index.html', status: 200 },
},
});
// The fallback target must exist on disk for the rule to apply.
const body = Buffer.from('<html>spa-shell</html>');
await writeFile(owner.id, `${homePath}/index.html`, body, 'text/html');
const mw = buildMiddleware();
const { res, out } = makeRes();
// Request a path that has no file behind it.
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/some/client-route',
}),
res,
vi.fn(),
);
// Yield one macrotask before asserting — presumably so the streamed
// file body finishes landing in `out.body`; confirm against `makeRes`.
await new Promise<void>((resolve) => setImmediate(resolve));
// Status comes from the rule's `status`; body is the fallback file.
expect(out.statusCode).toBe(200);
expect(out.headers['Content-Type']).toMatch(/text\/html/);
const piped = out.body as Buffer | undefined;
expect(Buffer.isBuffer(piped)).toBe(true);
expect(piped!.equals(body)).toBe(true);
});
it('serves the configured file with the matched code when `status` is omitted', async () => {
// No explicit `status` → default to the matched error code
// (404 here). Bare `{ file: '/404.html' }` should Just Work
// for the classic "pretty 404 page" use case.
const { owner, homePath, sub } = await setupSiteWithConfig({
errors: { '404': { file: '/404.html' } },
});
const body = Buffer.from('<html>custom 404</html>');
await writeFile(owner.id, `${homePath}/404.html`, body, 'text/html');
const mw = buildMiddleware();
const { res, out } = makeRes();
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/no-such-page',
}),
res,
vi.fn(),
);
// Yield one macrotask before asserting — presumably so the streamed
// file body finishes landing in `out.body`; confirm against `makeRes`.
await new Promise<void>((resolve) => setImmediate(resolve));
// The custom page is served, but the status stays at the matched 404.
expect(out.statusCode).toBe(404);
const piped = out.body as Buffer | undefined;
expect(piped!.equals(body)).toBe(true);
});
it('falls back to the default 404 page when the configured error file does not exist (no infinite loop)', async () => {
// Critical loop guard: if the error page itself is missing,
// we must NOT recurse through the error config again. The
// request must terminate with the built-in 404 page rather
// than spinning errors.404 → errors.404 → … .
const { sub } = await setupSiteWithConfig({
errors: { '404': { file: '/missing.html', status: 200 } },
});
// Deliberately do not write missing.html.
const mw = buildMiddleware();
const { res, out } = makeRes();
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/anything',
}),
res,
vi.fn(),
);
// The rule's `status: 200` must NOT leak through: with no usable
// error file the response is the built-in HTML 404.
expect(out.statusCode).toBe(404);
expect(out.contentType).toBe('text/html; charset=UTF-8');
expect(String(out.body)).toContain('Not Found');
});
it('returns 404 when a visitor requests `.puter_site_config` directly (no config leak)', async () => {
// The config file is implementation detail — hide it the same
// way any other missing path would 404, so visitors can't
// enumerate deployment shape by guessing well-known names.
const { sub } = await setupSiteWithConfig({
errors: { '404': { file: '/index.html', status: 200 } },
});
const mw = buildMiddleware();
const { res, out } = makeRes();
// Ask for the config file by its exact name at the site root; the
// file exists on disk (the fixture wrote it) but must not be served.
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/.puter_site_config',
}),
res,
vi.fn(),
);
// No error-page fallback because we didn't write /index.html.
expect(out.statusCode).toBe(404);
// Body shape is the default 404, not the JSON config.
expect(String(out.body)).not.toContain('errors');
});
it('rejects `errors.404.file` paths that try to climb out of the site root', async () => {
// The normalizer collapses `..` segments, so an attacker who
// can edit the config can't pivot it into reading files
// outside their own subdomain root.
// Create the victim file first, so a successful traversal would be
// observable as the SECRET body in the response.
const otherOwner = await makeUserWithHome();
const secretPath = `/${otherOwner.username}/secret.html`;
await writeFile(
otherOwner.id,
secretPath,
Buffer.from('SECRET'),
'text/html',
);
const { sub } = await setupSiteWithConfig({
errors: {
'404': {
// Tries to walk up to the other user's home dir.
file: `/../${otherOwner.username}/secret.html`,
status: 200,
},
},
});
const mw = buildMiddleware();
const { res, out } = makeRes();
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/anything',
}),
res,
vi.fn(),
);
// The collapsed path resolves under the requesting site's
// root, which doesn't have a `<username>/secret.html` file —
// so the fallback fails and we get the default 404, NOT the
// SECRET body.
expect(out.statusCode).toBe(404);
expect(String(out.body)).not.toContain('SECRET');
});
it('ignores malformed JSON configs and behaves like there is no config', async () => {
// A typo in the config file must not 5xx the request — the
// visitor still gets the default 404 on a missing path.
// Setup is inlined rather than using `setupSiteWithConfig` because
// that helper JSON-stringifies its argument and therefore cannot
// produce an invalid config file.
const owner = await makeUserWithHome();
const homePath = `/${owner.username}`;
const homeEntry = await server.stores.fsEntry.getEntryByPath(homePath);
const sub = `bad-${Math.random().toString(36).slice(2, 8)}`;
await server.stores.subdomain.create({
userId: owner.id,
subdomain: sub,
rootDirId: homeEntry!.id,
});
await writeFile(
owner.id,
`${homePath}/.puter_site_config`,
Buffer.from('this is not json {{{'),
'application/json',
);
// A plausible fallback target exists, so serving it would reveal
// that the broken config had been (partially) honoured.
await writeFile(
owner.id,
`${homePath}/index.html`,
Buffer.from('would-be-fallback'),
'text/html',
);
const mw = buildMiddleware();
const { res, out } = makeRes();
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/unknown',
}),
res,
vi.fn(),
);
expect(out.statusCode).toBe(404);
expect(String(out.body)).toContain('Not Found');
// Did NOT silently fall back to /index.html as if the config
// were valid — malformed config must be ignored, not partially
// applied.
expect(String(out.body)).not.toContain('would-be-fallback');
});
it('still serves real files normally — config only applies on 404', async () => {
// Sanity check: when the requested path exists, the config's
// error rules are not consulted, so the response is the live
// file at status 200 (not the error-page status override).
const { owner, homePath, sub } = await setupSiteWithConfig({
errors: { '404': { file: '/index.html', status: 200 } },
});
// Only `page.html` is written; the rule's `/index.html` target is
// absent, so the body below can only come from the requested file.
const body = Buffer.from('real-page');
await writeFile(
owner.id,
`${homePath}/page.html`,
body,
'text/html',
);
const mw = buildMiddleware();
const { res, out } = makeRes();
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/page.html',
}),
res,
vi.fn(),
);
// Yield one macrotask before asserting — presumably so the streamed
// file body finishes landing in `out.body`; confirm against `makeRes`.
await new Promise<void>((resolve) => setImmediate(resolve));
expect(out.statusCode).toBe(200);
const piped = out.body as Buffer | undefined;
expect(piped!.equals(body)).toBe(true);
});
it('serves the cached config on a second request, even when the on-disk file is removed (Redis cache holds within TTL)', async () => {
    // First request seeds the cache with the parsed config. We
    // then delete the underlying file and fire a second request:
    // if the cache is wired correctly, the SPA fallback still
    // applies because we never re-read from S3. Within-TTL
    // staleness is the explicit contract (60s default).
    const { owner, homePath, sub } = await setupSiteWithConfig({
        errors: { '404': { file: '/index.html', status: 200 } },
    });
    const body = Buffer.from('<html>spa-shell</html>');
    await writeFile(owner.id, `${homePath}/index.html`, body, 'text/html');
    const mw = buildMiddleware();

    // First request → primes the cache.
    const first = makeRes();
    await mw(
        makeReq({
            hostname: `${sub}.site.puter.localhost`,
            path: '/route-a',
        }),
        first.res,
        vi.fn(),
    );
    await new Promise<void>((resolve) => setImmediate(resolve));
    expect(first.out.statusCode).toBe(200);

    // Now wipe the on-disk config. If the loader hits S3 on every
    // request, the second call below would see no config and fall
    // through to a default 404. The cache contract is that it
    // does NOT — within the TTL, the prior parse stands.
    const configEntry = await server.stores.fsEntry.getEntryByPath(
        `${homePath}/.puter_site_config`,
    );
    // Guard against a vacuous pass: if the lookup came back empty the
    // removal would be skipped and the second request would succeed
    // from disk, proving nothing about the cache.
    expect(configEntry).toBeTruthy();
    await server.services.fs.remove(owner.id, { entry: configEntry! });

    const second = makeRes();
    await mw(
        makeReq({
            hostname: `${sub}.site.puter.localhost`,
            path: '/route-b',
        }),
        second.res,
        vi.fn(),
    );
    await new Promise<void>((resolve) => setImmediate(resolve));
    // SPA fallback still fires → cache served the deleted config.
    expect(second.out.statusCode).toBe(200);
    expect(
        (second.out.body as Buffer | undefined)?.equals(body),
    ).toBe(true);
});
it('ignores `errors` entries with status codes outside 4xx/5xx', async () => {
// A `200` key in errors is meaningless and a footgun (it
// could be used to silently override the happy path). The
// parser must drop these on the floor.
const { sub } = await setupSiteWithConfig({
errors: {
// One key below the accepted range, one above it.
'200': { file: '/oops.html', status: 200 },
'999': { file: '/oops.html', status: 200 },
},
});
const mw = buildMiddleware();
const { res, out } = makeRes();
await mw(
makeReq({
hostname: `${sub}.site.puter.localhost`,
path: '/missing',
}),
res,
vi.fn(),
);
// No valid 404 rule survives the filter → default 404.
expect(out.statusCode).toBe(404);
expect(String(out.body)).toContain('Not Found');
});
});
+67 -4
View File
@@ -36,6 +36,12 @@ import {
resolvePrivateIdentity,
resolvePublicHostedIdentity,
} from './privateAppGate';
import {
isSiteConfigPath,
loadSiteConfig,
resolveErrorTarget,
type SiteConfig,
} from './puterSiteConfig';
/**
* Serves user-hosted static sites on the hosting domains (`*.puter.site`,
@@ -52,9 +58,15 @@ import {
*
* Deferred (not yet implemented):
* - `.at` username-based sites (UUIDv5-keyed `/user/Public`).
* - `.puter_site_config` error rules (custom status-code file mapping).
* - Custom domains (subdomains table `domain` column) — requires host
* validation to allow arbitrary hostnames first.
*
* Site config:
* - `.puter_site_config` at the site root (see `puterSiteConfig.ts`)
* supplies custom error pages. On a file 404, the matching rule's
* `file` is served with the rule's `status` — this supports the SPA
* fallback pattern of `404 → /index.html with status 200`. The
* config file itself is hidden from public serving.
*/
const SUBDOMAIN_404 = `<div style="font-size: 20px;
@@ -458,10 +470,35 @@ export const createPuterSiteMiddleware = (
}
const filePath = rootPath + resolvedUrlPath;
// Best-effort site config load. A missing / malformed config
// never blocks the request — `loadSiteConfig` swallows all
// errors and returns null on any failure. The Redis cache
// keyed on `rootDirId` keeps the hot path off S3 for the
// common case where the same subdomain gets repeat visits.
let siteConfig: SiteConfig | null = null;
try {
siteConfig = await loadSiteConfig({
rootPath,
rootDirId: rootEntry.id,
fsEntryStore: layers.stores.fsEntry,
fsService: layers.services.fs,
cache: layers.clients.redis,
});
} catch (e) {
console.warn('[puter-site] loadSiteConfig threw', e);
}
// Hide the config file from public serving — visitors should
// see the same 404 as for any other missing path so the
// deployment shape isn't leaked.
const isConfigRequest = isSiteConfigPath(resolvedUrlPath);
// Subdomain hosting bypasses ACL by design: anything the owner placed
// under the registered root_dir is treated as public. Path traversal
// is blocked above by `pathPosix.normalize` anchoring at `/`.
let entry = await layers.stores.fsEntry.getEntryByPath(filePath);
let entry = isConfigRequest
? null
: await layers.stores.fsEntry.getEntryByPath(filePath);
if (entry?.isDir) {
// Folder request → fall back to <folder>/index.html, the same
// way `/` is rewritten to `/index.html` at the site root above.
@@ -469,6 +506,27 @@ export const createPuterSiteMiddleware = (
pathPosix.join(filePath, 'index.html'),
);
}
// Custom error page fallback. On a 404 we consult the site config
// for a rule and, if one resolves to an existing file, serve that
// instead. Critical: we do this exactly once — the error page
// itself never re-enters error handling, so a misconfigured
// `errors.404.file` that doesn't exist falls through to the
// default 404 page rather than looping.
let statusOverride: number | undefined;
if (!entry || entry.isDir) {
const errorTarget = resolveErrorTarget(siteConfig, 404, rootPath);
if (errorTarget) {
const candidate = await layers.stores.fsEntry.getEntryByPath(
errorTarget.absPath,
);
if (candidate && !candidate.isDir) {
entry = candidate;
statusOverride = errorTarget.status;
}
}
}
if (!entry || entry.isDir) {
res.status(404)
.type('text/html; charset=UTF-8')
@@ -477,8 +535,13 @@ export const createPuterSiteMiddleware = (
}
// Stream the file. `fsEntry.readContent` honours Range + emits
// ETag/Last-Modified when the S3 layer returns them.
// ETag/Last-Modified when the S3 layer returns them. Range
// requests are suppressed when serving a custom error page so
// the visitor always receives the full document with the
// configured status code (no 206 with a stale `bytes=...`
// header from the original request).
const range =
statusOverride === undefined &&
typeof req.headers.range === 'string'
? req.headers.range
: undefined;
@@ -524,7 +587,7 @@ export const createPuterSiteMiddleware = (
res.setHeader('Last-Modified', download.lastModified.toUTCString());
res.setHeader('Accept-Ranges', 'bytes');
res.setHeader('Access-Control-Allow-Origin', '*');
res.status(range ? 206 : 200);
res.status(statusOverride ?? (range ? 206 : 200));
// Best-effort egress metering against the site owner. The request
// itself is unauthenticated (public site visitor), so we can't use
@@ -0,0 +1,386 @@
/**
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { posix as pathPosix } from 'node:path';
import type { FSEntry } from '../../../stores/fs/FSEntry';
/**
 * Minimal Redis surface we need — `get` / `set` with EX TTL. Typed as a
 * subset of ioredis so callers can pass either the real cluster client
 * or a mock without needing the full Cluster type here.
 */
export interface SiteConfigCache {
/** Resolves to the string stored under `key`, or `null`. */
get(key: string): Promise<string | null>;
/**
 * Stores `value` under `key`. `mode` is always the literal `'EX'`,
 * with `ttlSeconds` as the expiry. The resolved value is ignored by
 * this module.
 */
set(
key: string,
value: string,
mode: 'EX',
ttlSeconds: number,
): Promise<unknown>;
}
/**
 * Site-level configuration loaded from `.puter_site_config` at the site
 * root. Lets a hosted site customize how the server responds for it —
 * currently limited to error-page mapping (e.g. SPA fallback that serves
 * `/index.html` for any 404 with a 200 status).
 *
 * The on-disk JSON shape:
 *
 *   {
 *     "errors": {
 *       "404": { "file": "/index.html", "status": 200 }
 *     }
 *   }
 *
 * Other static-hosting platforms (Vercel `vercel.json`, Netlify/Amplify
 * `_redirects`, nginx `error_page`) express the same idea with different
 * syntax. The internal `SiteConfig` is intentionally narrow so additional
 * parsers can normalize to it without churning the consumer in
 * `puterSite.ts` — see `SITE_CONFIG_FILENAMES` for the lookup list.
 *
 * Security posture (every input here originates from a user-uploaded file
 * served on the open internet):
 *  - the on-disk file is size-capped before parse (`MAX_CONFIG_BYTES`),
 *  - JSON parsing is wrapped in try/catch — a malformed file silently
 *    falls back to default behavior, never 5xx,
 *  - every `file` value is normalized as if it were a URL path: it must
 *    start with `/`, gets `pathPosix.normalize`d so `..` is collapsed,
 *    and is re-anchored under the site root before any FS lookup,
 *  - status codes are clamped to a strict allow-list (the request side
 *    only honours 4xx/5xx error keys; the response side validates the
 *    `status` is a legitimate HTTP integer),
 *  - the config file itself is hidden from public serving by the caller
 *    (`isSiteConfigPath`) — same status/body as any other missing path,
 *    no separate 403 that would leak its existence.
 *
 * Loop safety is the consumer's responsibility: when serving an error
 * page, do NOT re-consult `errors` if the error page itself is missing,
 * otherwise a misconfigured site could spin a 404 → 404 → 404 cycle.
 */
export interface SiteErrorRule {
/** Absolute path under the site root (e.g. `/index.html`). */
file: string;
/** HTTP status to return when serving this error page (200–599). */
status: number;
}
/**
 * Normalized site configuration: the shape every config parser in this
 * module returns and the shape that is serialized into the cache.
 */
export interface SiteConfig {
/** Map of HTTP status code → custom error rule. Keys are 4xx/5xx. */
errors: Record<number, SiteErrorRule>;
}
// Largest config file, in bytes, that will be read and parsed (64 KiB).
// Applied both to the entry's recorded size and to the streamed byte
// count while reading.
const MAX_CONFIG_BYTES = 64 * 1024;
// Cache key prefix is distinct from `subdomains:` (SubdomainStore) and
// other Redis users — keep this in sync if you rename, otherwise stale
// entries from prior deploys could be read back as configs.
const CACHE_KEY_PREFIX = 'puter-site-config:';
// Expiry, in seconds, applied to every cache entry this module writes
// (parsed configs and the negative marker alike).
const CACHE_TTL_SECONDS = 60;
// Sentinel for "we looked, the site has no config" so repeated visits
// to a config-less site don't keep round-tripping S3 to confirm.
const NEGATIVE_CACHE_MARKER = '__none__';
/**
 * Filenames consulted at the site root, in priority order. First file
 * that parses to a non-empty config wins. Extending this list to add
 * Vercel / Netlify / nginx adapters is the entrypoint for multi-format
 * support — each parser receives the raw text and returns a normalized
 * `SiteConfig` (or null if the file isn't valid in that format).
 *
 * The same list drives `isSiteConfigPath`, so any filename added here is
 * also hidden from public serving.
 */
const SITE_CONFIG_FILENAMES: ReadonlyArray<{
name: string;
parse: (text: string) => SiteConfig | null;
}> = [{ name: '.puter_site_config', parse: parsePuterSiteConfig }];
/**
 * Tells the caller whether `urlPath` (already normalized to start with
 * `/`) refers to a site config file, so `puterSite.ts` can refuse to
 * serve it and avoid leaking the deployment shape to visitors.
 *
 * Only the final path segment is compared, so a config filename matches
 * at any directory depth, not just at the site root.
 */
export function isSiteConfigPath(urlPath: string): boolean {
    const leaf = pathPosix.basename(urlPath);
    for (const candidate of SITE_CONFIG_FILENAMES) {
        if (candidate.name === leaf) return true;
    }
    return false;
}
/** Inputs to `loadSiteConfig`, bundled as one options object. */
interface LoadSiteConfigArgs {
/** Absolute site root path (e.g. `/<username>/Public`). */
rootPath: string;
/**
 * Stable identifier for the site root, used as the cache key. We
 * key on `rootDirId` (not the subdomain) so that renaming a
 * subdomain or pointing multiple subdomains at the same
 * directory neither orphans nor duplicates the cached entry.
 */
rootDirId: number;
/** Path → entry lookup; only `getEntryByPath` is used by the loader. */
fsEntryStore: {
getEntryByPath: (path: string) => Promise<FSEntry | null>;
};
/**
 * Content reader for a located entry. The loader consumes the returned
 * `body` stream up to its size cap; `contentLength` is not read.
 */
fsService: {
readContent: (
entry: FSEntry,
options?: { range?: string },
) => Promise<{
body: NodeJS.ReadableStream;
contentLength: number | null;
}>;
};
/**
 * Optional Redis cache. When omitted, every request re-reads the
 * config from S3 — fine for tests, slow for prod. Cache failures
 * are swallowed (best-effort): a transient Redis blip just falls
 * through to the live read, never errors the request.
 */
cache?: SiteConfigCache;
}
/**
 * Locate and parse the site config for one hosted site.
 *
 * Resolution order:
 *  1. When a cache is supplied and `rootDirId` is a positive integer, the
 *     cache is consulted first. A negative marker yields null; a stored
 *     JSON document with an `errors` field is returned as-is.
 *  2. Otherwise each entry of `SITE_CONFIG_FILENAMES` is tried in order
 *     under `rootPath`; the first file that parses to a non-empty config
 *     is cached and returned.
 *
 * Returns null when no config file exists, the file is unreadable,
 * oversized, or fails validation — callers must treat null as "behave
 * like there is no config" and never raise the error to the visitor.
 * Lookup, read and parse failures are logged for the operator and
 * swallowed.
 *
 * The "no config" result is cached only when every candidate file was
 * inspected without an I/O failure. A failed lookup or read says nothing
 * about whether the site has a config, so caching it would turn a
 * momentary storage blip into a full TTL of ignored configuration.
 *
 * @param args - Site root, cache key source, FS accessors and optional cache.
 * @returns The normalized config, or null when none applies.
 */
export async function loadSiteConfig(
    args: LoadSiteConfigArgs,
): Promise<SiteConfig | null> {
    const { rootPath, rootDirId, fsEntryStore, fsService, cache } = args;
    if (!rootPath || rootPath === '/') return null;

    // Reject non-positive-integer ids defensively — they'd produce a
    // weird cache key and we'd cache something nonsensical against it.
    // The store contract is positive integers, but this is a hot path
    // for untrusted-origin traffic so we belt-and-brace it.
    const cacheKey =
        cache && Number.isInteger(rootDirId) && rootDirId > 0
            ? `${CACHE_KEY_PREFIX}${rootDirId}`
            : null;

    if (cache && cacheKey) {
        try {
            const raw = await cache.get(cacheKey);
            if (raw === NEGATIVE_CACHE_MARKER) return null;
            if (typeof raw === 'string' && raw.length > 0) {
                // Trust the cached shape — it was produced by this
                // same parser, validated, and the TTL is short. Still
                // wrap in try/catch in case a different deploy wrote a
                // legacy/unparseable value to the same key.
                try {
                    const parsed = JSON.parse(raw) as SiteConfig;
                    if (parsed && parsed.errors) return parsed;
                } catch {
                    /* fall through to a fresh load */
                }
            }
        } catch {
            // Cache transport failure — fall through to live load.
            // Don't poison the next request with a half-applied state.
        }
    }

    // Set when a lookup or read failed for reasons unrelated to the
    // file's content; suppresses the negative cache write below.
    let sawIoFailure = false;

    for (const { name, parse } of SITE_CONFIG_FILENAMES) {
        const filePath = pathPosix.join(rootPath, name);

        let entry: FSEntry | null;
        try {
            entry = await fsEntryStore.getEntryByPath(filePath);
        } catch (e) {
            console.warn('[puter-site] config lookup failed', {
                path: filePath,
                error: e instanceof Error ? e.message : String(e),
            });
            sawIoFailure = true;
            continue;
        }
        if (!entry || entry.isDir) continue;

        // Reject oversized configs before paying the S3 read. `size` can
        // be null for legacy entries — accept and rely on the streaming
        // byte counter below.
        if (entry.size !== null && entry.size > MAX_CONFIG_BYTES) {
            console.warn('[puter-site] config too large, ignoring', {
                path: filePath,
                size: entry.size,
            });
            continue;
        }

        let text: string | null;
        try {
            text = await readBoundedText(entry, fsService, MAX_CONFIG_BYTES);
        } catch (e) {
            console.warn('[puter-site] config read failed', {
                path: filePath,
                error: e instanceof Error ? e.message : String(e),
            });
            sawIoFailure = true;
            continue;
        }
        // Null means the stream exceeded the byte cap mid-read.
        if (text === null) continue;

        let parsed: SiteConfig | null;
        try {
            parsed = parse(text);
        } catch (e) {
            // A throwing parser is a property of the file's content, so
            // it is treated like any other invalid config (cacheable).
            console.warn('[puter-site] config parse threw', {
                path: filePath,
                error: e instanceof Error ? e.message : String(e),
            });
            parsed = null;
        }

        if (parsed && Object.keys(parsed.errors).length > 0) {
            if (cache && cacheKey) {
                writeCache(
                    cache,
                    cacheKey,
                    JSON.stringify(parsed),
                    CACHE_TTL_SECONDS,
                );
            }
            return parsed;
        }
    }

    // No file matched (or all matched files parsed to empty). Cache
    // the negative result so config-less sites — which are the common
    // case — don't keep paying the FS lookup on every visit. Skipped
    // after an I/O failure: that outcome is not evidence of "no config".
    if (cache && cacheKey && !sawIoFailure) {
        writeCache(cache, cacheKey, NEGATIVE_CACHE_MARKER, CACHE_TTL_SECONDS);
    }
    return null;
}
// Best-effort, non-blocking cache write. The returned promise is
// deliberately not awaited: a slow or failing Redis must not add
// latency to, or fail, the request — the next request simply reloads
// from the filesystem.
function writeCache(
    cache: SiteConfigCache,
    key: string,
    value: string,
    ttlSeconds: number,
): void {
    const ignoreFailure = (): void => {
        /* swallow — cache writes are best-effort */
    };
    const pending = cache.set(key, value, 'EX', ttlSeconds);
    void pending.catch(ignoreFailure);
}
/**
 * Resolve a custom error rule for `statusCode` into an absolute FS path
 * under `rootPath`.
 *
 * Returns null when there is no config, no rule for `statusCode`, the
 * rule is malformed (non-string `file`, or a `status` that is not an
 * integer in 200–599), or the rule's `file` normalizes to the site root
 * itself. `..` segments in `file` are collapsed against `/` before the
 * path is appended to `rootPath`, so the result always stays under the
 * site root.
 *
 * The shape checks matter because `config` may have come straight from
 * the cache, where only the presence of an `errors` field was verified;
 * without them a foreign cache value would make the path helpers throw
 * on the request path.
 *
 * Caller is responsible for loop prevention (don't recurse into error
 * handling when serving the error page itself).
 *
 * @param config - Loaded site config, or null when the site has none.
 * @param statusCode - The error status being handled (e.g. 404).
 * @param rootPath - Absolute site root; trailing slashes are ignored.
 * @returns The file to serve and the status to send, or null.
 */
export function resolveErrorTarget(
    config: SiteConfig | null,
    statusCode: number,
    rootPath: string,
): { absPath: string; status: number } | null {
    if (!config || !config.errors || typeof config.errors !== 'object') {
        return null;
    }
    const rule = config.errors[statusCode] as
        | Partial<SiteErrorRule>
        | null
        | undefined;
    if (!rule || typeof rule !== 'object') return null;

    const { file, status } = rule;
    if (typeof file !== 'string') return null;
    if (
        typeof status !== 'number' ||
        !Number.isInteger(status) ||
        status < 200 ||
        status > 599
    ) {
        return null;
    }

    // Defence-in-depth: re-normalize at use time. `parsePuterSiteConfig`
    // already does this, but keeping the contract here means future
    // parsers (Vercel, Netlify) only need to return raw paths and can
    // rely on this final guard.
    const normalized = pathPosix.normalize(pathPosix.join('/', file));
    if (!normalized.startsWith('/') || normalized === '/') return null;

    const absPath = rootPath.replace(/\/+$/, '') + normalized;
    return { absPath, status };
}
// ── Parsers ─────────────────────────────────────────────────────────
/**
 * Parse the native `.puter_site_config` JSON format into a normalized
 * `SiteConfig`.
 *
 * Returns null when `text` is not valid JSON or its top level is not an
 * object. Otherwise returns a config whose `errors` map contains only
 * the rules that passed validation (possibly none); invalid rules are
 * dropped individually rather than failing the whole file.
 *
 * A rule is kept when:
 *  - its key is exactly three decimal digits in the range 400–599,
 *  - its value is an object with a string `file` starting with `/`,
 *  - `status` is absent (defaults to the key) or an integer in 200–599,
 *  - `file` does not normalize to `/`.
 *
 * @param text - Raw file contents decoded as UTF-8.
 * @returns The normalized config, or null for unparseable input.
 */
function parsePuterSiteConfig(text: string): SiteConfig | null {
    // Editors on some platforms prepend a UTF-8 byte-order mark, which
    // survives decoding as U+FEFF and makes `JSON.parse` throw. Strip it
    // so an otherwise valid config is not silently ignored.
    const source = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;

    let raw: unknown;
    try {
        raw = JSON.parse(source);
    } catch {
        return null;
    }
    if (!raw || typeof raw !== 'object') return null;

    const errorsField = (raw as { errors?: unknown }).errors;
    const errors: Record<number, SiteErrorRule> = {};
    // `errors` must be a plain map; an array would otherwise be walked
    // with its indices standing in for status codes.
    if (
        !errorsField ||
        typeof errorsField !== 'object' ||
        Array.isArray(errorsField)
    ) {
        return { errors };
    }

    // Only 4xx/5xx are meaningful as "error pages" — accepting
    // 2xx/3xx keys would let a config silently override the
    // happy path, which is out of scope and a footgun. Matching the
    // key textually (instead of coercing with `Number`) also rejects
    // spellings such as "0x194", "4e2" or " 404 ".
    const errorCodeKey = /^[45]\d{2}$/;

    for (const [key, value] of Object.entries(
        errorsField as Record<string, unknown>,
    )) {
        if (!errorCodeKey.test(key)) continue;
        const code = Number(key);

        if (!value || typeof value !== 'object') continue;
        const rule = value as { file?: unknown; status?: unknown };
        if (typeof rule.file !== 'string' || !rule.file.startsWith('/')) {
            continue;
        }

        // Default the response status to the matched error code so a
        // bare `{ file: '/404.html' }` Just Works. Allow overriding
        // for the SPA-fallback case where 404 should turn into 200.
        let status: number;
        if (rule.status === undefined) {
            status = code;
        } else if (
            typeof rule.status === 'number' &&
            Number.isInteger(rule.status) &&
            rule.status >= 200 &&
            rule.status <= 599
        ) {
            status = rule.status;
        } else {
            continue;
        }

        const normalized = pathPosix.normalize(
            pathPosix.join('/', rule.file),
        );
        // Empty/root after normalize is meaningless as an error page
        // (it would resolve to the site root itself).
        if (normalized === '/') continue;

        errors[code] = { file: normalized, status };
    }
    return { errors };
}
// Reads an entry's content as UTF-8 text, giving up once more than
// `maxBytes` have been received. Resolves to null in that case (the
// caller treats it as "config too large, ignore"), otherwise to the
// decoded text. Whatever the outcome — success, overflow, or a stream
// error — the stream's `destroy` (when present) is invoked so the
// underlying connection is not left open.
async function readBoundedText(
    entry: FSEntry,
    fsService: LoadSiteConfigArgs['fsService'],
    maxBytes: number,
): Promise<string | null> {
    const { body } = await fsService.readContent(entry);
    const source = body as NodeJS.ReadableStream & {
        destroy?: () => void;
    };
    const parts: Buffer[] = [];
    let received = 0;
    try {
        for await (const part of source as AsyncIterable<Buffer>) {
            received += part.length;
            // Bail out before buffering the chunk that crosses the cap.
            if (received > maxBytes) return null;
            parts.push(part);
        }
        return Buffer.concat(parts).toString('utf8');
    } finally {
        source.destroy?.();
    }
}