import pg from "pg";
import logger from "./logger.js";

const { Pool } = pg;

// Transient error codes from PgBouncer / PostgreSQL that warrant retry
const TRANSIENT_ERRORS = new Set([
  "ECONNRESET",
  "ECONNREFUSED",
  "EPIPE",
  "ETIMEDOUT",
  "CONNECTION_LOST",
  "57P01", // admin_shutdown
  "57P02", // crash_shutdown
  "57P03", // cannot_connect_now
  "08006", // connection_failure
  "08003", // connection_does_not_exist
  "08001", // sqlclient_unable_to_establish_sqlconnection
]);

// Lower-case message fragments that mark an error as transient when it carries
// no recognised code. Matched against the lower-cased error message.
const TRANSIENT_MESSAGE_FRAGMENTS = [
  "no available server", // PgBouncer specific
  "connection terminated",
  "connection refused",
  "server closed the connection",
  "timeout expired",
];

// Shared connection pool. Every connection setting falls back to a built-in
// default when the matching DATABASE_* environment variable is unset or empty.
// NOTE(review): the fallbacks include a default password — confirm that real
// deployments always set DATABASE_PASSWORD.
const pool = new Pool({
  host: process.env.DATABASE_HOST || "172.17.0.1",
  port: Number.parseInt(process.env.DATABASE_PORT || "5432", 10),
  database: process.env.DATABASE_NAME || "docfast",
  user: process.env.DATABASE_USER || "docfast",
  password: process.env.DATABASE_PASSWORD || "docfast",
  max: 10,
  idleTimeoutMillis: 10000, // Evict idle connections after 10s (was 30s) — faster cleanup of stale sockets
  connectionTimeoutMillis: 5000, // Don't wait forever for a connection
  allowExitOnIdle: false,
  keepAlive: true, // TCP keepalive to detect dead connections
  keepAliveInitialDelayMillis: 10000, // Start keepalive probes after 10s idle
});

// Handle errors on idle clients — pg.Pool automatically removes the client
// after emitting this event, so we just log it.
pool.on("error", (err) => {
  logger.error({ err }, "Unexpected error on idle PostgreSQL client — evicted from pool");
});

/**
 * Determine if an error is transient (PgBouncer failover, network blip).
 *
 * An error counts as transient when its `code` is listed in TRANSIENT_ERRORS
 * or when its lower-cased `message` contains one of
 * TRANSIENT_MESSAGE_FRAGMENTS.
 *
 * @param {*} err - The caught value; falsy values are never transient.
 * @returns {boolean} true when the failure is worth retrying.
 */
export function isTransientError(err) {
  if (!err) return false;
  if (TRANSIENT_ERRORS.has(err.code)) return true;
  // String() guards against a non-string `message`, which would otherwise make
  // this classifier itself throw while an error is being handled.
  const message = String(err.message || "").toLowerCase();
  return TRANSIENT_MESSAGE_FRAGMENTS.some((fragment) => message.includes(fragment));
}
/**
 * Execute a query with automatic retry on transient errors.
 *
 * KEY FIX: On transient error, we destroy the bad connection (client.release(true))
 * so the pool creates a fresh TCP connection on the next attempt, instead of
 * reusing a dead socket from the pool.
 *
 * The client is destroyed after ANY failed attempt, transient or not; only
 * transient failures (as judged by isTransientError) are retried.
 *
 * NOTE(review): a retried statement is sent again in full. If a connection can
 * drop after the server has already applied a write, the write may run twice —
 * confirm callers only pass statements that are safe to repeat.
 *
 * @param {string} queryText - SQL text handed to client.query().
 * @param {Array} [params] - Bind values handed to client.query().
 * @param {number} [maxRetries=3] - Retries allowed after the first attempt.
 *   Negative or non-numeric values are treated as 0, so the query is always
 *   attempted at least once.
 * @returns {Promise<*>} Whatever client.query() resolves with.
 * @throws The error of the last attempt, when it is not transient or the
 *   retry budget is exhausted.
 */
export async function queryWithRetry(queryText, params, maxRetries = 3) {
  // Normalise the budget so a bad value cannot skip the loop entirely (which
  // used to reject with `undefined`) or cause a pointless sleep before failing.
  const retryLimit = Math.max(0, Math.floor(Number(maxRetries)) || 0);

  for (let attempt = 0; ; attempt++) {
    let client;
    try {
      client = await pool.connect();
      const result = await client.query(queryText, params);
      client.release(); // Return healthy connection to pool
      return result;
    } catch (err) {
      // Destroy the bad connection so pool doesn't reuse it
      if (client) {
        try {
          client.release(true);
        } catch (_) {
          /* already destroyed */
        }
      }

      if (!isTransientError(err) || attempt >= retryLimit) {
        throw err;
      }

      const delayMs = Math.min(1000 * 2 ** attempt, 5000); // 1s, 2s, 4s (capped at 5s)
      logger.warn(
        { err: err.message, code: err.code, attempt: attempt + 1, maxRetries: retryLimit, delayMs },
        "Transient DB error, destroying bad connection and retrying..."
      );
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
*/ export async function connectWithRetry(maxRetries = 3) { let lastError; for (let attempt = 0; attempt <= maxRetries; attempt++) { try { const client = await pool.connect(); // Validate the connection is actually alive try { await client.query("SELECT 1"); } catch (validationErr) { // Connection is dead — destroy it and retry try { client.release(true); } catch (_) { } if (!isTransientError(validationErr) || attempt === maxRetries) { throw validationErr; } const delayMs = Math.min(1000 * Math.pow(2, attempt), 5000); logger.warn({ err: validationErr.message, code: validationErr.code, attempt: attempt + 1 }, "Connection validation failed, destroying and retrying..."); await new Promise(resolve => setTimeout(resolve, delayMs)); continue; } return client; } catch (err) { lastError = err; if (!isTransientError(err) || attempt === maxRetries) { throw err; } const delayMs = Math.min(1000 * Math.pow(2, attempt), 5000); logger.warn({ err: err.message, code: err.code, attempt: attempt + 1, maxRetries, delayMs }, "Transient DB connect error, retrying..."); await new Promise(resolve => setTimeout(resolve, delayMs)); } } throw lastError; } export async function initDatabase() { const client = await connectWithRetry(); try { await client.query(` CREATE TABLE IF NOT EXISTS api_keys ( key TEXT PRIMARY KEY, tier TEXT NOT NULL DEFAULT 'free', email TEXT NOT NULL DEFAULT '', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), stripe_customer_id TEXT ); CREATE INDEX IF NOT EXISTS idx_api_keys_email ON api_keys(email); CREATE INDEX IF NOT EXISTS idx_api_keys_stripe ON api_keys(stripe_customer_id); CREATE TABLE IF NOT EXISTS verifications ( id SERIAL PRIMARY KEY, email TEXT NOT NULL, token TEXT NOT NULL UNIQUE, api_key TEXT NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), verified_at TIMESTAMPTZ ); CREATE INDEX IF NOT EXISTS idx_verifications_email ON verifications(email); CREATE INDEX IF NOT EXISTS idx_verifications_token ON verifications(token); CREATE TABLE IF NOT EXISTS 
pending_verifications ( email TEXT PRIMARY KEY, code TEXT NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), expires_at TIMESTAMPTZ NOT NULL, attempts INT NOT NULL DEFAULT 0 ); CREATE TABLE IF NOT EXISTS usage ( key TEXT PRIMARY KEY, count INT NOT NULL DEFAULT 0, month_key TEXT NOT NULL ); `); logger.info("PostgreSQL tables initialized"); } finally { client.release(); } } export { pool }; export default pool;