fix: compile TypeScript in Docker build — dist/ was never built in CI, connection resilience code was missing from images
This commit is contained in:
parent
95ca10175f
commit
e611609580
6 changed files with 183 additions and 50 deletions
128
dist/services/db.js
vendored
128
dist/services/db.js
vendored
|
|
@ -1,6 +1,20 @@
|
|||
import pg from "pg";
|
||||
import logger from "./logger.js";
|
||||
const { Pool } = pg;
|
||||
// Transient error codes from PgBouncer / PostgreSQL that warrant retry.
// The set mixes socket-level error codes (the `E*` entries and
// CONNECTION_LOST) with five-character PostgreSQL SQLSTATE codes; membership
// is tested against `err.code` by isTransientError().
const TRANSIENT_ERRORS = new Set([
  "ECONNRESET",
  "ECONNREFUSED",
  "EPIPE",
  "ETIMEDOUT",
  "CONNECTION_LOST",
  "57P01", // admin_shutdown
  "57P02", // crash_shutdown
  "57P03", // cannot_connect_now
  "08006", // connection_failure
  "08003", // connection_does_not_exist
  "08001", // sqlclient_unable_to_establish_sqlconnection
]);
|
||||
// Shared connection pool used by every helper in this module. Connection
// settings come from DATABASE_* environment variables, each with a fallback
// default.
const pool = new Pool({
  host: process.env.DATABASE_HOST || "172.17.0.1",
  port: parseInt(process.env.DATABASE_PORT || "5432", 10),
  // NOTE(review): the diff hunk this block was recovered from skips one or
  // more lines between `port` and `user` (presumably the `database` option).
  // Restore them from the real file before relying on this config.
  user: process.env.DATABASE_USER || "docfast",
  password: process.env.DATABASE_PASSWORD || "docfast",
  max: 10,
  idleTimeoutMillis: 10000, // Evict idle connections after 10s (was 30s) — faster cleanup of stale sockets
  connectionTimeoutMillis: 5000, // Don't wait forever for a connection
  allowExitOnIdle: false,
  keepAlive: true, // TCP keepalive to detect dead connections
  keepAliveInitialDelayMillis: 10000, // Start keepalive probes after 10s idle
});
|
||||
// Handle errors on idle clients — pg.Pool automatically removes the client
// after emitting this event, so we just log it. Without a listener the
// "error" event would be unhandled. The event also passes the affected
// client as a second argument; it is not needed here.
pool.on("error", (err) => {
  logger.error({ err }, "Unexpected error on idle PostgreSQL client — evicted from pool");
});
|
||||
// Lower-case message fragments that mark an error as transient even when it
// carries no recognised `code`.
const TRANSIENT_MESSAGE_FRAGMENTS = [
  "no available server", // PgBouncer specific
  "connection terminated",
  "connection refused",
  "server closed the connection",
  "timeout expired",
];

/**
 * Determine if an error is transient (PgBouncer failover, network blip).
 *
 * An error counts as transient when its `code` is listed in TRANSIENT_ERRORS
 * or when its message contains, case-insensitively, one of the known
 * connectivity fragments.
 *
 * @param {*} err - The caught value; may be null/undefined or a non-Error.
 * @returns {boolean} true when retrying is reasonable, false otherwise.
 */
export function isTransientError(err) {
  if (!err) {
    return false;
  }
  const code = err.code || "";
  if (TRANSIENT_ERRORS.has(code)) {
    return true;
  }
  // Coerce to a string first: this runs inside error handlers, so a
  // non-string `message` must not raise a TypeError that hides the real error.
  const msg = String(err.message || "").toLowerCase();
  return TRANSIENT_MESSAGE_FRAGMENTS.some((fragment) => msg.includes(fragment));
}
|
||||
/**
 * Execute a query with automatic retry on transient errors.
 *
 * KEY FIX: On error, we destroy the checked-out connection
 * (client.release(true)) so the pool creates a fresh TCP connection on the
 * next attempt, instead of reusing a dead socket from the pool.
 *
 * @param {string} queryText - Query text handed to `client.query`.
 * @param {Array} [params] - Query parameters handed to `client.query`.
 * @param {number} [maxRetries=3] - Retries allowed after the first attempt.
 *   Negative or non-numeric values are treated as 0, so the query is always
 *   attempted at least once.
 * @returns {Promise<object>} Whatever `client.query` resolves to.
 * @throws The original error when it is not transient or retries run out.
 */
export async function queryWithRetry(queryText, params, maxRetries = 3) {
  // Normalise the limit: previously a negative/NaN value skipped the loop
  // entirely and rejected with `undefined`.
  const retryLimit = Math.max(0, Math.floor(Number(maxRetries)) || 0);

  for (let attempt = 0; ; attempt += 1) {
    let client;
    try {
      client = await pool.connect();
      const result = await client.query(queryText, params);
      client.release(); // Return healthy connection to pool
      return result;
    } catch (err) {
      // Destroy the connection so the pool doesn't reuse it. `client` is
      // unset when pool.connect() itself failed.
      if (client) {
        try {
          client.release(true);
        } catch (_) {
          /* already destroyed */
        }
      }
      if (!isTransientError(err) || attempt >= retryLimit) {
        throw err;
      }
      const delayMs = Math.min(1000 * 2 ** attempt, 5000); // 1s, 2s, 4s (capped at 5s)
      logger.warn(
        { err: err.message, code: err.code, attempt: attempt + 1, maxRetries: retryLimit, delayMs },
        "Transient DB error, destroying bad connection and retrying...",
      );
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
|
||||
/**
 * Connect with retry — for operations that need a client (transactions).
 *
 * Each attempt checks a client out of the pool and validates it with
 * `SELECT 1`. A client that fails validation is destroyed
 * (client.release(true)). On transient errors the function waits with
 * exponential backoff and retries, so the pool can establish fresh
 * connections to the new PgBouncer pod.
 *
 * @param {number} [maxRetries=3] - Retries allowed after the first attempt.
 *   Negative or non-numeric values are treated as 0, so a connection is
 *   always attempted at least once.
 * @returns {Promise<object>} A validated pool client; the caller must
 *   release it.
 * @throws The original error when it is not transient or retries run out.
 */
export async function connectWithRetry(maxRetries = 3) {
  // Normalise the limit: previously a negative/NaN value skipped the loop
  // entirely and rejected with `undefined`.
  const retryLimit = Math.max(0, Math.floor(Number(maxRetries)) || 0);

  for (let attempt = 0; ; attempt += 1) {
    let client;
    try {
      client = await pool.connect();
      // Validate the connection is actually alive
      await client.query("SELECT 1");
      return client;
    } catch (err) {
      // A client was obtained, so the failure came from the validation query:
      // the connection is dead — destroy it rather than return it to the pool.
      const failedValidation = Boolean(client);
      if (failedValidation) {
        try {
          client.release(true);
        } catch (_) {
          /* already destroyed */
        }
      }
      if (!isTransientError(err) || attempt >= retryLimit) {
        throw err;
      }
      const delayMs = Math.min(1000 * 2 ** attempt, 5000); // 1s, 2s, 4s (capped at 5s)
      logger.warn(
        { err: err.message, code: err.code, attempt: attempt + 1, maxRetries: retryLimit, delayMs },
        failedValidation
          ? "Connection validation failed, destroying and retrying..."
          : "Transient DB connect error, retrying...",
      );
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
|
||||
export async function initDatabase() {
|
||||
const client = await pool.connect();
|
||||
const client = await connectWithRetry();
|
||||
try {
|
||||
await client.query(`
|
||||
CREATE TABLE IF NOT EXISTS api_keys (
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue