docfast/src/routes/health.ts
OpenClaw Deployer 95ca10175f
Some checks failed
Build & Deploy to Staging / Build & Deploy to Staging (push) Successful in 9m48s
Promote to Production / Deploy to Production (push) Failing after 3m46s
fix: destroy dead pool connections on transient errors (proper failover)
- queryWithRetry now uses explicit client checkout; on transient error,
  calls client.release(true) to DESTROY the dead connection instead of
  returning it to pool. Fresh connections are created on retry.
- connectWithRetry validates connections with SELECT 1 before returning
- Health check destroys bad connections on failure
- Reduced idleTimeoutMillis from 30s to 10s for faster stale connection eviction
- Fixes BUG-075: pool kept reusing dead TCP sockets after PgBouncer pod restart
2026-02-18 14:28:47 +00:00

69 lines
2.2 KiB
TypeScript

import { Router } from "express";
import { createRequire } from "module";
import { getPoolStats } from "../services/browser.js";
import { pool } from "../services/db.js";
// Time budget, in milliseconds, for the database probe run by the health route.
const HEALTH_CHECK_TIMEOUT_MS = 3000;

// ES modules have no ambient `require`; build one anchored at this file so the
// package manifest can be loaded by relative path.
const require = createRequire(import.meta.url);

// Application version string, read once at module load from package.json.
const APP_VERSION = require("../../package.json").version;

/** Express router that serves the health-check endpoint. */
export const healthRouter = Router();
/** Successful outcome of the database probe. */
type DatabaseOk = { status: "ok"; version: string };

/** Database section of the health payload. */
type DatabaseStatus = DatabaseOk | { status: "error"; message: string };

/**
 * Runs a liveness probe against the database through the shared pool, giving
 * up after `timeoutMs`.
 *
 * The checked-out client is always handed back exactly once:
 * - on success it is returned to the pool;
 * - on a query error it is destroyed so a broken connection is not reused;
 * - on timeout it is destroyed as well, so a query hanging on a dead socket
 *   cannot keep a pool slot occupied after the caller has given up.
 *
 * @param timeoutMs Maximum time to wait for the probe, in milliseconds.
 * @returns The probe result with a shortened server version string.
 * @throws The connection/query error, or an Error with the message
 *         "Database health check timed out".
 */
async function probeDatabase(timeoutMs: number): Promise<DatabaseOk> {
  let timedOut = false;
  let timer: ReturnType<typeof setTimeout> | undefined;
  // Installed once a client is checked out; lets the timeout path destroy it.
  let destroyClient: (() => void) | undefined;

  const runProbe = async (): Promise<DatabaseOk> => {
    const client = await pool.connect();

    // Guarded release: the pool client must not be released twice, and both
    // the timeout path and the query path may try to hand it back.
    let released = false;
    const release = (destroy: boolean): void => {
      if (released) return;
      released = true;
      try {
        if (destroy) {
          client.release(true);
        } else {
          client.release();
        }
      } catch {
        // Best effort: a failure while handing the client back must not mask
        // the probe result.
      }
    };

    if (timedOut) {
      // The caller stopped waiting while we were still queued for a
      // connection; hand it straight back instead of probing for nobody.
      release(false);
      throw new Error("Database health check timed out");
    }
    destroyClient = () => release(true);

    try {
      // Use SELECT 1 as a lightweight liveness probe
      await client.query("SELECT 1");
      const result = await client.query("SELECT version()");
      const rawVersion: unknown = result.rows[0]?.version;
      const version =
        typeof rawVersion === "string" && rawVersion ? rawVersion : "Unknown";
      const versionMatch = version.match(/PostgreSQL ([\d.]+)/);
      const shortVersion = versionMatch
        ? `PostgreSQL ${versionMatch[1]}`
        : "PostgreSQL";
      release(false);
      return { status: "ok", version: shortVersion };
    } catch (queryErr) {
      // Destroy the bad connection so it doesn't go back to the pool
      release(true);
      throw queryErr;
    }
  };

  const timeout = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      timedOut = true;
      // Tear down the in-flight connection (if any) rather than abandoning it.
      destroyClient?.();
      reject(new Error("Database health check timed out"));
    }, timeoutMs);
  });

  try {
    return await Promise.race([runProbe(), timeout]);
  } finally {
    // Do not leave a live timer behind once the probe has settled.
    clearTimeout(timer);
  }
}

/**
 * GET / — reports service health.
 *
 * Responds 200 with `status: "ok"` when the database probe succeeds, or 503
 * with `status: "degraded"` and the error message when it fails or times out.
 * The payload also carries the app version and the stats returned by
 * `getPoolStats()`.
 */
healthRouter.get("/", async (_req, res) => {
  const poolStats = getPoolStats();
  let databaseStatus: DatabaseStatus;
  let overallStatus = "ok";
  let httpStatus = 200;

  // Check database connectivity with a real query and timeout
  try {
    databaseStatus = await probeDatabase(HEALTH_CHECK_TIMEOUT_MS);
  } catch (error: unknown) {
    const message =
      error instanceof Error && error.message
        ? error.message
        : "Database connection failed";
    databaseStatus = { status: "error", message };
    overallStatus = "degraded";
    httpStatus = 503;
  }

  const response = {
    status: overallStatus,
    version: APP_VERSION,
    database: databaseStatus,
    pool: {
      size: poolStats.poolSize,
      active: poolStats.totalPages - poolStats.availablePages,
      available: poolStats.availablePages,
      queueDepth: poolStats.queueDepth,
      pdfCount: poolStats.pdfCount,
      restarting: poolStats.restarting,
      uptimeSeconds: Math.round(poolStats.uptimeMs / 1000),
    },
  };

  res.status(httpStatus).json(response);
});