fix: destroy dead pool connections on transient errors (proper failover)
- queryWithRetry now uses explicit client checkout; on a transient error it calls client.release(true) to DESTROY the dead connection instead of returning it to the pool. Fresh connections are created on retry.
- connectWithRetry validates connections with SELECT 1 before returning
- Health check destroys bad connections on failure
- Reduced idleTimeoutMillis from 30s to 10s for faster stale connection eviction
- Fixes BUG-075: pool kept reusing dead TCP sockets after PgBouncer pod restart
This commit is contained in:
parent
8d88a9c235
commit
95ca10175f
2 changed files with 52 additions and 13 deletions
|
|
@@ -16,18 +16,23 @@ healthRouter.get("/", async (_req, res) => {
|
|||
let overallStatus = "ok";
|
||||
let httpStatus = 200;
|
||||
|
||||
// Check database connectivity with a timeout
|
||||
// Check database connectivity with a real query and timeout
|
||||
try {
|
||||
const dbCheck = async () => {
|
||||
const client = await pool.connect();
|
||||
try {
|
||||
// Use SELECT 1 as a lightweight liveness probe
|
||||
await client.query('SELECT 1');
|
||||
const result = await client.query('SELECT version()');
|
||||
const version = result.rows[0]?.version || 'Unknown';
|
||||
const versionMatch = version.match(/PostgreSQL ([\d.]+)/);
|
||||
const shortVersion = versionMatch ? `PostgreSQL ${versionMatch[1]}` : 'PostgreSQL';
|
||||
return { status: "ok", version: shortVersion };
|
||||
} finally {
|
||||
client.release();
|
||||
return { status: "ok", version: shortVersion };
|
||||
} catch (queryErr) {
|
||||
// Destroy the bad connection so it doesn't go back to the pool
|
||||
try { client.release(true); } catch (_) {}
|
||||
throw queryErr;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue