All checks were successful
Build & Deploy to Staging / Build & Deploy to Staging (push) Successful in 13m17s
When renderUrlPdf() set up request interception for SSRF DNS pinning,
the interceptor and its "request" event listener were never removed in recyclePage().
As a result, later HTML-to-PDF conversions that reused the same pooled
page could have their external resources blocked by the stale interceptor.
- Export recyclePage for testability
- Add removeAllListeners('request') + setRequestInterception(false)
- Add browser-recycle.test.ts with TDD (red→green verified)
Tests: 443 passing (was 442)
349 lines
12 KiB
TypeScript
349 lines
12 KiB
TypeScript
import puppeteer, { Browser, Page } from "puppeteer";
|
||
import logger from "./logger.js";
|
||
|
||
/**
 * Reads a positive integer from the environment variable `name`.
 *
 * Returns `fallback` when the variable is unset, empty, not numeric, zero or
 * negative, so a typo in the environment cannot configure an empty pool.
 */
function readPositiveIntEnv(name: string, fallback: number): number {
  const parsed = parseInt(process.env[name] || "", 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/** Number of browser processes launched by initBrowser (env BROWSER_COUNT, default 2). */
const BROWSER_COUNT = readPositiveIntEnv("BROWSER_COUNT", 2);
/** Number of pages opened per browser (env PAGES_PER_BROWSER, default 8). */
const PAGES_PER_BROWSER = readPositiveIntEnv("PAGES_PER_BROWSER", 8);
/** A browser is scheduled for restart once its pdfCount reaches this value. */
const RESTART_AFTER_PDFS = 1000;
/** A browser is scheduled for restart once it has been up this long. */
const RESTART_AFTER_MS = 60 * 60 * 1000; // 1 hour
|
||
|
||
/** Bookkeeping for one browser process in the pool and the pages it owns. */
interface BrowserInstance {
  /** Browser handle; replaced with a new one each time the instance is restarted. */
  browser: Browser;
  /** Pages that are idle and can be handed out right away. */
  availablePages: Page[];
  /** Incremented on every page release; reset to 0 after a restart. */
  pdfCount: number;
  /** `Date.now()` timestamp of the launch or most recent restart. */
  lastRestartTime: number;
  /** True while a restart is in progress; such instances are skipped when picking a page. */
  restarting: boolean;
  /** Identifier assigned at launch; used in log messages and pool stats. */
  id: number;
}
|
||
|
||
/** Every browser instance currently managed by the pool. */
const instances: BrowserInstance[] = [];

/** Callers parked until a page frees up; new waiters are appended, the oldest is served first. */
const waitingQueue: { resolve: (v: { page: Page; instance: BrowserInstance }) => void }[] = [];

/** Index in `instances` where the next round-robin scan starts. */
let roundRobinIndex = 0;
|
||
|
||
/**
 * Builds a snapshot of the pool for monitoring: total and idle page counts,
 * queue depth, PDFs counted so far, whether any browser is restarting, and a
 * per-browser breakdown. `uptimeMs` is measured from the first instance's
 * last restart time.
 */
export function getPoolStats() {
  let idlePages = 0;
  let pdfsRendered = 0;
  let anyRestarting = false;
  for (const inst of instances) {
    idlePages += inst.availablePages.length;
    pdfsRendered += inst.pdfCount;
    anyRestarting = anyRestarting || inst.restarting;
  }

  const capacity = instances.length * PAGES_PER_BROWSER;
  const firstStart = instances[0]?.lastRestartTime;

  const browsers = instances.map((inst) => {
    return {
      id: inst.id,
      available: inst.availablePages.length,
      pdfCount: inst.pdfCount,
      restarting: inst.restarting,
    };
  });

  return {
    poolSize: capacity,
    totalPages: capacity,
    availablePages: idlePages,
    queueDepth: waitingQueue.length,
    pdfCount: pdfsRendered,
    restarting: anyRestarting,
    uptimeMs: Date.now() - (firstStart || Date.now()),
    browsers,
  };
}
|
||
|
||
/**
 * Resets a pooled page so the next render starts from a clean state.
 *
 * Steps, each attempted independently so that one failure cannot skip the rest:
 *  1. remove "request" listeners and disable request interception
 *     (set by renderUrlPdf for SSRF protection),
 *  2. clear the browser cache through a short-lived CDP session,
 *  3. delete the cookies reported by the page,
 *  4. navigate to about:blank (5s timeout).
 *
 * Never rejects: recycling is best-effort and all step errors are ignored.
 *
 * @param page The page being returned to the pool.
 */
export async function recyclePage(page: Page): Promise<void> {
  // Runs one cleanup step and ignores any failure (sync throw or rejection).
  const bestEffort = async (step: () => unknown): Promise<void> => {
    try {
      await step();
    } catch {
      // ignore — the page may already be closed or crashed
    }
  };

  // Interception cleanup goes first: a stale interceptor would block external
  // resources for the next render, so it must not depend on the CDP step working.
  await bestEffort(() => page.removeAllListeners("request"));
  await bestEffort(() => page.setRequestInterception(false));

  await bestEffort(async () => {
    const client = await page.createCDPSession();
    await client.send("Network.clearBrowserCache").catch(() => {});
    await client.detach().catch(() => {});
  });

  await bestEffort(async () => {
    const cookies = await page.cookies();
    if (cookies.length > 0) {
      await page.deleteCookie(...cookies);
    }
  });

  await bestEffort(() => page.goto("about:blank", { timeout: 5000 }));
}
|
||
|
||
/**
 * Opens `count` pages on browser `b`, one after another, and returns them in
 * creation order.
 */
async function createPages(b: Browser, count: number): Promise<Page[]> {
  const opened: Page[] = [];
  while (opened.length < count) {
    opened.push(await b.newPage());
  }
  return opened;
}
|
||
|
||
/**
 * Chooses the next browser instance that has an idle page and is not
 * restarting, scanning round-robin from `roundRobinIndex`.
 *
 * On success the cursor advances to the slot after the chosen one.
 * Returns null when no instance qualifies.
 */
function pickInstance(): BrowserInstance | null {
  const total = instances.length;
  let offset = 0;
  while (offset < total) {
    const slot = (roundRobinIndex + offset) % total;
    const candidate = instances[slot];
    const usable = candidate.availablePages.length > 0 && !candidate.restarting;
    if (usable) {
      roundRobinIndex = (slot + 1) % total;
      return candidate;
    }
    offset += 1;
  }
  return null;
}
|
||
|
||
/**
 * Hands out an idle page together with the instance that owns it.
 *
 * Before picking, any instance that has reached RESTART_AFTER_PDFS or
 * RESTART_AFTER_MS is scheduled for a restart. If no page is idle the caller
 * is queued and served when a page is released.
 *
 * @returns The acquired page and its owning instance.
 * @throws Error("QUEUE_FULL") when no page becomes available within 30 seconds.
 */
async function acquirePage(): Promise<{ page: Page; instance: BrowserInstance }> {
  // Check restarts (not awaited: the restart proceeds in the background)
  for (const inst of instances) {
    if (!inst.restarting && (inst.pdfCount >= RESTART_AFTER_PDFS || Date.now() - inst.lastRestartTime >= RESTART_AFTER_MS)) {
      scheduleRestart(inst);
    }
  }

  const inst = pickInstance();
  if (inst) {
    const page = inst.availablePages.pop()!;
    return { page, instance: inst };
  }

  // All pages busy, queue with 30s timeout
  return new Promise((resolve, reject) => {
    let expired = false;

    const waiter = {
      resolve: (v: { page: Page; instance: BrowserInstance }) => {
        if (expired) {
          // This caller already got QUEUE_FULL. Pass the page on instead of
          // dropping it, otherwise it would be lost to the pool.
          const next = waitingQueue.shift();
          if (next) {
            next.resolve(v);
          } else {
            v.instance.availablePages.push(v.page);
          }
          return;
        }
        clearTimeout(timer);
        resolve(v);
      },
    };

    const timer = setTimeout(() => {
      expired = true;
      // Look the entry up by identity. Its `resolve` is a wrapper, so comparing
      // against the promise's own `resolve` would never match.
      const idx = waitingQueue.indexOf(waiter);
      if (idx >= 0) waitingQueue.splice(idx, 1);
      reject(new Error("QUEUE_FULL"));
    }, 30_000);

    waitingQueue.push(waiter);
  });
}
|
||
|
||
/**
 * Returns a page to the pool after a render and counts it towards the
 * instance's restart threshold.
 *
 * The page is recycled first. If a caller is waiting, the recycled page goes
 * straight to the oldest waiter; otherwise it is put back into the instance's
 * idle list. Should recycling reject, a replacement page is opened when the
 * instance has a browser and is not restarting; a waiter that cannot be
 * served is put back at the front of the queue.
 *
 * Returns immediately; the recycling runs in the background.
 */
function releasePage(page: Page, inst: BrowserInstance): void {
  inst.pdfCount++;

  const nextWaiter = waitingQueue.shift();

  if (nextWaiter) {
    void (async () => {
      try {
        await recyclePage(page);
        nextWaiter.resolve({ page, instance: inst });
      } catch {
        if (!(inst.browser && !inst.restarting)) {
          waitingQueue.unshift(nextWaiter);
          return;
        }
        try {
          const replacement = await inst.browser.newPage();
          nextWaiter.resolve({ page: replacement, instance: inst });
        } catch {
          waitingQueue.unshift(nextWaiter);
        }
      }
    })();
    return;
  }

  void (async () => {
    try {
      await recyclePage(page);
      inst.availablePages.push(page);
    } catch {
      if (!(inst.browser && !inst.restarting)) return;
      try {
        const replacement = await inst.browser.newPage();
        inst.availablePages.push(replacement);
      } catch {
        // no replacement could be opened; the pool simply has one page fewer
      }
    }
  })();
}
|
||
|
||
/**
 * Restarts one browser instance in place.
 *
 * Marks the instance as restarting, waits up to 30 seconds for all of its
 * pages to be idle and the waiting queue to be empty, closes the idle pages
 * and the browser, launches a new browser with a fresh set of pages, resets
 * the counters, and finally serves any queued waiters from the new pages.
 *
 * No-op when a restart is already in progress.
 *
 * @throws Re-throws any launch or page-creation error. The `restarting` flag
 *   is cleared first so that a later call can retry instead of the instance
 *   staying flagged forever.
 */
async function scheduleRestart(inst: BrowserInstance): Promise<void> {
  if (inst.restarting) return;
  inst.restarting = true;
  logger.info(`Scheduling browser ${inst.id} restart (pdfs=${inst.pdfCount}, uptime=${Math.round((Date.now() - inst.lastRestartTime) / 1000)}s)`);

  try {
    // Wait for in-flight work to drain, capped at 30 seconds.
    let polling = true;
    let drainDeadline: ReturnType<typeof setTimeout> | undefined;

    const drained = new Promise<void>((resolve) => {
      const check = () => {
        if (!polling) return; // the wait is over; stop rescheduling
        if (inst.availablePages.length === PAGES_PER_BROWSER && waitingQueue.length === 0) {
          resolve();
        } else {
          setTimeout(check, 100);
        }
      };
      check();
    });
    const deadline = new Promise<void>((resolve) => {
      drainDeadline = setTimeout(resolve, 30000);
    });

    try {
      await Promise.race([drained, deadline]);
    } finally {
      // Whichever side won, stop the poll loop and drop the deadline timer so
      // neither lingers after the restart.
      polling = false;
      clearTimeout(drainDeadline);
    }

    for (const page of inst.availablePages) {
      await page.close().catch(() => {});
    }
    inst.availablePages.length = 0;

    try {
      await inst.browser.close().catch(() => {});
    } catch {
      // ignore — the old browser may already be gone
    }

    const execPath = process.env.PUPPETEER_EXECUTABLE_PATH || undefined;
    inst.browser = await puppeteer.launch({
      headless: true,
      executablePath: execPath,
      args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
    });

    const pages = await createPages(inst.browser, PAGES_PER_BROWSER);
    inst.availablePages.push(...pages);

    inst.pdfCount = 0;
    inst.lastRestartTime = Date.now();
    inst.restarting = false;
    logger.info(`Browser ${inst.id} restarted successfully`);
  } catch (err) {
    // Counters are left untouched, so the restart condition still holds and
    // the next acquirePage() call will attempt the restart again.
    inst.restarting = false;
    throw err;
  }

  // Hand the fresh pages to callers that queued up during the restart.
  while (waitingQueue.length > 0 && inst.availablePages.length > 0) {
    const waiter = waitingQueue.shift();
    const p = inst.availablePages.pop();
    if (waiter && p) waiter.resolve({ page: p, instance: inst });
  }
}
|
||
|
||
/**
 * Launches one headless browser and opens PAGES_PER_BROWSER pages on it.
 *
 * The executable path comes from PUPPETEER_EXECUTABLE_PATH when set.
 *
 * @param id Identifier stored on the returned instance.
 * @returns A pool entry with zeroed counters and all pages idle.
 */
async function launchInstance(id: number): Promise<BrowserInstance> {
  const chromeFlags = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"];

  const browser = await puppeteer.launch({
    headless: true,
    executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || undefined,
    args: chromeFlags,
  });

  const idlePages = await createPages(browser, PAGES_PER_BROWSER);
  const startedAt = Date.now();

  return {
    browser,
    availablePages: idlePages,
    pdfCount: 0,
    lastRestartTime: startedAt,
    restarting: false,
    id,
  };
}
|
||
|
||
/**
 * Fills the pool: launches BROWSER_COUNT browser instances one after another
 * (ids 0..BROWSER_COUNT-1) and logs the resulting capacity.
 */
export async function initBrowser(): Promise<void> {
  let nextId = 0;
  while (nextId < BROWSER_COUNT) {
    const launched = await launchInstance(nextId);
    instances.push(launched);
    nextId += 1;
  }

  const totalPages = BROWSER_COUNT * PAGES_PER_BROWSER;
  logger.info(`Browser pool ready (${BROWSER_COUNT} browsers × ${PAGES_PER_BROWSER} pages = ${totalPages} total)`);
}
|
||
|
||
/**
 * Shuts the pool down: for each instance, closes its idle pages and then its
 * browser, ignoring close errors, and finally empties the instance list.
 */
export async function closeBrowser(): Promise<void> {
  const ignoreError = () => {};

  for (const entry of instances) {
    for (const idlePage of entry.availablePages) {
      await idlePage.close().catch(ignoreError);
    }
    entry.availablePages.length = 0;

    await entry.browser.close().catch(ignoreError);
  }

  instances.length = 0;
}
|
||
|
||
/**
 * Caller-supplied PDF settings. The render functions forward these to
 * `page.pdf()`, applying defaults for the fields noted below.
 */
export interface PdfRenderOptions {
  /** Paper format name; "A4" is used when omitted. */
  format?: string;
  /** Landscape orientation; off when omitted. */
  landscape?: boolean;
  /** Page margins; all four sides default to "0" when the object is omitted. */
  margin?: { top?: string; right?: string; bottom?: string; left?: string };
  /** Print background graphics; on unless explicitly set to false. */
  printBackground?: boolean;
  /** Header template passed through to the PDF call. */
  headerTemplate?: string;
  /** Footer template passed through to the PDF call. */
  footerTemplate?: string;
  /** Whether to display the header and footer. */
  displayHeaderFooter?: boolean;
  /** Rendering scale; forwarded only when defined. */
  scale?: number;
  /** Page ranges to print; forwarded only when non-empty. */
  pageRanges?: string;
  /** Forwarded only when defined. */
  preferCSSPageSize?: boolean;
  /** Paper width; forwarded only when non-empty. */
  width?: string;
  /** Paper height; forwarded only when non-empty. */
  height?: string;
}
|
||
|
||
/**
 * Renders an HTML string to a PDF using a pooled page.
 *
 * JavaScript is disabled on the page, the HTML is loaded (15s limit), a
 * margin/padding reset stylesheet is injected, and the page is printed with
 * the given options. The whole render is capped at 30 seconds.
 *
 * @param html Markup to render.
 * @param options PDF settings; see PdfRenderOptions for defaults.
 * @returns The PDF bytes.
 * @throws Error("PDF_TIMEOUT") when rendering exceeds 30 seconds; also
 *   propagates page acquisition and Puppeteer errors.
 */
export async function renderPdf(
  html: string,
  options: PdfRenderOptions = {}
): Promise<Buffer> {
  const { page, instance } = await acquirePage();
  let watchdog: ReturnType<typeof setTimeout> | undefined;
  try {
    await page.setJavaScriptEnabled(false);

    const render = (async () => {
      await page.setContent(html, { waitUntil: "domcontentloaded", timeout: 15_000 });
      await page.addStyleTag({ content: "* { margin: 0; padding: 0; } body { margin: 0; }" });
      const pdf = await page.pdf({
        format: (options.format as any) || "A4",
        landscape: options.landscape || false,
        printBackground: options.printBackground !== false,
        margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
        headerTemplate: options.headerTemplate,
        footerTemplate: options.footerTemplate,
        displayHeaderFooter: options.displayHeaderFooter || false,
        ...(options.scale !== undefined && { scale: options.scale }),
        ...(options.pageRanges && { pageRanges: options.pageRanges }),
        ...(options.preferCSSPageSize !== undefined && { preferCSSPageSize: options.preferCSSPageSize }),
        ...(options.width && { width: options.width }),
        ...(options.height && { height: options.height }),
      });
      return Buffer.from(pdf);
    })();

    const timeout = new Promise<never>((_, reject) => {
      watchdog = setTimeout(() => reject(new Error("PDF_TIMEOUT")), 30_000);
    });

    return await Promise.race([render, timeout]);
  } finally {
    // Drop the watchdog so a finished render does not leave a 30s timer behind.
    clearTimeout(watchdog);
    releasePage(page, instance);
  }
}
|
||
|
||
/**
 * Navigates a pooled page to `url` and prints it to PDF.
 *
 * JavaScript is disabled on the page. When `options.hostResolverRules` has the
 * form "MAP <hostname> <ip>", request interception is enabled so that:
 *  - HTTP requests to <hostname> are rewritten to <ip>, keeping the original
 *    host (including any port) in the Host header,
 *  - HTTPS requests to <hostname> continue unchanged,
 *  - requests to any other host are aborted.
 * Removing the interceptor is left to page recycling on release.
 *
 * Navigation and the overall render are each capped at 30 seconds.
 *
 * @param url Address to render.
 * @param options PDF settings plus `waitUntil` (navigation wait condition,
 *   default "domcontentloaded") and `hostResolverRules` (DNS pin).
 * @returns The PDF bytes.
 * @throws Error("PDF_TIMEOUT") when rendering exceeds 30 seconds; also
 *   propagates page acquisition and Puppeteer errors.
 */
export async function renderUrlPdf(
  url: string,
  options: PdfRenderOptions & {
    waitUntil?: string;
    hostResolverRules?: string;
  } = {}
): Promise<Buffer> {
  const { page, instance } = await acquirePage();
  let watchdog: ReturnType<typeof setTimeout> | undefined;
  let cdpSession: { detach(): Promise<void> } | undefined;

  // Runs an interception decision and ignores its failure. continue()/abort()
  // can fail when the request was already handled or the page is going away,
  // and that must not surface as an unhandled rejection.
  const settle = (action: () => unknown): void => {
    try {
      Promise.resolve(action()).catch(() => {});
    } catch {
      // ignore
    }
  };

  try {
    await page.setJavaScriptEnabled(false);

    // Pin DNS resolution to prevent DNS rebinding SSRF attacks
    if (options.hostResolverRules) {
      const client = await page.createCDPSession();
      cdpSession = client;
      await client.send("Network.enable");

      // Extract hostname and IP from rules like "MAP hostname ip"
      const match = options.hostResolverRules.match(/^MAP\s+(\S+)\s+(\S+)$/);
      if (match) {
        const [, hostname, ip] = match;
        // URL lowercases hostnames, so compare against a lowercased rule.
        const pinnedHost = hostname.toLowerCase();
        // A bare IPv6 literal needs brackets to be a valid URL hostname.
        const pinnedAddress = ip.includes(":") && !ip.startsWith("[") ? `[${ip}]` : ip;

        await page.setRequestInterception(true);
        page.on("request", (request) => {
          let reqUrl: URL;
          try {
            reqUrl = new URL(request.url());
          } catch {
            // An unparseable URL cannot be verified against the pin: block it.
            settle(() => request.abort("blockedbyclient"));
            return;
          }

          if (reqUrl.hostname !== pinnedHost) {
            // Block any requests to other hosts (prevent redirects to internal IPs)
            settle(() => request.abort("blockedbyclient"));
            return;
          }

          if (reqUrl.protocol === "http:") {
            // For HTTP, rewrite to IP with Host header. Capture the host
            // before the rewrite so a non-default port is preserved.
            const originalHost = reqUrl.host;
            reqUrl.hostname = pinnedAddress;
            settle(() =>
              request.continue({
                url: reqUrl.toString(),
                headers: { ...request.headers(), host: originalHost },
              })
            );
          } else {
            // For HTTPS, we can't easily swap the IP without cert issues
            // But we've already validated the IP, and the short window makes rebinding unlikely
            // Combined with JS disabled, this is sufficient mitigation
            settle(() => request.continue());
          }
        });
      }
    }

    const render = (async () => {
      await page.goto(url, {
        waitUntil: (options.waitUntil as any) || "domcontentloaded",
        timeout: 30_000,
      });
      const pdf = await page.pdf({
        format: (options.format as any) || "A4",
        landscape: options.landscape || false,
        printBackground: options.printBackground !== false,
        margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
        ...(options.headerTemplate && { headerTemplate: options.headerTemplate }),
        ...(options.footerTemplate && { footerTemplate: options.footerTemplate }),
        ...(options.displayHeaderFooter !== undefined && { displayHeaderFooter: options.displayHeaderFooter }),
        ...(options.scale !== undefined && { scale: options.scale }),
        ...(options.pageRanges && { pageRanges: options.pageRanges }),
        ...(options.preferCSSPageSize !== undefined && { preferCSSPageSize: options.preferCSSPageSize }),
        ...(options.width && { width: options.width }),
        ...(options.height && { height: options.height }),
      });
      return Buffer.from(pdf);
    })();

    const timeout = new Promise<never>((_, reject) => {
      watchdog = setTimeout(() => reject(new Error("PDF_TIMEOUT")), 30_000);
    });

    return await Promise.race([render, timeout]);
  } finally {
    // Drop the watchdog so a finished render does not leave a 30s timer behind.
    clearTimeout(watchdog);
    // Detach the CDP session opened above; pooled pages would otherwise
    // accumulate one session per pinned render.
    if (cdpSession) {
      const session = cdpSession;
      settle(() => session.detach());
    }
    releasePage(page, instance);
  }
}
|