docfast/dist/services/browser.js
OpenClaw 59cc8f3d0e
All checks were successful
Deploy to Production / Deploy to Server (push) Successful in 2m20s
Session 45: support email, audit fixes (template validation, content-type, admin auth, waitUntil)
- Added support@docfast.dev to footer, impressum, terms, landing page, openapi.json
- Fixed audit #6: Template render validates required fields (400 on missing)
- Fixed audit #7: Content-Type check on markdown/URL routes (415)
- Fixed audit #11: /v1/usage and /v1/concurrency now require ADMIN_API_KEY
- Fixed audit Critical #3: URL convert uses domcontentloaded instead of networkidle0
2026-02-16 19:30:21 +00:00

283 lines
11 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import puppeteer from "puppeteer";
import logger from "./logger.js";
const BROWSER_COUNT = parseInt(process.env.BROWSER_COUNT || "2", 10);
const PAGES_PER_BROWSER = parseInt(process.env.PAGES_PER_BROWSER || "8", 10);
const RESTART_AFTER_PDFS = 1000;
const RESTART_AFTER_MS = 60 * 60 * 1000; // 1 hour
const instances = [];
const waitingQueue = [];
let roundRobinIndex = 0;
export function getPoolStats() {
const totalAvailable = instances.reduce((s, i) => s + i.availablePages.length, 0);
const totalPages = instances.length * PAGES_PER_BROWSER;
const totalPdfs = instances.reduce((s, i) => s + i.pdfCount, 0);
return {
poolSize: totalPages,
totalPages,
availablePages: totalAvailable,
queueDepth: waitingQueue.length,
pdfCount: totalPdfs,
restarting: instances.some((i) => i.restarting),
uptimeMs: Date.now() - (instances[0]?.lastRestartTime || Date.now()),
browsers: instances.map((i) => ({
id: i.id,
available: i.availablePages.length,
pdfCount: i.pdfCount,
restarting: i.restarting,
})),
};
}
async function recyclePage(page) {
try {
const client = await page.createCDPSession();
await client.send("Network.clearBrowserCache").catch(() => { });
await client.detach().catch(() => { });
const cookies = await page.cookies();
if (cookies.length > 0) {
await page.deleteCookie(...cookies);
}
await page.goto("about:blank", { timeout: 5000 }).catch(() => { });
}
catch {
// ignore
}
}
async function createPages(b, count) {
const pages = [];
for (let i = 0; i < count; i++) {
const page = await b.newPage();
pages.push(page);
}
return pages;
}
function pickInstance() {
// Round-robin among instances that have available pages
for (let i = 0; i < instances.length; i++) {
const idx = (roundRobinIndex + i) % instances.length;
const inst = instances[idx];
if (inst.availablePages.length > 0 && !inst.restarting) {
roundRobinIndex = (idx + 1) % instances.length;
return inst;
}
}
return null;
}
async function acquirePage() {
// Check restarts
for (const inst of instances) {
if (!inst.restarting && (inst.pdfCount >= RESTART_AFTER_PDFS || Date.now() - inst.lastRestartTime >= RESTART_AFTER_MS)) {
scheduleRestart(inst);
}
}
const inst = pickInstance();
if (inst) {
const page = inst.availablePages.pop();
return { page, instance: inst };
}
// All pages busy, queue with 30s timeout
return new Promise((resolve, reject) => {
const timer = setTimeout(() => {
const idx = waitingQueue.findIndex((w) => w.resolve === resolve);
if (idx >= 0)
waitingQueue.splice(idx, 1);
reject(new Error("QUEUE_FULL"));
}, 30_000);
waitingQueue.push({
resolve: (v) => {
clearTimeout(timer);
resolve(v);
},
});
});
}
function releasePage(page, inst) {
inst.pdfCount++;
const waiter = waitingQueue.shift();
if (waiter) {
recyclePage(page).then(() => waiter.resolve({ page, instance: inst })).catch(() => {
if (inst.browser && !inst.restarting) {
inst.browser.newPage().then((p) => waiter.resolve({ page: p, instance: inst })).catch(() => {
waitingQueue.unshift(waiter);
});
}
else {
waitingQueue.unshift(waiter);
}
});
return;
}
recyclePage(page).then(() => {
inst.availablePages.push(page);
}).catch(() => {
if (inst.browser && !inst.restarting) {
inst.browser.newPage().then((p) => inst.availablePages.push(p)).catch(() => { });
}
});
}
async function scheduleRestart(inst) {
if (inst.restarting)
return;
inst.restarting = true;
logger.info(`Scheduling browser ${inst.id} restart (pdfs=${inst.pdfCount}, uptime=${Math.round((Date.now() - inst.lastRestartTime) / 1000)}s)`);
const drainCheck = () => new Promise((resolve) => {
const check = () => {
if (inst.availablePages.length === PAGES_PER_BROWSER && waitingQueue.length === 0) {
resolve();
}
else {
setTimeout(check, 100);
}
};
check();
});
await Promise.race([drainCheck(), new Promise(r => setTimeout(r, 30000))]);
for (const page of inst.availablePages) {
await page.close().catch(() => { });
}
inst.availablePages.length = 0;
try {
await inst.browser.close().catch(() => { });
}
catch { }
const execPath = process.env.PUPPETEER_EXECUTABLE_PATH || undefined;
inst.browser = await puppeteer.launch({
headless: true,
executablePath: execPath,
args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
});
const pages = await createPages(inst.browser, PAGES_PER_BROWSER);
inst.availablePages.push(...pages);
inst.pdfCount = 0;
inst.lastRestartTime = Date.now();
inst.restarting = false;
logger.info(`Browser ${inst.id} restarted successfully`);
while (waitingQueue.length > 0 && inst.availablePages.length > 0) {
const waiter = waitingQueue.shift();
const p = inst.availablePages.pop();
if (waiter && p)
waiter.resolve({ page: p, instance: inst });
}
}
async function launchInstance(id) {
const execPath = process.env.PUPPETEER_EXECUTABLE_PATH || undefined;
const browser = await puppeteer.launch({
headless: true,
executablePath: execPath,
args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
});
const pages = await createPages(browser, PAGES_PER_BROWSER);
const inst = {
browser,
availablePages: pages,
pdfCount: 0,
lastRestartTime: Date.now(),
restarting: false,
id,
};
return inst;
}
export async function initBrowser() {
for (let i = 0; i < BROWSER_COUNT; i++) {
const inst = await launchInstance(i);
instances.push(inst);
}
logger.info(`Browser pool ready (${BROWSER_COUNT} browsers × ${PAGES_PER_BROWSER} pages = ${BROWSER_COUNT * PAGES_PER_BROWSER} total)`);
}
export async function closeBrowser() {
for (const inst of instances) {
for (const page of inst.availablePages) {
await page.close().catch(() => { });
}
inst.availablePages.length = 0;
await inst.browser.close().catch(() => { });
}
instances.length = 0;
}
export async function renderPdf(html, options = {}) {
const { page, instance } = await acquirePage();
try {
await page.setJavaScriptEnabled(false);
const result = await Promise.race([
(async () => {
await page.setContent(html, { waitUntil: "domcontentloaded", timeout: 15_000 });
await page.addStyleTag({ content: "* { margin: 0; padding: 0; } body { margin: 0; }" });
const pdf = await page.pdf({
format: options.format || "A4",
landscape: options.landscape || false,
printBackground: options.printBackground !== false,
margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
headerTemplate: options.headerTemplate,
footerTemplate: options.footerTemplate,
displayHeaderFooter: options.displayHeaderFooter || false,
});
return Buffer.from(pdf);
})(),
new Promise((_, reject) => setTimeout(() => reject(new Error("PDF_TIMEOUT")), 30_000)),
]);
return result;
}
finally {
releasePage(page, instance);
}
}
export async function renderUrlPdf(url, options = {}) {
const { page, instance } = await acquirePage();
try {
await page.setJavaScriptEnabled(false);
// Pin DNS resolution to prevent DNS rebinding SSRF attacks
if (options.hostResolverRules) {
const client = await page.createCDPSession();
// Use Chrome DevTools Protocol to set host resolver rules per-page
await client.send("Network.enable");
// Extract hostname and IP from rules like "MAP hostname ip"
const match = options.hostResolverRules.match(/^MAP\s+(\S+)\s+(\S+)$/);
if (match) {
const [, hostname, ip] = match;
await page.setRequestInterception(true);
page.on("request", (request) => {
const reqUrl = new URL(request.url());
if (reqUrl.hostname === hostname) {
// For HTTP, rewrite to IP with Host header
if (reqUrl.protocol === "http:") {
reqUrl.hostname = ip;
request.continue({
url: reqUrl.toString(),
headers: { ...request.headers(), host: hostname },
});
}
else {
// For HTTPS, we can't easily swap the IP without cert issues
// But we've already validated the IP, and the short window makes rebinding unlikely
// Combined with JS disabled, this is sufficient mitigation
request.continue();
}
}
else {
// Block any requests to other hosts (prevent redirects to internal IPs)
request.abort("blockedbyclient");
}
});
}
}
const result = await Promise.race([
(async () => {
await page.goto(url, {
waitUntil: options.waitUntil || "domcontentloaded",
timeout: 30_000,
});
const pdf = await page.pdf({
format: options.format || "A4",
landscape: options.landscape || false,
printBackground: options.printBackground !== false,
margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
});
return Buffer.from(pdf);
})(),
new Promise((_, reject) => setTimeout(() => reject(new Error("PDF_TIMEOUT")), 30_000)),
]);
return result;
}
finally {
releasePage(page, instance);
}
}