docfast/dist/services/browser.js
openclawd 1ef8f5743c
Some checks failed
Deploy to Production / Deploy to Server (push) Failing after 20s
feat: Add built dist files with EU compliance routes
- Include compiled TypeScript with new /impressum, /privacy, /terms routes
- Temporary commit of dist files for Docker deployment
2026-02-16 13:09:25 +00:00

246 lines
8.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import puppeteer from "puppeteer";
import logger from "./logger.js";
/**
 * Reads a positive integer from an environment variable.
 *
 * A missing, empty, non-numeric, zero or negative value yields `fallback`
 * instead of NaN/0, which would otherwise produce a pool with no browsers
 * or no pages and make every request wait in the queue until it times out.
 *
 * @param {string} name - Environment variable to read.
 * @param {number} fallback - Value used when the variable is unusable.
 * @returns {number} The parsed value, or `fallback`.
 */
function readPositiveIntEnv(name, fallback) {
    const parsed = Number.parseInt(process.env[name] ?? "", 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}
// Pool dimensions, overridable through the environment.
const BROWSER_COUNT = readPositiveIntEnv("BROWSER_COUNT", 2);
const PAGES_PER_BROWSER = readPositiveIntEnv("PAGES_PER_BROWSER", 8);
// A browser is scheduled for restart once it reaches either limit.
const RESTART_AFTER_PDFS = 1000;
const RESTART_AFTER_MS = 60 * 60 * 1000; // 1 hour
// Pool state shared by every function in this module.
const instances = []; // one entry per launched browser
const waitingQueue = []; // callers waiting for a page, oldest first
let roundRobinIndex = 0; // index of the instance to try first on the next pick
/**
 * Builds a snapshot of the browser pool for monitoring.
 *
 * @returns {{
 *   poolSize: number, totalPages: number, availablePages: number,
 *   queueDepth: number, pdfCount: number, restarting: boolean,
 *   uptimeMs: number,
 *   browsers: Array<{id: *, available: number, pdfCount: number, restarting: boolean}>
 * }} Aggregate counters plus one entry per browser instance. `poolSize` and
 *   `totalPages` carry the same value. `uptimeMs` is measured from the first
 *   instance's last restart and is 0 when the pool is empty.
 */
export function getPoolStats() {
    // Sample the clock once: two separate reads could differ by a tick and
    // report a negative uptime when there is no instance to measure from.
    const now = Date.now();
    const totalPages = instances.length * PAGES_PER_BROWSER;
    const browsers = instances.map((inst) => ({
        id: inst.id,
        available: inst.availablePages.length,
        pdfCount: inst.pdfCount,
        restarting: inst.restarting,
    }));
    const availablePages = browsers.reduce((sum, b) => sum + b.available, 0);
    const pdfCount = browsers.reduce((sum, b) => sum + b.pdfCount, 0);
    return {
        poolSize: totalPages,
        totalPages,
        availablePages,
        queueDepth: waitingQueue.length,
        pdfCount,
        restarting: instances.some((inst) => inst.restarting),
        uptimeMs: now - (instances[0]?.lastRestartTime || now),
        browsers,
    };
}
/**
 * Best-effort reset of a page before it is reused: clears the browser cache
 * through a CDP session, deletes the page's cookies and navigates to
 * about:blank.
 *
 * Every failure is swallowed, so the returned promise always fulfils.
 *
 * @param {object} page - Puppeteer page to reset.
 * @returns {Promise<void>}
 */
async function recyclePage(page) {
    const ignore = () => { };
    const scrub = async () => {
        // Cache clearing and detaching are individually optional.
        const session = await page.createCDPSession();
        await session.send("Network.clearBrowserCache").catch(ignore);
        await session.detach().catch(ignore);
        const jar = await page.cookies();
        if (jar.length > 0) {
            await page.deleteCookie(...jar);
        }
        await page.goto("about:blank", { timeout: 5000 }).catch(ignore);
    };
    // Anything that escapes the individual guards above is ignored as well.
    await scrub().catch(ignore);
}
/**
 * Opens `count` pages on a browser, one after another.
 *
 * @param {object} b - Browser exposing `newPage()`.
 * @param {number} count - Number of pages to open.
 * @returns {Promise<object[]>} The pages in creation order.
 */
async function createPages(b, count) {
    const opened = [];
    let remaining = 0;
    // Sequential on purpose: each page is awaited before the next is requested.
    while (remaining < count) {
        opened.push(await b.newPage());
        remaining++;
    }
    return opened;
}
/**
 * Chooses the next browser instance that can serve a request.
 *
 * Scans the instances starting at `roundRobinIndex`, skipping any that are
 * restarting or have no idle page. On a hit, `roundRobinIndex` advances to
 * the slot after the chosen one.
 *
 * @returns {object|null} The chosen instance, or null when none is usable.
 */
function pickInstance() {
    const total = instances.length;
    for (let step = 0; step < total; step++) {
        const slot = (roundRobinIndex + step) % total;
        const candidate = instances[slot];
        if (candidate.availablePages.length === 0 || candidate.restarting) {
            continue;
        }
        roundRobinIndex = (slot + 1) % total;
        return candidate;
    }
    return null;
}
/**
 * Reserves a page from the pool, waiting up to 30 seconds if all are busy.
 *
 * Before picking, every instance that has reached its PDF-count or age limit
 * (and is not already restarting) is scheduled for restart.
 *
 * @returns {Promise<{page: object, instance: object}>} The reserved page and
 *   the instance that owns it.
 * @throws {Error} "QUEUE_FULL" when no page becomes free within 30 seconds.
 */
async function acquirePage() {
    for (const inst of instances) {
        const overPdfLimit = inst.pdfCount >= RESTART_AFTER_PDFS;
        const overAgeLimit = Date.now() - inst.lastRestartTime >= RESTART_AFTER_MS;
        if (!inst.restarting && (overPdfLimit || overAgeLimit)) {
            // Not awaited: the restart runs in the background.
            scheduleRestart(inst);
        }
    }
    const inst = pickInstance();
    if (inst) {
        const page = inst.availablePages.pop();
        return { page, instance: inst };
    }
    // All pages busy, queue with 30s timeout
    return new Promise((resolve, reject) => {
        const waiter = {
            resolve: (v) => {
                clearTimeout(timer);
                resolve(v);
            },
        };
        const timer = setTimeout(() => {
            // Look the entry up by identity. The queued `resolve` is a wrapper,
            // so comparing it with the raw `resolve` never matched; the expired
            // waiter stayed queued and any page later handed to it was lost.
            const idx = waitingQueue.indexOf(waiter);
            if (idx >= 0)
                waitingQueue.splice(idx, 1);
            reject(new Error("QUEUE_FULL"));
        }, 30_000);
        waitingQueue.push(waiter);
    });
}
/**
 * Returns a page to the pool after a render and counts the render.
 *
 * If a caller is waiting, the recycled page goes straight to the oldest
 * waiter; otherwise it goes back onto the instance's idle list. Should the
 * recycle/hand-off chain fail, a fresh page is opened on the instance's
 * browser instead, unless the instance has no browser or is restarting.
 * The work continues asynchronously after this function returns.
 *
 * @param {object} page - Page that was used for the render.
 * @param {object} inst - Instance that owns the page.
 * @returns {void}
 */
function releasePage(page, inst) {
    inst.pdfCount++;
    const pending = waitingQueue.shift();
    if (!pending) {
        // Nobody waiting: park the page (or a replacement) as idle.
        recyclePage(page)
            .then(() => {
                inst.availablePages.push(page);
            })
            .catch(() => {
                if (inst.browser && !inst.restarting) {
                    inst.browser.newPage().then((fresh) => inst.availablePages.push(fresh)).catch(() => { });
                }
            });
        return;
    }
    const handOver = (p) => pending.resolve({ page: p, instance: inst });
    // Put the waiter back at the head of the queue so it keeps its turn.
    const putBack = () => {
        waitingQueue.unshift(pending);
    };
    recyclePage(page)
        .then(() => handOver(page))
        .catch(() => {
            if (inst.browser && !inst.restarting) {
                inst.browser.newPage().then((fresh) => handOver(fresh)).catch(() => putBack());
            }
            else {
                putBack();
            }
        });
}
/**
 * Replaces an instance's browser with a freshly launched one.
 *
 * Marks the instance as restarting, waits up to 30 seconds for it to drain
 * (all of its pages idle and nobody queued), closes the idle pages and the
 * old browser, launches a new browser with a full set of pages, resets the
 * counters and finally hands pages to any queued waiters.
 *
 * A failed relaunch is logged rather than thrown, because callers start the
 * restart without awaiting it. The `restarting` flag is always cleared, and
 * the counters are left untouched on failure so the instance still exceeds
 * its limits and a later acquire schedules another attempt.
 *
 * @param {object} inst - Instance to restart; a no-op if already restarting.
 * @returns {Promise<void>}
 */
async function scheduleRestart(inst) {
    if (inst.restarting)
        return;
    inst.restarting = true;
    logger.info(`Scheduling browser ${inst.id} restart (pdfs=${inst.pdfCount}, uptime=${Math.round((Date.now() - inst.lastRestartTime) / 1000)}s)`);
    try {
        // Poll until drained or the deadline passes. A bounded loop stops
        // polling at the deadline instead of rescheduling itself forever.
        const deadline = Date.now() + 30_000;
        const isDrained = () => inst.availablePages.length === PAGES_PER_BROWSER && waitingQueue.length === 0;
        while (!isDrained() && Date.now() < deadline) {
            await new Promise((r) => setTimeout(r, 100));
        }
        for (const page of inst.availablePages) {
            await page.close().catch(() => { });
        }
        inst.availablePages.length = 0;
        try {
            await inst.browser.close();
        }
        catch {
            // The old browser may already be gone; nothing left to clean up.
        }
        const execPath = process.env.PUPPETEER_EXECUTABLE_PATH || undefined;
        inst.browser = await puppeteer.launch({
            headless: true,
            executablePath: execPath,
            args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
        });
        const pages = await createPages(inst.browser, PAGES_PER_BROWSER);
        inst.availablePages.push(...pages);
        inst.pdfCount = 0;
        inst.lastRestartTime = Date.now();
    }
    catch (err) {
        // NOTE(review): assumes the shared logger exposes `error` alongside `info` — confirm.
        logger.error(`Browser ${inst.id} restart failed: ${err?.message ?? err}`);
        return;
    }
    finally {
        // Never leave the instance stuck out of rotation.
        inst.restarting = false;
    }
    logger.info(`Browser ${inst.id} restarted successfully`);
    while (waitingQueue.length > 0 && inst.availablePages.length > 0) {
        const waiter = waitingQueue.shift();
        const p = inst.availablePages.pop();
        if (waiter && p)
            waiter.resolve({ page: p, instance: inst });
    }
}
/**
 * Launches one headless browser and opens its full set of pages.
 *
 * The executable path comes from PUPPETEER_EXECUTABLE_PATH when that is set
 * to a non-empty value. If opening the pages fails, the browser that was
 * just launched is closed before the error is rethrown, so a failed start
 * does not leave a browser process behind.
 *
 * @param {*} id - Identifier stored on the instance and used in log output.
 * @returns {Promise<object>} Instance record: `browser`, `availablePages`,
 *   `pdfCount`, `lastRestartTime`, `restarting`, `id`.
 */
async function launchInstance(id) {
    const execPath = process.env.PUPPETEER_EXECUTABLE_PATH || undefined;
    const browser = await puppeteer.launch({
        headless: true,
        executablePath: execPath,
        args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
    });
    let pages;
    try {
        pages = await createPages(browser, PAGES_PER_BROWSER);
    }
    catch (err) {
        try {
            await browser.close();
        }
        catch {
            // Best-effort cleanup; the original failure is what matters.
        }
        throw err;
    }
    return {
        browser,
        availablePages: pages,
        pdfCount: 0,
        lastRestartTime: Date.now(),
        restarting: false,
        id,
    };
}
/**
 * Starts the browser pool: launches BROWSER_COUNT instances one after
 * another, registering each as soon as it is up, then logs the pool size.
 *
 * @returns {Promise<void>}
 */
export async function initBrowser() {
    let nextId = 0;
    while (nextId < BROWSER_COUNT) {
        const launched = await launchInstance(nextId);
        instances.push(launched);
        nextId++;
    }
    const pageTotal = BROWSER_COUNT * PAGES_PER_BROWSER;
    logger.info(`Browser pool ready (${BROWSER_COUNT} browsers × ${PAGES_PER_BROWSER} pages = ${pageTotal} total)`);
}
/**
 * Shuts the pool down: for each instance in turn, closes its idle pages and
 * then its browser, ignoring close errors, and finally empties the instance
 * list.
 *
 * @returns {Promise<void>}
 */
export async function closeBrowser() {
    const closeQuietly = (target) => target.close().catch(() => { });
    for (const entry of instances) {
        for (const idlePage of entry.availablePages) {
            await closeQuietly(idlePage);
        }
        entry.availablePages.length = 0;
        await closeQuietly(entry.browser);
    }
    instances.length = 0;
}
/**
 * Renders an HTML string to a PDF using a pooled page.
 *
 * The page is always released back to the pool, whether the render succeeds,
 * fails or times out.
 *
 * @param {string} html - Markup to load into the page.
 * @param {object} [options] - PDF options.
 * @param {string} [options.format="A4"] - Paper format.
 * @param {boolean} [options.landscape=false] - Landscape orientation.
 * @param {boolean} [options.printBackground=true] - Background graphics are
 *   printed unless this is exactly `false`.
 * @param {object} [options.margin] - Page margins; all zero by default.
 * @param {string} [options.headerTemplate] - Passed through to `page.pdf`.
 * @param {string} [options.footerTemplate] - Passed through to `page.pdf`.
 * @param {boolean} [options.displayHeaderFooter=false] - Show header/footer.
 * @returns {Promise<Buffer>} The PDF bytes.
 * @throws {Error} "PDF_TIMEOUT" when rendering exceeds 30 seconds; also
 *   whatever acquiring a page or Puppeteer itself throws.
 */
export async function renderPdf(html, options = {}) {
    const { page, instance } = await acquirePage();
    let timeoutId;
    try {
        const render = (async () => {
            await page.setContent(html, { waitUntil: "domcontentloaded", timeout: 15_000 });
            // Zero out default margins/padding before printing.
            await page.addStyleTag({ content: "* { margin: 0; padding: 0; } body { margin: 0; }" });
            const pdf = await page.pdf({
                format: options.format || "A4",
                landscape: options.landscape || false,
                printBackground: options.printBackground !== false,
                margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
                headerTemplate: options.headerTemplate,
                footerTemplate: options.footerTemplate,
                displayHeaderFooter: options.displayHeaderFooter || false,
            });
            return Buffer.from(pdf);
        })();
        const deadline = new Promise((_, reject) => {
            timeoutId = setTimeout(() => reject(new Error("PDF_TIMEOUT")), 30_000);
        });
        return await Promise.race([render, deadline]);
    }
    finally {
        // Cancel the watchdog so a finished render does not leave a 30 s
        // timer pending.
        clearTimeout(timeoutId);
        releasePage(page, instance);
    }
}
/**
 * Navigates a pooled page to a URL and renders it to a PDF.
 *
 * The page is always released back to the pool, whether the render succeeds,
 * fails or times out.
 *
 * NOTE(review): the URL is loaded as given; this function does no
 * validation, so callers are presumably expected to restrict which hosts may
 * be requested — confirm.
 *
 * @param {string} url - Address to load.
 * @param {object} [options] - Navigation and PDF options.
 * @param {string} [options.waitUntil="networkidle0"] - Navigation condition.
 * @param {string} [options.format="A4"] - Paper format.
 * @param {boolean} [options.landscape=false] - Landscape orientation.
 * @param {boolean} [options.printBackground=true] - Background graphics are
 *   printed unless this is exactly `false`.
 * @param {object} [options.margin] - Page margins; all zero by default.
 * @returns {Promise<Buffer>} The PDF bytes.
 * @throws {Error} "PDF_TIMEOUT" when rendering exceeds 30 seconds; also
 *   whatever acquiring a page or Puppeteer itself throws.
 */
export async function renderUrlPdf(url, options = {}) {
    const { page, instance } = await acquirePage();
    let timeoutId;
    try {
        const render = (async () => {
            await page.goto(url, {
                waitUntil: options.waitUntil || "networkidle0",
                timeout: 30_000,
            });
            const pdf = await page.pdf({
                format: options.format || "A4",
                landscape: options.landscape || false,
                printBackground: options.printBackground !== false,
                margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
            });
            return Buffer.from(pdf);
        })();
        const deadline = new Promise((_, reject) => {
            timeoutId = setTimeout(() => reject(new Error("PDF_TIMEOUT")), 30_000);
        });
        return await Promise.race([render, deadline]);
    }
    finally {
        // Cancel the watchdog so a finished render does not leave a 30 s
        // timer pending.
        clearTimeout(timeoutId);
        releasePage(page, instance);
    }
}