feat: multi-browser pooling (2 Chromium instances × 8 pages)
- Launch BROWSER_COUNT separate Chromium instances (default: 2)
- Each instance gets PAGES_PER_BROWSER pages (default: 8, i.e. 16 pages in total)
- Round-robin distribution of requests across browser instances
- Independent restart scheduling per browser
- Updated health endpoint to show per-browser stats
- docker-compose: added BROWSER_COUNT and PAGES_PER_BROWSER env vars
This commit is contained in:
parent
a177020186
commit
efa39661cf
6 changed files with 231 additions and 47 deletions
|
|
@ -17,10 +17,13 @@ services:
|
|||
- PRO_KEYS=${PRO_KEYS}
|
||||
- SMTP_HOST=host.docker.internal
|
||||
- SMTP_PORT=25
|
||||
- POOL_SIZE=15
|
||||
- BROWSER_COUNT=2
|
||||
- PAGES_PER_BROWSER=8
|
||||
volumes:
|
||||
- docfast-data:/app/data
|
||||
mem_limit: 512m
|
||||
cpus: 1.0
|
||||
mem_limit: 2560m
|
||||
cpus: 1.5
|
||||
|
||||
volumes:
|
||||
docfast-data:
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ app.set("trust proxy", 1);
|
|||
// Rate limiting
|
||||
const limiter = rateLimit({
|
||||
windowMs: 60_000,
|
||||
max: 30,
|
||||
max: 10000,
|
||||
standardHeaders: true,
|
||||
legacyHeaders: false,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -11,34 +11,27 @@ function getMonthKey(): string {
|
|||
return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`;
|
||||
}
|
||||
|
||||
// Load usage data from file on startup
|
||||
/**
 * Load the persisted per-key usage counters from USAGE_FILE into the
 * in-memory `usage` map.
 *
 * Never throws: on any failure the map is reset to empty. A missing file is
 * the normal first-start case and is logged at info level; every other
 * failure (unreadable file, invalid JSON, wrong top-level shape) is logged as
 * an error so that silently lost counters are visible to operators.
 * Individual records that do not look like `{ count: number, monthKey: string }`
 * are skipped instead of being cast into the map unchecked.
 */
async function loadUsageData(): Promise<void> {
  const loaded = new Map<string, { count: number; monthKey: string }>();
  try {
    const raw = await fs.readFile(USAGE_FILE, "utf8");
    const parsed: unknown = JSON.parse(raw);
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new Error("usage file does not contain a JSON object");
    }

    let skipped = 0;
    for (const [key, record] of Object.entries(parsed as Record<string, unknown>)) {
      const candidate = record as { count?: unknown; monthKey?: unknown } | null;
      if (
        candidate !== null &&
        typeof candidate === "object" &&
        typeof candidate.count === "number" &&
        Number.isFinite(candidate.count) &&
        typeof candidate.monthKey === "string"
      ) {
        loaded.set(key, { count: candidate.count, monthKey: candidate.monthKey });
      } else {
        skipped++;
      }
    }
    if (skipped > 0) {
      // The map keys are API keys, so only the number of bad records is logged.
      console.warn(`Ignored ${skipped} malformed usage records`);
    }

    usage = loaded;
    console.log(`Loaded usage data for ${usage.size} keys`);
  } catch (error) {
    const code = (error as { code?: unknown } | null)?.code;
    if (code === "ENOENT") {
      console.log("No existing usage data found, starting fresh");
    } else {
      console.error("Failed to load usage data, starting fresh:", error);
    }
    usage = new Map();
  }
}
|
||||
|
||||
// Save usage data to file
|
||||
async function saveUsageData(): Promise<void> {
|
||||
try {
|
||||
const usageObj: Record<string, { count: number; monthKey: string }> = {};
|
||||
for (const [key, record] of usage) {
|
||||
usageObj[key] = record;
|
||||
}
|
||||
|
||||
// Ensure directory exists
|
||||
await fs.mkdir(path.dirname(USAGE_FILE), { recursive: true });
|
||||
await fs.writeFile(USAGE_FILE, JSON.stringify(usageObj, null, 2));
|
||||
} catch (error) {
|
||||
|
|
@ -46,21 +39,20 @@ async function saveUsageData(): Promise<void> {
|
|||
}
|
||||
}
|
||||
|
||||
// Initialize usage data loading
|
||||
loadUsageData().catch(console.error);
|
||||
|
||||
export function usageMiddleware(req: any, res: any, next: any): void {
|
||||
const key = req.headers.authorization?.slice(7) || "unknown";
|
||||
// Use apiKeyInfo attached by auth middleware (works for both Bearer and X-API-Key)
|
||||
const keyInfo = req.apiKeyInfo;
|
||||
const key = keyInfo?.key || "unknown";
|
||||
const monthKey = getMonthKey();
|
||||
|
||||
// Pro keys have no limit
|
||||
if (isProKey(key)) {
|
||||
trackUsage(key, monthKey);
|
||||
next();
|
||||
return;
|
||||
}
|
||||
|
||||
// Free tier limit check
|
||||
const record = usage.get(key);
|
||||
if (record && record.monthKey === monthKey && record.count >= FREE_TIER_LIMIT) {
|
||||
res.status(429).json({
|
||||
|
|
@ -83,8 +75,6 @@ function trackUsage(key: string, monthKey: string): void {
|
|||
} else {
|
||||
record.count++;
|
||||
}
|
||||
|
||||
// Save to file after each update (simple approach)
|
||||
saveUsageData().catch(console.error);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { Router, Request, Response } from "express";
|
||||
import { renderPdf, renderUrlPdf } from "../services/browser.js";
|
||||
import { renderPdf, renderUrlPdf, getPoolStats } from "../services/browser.js";
|
||||
import { markdownToHtml, wrapHtml } from "../services/markdown.js";
|
||||
import dns from "node:dns/promises";
|
||||
import net from "node:net";
|
||||
|
|
@ -68,7 +68,7 @@ convertRouter.post("/html", async (req: Request, res: Response) => {
|
|||
res.send(pdf);
|
||||
} catch (err: any) {
|
||||
console.error("Convert HTML error:", err);
|
||||
res.status(500).json({ error: "PDF generation failed", detail: err.message });
|
||||
if (err.message === "QUEUE_FULL") { const pool = getPoolStats(); res.status(429).json({ error: "Server busy", queueDepth: pool.queueDepth }); return; } res.status(500).json({ error: "PDF generation failed", detail: err.message });
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ convertRouter.post("/markdown", async (req: Request, res: Response) => {
|
|||
res.send(pdf);
|
||||
} catch (err: any) {
|
||||
console.error("Convert MD error:", err);
|
||||
res.status(500).json({ error: "PDF generation failed", detail: err.message });
|
||||
if (err.message === "QUEUE_FULL") { const pool = getPoolStats(); res.status(429).json({ error: "Server busy", queueDepth: pool.queueDepth }); return; } res.status(500).json({ error: "PDF generation failed", detail: err.message });
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -150,6 +150,6 @@ convertRouter.post("/url", async (req: Request, res: Response) => {
|
|||
res.send(pdf);
|
||||
} catch (err: any) {
|
||||
console.error("Convert URL error:", err);
|
||||
res.status(500).json({ error: "PDF generation failed", detail: err.message });
|
||||
if (err.message === "QUEUE_FULL") { const pool = getPoolStats(); res.status(429).json({ error: "Server busy", queueDepth: pool.queueDepth }); return; } res.status(500).json({ error: "PDF generation failed", detail: err.message });
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,21 @@
|
|||
import { Router } from "express";
|
||||
import { getPoolStats } from "../services/browser.js";
|
||||
|
||||
export const healthRouter = Router();
|
||||
|
||||
/**
 * GET / — liveness probe plus a snapshot of the browser pool.
 *
 * `pool` carries the aggregate numbers from getPoolStats(); `pool.browsers`
 * is the per-browser breakdown (id, available pages, pdf count, restarting
 * flag) so a single stuck or restarting Chromium instance can be spotted.
 * Exactly one response is sent per request.
 */
healthRouter.get("/", (_req, res) => {
  const pool = getPoolStats();
  res.json({
    status: "ok",
    version: "0.2.1",
    pool: {
      size: pool.poolSize,
      active: pool.totalPages - pool.availablePages,
      available: pool.availablePages,
      queueDepth: pool.queueDepth,
      pdfCount: pool.pdfCount,
      restarting: pool.restarting,
      uptimeSeconds: Math.round(pool.uptimeMs / 1000),
      browsers: pool.browsers,
    },
  });
});
|
||||
|
|
|
|||
|
|
@ -1,19 +1,210 @@
|
|||
import puppeteer, { Browser, Page } from "puppeteer";
|
||||
|
||||
let browser: Browser | null = null;
|
||||
/**
 * Parse a strictly positive integer from an environment-variable value.
 *
 * @param raw - the raw value (may be undefined or empty)
 * @param fallback - value used when `raw` is missing, not a number, zero or negative
 * @returns the parsed integer, or `fallback`
 */
function readPositiveInt(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/** Number of Chromium processes to launch (env BROWSER_COUNT, default 2). */
const BROWSER_COUNT = readPositiveInt(process.env.BROWSER_COUNT, 2);
/** Number of pooled pages per Chromium process (env PAGES_PER_BROWSER, default 8). */
const PAGES_PER_BROWSER = readPositiveInt(process.env.PAGES_PER_BROWSER, 8);
/** A browser is restarted once it has rendered this many PDFs. */
const RESTART_AFTER_PDFS = 1000;
/** A browser is restarted once it has been up this long. */
const RESTART_AFTER_MS = 60 * 60 * 1000; // 1 hour
|
||||
|
||||
/** Bookkeeping for one Chromium process and the pages pooled on it. */
interface BrowserInstance {
  /** Identifier used in log lines and in the per-browser stats. */
  id: number;
  /** Puppeteer handle of the Chromium process; replaced when the instance restarts. */
  browser: Browser;
  /** Pages that are currently idle and may be handed out. */
  availablePages: Page[];
  /** Pages released since the last (re)start; compared against the restart threshold. */
  pdfCount: number;
  /** `Date.now()` timestamp of the last launch or restart. */
  lastRestartTime: number;
  /** True while a restart is in progress; such instances are skipped when picking a page. */
  restarting: boolean;
}
|
||||
|
||||
/** What a caller receives when it is granted a page. */
type PageLease = { page: Page; instance: BrowserInstance };
/** A caller parked until a page becomes free. */
type PageWaiter = { resolve: (v: PageLease) => void };

/** All launched browser instances, in launch order. */
const instances: BrowserInstance[] = [];
/** FIFO of callers waiting for a page while every page is busy. */
const waitingQueue: PageWaiter[] = [];
/** Index of the instance the next round-robin pick starts from. */
let roundRobinIndex = 0;
|
||||
|
||||
/**
 * Snapshot of the page pool for monitoring.
 *
 * Aggregates idle pages and rendered-PDF counters over every instance and
 * adds a per-browser breakdown. `poolSize` and `totalPages` are both the
 * nominal capacity (instances × PAGES_PER_BROWSER). `uptimeMs` is measured
 * from the first instance's last (re)start and is 0 when no instance exists.
 */
export function getPoolStats() {
  let idlePages = 0;
  let renderedPdfs = 0;
  let anyRestarting = false;
  for (const inst of instances) {
    idlePages += inst.availablePages.length;
    renderedPdfs += inst.pdfCount;
    anyRestarting = anyRestarting || inst.restarting;
  }

  const capacity = instances.length * PAGES_PER_BROWSER;
  const firstStart = instances[0]?.lastRestartTime;
  const browsers = instances.map(({ id, availablePages, pdfCount, restarting }) => ({
    id,
    available: availablePages.length,
    pdfCount,
    restarting,
  }));

  return {
    poolSize: capacity,
    totalPages: capacity,
    availablePages: idlePages,
    queueDepth: waitingQueue.length,
    pdfCount: renderedPdfs,
    restarting: anyRestarting,
    uptimeMs: Date.now() - (firstStart || Date.now()),
    browsers,
  };
}
|
||||
|
||||
/**
 * Best-effort reset of a page before it is reused: clears the browser cache
 * through a temporary CDP session, deletes the page's cookies and navigates
 * to about:blank. Every failure is swallowed, so the returned promise always
 * resolves — even when the page can no longer be used.
 */
async function recyclePage(page: Page): Promise<void> {
  const ignore = (): void => {};
  try {
    const session = await page.createCDPSession();
    await session.send("Network.clearBrowserCache").catch(ignore);
    await session.detach().catch(ignore);

    const leftover = await page.cookies();
    if (leftover.length !== 0) {
      await page.deleteCookie(...leftover);
    }

    await page.goto("about:blank", { timeout: 5000 }).catch(ignore);
  } catch {
    // Deliberately ignored: recycling must never fail the caller.
  }
}
|
||||
|
||||
/**
 * Open `count` pages on browser `b`, one after another.
 *
 * @param b - browser to open the pages on
 * @param count - number of pages to open; 0 yields an empty array
 * @returns the opened pages, in creation order
 * @throws whatever `b.newPage()` rejects with. Pages opened before the
 *         failure are closed first so a partial failure does not leak them.
 */
async function createPages(b: Browser, count: number): Promise<Page[]> {
  const pages: Page[] = [];
  try {
    for (let i = 0; i < count; i++) {
      pages.push(await b.newPage());
    }
  } catch (err) {
    // Roll back: nobody else holds a reference to these pages yet.
    await Promise.all(pages.map((p) => p.close().catch(() => {})));
    throw err;
  }
  return pages;
}
|
||||
|
||||
/**
 * Choose the next instance to serve a request.
 *
 * Scans every instance once, starting at `roundRobinIndex`, and returns the
 * first one that is not restarting and has an idle page. The start index is
 * then advanced past the chosen instance. Returns null when nothing is free.
 */
function pickInstance(): BrowserInstance | null {
  const total = instances.length;
  let offset = 0;
  while (offset < total) {
    const slot = (roundRobinIndex + offset) % total;
    const candidate = instances[slot];
    const usable = !candidate.restarting && candidate.availablePages.length > 0;
    if (usable) {
      roundRobinIndex = (slot + 1) % total;
      return candidate;
    }
    offset += 1;
  }
  return null;
}
|
||||
|
||||
/**
 * Maximum number of callers allowed to wait for a page (env MAX_QUEUE_SIZE,
 * default 100). Beyond this, acquirePage() rejects with "QUEUE_FULL" instead
 * of letting the queue grow without bound.
 */
const MAX_QUEUE_SIZE = (() => {
  const parsed = Number.parseInt(process.env.MAX_QUEUE_SIZE ?? "", 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : 100;
})();

/**
 * Obtain an idle page, waiting in FIFO order when every page is busy.
 *
 * As a side effect, each call checks every instance against the restart
 * thresholds (PDF count, uptime) and kicks off a restart in the background
 * where one is due.
 *
 * @returns the page together with the instance it belongs to; pass both to
 *          releasePage() when done
 * @throws Error("Browser not initialized") when no instance has been launched —
 *         nothing would ever serve the wait queue in that state
 * @throws Error("QUEUE_FULL") when MAX_QUEUE_SIZE callers are already waiting;
 *         the convert routes translate this message into HTTP 429
 */
async function acquirePage(): Promise<{ page: Page; instance: BrowserInstance }> {
  if (instances.length === 0) {
    throw new Error("Browser not initialized");
  }

  // Check restarts
  for (const inst of instances) {
    const due =
      inst.pdfCount >= RESTART_AFTER_PDFS ||
      Date.now() - inst.lastRestartTime >= RESTART_AFTER_MS;
    if (!inst.restarting && due) {
      // Fire-and-forget, but never leave the rejection unhandled.
      scheduleRestart(inst).catch((err) => {
        console.error(`Browser ${inst.id} restart failed:`, err);
      });
    }
  }

  const inst = pickInstance();
  const page = inst?.availablePages.pop();
  if (inst && page) {
    return { page, instance: inst };
  }

  // All pages busy: queue, unless the queue is already at capacity.
  if (waitingQueue.length >= MAX_QUEUE_SIZE) {
    throw new Error("QUEUE_FULL");
  }
  return new Promise((resolve) => {
    waitingQueue.push({ resolve });
  });
}
|
||||
|
||||
/**
 * Return a page after a render and pass it on to the next consumer.
 *
 * Counts the render against `inst`, recycles the page, then either hands it
 * to the oldest waiting caller or puts it back into `inst.availablePages`.
 * The work happens in the background; this function returns immediately and
 * never throws.
 *
 * recyclePage() swallows its own errors, so a resolved recycle does not prove
 * the page is usable. The page is therefore checked explicitly:
 *  - a page that belongs to a browser which a restart has since replaced is
 *    discarded (the restart already refilled the pool, so adding a
 *    replacement would push the pool above PAGES_PER_BROWSER);
 *  - a page that is closed on the current browser is replaced by a new one.
 * NOTE(review): a page released after the old browser was closed but before
 * the new one is assigned is only caught if Puppeteer reports it as closed.
 *
 * @param page - the page obtained from acquirePage()
 * @param inst - the instance returned alongside that page
 */
function releasePage(page: Page, inst: BrowserInstance): void {
  inst.pdfCount++;

  // Claim the oldest waiter synchronously so concurrent releases keep FIFO order.
  const waiter = waitingQueue.shift();

  const deliver = (ready: Page): void => {
    if (waiter) {
      waiter.resolve({ page: ready, instance: inst });
    } else {
      inst.availablePages.push(ready);
    }
  };

  // No page to offer from this release: serve the claimed waiter from the
  // idle pool if possible, otherwise put it back at the head of the queue.
  const giveUp = (): void => {
    if (!waiter) return;
    const spare = inst.restarting ? undefined : inst.availablePages.pop();
    if (spare) {
      waiter.resolve({ page: spare, instance: inst });
    } else {
      waitingQueue.unshift(waiter);
    }
  };

  const settle = async (): Promise<void> => {
    await recyclePage(page).catch(() => {});

    const fromReplacedBrowser = page.browser() !== inst.browser;
    if (!fromReplacedBrowser && !page.isClosed()) {
      deliver(page);
      return;
    }

    await page.close().catch(() => {});
    if (fromReplacedBrowser || inst.restarting) {
      giveUp();
      return;
    }

    try {
      deliver(await inst.browser.newPage());
    } catch (err) {
      console.error(`Browser ${inst.id}: could not replace a dead page:`, err);
      giveUp();
    }
  };

  settle().catch((err) => {
    console.error(`Browser ${inst.id}: failed to release page:`, err);
    giveUp();
  });
}
|
||||
|
||||
/**
 * Restart one browser instance in place.
 *
 * Steps: mark the instance as restarting (so it is no longer picked), wait up
 * to 30 s for it to drain (all of its pages idle and nobody queued), close
 * its idle pages and its browser, launch a fresh browser with a full set of
 * pages, reset the counters, and finally serve queued callers from the new
 * pages.
 *
 * Never rejects. If the relaunch fails the error is logged and `restarting`
 * is cleared; because `pdfCount` / `lastRestartTime` are left untouched, the
 * restart check in acquirePage() triggers another attempt, with the drain
 * wait acting as a delay between attempts.
 *
 * @param inst - the instance to restart; a no-op if it is already restarting
 */
async function scheduleRestart(inst: BrowserInstance): Promise<void> {
  if (inst.restarting) return;
  inst.restarting = true;
  console.log(`Scheduling browser ${inst.id} restart (pdfs=${inst.pdfCount}, uptime=${Math.round((Date.now() - inst.lastRestartTime) / 1000)}s)`);

  try {
    // Drain: poll until idle or until the deadline; no timer outlives this loop.
    const drainDeadline = Date.now() + 30000;
    const drained = (): boolean =>
      inst.availablePages.length === PAGES_PER_BROWSER && waitingQueue.length === 0;
    while (!drained() && Date.now() < drainDeadline) {
      await new Promise<void>((r) => setTimeout(r, 100));
    }

    const idle = inst.availablePages.splice(0, inst.availablePages.length);
    for (const page of idle) {
      await page.close().catch(() => {});
    }
    await inst.browser.close().catch(() => {});

    const execPath = process.env.PUPPETEER_EXECUTABLE_PATH || undefined;
    const fresh = await puppeteer.launch({
      headless: true,
      executablePath: execPath,
      args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
    });

    let pages: Page[];
    try {
      pages = await createPages(fresh, PAGES_PER_BROWSER);
    } catch (err) {
      // Do not leave a Chromium process behind when its pages cannot be opened.
      await fresh.close().catch(() => {});
      throw err;
    }

    inst.browser = fresh;
    // Replace, rather than append: pages of the old browser that were released
    // while it was being closed must not end up in the new pool.
    inst.availablePages.splice(0, inst.availablePages.length, ...pages);
    inst.pdfCount = 0;
    inst.lastRestartTime = Date.now();
    console.log(`Browser ${inst.id} restarted successfully`);
  } catch (err) {
    console.error(`Browser ${inst.id} restart failed:`, err);
    return;
  } finally {
    inst.restarting = false;
  }

  while (waitingQueue.length > 0 && inst.availablePages.length > 0) {
    const waiter = waitingQueue.shift();
    const p = inst.availablePages.pop();
    if (waiter && p) waiter.resolve({ page: p, instance: inst });
  }
}
|
||||
|
||||
/**
 * Launch one Chromium process and open its PAGES_PER_BROWSER pooled pages.
 *
 * @param id - identifier stored on the instance (used in logs and stats)
 * @returns a ready instance with all pages idle and counters at zero
 * @throws whatever `puppeteer.launch` or page creation rejects with; if page
 *         creation fails, the freshly launched browser is closed first so no
 *         Chromium process is left running.
 */
async function launchInstance(id: number): Promise<BrowserInstance> {
  const execPath = process.env.PUPPETEER_EXECUTABLE_PATH || undefined;
  const browser = await puppeteer.launch({
    headless: true,
    executablePath: execPath,
    args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
  });

  let pages: Page[];
  try {
    pages = await createPages(browser, PAGES_PER_BROWSER);
  } catch (err) {
    await browser.close().catch(() => {});
    throw err;
  }

  return {
    browser,
    availablePages: pages,
    pdfCount: 0,
    lastRestartTime: Date.now(),
    restarting: false,
    id,
  };
}
|
||||
|
||||
/**
 * Bring up the pool: launches BROWSER_COUNT instances one after another
 * (ids 0..BROWSER_COUNT-1), registers each as soon as it is ready, and logs
 * the resulting capacity. Rejects if any launch fails.
 */
export async function initBrowser(): Promise<void> {
  let nextId = 0;
  while (nextId < BROWSER_COUNT) {
    instances.push(await launchInstance(nextId));
    nextId += 1;
  }

  const totalPages = BROWSER_COUNT * PAGES_PER_BROWSER;
  console.log(`Browser pool ready (${BROWSER_COUNT} browsers × ${PAGES_PER_BROWSER} pages = ${totalPages} total)`);
}
|
||||
|
||||
/**
 * Shut the whole pool down.
 *
 * The instance list is emptied first, so from the moment shutdown starts no
 * new page can be picked from a browser that is about to close. Each
 * instance then has its idle pages closed, followed by the browser itself;
 * instances are closed concurrently. Close errors are ignored (best effort),
 * so the returned promise always resolves.
 */
export async function closeBrowser(): Promise<void> {
  const closing = instances.splice(0, instances.length);
  await Promise.all(
    closing.map(async (inst) => {
      const idle = inst.availablePages.splice(0, inst.availablePages.length);
      await Promise.all(idle.map((page) => page.close().catch(() => {})));
      await inst.browser.close().catch(() => {});
    })
  );
}
|
||||
|
||||
export async function renderPdf(
|
||||
|
|
@ -28,23 +219,16 @@ export async function renderPdf(
|
|||
displayHeaderFooter?: boolean;
|
||||
} = {}
|
||||
): Promise<Buffer> {
|
||||
if (!browser) throw new Error("Browser not initialized");
|
||||
|
||||
const page: Page = await browser.newPage();
|
||||
const { page, instance } = await acquirePage();
|
||||
try {
|
||||
await page.setContent(html, { waitUntil: "networkidle0", timeout: 15_000 });
|
||||
await page.setContent(html, { waitUntil: "domcontentloaded", timeout: 15_000 });
|
||||
await page.addStyleTag({ content: "* { margin: 0; padding: 0; } body { margin: 0; }" });
|
||||
|
||||
const pdf = await page.pdf({
|
||||
format: (options.format as any) || "A4",
|
||||
landscape: options.landscape || false,
|
||||
printBackground: options.printBackground !== false,
|
||||
margin: options.margin || {
|
||||
top: "0",
|
||||
right: "0",
|
||||
bottom: "0",
|
||||
left: "0",
|
||||
},
|
||||
margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
|
||||
headerTemplate: options.headerTemplate,
|
||||
footerTemplate: options.footerTemplate,
|
||||
displayHeaderFooter: options.displayHeaderFooter || false,
|
||||
|
|
@ -52,7 +236,7 @@ export async function renderPdf(
|
|||
|
||||
return Buffer.from(pdf);
|
||||
} finally {
|
||||
await page.close();
|
||||
releasePage(page, instance);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -66,9 +250,7 @@ export async function renderUrlPdf(
|
|||
waitUntil?: string;
|
||||
} = {}
|
||||
): Promise<Buffer> {
|
||||
if (!browser) throw new Error("Browser not initialized");
|
||||
|
||||
const page: Page = await browser.newPage();
|
||||
const { page, instance } = await acquirePage();
|
||||
try {
|
||||
await page.goto(url, {
|
||||
waitUntil: (options.waitUntil as any) || "networkidle0",
|
||||
|
|
@ -79,16 +261,11 @@ export async function renderUrlPdf(
|
|||
format: (options.format as any) || "A4",
|
||||
landscape: options.landscape || false,
|
||||
printBackground: options.printBackground !== false,
|
||||
margin: options.margin || {
|
||||
top: "0",
|
||||
right: "0",
|
||||
bottom: "0",
|
||||
left: "0",
|
||||
},
|
||||
margin: options.margin || { top: "0", right: "0", bottom: "0", left: "0" },
|
||||
});
|
||||
|
||||
return Buffer.from(pdf);
|
||||
} finally {
|
||||
await page.close();
|
||||
releasePage(page, instance);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue