import { Router } from "express"; import { renderPdf, renderUrlPdf } from "../services/browser.js"; import { markdownToHtml, wrapHtml } from "../services/markdown.js"; import dns from "node:dns/promises"; import logger from "../services/logger.js"; import net from "node:net"; function isPrivateIP(ip) { // IPv6 loopback/unspecified if (ip === "::1" || ip === "::") return true; // IPv6 link-local (fe80::/10) if (ip.toLowerCase().startsWith("fe8") || ip.toLowerCase().startsWith("fe9") || ip.toLowerCase().startsWith("fea") || ip.toLowerCase().startsWith("feb")) return true; // IPv6 unique local (fc00::/7) const lower = ip.toLowerCase(); if (lower.startsWith("fc") || lower.startsWith("fd")) return true; // IPv4-mapped IPv6 if (ip.startsWith("::ffff:")) ip = ip.slice(7); if (!net.isIPv4(ip)) return false; const parts = ip.split(".").map(Number); if (parts[0] === 0) return true; // 0.0.0.0/8 if (parts[0] === 10) return true; // 10.0.0.0/8 if (parts[0] === 127) return true; // 127.0.0.0/8 if (parts[0] === 169 && parts[1] === 254) return true; // 169.254.0.0/16 if (parts[0] === 172 && parts[1] >= 16 && parts[1] <= 31) return true; // 172.16.0.0/12 if (parts[0] === 192 && parts[1] === 168) return true; // 192.168.0.0/16 return false; } function sanitizeFilename(name) { // Strip characters dangerous in Content-Disposition headers return name.replace(/[\x00-\x1f"\\\r\n]/g, "").trim() || "document.pdf"; } export const convertRouter = Router(); /** * @openapi * /v1/convert/html: * post: * tags: [Conversion] * summary: Convert HTML to PDF * description: Converts HTML content to a PDF document. Bare HTML fragments are automatically wrapped in a full HTML document. * security: * - BearerAuth: [] * - ApiKeyHeader: [] * requestBody: * required: true * content: * application/json: * schema: * allOf: * - type: object * required: [html] * properties: * html: * type: string * description: HTML content to convert. Can be a full document or a fragment. * example: '
My first PDF
' * css: * type: string * description: Optional CSS to inject (only used when html is a fragment, not a full document) * example: 'body { font-family: sans-serif; padding: 40px; }' * - $ref: '#/components/schemas/PdfOptions' * responses: * 200: * description: PDF document * content: * application/pdf: * schema: * type: string * format: binary * 400: * description: Missing html field * 401: * description: Missing API key * 403: * description: Invalid API key * 415: * description: Unsupported Content-Type (must be application/json) * 429: * description: Rate limit or usage limit exceeded * 500: * description: PDF generation failed */ convertRouter.post("/html", async (req, res) => { let slotAcquired = false; try { // Reject non-JSON content types const ct = req.headers["content-type"] || ""; if (!ct.includes("application/json")) { res.status(415).json({ error: "Unsupported Content-Type. Use application/json." }); return; } const body = typeof req.body === "string" ? { html: req.body } : req.body; if (!body.html) { res.status(400).json({ error: "Missing 'html' field" }); return; } // Acquire concurrency slot if (req.acquirePdfSlot) { await req.acquirePdfSlot(); slotAcquired = true; } // Wrap bare HTML fragments const fullHtml = body.html.includes(" { let slotAcquired = false; try { // Reject non-JSON content types const ct = req.headers["content-type"] || ""; if (!ct.includes("application/json")) { res.status(415).json({ error: "Unsupported Content-Type. Use application/json." }); return; } const body = typeof req.body === "string" ? { markdown: req.body } : req.body; if (!body.markdown) { res.status(400).json({ error: "Missing 'markdown' field" }); return; } // Acquire concurrency slot if (req.acquirePdfSlot) { await req.acquirePdfSlot(); slotAcquired = true; } const html = markdownToHtml(body.markdown, body.css); const pdf = await renderPdf(html, { format: body.format, landscape: body.landscape, margin: body.margin, printBackground: body.printBackground, }); const filename = sanitizeFilename(body.filename || "document.pdf"); res.setHeader("Content-Type", "application/pdf"); res.setHeader("Content-Disposition", `inline; filename="${filename}"`); res.send(pdf); } catch (err) { logger.error({ err }, "Convert MD error"); if (err.message === "QUEUE_FULL") { res.status(429).json({ error: "Server busy - too many concurrent PDF generations. Please try again in a few seconds." }); return; } res.status(500).json({ error: `PDF generation failed: ${err.message}` }); } finally { if (slotAcquired && req.releasePdfSlot) { req.releasePdfSlot(); } } }); /** * @openapi * /v1/convert/url: * post: * tags: [Conversion] * summary: Convert URL to PDF * description: | * Fetches a URL and converts the rendered page to PDF. JavaScript is disabled for security. * Private/internal IP addresses are blocked (SSRF protection). DNS is pinned to prevent rebinding. * security: * - BearerAuth: [] * - ApiKeyHeader: [] * requestBody: * required: true * content: * application/json: * schema: * allOf: * - type: object * required: [url] * properties: * url: * type: string * format: uri * description: URL to convert (http or https only) * example: 'https://example.com' * waitUntil: * type: string * enum: [load, domcontentloaded, networkidle0, networkidle2] * default: domcontentloaded * description: When to consider navigation finished * - $ref: '#/components/schemas/PdfOptions' * responses: * 200: * description: PDF document * content: * application/pdf: * schema: * type: string * format: binary * 400: * description: Missing/invalid URL or URL resolves to private IP * 401: * description: Missing API key * 403: * description: Invalid API key * 415: * description: Unsupported Content-Type * 429: * description: Rate limit or usage limit exceeded * 500: * description: PDF generation failed */ convertRouter.post("/url", async (req, res) => { let slotAcquired = false; try { // Reject non-JSON content types const ct = req.headers["content-type"] || ""; if (!ct.includes("application/json")) { res.status(415).json({ error: "Unsupported Content-Type. Use application/json." }); return; } const body = req.body; if (!body.url) { res.status(400).json({ error: "Missing 'url' field" }); return; } // URL validation + SSRF protection let parsed; try { parsed = new URL(body.url); if (!["http:", "https:"].includes(parsed.protocol)) { res.status(400).json({ error: "Only http/https URLs are supported" }); return; } } catch { res.status(400).json({ error: "Invalid URL" }); return; } // DNS lookup to block private/reserved IPs + pin resolution to prevent DNS rebinding let resolvedAddress; try { const { address } = await dns.lookup(parsed.hostname); if (isPrivateIP(address)) { res.status(400).json({ error: "URL resolves to a private/internal IP address" }); return; } resolvedAddress = address; } catch { res.status(400).json({ error: "DNS lookup failed for URL hostname" }); return; } // Acquire concurrency slot if (req.acquirePdfSlot) { await req.acquirePdfSlot(); slotAcquired = true; } const pdf = await renderUrlPdf(body.url, { format: body.format, landscape: body.landscape, margin: body.margin, printBackground: body.printBackground, waitUntil: body.waitUntil, hostResolverRules: `MAP ${parsed.hostname} ${resolvedAddress}`, }); const filename = sanitizeFilename(body.filename || "page.pdf"); res.setHeader("Content-Type", "application/pdf"); res.setHeader("Content-Disposition", `inline; filename="${filename}"`); res.send(pdf); } catch (err) { logger.error({ err }, "Convert URL error"); if (err.message === "QUEUE_FULL") { res.status(429).json({ error: "Server busy - too many concurrent PDF generations. Please try again in a few seconds." }); return; } res.status(500).json({ error: `PDF generation failed: ${err.message}` }); } finally { if (slotAcquired && req.releasePdfSlot) { req.releasePdfSlot(); } } });