docfast/dist/routes/convert.js
OpenClaw 8b31d11e74
All checks were successful
Build & Deploy to Staging / Build & Deploy to Staging (push) Successful in 16m15s
docs: add missing OpenAPI annotations for signup/verify, billing/success, billing/webhook
2026-02-27 16:04:55 +00:00

348 lines
12 KiB
JavaScript

import { Router } from "express";
import { renderPdf, renderUrlPdf } from "../services/browser.js";
import { markdownToHtml, wrapHtml } from "../services/markdown.js";
import dns from "node:dns/promises";
import logger from "../services/logger.js";
import { isPrivateIP } from "../utils/network.js";
import { sanitizeFilename } from "../utils/sanitize.js";
// Express router on which the conversion endpoints below (/html, /markdown, /url) are registered.
export const convertRouter = Router();
/**
* @openapi
* /v1/convert/html:
* post:
* tags: [Conversion]
* summary: Convert HTML to PDF
* description: Converts HTML content to a PDF document. Bare HTML fragments are automatically wrapped in a full HTML document.
* security:
* - BearerAuth: []
* - ApiKeyHeader: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* allOf:
* - type: object
* required: [html]
* properties:
* html:
* type: string
* description: HTML content to convert. Can be a full document or a fragment.
* example: '<h1>Hello World</h1><p>My first PDF</p>'
* css:
* type: string
* description: Optional CSS to inject (only used when html is a fragment, not a full document)
* example: 'body { font-family: sans-serif; padding: 40px; }'
* - $ref: '#/components/schemas/PdfOptions'
* responses:
* 200:
* description: PDF document
* content:
* application/pdf:
* schema:
* type: string
* format: binary
* 400:
* description: Missing html field
* 401:
* description: Missing API key
* 403:
* description: Invalid API key
* 415:
* description: Unsupported Content-Type (must be application/json)
* 429:
* description: Rate limit or usage limit exceeded, or server busy (too many concurrent PDF generations)
* 500:
* description: PDF generation failed
*/
// POST /html — renders an HTML document or fragment to PDF and sends it inline.
// Responses: 415 for non-JSON requests, 400 when `html` is missing or not a string,
// 429 when an error with message "QUEUE_FULL" is raised, 500 for any other failure.
convertRouter.post("/html", async (req, res) => {
    // Tracks whether this request holds a concurrency slot, so `finally` only
    // releases a slot that was actually acquired.
    let slotAcquired = false;
    try {
        // Reject non-JSON content types
        const contentType = req.headers["content-type"] || "";
        if (!contentType.includes("application/json")) {
            res.status(415).json({ error: "Unsupported Content-Type. Use application/json." });
            return;
        }
        // A raw string body is treated as the HTML itself. A missing/null body is
        // replaced by an empty object so the field check below answers 400 instead
        // of throwing a TypeError that would surface as a 500.
        const body = typeof req.body === "string" ? { html: req.body } : (req.body ?? {});
        if (!body.html) {
            res.status(400).json({ error: "Missing 'html' field" });
            return;
        }
        // Non-string values (numbers, arrays, objects) are client errors, not
        // rendering failures.
        if (typeof body.html !== "string") {
            res.status(400).json({ error: "'html' must be a string" });
            return;
        }
        // Acquire concurrency slot (the hook is optional on the request object)
        if (req.acquirePdfSlot) {
            await req.acquirePdfSlot();
            slotAcquired = true;
        }
        // Wrap bare HTML fragments. Tag names are case-insensitive, so `<HTML>`
        // must also count as a full document and not be wrapped a second time.
        const isFullDocument = /<html/i.test(body.html);
        const fullHtml = isFullDocument ? body.html : wrapHtml(body.html, body.css);
        // Only the known PDF options are forwarded; other body fields are ignored.
        const pdf = await renderPdf(fullHtml, {
            format: body.format,
            landscape: body.landscape,
            margin: body.margin,
            printBackground: body.printBackground,
            headerTemplate: body.headerTemplate,
            footerTemplate: body.footerTemplate,
            displayHeaderFooter: body.displayHeaderFooter,
            scale: body.scale,
            pageRanges: body.pageRanges,
            preferCSSPageSize: body.preferCSSPageSize,
            width: body.width,
            height: body.height,
        });
        const filename = sanitizeFilename(body.filename || "document.pdf");
        res.setHeader("Content-Type", "application/pdf");
        res.setHeader("Content-Disposition", `inline; filename="${filename}"`);
        res.send(pdf);
    }
    catch (err) {
        logger.error({ err }, "Convert HTML error");
        // "QUEUE_FULL" is reported as back-pressure (429) rather than a server error.
        if (err.message === "QUEUE_FULL") {
            res.status(429).json({ error: "Server busy - too many concurrent PDF generations. Please try again in a few seconds." });
            return;
        }
        res.status(500).json({ error: `PDF generation failed: ${err.message}` });
    }
    finally {
        if (slotAcquired && req.releasePdfSlot) {
            req.releasePdfSlot();
        }
    }
});
/**
* @openapi
* /v1/convert/markdown:
* post:
* tags: [Conversion]
* summary: Convert Markdown to PDF
* description: Converts Markdown content to HTML and then to a PDF document.
* security:
* - BearerAuth: []
* - ApiKeyHeader: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* allOf:
* - type: object
* required: [markdown]
* properties:
* markdown:
* type: string
* description: Markdown content to convert
* example: '# Hello World\n\nThis is **bold** and *italic*.'
* css:
* type: string
* description: Optional CSS to inject into the rendered HTML
* - $ref: '#/components/schemas/PdfOptions'
* responses:
* 200:
* description: PDF document
* content:
* application/pdf:
* schema:
* type: string
* format: binary
* 400:
* description: Missing markdown field
* 401:
* description: Missing API key
* 403:
* description: Invalid API key
* 415:
* description: Unsupported Content-Type (must be application/json)
* 429:
* description: Rate limit or usage limit exceeded, or server busy (too many concurrent PDF generations)
* 500:
* description: PDF generation failed
*/
// POST /markdown — converts Markdown to HTML, renders it to PDF and sends it inline.
// Responses: 415 for non-JSON requests, 400 when `markdown` is missing or not a string,
// 429 when an error with message "QUEUE_FULL" is raised, 500 for any other failure.
convertRouter.post("/markdown", async (req, res) => {
    // Tracks whether this request holds a concurrency slot, so `finally` only
    // releases a slot that was actually acquired.
    let slotAcquired = false;
    try {
        // Reject non-JSON content types
        const contentType = req.headers["content-type"] || "";
        if (!contentType.includes("application/json")) {
            res.status(415).json({ error: "Unsupported Content-Type. Use application/json." });
            return;
        }
        // A raw string body is treated as the Markdown itself. A missing/null body
        // is replaced by an empty object so the field check below answers 400
        // instead of throwing a TypeError that would surface as a 500.
        const body = typeof req.body === "string" ? { markdown: req.body } : (req.body ?? {});
        if (!body.markdown) {
            res.status(400).json({ error: "Missing 'markdown' field" });
            return;
        }
        // Non-string values (numbers, arrays, objects) are client errors, not
        // rendering failures.
        if (typeof body.markdown !== "string") {
            res.status(400).json({ error: "'markdown' must be a string" });
            return;
        }
        // Acquire concurrency slot (the hook is optional on the request object)
        if (req.acquirePdfSlot) {
            await req.acquirePdfSlot();
            slotAcquired = true;
        }
        const html = markdownToHtml(body.markdown, body.css);
        // Only the known PDF options are forwarded; other body fields are ignored.
        const pdf = await renderPdf(html, {
            format: body.format,
            landscape: body.landscape,
            margin: body.margin,
            printBackground: body.printBackground,
            headerTemplate: body.headerTemplate,
            footerTemplate: body.footerTemplate,
            displayHeaderFooter: body.displayHeaderFooter,
            scale: body.scale,
            pageRanges: body.pageRanges,
            preferCSSPageSize: body.preferCSSPageSize,
            width: body.width,
            height: body.height,
        });
        const filename = sanitizeFilename(body.filename || "document.pdf");
        res.setHeader("Content-Type", "application/pdf");
        res.setHeader("Content-Disposition", `inline; filename="${filename}"`);
        res.send(pdf);
    }
    catch (err) {
        logger.error({ err }, "Convert MD error");
        // "QUEUE_FULL" is reported as back-pressure (429) rather than a server error.
        if (err.message === "QUEUE_FULL") {
            res.status(429).json({ error: "Server busy - too many concurrent PDF generations. Please try again in a few seconds." });
            return;
        }
        res.status(500).json({ error: `PDF generation failed: ${err.message}` });
    }
    finally {
        if (slotAcquired && req.releasePdfSlot) {
            req.releasePdfSlot();
        }
    }
});
/**
* @openapi
* /v1/convert/url:
* post:
* tags: [Conversion]
* summary: Convert URL to PDF
* description: |
* Fetches a URL and converts the rendered page to PDF. JavaScript is disabled for security.
* Private/internal IP addresses are blocked (SSRF protection). DNS is pinned to prevent rebinding.
* security:
* - BearerAuth: []
* - ApiKeyHeader: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* allOf:
* - type: object
* required: [url]
* properties:
* url:
* type: string
* format: uri
* description: URL to convert (http or https only)
* example: 'https://example.com'
* waitUntil:
* type: string
* enum: [load, domcontentloaded, networkidle0, networkidle2]
* default: domcontentloaded
* description: When to consider navigation finished
* - $ref: '#/components/schemas/PdfOptions'
* responses:
* 200:
* description: PDF document
* content:
* application/pdf:
* schema:
* type: string
* format: binary
* 400:
* description: Missing/invalid URL or URL resolves to private IP
* 401:
* description: Missing API key
* 403:
* description: Invalid API key
* 415:
* description: Unsupported Content-Type (must be application/json)
* 429:
* description: Rate limit or usage limit exceeded, or server busy (too many concurrent PDF generations)
* 500:
* description: PDF generation failed
*/
// POST /url — validates a URL, resolves and pins its address, renders the page to
// PDF and sends it inline.
// Responses: 415 for non-JSON requests; 400 for a missing/invalid/non-http(s) URL,
// a failed DNS lookup, or a hostname resolving to a private address; 429 when an
// error with message "QUEUE_FULL" is raised; 500 for any other failure.
convertRouter.post("/url", async (req, res) => {
    // Tracks whether this request holds a concurrency slot, so `finally` only
    // releases a slot that was actually acquired.
    let slotAcquired = false;
    try {
        // Reject non-JSON content types
        const contentType = req.headers["content-type"] || "";
        if (!contentType.includes("application/json")) {
            res.status(415).json({ error: "Unsupported Content-Type. Use application/json." });
            return;
        }
        // A missing/null body is replaced by an empty object so the field check
        // below answers 400 instead of throwing a TypeError that becomes a 500.
        const body = req.body ?? {};
        if (!body.url) {
            res.status(400).json({ error: "Missing 'url' field" });
            return;
        }
        // URL validation + SSRF protection.
        // Only strings are parsed: new URL() stringifies its argument, so a value
        // such as ["https://a.example"] would otherwise validate while the raw
        // non-string value is what gets handed to the renderer.
        let parsed = null;
        if (typeof body.url === "string") {
            try {
                parsed = new URL(body.url);
            }
            catch {
                // Unparseable input is an expected client error, reported below.
                parsed = null;
            }
        }
        if (parsed === null) {
            res.status(400).json({ error: "Invalid URL" });
            return;
        }
        if (!["http:", "https:"].includes(parsed.protocol)) {
            res.status(400).json({ error: "Only http/https URLs are supported" });
            return;
        }
        // DNS lookup to block private/reserved IPs + pin resolution to prevent DNS rebinding
        let pinnedAddress;
        try {
            const { address, family } = await dns.lookup(parsed.hostname);
            if (isPrivateIP(address)) {
                res.status(400).json({ error: "URL resolves to a private/internal IP address" });
                return;
            }
            // The resolver rule takes "host[:port]" as replacement, so an IPv6
            // literal has to be bracketed; unbracketed it would not be parsed as a
            // host and the pin would be lost.
            pinnedAddress = family === 6 ? `[${address}]` : address;
        }
        catch {
            res.status(400).json({ error: "DNS lookup failed for URL hostname" });
            return;
        }
        // Acquire concurrency slot (the hook is optional on the request object)
        if (req.acquirePdfSlot) {
            await req.acquirePdfSlot();
            slotAcquired = true;
        }
        // Only the known PDF options are forwarded; other body fields are ignored.
        const pdf = await renderUrlPdf(body.url, {
            format: body.format,
            landscape: body.landscape,
            margin: body.margin,
            printBackground: body.printBackground,
            headerTemplate: body.headerTemplate,
            footerTemplate: body.footerTemplate,
            displayHeaderFooter: body.displayHeaderFooter,
            scale: body.scale,
            pageRanges: body.pageRanges,
            preferCSSPageSize: body.preferCSSPageSize,
            width: body.width,
            height: body.height,
            waitUntil: body.waitUntil,
            // Pins the validated hostname to the address that passed the private-IP check.
            hostResolverRules: `MAP ${parsed.hostname} ${pinnedAddress}`,
        });
        const filename = sanitizeFilename(body.filename || "page.pdf");
        res.setHeader("Content-Type", "application/pdf");
        res.setHeader("Content-Disposition", `inline; filename="${filename}"`);
        res.send(pdf);
    }
    catch (err) {
        logger.error({ err }, "Convert URL error");
        // "QUEUE_FULL" is reported as back-pressure (429) rather than a server error.
        if (err.message === "QUEUE_FULL") {
            res.status(429).json({ error: "Server busy - too many concurrent PDF generations. Please try again in a few seconds." });
            return;
        }
        res.status(500).json({ error: `PDF generation failed: ${err.message}` });
    }
    finally {
        if (slotAcquired && req.releasePdfSlot) {
            req.releasePdfSlot();
        }
    }
});