docfast/src/routes/convert.ts
DocFast CEO 76b2179be9
All checks were successful
Build & Deploy to Staging / Build & Deploy to Staging (push) Successful in 19m19s
refactor: extract shared PDF route handler to eliminate convert route duplication
- New src/utils/pdf-handler.ts with handlePdfRoute() helper
- Handles: content-type validation, PDF option validation, slot acquire/release, error mapping, response headers
- Refactored convert.ts from 388 to 233 lines (40% reduction)
- 10 TDD tests for the new helper (RED→GREEN verified)
- All 618 tests passing, zero tsc --noEmit errors
2026-03-09 20:07:27 +01:00

233 lines
7.9 KiB
TypeScript

import { Router, Request, Response } from "express";
import { renderPdf, renderUrlPdf } from "../services/browser.js";
import { markdownToHtml, wrapHtml } from "../services/markdown.js";
import dns from "node:dns/promises";
import logger from "../services/logger.js";
import { isPrivateIP } from "../utils/network.js";
import { sanitizeFilename } from "../utils/sanitize.js";
import { handlePdfRoute } from "../utils/pdf-handler.js";
export const convertRouter = Router();
/**
* @openapi
* /v1/convert/html:
* post:
* tags: [Conversion]
* summary: Convert HTML to PDF
* description: Converts HTML content to a PDF document. Bare HTML fragments are automatically wrapped in a full HTML document.
* security:
* - BearerAuth: []
* - ApiKeyHeader: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* allOf:
* - type: object
* required: [html]
* properties:
* html:
* type: string
* description: HTML content to convert. Can be a full document or a fragment.
* example: '<h1>Hello World</h1><p>My first PDF</p>'
* css:
* type: string
* description: Optional CSS to inject (only used when html is a fragment, not a full document)
* example: 'body { font-family: sans-serif; padding: 40px; }'
* - $ref: '#/components/schemas/PdfOptions'
* responses:
* 200:
* description: PDF document
* content:
* application/pdf:
* schema:
* type: string
* format: binary
* 400:
* description: Missing html field
* 401:
* description: Missing API key
* 403:
* description: Invalid API key
* 415:
* description: Unsupported Content-Type (must be application/json)
* 429:
* description: Rate limit or usage limit exceeded
* 500:
* description: PDF generation failed
*/
convertRouter.post("/html", async (req: Request, res: Response) => {
await handlePdfRoute(req, res, async (sanitizedOptions) => {
const body = typeof req.body === "string" ? { html: req.body } : req.body;
if (!body.html) {
res.status(400).json({ error: "Missing 'html' field" });
return null;
}
const fullHtml = body.html.includes("<html")
? body.html
: wrapHtml(body.html, body.css);
const { pdf, durationMs } = await renderPdf(fullHtml, { ...sanitizedOptions });
return { pdf, durationMs, filename: sanitizeFilename(body.filename || "document.pdf") };
});
});
/**
* @openapi
* /v1/convert/markdown:
* post:
* tags: [Conversion]
* summary: Convert Markdown to PDF
* description: Converts Markdown content to HTML and then to a PDF document.
* security:
* - BearerAuth: []
* - ApiKeyHeader: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* allOf:
* - type: object
* required: [markdown]
* properties:
* markdown:
* type: string
* description: Markdown content to convert
* example: '# Hello World\n\nThis is **bold** and *italic*.'
* css:
* type: string
* description: Optional CSS to inject into the rendered HTML
* - $ref: '#/components/schemas/PdfOptions'
* responses:
* 200:
* description: PDF document
* content:
* application/pdf:
* schema:
* type: string
* format: binary
* 400:
* description: Missing markdown field
* 401:
* description: Missing API key
* 403:
* description: Invalid API key
* 415:
* description: Unsupported Content-Type
* 429:
* description: Rate limit or usage limit exceeded
* 500:
* description: PDF generation failed
*/
convertRouter.post("/markdown", async (req: Request, res: Response) => {
await handlePdfRoute(req, res, async (sanitizedOptions) => {
const body = typeof req.body === "string" ? { markdown: req.body } : req.body;
if (!body.markdown) {
res.status(400).json({ error: "Missing 'markdown' field" });
return null;
}
const html = markdownToHtml(body.markdown, body.css);
const { pdf, durationMs } = await renderPdf(html, { ...sanitizedOptions });
return { pdf, durationMs, filename: sanitizeFilename(body.filename || "document.pdf") };
});
});
/**
* @openapi
* /v1/convert/url:
* post:
* tags: [Conversion]
* summary: Convert URL to PDF
* description: |
* Fetches a URL and converts the rendered page to PDF. JavaScript is disabled for security.
* Private/internal IP addresses are blocked (SSRF protection). DNS is pinned to prevent rebinding.
* security:
* - BearerAuth: []
* - ApiKeyHeader: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* allOf:
* - type: object
* required: [url]
* properties:
* url:
* type: string
* format: uri
* description: URL to convert (http or https only)
* example: 'https://example.com'
* waitUntil:
* type: string
* enum: [load, domcontentloaded, networkidle0, networkidle2]
* default: domcontentloaded
* description: When to consider navigation finished
* - $ref: '#/components/schemas/PdfOptions'
* responses:
* 200:
* description: PDF document
* content:
* application/pdf:
* schema:
* type: string
* format: binary
* 400:
* description: Missing/invalid URL or URL resolves to private IP
* 401:
* description: Missing API key
* 403:
* description: Invalid API key
* 415:
* description: Unsupported Content-Type
* 429:
* description: Rate limit or usage limit exceeded
* 500:
* description: PDF generation failed
*/
convertRouter.post("/url", async (req: Request, res: Response) => {
await handlePdfRoute(req, res, async (sanitizedOptions) => {
const body = req.body as { url?: string; waitUntil?: string; filename?: string };
if (!body.url) {
res.status(400).json({ error: "Missing 'url' field" });
return null;
}
// URL validation + SSRF protection
let parsed: URL;
try {
parsed = new URL(body.url);
if (!["http:", "https:"].includes(parsed.protocol)) {
res.status(400).json({ error: "Only http/https URLs are supported" });
return null;
}
} catch {
res.status(400).json({ error: "Invalid URL" });
return null;
}
// DNS lookup to block private/reserved IPs + pin resolution
let resolvedAddress: string;
try {
const { address } = await dns.lookup(parsed.hostname);
if (isPrivateIP(address)) {
res.status(400).json({ error: "URL resolves to a private/internal IP address" });
return null;
}
resolvedAddress = address;
} catch {
res.status(400).json({ error: "DNS lookup failed for URL hostname" });
return null;
}
const { pdf, durationMs } = await renderUrlPdf(body.url, {
...sanitizedOptions,
hostResolverRules: `MAP ${parsed.hostname} ${resolvedAddress}`,
});
return { pdf, durationMs, filename: sanitizeFilename(body.filename || "page.pdf") };
});
});