From 76b2179be947e5c741f85687f728d2c0f1016d4d Mon Sep 17 00:00:00 2001 From: DocFast CEO Date: Mon, 9 Mar 2026 20:07:27 +0100 Subject: [PATCH] refactor: extract shared PDF route handler to eliminate convert route duplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New src/utils/pdf-handler.ts with handlePdfRoute() helper - Handles: content-type validation, PDF option validation, slot acquire/release, error mapping, response headers - Refactored convert.ts from 388 to 233 lines (40% reduction) - 10 TDD tests for the new helper (RED→GREEN verified) - All 618 tests passing, zero tsc --noEmit errors --- src/__tests__/pdf-handler.test.ts | 149 ++++++++++++++++++++++ src/routes/convert.ts | 203 ++++-------------------------- src/utils/pdf-handler.ts | 92 ++++++++++++++ 3 files changed, 265 insertions(+), 179 deletions(-) create mode 100644 src/__tests__/pdf-handler.test.ts create mode 100644 src/utils/pdf-handler.ts diff --git a/src/__tests__/pdf-handler.test.ts b/src/__tests__/pdf-handler.test.ts new file mode 100644 index 0000000..31a6906 --- /dev/null +++ b/src/__tests__/pdf-handler.test.ts @@ -0,0 +1,149 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { Request, Response } from "express"; + +// We'll test the handlePdfRoute helper +// RED phase: these tests should fail because pdf-handler.ts doesn't exist yet + +describe("handlePdfRoute", () => { + let mockReq: Partial; + let mockRes: Partial; + let statusFn: ReturnType; + let jsonFn: ReturnType; + let setHeaderFn: ReturnType; + let sendFn: ReturnType; + + beforeEach(() => { + jsonFn = vi.fn(); + sendFn = vi.fn(); + setHeaderFn = vi.fn(); + statusFn = vi.fn().mockReturnValue({ json: jsonFn, send: sendFn, end: vi.fn() }); + mockRes = { + status: statusFn, + json: jsonFn, + send: sendFn, + setHeader: setHeaderFn, + } as Partial; + mockReq = { + headers: { "content-type": "application/json" }, + body: {}, + acquirePdfSlot: undefined, + releasePdfSlot: undefined, + } as Partial; + }); + + it("rejects non-JSON content type with 415", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + mockReq.headers = { "content-type": "text/plain" }; + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => ({ + pdf: Buffer.from("test"), + durationMs: 10, + filename: "test.pdf", + })); + expect(statusFn).toHaveBeenCalledWith(415); + expect(jsonFn).toHaveBeenCalledWith({ error: "Unsupported Content-Type. Use application/json." }); + }); + + it("rejects invalid PDF options with 400", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + mockReq.body = { scale: 99 }; + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => ({ + pdf: Buffer.from("test"), + durationMs: 10, + filename: "test.pdf", + })); + expect(statusFn).toHaveBeenCalledWith(400); + }); + + it("returns 503 on QUEUE_FULL error", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => { + throw new Error("QUEUE_FULL"); + }); + expect(statusFn).toHaveBeenCalledWith(503); + }); + + it("returns 504 on PDF_TIMEOUT error", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => { + throw new Error("PDF_TIMEOUT"); + }); + expect(statusFn).toHaveBeenCalledWith(504); + }); + + it("returns 500 on generic error", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => { + throw new Error("Something broke"); + }); + expect(statusFn).toHaveBeenCalledWith(500); + }); + + it("sets correct response headers on success", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + const pdfBuf = Buffer.from("fake-pdf"); + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => ({ + pdf: pdfBuf, + durationMs: 42, + filename: "report.pdf", + })); + expect(setHeaderFn).toHaveBeenCalledWith("Content-Type", "application/pdf"); + expect(setHeaderFn).toHaveBeenCalledWith("Content-Disposition", 'inline; filename="report.pdf"'); + expect(setHeaderFn).toHaveBeenCalledWith("X-Render-Time", "42"); + expect(sendFn).toHaveBeenCalledWith(pdfBuf); + }); + + it("acquires and releases PDF slot when available", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + const acquireFn = vi.fn(); + const releaseFn = vi.fn(); + mockReq.acquirePdfSlot = acquireFn; + mockReq.releasePdfSlot = releaseFn; + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => ({ + pdf: Buffer.from("test"), + durationMs: 10, + filename: "test.pdf", + })); + expect(acquireFn).toHaveBeenCalledOnce(); + expect(releaseFn).toHaveBeenCalledOnce(); + }); + + it("releases PDF slot even on error", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + const acquireFn = vi.fn(); + const releaseFn = vi.fn(); + mockReq.acquirePdfSlot = acquireFn; + mockReq.releasePdfSlot = releaseFn; + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => { + throw new Error("boom"); + }); + expect(releaseFn).toHaveBeenCalledOnce(); + }); + + it("sanitizes filename with special characters", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + await handlePdfRoute(mockReq as Request, mockRes as Response, async () => ({ + pdf: Buffer.from("test"), + durationMs: 10, + filename: 'file"with\nspecial.pdf', + })); + const dispositionCall = setHeaderFn.mock.calls.find( + (c: unknown[]) => c[0] === "Content-Disposition" + ); + expect(dispositionCall).toBeTruthy(); + // Quotes and newlines should be sanitized + expect(dispositionCall![1]).not.toContain('"with'); + expect(dispositionCall![1]).not.toContain('\n'); + }); + + it("passes validated/sanitized options to renderFn", async () => { + const { handlePdfRoute } = await import("../utils/pdf-handler.js"); + let receivedOptions: Record | undefined; + mockReq.body = { format: "a4", landscape: true }; + await handlePdfRoute(mockReq as Request, mockRes as Response, async (sanitizedOptions) => { + receivedOptions = sanitizedOptions as Record; + return { pdf: Buffer.from("test"), durationMs: 10, filename: "test.pdf" }; + }); + expect(receivedOptions).toBeDefined(); + expect(receivedOptions!.format).toBe("A4"); // validatePdfOptions normalizes a4 → A4 + }); +}); diff --git a/src/routes/convert.ts b/src/routes/convert.ts index 0e14aa7..f3bad3c 100644 --- a/src/routes/convert.ts +++ b/src/routes/convert.ts @@ -3,33 +3,12 @@ import { renderPdf, renderUrlPdf } from "../services/browser.js"; import { markdownToHtml, wrapHtml } from "../services/markdown.js"; import dns from "node:dns/promises"; import logger from "../services/logger.js"; -import { errorMessage } from "../utils/errors.js"; import { isPrivateIP } from "../utils/network.js"; - import { sanitizeFilename } from "../utils/sanitize.js"; -import { validatePdfOptions } from "../utils/pdf-options.js"; +import { handlePdfRoute } from "../utils/pdf-handler.js"; export const convertRouter = Router(); -interface ConvertBody { - html?: string; - markdown?: string; - css?: string; - format?: string; - landscape?: boolean; - margin?: { top?: string; right?: string; bottom?: string; left?: string }; - printBackground?: boolean; - filename?: string; - headerTemplate?: string; - footerTemplate?: string; - displayHeaderFooter?: boolean; - scale?: number; - pageRanges?: string; - preferCSSPageSize?: boolean; - width?: string; - height?: string; -} - /** * @openapi * /v1/convert/html: @@ -80,66 +59,18 @@ interface ConvertBody { * description: PDF generation failed */ convertRouter.post("/html", async (req: Request, res: Response) => { - let slotAcquired = false; - try { - // Reject non-JSON content types - const ct = req.headers["content-type"] || ""; - if (!ct.includes("application/json")) { - res.status(415).json({ error: "Unsupported Content-Type. Use application/json." }); - return; - } - const body: ConvertBody = - typeof req.body === "string" ? { html: req.body } : req.body; - + await handlePdfRoute(req, res, async (sanitizedOptions) => { + const body = typeof req.body === "string" ? { html: req.body } : req.body; if (!body.html) { res.status(400).json({ error: "Missing 'html' field" }); - return; + return null; } - - // Validate PDF options - const validation = validatePdfOptions(body); - if (!validation.valid) { - res.status(400).json({ error: validation.error }); - return; - } - - // Acquire concurrency slot - if (req.acquirePdfSlot) { - await req.acquirePdfSlot(); - slotAcquired = true; - } - - // Wrap bare HTML fragments const fullHtml = body.html.includes(" { * description: PDF generation failed */ convertRouter.post("/markdown", async (req: Request, res: Response) => { - let slotAcquired = false; - try { - // Reject non-JSON content types - const ct = req.headers["content-type"] || ""; - if (!ct.includes("application/json")) { - res.status(415).json({ error: "Unsupported Content-Type. Use application/json." }); - return; - } - const body: ConvertBody = - typeof req.body === "string" ? { markdown: req.body } : req.body; - + await handlePdfRoute(req, res, async (sanitizedOptions) => { + const body = typeof req.body === "string" ? { markdown: req.body } : req.body; if (!body.markdown) { res.status(400).json({ error: "Missing 'markdown' field" }); - return; + return null; } - - // Validate PDF options - const validation = validatePdfOptions(body); - if (!validation.valid) { - res.status(400).json({ error: validation.error }); - return; - } - - // Acquire concurrency slot - if (req.acquirePdfSlot) { - await req.acquirePdfSlot(); - slotAcquired = true; - } - const html = markdownToHtml(body.markdown, body.css); - const { pdf, durationMs } = await renderPdf(html, { - ...validation.sanitized, - }); - - const filename = sanitizeFilename(body.filename || "document.pdf"); - res.setHeader("Content-Type", "application/pdf"); - res.setHeader("Content-Disposition", `inline; filename="${filename}"`); - res.setHeader("X-Render-Time", String(durationMs)); - res.send(pdf); - } catch (err: unknown) { - logger.error({ err }, "Convert MD error"); - const msg = errorMessage(err); - if (msg === "QUEUE_FULL") { - res.status(503).json({ error: "Server busy — too many concurrent PDF generations. Please try again in a few seconds." }); - return; - } - if (msg === "PDF_TIMEOUT") { - res.status(504).json({ error: "PDF generation timed out." }); - return; - } - res.status(500).json({ error: "PDF generation failed." }); - } finally { - if (slotAcquired && req.releasePdfSlot) { - req.releasePdfSlot(); - } - } + const { pdf, durationMs } = await renderPdf(html, { ...sanitizedOptions }); + return { pdf, durationMs, filename: sanitizeFilename(body.filename || "document.pdf") }; + }); }); /** @@ -303,19 +188,12 @@ convertRouter.post("/markdown", async (req: Request, res: Response) => { * description: PDF generation failed */ convertRouter.post("/url", async (req: Request, res: Response) => { - let slotAcquired = false; - try { - // Reject non-JSON content types - const ct = req.headers["content-type"] || ""; - if (!ct.includes("application/json")) { - res.status(415).json({ error: "Unsupported Content-Type. Use application/json." }); - return; - } - const body = req.body as { url?: string; format?: string; landscape?: boolean; margin?: string | { top?: string; right?: string; bottom?: string; left?: string }; printBackground?: boolean; waitUntil?: string; filename?: string; headerTemplate?: string; footerTemplate?: string; displayHeaderFooter?: boolean; scale?: number; pageRanges?: string; preferCSSPageSize?: boolean; width?: string; height?: string }; + await handlePdfRoute(req, res, async (sanitizedOptions) => { + const body = req.body as { url?: string; waitUntil?: string; filename?: string }; if (!body.url) { res.status(400).json({ error: "Missing 'url' field" }); - return; + return null; } // URL validation + SSRF protection @@ -324,65 +202,32 @@ convertRouter.post("/url", async (req: Request, res: Response) => { parsed = new URL(body.url); if (!["http:", "https:"].includes(parsed.protocol)) { res.status(400).json({ error: "Only http/https URLs are supported" }); - return; + return null; } } catch { res.status(400).json({ error: "Invalid URL" }); - return; + return null; } - // DNS lookup to block private/reserved IPs + pin resolution to prevent DNS rebinding + // DNS lookup to block private/reserved IPs + pin resolution let resolvedAddress: string; try { const { address } = await dns.lookup(parsed.hostname); if (isPrivateIP(address)) { res.status(400).json({ error: "URL resolves to a private/internal IP address" }); - return; + return null; } resolvedAddress = address; } catch { res.status(400).json({ error: "DNS lookup failed for URL hostname" }); - return; - } - - // Validate PDF options - const validation = validatePdfOptions(body); - if (!validation.valid) { - res.status(400).json({ error: validation.error }); - return; - } - - // Acquire concurrency slot - if (req.acquirePdfSlot) { - await req.acquirePdfSlot(); - slotAcquired = true; + return null; } const { pdf, durationMs } = await renderUrlPdf(body.url, { - ...validation.sanitized, + ...sanitizedOptions, hostResolverRules: `MAP ${parsed.hostname} ${resolvedAddress}`, }); - const filename = sanitizeFilename(body.filename || "page.pdf"); - res.setHeader("Content-Type", "application/pdf"); - res.setHeader("Content-Disposition", `inline; filename="${filename}"`); - res.setHeader("X-Render-Time", String(durationMs)); - res.send(pdf); - } catch (err: unknown) { - logger.error({ err }, "Convert URL error"); - const msg = errorMessage(err); - if (msg === "QUEUE_FULL") { - res.status(503).json({ error: "Server busy — too many concurrent PDF generations. Please try again in a few seconds." }); - return; - } - if (msg === "PDF_TIMEOUT") { - res.status(504).json({ error: "PDF generation timed out." }); - return; - } - res.status(500).json({ error: "PDF generation failed." }); - } finally { - if (slotAcquired && req.releasePdfSlot) { - req.releasePdfSlot(); - } - } + return { pdf, durationMs, filename: sanitizeFilename(body.filename || "page.pdf") }; + }); }); diff --git a/src/utils/pdf-handler.ts b/src/utils/pdf-handler.ts new file mode 100644 index 0000000..e7b5e92 --- /dev/null +++ b/src/utils/pdf-handler.ts @@ -0,0 +1,92 @@ +import { Request, Response } from "express"; +import type { PaperFormat } from "puppeteer"; +import logger from "../services/logger.js"; +import { errorMessage } from "./errors.js"; +import { sanitizeFilename } from "./sanitize.js"; +import { validatePdfOptions } from "./pdf-options.js"; + +export interface PdfRenderResult { + pdf: Buffer; + durationMs: number; + filename: string; +} + +export interface SanitizedPdfOptions { + format?: PaperFormat; + landscape?: boolean; + margin?: { top?: string; right?: string; bottom?: string; left?: string }; + printBackground?: boolean; + headerTemplate?: string; + footerTemplate?: string; + displayHeaderFooter?: boolean; + scale?: number; + pageRanges?: string; + preferCSSPageSize?: boolean; + width?: string; + height?: string; +} + +/** + * Shared handler for PDF generation routes. Handles: + * - Content-Type validation (415) + * - PDF option validation (400) + * - Concurrency slot acquire/release + * - Error mapping (QUEUE_FULL→503, PDF_TIMEOUT→504, generic→500) + * - Response headers (Content-Type, Content-Disposition, X-Render-Time) + * + * The renderFn receives sanitized PDF options and must return the PDF buffer + metadata. + */ +export async function handlePdfRoute( + req: Request, + res: Response, + renderFn: (sanitizedOptions: SanitizedPdfOptions) => Promise +): Promise { + let slotAcquired = false; + try { + // Reject non-JSON content types + const ct = req.headers["content-type"] || ""; + if (!ct.includes("application/json")) { + res.status(415).json({ error: "Unsupported Content-Type. Use application/json." }); + return; + } + + // Validate PDF options + const validation = validatePdfOptions(req.body); + if (!validation.valid) { + res.status(400).json({ error: validation.error }); + return; + } + + // Acquire concurrency slot + if (req.acquirePdfSlot) { + await req.acquirePdfSlot(); + slotAcquired = true; + } + + const result = await renderFn(validation.sanitized!); + if (!result) return; // renderFn already sent a response (e.g., 400 for missing field) + const { pdf, durationMs, filename } = result; + + const safeName = sanitizeFilename(filename || "document.pdf"); + res.setHeader("Content-Type", "application/pdf"); + res.setHeader("Content-Disposition", `inline; filename="${safeName}"`); + res.setHeader("X-Render-Time", String(durationMs)); + res.send(pdf); + } catch (err: unknown) { + logger.error({ err }, "PDF route error"); + const msg = errorMessage(err); + if (msg === "QUEUE_FULL") { + res.status(503).json({ error: "Server busy — too many concurrent PDF generations. Please try again in a few seconds." }); + return; + } + if (msg === "PDF_TIMEOUT") { + res.status(504).json({ error: "PDF generation timed out." }); + return; + } + res.status(500).json({ error: "PDF generation failed." }); + } finally { + if (slotAcquired && req.releasePdfSlot) { + req.releasePdfSlot(); + } + } +}