From af7637027e5a22668bf1d49781a59fc441aab8e5 Mon Sep 17 00:00:00 2001 From: OpenClaw Date: Fri, 6 Mar 2026 15:06:53 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20PDF=20output=20=E2=80=94=20format=3Dpdf?= =?UTF-8?q?=20with=20paper=20size,=20margins,=20scale=20options?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/routes/__tests__/pdf.test.ts | 244 +++++++++++++++++++++++++++++ src/routes/playground.ts | 33 +++- src/routes/screenshot.ts | 32 ++++ src/services/__tests__/pdf.test.ts | 133 ++++++++++++++++ src/services/screenshot.ts | 25 ++- 5 files changed, 460 insertions(+), 7 deletions(-) create mode 100644 src/routes/__tests__/pdf.test.ts create mode 100644 src/services/__tests__/pdf.test.ts diff --git a/src/routes/__tests__/pdf.test.ts b/src/routes/__tests__/pdf.test.ts new file mode 100644 index 0000000..2b11924 --- /dev/null +++ b/src/routes/__tests__/pdf.test.ts @@ -0,0 +1,244 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' +import { Request, Response } from 'express' +import { screenshotRouter } from '../screenshot.js' +import { playgroundRouter } from '../playground.js' + +// Mock dependencies +vi.mock('../../services/screenshot.js', () => ({ + takeScreenshot: vi.fn() +})) + +vi.mock('../../services/cache.js', () => ({ + screenshotCache: { + get: vi.fn(), + put: vi.fn(), + shouldBypass: vi.fn() + } +})) + +vi.mock('../../services/watermark.js', () => ({ + addWatermark: vi.fn() +})) + +vi.mock('../../services/logger.js', () => ({ + default: { + error: vi.fn() + } +})) + +vi.mock('../../middleware/auth.js', () => ({ + authMiddleware: vi.fn((req, res, next) => { + req.apiKeyInfo = { key: 'test_key', tier: 'pro', email: 'test@test.com' } + next() + }) +})) + +vi.mock('../../middleware/usage.js', () => ({ + usageMiddleware: vi.fn((req, res, next) => next()) +})) + +vi.mock('express-rate-limit', () => ({ + default: vi.fn(() => (req: any, res: any, next: any) => next()) +})) + +const { takeScreenshot } = await import('../../services/screenshot.js') +const { screenshotCache } = await import('../../services/cache.js') +const { addWatermark } = await import('../../services/watermark.js') +const mockTakeScreenshot = vi.mocked(takeScreenshot) +const mockCache = vi.mocked(screenshotCache) +const mockAddWatermark = vi.mocked(addWatermark) + +function createMockRequest(params: any = {}, overrides: any = {}): Partial { + const method = overrides.method || 'POST' + return { + method, + body: method === 'POST' ? params : {}, + query: method === 'GET' ? params : {}, + headers: { authorization: 'Bearer test_key' }, + apiKeyInfo: { key: 'test_key', tier: 'pro', email: 'test@test.com' }, + ip: '127.0.0.1', + socket: { remoteAddress: '127.0.0.1' } as any, + ...overrides + } +} + +function createMockResponse(): Partial { + const res: any = { + status: vi.fn().mockReturnThis(), + json: vi.fn().mockReturnThis(), + send: vi.fn().mockReturnThis(), + setHeader: vi.fn().mockReturnThis() + } + return res +} + +describe('PDF Output', () => { + beforeEach(() => { + vi.clearAllMocks() + mockCache.shouldBypass.mockReturnValue(false) + mockCache.get.mockReturnValue(null) + }) + + afterEach(() => { + vi.restoreAllMocks() + }) + + describe('POST /v1/screenshot with format=pdf', () => { + it('should return PDF with correct Content-Type', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4 fake pdf content') + mockTakeScreenshot.mockResolvedValueOnce({ + buffer: pdfBuffer, + contentType: 'application/pdf', + retryCount: 0 + }) + + const req = createMockRequest({ url: 'https://example.com', format: 'pdf' }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.setHeader).toHaveBeenCalledWith('Content-Type', 'application/pdf') + expect(res.send).toHaveBeenCalledWith(pdfBuffer) + }) + + it('should set Content-Disposition for PDF', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4 fake pdf content') + mockTakeScreenshot.mockResolvedValueOnce({ + buffer: pdfBuffer, + contentType: 'application/pdf', + retryCount: 0 + }) + + const req = createMockRequest({ url: 'https://example.com', format: 'pdf' }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.setHeader).toHaveBeenCalledWith('Content-Disposition', 'attachment; filename="screenshot.pdf"') + }) + + it('should pass pdfFormat option to takeScreenshot', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4') + mockTakeScreenshot.mockResolvedValueOnce({ buffer: pdfBuffer, contentType: 'application/pdf', retryCount: 0 }) + + const req = createMockRequest({ url: 'https://example.com', format: 'pdf', pdfFormat: 'a4' }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(mockTakeScreenshot).toHaveBeenCalledWith(expect.objectContaining({ + format: 'pdf', + pdfFormat: 'a4' + })) + }) + + it('should pass pdfLandscape option to takeScreenshot', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4') + mockTakeScreenshot.mockResolvedValueOnce({ buffer: pdfBuffer, contentType: 'application/pdf', retryCount: 0 }) + + const req = createMockRequest({ url: 'https://example.com', format: 'pdf', pdfLandscape: true }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(mockTakeScreenshot).toHaveBeenCalledWith(expect.objectContaining({ + format: 'pdf', + pdfLandscape: true + })) + }) + + it('should return 400 when format=pdf with selector', async () => { + const req = createMockRequest({ url: 'https://example.com', format: 'pdf', selector: '#content' }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.status).toHaveBeenCalledWith(400) + expect(res.json).toHaveBeenCalledWith({ error: 'format "pdf" is mutually exclusive with selector and clip' }) + }) + + it('should return 400 when format=pdf with clip', async () => { + const req = createMockRequest({ url: 'https://example.com', format: 'pdf', clip: { x: 0, y: 0, width: 100, height: 100 } }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.status).toHaveBeenCalledWith(400) + expect(res.json).toHaveBeenCalledWith({ error: 'format "pdf" is mutually exclusive with selector and clip' }) + }) + + it('should return 400 for invalid pdfFormat', async () => { + const req = createMockRequest({ url: 'https://example.com', format: 'pdf', pdfFormat: 'b5' }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.status).toHaveBeenCalledWith(400) + expect(res.json).toHaveBeenCalledWith({ error: 'pdfFormat must be one of: a4, letter, legal, a3' }) + }) + + it('should return 400 when pdfScale is out of range (too low)', async () => { + const req = createMockRequest({ url: 'https://example.com', format: 'pdf', pdfScale: 0.05 }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.status).toHaveBeenCalledWith(400) + expect(res.json).toHaveBeenCalledWith({ error: 'pdfScale must be between 0.1 and 2.0' }) + }) + + it('should return 400 when pdfScale is out of range (too high)', async () => { + const req = createMockRequest({ url: 'https://example.com', format: 'pdf', pdfScale: 3.0 }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.status).toHaveBeenCalledWith(400) + expect(res.json).toHaveBeenCalledWith({ error: 'pdfScale must be between 0.1 and 2.0' }) + }) + }) + + describe('GET /v1/screenshot with format=pdf', () => { + it('should handle PDF via GET request', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4') + mockTakeScreenshot.mockResolvedValueOnce({ buffer: pdfBuffer, contentType: 'application/pdf', retryCount: 0 }) + + const req = createMockRequest({ url: 'https://example.com', format: 'pdf' }, { method: 'GET' }) + const res = createMockResponse() + + const handler = screenshotRouter.stack.find(layer => layer.route?.methods.get)?.route.stack[0].handle + await handler(req, res, vi.fn()) + + expect(res.setHeader).toHaveBeenCalledWith('Content-Type', 'application/pdf') + expect(res.setHeader).toHaveBeenCalledWith('Content-Disposition', 'attachment; filename="screenshot.pdf"') + }) + }) + + describe('Playground PDF', () => { + it('should return PDF without watermark in playground', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4 playground pdf') + mockTakeScreenshot.mockResolvedValueOnce({ buffer: pdfBuffer, contentType: 'application/pdf', retryCount: 0 }) + + const req = createMockRequest({ url: 'https://example.com', format: 'pdf' }) + const res = createMockResponse() + + const handler = playgroundRouter.stack.find(layer => layer.route?.methods.post)?.route.stack[1].handle + await handler(req, res, vi.fn()) + + // Should NOT call addWatermark for PDF + expect(mockAddWatermark).not.toHaveBeenCalled() + expect(res.setHeader).toHaveBeenCalledWith('Content-Type', 'application/pdf') + expect(res.setHeader).toHaveBeenCalledWith('Content-Disposition', 'attachment; filename="screenshot.pdf"') + expect(res.send).toHaveBeenCalledWith(pdfBuffer) + }) + }) +}) diff --git a/src/routes/playground.ts b/src/routes/playground.ts index f39f858..a444c3e 100644 --- a/src/routes/playground.ts +++ b/src/routes/playground.ts @@ -84,7 +84,7 @@ const playgroundLimiter = rateLimit({ * schema: { $ref: "#/components/schemas/Error" } */ playgroundRouter.post("/", playgroundLimiter, async (req, res) => { - const { url, format, width, height, fullPage, quality, waitForSelector, deviceScale, waitUntil } = req.body; + const { url, format, width, height, fullPage, quality, waitForSelector, deviceScale, waitUntil, pdfFormat, pdfLandscape, pdfPrintBackground, pdfScale, pdfMargin } = req.body; if (!url || typeof url !== "string") { res.status(400).json({ error: "Missing required parameter: url" }); @@ -94,7 +94,7 @@ playgroundRouter.post("/", playgroundLimiter, async (req, res) => { // Enforce reasonable limits for playground const safeWidth = Math.min(Math.max(parseInt(width, 10) || 1280, 320), 1920); const safeHeight = Math.min(Math.max(parseInt(height, 10) || 800, 200), 1080); - const safeFormat = ["png", "jpeg", "webp"].includes(format) ? format : "png"; + const safeFormat = ["png", "jpeg", "webp", "pdf"].includes(format) ? format : "png"; const safeFullPage = fullPage === true; const safeQuality = safeFormat === "png" ? undefined : Math.min(Math.max(parseInt(quality, 10) || 80, 1), 100); const safeDeviceScale = Math.min(Math.max(parseInt(deviceScale, 10) || 1, 1), 3); @@ -105,9 +105,9 @@ playgroundRouter.post("/", playgroundLimiter, async (req, res) => { ? waitForSelector : undefined; try { - const result = await takeScreenshot({ + const screenshotOpts: any = { url, - format: safeFormat as "png" | "jpeg" | "webp", + format: safeFormat as "png" | "jpeg" | "webp" | "pdf", width: safeWidth, height: safeHeight, fullPage: safeFullPage, @@ -115,9 +115,30 @@ playgroundRouter.post("/", playgroundLimiter, async (req, res) => { deviceScale: safeDeviceScale, waitUntil: safeWaitUntil as any, waitForSelector: safeWaitForSelector, - }); + }; - // Add watermark + if (safeFormat === "pdf") { + if (pdfFormat) screenshotOpts.pdfFormat = pdfFormat; + if (pdfLandscape !== undefined) screenshotOpts.pdfLandscape = pdfLandscape; + if (pdfPrintBackground !== undefined) screenshotOpts.pdfPrintBackground = pdfPrintBackground; + if (pdfScale !== undefined) screenshotOpts.pdfScale = pdfScale; + if (pdfMargin) screenshotOpts.pdfMargin = pdfMargin; + } + + const result = await takeScreenshot(screenshotOpts); + + // Skip watermark for PDF (can't watermark a PDF the same way) + if (safeFormat === "pdf") { + res.setHeader("Content-Type", result.contentType); + res.setHeader("Content-Length", result.buffer.length); + res.setHeader("Cache-Control", "no-store"); + res.setHeader("X-Playground", "true"); + res.setHeader("Content-Disposition", 'attachment; filename="screenshot.pdf"'); + res.send(result.buffer); + return; + } + + // Add watermark for image formats const watermarked = await addWatermark(result.buffer, safeWidth, safeHeight); res.setHeader("Content-Type", result.contentType); diff --git a/src/routes/screenshot.ts b/src/routes/screenshot.ts index 07dc447..7ec12f9 100644 --- a/src/routes/screenshot.ts +++ b/src/routes/screenshot.ts @@ -442,6 +442,11 @@ async function handleScreenshotRequest(req: any, res: any) { clipY, clipW, clipH, + pdfFormat, + pdfLandscape, + pdfPrintBackground, + pdfScale, + pdfMargin, } = source; if (!url || typeof url !== "string") { @@ -449,6 +454,23 @@ async function handleScreenshotRequest(req: any, res: any) { return; } + // PDF-specific validation + if (format === "pdf") { + if (selector || clip || (clipX || clipY || clipW || clipH)) { + res.status(400).json({ error: 'format "pdf" is mutually exclusive with selector and clip' }); + return; + } + if (pdfFormat && !["a4", "letter", "legal", "a3"].includes(pdfFormat)) { + res.status(400).json({ error: "pdfFormat must be one of: a4, letter, legal, a3" }); + return; + } + const scale = pdfScale !== undefined ? parseFloat(pdfScale) : undefined; + if (scale !== undefined && (scale < 0.1 || scale > 2.0)) { + res.status(400).json({ error: "pdfScale must be between 0.1 and 2.0" }); + return; + } + } + // Validate userAgent parameter if (userAgent && typeof userAgent === 'string') { if (userAgent.length > 500) { @@ -566,6 +588,13 @@ async function handleScreenshotRequest(req: any, res: any) { selector: selector || undefined, userAgent: userAgent || undefined, clip: normalizedClip || undefined, + ...(format === "pdf" ? { + pdfFormat: pdfFormat || undefined, + pdfLandscape: pdfLandscape === true || pdfLandscape === "true" || undefined, + pdfPrintBackground: pdfPrintBackground === false || pdfPrintBackground === "false" ? false : undefined, + pdfScale: pdfScale ? parseFloat(pdfScale) : undefined, + pdfMargin: pdfMargin || undefined, + } : {}), }; try { @@ -596,6 +625,9 @@ async function handleScreenshotRequest(req: any, res: any) { res.setHeader("Cache-Control", "no-store"); res.setHeader("X-Cache", "MISS"); res.setHeader("X-Retry-Count", String(result.retryCount ?? 0)); + if (format === "pdf") { + res.setHeader("Content-Disposition", 'attachment; filename="screenshot.pdf"'); + } res.send(result.buffer); } catch (err: any) { logger.error({ err: err.message, url }, "Screenshot failed"); diff --git a/src/services/__tests__/pdf.test.ts b/src/services/__tests__/pdf.test.ts new file mode 100644 index 0000000..36d2939 --- /dev/null +++ b/src/services/__tests__/pdf.test.ts @@ -0,0 +1,133 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { takeScreenshot } from '../screenshot.js' + +// Mock browser +vi.mock('../browser.js', () => ({ + acquirePage: vi.fn(), + releasePage: vi.fn() +})) + +vi.mock('../ssrf.js', () => ({ + validateUrl: vi.fn() +})) + +vi.mock('../logger.js', () => ({ + default: { warn: vi.fn(), error: vi.fn() } +})) + +const { acquirePage, releasePage } = await import('../browser.js') +const mockAcquirePage = vi.mocked(acquirePage) + +describe('takeScreenshot - PDF format', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should call page.pdf() for format=pdf and return application/pdf', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4 test') + const mockPage = { + setViewport: vi.fn(), + goto: vi.fn(), + pdf: vi.fn().mockResolvedValue(pdfBuffer), + emulateMediaFeatures: vi.fn(), + setUserAgent: vi.fn(), + addStyleTag: vi.fn(), + waitForSelector: vi.fn(), + evaluate: vi.fn(), + $: vi.fn(), + screenshot: vi.fn() + } + mockAcquirePage.mockResolvedValue({ page: mockPage as any, instance: {} as any }) + + const result = await takeScreenshot({ + url: 'https://example.com', + format: 'pdf' as any + }) + + expect(mockPage.pdf).toHaveBeenCalled() + expect(mockPage.screenshot).not.toHaveBeenCalled() + expect(result.contentType).toBe('application/pdf') + expect(result.buffer.toString().startsWith('%PDF')).toBe(true) + }) + + it('should pass PDF options to page.pdf()', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4') + const mockPage = { + setViewport: vi.fn(), + goto: vi.fn(), + pdf: vi.fn().mockResolvedValue(pdfBuffer), + emulateMediaFeatures: vi.fn(), + setUserAgent: vi.fn(), + addStyleTag: vi.fn(), + waitForSelector: vi.fn(), + evaluate: vi.fn(), + $: vi.fn(), + screenshot: vi.fn() + } + mockAcquirePage.mockResolvedValue({ page: mockPage as any, instance: {} as any }) + + await takeScreenshot({ + url: 'https://example.com', + format: 'pdf' as any, + pdfFormat: 'letter', + pdfLandscape: true, + pdfPrintBackground: false, + pdfScale: 1.5, + pdfMargin: { top: '2cm', right: '2cm', bottom: '2cm', left: '2cm' } + } as any) + + expect(mockPage.pdf).toHaveBeenCalledWith({ + format: 'letter', + landscape: true, + printBackground: false, + scale: 1.5, + margin: { top: '2cm', right: '2cm', bottom: '2cm', left: '2cm' } + }) + }) + + it('should use default PDF options when none specified', async () => { + const pdfBuffer = Buffer.from('%PDF-1.4') + const mockPage = { + setViewport: vi.fn(), + goto: vi.fn(), + pdf: vi.fn().mockResolvedValue(pdfBuffer), + emulateMediaFeatures: vi.fn(), + setUserAgent: vi.fn(), + addStyleTag: vi.fn(), + waitForSelector: vi.fn(), + evaluate: vi.fn(), + $: vi.fn(), + screenshot: vi.fn() + } + mockAcquirePage.mockResolvedValue({ page: mockPage as any, instance: {} as any }) + + await takeScreenshot({ + url: 'https://example.com', + format: 'pdf' as any + }) + + expect(mockPage.pdf).toHaveBeenCalledWith({ + format: 'a4', + landscape: false, + printBackground: true, + scale: 1.0, + margin: { top: '1cm', right: '1cm', bottom: '1cm', left: '1cm' } + }) + }) + + it('should reject format=pdf with selector', async () => { + await expect(takeScreenshot({ + url: 'https://example.com', + format: 'pdf' as any, + selector: '#content' + })).rejects.toThrow('format "pdf" is mutually exclusive with selector and clip') + }) + + it('should reject format=pdf with clip', async () => { + await expect(takeScreenshot({ + url: 'https://example.com', + format: 'pdf' as any, + clip: { x: 0, y: 0, width: 100, height: 100 } + })).rejects.toThrow('format "pdf" is mutually exclusive with selector and clip') + }) +}) diff --git a/src/services/screenshot.ts b/src/services/screenshot.ts index df0b966..bade970 100644 --- a/src/services/screenshot.ts +++ b/src/services/screenshot.ts @@ -6,7 +6,7 @@ import logger from "./logger.js"; export interface ScreenshotOptions { url: string; - format?: "png" | "jpeg" | "webp"; + format?: "png" | "jpeg" | "webp" | "pdf"; width?: number; height?: number; fullPage?: boolean; @@ -22,6 +22,11 @@ export interface ScreenshotOptions { selector?: string; userAgent?: string; clip?: { x: number; y: number; width: number; height: number }; + pdfFormat?: string; + pdfLandscape?: boolean; + pdfPrintBackground?: boolean; + pdfScale?: number; + pdfMargin?: { top?: string; right?: string; bottom?: string; left?: string }; } export interface ScreenshotResult { @@ -99,6 +104,11 @@ export async function takeScreenshot(opts: ScreenshotOptions): Promise((_, reject) => setTimeout(() => reject(new Error("SCREENSHOT_TIMEOUT")), TIMEOUT_MS)), ]); + // PDF output branch + if (format === "pdf") { + const pdfResult = await page.pdf({ + format: (opts.pdfFormat || 'a4') as any, + landscape: opts.pdfLandscape ?? false, + printBackground: opts.pdfPrintBackground ?? true, + scale: opts.pdfScale ?? 1.0, + margin: opts.pdfMargin || { top: '1cm', right: '1cm', bottom: '1cm', left: '1cm' } + }); + const buffer = Buffer.from(pdfResult as unknown as ArrayBuffer); + return { buffer, contentType: 'application/pdf' }; + } + const screenshotOpts: any = { type: format === "webp" ? "webp" : format, encoding: "binary",