/**
 * render-worker.ts — off-main-thread PDF rendering via Web Worker.
 *
 * NOTE: pdf.min.js is prepended to the compiled output of this file at build
 * time by build.cjs (see prependPdfjsPlugin). It sets `self.pdfjsLib` as a
 * global, which this module reads via the declaration below.
 *
 * Worker message protocol
 * ────────────────────────
 * Main → Worker:
 *   { type: "init",    pdfData: ArrayBuffer }   (transferred, not copied)
 *   { type: "render",  pageNum, scale, gen }
 *   { type: "cleanup" }                         — pdfDoc.cleanup(): free internal caches
 *   { type: "destroy" }                         — pdfDoc.destroy() + self.close()
 *
 * Worker → Main:
 *   { type: "ready",       numPages, dims }         — page-1 dim used as stub for all pages
 *   { type: "rendered",    pageNum, gen, bitmap }   (bitmap as transferable)
 *   { type: "textcontent", pageNum, gen, items }    — text runs, posted after "rendered"
 *   { type: "error",       message }                — posted when init fails
 *
 * WebKit nested-worker constraint
 * ────────────────────────────────
 * WebKit forbids nested workers (a worker spawning a worker). PDF.js normally
 * spawns pdf.worker.min.js from wherever it is used — which here would be a
 * nested worker. Instead, pdf.worker.min.js is bundled into this file (by
 * build.cjs) preceded by a preamble that sets globalThis.window = globalThis.
 * This causes pdf.worker.min.js to expose its WorkerMessageHandler on
 * globalThis.pdfjsWorker without auto-calling initializeFromPort(self). PDF.js
 * detects the pre-loaded handler via _mainThreadWorkerMessageHandler and runs
 * inline in this thread — no nested worker creation attempted at all.
 *
 * Render queue
 * ────────────
 * With disableFontFace: true, every text glyph becomes canvas path operations.
 * A text-heavy page can require 20 000+ synchronous canvas calls. Running
 * multiple page renders concurrently exhausts the worker thread and causes
 * system-wide CPU saturation (perceived UI lock). The queue serialises renders
 * so exactly one page is active at a time. Incoming render messages for the
 * same page supersede any queued (but not yet started) request.
 */

import type { PDFDocumentProxy } from "pdfjs-dist";
import type { WorkerInbound, WorkerOutbound, PageDimensions, TextItem } from "./types.js";

// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
declare const pdfjsLib: typeof import("pdfjs-dist");

/** The currently loaded document, or null before "init" / after "destroy". */
let pdfDoc: PDFDocumentProxy | null = null;

// Custom canvas factory that uses OffscreenCanvas instead of
// document.createElement("canvas"). Required because this file runs in a Web
// Worker where `document` is not available. PDF.js uses the factory to create
// intermediate canvases (e.g. for scaling inline images during page rendering).
const offscreenCanvasFactory = {
  /** Creates a fresh OffscreenCanvas of the given size together with its 2D context. */
  create(width: number, height: number) {
    const canvas = new OffscreenCanvas(width, height);
    const context = canvas.getContext("2d")!;
    return { canvas, context };
  },

  /** Resizes an existing canvas entry in place. */
  reset(
    item: { canvas: OffscreenCanvas; context: OffscreenCanvasRenderingContext2D | null },
    width: number,
    height: number,
  ) {
    item.canvas.width = width;
    item.canvas.height = height;
  },

  /** Shrinks the canvas to 1×1 and drops the context reference. */
  destroy(
    item: { canvas: OffscreenCanvas; context: OffscreenCanvasRenderingContext2D | null },
  ) {
    // Release memory by shrinking the canvas; nulling context is cosmetic.
    item.canvas.width = 1;
    item.canvas.height = 1;
    item.context = null;
  },
};

// ── Render queue ──────────────────────────────────────────────────────────────

/** One pending render request, as received in a "render" message. */
interface RenderJob {
  pageNum: number;
  scale: number;
  gen: number;
}

/** Jobs waiting to start, oldest first. */
const renderQueue: RenderJob[] = [];
/** Number of handleRender() calls currently in flight. */
let activeRenders = 0;
/** Upper bound on in-flight renders; 1 serialises rendering. */
const MAX_CONCURRENT = 1;

/**
 * Queues a render for `pageNum`, replacing any not-yet-started job for the
 * same page, then tries to start work immediately.
 */
function enqueueRender(pageNum: number, scale: number, gen: number): void {
  // Supersede any already-queued (not yet started) job for the same page.
  const dup = renderQueue.findIndex((j) => j.pageNum === pageNum);
  if (dup >= 0) renderQueue.splice(dup, 1);
  renderQueue.push({ pageNum, scale, gen });
  drainQueue();
}

/**
 * Starts queued jobs until MAX_CONCURRENT renders are active or the queue is
 * empty. Each finished render re-invokes this function to pick up the next job.
 */
function drainQueue(): void {
  while (activeRenders < MAX_CONCURRENT && renderQueue.length > 0) {
    const job = renderQueue.shift()!;
    activeRenders++;
    // Fire-and-forget: handleRender() catches its own errors.
    void handleRender(job.pageNum, job.scale, job.gen).finally(() => {
      activeRenders--;
      drainQueue();
    });
  }
}

// ── Message dispatch ──────────────────────────────────────────────────────────

/**
 * Entry point for messages from the main thread. Every awaited PDF.js call is
 * guarded so a rejection is logged instead of surfacing as an unhandled
 * promise rejection from this async handler.
 */
self.onmessage = async (ev: MessageEvent<WorkerInbound>): Promise<void> => {
  const msg = ev.data;
  switch (msg.type) {
    case "init":
      // handleInit() reports its own failures via an "error" message.
      await handleInit(msg.pdfData);
      break;

    case "render":
      // Enqueue and return immediately; drainQueue() handles concurrency.
      enqueueRender(msg.pageNum, msg.scale, msg.gen);
      break;

    case "cleanup":
      try {
        // NOTE(review): PDF.js appears to reject cleanup() while a page is
        // still rendering — confirm against the bundled pdfjs-dist version.
        await pdfDoc?.cleanup();
      } catch (e) {
        console.warn("[render-worker] cleanup failed", e);
      }
      break;

    case "destroy": {
      renderQueue.length = 0; // cancel pending jobs
      // Detach the document before awaiting so that render messages handled
      // while destroy() is pending see no document and return early.
      const doc = pdfDoc;
      pdfDoc = null;
      try {
        await doc?.destroy();
      } catch (e) {
        console.warn("[render-worker] destroy failed", e);
      } finally {
        // Always shut the worker down, even if destroy() rejected.
        self.close();
      }
      break;
    }
  }
};

// ── Handlers ──────────────────────────────────────────────────────────────────

/**
 * Loads the PDF from `pdfData` and posts a "ready" message carrying the page
 * count and per-page placeholder dimensions. On failure, posts an "error"
 * message with the stringified exception instead of throwing.
 *
 * @param pdfData Raw PDF bytes received from the main thread.
 */
async function handleInit(pdfData: ArrayBuffer): Promise<void> {
  try {
    // pdf.worker.min.js is bundled into this file (prepended by build.cjs).
    // It sets globalThis.pdfjsWorker.WorkerMessageHandler, which PDF.js detects
    // via its _mainThreadWorkerMessageHandler getter and uses as an inline fake
    // worker — no nested-worker creation attempted.
    pdfjsLib.GlobalWorkerOptions.workerSrc = "brittle://app/pdfjs/build/pdf.worker.min.js";

    // disableFontFace: PDF.js normally registers custom fonts via
    // document.fonts.add() (the Font Loading API). In a Web Worker, `document`
    // is undefined, so font registration fails and text is invisible. Setting
    // disableFontFace: true makes PDF.js render all glyphs as canvas vector
    // paths instead — no browser font API needed, text renders correctly.
    //
    // canvasFactory: the installed pdfjs-dist types omit this parameter even
    // though the runtime API accepts it. `any` cast bypasses the type check.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    pdfDoc = await pdfjsLib.getDocument({
      data: new Uint8Array(pdfData),
      canvasFactory: offscreenCanvasFactory,
      disableFontFace: true,
    } as any).promise;

    const numPages = pdfDoc.numPages;

    // Fetch page-1 dimensions so the main thread can lay out placeholders.
    // Page-1 dims are used as a uniform stub for all pages — accurate for most
    // academic PDFs (uniform paper size). We deliberately do NOT fetch dims for
    // every page here: that would tie up the worker with N getPage() calls while
    // render jobs are already arriving, causing further delays.
    const firstPage = await pdfDoc.getPage(1);
    const firstVp = firstPage.getViewport({ scale: 1.0 });
    firstPage.cleanup();

    // Every entry references the same stub object.
    const stubDim: PageDimensions = { width: firstVp.width, height: firstVp.height };
    const stubDims: PageDimensions[] = Array.from({ length: numPages }, () => stubDim);

    const out: WorkerOutbound = { type: "ready", numPages, dims: stubDims };
    self.postMessage(out);
  } catch (e) {
    const out: WorkerOutbound = { type: "error", message: String(e) };
    self.postMessage(out);
  }
}

/**
 * Renders one page to an OffscreenCanvas and posts two messages: "rendered"
 * (with the bitmap transferred) followed by "textcontent" (with the page's
 * text runs). Does nothing when no document is loaded. Errors are logged via
 * console.warn — except RenderingCancelledException, which is ignored — and
 * never propagate to the caller.
 *
 * @param pageNum Page number passed to pdfDoc.getPage().
 * @param scale   Viewport scale factor.
 * @param gen     Caller-supplied generation tag, echoed back unchanged.
 */
async function handleRender(
  pageNum: number,
  scale: number,
  gen: number,
): Promise<void> {
  if (!pdfDoc) return;

  let page = null;
  try {
    page = await pdfDoc.getPage(pageNum);
    const vp = page.getViewport({ scale });
    const width = Math.round(vp.width);
    const height = Math.round(vp.height);

    const offscreen = new OffscreenCanvas(width, height);
    // OffscreenCanvasRenderingContext2D is assignable to the canvasContext
    // parameter of page.render(); the cast satisfies the type checker.
    const ctx = offscreen.getContext("2d") as unknown as CanvasRenderingContext2D;

    // Run canvas rendering and text extraction in parallel — they are independent.
    const [, textContent] = await Promise.all([
      page.render({ canvasContext: ctx, viewport: vp }).promise,
      page.getTextContent(),
    ]);

    const bitmap = offscreen.transferToImageBitmap();
    const renderedOut: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
    (self as unknown as Worker).postMessage(renderedOut, [bitmap]);

    // Send text items for the selection overlay. Filter out TextMarkedContent
    // entries (which lack a `str` field) — we only need actual text runs.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const items: TextItem[] = (textContent.items as any[])
      .filter((item) => typeof item.str === "string")
      .map(({ str, transform, width, height, hasEOL }: {
        str: string;
        transform: number[];
        width: number;
        height: number;
        hasEOL: boolean;
      }) => ({ str, transform: Array.from(transform), width, height, hasEOL }));

    const textOut: WorkerOutbound = { type: "textcontent", pageNum, gen, items };
    self.postMessage(textOut);
  } catch (e) {
    if ((e as Error)?.name !== "RenderingCancelledException") {
      console.warn("[render-worker] render error page", pageNum, e);
    }
  } finally {
    page?.cleanup();
  }
}