205 lines
8.5 KiB
TypeScript
205 lines
8.5 KiB
TypeScript
/**
 * render-worker.ts — off-main-thread PDF rendering via Web Worker.
 *
 * NOTE: pdf.min.js is prepended to the compiled output of this file at build
 * time by build.cjs (see prependPdfjsPlugin). It sets `self.pdfjsLib` as a
 * global, which this module reads via the declaration below.
 *
 * Worker message protocol
 * ────────────────────────
 * Main → Worker:
 *   { type: "init", pdfData: ArrayBuffer }   (transferred, not copied)
 *   { type: "render", pageNum, scale, gen }
 *   { type: "cleanup" }                      — pdfDoc.cleanup(): free internal caches
 *   { type: "destroy" }                      — pdfDoc.destroy() + self.close()
 *
 * Worker → Main:
 *   { type: "ready", numPages, dims }        — page-1 dim used as stub for all pages
 *   { type: "rendered", pageNum, gen, bitmap }  (bitmap as transferable)
 *   { type: "error", message }
 *
 * WebKit nested-worker constraint
 * ────────────────────────────────
 * WebKit forbids nested workers (a worker spawning a worker). PDF.js normally
 * spawns pdf.worker.min.js from wherever it is used — which here would be a
 * nested worker. Instead, pdf.worker.min.js is bundled into this file (by
 * build.cjs) preceded by a preamble that sets globalThis.window = globalThis.
 * This causes pdf.worker.min.js to expose its WorkerMessageHandler on
 * globalThis.pdfjsWorker without auto-calling initializeFromPort(self). PDF.js
 * detects the pre-loaded handler via _mainThreadWorkerMessageHandler and runs
 * inline in this thread — no nested worker creation attempted at all.
 *
 * Render queue
 * ────────────
 * With disableFontFace: true, every text glyph becomes canvas path operations.
 * A text-heavy page can require 20 000+ synchronous canvas calls. Running
 * multiple page renders concurrently exhausts the worker thread and causes
 * system-wide CPU saturation (perceived UI lock). The queue serialises renders
 * so exactly one page is active at a time. Incoming render messages for the
 * same page supersede any queued (but not yet started) request.
 */
|
|
|
|
import type { PDFDocumentProxy } from "pdfjs-dist";
|
|
import type { WorkerInbound, WorkerOutbound, PageDimensions } from "./types.js";
|
|
|
|
// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
|
|
declare const pdfjsLib: typeof import("pdfjs-dist");
|
|
|
|
// The PDF.js document currently loaded in this worker. It is null until
// handleInit() assigns the loaded document, and the "destroy" message handler
// sets it back to null.
let pdfDoc: PDFDocumentProxy | null = null;
|
|
|
|
/**
 * Custom canvas factory that uses OffscreenCanvas instead of
 * document.createElement("canvas"). Required because this file runs in a Web
 * Worker where `document` is not available. PDF.js uses the factory to create
 * intermediate canvases (e.g. for scaling inline images during page rendering).
 */
const offscreenCanvasFactory = {
  /**
   * Allocates a new canvas of the requested size together with its 2D context.
   *
   * @param width  Canvas width in pixels.
   * @param height Canvas height in pixels.
   * @returns The canvas and its (non-null) 2D rendering context.
   * @throws Error if the 2D context cannot be acquired. Failing here gives a
   *   descriptive message instead of a null dereference later inside PDF.js.
   */
  create(width: number, height: number) {
    const canvas = new OffscreenCanvas(width, height);
    const context = canvas.getContext("2d");
    if (context === null) {
      throw new Error(
        `[render-worker] could not acquire a 2D context for a ${width}x${height} OffscreenCanvas`,
      );
    }
    return { canvas, context };
  },

  /**
   * Resizes an existing canvas in place so it can be reused.
   *
   * @param item   Canvas/context pair previously returned by create().
   * @param width  New width in pixels.
   * @param height New height in pixels.
   */
  reset(
    item: { canvas: OffscreenCanvas; context: OffscreenCanvasRenderingContext2D | null },
    width: number,
    height: number,
  ) {
    item.canvas.width = width;
    item.canvas.height = height;
  },

  /**
   * Releases a canvas that is no longer needed.
   *
   * @param item Canvas/context pair previously returned by create().
   */
  destroy(
    item: { canvas: OffscreenCanvas; context: OffscreenCanvasRenderingContext2D | null },
  ) {
    // Release memory by shrinking the canvas; nulling context is cosmetic.
    item.canvas.width = 1;
    item.canvas.height = 1;
    item.context = null;
  },
};
|
|
|
|
// ── Render queue ──────────────────────────────────────────────────────────────
|
|
|
|
/** One pending request to render a page. */
interface RenderJob {
  /** Page number to render (passed to pdfDoc.getPage). */
  pageNum: number;
  /** Viewport scale to render at. */
  scale: number;
  /** Generation tag, echoed back unchanged in the "rendered" reply. */
  gen: number;
}

/** Jobs that have been requested but not started yet; new jobs are appended at the end. */
const renderQueue: Array<RenderJob> = [];

/** Number of renders that have been started and have not settled yet. */
let activeRenders = 0;

/** Maximum number of renders allowed to run at the same time. */
const MAX_CONCURRENT = 1;
|
|
|
|
/**
 * Adds a render request to the back of the queue and kicks the queue.
 *
 * If a job for the same page is still waiting (not yet started), that older
 * job is removed first so only the newest request for the page remains.
 *
 * @param pageNum Page number to render.
 * @param scale   Viewport scale to render at.
 * @param gen     Generation tag carried through to the reply.
 */
function enqueueRender(pageNum: number, scale: number, gen: number): void {
  // Drop the first waiting job for this page, if there is one.
  for (const [index, waiting] of renderQueue.entries()) {
    if (waiting.pageNum === pageNum) {
      renderQueue.splice(index, 1);
      break;
    }
  }

  const job = { pageNum, scale, gen };
  renderQueue.push(job);
  drainQueue();
}
|
|
|
|
/**
 * Starts queued render jobs until either the queue is empty or the number of
 * running renders reaches MAX_CONCURRENT. Each started render re-invokes this
 * function when it settles, so the queue keeps moving without outside help.
 */
function drainQueue(): void {
  // Runs once a render has settled (success or failure): free the slot and
  // try to start the next job.
  const onSettled = (): void => {
    activeRenders--;
    drainQueue();
  };

  for (;;) {
    if (activeRenders >= MAX_CONCURRENT) return;

    const next = renderQueue.shift();
    if (next === undefined) return; // nothing left to start

    activeRenders++;
    handleRender(next.pageNum, next.scale, next.gen).finally(onSettled);
  }
}
|
|
|
|
// ── Message dispatch ──────────────────────────────────────────────────────────
|
|
|
|
/**
 * Dispatches messages sent by the main thread.
 *
 * - "init":    loads the PDF from the supplied buffer (see handleInit).
 * - "render":  queues a page render; the reply is posted when it finishes.
 * - "cleanup": asks PDF.js to free internal caches of the loaded document.
 * - "destroy": cancels pending jobs, destroys the document and closes the worker.
 *
 * Failures of "cleanup" and "destroy" are logged rather than thrown so they
 * cannot surface as unhandled promise rejections from this async handler.
 */
self.onmessage = async (ev: MessageEvent<WorkerInbound>): Promise<void> => {
  const msg = ev.data;
  switch (msg.type) {
    case "init":
      await handleInit(msg.pdfData);
      break;

    case "render":
      // Enqueue and return immediately; drainQueue() handles concurrency.
      enqueueRender(msg.pageNum, msg.scale, msg.gen);
      break;

    case "cleanup":
      // Best effort: cleanup() can reject (PDF.js warns against calling it
      // while a render is in progress), and a failed cache purge is harmless.
      try {
        await pdfDoc?.cleanup();
      } catch (e) {
        console.warn("[render-worker] cleanup failed", e);
      }
      break;

    case "destroy": {
      renderQueue.length = 0; // cancel pending jobs
      // Detach the document before awaiting so that any render job arriving
      // during teardown sees no document and returns without touching it.
      const doc = pdfDoc;
      pdfDoc = null;
      try {
        await doc?.destroy();
      } catch (e) {
        console.warn("[render-worker] destroy failed", e);
      } finally {
        // Always shut the worker down, even if destroy() rejected.
        self.close();
      }
      break;
    }
  }
};
|
|
|
|
// ── Handlers ──────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Loads the PDF contained in `pdfData` and reports the result to the main thread.
 *
 * On success the loaded document is stored in `pdfDoc` and a "ready" message
 * is posted with the page count and one dimensions entry per page (all derived
 * from page 1). On any failure an "error" message carrying the stringified
 * exception is posted instead; the function itself never rejects.
 *
 * @param pdfData Raw bytes of the PDF file.
 */
async function handleInit(pdfData: ArrayBuffer): Promise<void> {
  try {
    // pdf.worker.min.js is bundled into this file (prepended by build.cjs).
    // It sets globalThis.pdfjsWorker.WorkerMessageHandler, which PDF.js detects
    // via its _mainThreadWorkerMessageHandler getter and uses as an inline fake
    // worker — no nested-worker creation attempted.
    pdfjsLib.GlobalWorkerOptions.workerSrc =
      "brittle://app/pdfjs/build/pdf.worker.min.js";

    // disableFontFace: PDF.js normally registers custom fonts via
    // document.fonts.add() (the Font Loading API). In a Web Worker, `document`
    // is undefined, so font registration fails and text is invisible. Setting
    // disableFontFace: true makes PDF.js render all glyphs as canvas vector
    // paths instead — no browser font API needed, text renders correctly.
    const loadOptions = {
      data: new Uint8Array(pdfData),
      canvasFactory: offscreenCanvasFactory,
      disableFontFace: true,
    };

    // canvasFactory: the installed pdfjs-dist types omit this parameter even
    // though the runtime API accepts it. `any` cast bypasses the type check.
    // The lint directive must sit directly above the line containing `any`.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const doc = await pdfjsLib.getDocument(loadOptions as any).promise;
    pdfDoc = doc;
    const numPages = doc.numPages;

    // Fetch page-1 dimensions so the main thread can lay out placeholders.
    // Page-1 dims are used as a uniform stub for all pages — accurate for most
    // academic PDFs (uniform paper size). We deliberately do NOT fetch dims for
    // every page here: that would tie up the worker with N getPage() calls while
    // render jobs are already arriving, causing further delays.
    const firstPage = await doc.getPage(1);
    const firstVp = firstPage.getViewport({ scale: 1.0 });
    firstPage.cleanup();

    // Give every page its own object. Structured cloning keeps shared
    // references shared, so a single reused object would arrive on the main
    // thread as N aliases and updating one page's size would change them all.
    const stubDims: PageDimensions[] = Array.from(
      { length: numPages },
      (): PageDimensions => ({ width: firstVp.width, height: firstVp.height }),
    );
    const out: WorkerOutbound = { type: "ready", numPages, dims: stubDims };
    self.postMessage(out);
  } catch (e) {
    const out: WorkerOutbound = { type: "error", message: String(e) };
    self.postMessage(out);
  }
}
|
|
|
|
/**
 * Renders a single page into an OffscreenCanvas and posts the result to the
 * main thread as a transferred ImageBitmap.
 *
 * Does nothing when no document is loaded. Errors are caught here: anything
 * other than a RenderingCancelledException is logged with console.warn, and no
 * reply is posted for a failed render. The page's cleanup() runs in every case
 * once the page has been obtained.
 *
 * @param pageNum Page number to render.
 * @param scale   Viewport scale to render at.
 * @param gen     Generation tag copied unchanged into the "rendered" reply.
 */
async function handleRender(
  pageNum: number,
  scale: number,
  gen: number,
): Promise<void> {
  if (pdfDoc == null) return;

  let page = null;
  try {
    page = await pdfDoc.getPage(pageNum);

    // Size the target surface to the viewport, rounded to whole pixels.
    const viewport = page.getViewport({ scale });
    const surface = new OffscreenCanvas(
      Math.round(viewport.width),
      Math.round(viewport.height),
    );

    // OffscreenCanvasRenderingContext2D is assignable to the canvasContext
    // parameter of page.render(); the cast satisfies the type checker.
    const renderTask = page.render({
      canvasContext: surface.getContext("2d") as unknown as CanvasRenderingContext2D,
      viewport,
    });
    await renderTask.promise;

    // Hand the pixels over as a transferable so they are moved, not copied.
    const bitmap = surface.transferToImageBitmap();
    const reply: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
    (self as unknown as Worker).postMessage(reply, [bitmap]);
  } catch (e) {
    const wasCancelled = (e as Error)?.name === "RenderingCancelledException";
    if (!wasCancelled) {
      console.warn("[render-worker] render error page", pageNum, e);
    }
  } finally {
    page?.cleanup();
  }
}
|