Files
brittle/src-tauri/assets/viewer-src/src/render-worker.ts
2026-04-03 18:45:08 +02:00

205 lines
8.5 KiB
TypeScript

/**
* render-worker.ts — off-main-thread PDF rendering via Web Worker.
*
* NOTE: pdf.min.js is prepended to the compiled output of this file at build
* time by build.cjs (see prependPdfjsPlugin). It sets `self.pdfjsLib` as a
* global, which this module reads via the declaration below.
*
* Worker message protocol
* ────────────────────────
* Main → Worker:
* { type: "init", pdfData: ArrayBuffer } (transferred, not copied)
* { type: "render", pageNum, scale, gen }
* { type: "cleanup" } — pdfDoc.cleanup(): free internal caches
* { type: "destroy" } — pdfDoc.destroy() + self.close()
*
* Worker → Main:
* { type: "ready", numPages, dims } — page-1 dim used as stub for all pages
* { type: "rendered", pageNum, gen, bitmap } (bitmap as transferable)
* { type: "error", message }
*
* WebKit nested-worker constraint
* ────────────────────────────────
* WebKit forbids nested workers (a worker spawning a worker). PDF.js normally
* spawns pdf.worker.min.js from wherever it is used — which here would be a
* nested worker. Instead, pdf.worker.min.js is bundled into this file (by
* build.cjs) preceded by a preamble that sets globalThis.window = globalThis.
* This causes pdf.worker.min.js to expose its WorkerMessageHandler on
* globalThis.pdfjsWorker without auto-calling initializeFromPort(self). PDF.js
* detects the pre-loaded handler via _mainThreadWorkerMessageHandler and runs
* inline in this thread — no nested worker creation attempted at all.
*
* Render queue
* ────────────
* With disableFontFace: true, every text glyph becomes canvas path operations.
* A text-heavy page can require 20 000+ synchronous canvas calls. Running
* multiple page renders concurrently exhausts the worker thread and causes
* system-wide CPU saturation (perceived UI lock). The queue serialises renders
* so exactly one page is active at a time. Incoming render messages for the
* same page supersede any queued (but not yet started) request.
*/
import type { PDFDocumentProxy } from "pdfjs-dist";
import type { WorkerInbound, WorkerOutbound, PageDimensions } from "./types.js";
// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
// It is only declared here (not imported), so this line emits no runtime code.
declare const pdfjsLib: typeof import("pdfjs-dist");
// The currently loaded document. Assigned by handleInit() and reset to null by
// the "destroy" message; handleRender() returns immediately while it is null.
let pdfDoc: PDFDocumentProxy | null = null;
// OffscreenCanvas-backed canvas factory handed to PDF.js. This file runs in a
// Web Worker, where `document` (and so document.createElement("canvas")) is not
// available; PDF.js calls the factory whenever it needs an intermediate canvas
// (e.g. for scaling inline images during page rendering).
type OffscreenCanvasEntry = {
  canvas: OffscreenCanvas;
  context: OffscreenCanvasRenderingContext2D | null;
};
const offscreenCanvasFactory = {
  /** Allocates a new width × height canvas and returns it with its 2D context. */
  create(width: number, height: number) {
    const surface = new OffscreenCanvas(width, height);
    return { canvas: surface, context: surface.getContext("2d")! };
  },
  /** Resizes the canvas of an existing entry in place. */
  reset(item: OffscreenCanvasEntry, width: number, height: number) {
    const { canvas } = item;
    canvas.width = width;
    canvas.height = height;
  },
  /** Releases an entry that PDF.js no longer needs. */
  destroy(item: OffscreenCanvasEntry) {
    // Shrinking to 1×1 is what releases the backing memory; dropping the
    // context reference afterwards is cosmetic.
    const { canvas } = item;
    canvas.width = 1;
    canvas.height = 1;
    item.context = null;
  },
};
// ── Render queue ──────────────────────────────────────────────────────────────
// One pending render request: which page, at what scale, and the `gen` value
// that is echoed back unchanged in the matching "rendered" message.
interface RenderJob { pageNum: number; scale: number; gen: number; }
// FIFO of jobs that have been requested but not yet started (pushed at the
// back by enqueueRender, taken from the front by drainQueue).
const renderQueue: RenderJob[] = [];
// Number of handleRender() calls currently in flight.
let activeRenders = 0;
// Upper bound on in-flight renders; 1 means pages are rendered one at a time.
const MAX_CONCURRENT = 1;
/**
 * Adds a render request to the back of the queue and starts it if a render
 * slot is free.
 *
 * A request that is still waiting in the queue for the same page is dropped
 * first, so only the most recent request per page is ever rendered. A render
 * that has already started is not affected.
 *
 * @param pageNum Page to render.
 * @param scale   Viewport scale to render at.
 * @param gen     Caller-supplied value returned with the "rendered" message.
 */
function enqueueRender(pageNum: number, scale: number, gen: number): void {
  const staleIdx = renderQueue.findIndex((queued) => queued.pageNum === pageNum);
  if (staleIdx !== -1) {
    renderQueue.splice(staleIdx, 1);
  }
  const job = { pageNum, scale, gen };
  renderQueue.push(job);
  drainQueue();
}
/**
 * Starts queued render jobs, oldest first, until either the queue is empty or
 * MAX_CONCURRENT renders are in flight. Each finished render frees its slot
 * and re-runs the drain, so the queue keeps moving without outside help.
 */
function drainQueue(): void {
  // Runs once per started job, whether handleRender() resolved or rejected.
  const onSettled = (): void => {
    activeRenders -= 1;
    drainQueue();
  };
  for (;;) {
    if (activeRenders >= MAX_CONCURRENT) return;
    const next = renderQueue.shift();
    if (next === undefined) return;
    activeRenders += 1;
    handleRender(next.pageNum, next.scale, next.gen).finally(onSettled);
  }
}
// ── Message dispatch ──────────────────────────────────────────────────────────
/**
 * Entry point for every message sent by the main thread (see "Worker message
 * protocol" in the file header).
 *
 * The handler is async, so anything that rejects in here would surface as an
 * unhandled promise rejection inside the worker. Each branch therefore deals
 * with its own failures:
 *  - "init":    handleInit() reports failures via an "error" message.
 *  - "render":  only enqueues; handleRender() logs its own failures.
 *  - "cleanup": failures are logged and otherwise ignored (best effort).
 *  - "destroy": the worker is closed even if tearing down the document fails.
 */
self.onmessage = async (ev: MessageEvent<WorkerInbound>): Promise<void> => {
  const msg = ev.data;
  switch (msg.type) {
    case "init":
      await handleInit(msg.pdfData);
      break;
    case "render":
      // Enqueue and return immediately; drainQueue() handles concurrency.
      enqueueRender(msg.pageNum, msg.scale, msg.gen);
      break;
    case "cleanup":
      // Freeing caches is an optimisation. PDF.js rejects cleanup() when a
      // page is still rendering, which is routine here because renders are
      // queued; log and carry on instead of leaking an unhandled rejection.
      try {
        await pdfDoc?.cleanup();
      } catch (e) {
        console.warn("[render-worker] cleanup failed", e);
      }
      break;
    case "destroy": {
      renderQueue.length = 0; // cancel pending jobs
      // Detach the document before awaiting so that a render request arriving
      // during teardown is a no-op (handleRender returns when pdfDoc is null).
      const doc = pdfDoc;
      pdfDoc = null;
      try {
        await doc?.destroy();
      } catch (e) {
        console.warn("[render-worker] destroy failed", e);
      } finally {
        // Always terminate, even if destroy() rejected.
        self.close();
      }
      break;
    }
  }
};
// ── Handlers ──────────────────────────────────────────────────────────────────
/**
 * Loads the PDF and tells the main thread it is ready.
 *
 * On success, stores the document in `pdfDoc` and posts
 * `{ type: "ready", numPages, dims }`, where every entry of `dims` is the
 * scale-1.0 size of page 1. On any failure, posts
 * `{ type: "error", message }` instead; this function never rejects.
 *
 * @param pdfData Raw bytes of the PDF file.
 */
async function handleInit(pdfData: ArrayBuffer): Promise<void> {
  try {
    // pdf.worker.min.js is bundled into this file (prepended by build.cjs).
    // It sets globalThis.pdfjsWorker.WorkerMessageHandler, which PDF.js detects
    // via its _mainThreadWorkerMessageHandler getter and uses as an inline fake
    // worker — no nested-worker creation attempted.
    // NOTE(review): workerSrc is still assigned; presumably it is never
    // fetched given the inline handler — confirm.
    pdfjsLib.GlobalWorkerOptions.workerSrc =
      "brittle://app/pdfjs/build/pdf.worker.min.js";

    // disableFontFace: PDF.js normally registers custom fonts via
    // document.fonts.add() (the Font Loading API). In a Web Worker, `document`
    // is undefined, so font registration fails and text is invisible. Setting
    // disableFontFace: true makes PDF.js render all glyphs as canvas vector
    // paths instead — no browser font API needed, text renders correctly.
    const params = {
      data: new Uint8Array(pdfData),
      canvasFactory: offscreenCanvasFactory,
      disableFontFace: true,
    };
    // canvasFactory: the installed pdfjs-dist types omit this parameter even
    // though the runtime API accepts it. `any` cast bypasses the type check.
    // The lint directive must sit directly above the line containing the cast,
    // because it only applies to the single line that follows it.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const doc = await pdfjsLib.getDocument(params as any).promise;
    pdfDoc = doc;
    const numPages = doc.numPages;

    // Fetch page-1 dimensions so the main thread can lay out placeholders.
    // Page-1 dims are used as a uniform stub for all pages — accurate for most
    // academic PDFs (uniform paper size). We deliberately do NOT fetch dims for
    // every page here: that would tie up the worker with N getPage() calls while
    // render jobs are already arriving, causing further delays.
    const firstPage = await doc.getPage(1);
    const firstVp = firstPage.getViewport({ scale: 1.0 });
    firstPage.cleanup();

    const stubDim: PageDimensions = { width: firstVp.width, height: firstVp.height };
    const stubDims: PageDimensions[] = Array.from({ length: numPages }, () => stubDim);
    const out: WorkerOutbound = { type: "ready", numPages, dims: stubDims };
    self.postMessage(out);
  } catch (e) {
    const out: WorkerOutbound = { type: "error", message: String(e) };
    self.postMessage(out);
  }
}
/**
 * Renders one page into an ImageBitmap and transfers it to the main thread as
 * `{ type: "rendered", pageNum, gen, bitmap }`.
 *
 * Does nothing when no document is loaded. Render failures are logged with
 * console.warn (except PDF.js's RenderingCancelledException, which is ignored)
 * and are not reported to the main thread; nothing is posted in that case.
 *
 * @param pageNum Page to render.
 * @param scale   Viewport scale; the bitmap size is the scaled page size
 *                rounded to whole pixels.
 * @param gen     Returned unchanged in the "rendered" message.
 */
async function handleRender(
  pageNum: number,
  scale: number,
  gen: number,
): Promise<void> {
  if (!pdfDoc) return;

  let loadedPage = null;
  try {
    loadedPage = await pdfDoc.getPage(pageNum);
    const viewport = loadedPage.getViewport({ scale });

    const surface = new OffscreenCanvas(
      Math.round(viewport.width),
      Math.round(viewport.height),
    );
    // page.render() is typed for CanvasRenderingContext2D; the offscreen 2D
    // context is passed in its place, and the cast satisfies the type checker.
    const canvasContext = surface.getContext("2d") as unknown as CanvasRenderingContext2D;
    const task = loadedPage.render({ canvasContext, viewport });
    await task.promise;

    // The bitmap is listed as a transferable, so it is moved, not copied.
    const bitmap = surface.transferToImageBitmap();
    const reply: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
    (self as unknown as Worker).postMessage(reply, [bitmap]);
  } catch (err) {
    const cancelled = (err as Error)?.name === "RenderingCancelledException";
    if (!cancelled) {
      console.warn("[render-worker] render error page", pageNum, err);
    }
  } finally {
    // Runs on success and failure alike; a no-op if getPage() itself failed.
    loadedPage?.cleanup();
  }
}