Add text selection (still faulty at this point)

This commit is contained in:
2026-04-03 23:56:51 +02:00
parent 0d0e9fe043
commit 96ba5d35c7
7 changed files with 214 additions and 682 deletions

View File

@@ -40,7 +40,7 @@
*/
import type { PDFDocumentProxy } from "pdfjs-dist";
import type { WorkerInbound, WorkerOutbound, PageDimensions } from "./types.js";
import type { WorkerInbound, WorkerOutbound, PageDimensions, TextItem } from "./types.js";
// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
declare const pdfjsLib: typeof import("pdfjs-dist");
@@ -189,11 +189,27 @@ async function handleRender(
// OffscreenCanvasRenderingContext2D is assignable to the canvasContext
// parameter of page.render(); the cast satisfies the type checker.
const ctx = offscreen.getContext("2d") as unknown as CanvasRenderingContext2D;
await page.render({ canvasContext: ctx, viewport: vp }).promise;
// Run canvas rendering and text extraction in parallel — they are independent.
const [, textContent] = await Promise.all([
page.render({ canvasContext: ctx, viewport: vp }).promise,
page.getTextContent(),
]);
const bitmap = offscreen.transferToImageBitmap();
const out: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
(self as unknown as Worker).postMessage(out, [bitmap]);
const renderedOut: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
(self as unknown as Worker).postMessage(renderedOut, [bitmap]);
// Send text items for the selection overlay. Filter out TextMarkedContent
// entries (which lack a `str` field) — we only need actual text runs.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const items: TextItem[] = (textContent.items as any[])
.filter((item) => typeof item.str === "string")
.map(({ str, transform, width, height, hasEOL }: {
str: string; transform: number[]; width: number; height: number; hasEOL: boolean;
}) => ({ str, transform: Array.from(transform), width, height, hasEOL }));
const textOut: WorkerOutbound = { type: "textcontent", pageNum, gen, items };
self.postMessage(textOut);
} catch (e) {
if ((e as Error)?.name !== "RenderingCancelledException") {
console.warn("[render-worker] render error page", pageNum, e);