Add (at this point faulty) text selection

This commit is contained in:
2026-04-03 23:56:51 +02:00
parent 0d0e9fe043
commit 96ba5d35c7
7 changed files with 214 additions and 682 deletions

View File

@@ -12,10 +12,29 @@
*/
import { PageState } from "./types.js";
import type { PageDimensions } from "./types.js";
import type { PageDimensions, TextItem } from "./types.js";
const MAX_CANVAS_PIXELS = 16_777_216; // 4096 × 4096
/**
 * Multiply two 2D affine transforms, returning `m1 · m2`.
 *
 * Both inputs and the result use the 6-element `[a, b, c, d, e, f]` layout of
 * the CSS `matrix()` function. The result is the transform that applies `m2`
 * first and `m1` second.
 *
 * @param m1 Left-hand (outer) transform.
 * @param m2 Right-hand (inner) transform.
 * @returns A new 6-element array; neither input is modified.
 */
function composeTransform(m1: number[], m2: number[]): number[] {
  type Affine = [number, number, number, number, number, number];
  const [a1, b1, c1, d1, tx1, ty1] = m1 as Affine;
  const [a2, b2, c2, d2, tx2, ty2] = m2 as Affine;

  // Linear (non-translating) part of m1 applied to the column vector (x, y).
  const linearX = (x: number, y: number): number => a1 * x + c1 * y;
  const linearY = (x: number, y: number): number => b1 * x + d1 * y;

  return [
    // The two basis columns of m2 pass through m1's linear part only.
    linearX(a2, b2),
    linearY(a2, b2),
    linearX(c2, d2),
    linearY(c2, d2),
    // m2's translation is transformed as well, then offset by m1's translation.
    linearX(tx2, ty2) + tx1,
    linearY(tx2, ty2) + ty1,
  ];
}
/**
* Clamp render scale so the canvas pixel count stays within budget.
* CSS dimensions stay correct — pages appear at the right size, just at
@@ -47,6 +66,11 @@ export class PageManager {
private readonly _states: PageState[];
// _canvases[i] is the canvas currently in the DOM for page i+1, or null.
private readonly _canvases: (HTMLCanvasElement | null)[];
// Text selection overlay: raw items from the worker, and the live DOM div.
private readonly _rawTextItems: (TextItem[] | null)[];
private readonly _textLayers: (HTMLDivElement | null)[];
// Timer handle (from setTimeout) for any in-progress chunked text-layer build (per page).
private readonly _textBuildHandles: (number | null)[];
private _scale: number;
private _renderGen: number = 0;
@@ -67,6 +91,9 @@ export class PageManager {
this._dispatchRender = dispatchRender;
this._states = new Array<PageState>(dims.length).fill(PageState.PLACEHOLDER);
this._canvases = new Array<HTMLCanvasElement | null>(dims.length).fill(null);
this._rawTextItems = new Array<TextItem[] | null>(dims.length).fill(null);
this._textLayers = new Array<HTMLDivElement | null>(dims.length).fill(null);
this._textBuildHandles = new Array<number | null>(dims.length).fill(null);
this._buildPlaceholders();
}
@@ -78,7 +105,22 @@ export class PageManager {
get allRendered(): boolean { return this._inFlight === 0; }
/** Called by ZoomController to suppress canvas teardown during Phase 1 CSS zoom. */
setZooming(z: boolean): void { this._zooming = z; }
setZooming(z: boolean): void {
  this._zooming = z;
  if (z) {
    // Entering Phase 1 CSS zoom: keep text-layer DOM work out of the way.
    //  - Pending chunked builds are cancelled so no timer callback injects
    //    layout work between wheel events.
    //  - Existing layers are hidden so their spans drop out of the layout
    //    tree and the CSS zoom loop in applyScale does not have to reflow
    //    thousands of spans per page.
    // Nothing is restored when zooming ends: onScaleChange is expected to
    // call _cleanupTextLayer for every page right afterwards, which removes
    // the hidden divs anyway.
    const pageCount = this._dims.length;
    for (let page = 0; page < pageCount; page++) {
      this._cancelTextBuild(page);
      const overlay = this._textLayers[page];
      if (overlay) {
        overlay.style.display = "none";
      }
    }
  }
}
private _buildPlaceholders(): void {
for (let i = 0; i < this._dims.length; i++) {
@@ -154,6 +196,95 @@ export class PageManager {
canvas.remove();
this._canvases[i] = null;
}
this._cleanupTextLayer(i);
}
/**
 * Cancel the pending chunked text-layer build for page index `i`, if any,
 * and clear its stored timer handle.
 */
private _cancelTextBuild(i: number): void {
  const pending = this._textBuildHandles[i];
  // Nothing scheduled for this page (or the index is outside the array).
  if (pending == null) return;
  clearTimeout(pending);
  this._textBuildHandles[i] = null;
}
/**
 * Tear down the text overlay for page index `i`: stop any pending build and
 * detach the layer div from the DOM.
 *
 * The cached raw text items are deliberately left in place — text content is
 * scale-independent, so the overlay can be rebuilt from the cache later
 * without another worker round-trip.
 */
private _cleanupTextLayer(i: number): void {
  this._cancelTextBuild(i);
  const overlay = this._textLayers[i];
  if (overlay) {
    overlay.remove();
  }
  this._textLayers[i] = null;
}
/**
 * Handle text content delivered by the render worker for one page.
 *
 * The first delivery for a page is cached and used to build the invisible
 * selection overlay. A delivery is ignored when it belongs to a superseded
 * render generation, names a page outside the document, or arrives for a
 * page whose items are already cached.
 *
 * @param pageNum One-based page number.
 * @param gen     Render generation the worker was serving.
 * @param items   Text runs extracted from the page.
 */
onTextContent(pageNum: number, gen: number, items: TextItem[]): void {
  const idx = pageNum - 1;
  const isStale = gen !== this._renderGen; // a zoom reset superseded this render
  const isOutOfRange = idx < 0 || idx >= this._dims.length;
  if (isStale || isOutOfRange) return;

  // Items are cached once per page and survive overlay teardown, so a repeat
  // delivery carries nothing new and must not trigger a redundant rebuild.
  if (this._rawTextItems[idx] !== null) return;

  this._rawTextItems[idx] = items;
  this._buildTextLayer(idx);
}
/**
 * Build the transparent text-selection overlay for page index `i` from the
 * cached raw text items.
 *
 * Does nothing when no items are cached for the page. Otherwise any overlay
 * (and pending build) already registered for the page is discarded, a fresh
 * `.textLayer` div is attached to the page wrapper immediately, and its
 * spans are appended in fixed-size chunks on successive timer callbacks.
 *
 * @param i Zero-based page index.
 */
private _buildTextLayer(i: number): void {
  const items = this._rawTextItems[i];
  if (!items) return;

  // Remove any layer already attached for this page and cancel its pending
  // build. Only cancelling the timer would leave the previous div — with its
  // duplicate spans — orphaned in the wrapper if this method ran twice.
  this._cleanupTextLayer(i);

  const dim = this._dims[i]!;
  const wrap = this._wrappers[i]!;

  // Viewport transform: maps PDF coordinates (origin bottom-left, y up) to
  // CSS pixel coordinates (origin top-left, y down) at the current scale.
  const S = this._scale;
  const vt = [S, 0, 0, -S, 0, dim.height * S];

  // Create and attach the layer div immediately so it is in the DOM even
  // before any spans are appended (spans are added in timer-driven chunks).
  const layer = document.createElement("div");
  layer.className = "textLayer";
  wrap.appendChild(layer);
  this._textLayers[i] = layer;

  let offset = 0;
  // A fixed number of items per chunk bounds the work done by each timer
  // callback. A time-budget loop is deliberately avoided: per the original
  // rationale, the requestIdleCallback polyfill always reports
  // timeRemaining() = 50, so such a loop would process every item in one go
  // and defeat chunking entirely.
  const CHUNK_SIZE = 150;

  const buildChunk = (): void => {
    // Bail if the layer was replaced or removed since this chunk was scheduled.
    if (this._textLayers[i] !== layer) return;

    const end = Math.min(offset + CHUNK_SIZE, items.length);
    // Collect the chunk in a fragment so the layer is touched once per chunk.
    const frag = document.createDocumentFragment();
    while (offset < end) {
      const item = items[offset++]!;
      if (!item.str) continue; // empty runs produce no span
      const m = composeTransform(vt, item.transform);
      const span = document.createElement("span");
      span.textContent = item.str;
      span.style.transform =
        `matrix(${m[0]},${m[1]},${m[2]},${m[3]},${m[4]},${m[5]})`;
      frag.appendChild(span);
    }
    layer.appendChild(frag);

    if (offset < items.length) {
      // More items remain: yield to the event loop before the next chunk.
      this._textBuildHandles[i] = setTimeout(buildChunk, 0);
    } else {
      this._textBuildHandles[i] = null;
    }
  };

  this._textBuildHandles[i] = setTimeout(buildChunk, 0);
}
/**
@@ -213,6 +344,12 @@ export class PageManager {
this._canvases[i] = canvas;
this._states[i] = PageState.RENDERED;
// Rebuild text layer from cache if available (covers pages that left the
// buffer and re-entered, where _cleanupTextLayer removed the DOM div but
// kept the raw items — no worker round-trip needed).
this._cleanupTextLayer(i);
this._buildTextLayer(i);
}
/**
@@ -249,11 +386,14 @@ export class PageManager {
}
if (this._states[i] === PageState.RENDERING) this._inFlight--;
this._states[i] = PageState.PLACEHOLDER;
this._cleanupTextLayer(i);
} else {
// Off-screen: safe to discard immediately (no visible flash).
const canvas = this._canvases[i];
if (canvas) { canvas.remove(); this._canvases[i] = null; }
this._states[i] = PageState.PLACEHOLDER;
// Remove the DOM overlay; raw items are kept for when the page re-enters.
this._cleanupTextLayer(i);
}
}

View File

@@ -40,7 +40,7 @@
*/
import type { PDFDocumentProxy } from "pdfjs-dist";
import type { WorkerInbound, WorkerOutbound, PageDimensions } from "./types.js";
import type { WorkerInbound, WorkerOutbound, PageDimensions, TextItem } from "./types.js";
// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
declare const pdfjsLib: typeof import("pdfjs-dist");
@@ -189,11 +189,27 @@ async function handleRender(
// OffscreenCanvasRenderingContext2D is assignable to the canvasContext
// parameter of page.render(); the cast satisfies the type checker.
const ctx = offscreen.getContext("2d") as unknown as CanvasRenderingContext2D;
await page.render({ canvasContext: ctx, viewport: vp }).promise;
// Run canvas rendering and text extraction in parallel — they are independent.
const [, textContent] = await Promise.all([
page.render({ canvasContext: ctx, viewport: vp }).promise,
page.getTextContent(),
]);
const bitmap = offscreen.transferToImageBitmap();
const out: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
(self as unknown as Worker).postMessage(out, [bitmap]);
const renderedOut: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
(self as unknown as Worker).postMessage(renderedOut, [bitmap]);
// Send text items for the selection overlay. Filter out TextMarkedContent
// entries (which lack a `str` field) — we only need actual text runs.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const items: TextItem[] = (textContent.items as any[])
.filter((item) => typeof item.str === "string")
.map(({ str, transform, width, height, hasEOL }: {
str: string; transform: number[]; width: number; height: number; hasEOL: boolean;
}) => ({ str, transform: Array.from(transform), width, height, hasEOL }));
const textOut: WorkerOutbound = { type: "textcontent", pageNum, gen, items };
self.postMessage(textOut);
} catch (e) {
if ((e as Error)?.name !== "RenderingCancelledException") {
console.warn("[render-worker] render error page", pageNum, e);

View File

@@ -1,3 +1,14 @@
// ── Text layer ────────────────────────────────────────────────────────────────
/**
 * A single text item extracted from a PDF page (subset of pdfjs-dist TextItem).
 * Plain-data shape carried in the worker's "textcontent" message.
 */
export interface TextItem {
// The text of the run; becomes the text content of its overlay span.
str: string;
transform: number[]; // [a, b, c, d, tx, ty] in PDF coordinate space
// NOTE(review): presumably the run's extent as reported by pdf.js — confirm
// the units before relying on these.
width: number;
height: number;
// NOTE(review): presumably true when the run ends a line, per pdf.js — confirm.
hasEOL: boolean;
}
// ── Page lifecycle ────────────────────────────────────────────────────────────
export const PageState = {
@@ -29,9 +40,10 @@ export type WorkerInbound =
* without a second round-trip.
*/
export type WorkerOutbound =
| { type: "ready"; numPages: number; dims: PageDimensions[] }
| { type: "rendered"; pageNum: number; gen: number; bitmap: ImageBitmap }
| { type: "error"; message: string };
| { type: "ready"; numPages: number; dims: PageDimensions[] }
| { type: "rendered"; pageNum: number; gen: number; bitmap: ImageBitmap }
| { type: "textcontent"; pageNum: number; gen: number; items: TextItem[] }
| { type: "error"; message: string };
// ── iframe ↔ parent postMessage protocol ─────────────────────────────────────

View File

@@ -22,12 +22,16 @@ import { ViewportTracker } from "./viewport-tracker.js";
import { ZoomController } from "./zoom-controller.js";
import type { WorkerInbound, WorkerOutbound, OutboundMessage, PageDimensions } from "./types.js";
// requestIdleCallback polyfill for older WebKitGTK builds.
// Fallbacks for older WebKitGTK builds that lack requestIdleCallback /
// cancelIdleCallback. Both are backed by plain timers: a request becomes a
// 1 ms timeout whose deadline always reports 50 ms remaining, and a cancel
// clears that timeout.
if (typeof requestIdleCallback === "undefined") {
  const scope = self as unknown as Record<string, unknown>;
  scope["requestIdleCallback"] = (
    cb: IdleRequestCallback,
  ): ReturnType<typeof setTimeout> => {
    const fire = (): void => cb({ timeRemaining: () => 50, didTimeout: false });
    return setTimeout(fire, 1);
  };
}
if (typeof cancelIdleCallback === "undefined") {
  const scope = self as unknown as Record<string, unknown>;
  scope["cancelIdleCallback"] = (id: ReturnType<typeof setTimeout>): void => {
    clearTimeout(id);
  };
}
// ── DOM refs ──────────────────────────────────────────────────────────────────
const container = document.getElementById("canvas-container")!;
@@ -165,6 +169,8 @@ async function load(): Promise<void> {
pageManager?.onRendered(msg.pageNum, msg.gen, msg.bitmap);
refreshPageIndicator();
if (pageManager?.allRendered) setStatus("Ready");
} else if (msg.type === "textcontent") {
pageManager?.onTextContent(msg.pageNum, msg.gen, msg.items);
} else if (msg.type === "error") {
console.warn("[viewer] worker error:", msg.message);
}

View File

@@ -102,6 +102,34 @@
.page-wrapper canvas {
display: block;
}
/* Invisible text overlay for native selection. Spans are positioned via
CSS matrix() derived from PDF text transforms; color:transparent keeps
the canvas visible while the browser handles selection normally. */
.textLayer {
/* Stretch over the whole containing block. NOTE(review): presumably the page
wrapper the layer is appended to — confirm that wrapper is positioned. */
position: absolute;
inset: 0;
/* Clip any span that extends past the layer's box. */
overflow: hidden;
/* NOTE(review): presumably chosen to stack the overlay above the page canvas — confirm. */
z-index: 2;
/* Line box height equals the font size, with no extra leading. */
line-height: 1;
text-size-adjust: none;
forced-color-adjust: none;
}
.textLayer span {
/* Glyphs are never painted; only the selection highlight is visible. */
color: transparent;
position: absolute;
/* Keep the whitespace inside a text run exactly as extracted. */
white-space: pre;
cursor: text;
/* The per-span matrix() is applied relative to the span's top-left corner. */
transform-origin: 0% 0%;
/* NOTE(review): presumably a unit size so that the scale factors in the
per-span matrix() determine the apparent glyph size — confirm. */
font-size: 1px;
user-select: text;
-webkit-user-select: text;
}
/* Highlight colour shown for selected text inside the overlay. */
.textLayer ::selection {
background: rgba(100, 160, 255, 0.35);
}
</style>
</head>
<body>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long