Add (at this point faulty) text selection
This commit is contained in:
@@ -12,10 +12,29 @@
|
||||
*/
|
||||
|
||||
import { PageState } from "./types.js";
|
||||
import type { PageDimensions } from "./types.js";
|
||||
import type { PageDimensions, TextItem } from "./types.js";
|
||||
|
||||
const MAX_CANVAS_PIXELS = 16_777_216; // 4096 × 4096
|
||||
|
||||
/**
 * Multiply two 2D affine transforms and return the product `m1 · m2`.
 *
 * Each transform is a 6-element array `[a, b, c, d, e, f]` in the same order
 * as the arguments of CSS `matrix()`. The result maps a point through `m2`
 * first and then through `m1`.
 *
 * @param m1 Outer (left-hand) transform.
 * @param m2 Inner (right-hand) transform.
 * @returns A new 6-element array holding the composed transform.
 */
function composeTransform(m1: number[], m2: number[]): number[] {
  /* eslint-disable @typescript-eslint/no-non-null-assertion */
  const outer = { a: m1[0]!, b: m1[1]!, c: m1[2]!, d: m1[3]!, e: m1[4]!, f: m1[5]! };
  const inner = { a: m2[0]!, b: m2[1]!, c: m2[2]!, d: m2[3]!, e: m2[4]!, f: m2[5]! };
  /* eslint-enable @typescript-eslint/no-non-null-assertion */

  // Linear part of the outer transform applied to a column vector (x, y).
  const rowX = (x: number, y: number): number => outer.a * x + outer.c * y;
  const rowY = (x: number, y: number): number => outer.b * x + outer.d * y;

  return [
    rowX(inner.a, inner.b),
    rowY(inner.a, inner.b),
    rowX(inner.c, inner.d),
    rowY(inner.c, inner.d),
    // Translation column: transform the inner offset, then add the outer offset.
    rowX(inner.e, inner.f) + outer.e,
    rowY(inner.e, inner.f) + outer.f,
  ];
}
|
||||
|
||||
/**
|
||||
* Clamp render scale so the canvas pixel count stays within budget.
|
||||
* CSS dimensions stay correct — pages appear at the right size, just at
|
||||
@@ -47,6 +66,11 @@ export class PageManager {
|
||||
private readonly _states: PageState[];
|
||||
// _canvases[i] is the canvas currently in the DOM for page i+1, or null.
|
||||
private readonly _canvases: (HTMLCanvasElement | null)[];
|
||||
// Text selection overlay: raw items from the worker, and the live DOM div.
|
||||
private readonly _rawTextItems: (TextItem[] | null)[];
|
||||
private readonly _textLayers: (HTMLDivElement | null)[];
|
||||
// Handle for any in-progress setTimeout-chunked text-layer build (per page).
|
||||
private readonly _textBuildHandles: (number | null)[];
|
||||
|
||||
private _scale: number;
|
||||
private _renderGen: number = 0;
|
||||
@@ -67,6 +91,9 @@ export class PageManager {
|
||||
this._dispatchRender = dispatchRender;
|
||||
this._states = new Array<PageState>(dims.length).fill(PageState.PLACEHOLDER);
|
||||
this._canvases = new Array<HTMLCanvasElement | null>(dims.length).fill(null);
|
||||
this._rawTextItems = new Array<TextItem[] | null>(dims.length).fill(null);
|
||||
this._textLayers = new Array<HTMLDivElement | null>(dims.length).fill(null);
|
||||
this._textBuildHandles = new Array<number | null>(dims.length).fill(null);
|
||||
|
||||
this._buildPlaceholders();
|
||||
}
|
||||
@@ -78,7 +105,22 @@ export class PageManager {
|
||||
/** True when the in-flight render counter is zero. */
get allRendered(): boolean {
  const outstanding = this._inFlight;
  return outstanding === 0;
}
|
||||
|
||||
/** Called by ZoomController to suppress canvas teardown during Phase 1 CSS zoom. */
|
||||
setZooming(z: boolean): void { this._zooming = z; }
|
||||
/**
 * Record whether a zoom gesture is in progress.
 *
 * When zooming starts (`z === true`), every page's pending text-layer build
 * is cancelled and every existing text-layer div is hidden. Nothing is
 * restored when zooming ends (`z === false`).
 *
 * @param z `true` when zooming starts, `false` when it ends.
 */
setZooming(z: boolean): void {
  this._zooming = z;

  if (z) {
    // Keep text-layer DOM work out of the CSS zoom phase: a hidden layer's
    // spans are not laid out, and a cancelled build cannot slip a timer
    // chunk in between wheel events.
    // NOTE(review): no un-hide happens on setZooming(false); the original
    // author's note says onScaleChange removes every text layer right after
    // the zoom — confirm against that method.
    const pageCount = this._dims.length;
    for (let page = 0; page < pageCount; page += 1) {
      this._cancelTextBuild(page);
      const overlay = this._textLayers[page];
      if (overlay == null) continue;
      overlay.style.display = "none";
    }
  }
}
|
||||
|
||||
private _buildPlaceholders(): void {
|
||||
for (let i = 0; i < this._dims.length; i++) {
|
||||
@@ -154,6 +196,95 @@ export class PageManager {
|
||||
canvas.remove();
|
||||
this._canvases[i] = null;
|
||||
}
|
||||
this._cleanupTextLayer(i);
|
||||
}
|
||||
|
||||
/**
 * Cancel the pending text-layer build timer for page index `i`, if there is
 * one, and clear its stored handle.
 */
private _cancelTextBuild(i: number): void {
  const pending = this._textBuildHandles[i];
  if (pending === null || pending === undefined) return;

  clearTimeout(pending);
  this._textBuildHandles[i] = null;
}
|
||||
|
||||
/**
 * Remove the text-layer overlay of page index `i`: cancel any pending build,
 * detach the layer div from the DOM, and forget the reference.
 *
 * The cached raw text items are deliberately left in place. Text content does
 * not depend on scale, so the overlay can be rebuilt later without asking the
 * worker again.
 */
private _cleanupTextLayer(i: number): void {
  this._cancelTextBuild(i);

  const overlay = this._textLayers[i];
  if (overlay != null) overlay.remove();
  this._textLayers[i] = null;
}
|
||||
|
||||
/**
 * Handle text content delivered by the render worker for one page.
 *
 * The message is ignored when its generation does not match the current
 * render generation, when the page number is out of range, or when items for
 * that page are already cached. Otherwise the items are cached and the
 * selection overlay is built from them.
 *
 * @param pageNum One-based page number.
 * @param gen Render generation the message belongs to.
 * @param items Text items extracted for the page.
 */
onTextContent(pageNum: number, gen: number, items: TextItem[]): void {
  // A different generation means this render was superseded (e.g. by a zoom reset).
  const isStale = gen !== this._renderGen;
  if (isStale) return;

  const i = pageNum - 1;
  const outOfRange = i < 0 || i >= this._dims.length;
  if (outOfRange) return;

  // Items cached by an earlier delivery are reused; nothing to rebuild here.
  const alreadyCached = this._rawTextItems[i] !== null;
  if (alreadyCached) return;

  this._rawTextItems[i] = items;
  this._buildTextLayer(i);
}
|
||||
|
||||
/**
 * Build the transparent text-selection overlay for the page at index `i`
 * from the cached text items. Does nothing when no items are cached.
 *
 * A fresh `div.textLayer` is appended to the page wrapper immediately; its
 * spans are then added in fixed-size batches, one batch per `setTimeout(0)`
 * tick. The pending timer handle is kept in `_textBuildHandles[i]` so the
 * build can be cancelled.
 *
 * @param i Zero-based page index.
 */
private _buildTextLayer(i: number): void {
  const items = this._rawTextItems[i];
  if (!items) return;

  // Remove any overlay already built for this page (and cancel its pending
  // batch) before creating a replacement. Cancelling the timer alone would
  // leave the old div in the DOM with no reference to it once
  // _textLayers[i] is overwritten below.
  this._cleanupTextLayer(i);

  const dim = this._dims[i]!;
  const wrap = this._wrappers[i]!;

  // Viewport transform: maps PDF coordinates (origin bottom-left, y up) to
  // CSS pixel coordinates (origin top-left, y down) at the current scale.
  // NOTE(review): assumes dim.height is the unscaled page height — confirm.
  const S = this._scale;
  const vt = [S, 0, 0, -S, 0, dim.height * S];

  // Attach the layer div right away so it is in the DOM even before any
  // spans have been appended.
  const layer = document.createElement("div");
  layer.className = "textLayer";
  wrap.appendChild(layer);
  this._textLayers[i] = layer;

  let offset = 0;
  // A fixed item count per batch bounds the work done in each callback. A
  // time-budget loop is not used because the requestIdleCallback polyfill
  // always reports timeRemaining() = 50, which would process every item in
  // one shot and defeat chunking.
  const CHUNK_SIZE = 150;

  const buildChunk = (): void => {
    // Bail if the layer was replaced or removed since this batch was
    // scheduled. The handle slot is left alone because it may now belong
    // to a newer build.
    if (this._textLayers[i] !== layer) return;

    const end = Math.min(offset + CHUNK_SIZE, items.length);
    const frag = document.createDocumentFragment();
    while (offset < end) {
      const item = items[offset++]!;
      if (!item.str) continue; // nothing to select in an empty run
      const m = composeTransform(vt, item.transform);
      const span = document.createElement("span");
      span.textContent = item.str;
      span.style.transform =
        `matrix(${m[0]},${m[1]},${m[2]},${m[3]},${m[4]},${m[5]})`;
      frag.appendChild(span);
    }
    // One DOM insertion per batch.
    layer.appendChild(frag);

    if (offset < items.length) {
      this._textBuildHandles[i] = setTimeout(buildChunk, 0);
    } else {
      this._textBuildHandles[i] = null;
    }
  };

  this._textBuildHandles[i] = setTimeout(buildChunk, 0);
}
|
||||
|
||||
/**
|
||||
@@ -213,6 +344,12 @@ export class PageManager {
|
||||
|
||||
this._canvases[i] = canvas;
|
||||
this._states[i] = PageState.RENDERED;
|
||||
|
||||
// Rebuild text layer from cache if available (covers pages that left the
|
||||
// buffer and re-entered, where _cleanupTextLayer removed the DOM div but
|
||||
// kept the raw items — no worker round-trip needed).
|
||||
this._cleanupTextLayer(i);
|
||||
this._buildTextLayer(i);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -249,11 +386,14 @@ export class PageManager {
|
||||
}
|
||||
if (this._states[i] === PageState.RENDERING) this._inFlight--;
|
||||
this._states[i] = PageState.PLACEHOLDER;
|
||||
this._cleanupTextLayer(i);
|
||||
} else {
|
||||
// Off-screen: safe to discard immediately (no visible flash).
|
||||
const canvas = this._canvases[i];
|
||||
if (canvas) { canvas.remove(); this._canvases[i] = null; }
|
||||
this._states[i] = PageState.PLACEHOLDER;
|
||||
// Remove the DOM overlay; raw items are kept for when the page re-enters.
|
||||
this._cleanupTextLayer(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
*/
|
||||
|
||||
import type { PDFDocumentProxy } from "pdfjs-dist";
|
||||
import type { WorkerInbound, WorkerOutbound, PageDimensions } from "./types.js";
|
||||
import type { WorkerInbound, WorkerOutbound, PageDimensions, TextItem } from "./types.js";
|
||||
|
||||
// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
|
||||
declare const pdfjsLib: typeof import("pdfjs-dist");
|
||||
@@ -189,11 +189,27 @@ async function handleRender(
|
||||
// OffscreenCanvasRenderingContext2D is assignable to the canvasContext
|
||||
// parameter of page.render(); the cast satisfies the type checker.
|
||||
const ctx = offscreen.getContext("2d") as unknown as CanvasRenderingContext2D;
|
||||
await page.render({ canvasContext: ctx, viewport: vp }).promise;
|
||||
|
||||
// Run canvas rendering and text extraction in parallel — they are independent.
|
||||
const [, textContent] = await Promise.all([
|
||||
page.render({ canvasContext: ctx, viewport: vp }).promise,
|
||||
page.getTextContent(),
|
||||
]);
|
||||
|
||||
const bitmap = offscreen.transferToImageBitmap();
|
||||
const out: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
|
||||
(self as unknown as Worker).postMessage(out, [bitmap]);
|
||||
const renderedOut: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
|
||||
(self as unknown as Worker).postMessage(renderedOut, [bitmap]);
|
||||
|
||||
// Send text items for the selection overlay. Filter out TextMarkedContent
|
||||
// entries (which lack a `str` field) — we only need actual text runs.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const items: TextItem[] = (textContent.items as any[])
|
||||
.filter((item) => typeof item.str === "string")
|
||||
.map(({ str, transform, width, height, hasEOL }: {
|
||||
str: string; transform: number[]; width: number; height: number; hasEOL: boolean;
|
||||
}) => ({ str, transform: Array.from(transform), width, height, hasEOL }));
|
||||
const textOut: WorkerOutbound = { type: "textcontent", pageNum, gen, items };
|
||||
self.postMessage(textOut);
|
||||
} catch (e) {
|
||||
if ((e as Error)?.name !== "RenderingCancelledException") {
|
||||
console.warn("[render-worker] render error page", pageNum, e);
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
// ── Text layer ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * One run of text extracted from a PDF page. This is a subset of the
 * pdfjs-dist `TextItem` shape, reduced to plain data so it can be posted
 * from the render worker to the main thread.
 */
export interface TextItem {
  /** The text of this run. */
  str: string;
  /** Affine matrix `[a, b, c, d, tx, ty]` in PDF coordinate space. */
  transform: number[];
  /** Run width as reported by pdf.js (units not established here — TODO confirm). */
  width: number;
  /** Run height as reported by pdf.js (units not established here — TODO confirm). */
  height: number;
  /** End-of-line flag copied from pdf.js; presumably marks a line break after this run. */
  hasEOL: boolean;
}
|
||||
|
||||
// ── Page lifecycle ────────────────────────────────────────────────────────────
|
||||
|
||||
export const PageState = {
|
||||
@@ -31,6 +42,7 @@ export type WorkerInbound =
|
||||
/** Messages the render worker posts to the main thread, discriminated by `type`. */
export type WorkerOutbound =
  // Page count and per-page dimensions.
  | { type: "ready"; numPages: number; dims: PageDimensions[] }
  // Rendered bitmap for one page, tagged with the render generation.
  | { type: "rendered"; pageNum: number; gen: number; bitmap: ImageBitmap }
  // Extracted text items for one page, tagged with the render generation.
  | { type: "textcontent"; pageNum: number; gen: number; items: TextItem[] }
  // Failure report carrying a message string.
  | { type: "error"; message: string };
|
||||
|
||||
// ── iframe ↔ parent postMessage protocol ─────────────────────────────────────
|
||||
|
||||
@@ -22,12 +22,16 @@ import { ViewportTracker } from "./viewport-tracker.js";
|
||||
import { ZoomController } from "./zoom-controller.js";
|
||||
import type { WorkerInbound, WorkerOutbound, OutboundMessage, PageDimensions } from "./types.js";
|
||||
|
||||
// requestIdleCallback polyfill for older WebKitGTK builds.
|
||||
// requestIdleCallback / cancelIdleCallback polyfills for older WebKitGTK builds.
|
||||
// Timer-backed stand-in: runs the callback after 1 ms with a synthetic
// deadline that always reports 50 ms remaining and never a timeout.
if (typeof requestIdleCallback === "undefined") {
  const idleShim = (cb: IdleRequestCallback): ReturnType<typeof setTimeout> => {
    const fire = (): void => {
      cb({ timeRemaining: () => 50, didTimeout: false });
    };
    return setTimeout(fire, 1);
  };
  (self as unknown as Record<string, unknown>)["requestIdleCallback"] = idleShim;
}

// Matching canceller: the shim's handle is a timer id, so clearTimeout cancels it.
if (typeof cancelIdleCallback === "undefined") {
  const cancelShim = (id: ReturnType<typeof setTimeout>): void => {
    clearTimeout(id);
  };
  (self as unknown as Record<string, unknown>)["cancelIdleCallback"] = cancelShim;
}
|
||||
|
||||
// ── DOM refs ──────────────────────────────────────────────────────────────────
|
||||
const container = document.getElementById("canvas-container")!;
|
||||
@@ -165,6 +169,8 @@ async function load(): Promise<void> {
|
||||
pageManager?.onRendered(msg.pageNum, msg.gen, msg.bitmap);
|
||||
refreshPageIndicator();
|
||||
if (pageManager?.allRendered) setStatus("Ready");
|
||||
} else if (msg.type === "textcontent") {
|
||||
pageManager?.onTextContent(msg.pageNum, msg.gen, msg.items);
|
||||
} else if (msg.type === "error") {
|
||||
console.warn("[viewer] worker error:", msg.message);
|
||||
}
|
||||
|
||||
@@ -102,6 +102,34 @@
|
||||
.page-wrapper canvas {
|
||||
display: block;
|
||||
}
|
||||
|
||||
/* Invisible text overlay for native selection. Spans are positioned via
|
||||
CSS matrix() derived from PDF text transforms; color:transparent keeps
|
||||
the canvas visible while the browser handles selection normally. */
|
||||
.textLayer {
|
||||
position: absolute;
|
||||
inset: 0;
|
||||
overflow: hidden;
|
||||
z-index: 2;
|
||||
line-height: 1;
|
||||
text-size-adjust: none;
|
||||
forced-color-adjust: none;
|
||||
}
|
||||
|
||||
.textLayer span {
|
||||
color: transparent;
|
||||
position: absolute;
|
||||
white-space: pre;
|
||||
cursor: text;
|
||||
transform-origin: 0% 0%;
|
||||
font-size: 1px;
|
||||
user-select: text;
|
||||
-webkit-user-select: text;
|
||||
}
|
||||
|
||||
.textLayer ::selection {
|
||||
background: rgba(100, 160, 255, 0.35);
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user