Add (at this point faulty) text selection
This commit is contained in:
@@ -12,10 +12,29 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { PageState } from "./types.js";
|
import { PageState } from "./types.js";
|
||||||
import type { PageDimensions } from "./types.js";
|
import type { PageDimensions, TextItem } from "./types.js";
|
||||||
|
|
||||||
const MAX_CANVAS_PIXELS = 16_777_216; // 4096 × 4096
|
const MAX_CANVAS_PIXELS = 16_777_216; // 4096 × 4096
|
||||||
|
|
||||||
|
/**
 * Compose two 2D affine transforms and return the matrix product `m1 × m2`.
 *
 * Each transform is a 6-element array `[a, b, c, d, e, f]` in CSS `matrix()`
 * order, i.e. it stands for
 *
 *     | a c e |
 *     | b d f |
 *     | 0 0 1 |
 *
 * With column vectors the result applies `m2` first and `m1` second.
 *
 * @param m1 Outer (left-hand) transform. Readonly arrays and tuples are accepted.
 * @param m2 Inner (right-hand) transform. Readonly arrays and tuples are accepted.
 * @returns A new 6-element array; neither input is modified.
 *
 * Entries missing from a short input fall back to the identity matrix
 * `[1, 0, 0, 1, 0, 0]`, so a malformed matrix cannot leak `NaN` into the
 * result (and from there into a CSS `matrix()` string).
 */
function composeTransform(m1: readonly number[], m2: readonly number[]): number[] {
  // Destructuring defaults replace the per-element non-null assertions and
  // only kick in for `undefined` entries, so complete inputs are unaffected.
  const [a1 = 1, b1 = 0, c1 = 0, d1 = 1, e1 = 0, f1 = 0] = m1;
  const [a2 = 1, b2 = 0, c2 = 0, d2 = 1, e2 = 0, f2 = 0] = m2;

  return [
    a1 * a2 + c1 * b2,
    b1 * a2 + d1 * b2,
    a1 * c2 + c1 * d2,
    b1 * c2 + d1 * d2,
    // Translation: m2's offset pushed through m1's linear part, plus m1's offset.
    a1 * e2 + c1 * f2 + e1,
    b1 * e2 + d1 * f2 + f1,
  ];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clamp render scale so the canvas pixel count stays within budget.
|
* Clamp render scale so the canvas pixel count stays within budget.
|
||||||
* CSS dimensions stay correct — pages appear at the right size, just at
|
* CSS dimensions stay correct — pages appear at the right size, just at
|
||||||
@@ -47,6 +66,11 @@ export class PageManager {
|
|||||||
private readonly _states: PageState[];
|
private readonly _states: PageState[];
|
||||||
// _canvases[i] is the canvas currently in the DOM for page i+1, or null.
|
// _canvases[i] is the canvas currently in the DOM for page i+1, or null.
|
||||||
private readonly _canvases: (HTMLCanvasElement | null)[];
|
private readonly _canvases: (HTMLCanvasElement | null)[];
|
||||||
|
// Text selection overlay: raw items from the worker, and the live DOM div.
|
||||||
|
private readonly _rawTextItems: (TextItem[] | null)[];
|
||||||
|
private readonly _textLayers: (HTMLDivElement | null)[];
|
||||||
|
// Pending setTimeout handle for any in-progress chunked text-layer build (per page), or null.
|
||||||
|
private readonly _textBuildHandles: (number | null)[];
|
||||||
|
|
||||||
private _scale: number;
|
private _scale: number;
|
||||||
private _renderGen: number = 0;
|
private _renderGen: number = 0;
|
||||||
@@ -67,6 +91,9 @@ export class PageManager {
|
|||||||
this._dispatchRender = dispatchRender;
|
this._dispatchRender = dispatchRender;
|
||||||
this._states = new Array<PageState>(dims.length).fill(PageState.PLACEHOLDER);
|
this._states = new Array<PageState>(dims.length).fill(PageState.PLACEHOLDER);
|
||||||
this._canvases = new Array<HTMLCanvasElement | null>(dims.length).fill(null);
|
this._canvases = new Array<HTMLCanvasElement | null>(dims.length).fill(null);
|
||||||
|
this._rawTextItems = new Array<TextItem[] | null>(dims.length).fill(null);
|
||||||
|
this._textLayers = new Array<HTMLDivElement | null>(dims.length).fill(null);
|
||||||
|
this._textBuildHandles = new Array<number | null>(dims.length).fill(null);
|
||||||
|
|
||||||
this._buildPlaceholders();
|
this._buildPlaceholders();
|
||||||
}
|
}
|
||||||
@@ -78,7 +105,22 @@ export class PageManager {
|
|||||||
get allRendered(): boolean { return this._inFlight === 0; }
|
get allRendered(): boolean { return this._inFlight === 0; }
|
||||||
|
|
||||||
/** Called by ZoomController to suppress canvas teardown during Phase 1 CSS zoom. */
|
/** Called by ZoomController to suppress canvas teardown during Phase 1 CSS zoom. */
|
||||||
setZooming(z: boolean): void { this._zooming = z; }
|
setZooming(z: boolean): void {
  this._zooming = z;

  if (z) {
    // Phase 1 CSS zoom is starting: stop all text-overlay DOM work.
    //  - Cancelling pending builds keeps queued setTimeout chunks from
    //    appending spans in between zoom events.
    //  - display:none takes each overlay's spans out of layout, so the zoom
    //    pass does not have to reflow them.
    // Nothing is restored when zooming ends: the overlays are expected to be
    // removed and rebuilt by the scale-change path (not visible in this
    // block — verify against onScaleChange).
    const pageCount = this._dims.length;
    for (let page = 0; page < pageCount; page++) {
      this._cancelTextBuild(page);
      const overlay = this._textLayers[page];
      if (overlay) {
        overlay.style.display = "none";
      }
    }
  }
}
|
||||||
|
|
||||||
private _buildPlaceholders(): void {
|
private _buildPlaceholders(): void {
|
||||||
for (let i = 0; i < this._dims.length; i++) {
|
for (let i = 0; i < this._dims.length; i++) {
|
||||||
@@ -154,6 +196,95 @@ export class PageManager {
|
|||||||
canvas.remove();
|
canvas.remove();
|
||||||
this._canvases[i] = null;
|
this._canvases[i] = null;
|
||||||
}
|
}
|
||||||
|
this._cleanupTextLayer(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Cancel the pending text-layer build chunk for page index `i`, if any,
 * and clear its stored timer handle.
 */
private _cancelTextBuild(i: number): void {
  const pending = this._textBuildHandles[i];
  if (pending === null || pending === undefined) return;

  clearTimeout(pending);
  this._textBuildHandles[i] = null;
}
|
||||||
|
|
||||||
|
/**
 * Tear down the text overlay of page index `i`: cancel any pending build
 * chunk, detach the overlay div from the DOM and forget it.
 *
 * The cached raw text items (`_rawTextItems[i]`) are deliberately left in
 * place so the overlay can later be rebuilt without asking the worker again.
 */
private _cleanupTextLayer(i: number): void {
  this._cancelTextBuild(i);

  const overlay = this._textLayers[i];
  if (overlay) {
    overlay.remove();
  }
  this._textLayers[i] = null;
}
|
||||||
|
|
||||||
|
/**
 * Called when the render worker delivers text content for a page.
 *
 * The raw items are cached per page; the cache is never replaced once set,
 * so a repeated delivery for the same page is a no-op. The selection overlay
 * is built immediately only when that is safe:
 *
 *  - not while a Phase 1 CSS zoom is in progress (`setZooming(true)` has just
 *    cancelled and hidden all overlays — building a new one would undo that);
 *  - not unless the page is currently RENDERED (otherwise the overlay would
 *    be attached to a placeholder with no canvas underneath).
 *
 * In both deferred cases the cached items are picked up the next time
 * `onRendered` runs for the page, which rebuilds the overlay from the cache.
 *
 * @param pageNum 1-based page number.
 * @param gen     Render generation the worker was asked to render for.
 * @param items   Text runs extracted from the page.
 */
onTextContent(pageNum: number, gen: number, items: TextItem[]): void {
  if (gen !== this._renderGen) return; // stale — superseded by a newer render generation

  const i = pageNum - 1;
  if (!Number.isInteger(i) || i < 0 || i >= this._dims.length) return;

  // Items are scale-independent and already cached — nothing new to store.
  if (this._rawTextItems[i] !== null) return;

  this._rawTextItems[i] = items;

  // Cache only; the overlay is rebuilt from the cache by a later onRendered.
  if (this._zooming || this._states[i] !== PageState.RENDERED) return;

  this._buildTextLayer(i);
}
|
||||||
|
|
||||||
|
/**
 * Build the transparent text-selection overlay for page index `i` from the
 * cached raw text items. Does nothing when no items are cached.
 *
 * The overlay div is attached to the page wrapper right away; its spans are
 * appended in fixed-size batches, one batch per `setTimeout(…, 0)` callback.
 * The handle of the next pending batch is kept in `_textBuildHandles[i]` so
 * the build can be cancelled.
 */
private _buildTextLayer(i: number): void {
  const cached = this._rawTextItems[i];
  if (!cached) return;

  // At most one chunked build may be pending per page.
  this._cancelTextBuild(i);

  const pageDim = this._dims[i]!;
  const wrapper = this._wrappers[i]!;

  // Viewport matrix: PDF space (origin bottom-left, y up) → CSS pixels
  // (origin top-left, y down) at the current scale.
  const scale = this._scale;
  const viewport = [scale, 0, 0, -scale, 0, pageDim.height * scale];

  // Attach the (still empty) overlay immediately; spans follow in batches.
  const overlay = document.createElement("div");
  overlay.className = "textLayer";
  wrapper.appendChild(overlay);
  this._textLayers[i] = overlay;

  // A fixed item count per batch, not a time budget: the project's
  // requestIdleCallback polyfill reports a constant timeRemaining(), so a
  // time-based loop could end up processing every item in a single callback.
  const ITEMS_PER_BATCH = 150;
  let cursor = 0;

  const appendNextBatch = (): void => {
    // The overlay was replaced or removed after this batch was scheduled.
    if (this._textLayers[i] !== overlay) return;

    const stop = Math.min(cursor + ITEMS_PER_BATCH, cached.length);
    const batch = document.createDocumentFragment();

    while (cursor < stop) {
      const entry = cached[cursor]!;
      cursor += 1;

      // Runs with an empty string contribute nothing selectable.
      if (entry.str) {
        const [ma, mb, mc, md, me, mf] = composeTransform(viewport, entry.transform);
        const run = document.createElement("span");
        run.textContent = entry.str;
        run.style.transform = `matrix(${ma},${mb},${mc},${md},${me},${mf})`;
        batch.appendChild(run);
      }
    }
    overlay.appendChild(batch);

    // Schedule the next batch, or mark the build as finished.
    this._textBuildHandles[i] =
      cursor < cached.length ? setTimeout(appendNextBatch, 0) : null;
  };

  this._textBuildHandles[i] = setTimeout(appendNextBatch, 0);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -213,6 +344,12 @@ export class PageManager {
|
|||||||
|
|
||||||
this._canvases[i] = canvas;
|
this._canvases[i] = canvas;
|
||||||
this._states[i] = PageState.RENDERED;
|
this._states[i] = PageState.RENDERED;
|
||||||
|
|
||||||
|
// Rebuild text layer from cache if available (covers pages that left the
|
||||||
|
// buffer and re-entered, where _cleanupTextLayer removed the DOM div but
|
||||||
|
// kept the raw items — no worker round-trip needed).
|
||||||
|
this._cleanupTextLayer(i);
|
||||||
|
this._buildTextLayer(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -249,11 +386,14 @@ export class PageManager {
|
|||||||
}
|
}
|
||||||
if (this._states[i] === PageState.RENDERING) this._inFlight--;
|
if (this._states[i] === PageState.RENDERING) this._inFlight--;
|
||||||
this._states[i] = PageState.PLACEHOLDER;
|
this._states[i] = PageState.PLACEHOLDER;
|
||||||
|
this._cleanupTextLayer(i);
|
||||||
} else {
|
} else {
|
||||||
// Off-screen: safe to discard immediately (no visible flash).
|
// Off-screen: safe to discard immediately (no visible flash).
|
||||||
const canvas = this._canvases[i];
|
const canvas = this._canvases[i];
|
||||||
if (canvas) { canvas.remove(); this._canvases[i] = null; }
|
if (canvas) { canvas.remove(); this._canvases[i] = null; }
|
||||||
this._states[i] = PageState.PLACEHOLDER;
|
this._states[i] = PageState.PLACEHOLDER;
|
||||||
|
// Remove the DOM overlay; raw items are kept for when the page re-enters.
|
||||||
|
this._cleanupTextLayer(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -40,7 +40,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import type { PDFDocumentProxy } from "pdfjs-dist";
|
import type { PDFDocumentProxy } from "pdfjs-dist";
|
||||||
import type { WorkerInbound, WorkerOutbound, PageDimensions } from "./types.js";
|
import type { WorkerInbound, WorkerOutbound, PageDimensions, TextItem } from "./types.js";
|
||||||
|
|
||||||
// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
|
// pdf.min.js is prepended at build time and sets globalThis.pdfjsLib.
|
||||||
declare const pdfjsLib: typeof import("pdfjs-dist");
|
declare const pdfjsLib: typeof import("pdfjs-dist");
|
||||||
@@ -189,11 +189,27 @@ async function handleRender(
|
|||||||
// OffscreenCanvasRenderingContext2D is assignable to the canvasContext
|
// OffscreenCanvasRenderingContext2D is assignable to the canvasContext
|
||||||
// parameter of page.render(); the cast satisfies the type checker.
|
// parameter of page.render(); the cast satisfies the type checker.
|
||||||
const ctx = offscreen.getContext("2d") as unknown as CanvasRenderingContext2D;
|
const ctx = offscreen.getContext("2d") as unknown as CanvasRenderingContext2D;
|
||||||
await page.render({ canvasContext: ctx, viewport: vp }).promise;
|
|
||||||
|
// Run canvas rendering and text extraction in parallel — they are independent.
|
||||||
|
const [, textContent] = await Promise.all([
|
||||||
|
page.render({ canvasContext: ctx, viewport: vp }).promise,
|
||||||
|
page.getTextContent(),
|
||||||
|
]);
|
||||||
|
|
||||||
const bitmap = offscreen.transferToImageBitmap();
|
const bitmap = offscreen.transferToImageBitmap();
|
||||||
const out: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
|
const renderedOut: WorkerOutbound = { type: "rendered", pageNum, gen, bitmap };
|
||||||
(self as unknown as Worker).postMessage(out, [bitmap]);
|
(self as unknown as Worker).postMessage(renderedOut, [bitmap]);
|
||||||
|
|
||||||
|
// Send text items for the selection overlay. Filter out TextMarkedContent
|
||||||
|
// entries (which lack a `str` field) — we only need actual text runs.
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
const items: TextItem[] = (textContent.items as any[])
|
||||||
|
.filter((item) => typeof item.str === "string")
|
||||||
|
.map(({ str, transform, width, height, hasEOL }: {
|
||||||
|
str: string; transform: number[]; width: number; height: number; hasEOL: boolean;
|
||||||
|
}) => ({ str, transform: Array.from(transform), width, height, hasEOL }));
|
||||||
|
const textOut: WorkerOutbound = { type: "textcontent", pageNum, gen, items };
|
||||||
|
self.postMessage(textOut);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if ((e as Error)?.name !== "RenderingCancelledException") {
|
if ((e as Error)?.name !== "RenderingCancelledException") {
|
||||||
console.warn("[render-worker] render error page", pageNum, e);
|
console.warn("[render-worker] render error page", pageNum, e);
|
||||||
|
|||||||
@@ -1,3 +1,14 @@
|
|||||||
|
// ── Text layer ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * One run of text extracted from a PDF page.
 *
 * This is a subset of the fields of pdfjs-dist's `TextItem`; it is the shape
 * the render worker sends in its "textcontent" message.
 */
export interface TextItem {
  /** The text of the run. */
  str: string;
  /** Affine matrix `[a, b, c, d, tx, ty]` in PDF coordinate space. */
  transform: number[];
  /** Width as reported by pdfjs-dist (units not established here — TODO confirm). */
  width: number;
  /** Height as reported by pdfjs-dist (units not established here — TODO confirm). */
  height: number;
  /** pdfjs-dist's end-of-line flag — presumably true when the run ends a line; verify. */
  hasEOL: boolean;
}
|
||||||
|
|
||||||
// ── Page lifecycle ────────────────────────────────────────────────────────────
|
// ── Page lifecycle ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
export const PageState = {
|
export const PageState = {
|
||||||
@@ -31,6 +42,7 @@ export type WorkerInbound =
|
|||||||
export type WorkerOutbound =
|
export type WorkerOutbound =
|
||||||
| { type: "ready"; numPages: number; dims: PageDimensions[] }
|
| { type: "ready"; numPages: number; dims: PageDimensions[] }
|
||||||
| { type: "rendered"; pageNum: number; gen: number; bitmap: ImageBitmap }
|
| { type: "rendered"; pageNum: number; gen: number; bitmap: ImageBitmap }
|
||||||
|
| { type: "textcontent"; pageNum: number; gen: number; items: TextItem[] }
|
||||||
| { type: "error"; message: string };
|
| { type: "error"; message: string };
|
||||||
|
|
||||||
// ── iframe ↔ parent postMessage protocol ─────────────────────────────────────
|
// ── iframe ↔ parent postMessage protocol ─────────────────────────────────────
|
||||||
|
|||||||
@@ -22,12 +22,16 @@ import { ViewportTracker } from "./viewport-tracker.js";
|
|||||||
import { ZoomController } from "./zoom-controller.js";
|
import { ZoomController } from "./zoom-controller.js";
|
||||||
import type { WorkerInbound, WorkerOutbound, OutboundMessage, PageDimensions } from "./types.js";
|
import type { WorkerInbound, WorkerOutbound, OutboundMessage, PageDimensions } from "./types.js";
|
||||||
|
|
||||||
// requestIdleCallback polyfill for older WebKitGTK builds.
|
// requestIdleCallback / cancelIdleCallback polyfills for older WebKitGTK builds.
|
||||||
if (typeof requestIdleCallback === "undefined") {
|
if (typeof requestIdleCallback === "undefined") {
|
||||||
(self as unknown as Record<string, unknown>)["requestIdleCallback"] =
|
(self as unknown as Record<string, unknown>)["requestIdleCallback"] =
|
||||||
(cb: IdleRequestCallback): ReturnType<typeof setTimeout> =>
|
(cb: IdleRequestCallback): ReturnType<typeof setTimeout> =>
|
||||||
setTimeout(() => cb({ timeRemaining: () => 50, didTimeout: false }), 1);
|
setTimeout(() => cb({ timeRemaining: () => 50, didTimeout: false }), 1);
|
||||||
}
|
}
|
||||||
|
// Companion to the requestIdleCallback fallback: when the host has no
// cancelIdleCallback, install one that treats the id as a timer handle.
if (typeof cancelIdleCallback === "undefined") {
  const cancelViaTimer = (id: ReturnType<typeof setTimeout>): void => {
    clearTimeout(id);
  };
  (self as unknown as Record<string, unknown>)["cancelIdleCallback"] = cancelViaTimer;
}
|
||||||
|
|
||||||
// ── DOM refs ──────────────────────────────────────────────────────────────────
|
// ── DOM refs ──────────────────────────────────────────────────────────────────
|
||||||
const container = document.getElementById("canvas-container")!;
|
const container = document.getElementById("canvas-container")!;
|
||||||
@@ -165,6 +169,8 @@ async function load(): Promise<void> {
|
|||||||
pageManager?.onRendered(msg.pageNum, msg.gen, msg.bitmap);
|
pageManager?.onRendered(msg.pageNum, msg.gen, msg.bitmap);
|
||||||
refreshPageIndicator();
|
refreshPageIndicator();
|
||||||
if (pageManager?.allRendered) setStatus("Ready");
|
if (pageManager?.allRendered) setStatus("Ready");
|
||||||
|
} else if (msg.type === "textcontent") {
|
||||||
|
pageManager?.onTextContent(msg.pageNum, msg.gen, msg.items);
|
||||||
} else if (msg.type === "error") {
|
} else if (msg.type === "error") {
|
||||||
console.warn("[viewer] worker error:", msg.message);
|
console.warn("[viewer] worker error:", msg.message);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -102,6 +102,34 @@
|
|||||||
.page-wrapper canvas {
|
.page-wrapper canvas {
|
||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Invisible text overlay for native selection. Spans are positioned via
|
||||||
|
CSS matrix() derived from PDF text transforms; color:transparent keeps
|
||||||
|
the canvas visible while the browser handles selection normally. */
|
||||||
|
.textLayer {
|
||||||
|
position: absolute;
|
||||||
|
inset: 0;
|
||||||
|
overflow: hidden;
|
||||||
|
z-index: 2;
|
||||||
|
line-height: 1;
|
||||||
|
text-size-adjust: none;
|
||||||
|
forced-color-adjust: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.textLayer span {
|
||||||
|
color: transparent;
|
||||||
|
position: absolute;
|
||||||
|
white-space: pre;
|
||||||
|
cursor: text;
|
||||||
|
transform-origin: 0% 0%;
|
||||||
|
font-size: 1px;
|
||||||
|
user-select: text;
|
||||||
|
-webkit-user-select: text;
|
||||||
|
}
|
||||||
|
|
||||||
|
.textLayer ::selection {
|
||||||
|
background: rgba(100, 160, 255, 0.35);
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user