Skip to content

Add new Bookreader integration #6975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions dev-server/documents/html/bookreader.mustache
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Internet Archive BookReader</title>
<style>
body {
font-family: sans-serif;
}
</style>
</head>
<body>
<h1>Mock BookReader book</h1>
<p>
This page simulates the essential parts of the <a href="https://github.com/internetarchive/bookreader">Internet Archive BookReader</a>. See <a href="https://archive.org/details/goodytwoshoes00newyiala"><i>Goody Two Shoes</i></a> for a real example.
</p>

<!-- JS dependencies -->
<!-- IA uses the @next version of BR -->
<script src="https://unpkg.com/@internetarchive/bookreader@next/BookReader/jquery-3.js"></script>

<!-- BookReader and any plugins -->
<link rel="stylesheet" href="https://unpkg.com/@internetarchive/bookreader@next/BookReader/BookReader.css" />
<script src="https://unpkg.com/@internetarchive/bookreader@next/BookReader/BookReader.js"></script>
<script src="https://unpkg.com/@internetarchive/bookreader@next/BookReader/plugins/plugin.text_selection.js"></script>

<!-- BookReader wrapper web component -->
<script type="module"
src="https://unpkg.com/@internetarchive/bookreader@next/BookReader/ia-bookreader-bundle.js"></script>

<style>
html {
/** Set explicitly because the nav menu sizes its fonts in rem; without this they appear to render very large. */
font-size: 10px;
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}

ia-bookreader, .BookReader {
/** Set the size you want bookreader to be */
height: calc(100vh - 100px);
}
</style>

<ia-bookreader>
<div id="BookReader" class="BookReader" slot="main"></div>
</ia-bookreader>

<script>
// Create the mock BookReader instance once the DOM (including the
// `#BookReader` container) has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  // Identifier of the archive.org item that supplies the page images and OCR text.
  const itemId = 'goodytwoshoes00newyiala';

  // Build the BookReader page entry for the zero-based page `index`.
  // `leafNum` is one greater than the index used in the image URL.
  const page = (index, width, height) => ({
    width,
    height,
    uri: `https://archive.org/download/${itemId}/page/n${index}_s4.jpg`,
    leafNum: index + 1,
  });

  // Expose globally. Assign to `window` explicitly rather than relying on an
  // assignment to an undeclared identifier, which throws a ReferenceError if
  // this script is ever run in strict mode or as a module.
  window.br = new BookReader({
    el: '#BookReader',
    bookTitle: 'Goody Two Shoes',
    bookUri: `https://archive.org/details/${itemId}`,
    plugins: {
      textSelection: {
        enabled: true,
        fullDjvuXmlUrl: `https://cors.archive.org/cors/${itemId}/${itemId}_djvu.xml`,
      },
    },

    // Pages are grouped into inner arrays: the first holds a single page, the
    // rest hold two each (presumably one group per spread - confirm against
    // the BookReader documentation).
    data: [
      [page(0, 2454, 3192)],
      [page(1, 2346, 3140), page(2, 2346, 3140)],
      [page(3, 2346, 3140), page(4, 2346, 3140)],
      [page(5, 2346, 3140), page(6, 2346, 3140)],
    ],
  });

  // Let's go!
  window.br.init();
});
</script>

{{{hypothesisScript}}}
</body>
</html>
6 changes: 6 additions & 0 deletions dev-server/templates/index.mustache
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
<li><a href="/document/vitalsource-pdf">Mock VitalSource PDF (aka. "fixed") book</a></li>
</ul>

<h3>BookReader test document</h3>
<p>These documents simulate the Internet Archive's <a href="https://github.com/internetarchive/bookreader">BookReader</a>.</p>
<ul>
<li><a href="/document/bookreader">Sample BookReader book</a></li>
</ul>

<h2>Test PDF documents</h2>
<h3>General test documents</h3>
<p>These documents test typical/simple scenarios.</p>
Expand Down
159 changes: 159 additions & 0 deletions src/annotator/anchoring/BookReader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import type { PageSelector, Selector } from '../../types/api';
import { anchor as htmlAnchor } from './html';
import { TextRange } from './text-range';
import { TextQuoteAnchor } from './types';

/**
 * Find the BookReader text layer (`.BRtextLayer`) that contains `node`.
 *
 * Nodes that have no `closest` method (eg. text nodes) are resolved through
 * their parent element. Returns `null` if there is no enclosing text layer.
 */
function getNodeTextLayer(node: Node | Element): HTMLElement | null {
  const element = 'closest' in node ? node : node.parentElement;
  if (!element) {
    return null;
  }
  return element.closest<HTMLElement>('.BRtextLayer') ?? null;
}

/**
 * Normalize a DOM range for selector generation and locate the single text
 * layer that contains it.
 *
 * Returns the normalized range together with its containing text layer.
 *
 * @throws If the range contains no text, lies outside a text layer, or starts
 *   and ends in different text layers
 */
function getTextLayerForRange(range: Range): [Range, HTMLElement] {
  // "Shrink" the range so that both endpoints are offsets within text nodes
  // rather than within any containing nodes.
  let textRange: Range;
  try {
    textRange = TextRange.fromRange(range).toRange();
  } catch {
    throw new Error('Selection does not contain text');
  }

  const layerAtStart = getNodeTextLayer(textRange.startContainer);
  const layerAtEnd = getNodeTextLayer(textRange.endContainer);

  if (layerAtStart === null || layerAtEnd === null) {
    throw new Error('Selection is outside page text');
  }
  if (layerAtStart !== layerAtEnd) {
    throw new Error('Selecting across page breaks is not supported');
  }

  return [textRange, layerAtStart];
}

/**
 * Check whether `describe` would be able to generate selectors for `range`.
 *
 * Only the range validation step is performed, so this is cheaper than
 * calling `describe` when the selectors themselves are not needed.
 */
export function canDescribe(range: Range) {
  let describable = true;
  try {
    getTextLayerForRange(range);
  } catch {
    // Any validation failure means the range cannot be annotated.
    describable = false;
  }
  return describable;
}

/**
 * Convert a DOM Range object into a set of selectors.
 *
 * Converts a DOM `Range` object into a `[quote, pageSelector]` pair of
 * selectors which can be saved with an annotation and later passed to
 * `anchor` to convert the selectors back to a `Range`.
 *
 * @param root - Element relative to which the quote selector is generated
 * @param range - Range to describe. It must lie within a single text layer
 * @throws If the range cannot be annotated, is not inside a
 *   `.BRpagecontainer` element, or the page container has no numeric
 *   `data-index` attribute
 */
export async function describe(
  root: HTMLElement,
  range: Range,
): Promise<Selector[]> {
  const [textRange, textLayer] = getTextLayerForRange(range);

  // Fail with a clear message rather than dereferencing `null` if the text
  // layer is not inside a page container.
  const pageContainer = textLayer.closest<HTMLElement>('.BRpagecontainer');
  if (!pageContainer) {
    throw new Error('Selection is outside a BookReader page');
  }

  // The page index is read from the container's `data-index` attribute. Reject
  // a missing or non-numeric value so that `NaN` is never stored in a selector.
  const pageIndex = Number.parseInt(pageContainer.dataset.index ?? '', 10);
  if (Number.isNaN(pageIndex)) {
    throw new Error('BookReader page has no valid index');
  }

  const quote = TextQuoteAnchor.fromRange(root, textRange).toSelector();

  // `||` is deliberate: an empty `data-page-num` attribute falls back to the
  // index-based label, the same as a missing one.
  const pageNum = pageContainer.dataset.pageNum;
  const pageSelector: PageSelector = {
    type: 'PageSelector',
    index: pageIndex,
    label: pageNum || `n${pageIndex}`,
  };

  return [quote, pageSelector];
}

/**
 * Anchor a set of selectors to a DOM Range.
 *
 * `selectors` must include a `TextQuoteSelector` and a `PageSelector`, and may
 * include other selector types.
 *
 * If the page referenced by the page selector is not currently in the DOM, a
 * hidden placeholder element containing the quote text is appended to `root`
 * and a range spanning that placeholder is returned instead.
 *
 * @param root - Element that contains the BookReader instance
 * @param selectors - Selectors previously generated by `describe`
 * @throws If the quote or page selector is missing, or if the page's text
 *   layer does not appear within 5 seconds
 */
export async function anchor(
  root: HTMLElement,
  selectors: Selector[],
): Promise<Range> {
  const quote = selectors.find(s => s.type === 'TextQuoteSelector');
  if (!quote) {
    // The quote supplies the placeholder text below and is among the selectors
    // handed to `htmlAnchor`.
    throw new Error('No quote selector found');
  }

  const pageSelector = selectors.find(s => s.type === 'PageSelector');
  if (!pageSelector) {
    throw new Error('No page selector found');
  }

  // This will behave very similarly to the HTML; the only
  // difference is the page might not be rendered yet, but for
  // now let's assume it is.

  const pageIndex = pageSelector.index;
  // Thumbnail mode is excluded from the page container lookup.
  const pageContainer = root.querySelector(
    `.BookReader:not(.BRmodeThumb) .BRpagecontainer[data-index="${pageIndex}"]`,
  );

  if (!pageContainer) {
    console.warn('Page not found:', pageIndex);
    // It's off-screen ; create a placeholder
    // NOTE(review): a new placeholder is appended on every call and nothing in
    // this function removes it - confirm that cleanup happens elsewhere.
    const placeholder = document.createElement('div');
    placeholder.classList.add('BRhypothesisPlaceholder');
    placeholder.style.display = 'none';
    placeholder.textContent = quote.exact;
    root.appendChild(placeholder);
    const range = document.createRange();
    range.selectNodeContents(placeholder);
    return range;
  }

  // Wait for any animations/etc
  const textLayer = await pollUntilTruthy(
    () => pageContainer.querySelector('.BRtextLayer'),
    { timeout: 5000 },
  );

  // Polling resolves without a value on timeout. Report that explicitly
  // instead of passing a missing root to the HTML anchoring code.
  if (!textLayer) {
    throw new Error('Page text layer not found');
  }

  return await htmlAnchor(textLayer, selectors);
}

/**
 * Helper method that polls the provided function until it returns a truthy
 * value or the timeout is reached.
 *
 * `fn` is called once immediately, so a condition that already holds resolves
 * without waiting. After that it is called every `step` milliseconds.
 *
 * @param fn - Function to poll
 * @param options - `timeout` is the number of milliseconds after which
 *   polling gives up (default 1000); `step` is the number of milliseconds
 *   between calls (default 100)
 * @returns The first truthy value returned by `fn`, or `undefined` if the
 *   timeout elapsed first. Rejects with the thrown error if `fn` throws.
 */
async function pollUntilTruthy<T>(
  fn: () => T,
  { timeout = 1000, step = 100 } = {},
): Promise<T | undefined> {
  // Check straight away so that an already-satisfied condition does not incur
  // a full `step` of latency.
  const initial = fn();
  if (initial) {
    return initial;
  }

  return new Promise((resolve, reject) => {
    const start = Date.now();
    const interval = setInterval(() => {
      let val: T;
      try {
        val = fn();
      } catch (err) {
        // Stop polling and surface the failure, rather than leaving the timer
        // running and the promise unsettled.
        clearInterval(interval);
        reject(err);
        return;
      }

      if (val) {
        clearInterval(interval);
        resolve(val);
      } else if (Date.now() - start > timeout) {
        clearInterval(interval);
        resolve(undefined);
      }
    }, step);
  });
}
Loading