|
| 1 | +import * as brAnchoring from '../BookReader'; |
| 2 | + |
/**
 * Return a DOM Range which refers to the specified `text` in `container`.
 *
 * Only the first and last whitespace-separated words of `text` are matched
 * against the text content of `.BRwordElement` elements; words in between are
 * not checked. The range starts at offset 0 of the first word's text node and
 * ends after the last character of the last word.
 *
 * @param {Element} container
 * @param {string} text
 * @return {Range}
 * @throws {Error} If the first word is not found, or the last word is not
 *   found at or after the first word.
 */
function findText(container, text) {
  // This is a very lazy implementation; it assumes no duplicate words,
  // but it's sufficient for the tests.
  const textWords = text.trim().split(/\s+/);
  const firstWord = textWords[0];
  const lastWord = textWords[textWords.length - 1];
  const wordElements = Array.from(
    container.querySelectorAll('.BRwordElement'),
  );
  const notFound = () => new Error(`Text "${text}" not found in container`);

  const startNodeIndex = wordElements.findIndex(
    node => node.textContent === firstWord,
  );
  if (startNodeIndex === -1) {
    throw notFound();
  }

  // Only look for the last word from the start word onwards, so a miss is
  // reported as "not found" instead of resolving to an element before the
  // start of the range.
  const endNodeIndex = wordElements.findIndex(
    (node, index) => index >= startNodeIndex && node.textContent === lastWord,
  );
  if (endNodeIndex === -1) {
    throw notFound();
  }

  const startNode = wordElements[startNodeIndex].firstChild;
  const endNode = wordElements[endNodeIndex].firstChild;
  // A word element without a child (e.g. an empty word) cannot anchor a range.
  if (!startNode || !endNode) {
    throw notFound();
  }

  const range = new Range();
  range.setStart(startNode, 0);
  range.setEnd(endNode, lastWord.length);
  return range;
}
| 33 | + |
// Fake text for two pages, one template literal per page. Within a page,
// paragraphs are separated by an empty line and each paragraph spans one or
// more lines; surrounding indentation is trimmed by the consumer.
const FAKE_TEXT_DATA = [
  `
    Flatland, a romance of many dimensions

    By A. Square, an Inhabitant of Flatland
  `,
  `
    I call our world Flatland, not because we call it so, but
    to make its nature clearer to you, my happy readers, who are
    privileged to live in Space.

    Imagine a vast sheet of paper on which straight Lines, Triangles,
    Squares, Pentagons, Hexagons, Heptagons, Octagons, and other
    Figures, instead of remaining fixed in their places, move freely
    about...
  `,
];
| 51 | + |
/**
 * Build a detached DOM tree for the given page texts.
 *
 * Structure produced, per entry in `texts`:
 *
 *   div.BookReader
 *     div.BRpagecontainer[data-index][data-page-num]
 *       div.BRtextLayer
 *         p.BRparagraphElement        (one per blank-line-separated paragraph)
 *           span                      (one per line, followed by a " " text node)
 *             span.BRwordElement      (one per word)
 *             span.BRspace            (between consecutive words)
 *
 * Words are inserted as text (not parsed as HTML), so characters such as
 * `<` or `&` in the input are preserved verbatim.
 *
 * @param {string[]} texts
 * @return {HTMLDivElement}
 */
function buildBookReaderDOM(texts) {
  const container = document.createElement('div');
  container.className = 'BookReader';

  for (const [i, text] of texts.entries()) {
    const pageContainer = document.createElement('div');
    pageContainer.className = 'BRpagecontainer';
    pageContainer.setAttribute('data-index', (i + 1).toString());
    // Arbitrary offset to denote the page label
    pageContainer.setAttribute('data-page-num', (i + 10).toString());
    container.appendChild(pageContainer);

    const textLayer = document.createElement('div');
    textLayer.className = 'BRtextLayer';
    pageContainer.appendChild(textLayer);

    for (const paragraphText of text.trim().split('\n\n')) {
      const paragraphElement = document.createElement('p');
      paragraphElement.className = 'BRparagraphElement';
      textLayer.appendChild(paragraphElement);

      for (const line of paragraphText.trim().split('\n')) {
        const lineElement = document.createElement('span');
        paragraphElement.appendChild(lineElement);

        line
          .trim()
          .split(' ')
          .forEach((word, wordIndex) => {
            // Separate consecutive words with a dedicated space element.
            if (wordIndex > 0) {
              const spaceElement = document.createElement('span');
              spaceElement.className = 'BRspace';
              spaceElement.textContent = ' ';
              lineElement.appendChild(spaceElement);
            }

            const wordElement = document.createElement('span');
            wordElement.className = 'BRwordElement';
            // textContent (rather than innerHTML) keeps markup-like words as text.
            wordElement.textContent = word;
            lineElement.appendChild(wordElement);
          });

        // Add a space at the end of the line to match the text layer
        paragraphElement.append(' ');
      }
    }
  }
  return container;
}
| 92 | + |
// Tests for the BookReader anchoring module: `describe` (range -> selectors),
// `canDescribe`, and `anchor` (selectors -> range), run against a fake
// BookReader DOM built from FAKE_TEXT_DATA.
describe('annotator/anchoring/BookReader', () => {
  /** @type {HTMLDivElement} */
  let brRoot;

  beforeEach(() => {
    brRoot = buildBookReaderDOM(FAKE_TEXT_DATA);
  });

  afterEach(() => {
    // NOTE(review): nothing in this file attaches `brRoot` to the document,
    // so this looks like a no-op here — confirm whether attachment was intended.
    brRoot.remove();
  });

  describe('describe', () => {
    it('returns quote and page selectors', async () => {
      const range = findText(brRoot, 'live in Space.');
      const selectors = await brAnchoring.describe(brRoot, range);
      // Sort by type so the assertion does not depend on selector order.
      selectors.sort((a, b) => a.type.localeCompare(b.type));

      const types = selectors.map(s => s.type);
      assert.deepEqual(types, ['PageSelector', 'TextQuoteSelector']);
    });

    it('returns a quote selector with the correct quote', async () => {
      const range = findText(brRoot, 'live in Space.');
      const selectors = await brAnchoring.describe(brRoot, range);
      const quote = selectors.find(s => s.type === 'TextQuoteSelector');

      assert.deepEqual(quote, {
        type: 'TextQuoteSelector',
        exact: 'live in Space.',
        prefix: ' readers, who are privileged to ',
        suffix: ' Imagine a vast sheet of paper o',
      });
    });

    it('returns a page selector with the page index and label', async () => {
      const range = findText(brRoot, 'live in Space.');
      const selectors = await brAnchoring.describe(brRoot, range);

      const page = selectors.find(s => s.type === 'PageSelector');
      assert.deepEqual(page, {
        type: 'PageSelector',
        index: 2,
        label: '11',
      });
    });

    it('throws if range spans multiple pages', async () => {
      // `findText` only matches the first and last words, so this range
      // starts on the first fake page and ends on the second.
      const range = findText(brRoot, 'Inhabitant ... world');

      await assert.rejects(
        brAnchoring.describe(brRoot, range),
        'Selecting across page breaks is not supported',
      );
    });

    it('throws if range is outside the text layer', async () => {
      const range = new Range();
      const el = document.createElement('div');
      el.append('foobar');
      range.setStart(el.firstChild, 0);
      range.setEnd(el.firstChild, 6);

      await assert.rejects(
        brAnchoring.describe(brRoot, range),
        'Selection is outside page text',
      );
    });

    it('throws if range does not contain any text nodes', async () => {
      const range = new Range();
      const el = document.createElement('div');
      range.setStart(el, 0);
      range.setEnd(el, 0);

      await assert.rejects(
        brAnchoring.describe(brRoot, range),
        'Selection does not contain text',
      );
    });
  });

  describe('canDescribe', () => {
    it('returns true if range is in text layer', () => {
      const range = findText(brRoot, 'live in Space.');
      assert.isTrue(brAnchoring.canDescribe(range));
    });
  });

  describe('anchor', () => {
    it('anchors previously created selectors if the page is rendered', async () => {
      const range = findText(brRoot, 'live in Space.');
      const selectors = await brAnchoring.describe(brRoot, range);
      const anchoredRange = await brAnchoring.anchor(brRoot, selectors);
      assert.equal(anchoredRange.toString(), range.toString());
    });

    [
      { test: 'no selectors', selectors: [] },
      {
        test: 'page selector only',
        selectors: [{ type: 'PageSelector', index: 2 }],
      },
    ].forEach(({ test, selectors }) => {
      // Each case gets its own title so a failure identifies the input.
      it(`fails to anchor if there is no quote selector (${test})`, async () => {
        await assert.rejects(
          brAnchoring.anchor(brRoot, selectors),
          'No quote selector found',
        );
      });
    });

    [
      {
        // If there is only a prefix, that should match.
        test: 'prefix-only',
        prefix: 'Squares, Pent',
        suffix: undefined,
        expectedMatch: 'Pentagons,',
      },
      {
        // If there is only a suffix, that should match.
        test: 'suffix-only',
        prefix: undefined,
        suffix: 's, Heptagons, and',
        expectedMatch: 'Hexagons,',
      },
      {
        // If there is both a prefix and suffix, either can match
        test: 'prefix-match',
        prefix: 'tagons, Hexagons, ',
        suffix: 'DOES NOT MATCH',
        expectedMatch: 'Heptagons,',
      },
      {
        // If there is both a prefix and suffix, either can match
        test: 'suffix-match',
        prefix: 'DOES NOT MATCH',
        suffix: ', and other Fig',
        expectedMatch: 'Octagons,',
      },
      {
        // If there is neither a prefix nor a suffix, only the quote matters.
        test: 'no-context',
        prefix: undefined,
        suffix: undefined,
        expectedMatch: 'Pentagons,',
      },
    ].forEach(({ test, prefix, suffix, expectedMatch }) => {
      it(`prefers a context match for quote selectors (${test})`, async () => {
        const selectors = [
          {
            type: 'TextQuoteSelector',
            // Quote that occurs multiple times on the same page.
            exact: 'agon',
            prefix,
            suffix,
          },
          {
            type: 'PageSelector',
            index: 2,
          },
        ];

        const range = await brAnchoring.anchor(brRoot, selectors);

        assert.equal(range.toString(), 'agon');
        // Check that we found the correct occurrence of the quote.
        assert.equal(
          range.startContainer.parentElement.textContent,
          expectedMatch,
        );
      });
    });

    it('rejects if quote cannot be anchored', async () => {
      const selectors = [
        {
          type: 'TextQuoteSelector',
          exact: 'phrase that does not exist on the page',
        },
        {
          type: 'PageSelector',
          index: 2,
        },
      ];
      await assert.rejects(
        brAnchoring.anchor(brRoot, selectors),
        'Quote not found',
      );
    });
  });
});
0 commit comments