Skip to content

Commit 939e151

Browse files
committed
feat(#53): use JSDOM to parse internal anchor tags
1 parent 4254a18 commit 939e151

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

src/html-link-parser.js

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import {JSDOM} from 'jsdom';
12
export default class HTMLLinkParser {
23

34
/**
@@ -24,7 +25,7 @@ export default class HTMLLinkParser {
2425
*/
2526
parseSingle(link, pageDirectory) {
2627
const meta = {
27-
href: link.slice(6, -1)
28+
href: link
2829
.replace(/.(md|markdown)\s?$/i, "")
2930
.replace("\\", "")
3031
.trim()
@@ -61,8 +62,19 @@ export default class HTMLLinkParser {
6162
* @return {Array<import('@photogabble/eleventy-plugin-interlinker').LinkMeta>}
6263
*/
6364
find(document, pageDirectory) {
65+
const dom = new JSDOM(document);
66+
const anchors = dom.window.document.getElementsByTagName('a');
67+
const toParse = [];
68+
69+
for (const anchor of anchors) {
70+
// Ignore any anchor tags within either code or pre tags
71+
if (anchor.closest('code,pre')) continue;
72+
// Ignore any links that don't begin with / denoting internal links
73+
if (anchor.href.startsWith('/')) toParse.push(anchor.href);
74+
}
75+
6476
return this.parseMultiple(
65-
(document.match(this.internalLinkRegex) || []),
77+
toParse,
6678
pageDirectory
6779
)
6880
}

tests/html-internal-link-parser.test.js

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,12 @@ import HTMLLinkParser from '../src/html-link-parser.js'
22
import DeadLinks from '../src/dead-links.js';
33
import {pageLookup} from '../src/find-page.js';
44
import test from 'ava';
5+
import fs from "node:fs";
6+
import path from "node:path";
7+
import {fileURLToPath} from "node:url";
58

69
const pageDirectory = pageLookup([]);
10+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
711

812
test('html link parser grabs multiple href, ignoring external links', t => {
913
const parser = new HTMLLinkParser(new DeadLinks());
@@ -25,8 +29,8 @@ test('html link parser grabs multiple href, ignoring external links', t => {
2529
});
2630

2731
test('html link parser ignores href within code blocks', t => {
28-
const parser = new HTMLLinkParser(new DeadLinks());
29-
const links = parser.find('<code><a href="/home">this is a link home</a></code>', pageDirectory);
32+
t.is(0, ((new HTMLLinkParser(new DeadLinks())).find('<code><a href="/home">this is a link home</a></code>', pageDirectory)).length);
3033

31-
t.is(0, links.length);
34+
const html = fs.readFileSync(__dirname + '/fixtures/within-code.html', {encoding:'utf8', flag:'r'});
35+
t.is(1, ((new HTMLLinkParser(new DeadLinks())).find(html, pageDirectory)).length);
3236
});

0 commit comments

Comments
 (0)