Skip to content

Commit 43013fd

Browse files
authored
Merge pull request #2 from md2docx/optimize
Optimize
2 parents 1f2b202 + 4fa3fa7 commit 43013fd

File tree

9 files changed

+153
-549
lines changed

9 files changed

+153
-549
lines changed

lib/CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# @m2d/html
22

3+
## 1.0.1
4+
5+
### Patch Changes
6+
7+
- 114454a: fix: Fix inline tag extraction logic
8+
- 2cc7d49: fix: 1. Improve inline tag list. 2. Remove extra spacing.
9+
- 5964742: fix: Fix form styles
10+
- 0d71109: fix: Fix nested inline html tags
11+
- 06a4f14: fix: Handle block and inline nodes in preprocess.
12+
313
## 1.0.0
414

515
### Major Changes

lib/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "@m2d/html",
33
"author": "Mayank Kumar Chaudhari (https://mayank-chaudhari.vercel.app)",
44
"private": false,
5-
"version": "1.0.0",
5+
"version": "1.0.1",
66
"description": "Extend MDAST by parsing embedded HTML in Markdown. Converts HTML into structured MDAST nodes compatible with @m2d/core for DOCX generation.",
77
"license": "MPL-2.0",
88
"main": "./dist/index.js",

lib/src/index.ts

Lines changed: 113 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -9,48 +9,79 @@ import {
99
RootContent,
1010
BlockContent,
1111
TableRow,
12+
Html,
1213
} from "@m2d/core";
1314
import { standardizeColor } from "./utils";
14-
import {
15-
AlignmentType,
16-
BorderStyle,
17-
FrameAnchorType,
18-
HorizontalPositionAlign,
19-
IBorderOptions,
20-
VerticalPositionAlign,
21-
} from "docx";
15+
import { AlignmentType, BorderStyle, IBorderOptions } from "docx";
2216

2317
/**
2418
* HTML inline tags supported by the plugin for conversion.
2519
*/
2620
const INLINE_TAGS = [
21+
"A",
22+
"ABBR",
23+
"ACRONYM", // Deprecated but still inline
24+
"B",
25+
"BDI",
26+
"BDO",
27+
"BIG", // Deprecated but still inline
2728
"BR",
28-
"IMG",
29+
"BUTTON", // Technically inline-block, but often treated inline
30+
"CITE",
31+
"CODE",
32+
"DATA",
33+
"DATALIST",
34+
"DEL",
35+
"DFN",
2936
"EM",
3037
"I",
31-
"STRONG",
32-
"B",
33-
"DEL",
38+
"IMG",
39+
"INPUT",
40+
"INS",
41+
"KBD",
42+
"LABEL",
43+
"MARK",
44+
"METER",
45+
"NOSCRIPT",
46+
"OBJECT",
47+
"OUTPUT",
48+
"Q",
49+
"RUBY",
50+
"RP",
51+
"RT",
3452
"S",
35-
"A",
36-
"SUP",
53+
"SAMP",
54+
"SCRIPT",
55+
"SELECT",
56+
"SLOT",
57+
"SMALL",
58+
"SPAN",
59+
"STRONG",
3760
"SUB",
61+
"SUP",
3862
"svg",
63+
"TEMPLATE",
64+
"TEXTAREA",
65+
"TIME",
66+
"U",
67+
"TT", // Deprecated
68+
"VAR",
69+
"WBR",
3970
] as const;
4071

4172
/**
4273
* Mapping of DOM tag names to MDAST node types.
4374
*/
4475
const DOM_TO_MDAST_MAP = {
76+
A: "link",
77+
B: "strong",
4578
BR: "break",
46-
IMG: "image",
4779
EM: "emphasis",
48-
I: "emphasis",
4980
STRONG: "strong",
50-
B: "strong",
81+
I: "emphasis",
82+
IMG: "image",
5183
DEL: "delete",
5284
S: "delete",
53-
A: "link",
5485
} as const;
5586

5687
/**
@@ -68,6 +99,11 @@ const CSS_BORDER_STYLES = [
6899
"outset",
69100
];
70101

102+
interface HtmlNode extends Html {
103+
tag: string;
104+
children: (RootContent | PhrasingContent)[];
105+
}
106+
71107
/**
72108
* Parsed CSS border representation.
73109
*/
@@ -227,11 +263,14 @@ const parseStyles = (el: Node, inline = true): Data => {
227263
* @param el - DOM node to process.
228264
* @returns PhrasingContent-compatible node.
229265
*/
230-
const processInlineDOMNode = (el: Node): PhrasingContent => {
266+
const processInlineDOMNode = (el: Node, isPre = false): PhrasingContent => {
231267
if (!(el instanceof HTMLElement || el instanceof SVGElement))
232-
return { type: "text", value: el.textContent ?? "" };
268+
return {
269+
type: "text",
270+
value: (isPre ? el.textContent : el.textContent?.replace(/^\s+|\s+$/g, " ")) ?? "",
271+
};
233272

234-
const children = Array.from(el.childNodes).map(processInlineDOMNode);
273+
const children = Array.from(el.childNodes).map(cNode => processInlineDOMNode(cNode, isPre));
235274
const data = parseStyles(el);
236275
const attributes: Record<string, string> = el
237276
.getAttributeNames()
@@ -269,7 +308,13 @@ const processInlineDOMNode = (el: Node): PhrasingContent => {
269308
data,
270309
};
271310
case "INPUT":
272-
if (/(radio|checkbox)/.test((el as HTMLInputElement).type)) return { type: "checkbox" };
311+
return /(radio|checkbox)/.test((el as HTMLInputElement).type)
312+
? { type: "checkbox" }
313+
: {
314+
type: "text",
315+
value: `_${(el as HTMLInputElement).value || "_".repeat(20)}_`,
316+
data: { ...data, border: { style: BorderStyle.OUTSET } },
317+
};
273318
}
274319
return { type: "fragment", children, data };
275320
};
@@ -291,14 +336,21 @@ const createFragmentWithParentNodes = (el: Node, data?: Data): BlockContent => {
291336
!INLINE_TAGS.includes(node.tagName as (typeof INLINE_TAGS)[number])
292337
) {
293338
if (tmp.length) {
294-
children.push({ type: "paragraph", children: tmp.map(processInlineDOMNode) });
339+
children.push({
340+
type: "paragraph",
341+
children: tmp.map(tNode => processInlineDOMNode(tNode, data?.pre)),
342+
});
295343
tmp.length = 0;
296344
}
297345
// skipcq: JS-0357
298346
children.push(processDOMNode(node));
299347
} else tmp.push(node);
300348
}
301-
if (tmp.length) children.push({ type: "paragraph", children: tmp.map(processInlineDOMNode) });
349+
if (tmp.length)
350+
children.push({
351+
type: "paragraph",
352+
children: tmp.map(tNode => processInlineDOMNode(tNode, data?.pre)),
353+
});
302354
return children.length === 1
303355
? { ...children[0], data: { ...data, ...children[0].data } }
304356
: {
@@ -356,7 +408,7 @@ const processDOMNode = (el: HTMLElement | SVGElement): BlockContent => {
356408
return {
357409
type: "heading",
358410
depth: parseInt(el.tagName[1]),
359-
children: Array.from(el.childNodes).map(processInlineDOMNode),
411+
children: Array.from(el.childNodes).map(cNode => processInlineDOMNode(cNode)),
360412
data,
361413
} as Heading;
362414
case "PRE":
@@ -400,48 +452,41 @@ const processDOMNode = (el: HTMLElement | SVGElement): BlockContent => {
400452
children: [{ type: "text", value: `Not supported yet!\n\n${el.textContent}` }],
401453
data: { ...data, pre: true, border: defaultBorder },
402454
};
403-
case "INPUT":
404-
if (!/(radio|checkbox)/.test((el as HTMLInputElement).type)) {
405-
return {
406-
type: "paragraph",
407-
children: [],
408-
data: {
409-
...data,
410-
frame: {
411-
width: 5000,
412-
height: 90,
413-
alignment: { x: HorizontalPositionAlign.LEFT, y: VerticalPositionAlign.CENTER },
414-
anchor: {
415-
horizontal: FrameAnchorType.TEXT,
416-
vertical: FrameAnchorType.TEXT,
417-
},
418-
type: "alignment",
419-
},
420-
border: defaultBorder,
421-
},
422-
};
423-
}
424455
}
425456
return { type: "paragraph", children: [processInlineDOMNode(el)], data };
426457
};
427458

459+
const processInlineNode = (node: HtmlNode) => {
460+
const value = node.value?.trim() ?? "";
461+
const tag = value.split(" ")[0].slice(1);
462+
const el = document.createElement("div");
463+
el.innerHTML = value.endsWith("/>") ? value : `${value}</${tag}>`;
464+
Object.assign(node, {
465+
...processInlineDOMNode(el.children[0]),
466+
children: node.children ?? [],
467+
});
468+
};
469+
428470
/**
429471
* Preprocesses HTML nodes: groups inline HTML tags with the children they enclose and converts inline and block HTML into MDAST nodes.
430472
*
431473
* @param pNode - MDAST parent node.
432474
*/
433-
const consolidateInlineHTML = (pNode: Parent) => {
475+
const preprocess = (pNode: Parent, isRoot = true) => {
434476
const children: RootContent[] = [];
435-
const htmlNodeStack: (Parent & { tag: string })[] = [];
477+
const htmlNodeStack: HtmlNode[] = [];
478+
436479
for (const node of pNode.children) {
437-
if ((node as Parent).children?.length) consolidateInlineHTML(node as Parent);
480+
if ((node as Parent).children?.length) preprocess(node as Parent, false);
438481
// match only inline non-self-closing html nodes.
439482
if (node.type === "html" && /^<[^>]*[^/]>$/.test(node.value)) {
440-
const tag = node.value.split(" ")[0].slice(1);
483+
const tag = node.value.split(" ")[0].replace(/^<|>$/g, "");
441484
// ending tag
442485
if (tag[0] === "/") {
443-
if (htmlNodeStack[0]?.tag === tag.slice(1, -1))
444-
children.push(htmlNodeStack.shift() as RootContent);
486+
const hNode = htmlNodeStack.shift();
487+
if (!hNode) throw new Error(`Invalid HTML: ${node.value}`);
488+
processInlineNode(hNode);
489+
(htmlNodeStack[0]?.children ?? children).push(hNode);
445490
} else {
446491
htmlNodeStack.unshift({ ...node, children: [], tag });
447492
}
@@ -450,6 +495,21 @@ const consolidateInlineHTML = (pNode: Parent) => {
450495
} else {
451496
children.push(node);
452497
}
498+
499+
const isSelfClosingTag = node.type === "html" && /^<[^>]*\/>$/.test(node.value);
500+
// self closing tags
501+
if (isSelfClosingTag && !isRoot) {
502+
// @ts-expect-error -- ok
503+
processInlineNode(node);
504+
} else if (
505+
(isSelfClosingTag && isRoot) ||
506+
(node.type === "html" && !/^<[^>]*>$/.test(node.value))
507+
) {
508+
// block html
509+
const el = document.createElement("div");
510+
el.innerHTML = node.value;
511+
Object.assign(node, createFragmentWithParentNodes(el));
512+
}
453513
}
454514
pNode.children = children;
455515
};
@@ -465,26 +525,6 @@ const consolidateInlineHTML = (pNode: Parent) => {
465525
*/
466526
export const htmlPlugin: () => IPlugin = () => {
467527
return {
468-
block: async (_docx, node) => {
469-
if (node.type === "html") {
470-
const el = document.createElement("div");
471-
el.innerHTML = node.value;
472-
473-
Object.assign(node, createFragmentWithParentNodes(el));
474-
}
475-
return [];
476-
},
477-
inline: async (_docx, node) => {
478-
if (node.type === "html") {
479-
const value = node.value?.trim() ?? "";
480-
const tag = value.split(" ")[0].slice(1);
481-
const el = document.createElement("div");
482-
el.innerHTML = value.endsWith("/>") ? value : `${value}</${tag}>`;
483-
// @ts-expect-error - changing node type here.
484-
Object.assign(node, { ...processInlineDOMNode(el.children[0]), children: node.children });
485-
}
486-
return [];
487-
},
488-
preprocess: consolidateInlineHTML,
528+
preprocess,
489529
};
490530
};

packages/shared/CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# @repo/shared
22

3+
## 0.0.3
4+
5+
### Patch Changes
6+
7+
- Updated dependencies [114454a]
8+
- Updated dependencies [2cc7d49]
9+
- Updated dependencies [5964742]
10+
- Updated dependencies [0d71109]
11+
- Updated dependencies [06a4f14]
12+
- @m2d/html@1.0.1
13+
314
## 0.0.2
415

516
### Patch Changes

packages/shared/package.json

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@repo/shared",
3-
"version": "0.0.2",
3+
"version": "0.0.3",
44
"private": true,
55
"sideEffects": false,
66
"main": "./dist/index.js",
@@ -52,11 +52,9 @@
5252
"r18gs": "^3.0.1",
5353
"react-live": "^4.1.8",
5454
"react18-loaders": "^1.1.4",
55-
"remark-frontmatter": "^5.0.0",
5655
"remark-gfm": "^4.0.1",
57-
"remark-math": "^6.0.0",
5856
"remark-parse": "^11.0.0",
5957
"unified": "^11.0.5",
6058
"unist-util-remove-position": "^5.0.0"
6159
}
62-
}
60+
}

0 commit comments

Comments
 (0)