@@ -9,48 +9,79 @@ import {
9
9
RootContent ,
10
10
BlockContent ,
11
11
TableRow ,
12
+ Html ,
12
13
} from "@m2d/core" ;
13
14
import { standardizeColor } from "./utils" ;
14
- import {
15
- AlignmentType ,
16
- BorderStyle ,
17
- FrameAnchorType ,
18
- HorizontalPositionAlign ,
19
- IBorderOptions ,
20
- VerticalPositionAlign ,
21
- } from "docx" ;
15
+ import { AlignmentType , BorderStyle , IBorderOptions } from "docx" ;
22
16
23
17
/**
 * Tag names that the plugin treats as inline HTML during conversion.
 *
 * Entries are compared against `tagName`, so casing matters: every entry is upper-case
 * except `svg`, which is kept lower-case exactly as listed
 * (presumably because SVG elements report a lower-case tagName — TODO confirm).
 */
const INLINE_TAGS = [
  "A",
  "ABBR",
  "ACRONYM", // deprecated, yet still an inline element
  "B",
  "BDI",
  "BDO",
  "BIG", // deprecated, yet still an inline element
  "BR",
  "BUTTON", // strictly inline-block, but commonly handled as inline
  "CITE",
  "CODE",
  "DATA",
  "DATALIST",
  "DEL",
  "DFN",
  "EM",
  "I",
  "IMG",
  "INPUT",
  "INS",
  "KBD",
  "LABEL",
  "MARK",
  "METER",
  "NOSCRIPT",
  "OBJECT",
  "OUTPUT",
  "Q",
  "RUBY",
  "RP",
  "RT",
  "S",
  "SAMP",
  "SCRIPT",
  "SELECT",
  "SLOT",
  "SMALL",
  "SPAN",
  "STRONG",
  "SUB",
  "SUP",
  "svg",
  "TEMPLATE",
  "TEXTAREA",
  "TIME",
  "U",
  "TT", // deprecated
  "VAR",
  "WBR",
] as const;
40
71
41
72
/**
 * Lookup table from an upper-case DOM tag name to the MDAST node type it converts to.
 * Several tags share a target type (`B`/`STRONG`, `EM`/`I`, `DEL`/`S`).
 */
const DOM_TO_MDAST_MAP = {
  // links and line breaks
  A: "link",
  B: "strong",
  BR: "break",
  // emphasis / strong variants
  EM: "emphasis",
  STRONG: "strong",
  I: "emphasis",
  // images
  IMG: "image",
  // strike-through variants
  DEL: "delete",
  S: "delete",
} as const;
55
86
56
87
/**
@@ -68,6 +99,11 @@ const CSS_BORDER_STYLES = [
68
99
"outset" ,
69
100
] ;
70
101
102
/**
 * An `Html` node extended with the bookkeeping used while matching inline HTML tags.
 */
interface HtmlNode extends Html {
  /** Tag name taken from the node's opening tag. */
  tag: string;
  /** Nodes collected as the content of this tag. */
  children: Array<RootContent | PhrasingContent>;
}
106
+
71
107
/**
72
108
* Parsed CSS border representation.
73
109
*/
@@ -227,11 +263,14 @@ const parseStyles = (el: Node, inline = true): Data => {
227
263
* @param el - DOM node to process.
228
264
* @returns PhrasingContent-compatible node.
229
265
*/
230
- const processInlineDOMNode = ( el : Node ) : PhrasingContent => {
266
+ const processInlineDOMNode = ( el : Node , isPre = false ) : PhrasingContent => {
231
267
if ( ! ( el instanceof HTMLElement || el instanceof SVGElement ) )
232
- return { type : "text" , value : el . textContent ?? "" } ;
268
+ return {
269
+ type : "text" ,
270
+ value : ( isPre ? el . textContent : el . textContent ?. replace ( / ^ \s + | \s + $ / g, " " ) ) ?? "" ,
271
+ } ;
233
272
234
- const children = Array . from ( el . childNodes ) . map ( processInlineDOMNode ) ;
273
+ const children = Array . from ( el . childNodes ) . map ( cNode => processInlineDOMNode ( cNode , isPre ) ) ;
235
274
const data = parseStyles ( el ) ;
236
275
const attributes : Record < string , string > = el
237
276
. getAttributeNames ( )
@@ -269,7 +308,13 @@ const processInlineDOMNode = (el: Node): PhrasingContent => {
269
308
data,
270
309
} ;
271
310
case "INPUT" :
272
- if ( / ( r a d i o | c h e c k b o x ) / . test ( ( el as HTMLInputElement ) . type ) ) return { type : "checkbox" } ;
311
+ return / ( r a d i o | c h e c k b o x ) / . test ( ( el as HTMLInputElement ) . type )
312
+ ? { type : "checkbox" }
313
+ : {
314
+ type : "text" ,
315
+ value : `_${ ( el as HTMLInputElement ) . value || "_" . repeat ( 20 ) } _` ,
316
+ data : { ...data , border : { style : BorderStyle . OUTSET } } ,
317
+ } ;
273
318
}
274
319
return { type : "fragment" , children, data } ;
275
320
} ;
@@ -291,14 +336,21 @@ const createFragmentWithParentNodes = (el: Node, data?: Data): BlockContent => {
291
336
! INLINE_TAGS . includes ( node . tagName as ( typeof INLINE_TAGS ) [ number ] )
292
337
) {
293
338
if ( tmp . length ) {
294
- children . push ( { type : "paragraph" , children : tmp . map ( processInlineDOMNode ) } ) ;
339
+ children . push ( {
340
+ type : "paragraph" ,
341
+ children : tmp . map ( tNode => processInlineDOMNode ( tNode , data ?. pre ) ) ,
342
+ } ) ;
295
343
tmp . length = 0 ;
296
344
}
297
345
// skipcq: JS-0357
298
346
children . push ( processDOMNode ( node ) ) ;
299
347
} else tmp . push ( node ) ;
300
348
}
301
- if ( tmp . length ) children . push ( { type : "paragraph" , children : tmp . map ( processInlineDOMNode ) } ) ;
349
+ if ( tmp . length )
350
+ children . push ( {
351
+ type : "paragraph" ,
352
+ children : tmp . map ( tNode => processInlineDOMNode ( tNode , data ?. pre ) ) ,
353
+ } ) ;
302
354
return children . length === 1
303
355
? { ...children [ 0 ] , data : { ...data , ...children [ 0 ] . data } }
304
356
: {
@@ -356,7 +408,7 @@ const processDOMNode = (el: HTMLElement | SVGElement): BlockContent => {
356
408
return {
357
409
type : "heading" ,
358
410
depth : parseInt ( el . tagName [ 1 ] ) ,
359
- children : Array . from ( el . childNodes ) . map ( processInlineDOMNode ) ,
411
+ children : Array . from ( el . childNodes ) . map ( cNode => processInlineDOMNode ( cNode ) ) ,
360
412
data,
361
413
} as Heading ;
362
414
case "PRE" :
@@ -400,48 +452,41 @@ const processDOMNode = (el: HTMLElement | SVGElement): BlockContent => {
400
452
children : [ { type : "text" , value : `Not supported yet!\n\n${ el . textContent } ` } ] ,
401
453
data : { ...data , pre : true , border : defaultBorder } ,
402
454
} ;
403
- case "INPUT" :
404
- if ( ! / ( r a d i o | c h e c k b o x ) / . test ( ( el as HTMLInputElement ) . type ) ) {
405
- return {
406
- type : "paragraph" ,
407
- children : [ ] ,
408
- data : {
409
- ...data ,
410
- frame : {
411
- width : 5000 ,
412
- height : 90 ,
413
- alignment : { x : HorizontalPositionAlign . LEFT , y : VerticalPositionAlign . CENTER } ,
414
- anchor : {
415
- horizontal : FrameAnchorType . TEXT ,
416
- vertical : FrameAnchorType . TEXT ,
417
- } ,
418
- type : "alignment" ,
419
- } ,
420
- border : defaultBorder ,
421
- } ,
422
- } ;
423
- }
424
455
}
425
456
return { type : "paragraph" , children : [ processInlineDOMNode ( el ) ] , data } ;
426
457
} ;
427
458
459
/**
 * Converts a raw inline `html` node, in place, into the node that `processInlineDOMNode`
 * produces for the element described by its opening tag.
 *
 * The trimmed `node.value` is parsed inside a detached `<div>`. Unless the value is
 * self-closing (ends with `/>`), a matching closing tag is appended first so the parser
 * sees a complete element. The converted node's fields are copied onto `node`, while
 * `node.children` (or an empty array when absent) is kept as the children.
 *
 * @param node - HTML node to mutate. It is left untouched when its value does not parse
 *   to an element.
 */
const processInlineNode = (node: HtmlNode): void => {
  const value = node.value?.trim() ?? "";
  // Stop the tag name at whitespace, "/" or ">" so that `<b>` yields `b` rather than `b>`
  // (which previously produced the malformed closing tag `</b>>`).
  const tag = /^<([^\s/>]+)/.exec(value)?.[1] ?? "";
  const el = document.createElement("div");
  el.innerHTML = value.endsWith("/>") || !tag ? value : `${value}</${tag}>`;
  const parsed = el.children[0];
  // An empty or malformed value yields no element; bail out instead of handing
  // `undefined` to processInlineDOMNode, which would throw on property access.
  if (!parsed) return;
  Object.assign(node, {
    ...processInlineDOMNode(parsed),
    children: node.children ?? [],
  });
};
469
+
428
470
/**
429
471
 * Consolidates inline HTML tag children inside valid tag-matching groups and converts inline and block HTML nodes in place.
430
472
*
431
473
* @param pNode - MDAST parent node.
432
474
*/
433
- const consolidateInlineHTML = ( pNode : Parent ) => {
475
+ const preprocess = ( pNode : Parent , isRoot = true ) => {
434
476
const children : RootContent [ ] = [ ] ;
435
- const htmlNodeStack : ( Parent & { tag : string } ) [ ] = [ ] ;
477
+ const htmlNodeStack : HtmlNode [ ] = [ ] ;
478
+
436
479
for ( const node of pNode . children ) {
437
- if ( ( node as Parent ) . children ?. length ) consolidateInlineHTML ( node as Parent ) ;
480
+ if ( ( node as Parent ) . children ?. length ) preprocess ( node as Parent , false ) ;
438
481
// match only inline non-self-closing html nodes.
439
482
if ( node . type === "html" && / ^ < [ ^ > ] * [ ^ / ] > $ / . test ( node . value ) ) {
440
- const tag = node . value . split ( " " ) [ 0 ] . slice ( 1 ) ;
483
+ const tag = node . value . split ( " " ) [ 0 ] . replace ( / ^ < | > $ / g , "" ) ;
441
484
// ending tag
442
485
if ( tag [ 0 ] === "/" ) {
443
- if ( htmlNodeStack [ 0 ] ?. tag === tag . slice ( 1 , - 1 ) )
444
- children . push ( htmlNodeStack . shift ( ) as RootContent ) ;
486
+ const hNode = htmlNodeStack . shift ( ) ;
487
+ if ( ! hNode ) throw new Error ( `Invalid HTML: ${ node . value } ` ) ;
488
+ processInlineNode ( hNode ) ;
489
+ ( htmlNodeStack [ 0 ] ?. children ?? children ) . push ( hNode ) ;
445
490
} else {
446
491
htmlNodeStack . unshift ( { ...node , children : [ ] , tag } ) ;
447
492
}
@@ -450,6 +495,21 @@ const consolidateInlineHTML = (pNode: Parent) => {
450
495
} else {
451
496
children . push ( node ) ;
452
497
}
498
+
499
+ const isSelfClosingTag = node . type === "html" && / ^ < [ ^ > ] * \/ > $ / . test ( node . value ) ;
500
+ // self closing tags
501
+ if ( isSelfClosingTag && ! isRoot ) {
502
+ // @ts -expect-error -- ok
503
+ processInlineNode ( node ) ;
504
+ } else if (
505
+ ( isSelfClosingTag && isRoot ) ||
506
+ ( node . type === "html" && ! / ^ < [ ^ > ] * > $ / . test ( node . value ) )
507
+ ) {
508
+ // block html
509
+ const el = document . createElement ( "div" ) ;
510
+ el . innerHTML = node . value ;
511
+ Object . assign ( node , createFragmentWithParentNodes ( el ) ) ;
512
+ }
453
513
}
454
514
pNode . children = children ;
455
515
} ;
@@ -465,26 +525,6 @@ const consolidateInlineHTML = (pNode: Parent) => {
465
525
*/
466
526
export const htmlPlugin: () => IPlugin = () => ({
  // The plugin exposes a single hook: the module-level `preprocess` pass.
  preprocess,
});
0 commit comments