Skip to content

Commit 80ef43a

Browse files
committed
In the expression factory, stop casting to IHtmlElement and use IElement instead, to avoid a crash when an SVG node is encountered
1 parent 72c06ae commit 80ef43a

13 files changed

+191
-178
lines changed

src/Html2OpenXml/Expressions/HorizontalLineExpression.cs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,8 @@ namespace HtmlToOpenXml.Expressions;
2020
/// Process the parsing of a <c>hr</c> element
2121
/// by inserting a horizontal line as it stands in many emails.
2222
/// </summary>
23-
sealed class HorizontalLineExpression(IHtmlElement node) : HtmlElementExpression(node)
23+
sealed class HorizontalLineExpression(IHtmlElement node) : HtmlDomExpression
2424
{
25-
[System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage]
26-
public override void CascadeStyles(OpenXmlElement element)
27-
{
28-
throw new System.NotSupportedException();
29-
}
30-
3125
/// <inheritdoc/>
3226
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
3327
{

src/Html2OpenXml/Expressions/HtmlDomExpression.cs

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -24,53 +24,53 @@ namespace HtmlToOpenXml.Expressions;
2424
abstract class HtmlDomExpression
2525
{
2626
protected const string InternalNamespaceUri = "https://github.com/onizet/html2openxml";
27-
static readonly Dictionary<string, Func<IHtmlElement, HtmlElementExpression>> knownTags = InitKnownTags();
27+
static readonly Dictionary<string, Func<IElement, HtmlDomExpression>> knownTags = InitKnownTags();
2828
static readonly HashSet<string> ignoreTags = new(StringComparer.OrdinalIgnoreCase) {
2929
TagNames.Xml, TagNames.AnnotationXml, TagNames.Button, TagNames.Progress,
3030
TagNames.Select, TagNames.Input, TagNames.Textarea, TagNames.Meter };
3131

32-
private static Dictionary<string, Func<IHtmlElement, HtmlElementExpression>> InitKnownTags()
32+
private static Dictionary<string, Func<IElement, HtmlDomExpression>> InitKnownTags()
3333
{
3434
// A complete list of HTML tags can be found here: http://www.w3schools.com/tags/default.asp
3535

36-
var knownTags = new Dictionary<string, Func<IHtmlElement, HtmlElementExpression>>(StringComparer.InvariantCultureIgnoreCase) {
37-
{ TagNames.A, el => new HyperlinkExpression(el) },
38-
{ TagNames.Abbr, el => new AbbreviationExpression(el) },
39-
{ "acronym", el => new AbbreviationExpression(el) },
40-
{ TagNames.B, el => new PhrasingElementExpression(el, new Bold()) },
41-
{ TagNames.BlockQuote, el => new BlockQuoteExpression(el) },
42-
{ TagNames.Br, el => new LineBreakExpression(el) },
43-
{ TagNames.Cite, el => new CiteElementExpression(el) },
44-
{ TagNames.Dd, el => new BlockElementExpression(el, new Indentation() { FirstLine = "708" }, new SpacingBetweenLines() { After = "0" }) },
45-
{ TagNames.Del, el => new PhrasingElementExpression(el, new Strike()) },
46-
{ TagNames.Dfn, el => new AbbreviationExpression(el) },
47-
{ TagNames.Em, el => new PhrasingElementExpression(el, new Italic()) },
48-
{ TagNames.Figcaption, el => new FigureCaptionExpression(el) },
49-
{ TagNames.Font, el => new FontElementExpression(el) },
50-
{ TagNames.H1, el => new HeadingElementExpression(el) },
51-
{ TagNames.H2, el => new HeadingElementExpression(el) },
52-
{ TagNames.H3, el => new HeadingElementExpression(el) },
53-
{ TagNames.H4, el => new HeadingElementExpression(el) },
54-
{ TagNames.H5, el => new HeadingElementExpression(el) },
55-
{ TagNames.H6, el => new HeadingElementExpression(el) },
56-
{ TagNames.I, el => new PhrasingElementExpression(el, new Italic()) },
57-
{ TagNames.Hr, el => new HorizontalLineExpression(el) },
58-
{ TagNames.Img, el => new ImageExpression(el) },
59-
{ TagNames.Ins, el => new PhrasingElementExpression(el, new Underline() { Val = UnderlineValues.Single }) },
60-
{ TagNames.Ol, el => new ListExpression(el) },
61-
{ TagNames.Pre, el => new PreElementExpression(el) },
62-
{ TagNames.Q, el => new QuoteElementExpression(el) },
63-
{ TagNames.Quote, el => new QuoteElementExpression(el) },
64-
{ TagNames.Span, el => new PhrasingElementExpression(el) },
65-
{ TagNames.S, el => new PhrasingElementExpression(el, new Strike()) },
66-
{ TagNames.Strike, el => new PhrasingElementExpression(el, new Strike()) },
67-
{ TagNames.Strong, el => new PhrasingElementExpression(el, new Bold()) },
68-
{ TagNames.Sub, el => new PhrasingElementExpression(el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) },
69-
{ TagNames.Sup, el => new PhrasingElementExpression(el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) },
70-
{ TagNames.Table, el => new TableExpression(el) },
71-
{ TagNames.Time, el => new PhrasingElementExpression(el) },
72-
{ TagNames.U, el => new PhrasingElementExpression(el, new Underline() { Val = UnderlineValues.Single }) },
73-
{ TagNames.Ul, el => new ListExpression(el) },
36+
var knownTags = new Dictionary<string, Func<IElement, HtmlDomExpression>>(StringComparer.InvariantCultureIgnoreCase) {
37+
{ TagNames.A, el => new HyperlinkExpression((IHtmlAnchorElement) el) },
38+
{ TagNames.Abbr, el => new AbbreviationExpression((IHtmlElement) el) },
39+
{ "acronym", el => new AbbreviationExpression((IHtmlElement) el) },
40+
{ TagNames.B, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
41+
{ TagNames.BlockQuote, el => new BlockQuoteExpression((IHtmlElement) el) },
42+
{ TagNames.Br, _ => new LineBreakExpression() },
43+
{ TagNames.Cite, el => new CiteElementExpression((IHtmlElement) el) },
44+
{ TagNames.Dd, el => new BlockElementExpression((IHtmlElement) el, new Indentation() { FirstLine = "708" }, new SpacingBetweenLines() { After = "0" }) },
45+
{ TagNames.Del, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
46+
{ TagNames.Dfn, el => new AbbreviationExpression((IHtmlElement) el) },
47+
{ TagNames.Em, el => new PhrasingElementExpression((IHtmlElement) el, new Italic()) },
48+
{ TagNames.Figcaption, el => new FigureCaptionExpression((IHtmlElement) el) },
49+
{ TagNames.Font, el => new FontElementExpression((IHtmlElement) el) },
50+
{ TagNames.H1, el => new HeadingElementExpression((IHtmlElement) el) },
51+
{ TagNames.H2, el => new HeadingElementExpression((IHtmlElement) el) },
52+
{ TagNames.H3, el => new HeadingElementExpression((IHtmlElement) el) },
53+
{ TagNames.H4, el => new HeadingElementExpression((IHtmlElement) el) },
54+
{ TagNames.H5, el => new HeadingElementExpression((IHtmlElement) el) },
55+
{ TagNames.H6, el => new HeadingElementExpression((IHtmlElement) el) },
56+
{ TagNames.I, el => new PhrasingElementExpression((IHtmlElement) el, new Italic()) },
57+
{ TagNames.Hr, el => new HorizontalLineExpression((IHtmlElement) el) },
58+
{ TagNames.Img, el => new ImageExpression((IHtmlImageElement) el) },
59+
{ TagNames.Ins, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
60+
{ TagNames.Ol, el => new ListExpression((IHtmlElement) el) },
61+
{ TagNames.Pre, el => new PreElementExpression((IHtmlElement) el) },
62+
{ TagNames.Q, el => new QuoteElementExpression((IHtmlElement) el) },
63+
{ TagNames.Quote, el => new QuoteElementExpression((IHtmlElement) el) },
64+
{ TagNames.Span, el => new PhrasingElementExpression((IHtmlElement) el) },
65+
{ TagNames.S, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
66+
{ TagNames.Strike, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
67+
{ TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
68+
{ TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) },
69+
{ TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) },
70+
{ TagNames.Table, el => new TableExpression((IHtmlTableElement) el) },
71+
{ TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) },
72+
{ TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
73+
{ TagNames.Ul, el => new ListExpression((IHtmlElement) el) },
7474
};
7575

7676
return knownTags;
@@ -93,8 +93,8 @@ private static Dictionary<string, Func<IHtmlElement, HtmlElementExpression>> Ini
9393
else if (node.NodeType == NodeType.Element
9494
&& !ignoreTags.Contains(node.NodeName))
9595
{
96-
if (knownTags.TryGetValue(node.NodeName, out Func<IHtmlElement, HtmlElementExpression>? handler))
97-
return handler((IHtmlElement) node);
96+
if (knownTags.TryGetValue(node.NodeName, out Func<IElement, HtmlDomExpression>? handler))
97+
return handler((IElement) node);
9898

9999
// fallback on the flow element which will cover all the semantic Html5 tags
100100
return new BlockElementExpression((IHtmlElement) node);

src/Html2OpenXml/Expressions/HtmlElementExpression.cs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,15 @@
99
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
1010
* PARTICULAR PURPOSE.
1111
*/
12-
using System;
13-
using AngleSharp.Html.Dom;
1412
using DocumentFormat.OpenXml;
1513

1614
namespace HtmlToOpenXml.Expressions;
1715

1816
/// <summary>
1917
/// Represents the base definition of the processor of an HTML tag.
2018
/// </summary>
21-
abstract class HtmlElementExpression(IHtmlElement node) : HtmlDomExpression
19+
abstract class HtmlElementExpression : HtmlDomExpression
2220
{
23-
protected readonly IHtmlElement node = node ?? throw new ArgumentNullException(nameof(node));
24-
2521
/// <summary>
2622
/// Apply the style properties on the provided element.
2723
/// </summary>

src/Html2OpenXml/Expressions/HyperlinkExpression.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@ namespace HtmlToOpenXml.Expressions;
2424
/// <summary>
2525
/// Process the parsing of a link element.
2626
/// </summary>
27-
sealed class HyperlinkExpression(IHtmlElement node) : PhrasingElementExpression(node)
27+
sealed class HyperlinkExpression(IHtmlAnchorElement node) : PhrasingElementExpression(node)
2828
{
29-
private readonly IHtmlAnchorElement linkNode = (IHtmlAnchorElement) node;
29+
private readonly IHtmlAnchorElement linkNode = node;
3030

3131

3232
/// <inheritdoc/>
3333
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
3434
{
3535
var h = CreateHyperlink(context);
36-
var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
36+
var childElements = Interpret(context.CreateChild(this), linkNode.ChildNodes);
3737
if (h is null)
3838
{
3939
return childElements;
@@ -112,8 +112,8 @@ public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
112112
return null;
113113
}
114114

115-
if (!string.IsNullOrEmpty(node.Title))
116-
h.Tooltip = node.Title;
115+
if (!string.IsNullOrEmpty(linkNode.Title))
116+
h.Tooltip = linkNode.Title;
117117
return h;
118118
}
119119
}

src/Html2OpenXml/Expressions/ImageExpression.cs renamed to src/Html2OpenXml/Expressions/Image/ImageExpression.cs

Lines changed: 4 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
* PARTICULAR PURPOSE.
1111
*/
1212
using System;
13-
using System.Collections.Generic;
14-
using System.Linq;
1513
using System.Threading;
1614
using AngleSharp.Html.Dom;
1715
using DocumentFormat.OpenXml;
@@ -27,61 +25,12 @@ namespace HtmlToOpenXml.Expressions;
2725
/// <summary>
2826
/// Process the parsing of an image.
2927
/// </summary>
30-
sealed class ImageExpression(IHtmlElement node) : HtmlElementExpression(node)
28+
class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
3129
{
32-
private readonly IHtmlImageElement imgNode = (IHtmlImageElement) node;
30+
private readonly IHtmlImageElement imgNode = node;
3331

3432

35-
/// <inheritdoc/>
36-
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
37-
{
38-
var drawing = CreateDrawing(context);
39-
40-
if (drawing == null)
41-
return [];
42-
43-
Run run = new(drawing);
44-
Border border = ComposeStyles();
45-
if (border.Val?.Equals(BorderValues.None) == false)
46-
{
47-
run.RunProperties ??= new();
48-
run.RunProperties.Border = border;
49-
}
50-
return [run];
51-
}
52-
53-
[System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage]
54-
public override void CascadeStyles(OpenXmlElement element)
55-
{
56-
throw new NotSupportedException();
57-
}
58-
59-
private Border ComposeStyles ()
60-
{
61-
var styleAttributes = node.GetStyles();
62-
var border = new Border() { Val = BorderValues.None };
63-
64-
// OpenXml limits the border to 4-side of the same color and style.
65-
SideBorder styleBorder = styleAttributes.GetSideBorder("border");
66-
if (styleBorder.IsValid)
67-
{
68-
border.Val = styleBorder.Style;
69-
border.Color = styleBorder.Color.ToHexString();
70-
border.Size = (uint) styleBorder.Width.ValueInPx * 4;
71-
}
72-
else
73-
{
74-
var borderWidth = Unit.Parse(imgNode.GetAttribute("border"));
75-
if (borderWidth.IsValid)
76-
{
77-
border.Val = BorderValues.Single;
78-
border.Size = (uint) borderWidth.ValueInPx * 4;
79-
}
80-
}
81-
return border;
82-
}
83-
84-
private Drawing? CreateDrawing(ParsingContext context)
33+
protected override Drawing? CreateDrawing(ParsingContext context)
8534
{
8635
string? src = imgNode.GetAttribute("src");
8736

@@ -108,47 +57,7 @@ private Border ComposeStyles ()
10857
preferredSize.Height = imgNode.DisplayHeight;
10958
}
11059

111-
var imageObjId = context.Properties<uint?>("imageObjId");
112-
var drawingObjId = context.Properties<uint?>("drawingObjId");
113-
if (!imageObjId.HasValue)
114-
{
115-
// In order to add images in the document, we need to assign a unique id
116-
// to each Drawing object. So we'll loop through all of the existing <wp:docPr> elements
117-
// to find the largest Id, then increment it for each new image.
118-
119-
drawingObjId = 1; // 1 is the minimum ID set by MS Office.
120-
imageObjId = 1;
121-
122-
foreach (var part in new[] {
123-
context.MainPart.Document.Body!.Descendants<Drawing>(),
124-
context.MainPart.HeaderParts.SelectMany(f => f.Header.Descendants<Drawing>()),
125-
context.MainPart.FooterParts.SelectMany(f => f.Footer.Descendants<Drawing>())
126-
})
127-
foreach (Drawing d in part)
128-
{
129-
wp.DocProperties? docProperties = null;
130-
pic.NonVisualPictureProperties? nvPr = null;
131-
132-
if (d.Anchor != null)
133-
{
134-
docProperties = d.Anchor.GetFirstChild<wp.DocProperties>();
135-
nvPr = d.Anchor.GetFirstChild<a.Graphic>()?.GraphicData?.GetFirstChild<pic.Picture>()?.GetFirstChild<pic.NonVisualPictureProperties>();
136-
}
137-
else if (d.Inline != null)
138-
{
139-
docProperties = d.Inline!.DocProperties;
140-
nvPr = d.Inline!.Graphic?.GraphicData?.GetFirstChild<pic.NonVisualPictureProperties>();
141-
}
142-
143-
if (docProperties?.Id != null && docProperties.Id.Value > drawingObjId)
144-
drawingObjId = docProperties.Id.Value;
145-
146-
if (nvPr != null && nvPr.NonVisualDrawingProperties?.Id?.Value > imageObjId)
147-
imageObjId = nvPr.NonVisualDrawingProperties.Id;
148-
}
149-
if (drawingObjId > 1) drawingObjId++;
150-
if (imageObjId > 1) imageObjId++;
151-
}
60+
var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
15261

15362
HtmlImageInfo? iinfo = context.Converter.ImagePrefetcher.Download(src, CancellationToken.None)
15463
.ConfigureAwait(false).GetAwaiter().GetResult();
@@ -169,9 +78,6 @@ private Border ComposeStyles ()
16978
long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus;
17079
long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus;
17180

172-
++drawingObjId;
173-
++imageObjId;
174-
17581
var img = new Drawing(
17682
new wp.Inline(
17783
new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus },
@@ -208,9 +114,6 @@ private Border ComposeStyles ()
208114
) { DistanceFromTop = (UInt32Value) 0U, DistanceFromBottom = (UInt32Value) 0U, DistanceFromLeft = (UInt32Value) 0U, DistanceFromRight = (UInt32Value) 0U }
209115
);
210116

211-
context.Properties("imageObjId", imageObjId);
212-
context.Properties("drawingObjId", drawingObjId!);
213-
214117
return img;
215118
}
216119
}

0 commit comments

Comments
 (0)