Skip to content

Release 3.1.0 #157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
- Support MathMl
- Support SVG

## 3.0.1
## 3.1.0

- Ensure existing images from the header and footer are counted too #113
- Preserve line break pre for OSX/Windows
- Prevent a crash when the provided style is missing its type
- Add defensive code to prevent a crash when two rowSpan+colSpan cells have a cell in between #59
- Fix table cell borders being wrongly applied on the run #156
- Correctly handle RTL layout for text, list, table and document scope #86 #66
- Support property line-height #52
- Fallback to `background` style attribute as many users use this simplified attribute version
- In `HtmlDomExpression.CreateFromHtmlNode`, use the correct casting to `IElement` rather than `IHtmlElement`, to prevent crash if `svg` node is encountered

## 3.0.0

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,5 @@ Logo provided with the permission of [Enhanced Labs Design Studio](http://www.en

## Support

This project is open source and I do my best to support it in my spare time. I'm always happy to receive Pull Request and grateful for the time you have taken
This project is open source and I do my best to support it in my spare time. I'm always happy to receive pull requests and am grateful for the time you have taken. Please target the `dev` branch only.
If you have questions, don't hesitate to get in touch with me!
10 changes: 1 addition & 9 deletions examples/Demo/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,7 @@ static async Task Main(string[] args)
const string filename = "test.docx";
string html = ResourceHelper.GetString("Resources.CompleteRunTest.html");
if (File.Exists(filename)) File.Delete(filename);
const string preformattedText = @"
^__^
(oo)\_______
(__)\ )\/\
||----w |
|| ||";

html = @$"<pre role='img' aria-label='ASCII COW'>
{preformattedText}</pre>";
using (MemoryStream generatedDocument = new MemoryStream())
{
// Uncomment and comment the second using() to open an existing template document
Expand Down Expand Up @@ -53,7 +45,7 @@ static async Task Main(string[] args)
await converter.ParseHtml(html);
mainPart.Document.Save();

//AssertThatOpenXmlDocumentIsValid(package);
AssertThatOpenXmlDocumentIsValid(package);
}

File.WriteAllBytes(filename, generatedDocument.ToArray());
Expand Down
12 changes: 6 additions & 6 deletions src/Html2OpenXml/Collections/HtmlAttributeCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ public HtmlColor GetColor(string name)
/// Gets an attribute representing an unit: 120px, 10pt, 5em, 20%, ...
/// </summary>
/// <returns>If the attribute is malformed, the <see cref="Unit.IsValid"/> property is set to false.</returns>
public Unit GetUnit(string name)
public Unit GetUnit(string name, UnitMetric defaultMetric = UnitMetric.Unitless)
{
return Unit.Parse(this[name]);
return Unit.Parse(this[name], defaultMetric);
}

/// <summary>
Expand All @@ -86,13 +86,13 @@ public Margin GetMargin(string name)
Margin margin = Margin.Parse(this[name]);
Unit u;

u = GetUnit(name + "-top");
u = GetUnit(name + "-top", UnitMetric.Pixel);
if (u.IsValid) margin.Top = u;
u = GetUnit(name + "-right");
u = GetUnit(name + "-right", UnitMetric.Pixel);
if (u.IsValid) margin.Right = u;
u = GetUnit(name + "-bottom");
u = GetUnit(name + "-bottom", UnitMetric.Pixel);
if (u.IsValid) margin.Bottom = u;
u = GetUnit(name + "-left");
u = GetUnit(name + "-left", UnitMetric.Pixel);
if (u.IsValid) margin.Left = u;

return margin;
Expand Down
50 changes: 43 additions & 7 deletions src/Html2OpenXml/Expressions/BlockElementExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,11 @@ protected override void ComposeStyles (ParsingContext context)
}

// according to w3c, dir should be used in conjunction with lang. But whatever happens, we'll apply the RTL layout
if ("rtl".Equals(node.Direction, StringComparison.OrdinalIgnoreCase))
{
paraProperties.Justification = new() { Val = JustificationValues.Right };
}
else if ("ltr".Equals(node.Direction, StringComparison.OrdinalIgnoreCase))
{
paraProperties.Justification = new() { Val = JustificationValues.Left };
var dir = node.GetTextDirection();
if (dir.HasValue) {
paraProperties.BiDi = new() {
Val = OnOffValue.FromBoolean(dir == AngleSharp.Dom.DirectionMode.Rtl)
};
}

var attrValue = styleAttributes!["text-align"];
Expand Down Expand Up @@ -200,6 +198,44 @@ protected override void ComposeStyles (ParsingContext context)

paraProperties.Indentation = indentation;
}

var lineHeight = styleAttributes.GetUnit("line-height");
if (!lineHeight.IsValid
&& "normal".Equals(styleAttributes["line-height"], StringComparison.OrdinalIgnoreCase))
{
// if `normal` is specified, reset any values
lineHeight = new Unit(UnitMetric.Unitless, 1);
}

if (lineHeight.IsValid)
{
if (lineHeight.Type == UnitMetric.Unitless)
{
// auto should be considered as 240ths of a line
// https://learn.microsoft.com/en-us/dotnet/api/documentformat.openxml.wordprocessing.spacingbetweenlines.line?view=openxml-3.0.1
paraProperties.SpacingBetweenLines = new() {
LineRule = LineSpacingRuleValues.Auto,
Line = Math.Round(lineHeight.Value * 240).ToString(CultureInfo.InvariantCulture)
};
}
else if (lineHeight.Type == UnitMetric.Percent)
{
// percentage depends on the font size, which is hard to determine here,
// so let's rely on the "auto" behaviour instead
paraProperties.SpacingBetweenLines = new() {
LineRule = LineSpacingRuleValues.Auto,
Line = Math.Round(lineHeight.Value / 100 * 240).ToString(CultureInfo.InvariantCulture)
};
}
else
{
// twentieths of a point
paraProperties.SpacingBetweenLines = new() {
LineRule = LineSpacingRuleValues.Exact,
Line = Math.Round(lineHeight.ValueInPoint * 20).ToString(CultureInfo.InvariantCulture)
};
}
}
}

/// <summary>
Expand Down
11 changes: 11 additions & 0 deletions src/Html2OpenXml/Expressions/BodyExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ protected override void ComposeStyles(ParsingContext context)
}
}
}

if (paraProperties.BiDi is not null)
{
var sectionProperties = context.MainPart.Document.Body!.GetFirstChild<SectionProperties>();
if (sectionProperties == null || sectionProperties.GetFirstChild<PageSize>() == null)
{
context.MainPart.Document.Body.Append(sectionProperties = new());
}

sectionProperties.AddChild(paraProperties.BiDi.CloneNode(true));
}
}

/// <summary>
Expand Down
8 changes: 1 addition & 7 deletions src/Html2OpenXml/Expressions/HorizontalLineExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@ namespace HtmlToOpenXml.Expressions;
/// Process the parsing of a <c>hr</c> element
/// by inserting a horizontal line, as is common in many emails.
/// </summary>
sealed class HorizontalLineExpression(IHtmlElement node) : HtmlElementExpression(node)
sealed class HorizontalLineExpression(IHtmlElement node) : HtmlDomExpression
{
[System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage]
public override void CascadeStyles(OpenXmlElement element)
{
throw new System.NotSupportedException();
}

/// <inheritdoc/>
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
{
Expand Down
84 changes: 42 additions & 42 deletions src/Html2OpenXml/Expressions/HtmlDomExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,53 +24,53 @@ namespace HtmlToOpenXml.Expressions;
abstract class HtmlDomExpression
{
protected const string InternalNamespaceUri = "https://github.com/onizet/html2openxml";
static readonly Dictionary<string, Func<IHtmlElement, HtmlElementExpression>> knownTags = InitKnownTags();
static readonly Dictionary<string, Func<IElement, HtmlDomExpression>> knownTags = InitKnownTags();
static readonly HashSet<string> ignoreTags = new(StringComparer.OrdinalIgnoreCase) {
TagNames.Xml, TagNames.AnnotationXml, TagNames.Button, TagNames.Progress,
TagNames.Select, TagNames.Input, TagNames.Textarea, TagNames.Meter };

private static Dictionary<string, Func<IHtmlElement, HtmlElementExpression>> InitKnownTags()
private static Dictionary<string, Func<IElement, HtmlDomExpression>> InitKnownTags()
{
// A complete list of HTML tags can be found here: http://www.w3schools.com/tags/default.asp

var knownTags = new Dictionary<string, Func<IHtmlElement, HtmlElementExpression>>(StringComparer.InvariantCultureIgnoreCase) {
{ TagNames.A, el => new HyperlinkExpression(el) },
{ TagNames.Abbr, el => new AbbreviationExpression(el) },
{ "acronym", el => new AbbreviationExpression(el) },
{ TagNames.B, el => new PhrasingElementExpression(el, new Bold()) },
{ TagNames.BlockQuote, el => new BlockQuoteExpression(el) },
{ TagNames.Br, el => new LineBreakExpression(el) },
{ TagNames.Cite, el => new CiteElementExpression(el) },
{ TagNames.Dd, el => new BlockElementExpression(el, new Indentation() { FirstLine = "708" }, new SpacingBetweenLines() { After = "0" }) },
{ TagNames.Del, el => new PhrasingElementExpression(el, new Strike()) },
{ TagNames.Dfn, el => new AbbreviationExpression(el) },
{ TagNames.Em, el => new PhrasingElementExpression(el, new Italic()) },
{ TagNames.Figcaption, el => new FigureCaptionExpression(el) },
{ TagNames.Font, el => new FontElementExpression(el) },
{ TagNames.H1, el => new HeadingElementExpression(el) },
{ TagNames.H2, el => new HeadingElementExpression(el) },
{ TagNames.H3, el => new HeadingElementExpression(el) },
{ TagNames.H4, el => new HeadingElementExpression(el) },
{ TagNames.H5, el => new HeadingElementExpression(el) },
{ TagNames.H6, el => new HeadingElementExpression(el) },
{ TagNames.I, el => new PhrasingElementExpression(el, new Italic()) },
{ TagNames.Hr, el => new HorizontalLineExpression(el) },
{ TagNames.Img, el => new ImageExpression(el) },
{ TagNames.Ins, el => new PhrasingElementExpression(el, new Underline() { Val = UnderlineValues.Single }) },
{ TagNames.Ol, el => new ListExpression(el) },
{ TagNames.Pre, el => new PreElementExpression(el) },
{ TagNames.Q, el => new QuoteElementExpression(el) },
{ TagNames.Quote, el => new QuoteElementExpression(el) },
{ TagNames.Span, el => new PhrasingElementExpression(el) },
{ TagNames.S, el => new PhrasingElementExpression(el, new Strike()) },
{ TagNames.Strike, el => new PhrasingElementExpression(el, new Strike()) },
{ TagNames.Strong, el => new PhrasingElementExpression(el, new Bold()) },
{ TagNames.Sub, el => new PhrasingElementExpression(el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) },
{ TagNames.Sup, el => new PhrasingElementExpression(el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) },
{ TagNames.Table, el => new TableExpression(el) },
{ TagNames.Time, el => new PhrasingElementExpression(el) },
{ TagNames.U, el => new PhrasingElementExpression(el, new Underline() { Val = UnderlineValues.Single }) },
{ TagNames.Ul, el => new ListExpression(el) },
var knownTags = new Dictionary<string, Func<IElement, HtmlDomExpression>>(StringComparer.InvariantCultureIgnoreCase) {
{ TagNames.A, el => new HyperlinkExpression((IHtmlAnchorElement) el) },
{ TagNames.Abbr, el => new AbbreviationExpression((IHtmlElement) el) },
{ "acronym", el => new AbbreviationExpression((IHtmlElement) el) },
{ TagNames.B, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
{ TagNames.BlockQuote, el => new BlockQuoteExpression((IHtmlElement) el) },
{ TagNames.Br, _ => new LineBreakExpression() },
{ TagNames.Cite, el => new CiteElementExpression((IHtmlElement) el) },
{ TagNames.Dd, el => new BlockElementExpression((IHtmlElement) el, new Indentation() { FirstLine = "708" }, new SpacingBetweenLines() { After = "0" }) },
{ TagNames.Del, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
{ TagNames.Dfn, el => new AbbreviationExpression((IHtmlElement) el) },
{ TagNames.Em, el => new PhrasingElementExpression((IHtmlElement) el, new Italic()) },
{ TagNames.Figcaption, el => new FigureCaptionExpression((IHtmlElement) el) },
{ TagNames.Font, el => new FontElementExpression((IHtmlElement) el) },
{ TagNames.H1, el => new HeadingElementExpression((IHtmlElement) el) },
{ TagNames.H2, el => new HeadingElementExpression((IHtmlElement) el) },
{ TagNames.H3, el => new HeadingElementExpression((IHtmlElement) el) },
{ TagNames.H4, el => new HeadingElementExpression((IHtmlElement) el) },
{ TagNames.H5, el => new HeadingElementExpression((IHtmlElement) el) },
{ TagNames.H6, el => new HeadingElementExpression((IHtmlElement) el) },
{ TagNames.I, el => new PhrasingElementExpression((IHtmlElement) el, new Italic()) },
{ TagNames.Hr, el => new HorizontalLineExpression((IHtmlElement) el) },
{ TagNames.Img, el => new ImageExpression((IHtmlImageElement) el) },
{ TagNames.Ins, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
{ TagNames.Ol, el => new ListExpression((IHtmlElement) el) },
{ TagNames.Pre, el => new PreElementExpression((IHtmlElement) el) },
{ TagNames.Q, el => new QuoteElementExpression((IHtmlElement) el) },
{ TagNames.Quote, el => new QuoteElementExpression((IHtmlElement) el) },
{ TagNames.Span, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.S, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
{ TagNames.Strike, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
{ TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
{ TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) },
{ TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) },
{ TagNames.Table, el => new TableExpression((IHtmlTableElement) el) },
{ TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
{ TagNames.Ul, el => new ListExpression((IHtmlElement) el) },
};

return knownTags;
Expand All @@ -93,8 +93,8 @@ private static Dictionary<string, Func<IHtmlElement, HtmlElementExpression>> Ini
else if (node.NodeType == NodeType.Element
&& !ignoreTags.Contains(node.NodeName))
{
if (knownTags.TryGetValue(node.NodeName, out Func<IHtmlElement, HtmlElementExpression>? handler))
return handler((IHtmlElement) node);
if (knownTags.TryGetValue(node.NodeName, out Func<IElement, HtmlDomExpression>? handler))
return handler((IElement) node);

// fallback on the flow element which will cover all the semantic Html5 tags
return new BlockElementExpression((IHtmlElement) node);
Expand Down
6 changes: 1 addition & 5 deletions src/Html2OpenXml/Expressions/HtmlElementExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,15 @@
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
* PARTICULAR PURPOSE.
*/
using System;
using AngleSharp.Html.Dom;
using DocumentFormat.OpenXml;

namespace HtmlToOpenXml.Expressions;

/// <summary>
/// Represents the base definition of the processor of an HTML tag.
/// </summary>
abstract class HtmlElementExpression(IHtmlElement node) : HtmlDomExpression
abstract class HtmlElementExpression : HtmlDomExpression
{
protected readonly IHtmlElement node = node ?? throw new ArgumentNullException(nameof(node));

/// <summary>
/// Apply the style properties on the provided element.
/// </summary>
Expand Down
10 changes: 5 additions & 5 deletions src/Html2OpenXml/Expressions/HyperlinkExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@ namespace HtmlToOpenXml.Expressions;
/// <summary>
/// Process the parsing of a link element.
/// </summary>
sealed class HyperlinkExpression(IHtmlElement node) : PhrasingElementExpression(node)
sealed class HyperlinkExpression(IHtmlAnchorElement node) : PhrasingElementExpression(node)
{
private readonly IHtmlAnchorElement linkNode = (IHtmlAnchorElement) node;
private readonly IHtmlAnchorElement linkNode = node;


/// <inheritdoc/>
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
{
var h = CreateHyperlink(context);
var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
var childElements = Interpret(context.CreateChild(this), linkNode.ChildNodes);
if (h is null)
{
return childElements;
Expand Down Expand Up @@ -112,8 +112,8 @@ public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
return null;
}

if (!string.IsNullOrEmpty(node.Title))
h.Tooltip = node.Title;
if (!string.IsNullOrEmpty(linkNode.Title))
h.Tooltip = linkNode.Title;
return h;
}
}
Loading
Loading