Skip to content

Release 3.1.1 #160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Changelog

## vNext
## 3.1.1

- Support MathMl
- Support SVG
- Fix layout of `div`/`p` blocks ending with a line break #158
- Prevent crash when header/footer is incomplete and parsing image #159
- Fix combining 2 runs separated by a break, 2nd line should not be prefixed by a space

## 3.1.0

Expand All @@ -13,6 +14,13 @@
- Fallback to `background` style attribute as many users use this simplified attribute version
- In `HtmlDomExpression.CreateFromHtmlNode`, use the correct casting to `IElement` rather than `IHtmlElement`, to prevent crash if `svg` node is encountered

## 3.0.1

- Ensure to count existing images from header and footer too #113
- Preserve line break pre for OSX/Windows
- Prevent a crash when the provided style is missing its type
- Defensive code to prevent a crash when 2 rowSpan+colSpan cells have a cell in between #59

## 3.0.0

- AngleSharp is now the backend parser for Html
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ This library supports both **.Net Framework 4.6.2**, **.NET Standard 2.0** and *

Depends on [DocumentFormat.OpenXml](https://www.nuget.org/packages/DocumentFormat.OpenXml/) and [AngleSharp](https://www.nuget.org/packages/AngleSharp).

-> [Official Nuget Package](https://www.nuget.org/packages/HtmlToOpenXml.dll)

## See Also

* [Documentation](https://github.com/onizet/html2openxml/wiki)
Expand Down
12 changes: 11 additions & 1 deletion src/Html2OpenXml/Expressions/BlockElementExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ internal static IEnumerable<OpenXmlElement> ComposeChildren(ParsingContext conte
runs.Add(element);
continue;
}
// if 2 tables are consectuives, we insert a paragraph in between
// if 2 tables are consecutive, we insert a paragraph in between
// or Word will merge the two tables
else if (element is Table && previousElement is Table)
{
Expand Down Expand Up @@ -309,6 +309,16 @@ private static Paragraph CreateParagraph(ParsingContext context, IList<OpenXmlEl
context.CascadeStyles(p);

p.Append(CombineRuns(runs));

// in Html, if a paragraph is ending with a line break, it is ignored
if (p.LastChild is Run run && run.LastChild is Break lineBreak)
{
// is this a standalone <br> inside the block? If so, replace the lineBreak with an empty paragraph
if (runs.Count == 1) run.Append(new Text());
if (run.ChildElements.Count == 1) run.Remove();
else lineBreak.Remove();
}

return p;
}

Expand Down
4 changes: 2 additions & 2 deletions src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ internal static (uint imageObjId, uint drawingObjId) IncrementDrawingObjId(Parsi

foreach (var part in new[] {
context.MainPart.Document.Body!.Descendants<Drawing>(),
context.MainPart.HeaderParts.SelectMany(f => f.Header.Descendants<Drawing>()),
context.MainPart.FooterParts.SelectMany(f => f.Footer.Descendants<Drawing>())
context.MainPart.HeaderParts.Where(f => f.Header != null).SelectMany(f => f.Header.Descendants<Drawing>()),
context.MainPart.FooterParts.Where(f => f.Footer != null).SelectMany(f => f.Footer.Descendants<Drawing>())
})
foreach (Drawing d in part)
{
Expand Down
4 changes: 4 additions & 0 deletions src/Html2OpenXml/Expressions/PhrasingElementExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ protected static IEnumerable<OpenXmlElement> CombineRuns(IEnumerable<OpenXmlElem
}
endsWithSpace = text[text.Length - 1].IsSpaceCharacter();
}
else if (run.LastChild is Break)
{
endsWithSpace = true;
}
yield return run;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/Html2OpenXml/HtmlToOpenXml.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
<AssemblyName>HtmlToOpenXml</AssemblyName>
<RootNamespace>HtmlToOpenXml</RootNamespace>
<PackageId>HtmlToOpenXml.dll</PackageId>
<Version>3.1.0</Version>
<Version>3.1.1</Version>
<PackageIcon>icon.png</PackageIcon>
<Copyright>Copyright 2009-$([System.DateTime]::Now.Year) Olivier Nizet</Copyright>
<PackageReleaseNotes>See changelog https://github.com/onizet/html2openxml/blob/master/CHANGELOG.md</PackageReleaseNotes>
<PackageReadmeFile>README.md</PackageReadmeFile>
<PackageTags>office openxml netcore html</PackageTags>
<AssemblyVersion>3.1.0</AssemblyVersion>
<AssemblyVersion>3.1.1</AssemblyVersion>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageProjectUrl>https://github.com/onizet/html2openxml</PackageProjectUrl>
<RepositoryUrl>https://github.com/onizet/html2openxml</RepositoryUrl>
Expand Down
34 changes: 33 additions & 1 deletion test/HtmlToOpenXml.Tests/DivTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ public void PageBreakAfter_ReturnsTwoParagraphsThenOne()
Assert.That(elements[2].ChildElements, Has.All.TypeOf<Run>());
Assert.That(elements[2].InnerText, Is.EqualTo("Ipsum"));
});
Assert.That(elements[1].LastChild?.HasChild<Break>(), Is.True);
Assert.That(elements[1].LastChild?.HasChild<LastRenderedPageBreak>(), Is.False);
}

Expand Down Expand Up @@ -112,5 +111,38 @@ public void WithLineHeight_ReturnsSpacingBetweenLines(string lineHeight, string
Assert.That(spaces?.LineRule?.InnerText, Is.EqualTo(expectedRule));
Assert.That(spaces?.Line?.Value, Is.EqualTo(expectedSpace));
}

[Test(Description = "Block endings with line break, should ignore it #158")]
public void WithEndingLineBreak_ReturnsIgnoredBreak()
{
    // Free text followed by two nested blocks, each one terminated by a <br>.
    const string html = "line1<div>line2<br><div>line3<br></div></div>";

    var paragraphs = converter.Parse(html);

    Assert.That(paragraphs, Has.Count.EqualTo(3));
    Assert.That(paragraphs, Has.All.TypeOf<Paragraph>());

    // None of the produced paragraphs may finish on a child whose own last child is a Break.
    bool anyTrailingBreak = paragraphs.Any(paragraph => paragraph.LastChild?.LastChild is Break);
    Assert.That(anyTrailingBreak, Is.False);
}

[Test(Description = "Block endings with 2 line breaks, should keep only one")]
public void WithEndingDoubleLineBreak_ReturnsOneBreak()
{
    // Same layout as the single-break case, but the outer block ends with two <br>.
    const string html = "line1<div>line2<br><br><div>line3<br></div></div>";

    var paragraphs = converter.Parse(html);

    Assert.That(paragraphs, Has.Count.EqualTo(3));
    Assert.That(paragraphs, Has.All.TypeOf<Paragraph>());

    // The second paragraph is expected to still finish on a Break.
    var secondParagraph = paragraphs.ElementAt(1);
    Assert.That(secondParagraph.LastChild?.LastChild, Is.TypeOf<Break>());
}

[Test(Description = "Block containing only 1 line break, should display empty run")]
public void WithOnlyLineBreak_ReturnsEmptyRun()
{
    // A block whose sole content is a <br>.
    const string html = "<div><br></div>";

    var paragraphs = converter.Parse(html);

    Assert.That(paragraphs, Has.Count.EqualTo(1));
    Assert.That(paragraphs, Has.All.TypeOf<Paragraph>());

    var trailingRun = paragraphs.First().LastChild;
    Assert.That(trailingRun, Is.Not.Null);

    // The last child of that run must be a Text node and not a Break.
    Assert.Multiple(() =>
    {
        Assert.That(trailingRun.LastChild, Is.Not.TypeOf<Break>());
        Assert.That(trailingRun.LastChild, Is.TypeOf<Text>());
    });

    // ...and that Text node carries no characters.
    Assert.That(((Text)trailingRun.LastChild).Text, Is.Empty);
}
}
}
2 changes: 1 addition & 1 deletion test/HtmlToOpenXml.Tests/ElementTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public void TextWithBreak_ReturnsRunWithBreak()
{
var elements = converter.Parse(@"Lorem<br/>Ipsum");
Assert.That(elements, Has.Count.EqualTo(1));
Assert.That(elements[0].ChildElements, Has.Count.EqualTo(4));
Assert.That(elements[0].ChildElements, Has.Count.EqualTo(3));

Assert.Multiple(() =>
{
Expand Down
18 changes: 18 additions & 0 deletions test/HtmlToOpenXml.Tests/ImgTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,24 @@ public async Task ImageIds_IsUniqueAcrossPackagingParts()
"New image id is incremented considering existing images in header, body and footer");
}

[GenericTestCase(typeof(HeaderPart), Description = "Incomplete header or footer definition must be skipped #159")]
[GenericTestCase(typeof(FooterPart))]
public void WithIncompleteHeader_ShouldNotThrow<T>() where T : OpenXmlPart, IFixedContentTypePart
{
    // Inline image used as the conversion input.
    const string imgHtml = "<img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==' width='42' height='42'>";

    // Work on an in-memory copy of the embedded template document.
    using var documentStream = new MemoryStream();
    using (var template = ResourceHelper.GetStream("Resources.DocWithImgHeaderFooter.docx"))
    {
        template.CopyTo(documentStream);
    }

    documentStream.Seek(0L, SeekOrigin.Begin);
    using WordprocessingDocument wordDocument = WordprocessingDocument.Open(documentStream, true);
    MainDocumentPart mainPart = wordDocument.MainDocumentPart!;

    // Register a part of type T without giving it any content: this is the incomplete definition under test.
    mainPart.AddNewPart<T>();

    HtmlConverter htmlConverter = new(mainPart);
    Assert.DoesNotThrowAsync(async () =>
    {
        await htmlConverter.ParseHtml(imgHtml);
    });
}

private Drawing AssertIsImg (OpenXmlCompositeElement element)
{
var run = element.GetFirstChild<Run>();
Expand Down
18 changes: 17 additions & 1 deletion test/HtmlToOpenXml.Tests/WhitespaceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,28 @@ public void AnchorWithImgThenText_ReturnsCollapsedStartingWhitespace()
}

[Test(Description = "`nbsp` entities should not be collapsed")]
public void NonBreakingSpaceEntities_ReturnsPreserveedWhitespace()
public void NonBreakingSpaceEntities_ReturnsPreservedWhitespace()
{
var elements = converter.Parse("<h1>&nbsp;&nbsp; Hello World! </h1>");
Assert.That(elements, Has.Count.EqualTo(1));
Assert.That(elements, Has.All.TypeOf<Paragraph>());
Assert.That(elements[0].InnerText, Is.EqualTo("   Hello World!"));
}

[Test(Description = "Consecutive runs separated by a break should not prefix the 2nd line with a space")]
public void ConsecutivePhrasingWithBreak_ReturnsSecondLineWithNoSpaces()
{
    // Two pieces of phrasing content separated only by a <br>.
    const string html = "<span>Hello<br><span>World</span></span>";

    var paragraphs = converter.Parse(html);

    Assert.That(paragraphs, Has.Count.EqualTo(1));
    Assert.That(paragraphs, Has.All.TypeOf<Paragraph>());

    var paragraph = paragraphs[0];
    Assert.That(paragraph.InnerText, Is.EqualTo("HelloWorld"));

    // Snapshot the direct Run children once, then address them by position.
    Run[] runs = paragraph.Elements<Run>().ToArray();
    Assert.That(runs.Length, Is.EqualTo(3));

    Assert.Multiple(() =>
    {
        Assert.That(runs[1].LastChild, Is.TypeOf<Break>());
        Assert.That(runs[2].FirstChild, Is.TypeOf<Text>());
    });

    // The text after the break must be exactly "World", with no leading space.
    Assert.That(((Text)runs[2].FirstChild).Text, Is.EqualTo("World"));
}
}
}
Loading