Skip to content

Commit 4dc7d69

Browse files
authored
Merge pull request #160 from onizet/dev
Release 3.1.1
2 parents ce0e374 + 112e412 commit 4dc7d69

File tree

10 files changed

+101
-11
lines changed

10 files changed

+101
-11
lines changed

CHANGELOG.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# Changelog
22

3-
## vNext
3+
## 3.1.1
44

5-
- Support MathMl
6-
- Support SVG
5+
- Fix layout not being respected when a `div`/`p` ends with a line break #158
6+
- Prevent a crash when parsing an image while a header/footer is incomplete #159
7+
- Fix combining 2 runs separated by a break: the 2nd line should not be prefixed by a space
78

89
## 3.1.0
910

@@ -13,6 +14,13 @@
1314
- Fallback to `background` style attribute as many users use this simplified attribute version
1415
- In `HtmlDomExpression.CreateFromHtmlNode`, use the correct casting to `IElement` rather than `IHtmlElement`, to prevent crash if `svg` node is encountered
1516

17+
## 3.0.1
18+
19+
- Ensure existing images from the header and footer are counted too #113
20+
- Preserve line break pre for OSX/Windows
21+
- Prevent a crash when the provided style is missing its type
22+
- Defensive code to prevent a crash when 2 rowSpan+colSpan cells have a cell in between #59
23+
1624
## 3.0.0
1725

1826
- AngleSharp is now the backend parser for Html

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ This library supports both **.Net Framework 4.6.2**, **.NET Standard 2.0** and *
1010

1111
Depends on [DocumentFormat.OpenXml](https://www.nuget.org/packages/DocumentFormat.OpenXml/) and [AngleSharp](https://www.nuget.org/packages/AngleSharp).
1212

13+
-> [Official Nuget Package](https://www.nuget.org/packages/HtmlToOpenXml.dll)
14+
1315
## See Also
1416

1517
* [Documentation](https://github.com/onizet/html2openxml/wiki)

src/Html2OpenXml/Expressions/BlockElementExpression.cs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ internal static IEnumerable<OpenXmlElement> ComposeChildren(ParsingContext conte
271271
runs.Add(element);
272272
continue;
273273
}
274-
// if 2 tables are consectuives, we insert a paragraph in between
274+
// if 2 tables are consecutive, we insert a paragraph in between
275275
// or Word will merge the two tables
276276
else if (element is Table && previousElement is Table)
277277
{
@@ -309,6 +309,16 @@ private static Paragraph CreateParagraph(ParsingContext context, IList<OpenXmlEl
309309
context.CascadeStyles(p);
310310

311311
p.Append(CombineRuns(runs));
312+
313+
// in HTML, if a paragraph ends with a line break, that break is ignored
314+
if (p.LastChild is Run run && run.LastChild is Break lineBreak)
315+
{
316+
// is this a standalone <br> inside the block? If so, replace the lineBreak with an empty paragraph
317+
if (runs.Count == 1) run.Append(new Text());
318+
if (run.ChildElements.Count == 1) run.Remove();
319+
else lineBreak.Remove();
320+
}
321+
312322
return p;
313323
}
314324

src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ internal static (uint imageObjId, uint drawingObjId) IncrementDrawingObjId(Parsi
8787

8888
foreach (var part in new[] {
8989
context.MainPart.Document.Body!.Descendants<Drawing>(),
90-
context.MainPart.HeaderParts.SelectMany(f => f.Header.Descendants<Drawing>()),
91-
context.MainPart.FooterParts.SelectMany(f => f.Footer.Descendants<Drawing>())
90+
context.MainPart.HeaderParts.Where(f => f.Header != null).SelectMany(f => f.Header.Descendants<Drawing>()),
91+
context.MainPart.FooterParts.Where(f => f.Footer != null).SelectMany(f => f.Footer.Descendants<Drawing>())
9292
})
9393
foreach (Drawing d in part)
9494
{

src/Html2OpenXml/Expressions/PhrasingElementExpression.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,10 @@ protected static IEnumerable<OpenXmlElement> CombineRuns(IEnumerable<OpenXmlElem
218218
}
219219
endsWithSpace = text[text.Length - 1].IsSpaceCharacter();
220220
}
221+
else if (run.LastChild is Break)
222+
{
223+
endsWithSpace = true;
224+
}
221225
yield return run;
222226
}
223227
}

src/Html2OpenXml/HtmlToOpenXml.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
<AssemblyName>HtmlToOpenXml</AssemblyName>
1010
<RootNamespace>HtmlToOpenXml</RootNamespace>
1111
<PackageId>HtmlToOpenXml.dll</PackageId>
12-
<Version>3.1.0</Version>
12+
<Version>3.1.1</Version>
1313
<PackageIcon>icon.png</PackageIcon>
1414
<Copyright>Copyright 2009-$([System.DateTime]::Now.Year) Olivier Nizet</Copyright>
1515
<PackageReleaseNotes>See changelog https://github.com/onizet/html2openxml/blob/master/CHANGELOG.md</PackageReleaseNotes>
1616
<PackageReadmeFile>README.md</PackageReadmeFile>
1717
<PackageTags>office openxml netcore html</PackageTags>
18-
<AssemblyVersion>3.1.0</AssemblyVersion>
18+
<AssemblyVersion>3.1.1</AssemblyVersion>
1919
<PackageLicenseExpression>MIT</PackageLicenseExpression>
2020
<PackageProjectUrl>https://github.com/onizet/html2openxml</PackageProjectUrl>
2121
<RepositoryUrl>https://github.com/onizet/html2openxml</RepositoryUrl>

test/HtmlToOpenXml.Tests/DivTests.cs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ public void PageBreakAfter_ReturnsTwoParagraphsThenOne()
7979
Assert.That(elements[2].ChildElements, Has.All.TypeOf<Run>());
8080
Assert.That(elements[2].InnerText, Is.EqualTo("Ipsum"));
8181
});
82-
Assert.That(elements[1].LastChild?.HasChild<Break>(), Is.True);
8382
Assert.That(elements[1].LastChild?.HasChild<LastRenderedPageBreak>(), Is.False);
8483
}
8584

@@ -112,5 +111,38 @@ public void WithLineHeight_ReturnsSpacingBetweenLines(string lineHeight, string
112111
Assert.That(spaces?.LineRule?.InnerText, Is.EqualTo(expectedRule));
113112
Assert.That(spaces?.Line?.Value, Is.EqualTo(expectedSpace));
114113
}
114+
115+
[Test(Description = "Block endings with line break, should ignore it #158")]
116+
public void WithEndingLineBreak_ReturnsIgnoredBreak()
117+
{
118+
var elements = converter.Parse("line1<div>line2<br><div>line3<br></div></div>");
119+
Assert.That(elements, Has.Count.EqualTo(3));
120+
Assert.That(elements, Has.All.TypeOf<Paragraph>());
121+
Assert.That(elements.Any(p => p.LastChild?.LastChild is Break), Is.False);
122+
}
123+
124+
[Test(Description = "Block endings with 2 line breaks, should keep only one")]
125+
public void WithEndingDoubleLineBreak_ReturnsOneBreak()
126+
{
127+
var elements = converter.Parse("line1<div>line2<br><br><div>line3<br></div></div>");
128+
Assert.That(elements, Has.Count.EqualTo(3));
129+
Assert.That(elements, Has.All.TypeOf<Paragraph>());
130+
Assert.That(elements.ElementAt(1).LastChild?.LastChild, Is.TypeOf<Break>());
131+
}
132+
133+
[Test(Description = "Block containing only 1 line break, should display empty run")]
134+
public void WithOnlyLineBreak_ReturnsEmptyRun()
135+
{
136+
var elements = converter.Parse("<div><br></div>");
137+
Assert.That(elements, Has.Count.EqualTo(1));
138+
Assert.That(elements, Has.All.TypeOf<Paragraph>());
139+
var lastRun = elements.First().LastChild;
140+
Assert.That(lastRun, Is.Not.Null);
141+
Assert.Multiple(() => {
142+
Assert.That(lastRun.LastChild, Is.Not.TypeOf<Break>());
143+
Assert.That(lastRun.LastChild, Is.TypeOf<Text>());
144+
});
145+
Assert.That(((Text)lastRun.LastChild).Text, Is.Empty);
146+
}
115147
}
116148
}

test/HtmlToOpenXml.Tests/ElementTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ public void TextWithBreak_ReturnsRunWithBreak()
136136
{
137137
var elements = converter.Parse(@"Lorem<br/>Ipsum");
138138
Assert.That(elements, Has.Count.EqualTo(1));
139-
Assert.That(elements[0].ChildElements, Has.Count.EqualTo(4));
139+
Assert.That(elements[0].ChildElements, Has.Count.EqualTo(3));
140140

141141
Assert.Multiple(() =>
142142
{

test/HtmlToOpenXml.Tests/ImgTests.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,24 @@ public async Task ImageIds_IsUniqueAcrossPackagingParts()
133133
"New image id is incremented considering existing images in header, body and footer");
134134
}
135135

136+
[GenericTestCase(typeof(HeaderPart), Description = "Incomplete header or footer definition must be skipped #159")]
137+
[GenericTestCase(typeof(FooterPart))]
138+
public void WithIncompleteHeader_ShouldNotThrow<T>() where T : OpenXmlPart, IFixedContentTypePart
139+
{
140+
using var generatedDocument = new MemoryStream();
141+
using (var buffer = ResourceHelper.GetStream("Resources.DocWithImgHeaderFooter.docx"))
142+
buffer.CopyTo(generatedDocument);
143+
144+
generatedDocument.Position = 0L;
145+
using WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true);
146+
MainDocumentPart mainPart = package.MainDocumentPart!;
147+
mainPart.AddNewPart<T>(); // this code is incomplete as it's missing the header content
148+
149+
HtmlConverter converter = new(mainPart);
150+
Assert.DoesNotThrowAsync(async () =>
151+
await converter.ParseHtml("<img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==' width='42' height='42'>"));
152+
}
153+
136154
private Drawing AssertIsImg (OpenXmlCompositeElement element)
137155
{
138156
var run = element.GetFirstChild<Run>();

test/HtmlToOpenXml.Tests/WhitespaceTests.cs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,28 @@ public void AnchorWithImgThenText_ReturnsCollapsedStartingWhitespace()
7474
}
7575

7676
[Test(Description = "`nbsp` entities should not be collapsed")]
77-
public void NonBreakingSpaceEntities_ReturnsPreserveedWhitespace()
77+
public void NonBreakingSpaceEntities_ReturnsPreservedWhitespace()
7878
{
7979
var elements = converter.Parse("<h1>&nbsp;&nbsp; Hello World! </h1>");
8080
Assert.That(elements, Has.Count.EqualTo(1));
8181
Assert.That(elements, Has.All.TypeOf<Paragraph>());
8282
Assert.That(elements[0].InnerText, Is.EqualTo("   Hello World!"));
8383
}
84+
85+
[Test(Description = "Consecutive runs separated by a break should not prefix the 2nd line with a space")]
86+
public void ConsecutivePhrasingWithBreak_ReturnsSecondLineWithNoSpaces()
87+
{
88+
var elements = converter.Parse("<span>Hello<br><span>World</span></span>");
89+
Assert.That(elements, Has.Count.EqualTo(1));
90+
Assert.That(elements, Has.All.TypeOf<Paragraph>());
91+
Assert.That(elements[0].InnerText, Is.EqualTo("HelloWorld"));
92+
var runs = elements[0].Elements<Run>();
93+
Assert.That(runs.Count(), Is.EqualTo(3));
94+
Assert.Multiple(() => {
95+
Assert.That(runs.ElementAt(1).LastChild, Is.TypeOf<Break>());
96+
Assert.That(runs.ElementAt(2).FirstChild, Is.TypeOf<Text>());
97+
});
98+
Assert.That(((Text)runs.ElementAt(2).FirstChild).Text, Is.EqualTo("World"));
99+
}
84100
}
85101
}

0 commit comments

Comments
 (0)