Skip to content

Commit 1bc74ee

Browse files
committed
Heading with digits only should not be considered as a numbering #189
1 parent c17f8ad commit 1bc74ee

File tree

2 files changed: 44 additions and 6 deletions

2 files changed: 44 additions and 6 deletions

src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs

Lines changed: 22 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
1010
* PARTICULAR PURPOSE.
1111
*/
12+
using System;
1213
using System.Collections.Generic;
1314
using System.Linq;
1415
using System.Text.RegularExpressions;
@@ -23,7 +24,8 @@ namespace HtmlToOpenXml.Expressions;
2324
/// </summary>
2425
sealed class HeadingElementExpression(IHtmlElement node) : NumberingExpressionBase(node)
2526
{
26-
private static readonly Regex numberingRegex = new(@"^\s*(\d+\.?)*\s*");
27+
private static readonly Regex numberingRegex = new(@"^\s*(?<number>[0-9\.]+\s*)[^0-9]",
28+
RegexOptions.Compiled, TimeSpan.FromMilliseconds(100));
2729

2830
/// <inheritdoc/>
2931
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
@@ -36,7 +38,7 @@ public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
3638

3739
var paragraph = childElements.FirstOrDefault() as Paragraph;
3840

39-
paragraph ??= new Paragraph(childElements);
41+
paragraph ??= new(childElements);
4042
paragraph.ParagraphProperties ??= new();
4143
paragraph.ParagraphProperties.ParagraphStyleId =
4244
context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.HeadingStyle + level);
@@ -65,16 +67,30 @@ public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
6567

6668
private static bool IsNumbering(OpenXmlElement runElement)
6769
{
70+
if (runElement.InnerText is null)
71+
return false;
72+
6873
// Check if the line starts with a number format (1., 1.1., 1.1.1.)
6974
// If it does, make sure we make the heading a numbered item
70-
Match regexMatch = numberingRegex.Match(runElement.InnerText ?? string.Empty);
75+
var headingText = runElement.InnerText;
76+
Match regexMatch;
77+
try
78+
{
79+
regexMatch = numberingRegex.Match(headingText);
80+
}
81+
catch (RegexMatchTimeoutException)
82+
{
83+
return false;
84+
}
85+
7186

7287
// Make sure we only grab the heading if it starts with a number
73-
if (regexMatch.Groups.Count > 1 && regexMatch.Groups[1].Captures.Count > 0)
88+
if (regexMatch.Success && headingText.Length > regexMatch.Groups["number"].Length)
7489
{
75-
// Strip numbers from text
90+
// Strip numbers from text
91+
headingText = headingText.Substring(regexMatch.Groups["number"].Length);
7692
runElement.InnerXml = runElement.InnerXml
77-
.Replace(runElement.InnerText!, runElement.InnerText!.Substring(regexMatch.Length));
93+
.Replace(runElement.InnerText!, headingText);
7894

7995
return true;
8096
}

test/HtmlToOpenXml.Tests/HeadingTests.cs

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,5 +101,27 @@ public void MaxLevel_ShouldBeIgnored()
101101
Is.Null, $"Only {maxLevel+1} levels of heading supported");
102102
});
103103
}
104+
105+
[Test(Description = "Heading with number but no text should be ignored (issue #189)")]
106+
public void NumberingWithNoTextPattern_ReturnsSimpleHeading()
107+
{
108+
var elements = converter.Parse("<h1>00</h1>");
109+
110+
var absNum = mainPart.NumberingDefinitionsPart?.Numbering
111+
.Elements<AbstractNum>()
112+
.Where(abs => abs.AbstractNumDefinitionName?.Val == NumberingExpressionBase.HeadingNumberingName)
113+
.SingleOrDefault();
114+
Assert.That(absNum, Is.Null);
115+
116+
var paragraphs = elements.Cast<Paragraph>();
117+
Assert.Multiple(() =>
118+
{
119+
Assert.That(paragraphs.Count(), Is.EqualTo(1));
120+
Assert.That(paragraphs.First().InnerText, Is.EqualTo("00"));
121+
Assert.That(paragraphs.First().ParagraphProperties?.NumberingProperties?.NumberingLevelReference?.Val,
122+
Is.Null,
123+
"First paragraph is not a numbering");
124+
});
125+
}
104126
}
105127
}

0 commit comments

Comments (0)