Skip to content

Commit d506001

Browse files
committed
Improve regex parsing to skip empty attributes and handle HTML-encoded separators, avoiding an extra call to HtmlDecode
1 parent 25eba15 commit d506001

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

src/Html2OpenXml/Collections/HtmlAttributeCollection.cs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ namespace HtmlToOpenXml;
2020
/// </summary>
2121
sealed class HtmlAttributeCollection
2222
{
23-
private static readonly Regex stripStyleAttributesRegex = new(@"(?<name>.+?):\s*(?<val>[^;]+);*\s*");
24-
23+
private static readonly Regex stripStyleAttributesRegex = new(@"(?<name>[^;\s]+)\s?(&\#58;|:)\s?(?<val>[^;&]+)\s?(;|&\#59;)*");
2524
private readonly Dictionary<string, string> attributes = [];
2625

2726

@@ -37,13 +36,7 @@ public static HtmlAttributeCollection ParseStyle(string? htmlTag)
3736

3837
// Encoded ':' and ';' characters are valid for browsers, so the regex also accepts the &#58; and &#59; entities as separators (bug #13812 reported by robin391)
3938
// ex= <span style="text-decoration&#58;underline&#59;color:red">
40-
MatchCollection matches = stripStyleAttributesRegex.Matches(
41-
#if NET5_0_OR_GREATER
42-
System.Web.HttpUtility.HtmlDecode(htmlTag)
43-
#else
44-
HttpUtility.HtmlDecode(htmlTag)
45-
#endif
46-
);
39+
MatchCollection matches = stripStyleAttributesRegex.Matches(htmlTag);
4740
foreach (Match m in matches)
4841
collection.attributes[m.Groups["name"].Value] = m.Groups["val"].Value;
4942

test/HtmlToOpenXml.Tests/StyleTests.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,5 +165,21 @@ public void DuplicateStyle_ReturnsLatter()
165165
var styleAttributes = HtmlAttributeCollection.ParseStyle("color:red;color:blue");
166166
Assert.That(styleAttributes["color"], Is.EqualTo("blue"));
167167
}
168+
169+
[Test(Description = "Encoded ':' and ';' characters are valid")]
170+
public void EncodedStyle_ShouldSucceed()
171+
{
172+
var styleAttributes = HtmlAttributeCollection.ParseStyle("text-decoration&#58;underline&#59;color:red");
173+
Assert.That(styleAttributes["text-decoration"], Is.EqualTo("underline"));
174+
Assert.That(styleAttributes["color"], Is.EqualTo("red"));
175+
}
176+
177+
[Test(Description = "Key style with no value should be ignored")]
178+
public void EmptyStyle_ShouldBeIgnoredd()
179+
{
180+
var styleAttributes = HtmlAttributeCollection.ParseStyle("text-decoration;color:red");
181+
Assert.That(styleAttributes["text-decoration"], Is.Null);
182+
Assert.That(styleAttributes["color"], Is.EqualTo("red"));
183+
}
168184
}
169185
}

0 commit comments

Comments
 (0)