Skip to content

Commit 036bcc6

Browse files
authored
Fix <figcaption> Conversion
No longer discard parsed child elements when adding figure number reference. Change FigureCaptionExpression from inheriting PhrasingElementExpression to BlockElementExpression to support more complex <figcaption> contents. <figcaption> can only be the first or last element of <figure>. Do not set keepnext if <figcaption> is last.
1 parent d413895 commit 036bcc6

File tree

1 file changed

+79
-22
lines changed

1 file changed

+79
-22
lines changed

src/Html2OpenXml/Expressions/FigureCaptionExpression.cs

Lines changed: 79 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
1+
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
22
*
33
* This source is subject to the Microsoft Permissive License.
44
* Please see the License.txt file for more information.
@@ -12,6 +12,7 @@
1212
using System.Collections.Generic;
1313
using System.Globalization;
1414
using System.Linq;
15+
using AngleSharp.Dom;
1516
using AngleSharp.Html.Dom;
1617
using DocumentFormat.OpenXml;
1718
using DocumentFormat.OpenXml.Wordprocessing;
@@ -21,40 +22,81 @@ namespace HtmlToOpenXml.Expressions;
2122
/// <summary>
2223
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
2324
/// </summary>
24-
sealed class FigureCaptionExpression(IHtmlElement node) : PhrasingElementExpression(node)
25+
sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node)
2526
{
2627

2728
/// <inheritdoc/>
2829
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
2930
{
3031
ComposeStyles(context);
31-
var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
32-
if (!childElements.Any())
33-
return [];
34-
35-
var p = new Paragraph (
32+
var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
33+
34+
var figNumRef = new List<OpenXmlElement>()
35+
{
3636
new Run(
3737
new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
3838
),
3939
new SimpleField(
4040
new Run(
4141
new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
42-
) { Instruction = " SEQ Figure \\* ARABIC " }
43-
) {
44-
ParagraphProperties = new ParagraphProperties {
45-
ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
46-
KeepNext = new KeepNext()
47-
}
48-
};
42+
)
43+
{ Instruction = " SEQ Figure \\* ARABIC " }
44+
};
45+
4946

50-
if (childElements.First() is Run run) // any caption?
51-
{
52-
Text? t = run.GetFirstChild<Text>();
53-
if (t != null)
54-
t.Text = " " + t.InnerText; // append a space after the numero of the picture
47+
if (!childElements.Any())
48+
{
49+
return
50+
[new Paragraph(figNumRef)
51+
{
52+
ParagraphProperties = new ParagraphProperties
53+
{
54+
ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
55+
KeepNext = DetermineKeepNext(node),
56+
}
57+
}];
5558
}
56-
57-
return [p];
59+
60+
//Add the figure number references to the start of the first paragraph.
61+
if(childElements.FirstOrDefault() is Paragraph p)
62+
{
63+
figNumRef.Add(new Run(
64+
new Text(" ") { Space = SpaceProcessingModeValues.Preserve }
65+
));
66+
67+
figNumRef.Reverse();
68+
foreach (var element in figNumRef)
69+
{
70+
p.InsertAt(element, 0);
71+
}
72+
}
73+
else
74+
{
75+
//The first child of the figure caption is not a paragraph (e.g. a table). Prepend a new paragraph holding the figure number reference.
76+
childElements =
77+
[
78+
new Paragraph(figNumRef)
79+
{
80+
ParagraphProperties = new ParagraphProperties{ }
81+
},
82+
..childElements
83+
];
84+
}
85+
86+
foreach (var paragraph in childElements.OfType<Paragraph>())
87+
{
88+
paragraph.ParagraphProperties ??= new ParagraphProperties();
89+
paragraph.ParagraphProperties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle);
90+
//Keep caption paragraphs together.
91+
paragraph.ParagraphProperties.KeepNext = new KeepNext();
92+
}
93+
94+
if(childElements.OfType<Paragraph>().LastOrDefault() is Paragraph lastPara)
95+
{
96+
lastPara.ParagraphProperties!.KeepNext = DetermineKeepNext(node);
97+
}
98+
99+
return childElements;
58100
}
59101

60102
/// <summary>
@@ -78,4 +120,19 @@ private static int AddFigureCaption(ParsingContext context)
78120
context.Properties("figCaptionRef", figCaptionRef);
79121
return figCaptionRef.Value;
80122
}
81-
}
123+
124+
/// <summary>
125+
/// Determines whether the KeepNext property should apply to this caption.
126+
/// </summary>
127+
/// <param name="node"></param>
128+
/// <returns>A new <see cref="KeepNext"/>, or null when the caption has no next element sibling.</returns>
129+
private static KeepNext? DetermineKeepNext(IHtmlElement node)
130+
{
131+
// A caption at the end of a figure will have no next sibling.
132+
if(node.NextElementSibling is null)
133+
{
134+
return null;
135+
}
136+
return new();
137+
}
138+
}

0 commit comments

Comments
 (0)