Skip to content

Commit b290bf1

Browse files
committed
Improve ImageHeader and guessing file type. Move img unit testing to be async
1 parent 1bc74ee commit b290bf1

File tree

3 files changed

+41
-56
lines changed

3 files changed

+41
-56
lines changed

src/Html2OpenXml/HtmlConverter.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
using System.Threading;
1616
using System.Threading.Tasks;
1717
using AngleSharp;
18+
using AngleSharp.Html.Dom;
1819
using DocumentFormat.OpenXml;
1920
using DocumentFormat.OpenXml.Packaging;
2021
using DocumentFormat.OpenXml.Wordprocessing;
@@ -277,18 +278,18 @@ private async Task<IEnumerable<OpenXmlCompositeElement>> ParseCoreAsync(string h
277278
/// <summary>
278279
/// Walk through all the <c>img</c> tags and preload all the remote images.
279280
/// </summary>
280-
private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument,
281+
private static async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument,
281282
IImageLoader imageLoader, ParallelOptions parallelOptions)
282283
{
283284
var imageUris = htmlDocument.QuerySelectorAll("img[src]")
284-
.Cast<AngleSharp.Html.Dom.IHtmlImageElement>()
285+
.Cast<IHtmlImageElement>()
285286
.Where(e => AngleSharpExtensions.TryParseUrl(e.GetAttribute("src"), UriKind.RelativeOrAbsolute, out var _))
286287
.Select(e => e.GetAttribute("src")!);
287288
if (!imageUris.Any())
288289
return;
289290

290291
await imageUris.ForEachAsync(
291-
async (img, cts) => await imageLoader.Download(img, cts).ConfigureAwait(false),
292+
async (img, cts) => await imageLoader.Download(img, cts),
292293
parallelOptions).ConfigureAwait(false);
293294
}
294295

src/Html2OpenXml/IO/ImageHeader.cs

Lines changed: 26 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,10 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml }
5858
/// <returns>Returns true if the detection was successful.</returns>
5959
public static bool TryDetectFileType(Stream stream, out FileType type)
6060
{
61-
using (SequentialBinaryReader reader = new SequentialBinaryReader(stream, leaveOpen: true))
62-
{
63-
type = DetectFileType(reader);
64-
stream.Seek(0L, SeekOrigin.Begin);
65-
return type != FileType.Unrecognized;
66-
}
61+
using var reader = new SequentialBinaryReader(stream, leaveOpen: true);
62+
type = DetectFileType(reader);
63+
stream.Seek(0L, SeekOrigin.Begin);
64+
return type != FileType.Unrecognized;
6765
}
6866

6967
/// <summary>
@@ -74,21 +72,19 @@ public static bool TryDetectFileType(Stream stream, out FileType type)
7472
/// <exception cref="ArgumentException">The image was of an unrecognised format.</exception>
7573
public static Size GetDimensions(Stream stream)
7674
{
77-
using (SequentialBinaryReader reader = new SequentialBinaryReader(stream, leaveOpen: true))
75+
using var reader = new SequentialBinaryReader(stream, leaveOpen: true);
76+
FileType type = DetectFileType(reader);
77+
stream.Seek(0L, SeekOrigin.Begin);
78+
return type switch
7879
{
79-
FileType type = DetectFileType (reader);
80-
stream.Seek(0L, SeekOrigin.Begin);
81-
switch (type)
82-
{
83-
case FileType.Bitmap: return DecodeBitmap(reader);
84-
case FileType.Gif: return DecodeGif(reader);
85-
case FileType.Jpeg: return DecodeJfif(reader);
86-
case FileType.Png: return DecodePng(reader);
87-
case FileType.Emf: return DecodeEmf(reader);
88-
case FileType.Xml: return DecodeXml(stream);
89-
default: return Size.Empty;
90-
}
91-
}
80+
FileType.Bitmap => DecodeBitmap(reader),
81+
FileType.Gif => DecodeGif(reader),
82+
FileType.Jpeg => DecodeJfif(reader),
83+
FileType.Png => DecodePng(reader),
84+
FileType.Emf => DecodeEmf(reader),
85+
FileType.Xml => DecodeXml(stream),
86+
_ => Size.Empty,
87+
};
9288
}
9389

9490
/// <summary>
@@ -123,36 +119,19 @@ public static Size KeepAspectRatio(Size actualSize, Size preferredSize)
123119
private static FileType DetectFileType (SequentialBinaryReader reader)
124120
{
125121
byte[] magicBytes = new byte[MaxMagicBytesLength];
126-
for (int i = 0; i < MaxMagicBytesLength; i += 1)
127-
{
128-
magicBytes[i] = reader.ReadByte();
129-
foreach (var kvPair in imageFormatDecoders)
130-
{
131-
if (StartsWith(magicBytes, kvPair.Key))
132-
{
133-
return kvPair.Value;
134-
}
135-
}
136-
}
137-
138-
return FileType.Unrecognized;
139-
}
122+
reader.Read(magicBytes, 0, MaxMagicBytesLength);
140123

141-
/// <summary>
142-
/// Determines whether the beginning of this byte array instance matches the specified byte array.
143-
/// </summary>
144-
/// <returns>Returns true if the first array starts with the bytes of the second array.</returns>
145-
private static bool StartsWith(byte[] thisBytes, byte[] thatBytes)
146-
{
147-
for (int i = 0; i < thatBytes.Length; i += 1)
124+
var headerSpan = magicBytes.AsSpan();
125+
foreach (var kvPair in imageFormatDecoders)
148126
{
149-
if (thisBytes[i] != thatBytes[i])
127+
// Determines whether the beginning of this array matches a known header.
128+
if (headerSpan.StartsWith(kvPair.Key))
150129
{
151-
return false;
130+
return kvPair.Value;
152131
}
153132
}
154133

155-
return true;
134+
return FileType.Unrecognized;
156135
}
157136

158137
private static Size DecodeBitmap(SequentialBinaryReader reader)
@@ -220,16 +199,16 @@ private static Size DecodeJfif(SequentialBinaryReader reader)
220199
return Size.Empty;
221200

222201
// next 2-bytes are <segment-size>: [high-byte] [low-byte]
223-
var segmentLength = (int)reader.ReadUInt16();
202+
int segmentLength = reader.ReadUInt16();
224203

225204
// segment length includes size bytes, so subtract two
226205
segmentLength -= 2;
227206

228207
if (segmentType == 0xC0 || segmentType == 0xC2)
229208
{
230209
reader.ReadByte(); // bits/sample, usually 8
231-
int height = (int) reader.ReadUInt16();
232-
int width = (int) reader.ReadUInt16();
210+
int height = reader.ReadUInt16();
211+
int width = reader.ReadUInt16();
233212
return new Size(width, height);
234213
}
235214
else

test/HtmlToOpenXml.Tests/ImgTests.cs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@ namespace HtmlToOpenXml.Tests
1616
public class ImgTests : HtmlConverterTestBase
1717
{
1818
[TestCase("https://www.w3schools.com/tags/smiley.gif", "image/gif")]
19-
[TestCase("https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/helloworld.svg", "image/svg+xml")]
20-
public void AbsoluteUri_ReturnsDrawing_WithDownloadedData(string imageUri, string contentType)
19+
[TestCase("https://developer.mozilla.org/en-US/docs/Web/SVG/Tutorial/Introduction/dino.svg", "image/svg+xml")]
20+
public async Task AbsoluteUri_ReturnsDrawing_WithDownloadedData(string imageUri, string contentType)
2121
{
22-
var elements = converter.Parse(@$"<img src='{imageUri}' alt='Smiley face' width='42' height='42'>");
23-
Assert.That(elements, Has.Count.EqualTo(1));
24-
var (_, imagePart) = AssertIsImg(mainPart, elements[0]);
22+
await converter.ParseBody(
23+
@$"<img src='{imageUri}' alt='Smiley face' width='42' height='42'>",
24+
TestContext.CurrentContext.CancellationToken);
25+
26+
var paragraphs = mainPart.Document.Body!.Elements<Paragraph>();
27+
Assert.That(paragraphs.Count(), Is.EqualTo(1));
28+
var (_, imagePart) = AssertIsImg(mainPart, paragraphs.First());
2529
Assert.That(imagePart.ContentType, Is.EqualTo(contentType));
2630
}
2731

@@ -128,7 +132,8 @@ public async Task RemoteImage_WithBaseUri_ShouldSucceed()
128132
converter = new HtmlConverter(mainPart, new IO.DefaultWebRequest() {
129133
BaseImageUrl = new Uri("http://github.com/onizet/html2openxml")
130134
});
131-
var elements = await converter.ParseAsync($"<img src='/blob/dev/icon.png'>");
135+
var elements = await converter.ParseAsync($"<img src='/blob/dev/icon.png'>",
136+
TestContext.CurrentContext.CancellationToken);
132137
Assert.That(elements, Is.Not.Empty);
133138
AssertIsImg(mainPart, elements.First());
134139
}

0 commit comments

Comments
 (0)