Skip to content

Commit e951813

Browse files
committed
Fix crash when the HTML contains two images with an identical source path #193
1 parent 905abbd commit e951813

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

src/Html2OpenXml/HtmlConverter.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,8 @@ private static async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument,
284284
var imageUris = htmlDocument.QuerySelectorAll("img[src]")
285285
.Cast<IHtmlImageElement>()
286286
.Where(e => AngleSharpExtensions.TryParseUrl(e.GetAttribute("src"), UriKind.RelativeOrAbsolute, out var _))
287-
.Select(e => e.GetAttribute("src")!);
287+
.Select(e => e.GetAttribute("src")!)
288+
.Distinct();
288289
if (!imageUris.Any())
289290
return;
290291

test/HtmlToOpenXml.Tests/ImgTests.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,34 @@ public bool CenterImg_ReturnsFramedImg(string displayMode)
250250
Justification?.Val?.Value == JustificationValues.Center;
251251
}
252252

253+
[Test]
254+
public async Task DuplicateImgSource__DownloadOnce()
255+
{
256+
var webRequest = new Mock<IO.IWebRequest>();
257+
webRequest.Setup(x => x.FetchAsync(It.IsAny<Uri>(), It.IsAny<CancellationToken>()))
258+
.Returns(Task.FromResult<IO.Resource?>(new() {
259+
Content = new MemoryStream(Convert.FromBase64String(@"/9j/4AAQSkZJRgABAQAAAQABAAD/4gKgSUNDX1BST0ZJTEUAAQEAAAKQbGNtcwQwAABtbnRyUkdCIFhZWiAH4QAHAAEAAAABAAZhY3NwQVBQTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLWxjbXMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAtkZXNjAAABCAAAADhjcHJ0AAABQAAAAE53dHB0AAABkAAAABRjaGFkAAABpAAAACxyWFlaAAAB0AAAABRiWFlaAAAB5AAAABRnWFlaAAAB+AAAABRyVFJDAAACDAAAACBnVFJDAAACLAAAACBiVFJDAAACTAAAACBjaHJtAAACbAAAACRtbHVjAAAAAAAAAAEAAAAMZW5VUwAAABwAAAAcAHMAUgBHAEIAIABiAHUAaQBsAHQALQBpAG4AAG1sdWMAAAAAAAAAAQAAAAxlblVTAAAAMgAAABwATgBvACAAYwBvAHAAeQByAGkAZwBoAHQALAAgAHUAcwBlACAAZgByAGUAZQBsAHkAAAAAWFlaIAAAAAAAAPbWAAEAAAAA0y1zZjMyAAAAAAABDEoAAAXj///zKgAAB5sAAP2H///7ov///aMAAAPYAADAlFhZWiAAAAAAAABvlAAAOO4AAAOQWFlaIAAAAAAAACSdAAAPgwAAtr5YWVogAAAAAAAAYqUAALeQAAAY3nBhcmEAAAAAAAMAAAACZmYAAPKnAAANWQAAE9AAAApbcGFyYQAAAAAAAwAAAAJmZgAA8qcAAA1ZAAAT0AAACltwYXJhAAAAAAADAAAAAmZmAADypwAADVkAABPQAAAKW2Nocm0AAAAAAAMAAAAAo9cAAFR7AABMzQAAmZoAACZmAAAPXP/bAEMABQMEBAQDBQQEBAUFBQYHDAgHBwcHDwsLCQwRDxISEQ8RERMWHBcTFBoVEREYIRgaHR0fHx8TFyIkIh4kHB4fHv/bAEMBBQUFBwYHDggIDh4UERQeHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHv/CABEIAX4BfgMBIgACEQEDEQH/xAAcAAEBAAIDAQEAAAAAAAAAAAAAAQcIBAUGAgP/xAAUAQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIQAxAAAAHMoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADxmKzYPj6q8c22/fUTuzaFh3KZzwAAAAAAAAAAAAAAAAAAflhJjUlAlIonZ9ZTZH1epuyB6IAAAAAAAAAAAAAAAADxvstezwv1BYFlhYFIX1PlRt19+O9iAAAAAAAAAAAAAAAAfOpu0+px9AlCUIsFCKMrZmwFns+gAAAAAAAAAAAAAAAfhqXt3rAdDYKgsCoKQqDI2dcUZYKAAAAAAAAAAAAAAABiPLnENTna9UShKEUATk8fJ5kn0nH5AAAAAAAAAAAAAAAAAB5jXbbDzxrHfWeSFQWBeTkg6LOTtSgAAAAAAAAAAAAAAAAAA/Pyvrhhvp89/Jgv0OU/o873fIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAH//EACYQAAEEAQQBBAMBAAAAAAAAAAQBAgMFBgARMFASFCAxkBMVIiP/2gAIAQEAAQUC+xZXI3XqhtMmif1lvkINep+TWpOpZppdbaT+VDtrERazMF3EKgLh6Z7msbkeSSkr7684qvnoLiG1g6XMrlSJuEQiYQijso7ILo8ssf19Xx4xYOr7Ni+TeizYv1N1x7axUz1dV0Ll2QuVZy+TAZtk6E534wm/HJhDlQ5PjoJm+cPirV5MIZ/u346HIhlEuuTCh1YF0Wfg7t4xYXEEVECRRdEVBGTBbASVx3FilarUgZ4M6O/qYrQU0WcMjgoKhxDgoPBOlt6wWyhtqA0F3tGHmJfT4+jHCCozqHsa5LCjCL0TibNPxYnePF36ExoVijVzY2xxNYnV+Ka8U+xj/8QAFBEBAAAAAAAAAAAAAAAAAAAAkP/aAAgBAwEBPwEQP//EABQRAQAAAAAAAAAAAAAAAAAAAJD/2gAIAQIBAT8BED//xAA7EAABAgIGBAoJBQEAAAAAAAABAgMAEQQSITFBUCIwUXEFEBMjMlJhYpHBIDNCU3KQobHhFDSBgpLR/9oACAEBAAY/AvmLaRA3x+4Z/wBiNFxB3HLC3W5d7qN4bzBDbgoyNjYt8THOvOOnvKnF30iYmD2RzNMdl1VGY8IqcIsWe8aH3EB2jupcQcU5OVKIAFpMGjcHrU2z7TgvXu2ajlqK6UHEYK3xZoPJ6beTK4PoypMoscI9s7N2qQ+wsoWk2Ql5Nirlp2HJDyapPu6DfZtPFdqk28y7oL8jE8jU0OhRxUG+861lSjNQFVW8ZFOHnyfWLUrxOtpDPereI/GRPr6raj9NXjxvDuD75EtHWEoqm8WcV3oXem+v4RkdKawr1huNutC/erreQyNrhBA6Og55axthF6zKEpSJJQJDI1sOiaFiRhdFdwtSqXSG3V/qnEmusaPYmJZJUVouJ9WuV34g0ekIqrH13akUikIk17KZdP8AEVjfk1R9EzgrEQSlJfa6yRb/ACPSqMNKWezCA5SpOrwTgP8AsTN+UWiJrYTPaLD4xzTzqd9sWUlEvgjTpR/qiBWbW8e+fKAkJSlIwAlFgyy6LvmMf//EACkQAAIBAQcEAQUBAAAAAAAAAAABESExQVFhcZGhUIGx8OEQQMHR8ZD/2gAIAQEAAT8h/wA710aRVOqD0j8nAesp0ufpLhst7S5eQ8aTLVnWnNQO5zpvy2RwbP0T8iHAjOKvu+ptBFqSdopqvw+xYb1Zw8Hk+jpKtIcJK+pYmhQeYnLyQlFElt8FlqJImzwX/BfVF1CzGsqpYLf+Jo0NjWPWdVmsU+jNgsSzo26HL0Ikj2GU9RGa5Ix8EZ8M9sIzQlrsSBzPD7p4p4FtH1tb9aX6dEbWBzP0Vy0JRSB5CvqGpu4KwWXfRqbuCq/RTDyVHC2oosVXwPgShehva3BqXob+OxGuxuQ89jcinwVIyexuJR/Dc3KrUy0AOdroN97ehIc1xkgExGa+DQ0Njvwd+D2z6aMpA/o6QVF4DF3QaI/gGYlNXYvBGFexGTIz4Y8yJvWwlqR7BrJF8kRaIvkXcUOYcmqDTLoK2ENNbuoGzVVbUsaoyrdTRNivqIk7HZsRlwZEUEqWIrgivqHuFT9jEhV0FjQpp2p5DzBGRDuIIuggh4EQRAkQQ8CMUMUqs7FH7BWdCdpu7m633cdy8g9sKE3/AE9sL5JpaXECCKVHFive1SPsOmlC+zf2Cjb9eQrJ6UbZa/jIIzZKIvngjEiHayUQrZVMiNSFbJZ+ErVcbV+NRKk+0X2FDU91TfB4tgMk5eiXNr08S+3wb/TRLZFt3gri/e5ZUlJWIe6Rb1ZmvT0FwvRlEVsqPxT9QiZLKYtWe6lCh2NPv8kMrd5I9n5IfrOKJtTbhd2QTGHe+eLgQpYSSp0eBIHzev8AyVTw9pfwxHuxsnw2PPvfmY3M4EbIR6akxEeR0uExttTYSb
mwklYv9F//2gAMAwEAAgADAAAAEPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPOEPPPPPPPPPPPPPPPPPPKOEGPPPPPPPPPPPPPPPPPPPCAAAEPPPPPPPPPPPPPPPPPPLIAAAAEHPPPPPPPPPPPPPPPPFAAABAABPPPPPPPPPPPPPPPPBAAEAAABPPPPPPPPOPPPPPPPLCAAAAFPPPPPPPPPPHPPPPPPPHCAAAJPPPPPPPPPPPPPPPPPPPDLDPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPOPPPPPPPPPPPPPPPPPPPPPPPDPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPKPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP/EABQRAQAAAAAAAAAAAAAAAAAAAJD/2gAIAQMBAT8QED//xAAUEQEAAAAAAAAAAAAAAAAAAACQ/9oACAECAQE/EBA//8QAKxAAAQIDBgUFAQEAAAAAAAAAAQARECExIEFQcZHwUWGB0fEwkKGx4UDB/9oACAEBAAE/EPcVFib9eIQprKgdJjoC+FDEzxCNghCi8ZAABmUKmHgw60qRAr29L1UAZaD5iCF/QZQDg4BhRWEVRQI8k9OZbJzKRuQgAobAUgFw5oQgArA6aeDWDdEmcxPW4CbDAFGouiAfaeRCGDirM/QEazHUgX4HNgh9n/WQDCADYQEjkjIaEN2JTzYnEVTVIwMiE1EnaBJQaAGgTQA0EMjIeBEXbOkYDVgVA4HKdAFxiAX/AEiDbApZAPsLiGAhzNCQEngp5EA2FZhBM1oEBT+F5gmnkwEAoLjfAY7sDmWdgA1RA2daHCFshEhnZDYEKQa73fHQjAGReR6KBAGQe7wu1KjgXLO/9RHRgPhEMAprWEOeN9Eh3QmfZr+QAU9cvFp3kiSDtswrwDYAzFsAAADSaU9+AIAZNsEGZWJHT3AhVaoEDcEKfMQAxAEM6J0yBXCYMQsCSdEkkDgACCSP9K2BPlADCLqEAB0wAs1A7DogDMHcCCA6egIcoAggieI4BucuMo4wQrApAUZBhiCGoCraShe4xMP/2Q==")),
260+
StatusCode = System.Net.HttpStatusCode.OK
261+
}));
262+
converter = new HtmlConverter(mainPart, webRequest.Object);
263+
264+
await converter.ParseBody(@"
265+
<img src='/images/5499ef48-29e6-4fc2-b652-5a85b49a1f0f.png'>
266+
<img src='/images/5499ef48-29e6-4fc2-b652-5a85b49a1f0f.png'>
267+
");
268+
269+
webRequest.Verify(s => s.FetchAsync(
270+
It.IsAny<Uri>(),
271+
It.IsAny<CancellationToken>()),
272+
Times.Once);
273+
Assert.That(mainPart.ImageParts.Count(), Is.EqualTo(1));
274+
var paragraph = mainPart.Document.Body!.GetFirstChild<Paragraph>();
275+
Assert.That(paragraph, Is.Not.Null);
276+
var runs = paragraph.Elements<Run>();
277+
Assert.That(runs.Count(), Is.EqualTo(2));
278+
Assert.That(runs.Select(r => r.GetFirstChild<Drawing>()), Has.All.Not.Null);
279+
}
280+
253281
private static (Drawing, ImagePart) AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph)
254282
{
255283
var run = paragraph.GetFirstChild<Run>();

0 commit comments

Comments
 (0)