From 6c34794a505ca1fbfdc25593caced45037b85cb9 Mon Sep 17 00:00:00 2001 From: Gauthier Roebroeck Date: Tue, 22 Jun 2021 17:08:34 +0800 Subject: [PATCH] fix: some images could be missing from epub files closes #556 --- .../mediacontainer/EpubExtractor.kt | 9 ++++-- .../mediacontainer/EpubExtractorTest.kt | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 komga/src/test/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractorTest.kt diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractor.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractor.kt index 849163ccd..ac06cb889 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractor.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractor.kt @@ -40,9 +40,14 @@ class EpubExtractor( .map { opfDir?.resolve(it)?.normalize() ?: Paths.get(it) } .flatMap { pagePath -> val doc = zip.getInputStream(zip.getEntry(pagePath.separatorsToUnix())).use { Jsoup.parse(it, null, "") } - doc.getElementsByTag("img") + + val img = doc.getElementsByTag("img") .map { it.attr("src") } // get the src, which can be a relative path - .map { pagePath.parentOrEmpty().resolve(it).normalize() } // resolve it against the page folder + + val svg = doc.select("svg > image[xlink:href]") + .map { it.attr("xlink:href") } // get the source, which can be a relative path + + (img + svg).map { pagePath.parentOrEmpty().resolve(it).normalize() } // resolve it against the page folder } return images.map { image -> diff --git a/komga/src/test/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractorTest.kt b/komga/src/test/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractorTest.kt new file mode 100644 index 000000000..8b5a4714a --- /dev/null +++ b/komga/src/test/kotlin/org/gotson/komga/infrastructure/mediacontainer/EpubExtractorTest.kt @@ -0,0 +1,31 @@ +package org.gotson.komga.infrastructure.mediacontainer + +import org.apache.tika.config.TikaConfig +import org.assertj.core.api.Assertions.assertThat +import org.gotson.komga.domain.model.Dimension +import org.gotson.komga.infrastructure.image.ImageAnalyzer +import org.junit.jupiter.api.Test +import org.springframework.core.io.ClassPathResource + +class EpubExtractorTest { + + private val contentDetector = ContentDetector(TikaConfig()) + private val imageAnalyzer = ImageAnalyzer() + private val zipExtractor = ZipExtractor(contentDetector, imageAnalyzer) + + private val epubExtractor = EpubExtractor(zipExtractor, contentDetector, imageAnalyzer) + + @Test + fun `given epub 3 file when parsing for entries then returns all images contained in pages`() { + val epubResource = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub") + + val entries = epubExtractor.getEntries(epubResource.file.toPath()) + + assertThat(entries).hasSize(1) + with(entries.first()) { + assertThat(name).isEqualTo("cover.jpeg") + assertThat(mediaType).isEqualTo("image/jpeg") + assertThat(dimension).isEqualTo(Dimension(461, 616)) + } + } +}