fix: some images could be missing from epub files

closes #556
This commit is contained in:
Gauthier Roebroeck 2021-06-22 17:08:34 +08:00
parent 7a566326b0
commit 6c34794a50
2 changed files with 38 additions and 2 deletions

View file

@ -40,9 +40,14 @@ class EpubExtractor(
.map { opfDir?.resolve(it)?.normalize() ?: Paths.get(it) }
.flatMap { pagePath ->
val doc = zip.getInputStream(zip.getEntry(pagePath.separatorsToUnix())).use { Jsoup.parse(it, null, "") }
doc.getElementsByTag("img")
val img = doc.getElementsByTag("img")
.map { it.attr("src") } // get the src, which can be a relative path
.map { pagePath.parentOrEmpty().resolve(it).normalize() } // resolve it against the page folder
val svg = doc.select("svg > image[xlink:href]")
.map { it.attr("xlink:href") } // get the source, which can be a relative path
(img + svg).map { pagePath.parentOrEmpty().resolve(it).normalize() } // resolve it against the page folder
}
return images.map { image ->

View file

@ -0,0 +1,31 @@
package org.gotson.komga.infrastructure.mediacontainer
import org.apache.tika.config.TikaConfig
import org.assertj.core.api.Assertions.assertThat
import org.gotson.komga.domain.model.Dimension
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.junit.jupiter.api.Test
import org.springframework.core.io.ClassPathResource
/**
 * Tests for [EpubExtractor], run against an epub fixture loaded from the test classpath.
 */
class EpubExtractorTest {
    // Collaborators are instantiated directly; the same detector and analyzer
    // instances are shared by the zip extractor and the epub extractor.
    private val detector = ContentDetector(TikaConfig())
    private val analyzer = ImageAnalyzer()
    private val extractor = EpubExtractor(ZipExtractor(detector, analyzer), detector, analyzer)

    @Test
    fun `given epub 3 file when parsing for entries then returns all images contained in pages`() {
        // given: the sample epub resolved to a filesystem path
        val epubPath = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub").file.toPath()

        // when
        val entries = extractor.getEntries(epubPath)

        // then: exactly one image entry is reported, with the expected metadata
        assertThat(entries).hasSize(1)
        val onlyEntry = entries.first()
        assertThat(onlyEntry.name).isEqualTo("cover.jpeg")
        assertThat(onlyEntry.mediaType).isEqualTo("image/jpeg")
        assertThat(onlyEntry.dimension).isEqualTo(Dimension(461, 616))
    }
}