From 42047cdafbbbb6c8ef0a1ea6e5a4c97cebd729a3 Mon Sep 17 00:00:00 2001 From: Gauthier Roebroeck Date: Thu, 23 Jan 2025 11:37:28 +0800 Subject: [PATCH] fix: better handling of broken epub Closes: #1844 --- .../mediacontainer/epub/Epub.kt | 10 +++---- .../mediacontainer/epub/EpubExtractor.kt | 26 ++++++++++--------- .../infrastructure/mediacontainer/epub/Nav.kt | 4 ++- .../infrastructure/mediacontainer/epub/Ncx.kt | 4 ++- .../komga/infrastructure/util/ZipFileUtils.kt | 4 +-- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt index d2cb8b785..5f7a06c7d 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt @@ -20,7 +20,7 @@ data class EpubPackage( inline fun Path.epub(block: (EpubPackage) -> R): R = ZipFile.builder().setPath(this).use { zip -> val opfFile = zip.getPackagePath() - val opfDoc = zip.getEntryInputStream(opfFile).use { Jsoup.parse(it, null, "", Parser.xmlParser()) } + val opfDoc = zip.getEntryInputStream(opfFile)?.use { Jsoup.parse(it, null, "", Parser.xmlParser()) } ?: throw MediaUnsupportedException("Could not open OPF resource") val opfDir = Paths.get(opfFile).parent block(EpubPackage(zip, opfDoc, opfDir, opfDoc.getManifest())) } @@ -30,9 +30,9 @@ inline fun Path.epub(block: (EpubPackage) -> R): R = */ fun ZipFile.getPackagePath(): String = getEntryInputStream("META-INF/container.xml") - .use { Jsoup.parse(it, null, "") } - .getElementsByTag("rootfile") - .first() + ?.use { Jsoup.parse(it, null, "") } + ?.getElementsByTag("rootfile") + ?.first() ?.attr("full-path") ?: throw MediaUnsupportedException("META-INF/container.xml does not contain rootfile tag") /** @@ -41,7 +41,7 @@ fun ZipFile.getPackagePath(): String = fun getPackageFileContent(path: Path): String? = ZipFile.builder().setPath(path).use { zip -> try { - zip.getEntryInputStream(zip.getPackagePath()).reader().use { it.readText() } + zip.getEntryInputStream(zip.getPackagePath())?.reader()?.use { it.readText() } } catch (e: Exception) { null } diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt index e092be2e3..ba88870a2 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt @@ -70,10 +70,12 @@ class EpubExtractor( val href = coverManifestItem.href val mediaType = coverManifestItem.mediaType val coverPath = normalizeHref(opfDir, href) - TypedBytes( - zip.getEntryBytes(coverPath), - mediaType, - ) + zip.getEntryBytes(coverPath)?.let { coverBytes -> + TypedBytes( + coverBytes, + mediaType, + ) + } } else { null } @@ -151,7 +153,7 @@ class EpubExtractor( .map { it.attr("idref") } .mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } } .map { pagePath -> - val doc = epub.zip.getEntryInputStream(pagePath).use { Jsoup.parse(it, null, "") } + val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList() // if a page has text over the threshold then the book is not divina compatible if (doc.body().text().length > letterCountThreshold) return emptyList() @@ -217,8 +219,8 @@ class EpubExtractor( val readingOrder = resources.filter { it.subType == MediaFile.SubType.EPUB_PAGE } readingOrder.forEach { mediaFile -> - val doc = epub.zip.getEntryInputStream(mediaFile.fileName).use { Jsoup.parse(it, null, "") } - if (!doc.getElementsByClass("koboSpan").isNullOrEmpty()) return true + val doc = epub.zip.getEntryInputStream(mediaFile.fileName)?.use { Jsoup.parse(it, null, "") } + if (!doc?.getElementsByClass("koboSpan").isNullOrEmpty()) return true } } catch (e: Exception) { logger.warn(e) { "Error while checking if EPUB is KEPUB" } @@ -257,7 +259,7 @@ class EpubExtractor( val koboPositions = when { isFixedLayout -> emptyMap() - isKepub -> computePositionsFromKoboSpan(readingOrder) { filename -> epub.zip.getEntryInputStream(filename).use { it.readBytes().decodeToString() } } + isKepub -> computePositionsFromKoboSpan(readingOrder) { filename -> epub.zip.getEntryInputStream(filename).use { it?.readBytes()?.decodeToString() } } kepubConverter.isAvailable -> { try { val kepub = @@ -327,12 +329,12 @@ class EpubExtractor( */ private fun computePositionsFromKoboSpan( readingOrder: List, - resourceSupplier: (String) -> String, - ): Map>> = + resourceSupplier: (String) -> String?, + ): Map>?> = readingOrder.associate { file -> - val doc = Jsoup.parse(resourceSupplier(file.fileName), Parser.htmlParser().setTrackPosition(true)) + val doc = resourceSupplier(file.fileName)?.let { resource -> Jsoup.parse(resource, Parser.htmlParser().setTrackPosition(true)) } file.fileName to - doc.select("span.koboSpan").mapNotNull { koboSpan -> + doc?.select("span.koboSpan")?.mapNotNull { koboSpan -> val id = koboSpan.id() if (!id.isNullOrBlank()) { // progression is built from the position in the file of each koboSpan, divided by the file size diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Nav.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Nav.kt index c19d5328b..aa53a4c35 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Nav.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Nav.kt @@ -11,7 +11,9 @@ import kotlin.io.path.Path fun EpubPackage.getNavResource(): ResourceContent? = manifest.values.firstOrNull { it.properties.contains("nav") }?.let { nav -> val href = normalizeHref(opfDir, nav.href) - ResourceContent(Path(href), zip.getEntryBytes(href).decodeToString()) + zip.getEntryBytes(href)?.decodeToString()?.let { navContent -> + ResourceContent(Path(href), navContent) + } } fun processNav( diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Ncx.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Ncx.kt index d902a3b06..9063f4424 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Ncx.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Ncx.kt @@ -13,7 +13,9 @@ private val possibleNcxItemIds = listOf("toc", "ncx", "ncxtoc") fun EpubPackage.getNcxResource(): ResourceContent? = (manifest.values.firstOrNull { it.mediaType == "application/x-dtbncx+xml" } ?: manifest.values.firstOrNull { possibleNcxItemIds.contains(it.id) })?.let { ncx -> val href = normalizeHref(opfDir, ncx.href) - ResourceContent(Path(href), zip.getEntryBytes(href).decodeToString()) + zip.getEntryBytes(href)?.decodeToString()?.let { ncxContent -> + ResourceContent(Path(href), ncxContent) + } } fun processNcx( diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt index 457e9d6e5..f44a59c2e 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt @@ -7,9 +7,9 @@ import java.nio.file.Path inline fun ZipFile.Builder.use(block: (ZipFile) -> R) = this.get().use(block) -fun ZipFile.getEntryInputStream(entryName: String): InputStream = this.getInputStream(this.getEntry(entryName)) +fun ZipFile.getEntryInputStream(entryName: String): InputStream? = this.getEntry(entryName)?.let { entry -> this.getInputStream(entry) } -fun ZipFile.getEntryBytes(entryName: String): ByteArray = this.getInputStream(this.getEntry(entryName)).use { it.readBytes() } +fun ZipFile.getEntryBytes(entryName: String): ByteArray? = this.getEntry(entryName)?.let { entry -> this.getInputStream(entry).use { it.readBytes() } } fun getZipEntryBytes( path: Path,