From eeb58982109cd8d332e8d806dd71bd29b68424fd Mon Sep 17 00:00:00 2001
From: Snd-R <76580768+Snd-R@users.noreply.github.com>
Date: Mon, 26 Aug 2024 05:58:04 +0300
Subject: [PATCH] perf: faster zip entry extraction

---
Review notes (text in this section is ignored by `git am`):
- Line breaks, indentation and blank context lines were rebuilt from the hunk
  line counts, assuming the project's 2-space Kotlin style; confirm against the
  target tree before applying.
- Generic type arguments (`<R>`, `List<MediaContainerEntry>`) that were missing
  from the hunks have been restored.
- ZipFileUtils.kt no longer matches the blob named on its index line: KDoc and
  explicit return types were added, and getEntryBytes no longer closes the
  ZipFile that ZipFile.Builder.use already owns.

 komga/build.gradle.kts                        |  2 +-
 .../mediacontainer/divina/ZipExtractor.kt     |  9 ++--
 .../mediacontainer/epub/Epub.kt               |  5 +-
 .../mediacontainer/epub/EpubExtractor.kt      |  9 +---
 .../komga/infrastructure/util/ZipFileUtils.kt | 47 +++++++++++++++++++
 5 files changed, 57 insertions(+), 15 deletions(-)
 create mode 100644 komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt

diff --git a/komga/build.gradle.kts b/komga/build.gradle.kts
index f24107391..0dded6cc6 100644
--- a/komga/build.gradle.kts
+++ b/komga/build.gradle.kts
@@ -84,7 +84,7 @@ dependencies {
   implementation("com.appmattus.crypto:cryptohash:0.10.1")
 
   implementation("org.apache.tika:tika-core:2.9.1")
-  implementation("org.apache.commons:commons-compress:1.25.0")
+  implementation("org.apache.commons:commons-compress:1.27.1")
   implementation("com.github.junrar:junrar:7.5.5")
   implementation("com.github.gotson.nightcompress:nightcompress:0.2.0")
   implementation("org.apache.pdfbox:pdfbox:3.0.1")
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/divina/ZipExtractor.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/divina/ZipExtractor.kt
index a9ab1b540..40aac828e 100644
--- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/divina/ZipExtractor.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/divina/ZipExtractor.kt
@@ -8,6 +8,8 @@ import org.gotson.komga.domain.model.MediaContainerEntry
 import org.gotson.komga.domain.model.MediaType
 import org.gotson.komga.infrastructure.image.ImageAnalyzer
 import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
+import org.gotson.komga.infrastructure.util.getZipEntryBytes
+import org.gotson.komga.infrastructure.util.use
 import org.springframework.stereotype.Service
 import java.nio.file.Path
 
@@ -26,7 +28,7 @@ class ZipExtractor(
     path: Path,
     analyzeDimensions: Boolean,
   ): List<MediaContainerEntry> =
-    ZipFile(path.toFile()).use { zip ->
+    ZipFile.builder().setPath(path).use { zip ->
       zip.entries.toList()
         .filter { !it.isDirectory }
         .map { entry ->
@@ -52,8 +54,5 @@ class ZipExtractor(
   override fun getEntryStream(
     path: Path,
     entryName: String,
-  ): ByteArray =
-    ZipFile(path.toFile()).use { zip ->
-      zip.getInputStream(zip.getEntry(entryName)).use { it.readBytes() }
-    }
+  ): ByteArray = getZipEntryBytes(path, entryName)
 }
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt
index c7adaf1c2..5f30662e6 100644
--- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Epub.kt
@@ -2,6 +2,7 @@ package org.gotson.komga.infrastructure.mediacontainer.epub
 
 import org.apache.commons.compress.archivers.zip.ZipFile
 import org.gotson.komga.domain.model.MediaUnsupportedException
+import org.gotson.komga.infrastructure.util.use
 import org.jsoup.Jsoup
 import org.jsoup.nodes.Document
 import org.jsoup.parser.Parser
@@ -16,7 +17,7 @@ data class EpubPackage(
 )
 
 inline fun <R> Path.epub(block: (EpubPackage) -> R): R =
-  ZipFile(this.toFile()).use { zip ->
+  ZipFile.builder().setPath(this).use { zip ->
     val opfFile = zip.getPackagePath()
     val opfDoc = zip.getInputStream(zip.getEntry(opfFile)).use { Jsoup.parse(it, null, "", Parser.xmlParser()) }
     val opfDir = Paths.get(opfFile).parent
@@ -30,7 +31,7 @@ fun ZipFile.getPackagePath(): String =
   }
 
 fun getPackageFile(path: Path): String? =
-  ZipFile(path.toFile()).use { zip ->
+  ZipFile.builder().setPath(path).use { zip ->
     try {
       zip.getInputStream(zip.getEntry(zip.getPackagePath())).reader().use { it.readText() }
     } catch (e: Exception) {
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt
index 6e5c7403c..aa28aa675 100644
--- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt
@@ -2,15 +2,14 @@ package org.gotson.komga.infrastructure.mediacontainer.epub
 
 import io.github.oshai.kotlinlogging.KotlinLogging
 import org.apache.commons.compress.archivers.ArchiveEntry
-import org.apache.commons.compress.archivers.zip.ZipFile
 import org.gotson.komga.domain.model.BookPage
-import org.gotson.komga.domain.model.EntryNotFoundException
 import org.gotson.komga.domain.model.EpubTocEntry
 import org.gotson.komga.domain.model.MediaFile
 import org.gotson.komga.domain.model.R2Locator
 import org.gotson.komga.domain.model.TypedBytes
 import org.gotson.komga.infrastructure.image.ImageAnalyzer
 import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
+import org.gotson.komga.infrastructure.util.getZipEntryBytes
 import org.jsoup.Jsoup
 import org.springframework.beans.factory.annotation.Value
 import org.springframework.stereotype.Service
@@ -35,11 +34,7 @@ class EpubExtractor(
   fun getEntryStream(
     path: Path,
     entryName: String,
-  ): ByteArray =
-    ZipFile(path.toFile()).use { zip ->
-      zip.getEntry(entryName)?.let { entry -> zip.getInputStream(entry).use { it.readBytes() } }
-        ?: throw EntryNotFoundException("Entry does not exist: $entryName")
-    }
+  ): ByteArray = getZipEntryBytes(path, entryName)
 
   fun isEpub(path: Path): Boolean =
     try {
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt
new file mode 100644
index 000000000..727cfede1
--- /dev/null
+++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/util/ZipFileUtils.kt
@@ -0,0 +1,47 @@
+package org.gotson.komga.infrastructure.util
+
+import org.apache.commons.compress.archivers.zip.ZipFile
+import org.gotson.komga.domain.model.EntryNotFoundException
+import java.nio.file.Path
+
+/**
+ * Builds the [ZipFile], hands it to [block], and closes it once [block] returns or throws.
+ *
+ * @return whatever [block] returns
+ */
+inline fun <R> ZipFile.Builder.use(block: (ZipFile) -> R): R = get().use(block)
+
+/**
+ * Reads the complete content of the entry named [entryName] from the zip archive at [path].
+ *
+ * @throws EntryNotFoundException if no entry with that name is found on either lookup
+ */
+fun getZipEntryBytes(
+  path: Path,
+  entryName: String,
+): ByteArray {
+  // fast path. Only read central directory record and try to find entry in it
+  val zipBuilder =
+    ZipFile.builder()
+      .setPath(path)
+      .setUseUnicodeExtraFields(true)
+      .setIgnoreLocalFileHeader(true)
+  val bytes = zipBuilder.use { it.getEntryBytes(entryName) }
+  if (bytes != null) return bytes
+
+  // slow path. Entry with that name wasn't in central directory record
+  // Iterate each entry and, if present, set name from Unicode extra field in local file header
+  return zipBuilder.setIgnoreLocalFileHeader(false).use {
+    it.getEntryBytes(entryName)
+      ?: throw EntryNotFoundException("Entry does not exist: $entryName")
+  }
+}
+
+/**
+ * Returns the bytes of the entry named [entryName], or `null` if the archive has no such entry.
+ * The receiver stays open; closing it is left to the caller that opened it.
+ */
+private fun ZipFile.getEntryBytes(entryName: String): ByteArray? =
+  getEntry(entryName)?.let { entry ->
+    getInputStream(entry).use { it.readBytes() }
+  }