fix: better stream handling for rar and zip files

Some RAR entries could not be analyzed because streams were used incorrectly.
Disk access is reduced by using buffered streams.
Some streams were not closed properly; they are now closed after use.
This commit is contained in:
Gauthier Roebroeck 2021-05-03 09:30:34 +08:00
parent 910af6f36f
commit ebc8df2053
5 changed files with 35 additions and 33 deletions

View file

@ -11,8 +11,12 @@ private val logger = KotlinLogging.logger {}
@Service @Service
class ImageAnalyzer { class ImageAnalyzer {
/**
* Returns the Dimension of the image contained in the stream.
* The stream will be neither closed, marked, nor reset.
*/
fun getDimension(stream: InputStream): Dimension? = fun getDimension(stream: InputStream): Dimension? =
stream.use { try {
ImageIO.createImageInputStream(stream).use { fis -> ImageIO.createImageInputStream(stream).use { fis ->
val readers = ImageIO.getImageReaders(fis) val readers = ImageIO.getImageReaders(fis)
if (readers.hasNext()) { if (readers.hasNext()) {
@ -24,5 +28,8 @@ class ImageAnalyzer {
null null
} }
} }
} catch (e: Exception) {
logger.warn(e) { "Could not get image dimensions" }
null
} }
} }

View file

@ -26,13 +26,12 @@ class ContentDetector(
} }
} }
/**
* Detects the media type of the content of the stream.
* The stream will not be closed.
*/
fun detectMediaType(stream: InputStream): String = fun detectMediaType(stream: InputStream): String =
stream.use { tika.detector.detect(stream, Metadata()).toString()
TikaInputStream.get(it).use { tikaStream ->
val mediaType = tika.detector.detect(tikaStream, Metadata())
mediaType.toString()
}
}
fun isImage(mediaType: String): Boolean = fun isImage(mediaType: String): Boolean =
mediaType.startsWith("image/") mediaType.startsWith("image/")

View file

@ -51,7 +51,7 @@ class EpubExtractor(
it.href == (opfDir?.relativize(image) ?: image).separatorsToUnix() it.href == (opfDir?.relativize(image) ?: image).separatorsToUnix()
}.mediaType }.mediaType
val dimension = if (contentDetector.isImage(mediaType)) val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(zip.getInputStream(zip.getEntry(name))) zip.getInputStream(zip.getEntry(name)).use { imageAnalyzer.getDimension(it) }
else else
null null
MediaContainerEntry(name = name, mediaType = mediaType, dimension = dimension) MediaContainerEntry(name = name, mediaType = mediaType, dimension = dimension)

View file

@ -3,15 +3,11 @@ package org.gotson.komga.infrastructure.mediacontainer
import com.github.junrar.Archive import com.github.junrar.Archive
import mu.KotlinLogging import mu.KotlinLogging
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
import org.apache.commons.io.input.TeeInputStream
import org.gotson.komga.domain.model.MediaContainerEntry import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.domain.model.MediaUnsupportedException import org.gotson.komga.domain.model.MediaUnsupportedException
import org.gotson.komga.infrastructure.image.ImageAnalyzer import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service import org.springframework.stereotype.Service
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.nio.file.Path import java.nio.file.Path
import java.util.Comparator
private val logger = KotlinLogging.logger {} private val logger = KotlinLogging.logger {}
@ -32,20 +28,19 @@ class RarExtractor(
if (rar.mainHeader.isMultiVolume) throw MediaUnsupportedException("Multi-Volume RAR archives are not supported", "ERR_1004") if (rar.mainHeader.isMultiVolume) throw MediaUnsupportedException("Multi-Volume RAR archives are not supported", "ERR_1004")
rar.fileHeaders rar.fileHeaders
.filter { !it.isDirectory } .filter { !it.isDirectory }
.map { hd -> .map { entry ->
try { try {
val buffer = ByteArrayOutputStream() rar.getInputStream(entry).buffered().use { stream ->
TeeInputStream(rar.getInputStream(hd), buffer).use { tee -> val mediaType = contentDetector.detectMediaType(stream)
val mediaType = contentDetector.detectMediaType(tee)
val dimension = if (contentDetector.isImage(mediaType)) val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(ByteArrayInputStream(buffer.toByteArray())) imageAnalyzer.getDimension(stream)
else else
null null
MediaContainerEntry(name = hd.fileName, mediaType = mediaType, dimension = dimension) MediaContainerEntry(name = entry.fileName, mediaType = mediaType, dimension = dimension)
} }
} catch (e: Exception) { } catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${hd.fileName}" } logger.warn(e) { "Could not analyze entry: ${entry.fileName}" }
MediaContainerEntry(name = hd.fileName, comment = e.message) MediaContainerEntry(name = entry.fileName, comment = e.message)
} }
} }
.sortedWith(compareBy(natSortComparator) { it.name }) .sortedWith(compareBy(natSortComparator) { it.name })
@ -54,6 +49,6 @@ class RarExtractor(
override fun getEntryStream(path: Path, entryName: String): ByteArray = override fun getEntryStream(path: Path, entryName: String): ByteArray =
Archive(path.toFile()).use { rar -> Archive(path.toFile()).use { rar ->
val header = rar.fileHeaders.find { it.fileName == entryName } val header = rar.fileHeaders.find { it.fileName == entryName }
rar.getInputStream(header).readBytes() rar.getInputStream(header).use { it.readBytes() }
} }
} }

View file

@ -7,7 +7,6 @@ import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service import org.springframework.stereotype.Service
import java.nio.file.Path import java.nio.file.Path
import java.util.Comparator
private val logger = KotlinLogging.logger {} private val logger = KotlinLogging.logger {}
@ -25,24 +24,26 @@ class ZipExtractor(
ZipFile(path.toFile()).use { zip -> ZipFile(path.toFile()).use { zip ->
zip.entries.toList() zip.entries.toList()
.filter { !it.isDirectory } .filter { !it.isDirectory }
.map { .map { entry ->
try { try {
val mediaType = contentDetector.detectMediaType(zip.getInputStream(it)) zip.getInputStream(entry).buffered().use { stream ->
val dimension = if (contentDetector.isImage(mediaType)) val mediaType = contentDetector.detectMediaType(stream)
imageAnalyzer.getDimension(zip.getInputStream(it)) val dimension = if (contentDetector.isImage(mediaType))
else imageAnalyzer.getDimension(stream)
null else
MediaContainerEntry(name = it.name, mediaType = mediaType, dimension = dimension) null
MediaContainerEntry(name = entry.name, mediaType = mediaType, dimension = dimension)
}
} catch (e: Exception) { } catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${it.name}" } logger.warn(e) { "Could not analyze entry: ${entry.name}" }
MediaContainerEntry(name = it.name, comment = e.message) MediaContainerEntry(name = entry.name, comment = e.message)
} }
} }
.sortedWith(compareBy(natSortComparator) { it.name }) .sortedWith(compareBy(natSortComparator) { it.name })
} }
override fun getEntryStream(path: Path, entryName: String): ByteArray = override fun getEntryStream(path: Path, entryName: String): ByteArray =
ZipFile(path.toFile()).use { ZipFile(path.toFile()).use { zip ->
it.getInputStream(it.getEntry(entryName)).readBytes() zip.getInputStream(zip.getEntry(entryName)).use { it.readBytes() }
} }
} }