fix: better stream handling for rar and zip files

Some RAR entries could not be analyzed because streams were used incorrectly.
Disk access is reduced by using buffered streams.
Some streams were not closed properly; they are now closed.
This commit is contained in:
Gauthier Roebroeck 2021-05-03 09:30:34 +08:00
parent 910af6f36f
commit ebc8df2053
5 changed files with 35 additions and 33 deletions

View file

@ -11,8 +11,12 @@ private val logger = KotlinLogging.logger {}
@Service
class ImageAnalyzer {
/**
* Returns the Dimension of the image contained in the stream.
* The stream will not be closed, nor marked or reset.
*/
fun getDimension(stream: InputStream): Dimension? =
stream.use {
try {
ImageIO.createImageInputStream(stream).use { fis ->
val readers = ImageIO.getImageReaders(fis)
if (readers.hasNext()) {
@ -24,5 +28,8 @@ class ImageAnalyzer {
null
}
}
} catch (e: Exception) {
logger.warn(e) { "Could not get image dimensions" }
null
}
}

View file

@ -26,13 +26,12 @@ class ContentDetector(
}
}
/**
* Detects the media type of the content of the stream.
* The stream will not be closed.
*/
fun detectMediaType(stream: InputStream): String =
stream.use {
TikaInputStream.get(it).use { tikaStream ->
val mediaType = tika.detector.detect(tikaStream, Metadata())
mediaType.toString()
}
}
tika.detector.detect(stream, Metadata()).toString()
fun isImage(mediaType: String): Boolean =
mediaType.startsWith("image/")

View file

@ -51,7 +51,7 @@ class EpubExtractor(
it.href == (opfDir?.relativize(image) ?: image).separatorsToUnix()
}.mediaType
val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(zip.getInputStream(zip.getEntry(name)))
zip.getInputStream(zip.getEntry(name)).use { imageAnalyzer.getDimension(it) }
else
null
MediaContainerEntry(name = name, mediaType = mediaType, dimension = dimension)

View file

@ -3,15 +3,11 @@ package org.gotson.komga.infrastructure.mediacontainer
import com.github.junrar.Archive
import mu.KotlinLogging
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
import org.apache.commons.io.input.TeeInputStream
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.domain.model.MediaUnsupportedException
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.nio.file.Path
import java.util.Comparator
private val logger = KotlinLogging.logger {}
@ -32,20 +28,19 @@ class RarExtractor(
if (rar.mainHeader.isMultiVolume) throw MediaUnsupportedException("Multi-Volume RAR archives are not supported", "ERR_1004")
rar.fileHeaders
.filter { !it.isDirectory }
.map { hd ->
.map { entry ->
try {
val buffer = ByteArrayOutputStream()
TeeInputStream(rar.getInputStream(hd), buffer).use { tee ->
val mediaType = contentDetector.detectMediaType(tee)
rar.getInputStream(entry).buffered().use { stream ->
val mediaType = contentDetector.detectMediaType(stream)
val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(ByteArrayInputStream(buffer.toByteArray()))
imageAnalyzer.getDimension(stream)
else
null
MediaContainerEntry(name = hd.fileName, mediaType = mediaType, dimension = dimension)
MediaContainerEntry(name = entry.fileName, mediaType = mediaType, dimension = dimension)
}
} catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${hd.fileName}" }
MediaContainerEntry(name = hd.fileName, comment = e.message)
logger.warn(e) { "Could not analyze entry: ${entry.fileName}" }
MediaContainerEntry(name = entry.fileName, comment = e.message)
}
}
.sortedWith(compareBy(natSortComparator) { it.name })
@ -54,6 +49,6 @@ class RarExtractor(
override fun getEntryStream(path: Path, entryName: String): ByteArray =
Archive(path.toFile()).use { rar ->
val header = rar.fileHeaders.find { it.fileName == entryName }
rar.getInputStream(header).readBytes()
rar.getInputStream(header).use { it.readBytes() }
}
}

View file

@ -7,7 +7,6 @@ import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.nio.file.Path
import java.util.Comparator
private val logger = KotlinLogging.logger {}
@ -25,24 +24,26 @@ class ZipExtractor(
ZipFile(path.toFile()).use { zip ->
zip.entries.toList()
.filter { !it.isDirectory }
.map {
.map { entry ->
try {
val mediaType = contentDetector.detectMediaType(zip.getInputStream(it))
val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(zip.getInputStream(it))
else
null
MediaContainerEntry(name = it.name, mediaType = mediaType, dimension = dimension)
zip.getInputStream(entry).buffered().use { stream ->
val mediaType = contentDetector.detectMediaType(stream)
val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(stream)
else
null
MediaContainerEntry(name = entry.name, mediaType = mediaType, dimension = dimension)
}
} catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${it.name}" }
MediaContainerEntry(name = it.name, comment = e.message)
logger.warn(e) { "Could not analyze entry: ${entry.name}" }
MediaContainerEntry(name = entry.name, comment = e.message)
}
}
.sortedWith(compareBy(natSortComparator) { it.name })
}
override fun getEntryStream(path: Path, entryName: String): ByteArray =
ZipFile(path.toFile()).use {
it.getInputStream(it.getEntry(entryName)).readBytes()
ZipFile(path.toFile()).use { zip ->
zip.getInputStream(zip.getEntry(entryName)).use { it.readBytes() }
}
}