mirror of
https://github.com/gotson/komga.git
synced 2025-12-21 16:03:03 +01:00
feat(book analyzer): partial handling of archives with errors
Entries of zip/rar archives that cannot be extracted are skipped (closes #57). Image detection is moved from the extractors to BookAnalyzer. The archive package is renamed to mediacontainer.
This commit is contained in:
parent
a7548e298a
commit
2605b1d943
13 changed files with 169 additions and 134 deletions
|
|
@ -0,0 +1,7 @@
|
|||
package org.gotson.komga.domain.model
|
||||
|
||||
/**
 * A single entry found inside a media container (archive, document, ...).
 *
 * Declared as a `data class` so that entries compare structurally and print
 * their content in logs and test failures.
 *
 * @property name the entry's name inside the container.
 * @property mediaType the detected media type, or `null` when none was determined.
 * @property comment an optional free-text note about the entry, e.g. the reason
 *   its media type could not be determined.
 */
data class MediaContainerEntry(
    val name: String,
    val mediaType: String? = null,
    val comment: String? = null
)
|
||||
|
|
@ -4,12 +4,13 @@ import mu.KotlinLogging
|
|||
import net.coobird.thumbnailator.Thumbnails
|
||||
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
|
||||
import org.gotson.komga.domain.model.Book
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.gotson.komga.domain.model.Media
|
||||
import org.gotson.komga.domain.model.MediaNotReadyException
|
||||
import org.gotson.komga.infrastructure.archive.ContentDetector
|
||||
import org.gotson.komga.infrastructure.archive.PdfExtractor
|
||||
import org.gotson.komga.infrastructure.archive.RarExtractor
|
||||
import org.gotson.komga.infrastructure.archive.ZipExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||
import org.gotson.komga.infrastructure.mediacontainer.PdfExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.RarExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ZipExtractor
|
||||
import org.springframework.stereotype.Service
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.util.*
|
||||
|
|
@ -43,13 +44,31 @@ class BookAnalyzer(
|
|||
if (!supportedMediaTypes.keys.contains(mediaType))
|
||||
return Media(mediaType = mediaType, status = Media.Status.UNSUPPORTED, comment = "Media type $mediaType is not supported")
|
||||
|
||||
val pages = try {
|
||||
supportedMediaTypes.getValue(mediaType).getPagesList(book.path()).sortedWith(compareBy(natSortComparator) { it.fileName })
|
||||
val entries = try {
|
||||
supportedMediaTypes.getValue(mediaType).getEntries(book.path())
|
||||
} catch (ex: Exception) {
|
||||
logger.error(ex) { "Error while analyzing book: $book" }
|
||||
return Media(mediaType = mediaType, status = Media.Status.ERROR, comment = ex.message)
|
||||
}
|
||||
|
||||
val (pages, others) = entries
|
||||
.partition { entry ->
|
||||
entry.mediaType?.let { contentDetector.isImage(it) } ?: false
|
||||
}.let { (images, others) ->
|
||||
Pair(
|
||||
images
|
||||
.map { BookPage(it.name, it.mediaType!!) }
|
||||
.sortedWith(compareBy(natSortComparator) { it.fileName }),
|
||||
others
|
||||
)
|
||||
}
|
||||
|
||||
val entriesErrorSummary = others
|
||||
.filter { it.mediaType.isNullOrBlank() }
|
||||
.map { it.name }
|
||||
.ifEmpty { null }
|
||||
?.joinToString(prefix = "Some entries could not be analyzed: [", postfix = "]") { it }
|
||||
|
||||
if (pages.isEmpty()) {
|
||||
logger.warn { "Book $book does not contain any pages" }
|
||||
return Media(mediaType = mediaType, status = Media.Status.ERROR, comment = "Book does not contain any pages")
|
||||
|
|
@ -59,7 +78,7 @@ class BookAnalyzer(
|
|||
logger.info { "Trying to generate cover for book: $book" }
|
||||
val thumbnail = generateThumbnail(book, mediaType, pages.first().fileName)
|
||||
|
||||
return Media(mediaType = mediaType, status = Media.Status.READY, pages = pages, thumbnail = thumbnail)
|
||||
return Media(mediaType = mediaType, status = Media.Status.READY, pages = pages, thumbnail = thumbnail, comment = entriesErrorSummary)
|
||||
}
|
||||
|
||||
@Throws(MediaNotReadyException::class)
|
||||
|
|
@ -84,7 +103,7 @@ class BookAnalyzer(
|
|||
private fun generateThumbnail(book: Book, mediaType: String, entry: String): ByteArray? =
|
||||
try {
|
||||
ByteArrayOutputStream().use {
|
||||
supportedMediaTypes.getValue(mediaType).getPageStream(book.path(), entry).let { cover ->
|
||||
supportedMediaTypes.getValue(mediaType).getEntryStream(book.path(), entry).let { cover ->
|
||||
Thumbnails.of(cover.inputStream())
|
||||
.size(thumbnailSize, thumbnailSize)
|
||||
.outputFormat(thumbnailFormat)
|
||||
|
|
@ -114,6 +133,6 @@ class BookAnalyzer(
|
|||
throw IndexOutOfBoundsException("Page $number does not exist")
|
||||
}
|
||||
|
||||
return supportedMediaTypes.getValue(book.media.mediaType!!).getPageStream(book.path(), book.media.pages[number - 1].fileName)
|
||||
return supportedMediaTypes.getValue(book.media.mediaType!!).getEntryStream(book.path(), book.media.pages[number - 1].fileName)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +0,0 @@
|
|||
package org.gotson.komga.infrastructure.archive
|
||||
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * Base type for components able to read the pages of a book stored at a given path.
 */
abstract class ArchiveExtractor {

    /**
     * Lists the pages found in the file located at [path].
     */
    abstract fun getPagesList(path: Path): List<BookPage>

    /**
     * Returns the raw bytes of the page named [entryName] from the file located at [path].
     */
    abstract fun getPageStream(path: Path, entryName: String): ByteArray
}
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
package org.gotson.komga.infrastructure.archive
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument
|
||||
import org.apache.pdfbox.rendering.ImageType
|
||||
import org.apache.pdfbox.rendering.PDFRenderer
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.springframework.stereotype.Service
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import javax.imageio.ImageIO
|
||||
|
||||
/**
 * [ArchiveExtractor] for PDF documents: each PDF page is exposed as one JPEG page
 * whose name is the zero-based page index.
 */
@Service
class PdfExtractor : ArchiveExtractor() {

    private val mediaType = "image/jpeg"
    private val imageIOFormat = "jpeg"
    private val resolution = 1536F

    /**
     * Lists one [BookPage] per page of the PDF at [path], named after the page index.
     */
    override fun getPagesList(path: Path): List<BookPage> =
        Files.newInputStream(path).use { source ->
            PDDocument.load(source).use { document ->
                (0 until document.numberOfPages)
                    .map { pageIndex -> pageIndex.toString() }
                    .map { pageName -> BookPage(pageName, mediaType) }
            }
        }

    /**
     * Renders the page whose zero-based index is given by [entryName] and returns it
     * encoded as JPEG bytes.
     */
    override fun getPageStream(path: Path, entryName: String): ByteArray =
        Files.newInputStream(path).use { source ->
            PDDocument.load(source).use { document ->
                val pageIndex = entryName.toInt()
                val cropBox = document.getPage(pageIndex).cropBox
                // The scale is derived from the shorter side of the crop box and the target resolution.
                val shortestSide = minOf(cropBox.width, cropBox.height)
                val rendered = PDFRenderer(document).renderImage(pageIndex, resolution / shortestSide, ImageType.RGB)
                ByteArrayOutputStream().use { buffer ->
                    ImageIO.write(rendered, imageIOFormat, buffer)
                    buffer.toByteArray()
                }
            }
        }
}
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
package org.gotson.komga.infrastructure.archive
|
||||
|
||||
import com.github.junrar.Archive
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.springframework.stereotype.Service
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * [ArchiveExtractor] for RAR archives.
 */
@Service
class RarExtractor(
    private val contentDetector: ContentDetector
) : ArchiveExtractor() {

    /**
     * Lists the non-directory entries of the RAR archive at [path] whose detected
     * media type is an image.
     */
    override fun getPagesList(path: Path): List<BookPage> =
        Archive(Files.newInputStream(path)).use { rar ->
            rar.fileHeaders
                .filter { !it.isDirectory }
                .map { header ->
                    // Close each entry stream once detection is done instead of leaving it open.
                    val mediaType = rar.getInputStream(header).use { contentDetector.detectMediaType(it) }
                    BookPage(header.fileNameString, mediaType)
                }
                .filter { contentDetector.isImage(it.mediaType) }
        }

    /**
     * Returns the bytes of the entry named [entryName] in the RAR archive at [path].
     *
     * @throws NoSuchElementException if the archive has no entry with that name.
     */
    override fun getPageStream(path: Path, entryName: String): ByteArray =
        Archive(Files.newInputStream(path)).use { rar ->
            // Fail with an explicit message rather than passing a null header to the archive.
            val header = rar.fileHeaders.find { it.fileNameString == entryName }
                ?: throw NoSuchElementException("Entry not found in archive: $entryName")
            rar.getInputStream(header).use { it.readBytes() }
        }
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
package org.gotson.komga.infrastructure.archive
|
||||
|
||||
import org.apache.commons.compress.archivers.zip.ZipFile
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.springframework.stereotype.Service
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * [ArchiveExtractor] for ZIP archives.
 */
@Service
class ZipExtractor(
    private val contentDetector: ContentDetector
) : ArchiveExtractor() {

    /**
     * Lists the non-directory entries of the ZIP archive at [path] whose detected
     * media type is an image.
     */
    override fun getPagesList(path: Path): List<BookPage> =
        ZipFile(path.toFile()).use { zip ->
            zip.entries.toList()
                .filter { !it.isDirectory }
                .map { entry ->
                    // Close each entry stream once detection is done instead of leaving it open.
                    val mediaType = zip.getInputStream(entry).use { contentDetector.detectMediaType(it) }
                    BookPage(entry.name, mediaType)
                }
                .filter { contentDetector.isImage(it.mediaType) }
        }

    /**
     * Returns the bytes of the entry named [entryName] in the ZIP archive at [path].
     *
     * @throws NoSuchElementException if the archive has no entry with that name.
     */
    override fun getPageStream(path: Path, entryName: String): ByteArray =
        ZipFile(path.toFile()).use { zip ->
            // getEntry yields null for an unknown name; report it explicitly.
            val entry = zip.getEntry(entryName)
                ?: throw NoSuchElementException("Entry not found in archive: $entryName")
            zip.getInputStream(entry).use { it.readBytes() }
        }
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.gotson.komga.infrastructure.archive
|
||||
package org.gotson.komga.infrastructure.mediacontainer
|
||||
|
||||
import mu.KotlinLogging
|
||||
import org.apache.tika.config.TikaConfig
|
||||
|
|
@ -36,4 +36,4 @@ class ContentDetector(
|
|||
|
||||
/**
 * Tells whether [mediaType] denotes an image, i.e. whether it starts with `image/`.
 * The comparison is case-sensitive.
 */
fun isImage(mediaType: String): Boolean {
    val imagePrefix = "image/"
    return mediaType.startsWith(imagePrefix)
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
package org.gotson.komga.infrastructure.mediacontainer
|
||||
|
||||
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * Base type for components able to read the content of a media container
 * stored at a given path.
 */
abstract class MediaContainerExtractor {

    /**
     * Lists the entries found in the container located at [path].
     */
    abstract fun getEntries(path: Path): List<MediaContainerEntry>

    /**
     * Returns the raw bytes of the entry named [entryName] from the container located at [path].
     */
    abstract fun getEntryStream(path: Path, entryName: String): ByteArray
}
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
package org.gotson.komga.infrastructure.mediacontainer
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument
|
||||
import org.apache.pdfbox.rendering.ImageType
|
||||
import org.apache.pdfbox.rendering.PDFRenderer
|
||||
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||
import org.springframework.stereotype.Service
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import javax.imageio.ImageIO
|
||||
|
||||
/**
 * [MediaContainerExtractor] for PDF documents: each PDF page is exposed as one JPEG
 * entry whose name is the zero-based page index.
 */
@Service
class PdfExtractor : MediaContainerExtractor() {

    private val mediaType = "image/jpeg"
    private val imageIOFormat = "jpeg"
    private val resolution = 1536F

    /**
     * Lists one [MediaContainerEntry] per page of the PDF at [path], named after the page index.
     */
    override fun getEntries(path: Path): List<MediaContainerEntry> =
        Files.newInputStream(path).use { source ->
            PDDocument.load(source).use { document ->
                (0 until document.numberOfPages)
                    .map { pageIndex -> pageIndex.toString() }
                    .map { pageName -> MediaContainerEntry(pageName, mediaType) }
            }
        }

    /**
     * Renders the page whose zero-based index is given by [entryName] and returns it
     * encoded as JPEG bytes.
     */
    override fun getEntryStream(path: Path, entryName: String): ByteArray =
        Files.newInputStream(path).use { source ->
            PDDocument.load(source).use { document ->
                val pageIndex = entryName.toInt()
                val cropBox = document.getPage(pageIndex).cropBox
                // The scale is derived from the shorter side of the crop box and the target resolution.
                val shortestSide = minOf(cropBox.width, cropBox.height)
                val rendered = PDFRenderer(document).renderImage(pageIndex, resolution / shortestSide, ImageType.RGB)
                ByteArrayOutputStream().use { buffer ->
                    ImageIO.write(rendered, imageIOFormat, buffer)
                    buffer.toByteArray()
                }
            }
        }
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package org.gotson.komga.infrastructure.mediacontainer
|
||||
|
||||
import com.github.junrar.Archive
|
||||
import mu.KotlinLogging
|
||||
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||
import org.springframework.stereotype.Service
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
/**
 * [MediaContainerExtractor] for RAR archives.
 */
@Service
class RarExtractor(
    private val contentDetector: ContentDetector
) : MediaContainerExtractor() {

    /**
     * Lists the non-directory entries of the RAR archive at [path].
     *
     * An entry whose media type cannot be detected is still returned, without a
     * media type and with the failure message as its comment, so that one broken
     * entry does not fail the whole archive.
     */
    override fun getEntries(path: Path): List<MediaContainerEntry> =
        Archive(Files.newInputStream(path)).use { rar ->
            rar.fileHeaders
                .filter { !it.isDirectory }
                .map { header ->
                    try {
                        // Close each entry stream once detection is done instead of leaving it open.
                        val mediaType = rar.getInputStream(header).use { contentDetector.detectMediaType(it) }
                        MediaContainerEntry(name = header.fileNameString, mediaType = mediaType)
                    } catch (e: Exception) {
                        logger.warn(e) { "Could not analyze entry: ${header.fileNameString}" }
                        MediaContainerEntry(name = header.fileNameString, comment = e.message)
                    }
                }
        }

    /**
     * Returns the bytes of the entry named [entryName] in the RAR archive at [path].
     *
     * @throws NoSuchElementException if the archive has no entry with that name.
     */
    override fun getEntryStream(path: Path, entryName: String): ByteArray =
        Archive(Files.newInputStream(path)).use { rar ->
            // Fail with an explicit message rather than passing a null header to the archive.
            val header = rar.fileHeaders.find { it.fileNameString == entryName }
                ?: throw NoSuchElementException("Entry not found in archive: $entryName")
            rar.getInputStream(header).use { it.readBytes() }
        }
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.gotson.komga.infrastructure.archive
|
||||
package org.gotson.komga.infrastructure.mediacontainer
|
||||
|
||||
import org.apache.tika.config.TikaConfig
|
||||
import org.springframework.context.annotation.Bean
|
||||
|
|
@ -9,4 +9,4 @@ class TikaConfiguration {
|
|||
|
||||
/**
 * Registers a default-constructed [TikaConfig] as a bean.
 */
@Bean
fun tika(): TikaConfig {
    return TikaConfig()
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
package org.gotson.komga.infrastructure.mediacontainer
|
||||
|
||||
import mu.KotlinLogging
|
||||
import org.apache.commons.compress.archivers.zip.ZipFile
|
||||
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||
import org.springframework.stereotype.Service
|
||||
import java.nio.file.Path
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
/**
 * [MediaContainerExtractor] for ZIP archives.
 */
@Service
class ZipExtractor(
    private val contentDetector: ContentDetector
) : MediaContainerExtractor() {

    /**
     * Lists the non-directory entries of the ZIP archive at [path].
     *
     * An entry whose media type cannot be detected is still returned, without a
     * media type and with the failure message as its comment, so that one broken
     * entry does not fail the whole archive.
     */
    override fun getEntries(path: Path): List<MediaContainerEntry> =
        ZipFile(path.toFile()).use { zip ->
            zip.entries.toList()
                .filter { !it.isDirectory }
                .map { entry ->
                    try {
                        // Close each entry stream once detection is done instead of leaving it open.
                        val mediaType = zip.getInputStream(entry).use { contentDetector.detectMediaType(it) }
                        MediaContainerEntry(name = entry.name, mediaType = mediaType)
                    } catch (e: Exception) {
                        logger.warn(e) { "Could not analyze entry: ${entry.name}" }
                        MediaContainerEntry(name = entry.name, comment = e.message)
                    }
                }
        }

    /**
     * Returns the bytes of the entry named [entryName] in the ZIP archive at [path].
     *
     * @throws NoSuchElementException if the archive has no entry with that name.
     */
    override fun getEntryStream(path: Path, entryName: String): ByteArray =
        ZipFile(path.toFile()).use { zip ->
            // getEntry yields null for an unknown name; report it explicitly.
            val entry = zip.getEntry(entryName)
                ?: throw NoSuchElementException("Entry not found in archive: $entryName")
            zip.getInputStream(entry).use { it.readBytes() }
        }
}
|
||||
|
|
@ -4,12 +4,12 @@ import io.mockk.every
|
|||
import io.mockk.mockk
|
||||
import io.mockk.slot
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||
import org.gotson.komga.domain.model.makeBook
|
||||
import org.gotson.komga.domain.model.makeBookPage
|
||||
import org.gotson.komga.infrastructure.archive.ContentDetector
|
||||
import org.gotson.komga.infrastructure.archive.PdfExtractor
|
||||
import org.gotson.komga.infrastructure.archive.RarExtractor
|
||||
import org.gotson.komga.infrastructure.archive.ZipExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||
import org.gotson.komga.infrastructure.mediacontainer.PdfExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.RarExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ZipExtractor
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
class BookAnalyzerTest {
|
||||
|
|
@ -25,13 +25,14 @@ class BookAnalyzerTest {
|
|||
// given
|
||||
val book = makeBook("book")
|
||||
every { mockContent.detectMediaType(book.path()) } returns "application/zip"
|
||||
every { mockContent.isImage(any()) } returns true
|
||||
|
||||
val unorderedPages = listOf("08", "01", "02").map { makeBookPage(it) }
|
||||
every { mockZip.getPagesList(book.path()) } returns unorderedPages
|
||||
val unorderedPages = listOf("08", "01", "02").map { MediaContainerEntry(it, "image/png") }
|
||||
every { mockZip.getEntries(book.path()) } returns unorderedPages
|
||||
|
||||
//when
|
||||
val thumbnailFile = slot<String>()
|
||||
every { mockZip.getPageStream(book.path(), capture(thumbnailFile)) } returns ByteArray(1)
|
||||
every { mockZip.getEntryStream(book.path(), capture(thumbnailFile)) } returns ByteArray(1)
|
||||
bookAnalyzer.analyze(book)
|
||||
|
||||
// then
|
||||
|
|
|
|||
Loading…
Reference in a new issue