mirror of
https://github.com/gotson/komga.git
synced 2026-05-08 12:35:30 +02:00
feat(book analyzer): partial handling of archives with errors
Entries of zip/rar archives which cannot be extracted will be skipped (closes #57). Move image detection from the extractors to BookAnalyzer. Rename the archive package to mediacontainer.
This commit is contained in:
parent
a7548e298a
commit
2605b1d943
13 changed files with 169 additions and 134 deletions
|
|
@ -0,0 +1,7 @@
|
||||||
|
package org.gotson.komga.domain.model
|
||||||
|
|
||||||
|
/**
 * Describes a single entry found inside a media container (for example a file inside an archive).
 *
 * @property name name of the entry inside its container.
 * @property mediaType media type of the entry's content, or `null` (the default) when none was provided.
 * @property comment optional free-text note about the entry, `null` by default.
 */
class MediaContainerEntry(
    val name: String,
    val mediaType: String? = null,
    val comment: String? = null
)
|
||||||
|
|
@ -4,12 +4,13 @@ import mu.KotlinLogging
|
||||||
import net.coobird.thumbnailator.Thumbnails
|
import net.coobird.thumbnailator.Thumbnails
|
||||||
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
|
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
|
||||||
import org.gotson.komga.domain.model.Book
|
import org.gotson.komga.domain.model.Book
|
||||||
|
import org.gotson.komga.domain.model.BookPage
|
||||||
import org.gotson.komga.domain.model.Media
|
import org.gotson.komga.domain.model.Media
|
||||||
import org.gotson.komga.domain.model.MediaNotReadyException
|
import org.gotson.komga.domain.model.MediaNotReadyException
|
||||||
import org.gotson.komga.infrastructure.archive.ContentDetector
|
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||||
import org.gotson.komga.infrastructure.archive.PdfExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.PdfExtractor
|
||||||
import org.gotson.komga.infrastructure.archive.RarExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.RarExtractor
|
||||||
import org.gotson.komga.infrastructure.archive.ZipExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.ZipExtractor
|
||||||
import org.springframework.stereotype.Service
|
import org.springframework.stereotype.Service
|
||||||
import java.io.ByteArrayOutputStream
|
import java.io.ByteArrayOutputStream
|
||||||
import java.util.*
|
import java.util.*
|
||||||
|
|
@ -43,13 +44,31 @@ class BookAnalyzer(
|
||||||
if (!supportedMediaTypes.keys.contains(mediaType))
|
if (!supportedMediaTypes.keys.contains(mediaType))
|
||||||
return Media(mediaType = mediaType, status = Media.Status.UNSUPPORTED, comment = "Media type $mediaType is not supported")
|
return Media(mediaType = mediaType, status = Media.Status.UNSUPPORTED, comment = "Media type $mediaType is not supported")
|
||||||
|
|
||||||
val pages = try {
|
val entries = try {
|
||||||
supportedMediaTypes.getValue(mediaType).getPagesList(book.path()).sortedWith(compareBy(natSortComparator) { it.fileName })
|
supportedMediaTypes.getValue(mediaType).getEntries(book.path())
|
||||||
} catch (ex: Exception) {
|
} catch (ex: Exception) {
|
||||||
logger.error(ex) { "Error while analyzing book: $book" }
|
logger.error(ex) { "Error while analyzing book: $book" }
|
||||||
return Media(mediaType = mediaType, status = Media.Status.ERROR, comment = ex.message)
|
return Media(mediaType = mediaType, status = Media.Status.ERROR, comment = ex.message)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val (pages, others) = entries
|
||||||
|
.partition { entry ->
|
||||||
|
entry.mediaType?.let { contentDetector.isImage(it) } ?: false
|
||||||
|
}.let { (images, others) ->
|
||||||
|
Pair(
|
||||||
|
images
|
||||||
|
.map { BookPage(it.name, it.mediaType!!) }
|
||||||
|
.sortedWith(compareBy(natSortComparator) { it.fileName }),
|
||||||
|
others
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
val entriesErrorSummary = others
|
||||||
|
.filter { it.mediaType.isNullOrBlank() }
|
||||||
|
.map { it.name }
|
||||||
|
.ifEmpty { null }
|
||||||
|
?.joinToString(prefix = "Some entries could not be analyzed: [", postfix = "]") { it }
|
||||||
|
|
||||||
if (pages.isEmpty()) {
|
if (pages.isEmpty()) {
|
||||||
logger.warn { "Book $book does not contain any pages" }
|
logger.warn { "Book $book does not contain any pages" }
|
||||||
return Media(mediaType = mediaType, status = Media.Status.ERROR, comment = "Book does not contain any pages")
|
return Media(mediaType = mediaType, status = Media.Status.ERROR, comment = "Book does not contain any pages")
|
||||||
|
|
@ -59,7 +78,7 @@ class BookAnalyzer(
|
||||||
logger.info { "Trying to generate cover for book: $book" }
|
logger.info { "Trying to generate cover for book: $book" }
|
||||||
val thumbnail = generateThumbnail(book, mediaType, pages.first().fileName)
|
val thumbnail = generateThumbnail(book, mediaType, pages.first().fileName)
|
||||||
|
|
||||||
return Media(mediaType = mediaType, status = Media.Status.READY, pages = pages, thumbnail = thumbnail)
|
return Media(mediaType = mediaType, status = Media.Status.READY, pages = pages, thumbnail = thumbnail, comment = entriesErrorSummary)
|
||||||
}
|
}
|
||||||
|
|
||||||
@Throws(MediaNotReadyException::class)
|
@Throws(MediaNotReadyException::class)
|
||||||
|
|
@ -84,7 +103,7 @@ class BookAnalyzer(
|
||||||
private fun generateThumbnail(book: Book, mediaType: String, entry: String): ByteArray? =
|
private fun generateThumbnail(book: Book, mediaType: String, entry: String): ByteArray? =
|
||||||
try {
|
try {
|
||||||
ByteArrayOutputStream().use {
|
ByteArrayOutputStream().use {
|
||||||
supportedMediaTypes.getValue(mediaType).getPageStream(book.path(), entry).let { cover ->
|
supportedMediaTypes.getValue(mediaType).getEntryStream(book.path(), entry).let { cover ->
|
||||||
Thumbnails.of(cover.inputStream())
|
Thumbnails.of(cover.inputStream())
|
||||||
.size(thumbnailSize, thumbnailSize)
|
.size(thumbnailSize, thumbnailSize)
|
||||||
.outputFormat(thumbnailFormat)
|
.outputFormat(thumbnailFormat)
|
||||||
|
|
@ -114,6 +133,6 @@ class BookAnalyzer(
|
||||||
throw IndexOutOfBoundsException("Page $number does not exist")
|
throw IndexOutOfBoundsException("Page $number does not exist")
|
||||||
}
|
}
|
||||||
|
|
||||||
return supportedMediaTypes.getValue(book.media.mediaType!!).getPageStream(book.path(), book.media.pages[number - 1].fileName)
|
return supportedMediaTypes.getValue(book.media.mediaType!!).getEntryStream(book.path(), book.media.pages[number - 1].fileName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
package org.gotson.komga.infrastructure.archive
|
|
||||||
|
|
||||||
import org.gotson.komga.domain.model.BookPage
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
abstract class ArchiveExtractor {
|
|
||||||
abstract fun getPagesList(path: Path): List<BookPage>
|
|
||||||
abstract fun getPageStream(path: Path, entryName: String): ByteArray
|
|
||||||
}
|
|
||||||
|
|
@ -1,42 +0,0 @@
|
||||||
package org.gotson.komga.infrastructure.archive
|
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument
|
|
||||||
import org.apache.pdfbox.rendering.ImageType
|
|
||||||
import org.apache.pdfbox.rendering.PDFRenderer
|
|
||||||
import org.gotson.komga.domain.model.BookPage
|
|
||||||
import org.springframework.stereotype.Service
|
|
||||||
import java.io.ByteArrayOutputStream
|
|
||||||
import java.nio.file.Files
|
|
||||||
import java.nio.file.Path
|
|
||||||
import javax.imageio.ImageIO
|
|
||||||
|
|
||||||
@Service
|
|
||||||
class PdfExtractor : ArchiveExtractor() {
|
|
||||||
|
|
||||||
private val mediaType = "image/jpeg"
|
|
||||||
private val imageIOFormat = "jpeg"
|
|
||||||
private val resolution = 1536F
|
|
||||||
|
|
||||||
override fun getPagesList(path: Path): List<BookPage> =
|
|
||||||
Files.newInputStream(path).use { inputStream ->
|
|
||||||
PDDocument.load(inputStream).use { pdf ->
|
|
||||||
(0 until pdf.numberOfPages).map { index ->
|
|
||||||
BookPage(index.toString(), mediaType)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun getPageStream(path: Path, entryName: String): ByteArray =
|
|
||||||
Files.newInputStream(path).use { inputStream ->
|
|
||||||
PDDocument.load(inputStream).use { pdf ->
|
|
||||||
val pageNumber = entryName.toInt()
|
|
||||||
val page = pdf.getPage(pageNumber)
|
|
||||||
val scale = resolution / minOf(page.cropBox.width, page.cropBox.height)
|
|
||||||
val image = PDFRenderer(pdf).renderImage(pageNumber, scale, ImageType.RGB)
|
|
||||||
ByteArrayOutputStream().use { out ->
|
|
||||||
ImageIO.write(image, imageIOFormat, out)
|
|
||||||
out.toByteArray()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
||||||
package org.gotson.komga.infrastructure.archive
|
|
||||||
|
|
||||||
import com.github.junrar.Archive
|
|
||||||
import org.gotson.komga.domain.model.BookPage
|
|
||||||
import org.springframework.stereotype.Service
|
|
||||||
import java.nio.file.Files
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
@Service
|
|
||||||
class RarExtractor(
|
|
||||||
private val contentDetector: ContentDetector
|
|
||||||
) : ArchiveExtractor() {
|
|
||||||
|
|
||||||
override fun getPagesList(path: Path): List<BookPage> =
|
|
||||||
Archive(Files.newInputStream(path)).use { rar ->
|
|
||||||
rar.fileHeaders
|
|
||||||
.filter { !it.isDirectory }
|
|
||||||
.map {
|
|
||||||
BookPage(
|
|
||||||
it.fileNameString,
|
|
||||||
contentDetector.detectMediaType(rar.getInputStream(it))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
.filter { contentDetector.isImage(it.mediaType) }
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun getPageStream(path: Path, entryName: String): ByteArray =
|
|
||||||
Archive(Files.newInputStream(path)).use { rar ->
|
|
||||||
val header = rar.fileHeaders.find { it.fileNameString == entryName }
|
|
||||||
rar.getInputStream(header).readBytes()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
package org.gotson.komga.infrastructure.archive
|
|
||||||
|
|
||||||
import org.apache.commons.compress.archivers.zip.ZipFile
|
|
||||||
import org.gotson.komga.domain.model.BookPage
|
|
||||||
import org.springframework.stereotype.Service
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
@Service
|
|
||||||
class ZipExtractor(
|
|
||||||
private val contentDetector: ContentDetector
|
|
||||||
) : ArchiveExtractor() {
|
|
||||||
|
|
||||||
override fun getPagesList(path: Path): List<BookPage> =
|
|
||||||
ZipFile(path.toFile()).use { zip ->
|
|
||||||
zip.entries.toList()
|
|
||||||
.filter { !it.isDirectory }
|
|
||||||
.map {
|
|
||||||
BookPage(
|
|
||||||
it.name,
|
|
||||||
contentDetector.detectMediaType(zip.getInputStream(it))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
.filter { contentDetector.isImage(it.mediaType) }
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun getPageStream(path: Path, entryName: String): ByteArray =
|
|
||||||
ZipFile(path.toFile()).use {
|
|
||||||
it.getInputStream(it.getEntry(entryName)).readBytes()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.gotson.komga.infrastructure.archive
|
package org.gotson.komga.infrastructure.mediacontainer
|
||||||
|
|
||||||
import mu.KotlinLogging
|
import mu.KotlinLogging
|
||||||
import org.apache.tika.config.TikaConfig
|
import org.apache.tika.config.TikaConfig
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
package org.gotson.komga.infrastructure.mediacontainer
|
||||||
|
|
||||||
|
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||||
|
import java.nio.file.Path
|
||||||
|
|
||||||
|
/**
 * Base contract for components able to read the content of a media container stored on disk.
 */
abstract class MediaContainerExtractor {

    /**
     * Lists the entries of the container located at [path].
     *
     * @param path location of the container file.
     * @return one [MediaContainerEntry] per entry reported by the implementation.
     */
    abstract fun getEntries(path: Path): List<MediaContainerEntry>

    /**
     * Reads the full content of one entry of the container located at [path].
     *
     * @param path location of the container file.
     * @param entryName name identifying the entry to read.
     * @return the entry's content as a byte array.
     */
    abstract fun getEntryStream(path: Path, entryName: String): ByteArray
}
|
||||||
|
|
@ -0,0 +1,42 @@
|
||||||
|
package org.gotson.komga.infrastructure.mediacontainer
|
||||||
|
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument
|
||||||
|
import org.apache.pdfbox.rendering.ImageType
|
||||||
|
import org.apache.pdfbox.rendering.PDFRenderer
|
||||||
|
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||||
|
import org.springframework.stereotype.Service
|
||||||
|
import java.io.ByteArrayOutputStream
|
||||||
|
import java.nio.file.Files
|
||||||
|
import java.nio.file.Path
|
||||||
|
import javax.imageio.ImageIO
|
||||||
|
|
||||||
|
/**
 * [MediaContainerExtractor] for PDF documents: every page of the document is exposed as one
 * entry, and an entry's content is that page rendered to a JPEG image.
 */
@Service
class PdfExtractor : MediaContainerExtractor() {

    // Media type reported for every page entry.
    private val mediaType = "image/jpeg"

    // Format name handed to ImageIO when encoding a rendered page.
    private val imageIOFormat = "jpeg"

    // Target size, in pixels, of the shortest side of a rendered page.
    private val resolution = 1536F

    /**
     * Lists the pages of the PDF located at [path].
     *
     * @return one entry per page, named after the zero-based page index.
     */
    override fun getEntries(path: Path): List<MediaContainerEntry> =
        Files.newInputStream(path).use { stream ->
            PDDocument.load(stream).use { document ->
                List(document.numberOfPages) { pageIndex ->
                    MediaContainerEntry(pageIndex.toString(), mediaType)
                }
            }
        }

    /**
     * Renders one page of the PDF located at [path] as a JPEG image.
     *
     * @param entryName zero-based page index, as a decimal string.
     * @return the encoded image bytes.
     */
    override fun getEntryStream(path: Path, entryName: String): ByteArray =
        Files.newInputStream(path).use { stream ->
            PDDocument.load(stream).use { document ->
                val pageIndex = entryName.toInt()
                val cropBox = document.getPage(pageIndex).cropBox
                // Scale so that the shortest side of the crop box comes out at `resolution` pixels.
                val scale = resolution / minOf(cropBox.width, cropBox.height)
                val rendered = PDFRenderer(document).renderImage(pageIndex, scale, ImageType.RGB)
                ByteArrayOutputStream().use { buffer ->
                    ImageIO.write(rendered, imageIOFormat, buffer)
                    buffer.toByteArray()
                }
            }
        }
}
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.gotson.komga.infrastructure.mediacontainer
|
||||||
|
|
||||||
|
import com.github.junrar.Archive
|
||||||
|
import mu.KotlinLogging
|
||||||
|
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||||
|
import org.springframework.stereotype.Service
|
||||||
|
import java.nio.file.Files
|
||||||
|
import java.nio.file.Path
|
||||||
|
|
||||||
|
private val logger = KotlinLogging.logger {}
|
||||||
|
|
||||||
|
/**
 * [MediaContainerExtractor] for RAR archives, backed by junrar's [Archive].
 */
@Service
class RarExtractor(
    private val contentDetector: ContentDetector
) : MediaContainerExtractor() {

    /**
     * Lists every non-directory entry of the RAR archive located at [path].
     *
     * Media type detection is best-effort: when reading or analyzing an entry fails, the failure
     * is logged and the entry is still returned, with a `null` media type and the exception
     * message as its comment, so that one broken entry does not fail the whole archive.
     *
     * @return one [MediaContainerEntry] per file header of the archive.
     */
    override fun getEntries(path: Path): List<MediaContainerEntry> =
        Archive(Files.newInputStream(path)).use { rar ->
            rar.fileHeaders
                .filter { !it.isDirectory }
                .map { header ->
                    try {
                        // Close each entry stream as soon as detection is done instead of leaving it open.
                        // NOTE(review): presumably this also releases junrar's extraction resources - confirm against the junrar version in use.
                        val detectedType = rar.getInputStream(header).use { contentDetector.detectMediaType(it) }
                        MediaContainerEntry(name = header.fileNameString, mediaType = detectedType)
                    } catch (e: Exception) {
                        logger.warn(e) { "Could not analyze entry: ${header.fileNameString}" }
                        MediaContainerEntry(name = header.fileNameString, comment = e.message)
                    }
                }
        }

    /**
     * Reads the full content of the entry named [entryName] from the RAR archive located at [path].
     *
     * @return the entry's bytes.
     * @throws NoSuchElementException if the archive has no entry with that name.
     */
    override fun getEntryStream(path: Path, entryName: String): ByteArray =
        Archive(Files.newInputStream(path)).use { rar ->
            // `find` yields null for an unknown name; fail with an explicit message rather than
            // handing a null header to junrar.
            val header = rar.fileHeaders.find { it.fileNameString == entryName }
                ?: throw NoSuchElementException("Entry not found in archive: $entryName")
            rar.getInputStream(header).use { it.readBytes() }
        }
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.gotson.komga.infrastructure.archive
|
package org.gotson.komga.infrastructure.mediacontainer
|
||||||
|
|
||||||
import org.apache.tika.config.TikaConfig
|
import org.apache.tika.config.TikaConfig
|
||||||
import org.springframework.context.annotation.Bean
|
import org.springframework.context.annotation.Bean
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
package org.gotson.komga.infrastructure.mediacontainer
|
||||||
|
|
||||||
|
import mu.KotlinLogging
|
||||||
|
import org.apache.commons.compress.archivers.zip.ZipFile
|
||||||
|
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||||
|
import org.springframework.stereotype.Service
|
||||||
|
import java.nio.file.Path
|
||||||
|
|
||||||
|
private val logger = KotlinLogging.logger {}
|
||||||
|
|
||||||
|
/**
 * [MediaContainerExtractor] for ZIP archives, backed by Apache Commons Compress' [ZipFile].
 */
@Service
class ZipExtractor(
    private val contentDetector: ContentDetector
) : MediaContainerExtractor() {

    /**
     * Lists every non-directory entry of the ZIP archive located at [path].
     *
     * Media type detection is best-effort: when reading or analyzing an entry fails, the failure
     * is logged and the entry is still returned, with a `null` media type and the exception
     * message as its comment, so that one broken entry does not fail the whole archive.
     *
     * @return one [MediaContainerEntry] per file entry of the archive.
     */
    override fun getEntries(path: Path): List<MediaContainerEntry> =
        ZipFile(path.toFile()).use { zip ->
            zip.entries.toList()
                .filter { !it.isDirectory }
                .map { entry ->
                    try {
                        // Close each entry stream as soon as detection is done instead of keeping
                        // all of them open until the archive itself is closed.
                        val detectedType = zip.getInputStream(entry).use { contentDetector.detectMediaType(it) }
                        MediaContainerEntry(name = entry.name, mediaType = detectedType)
                    } catch (e: Exception) {
                        logger.warn(e) { "Could not analyze entry: ${entry.name}" }
                        MediaContainerEntry(name = entry.name, comment = e.message)
                    }
                }
        }

    /**
     * Reads the full content of the entry named [entryName] from the ZIP archive located at [path].
     *
     * @return the entry's bytes.
     * @throws NoSuchElementException if the archive has no entry with that name.
     */
    override fun getEntryStream(path: Path, entryName: String): ByteArray =
        ZipFile(path.toFile()).use { zip ->
            // `getEntry` returns null for an unknown name; fail with an explicit message rather
            // than letting the null propagate into a NullPointerException.
            val entry = zip.getEntry(entryName)
                ?: throw NoSuchElementException("Entry not found in archive: $entryName")
            zip.getInputStream(entry).use { it.readBytes() }
        }
}
|
||||||
|
|
@ -4,12 +4,12 @@ import io.mockk.every
|
||||||
import io.mockk.mockk
|
import io.mockk.mockk
|
||||||
import io.mockk.slot
|
import io.mockk.slot
|
||||||
import org.assertj.core.api.Assertions.assertThat
|
import org.assertj.core.api.Assertions.assertThat
|
||||||
|
import org.gotson.komga.domain.model.MediaContainerEntry
|
||||||
import org.gotson.komga.domain.model.makeBook
|
import org.gotson.komga.domain.model.makeBook
|
||||||
import org.gotson.komga.domain.model.makeBookPage
|
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||||
import org.gotson.komga.infrastructure.archive.ContentDetector
|
import org.gotson.komga.infrastructure.mediacontainer.PdfExtractor
|
||||||
import org.gotson.komga.infrastructure.archive.PdfExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.RarExtractor
|
||||||
import org.gotson.komga.infrastructure.archive.RarExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.ZipExtractor
|
||||||
import org.gotson.komga.infrastructure.archive.ZipExtractor
|
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
class BookAnalyzerTest {
|
class BookAnalyzerTest {
|
||||||
|
|
@ -25,13 +25,14 @@ class BookAnalyzerTest {
|
||||||
// given
|
// given
|
||||||
val book = makeBook("book")
|
val book = makeBook("book")
|
||||||
every { mockContent.detectMediaType(book.path()) } returns "application/zip"
|
every { mockContent.detectMediaType(book.path()) } returns "application/zip"
|
||||||
|
every { mockContent.isImage(any()) } returns true
|
||||||
|
|
||||||
val unorderedPages = listOf("08", "01", "02").map { makeBookPage(it) }
|
val unorderedPages = listOf("08", "01", "02").map { MediaContainerEntry(it, "image/png") }
|
||||||
every { mockZip.getPagesList(book.path()) } returns unorderedPages
|
every { mockZip.getEntries(book.path()) } returns unorderedPages
|
||||||
|
|
||||||
//when
|
//when
|
||||||
val thumbnailFile = slot<String>()
|
val thumbnailFile = slot<String>()
|
||||||
every { mockZip.getPageStream(book.path(), capture(thumbnailFile)) } returns ByteArray(1)
|
every { mockZip.getEntryStream(book.path(), capture(thumbnailFile)) } returns ByteArray(1)
|
||||||
bookAnalyzer.analyze(book)
|
bookAnalyzer.analyze(book)
|
||||||
|
|
||||||
// then
|
// then
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue