store each page mediaType during parsing

return mediaType of pages in API
simplify getting stream of Page since the mediaType is already available
parse books at startup
by default don't return books that are not ready in API
This commit is contained in:
Gauthier Roebroeck 2019-08-18 17:52:15 +08:00
parent 990e443f77
commit 264be393b8
15 changed files with 96 additions and 84 deletions

View file

@ -16,7 +16,7 @@ class BookMetadata(
@Enumerated(EnumType.STRING)
val status: Status = Status.UNKNOWN,
val mediaType: String? = null,
pages: List<String> = emptyList()
pages: List<BookPage> = emptyList()
) {
@Id
@GeneratedValue
@ -28,9 +28,9 @@ class BookMetadata(
@ElementCollection(fetch = FetchType.EAGER)
@CollectionTable(name = "book_metadata_pages")
@Column(name = "pages")
private val _pages: MutableList<String> = mutableListOf()
private val _pages: MutableList<BookPage> = mutableListOf()
val pages: List<String>
val pages: List<BookPage>
get() = _pages.toList()
init {

View file

@ -1,6 +1,9 @@
package org.gotson.komga.domain.model
data class BookPage(
val mediaType: String,
val content: ByteArray
import javax.persistence.Embeddable
@Embeddable
class BookPage(
val fileName: String,
val mediaType: String
)

View file

@ -1,6 +1,7 @@
package org.gotson.komga.domain.persistence
import org.gotson.komga.domain.model.Book
import org.gotson.komga.domain.model.Status
import org.springframework.data.domain.Page
import org.springframework.data.domain.Pageable
import org.springframework.data.jpa.repository.JpaRepository
@ -11,4 +12,6 @@ import java.net.URL
/**
 * Repository for [Book] entities with `Long` ids, built on Spring Data's [JpaRepository].
 *
 * NOTE(review): none of the finders below declares an explicit query, so they presumably rely on
 * Spring Data deriving the query from the method name — confirm before renaming any of them.
 */
interface BookRepository : JpaRepository<Book, Long> {
/** Paged lookup by [serieId]; presumably matches the id of the serie a book belongs to — TODO confirm mapping. */
fun findAllBySerieId(serieId: Long, pageable: Pageable): Page<Book>
/** Lookup of a single book by [url]; the nullable return type allows for no result. */
fun findByUrl(url: URL): Book?
/** Unpaged lookup of every book whose metadata is in the given [status]. */
fun findAllByMetadataStatus(status: Status): List<Book>
/** Paged lookup combining the metadata [status] filter with the [serieId] filter. */
fun findAllByMetadataStatusAndSerieId(status: Status, serieId: Long, pageable: Pageable): Page<Book>
}

View file

@ -3,21 +3,16 @@ package org.gotson.komga.domain.service
import mu.KotlinLogging
import org.gotson.komga.domain.model.Book
import org.gotson.komga.domain.model.BookMetadata
import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.Status
import org.gotson.komga.domain.persistence.BookRepository
import org.gotson.komga.infrastructure.archive.ContentDetector
import org.springframework.stereotype.Service
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
private val logger = KotlinLogging.logger {}
@Service
class BookManager(
private val bookRepository: BookRepository,
private val bookParser: BookParser,
private val contentDetector: ContentDetector
private val bookParser: BookParser
) {
fun parseAndPersist(book: Book) {
@ -34,7 +29,7 @@ class BookManager(
bookRepository.save(book)
}
fun getPage(book: Book, number: Int): BookPage {
fun getPageContent(book: Book, number: Int): ByteArray {
logger.info { "Get page #$number for book: ${book.url}" }
if (book.metadata.status == Status.UNKNOWN) {
@ -47,27 +42,6 @@ class BookManager(
throw MetadataNotReadyException()
}
lateinit var mediaType: String
lateinit var content: ByteArray
bookParser.getPage(book, number).use { stream ->
if (stream.markSupported()) {
logger.debug { "Stream supports mark, passing it as is for content detection" }
mediaType = contentDetector.detectMediaType(stream)
content = stream.readBytes()
} else {
logger.debug { "Stream does not support mark, using a cloned stream for content detection" }
val buffer = ByteArrayOutputStream()
stream.copyTo(buffer)
val clonedStream = ByteArrayInputStream(buffer.toByteArray())
mediaType = clonedStream.use { contentDetector.detectMediaType(it) }
content = buffer.toByteArray()
}
}
logger.info { "Page media type: $mediaType" }
return BookPage(mediaType, content)
return bookParser.getPageStrema(book, number).readBytes()
}
}

View file

@ -33,13 +33,13 @@ class BookParser(
if (!supportedMediaTypes.keys.contains(mediaType))
throw UnsupportedMediaTypeException("Unsupported mime type: $mediaType. File: ${book.url}", mediaType)
val pageNames = supportedMediaTypes.getValue(mediaType).getFilenames(book.path())
logger.info { "Book has ${pageNames.size} pages" }
val pages = supportedMediaTypes.getValue(mediaType).getPagesList(book.path())
logger.info { "Book has ${pages.size} pages" }
return BookMetadata(mediaType = mediaType, status = Status.READY, pages = pageNames)
return BookMetadata(mediaType = mediaType, status = Status.READY, pages = pages)
}
fun getPage(book: Book, number: Int): InputStream {
fun getPageStrema(book: Book, number: Int): InputStream {
logger.info { "Get page #$number for book: ${book.url}" }
if (book.metadata.status != Status.READY) {
@ -52,7 +52,7 @@ class BookParser(
throw ArrayIndexOutOfBoundsException("Page $number does not exist")
}
return supportedMediaTypes.getValue(book.metadata.mediaType!!).getEntryStream(book.path(), book.metadata.pages[number - 1])
return supportedMediaTypes.getValue(book.metadata.mediaType!!).getPageStream(book.path(), book.metadata.pages[number - 1].fileName)
}
}

View file

@ -2,6 +2,7 @@ package org.gotson.komga.domain.service
import mu.KotlinLogging
import org.gotson.komga.domain.model.Library
import org.gotson.komga.domain.model.Status
import org.gotson.komga.domain.persistence.BookRepository
import org.gotson.komga.domain.persistence.SerieRepository
import org.springframework.stereotype.Service
@ -14,7 +15,8 @@ private val logger = KotlinLogging.logger {}
class LibraryManager(
private val fileSystemScanner: FileSystemScanner,
private val serieRepository: SerieRepository,
private val bookRepository: BookRepository
private val bookRepository: BookRepository,
private val bookManager: BookManager
) {
@Transactional
@ -55,6 +57,15 @@ class LibraryManager(
serieRepository.saveAll(series)
}.also { logger.info { "Update finished in $it ms" } }
}.also { logger.info { "Library update finished in $it ms" } }
}
/**
 * Parses every book whose metadata status is [Status.UNKNOWN].
 *
 * The books are fetched once up front and handed one by one to
 * `bookManager.parseAndPersist`. The number of books and the elapsed time
 * are logged when the loop completes.
 */
@Transactional
fun parseUnparsedBooks() {
  logger.info { "Parsing all books in status: unknown" }
  val booksToParse = bookRepository.findAllByMetadataStatus(Status.UNKNOWN)
  // Keep the elapsed time in a named local rather than a nested `it`.
  val elapsedMs = measureTimeMillis {
    booksToParse.forEach { bookManager.parseAndPersist(it) }
  }
  logger.info { "Parsed ${booksToParse.size} books in $elapsedMs ms" }
}
}

View file

@ -1,6 +1,7 @@
package org.gotson.komga.infrastructure.archive
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
import org.gotson.komga.domain.model.BookPage
import java.io.InputStream
import java.nio.file.Path
import java.util.*
@ -8,6 +9,6 @@ import java.util.*
abstract class ArchiveExtractor {
protected val natSortComparator: Comparator<String> = CaseInsensitiveSimpleNaturalComparator.getInstance()
abstract fun getFilenames(path: Path): List<String>
abstract fun getEntryStream(path: Path, entryName: String): InputStream
abstract fun getPagesList(path: Path): List<BookPage>
abstract fun getPageStream(path: Path, entryName: String): InputStream
}

View file

@ -16,31 +16,21 @@ class ContentDetector(
) {
/**
 * Detects the media type of the file located at [path].
 *
 * The file name is supplied to the detector through the
 * `Metadata.RESOURCE_NAME_KEY` metadata entry alongside the file content.
 *
 * @param path location of the file to inspect
 * @return the detected media type, rendered with `toString()`
 */
fun detectMediaType(path: Path): String {
  logger.info { "detect media type for path: $path" }
  val metadata = Metadata().also {
    it[Metadata.RESOURCE_NAME_KEY] = path.fileName.toString()
  }
  // The TikaInputStream opened on the file must be closed once detection is done,
  // otherwise every call leaks a file handle.
  val mediaType = TikaInputStream.get(path).use { tika.detector.detect(it, metadata) }
  logger.info { "media type detected: $mediaType" }
  return mediaType.toString()
}
/**
 * Detects the media type of the content read from [stream].
 *
 * The stream is consumed and closed by this call; callers must not reuse it afterwards.
 *
 * @param stream content to inspect
 * @return the detected media type, rendered with `toString()`
 */
fun detectMediaType(stream: InputStream): String {
  logger.info { "detect media type for stream" }
  return stream.use { raw ->
    // Close the Tika wrapper as well as the raw stream, so that any temporary
    // resources the wrapper allocates during detection are released.
    TikaInputStream.get(raw).use { tikaStream ->
      val mediaType = tika.detector.detect(tikaStream, Metadata())
      logger.info { "media type detected: $mediaType" }
      mediaType.toString()
    }
  }
}
/**
 * Tells whether the content of [stream] is detected as an image.
 *
 * Detection is delegated to [detectMediaType]; any [Exception] raised while
 * detecting is treated as "not an image".
 *
 * @return `true` when the detected media type starts with `image/`, `false` otherwise
 */
fun isImage(stream: InputStream): Boolean {
  return try {
    val detectedType = detectMediaType(stream)
    detectedType.startsWith("image/")
  } catch (ex: Exception) {
    false
  }
}
/**
 * Tells whether [mediaType] denotes an image.
 *
 * @return `true` when the string starts with the `image/` prefix (case-sensitive)
 */
fun isImage(mediaType: String): Boolean {
  return mediaType.startsWith("image/")
}
}

View file

@ -1,6 +1,7 @@
package org.gotson.komga.infrastructure.archive
import com.github.junrar.Archive
import org.gotson.komga.domain.model.BookPage
import org.springframework.stereotype.Service
import java.io.InputStream
import java.nio.file.Files
@ -11,17 +12,24 @@ class RarExtractor(
private val contentDetector: ContentDetector
) : ArchiveExtractor() {
override fun getFilenames(path: Path): List<String> {
val archive = Archive(Files.newInputStream(path))
override fun getPagesList(path: Path): List<BookPage> {
val rar = Archive(Files.newInputStream(path))
return archive.fileHeaders
return rar.fileHeaders
.filter { !it.isDirectory }
.filter { contentDetector.isImage(archive.getInputStream(it)) }
.map { it.fileNameString }
.sortedWith(natSortComparator)
.map {
BookPage(
it.fileNameString,
contentDetector.detectMediaType(rar.getInputStream(it))
)
}
.filter { contentDetector.isImage(it.mediaType) }
.sortedWith(
compareBy(natSortComparator) { it.fileName }
)
}
override fun getEntryStream(path: Path, entryName: String): InputStream {
override fun getPageStream(path: Path, entryName: String): InputStream {
val archive = Archive(Files.newInputStream(path))
val header = archive.fileHeaders.find { it.fileNameString == entryName }
return archive.getInputStream(header)

View file

@ -1,5 +1,6 @@
package org.gotson.komga.infrastructure.archive
import org.gotson.komga.domain.model.BookPage
import org.springframework.stereotype.Service
import java.io.InputStream
import java.nio.file.Path
@ -10,16 +11,23 @@ class ZipExtractor(
private val contentDetector: ContentDetector
) : ArchiveExtractor() {
override fun getFilenames(path: Path): List<String> {
override fun getPagesList(path: Path): List<BookPage> {
val zip = ZipFile(path.toFile())
return zip.entries().toList()
.filter { !it.isDirectory }
.filter { contentDetector.isImage(zip.getInputStream(it)) }
.map { it.name }
.sortedWith(natSortComparator)
.map {
BookPage(
it.name,
contentDetector.detectMediaType(zip.getInputStream(it))
)
}
.filter { contentDetector.isImage(it.mediaType) }
.sortedWith(
compareBy(natSortComparator) { it.fileName }
)
}
override fun getEntryStream(path: Path, entryName: String): InputStream =
override fun getPageStream(path: Path, entryName: String): InputStream =
ZipFile(path.toFile()).let {
it.getInputStream(it.getEntry(entryName))
}

View file

@ -24,5 +24,8 @@ class RootScannerController(
/**
 * Scans the configured root folder as the library named "default", then
 * asks the library manager to parse the books that are still unparsed.
 */
fun scanRootFolder() {
  logger.info { "Starting periodic library scan" }
  val defaultLibrary = Library("default", komgaProperties.rootFolder)
  libraryManager.scanRootFolder(defaultLibrary)

  logger.info { "Starting periodic book parsing" }
  libraryManager.parseUnparsedBooks()
}
}

View file

@ -16,6 +16,7 @@ import org.springframework.http.ResponseEntity
import org.springframework.web.bind.annotation.GetMapping
import org.springframework.web.bind.annotation.PathVariable
import org.springframework.web.bind.annotation.RequestMapping
import org.springframework.web.bind.annotation.RequestParam
import org.springframework.web.bind.annotation.RestController
import org.springframework.web.server.ResponseStatusException
import java.io.File
@ -41,10 +42,15 @@ class SerieController(
@GetMapping("{id}/books")
fun getAllBooksBySerie(
@PathVariable id: Long,
@RequestParam(value = "readyonly", defaultValue = "true") readyFilter: Boolean,
page: Pageable
): Page<BookDto> {
if (!serieRepository.existsById(id)) throw ResponseStatusException(HttpStatus.NOT_FOUND)
return bookRepository.findAllBySerieId(id, page).map { it.toDto() }
return if (readyFilter) {
bookRepository.findAllByMetadataStatusAndSerieId(Status.READY, id, page)
} else {
bookRepository.findAllBySerieId(id, page)
}.map { it.toDto() }
}
@GetMapping("{serieId}/books/{bookId}")
@ -73,11 +79,12 @@ class SerieController(
@PathVariable bookId: Long
): List<PageDto> {
if (!serieRepository.existsById(serieId)) throw ResponseStatusException(HttpStatus.NOT_FOUND)
return bookRepository.findByIdOrNull((bookId))?.let {
if (it.metadata.status == Status.UNKNOWN) bookManager.parseAndPersist(it)
if (it.metadata.status == Status.UNKNOWN) throw ResponseStatusException(HttpStatus.NO_CONTENT, "Book is not parsed yet")
if (it.metadata.status in listOf(Status.ERROR, Status.UNSUPPORTED)) throw ResponseStatusException(HttpStatus.NO_CONTENT, "Book cannot be parsed")
it.metadata.pages.mapIndexed { index, s -> PageDto(index + 1, s) }
it.metadata.pages.mapIndexed { index, s -> PageDto(index + 1, s.fileName, s.mediaType) }
} ?: throw ResponseStatusException(HttpStatus.NOT_FOUND)
}
@ -91,17 +98,17 @@ class SerieController(
try {
return bookRepository.findByIdOrNull((bookId))?.let { book ->
val page = bookManager.getPage(book, pageNumber)
val pageContent = bookManager.getPageContent(book, pageNumber)
val mediaType = try {
MediaType.parseMediaType(page.mediaType)
MediaType.parseMediaType(book.metadata.mediaType!!)
} catch (ex: Exception) {
MediaType.APPLICATION_OCTET_STREAM
}
ResponseEntity.ok()
.contentType(mediaType)
.body(page.content)
.body(pageContent)
} ?: throw ResponseStatusException(HttpStatus.NOT_FOUND)
} catch (ex: ArrayIndexOutOfBoundsException) {
throw ResponseStatusException(HttpStatus.BAD_REQUEST, "Page number does not exist")
@ -149,5 +156,6 @@ fun Book.toDto() =
data class PageDto(
val number: Int,
val fileName: String
val fileName: String,
val mediaType: String
)

View file

@ -7,4 +7,7 @@ fun makeBook(name: String, url: String = "file:/$name") =
Book(name = name, url = URL(url), updated = LocalDateTime.now())
fun makeSerie(name: String, url: String = "file:/$name", books: List<Book> = listOf()) =
Serie(name = name, url = URL(url), updated = LocalDateTime.now()).also { it.setBooks(books) }
Serie(name = name, url = URL(url), updated = LocalDateTime.now()).also { it.setBooks(books) }
/** Test helper: builds a [BookPage] whose file name is [name] and whose media type is always `image/png`. */
fun makeBookPage(name: String) =
  BookPage(fileName = name, mediaType = "image/png")

View file

@ -4,6 +4,7 @@ import org.assertj.core.api.Assertions.assertThat
import org.gotson.komga.domain.model.BookMetadata
import org.gotson.komga.domain.model.Status
import org.gotson.komga.domain.model.makeBook
import org.gotson.komga.domain.model.makeBookPage
import org.gotson.komga.domain.model.makeSerie
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Test
@ -51,7 +52,7 @@ class PersistenceTest(
// when
val book = bookRepository.findAll().first()
book.metadata = BookMetadata(status = Status.READY, mediaType = "test", pages = listOf("page1"))
book.metadata = BookMetadata(status = Status.READY, mediaType = "test", pages = listOf(makeBookPage("page1")))
bookRepository.save(book)
@ -62,9 +63,8 @@ class PersistenceTest(
bookMetadataRepository.findAll().first().let {
assertThat(it.status == Status.READY)
assertThat(it.mediaType == "test")
assertThat(it.pages)
.hasSize(1)
.containsExactly("page1")
assertThat(it.pages).hasSize(1)
assertThat(it.pages.first().fileName).isEqualTo("page1")
}
}
}

View file

@ -8,6 +8,7 @@ import org.gotson.komga.domain.model.BookMetadata
import org.gotson.komga.domain.model.Library
import org.gotson.komga.domain.model.Status
import org.gotson.komga.domain.model.makeBook
import org.gotson.komga.domain.model.makeBookPage
import org.gotson.komga.domain.model.makeSerie
import org.gotson.komga.domain.persistence.BookRepository
import org.gotson.komga.domain.persistence.SerieRepository
@ -156,7 +157,7 @@ class LibraryManagerTest(
)
libraryManager.scanRootFolder(library)
every { mockParser.parse(any()) } returns BookMetadata(status = Status.READY, mediaType = "application/zip", pages = listOf("1.jpg", "2.jpg"))
every { mockParser.parse(any()) } returns BookMetadata(status = Status.READY, mediaType = "application/zip", pages = listOf(makeBookPage("1.jpg"), makeBookPage("2.jpg")))
bookRepository.findAll().forEach { bookManager.parseAndPersist(it) }
// when
@ -169,8 +170,7 @@ class LibraryManagerTest(
val book = bookRepository.findAll().first()
assertThat(book.metadata.status).isEqualTo(Status.READY)
assertThat(book.metadata.mediaType).isEqualTo("application/zip")
assertThat(book.metadata.pages)
.hasSize(2)
.containsExactly("1.jpg", "2.jpg")
assertThat(book.metadata.pages).hasSize(2)
assertThat(book.metadata.pages.map { it.fileName }).containsExactly("1.jpg", "2.jpg")
}
}