feat: get file size for pages during analysis

This commit is contained in:
Gauthier Roebroeck 2022-01-06 16:13:51 +08:00
parent 5d4ec94e01
commit 432ed4e14c
18 changed files with 178 additions and 12 deletions

View file

@ -0,0 +1,2 @@
alter table media_page
add column FILE_SIZE int8 NULL;

View file

@ -5,4 +5,5 @@ data class BookPage(
val mediaType: String,
val dimension: Dimension? = null,
val fileHash: String = "",
val fileSize: Long? = null,
)

View file

@ -5,4 +5,5 @@ data class MediaContainerEntry(
val mediaType: String? = null,
val comment: String? = null,
val dimension: Dimension? = null,
val fileSize: Long? = null,
)

View file

@ -60,7 +60,7 @@ class BookAnalyzer(
entry.mediaType?.let { contentDetector.isImage(it) } ?: false
}.let { (images, others) ->
Pair(
images.map { BookPage(it.name, it.mediaType!!, it.dimension) },
images.map { BookPage(fileName = it.name, mediaType = it.mediaType!!, dimension = it.dimension, fileSize = it.fileSize) },
others,
)
}

View file

@ -131,7 +131,8 @@ class MediaDao(
p.WIDTH,
p.HEIGHT,
p.FILE_HASH,
).values(null as String?, null, null, null, null, null, null),
p.FILE_SIZE
).values(null as String?, null, null, null, null, null, null, null),
).also { step ->
chunk.forEach { media ->
media.pages.forEachIndexed { index, page ->
@ -143,6 +144,7 @@ class MediaDao(
page.dimension?.width,
page.dimension?.height,
page.fileHash,
page.fileSize,
)
}
}
@ -233,5 +235,6 @@ class MediaDao(
mediaType = mediaType,
dimension = if (width != null && height != null) Dimension(width, height) else null,
fileHash = fileHash,
fileSize = fileSize
)
}

View file

@ -1,6 +1,7 @@
package org.gotson.komga.infrastructure.mediacontainer
import mu.KotlinLogging
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.domain.model.MediaUnsupportedException
@ -56,11 +57,13 @@ class EpubExtractor(
val mediaType = manifest.values.first {
it.href == (opfDir?.relativize(image) ?: image).invariantSeparatorsPathString
}.mediaType
val zipEntry = zip.getEntry(name)
val dimension = if (analyzeDimensions && contentDetector.isImage(mediaType))
zip.getInputStream(zip.getEntry(name)).use { imageAnalyzer.getDimension(it) }
zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
else
null
MediaContainerEntry(name = name, mediaType = mediaType, dimension = dimension)
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
MediaContainerEntry(name = name, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
}
} catch (e: Exception) {
logger.error(e) { "File is not a proper Epub, treating it as a zip file" }

View file

@ -8,7 +8,6 @@ import org.apache.pdfbox.rendering.ImageType
import org.apache.pdfbox.rendering.PDFRenderer
import org.gotson.komga.domain.model.Dimension
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.io.ByteArrayOutputStream
import java.nio.file.Path
@ -19,9 +18,7 @@ import kotlin.math.roundToInt
private val logger = KotlinLogging.logger {}
@Service
class PdfExtractor(
private val imageAnalyzer: ImageAnalyzer,
) : MediaContainerExtractor {
class PdfExtractor : MediaContainerExtractor {
private val mediaType = "image/jpeg"
private val imageIOFormat = "jpeg"

View file

@ -36,7 +36,8 @@ class RarExtractor(
buffer.inputStream().use { imageAnalyzer.getDimension(it) }
else
null
MediaContainerEntry(name = entry.fileName, mediaType = mediaType, dimension = dimension)
val fileSize = entry.fullUnpackSize
MediaContainerEntry(name = entry.fileName, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
} catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${entry.fileName}" }
MediaContainerEntry(name = entry.fileName, comment = e.message)

View file

@ -3,6 +3,7 @@ package org.gotson.komga.infrastructure.mediacontainer
import com.github.benmanes.caffeine.cache.Caffeine
import mu.KotlinLogging
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer
@ -40,7 +41,8 @@ class ZipExtractor(
imageAnalyzer.getDimension(stream)
else
null
MediaContainerEntry(name = entry.name, mediaType = mediaType, dimension = dimension)
val fileSize = if (entry.size == ArchiveEntry.SIZE_UNKNOWN) null else entry.size
MediaContainerEntry(name = entry.name, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
}
} catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${entry.name}" }

View file

@ -432,8 +432,15 @@ class BookController(
)
Media.Status.ERROR -> throw ResponseStatusException(HttpStatus.NOT_FOUND, "Book analysis failed")
Media.Status.UNSUPPORTED -> throw ResponseStatusException(HttpStatus.NOT_FOUND, "Book format is not supported")
Media.Status.READY -> media.pages.mapIndexed { index, s ->
PageDto(index + 1, s.fileName, s.mediaType, s.dimension?.width, s.dimension?.height)
Media.Status.READY -> media.pages.mapIndexed { index, bookPage ->
PageDto(
number = index + 1,
fileName = bookPage.fileName,
mediaType = bookPage.mediaType,
width = bookPage.dimension?.width,
height = bookPage.dimension?.height,
sizeBytes = bookPage.fileSize,
)
}
}
} ?: throw ResponseStatusException(HttpStatus.NOT_FOUND)

View file

@ -96,6 +96,7 @@ private fun BookWithMedia.toDto() =
mediaType = bookPage.mediaType,
width = bookPage.dimension?.width,
height = bookPage.dimension?.height,
sizeBytes = bookPage.fileSize,
)
},
files = media.files,

View file

@ -1,9 +1,13 @@
package org.gotson.komga.interfaces.api.rest.dto
import com.jakewharton.byteunits.BinaryByteUnit
/**
 * REST DTO describing a single page of a book.
 *
 * @property number page number
 * @property fileName name of the page file
 * @property mediaType media type of the page
 * @property width page width, or `null` when it is not available
 * @property height page height, or `null` when it is not available
 * @property sizeBytes size of the page in bytes, or `null` when it is not available
 * @property size [sizeBytes] formatted with [BinaryByteUnit.format]; when not supplied
 * explicitly it defaults to that formatted value, or to an empty string if [sizeBytes] is `null`
 */
data class PageDto(
  val number: Int,
  val fileName: String,
  val mediaType: String,
  val width: Int?,
  val height: Int?,
  val sizeBytes: Long?,
  val size: String = sizeBytes?.let { bytes -> BinaryByteUnit.format(bytes) }.orEmpty(),
)

View file

@ -3,6 +3,7 @@ package org.gotson.komga.infrastructure.jooq
import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.Assertions.catchThrowable
import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.Dimension
import org.gotson.komga.domain.model.Media
import org.gotson.komga.domain.model.makeBook
import org.gotson.komga.domain.model.makeLibrary
@ -68,6 +69,9 @@ class MediaDaoTest(
BookPage(
fileName = "1.jpg",
mediaType = "image/jpeg",
dimension = Dimension(10, 10),
fileHash = "hashed",
fileSize = 10,
),
),
files = listOf("ComicInfo.xml"),
@ -88,6 +92,9 @@ class MediaDaoTest(
with(created.pages.first()) {
assertThat(fileName).isEqualTo(media.pages.first().fileName)
assertThat(mediaType).isEqualTo(media.pages.first().mediaType)
assertThat(dimension).isEqualTo(media.pages.first().dimension)
assertThat(fileHash).isEqualTo(media.pages.first().fileHash)
assertThat(fileSize).isEqualTo(media.pages.first().fileSize)
}
assertThat(created.files).hasSize(1)
assertThat(created.files.first()).isEqualTo(media.files.first())
@ -135,6 +142,9 @@ class MediaDaoTest(
BookPage(
fileName = "2.png",
mediaType = "image/png",
dimension = Dimension(10, 10),
fileHash = "hashed",
fileSize = 10,
),
),
files = listOf("id.txt"),
@ -155,6 +165,9 @@ class MediaDaoTest(
assertThat(modified.comment).isEqualTo(updated.comment)
assertThat(modified.pages.first().fileName).isEqualTo(updated.pages.first().fileName)
assertThat(modified.pages.first().mediaType).isEqualTo(updated.pages.first().mediaType)
assertThat(modified.pages.first().dimension).isEqualTo(updated.pages.first().dimension)
assertThat(modified.pages.first().fileHash).isEqualTo(updated.pages.first().fileHash)
assertThat(modified.pages.first().fileSize).isEqualTo(updated.pages.first().fileSize)
assertThat(modified.files.first()).isEqualTo(updated.files.first())
}

View file

@ -26,6 +26,7 @@ class EpubExtractorTest {
assertThat(name).isEqualTo("cover.jpeg")
assertThat(mediaType).isEqualTo("image/jpeg")
assertThat(dimension).isEqualTo(Dimension(461, 616))
assertThat(fileSize).isEqualTo(56756)
}
}

View file

@ -0,0 +1,40 @@
package org.gotson.komga.infrastructure.mediacontainer
import org.assertj.core.api.Assertions
import org.gotson.komga.domain.model.Dimension
import org.junit.jupiter.api.Test
import org.springframework.core.io.ClassPathResource
/**
 * Verifies the entries [PdfExtractor] reports for the `pdf/komga.pdf` classpath resource.
 */
class PdfExtractorTest {
  private val pdfExtractor = PdfExtractor()

  /**
   * Resolves the sample PDF from the classpath and returns the entries extracted from it,
   * passing [analyzeDimensions] through as the second argument of `getEntries`.
   */
  private fun extractSampleEntries(analyzeDimensions: Boolean) =
    pdfExtractor.getEntries(ClassPathResource("pdf/komga.pdf").file.toPath(), analyzeDimensions)

  @Test
  fun `given pdf file when parsing for entries then returns all images`() {
    val pages = extractSampleEntries(true)

    Assertions.assertThat(pages).hasSize(1)

    val page = pages.first()
    Assertions.assertThat(page.name).isEqualTo("0")
    Assertions.assertThat(page.mediaType).isEqualTo("image/jpeg")
    Assertions.assertThat(page.dimension).isEqualTo(Dimension(1536, 1536))
    // This test expects no file size for the PDF entry.
    Assertions.assertThat(page.fileSize).isNull()
  }

  @Test
  fun `given pdf file when parsing for entries without analyzing dimensions then returns all images without dimensions`() {
    val pages = extractSampleEntries(false)

    Assertions.assertThat(pages).hasSize(1)

    val page = pages.first()
    Assertions.assertThat(page.name).isEqualTo("0")
    Assertions.assertThat(page.mediaType).isEqualTo("image/jpeg")
    Assertions.assertThat(page.dimension).isNull()
    Assertions.assertThat(page.fileSize).isNull()
  }
}

View file

@ -0,0 +1,45 @@
package org.gotson.komga.infrastructure.mediacontainer
import org.apache.tika.config.TikaConfig
import org.assertj.core.api.Assertions
import org.gotson.komga.domain.model.Dimension
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.junit.jupiter.api.Test
import org.springframework.core.io.ClassPathResource
/**
 * Verifies the entries [RarExtractor] reports for the `archives/rar4.rar` classpath resource.
 */
class RarExtractorTest {
  private val rarExtractor = RarExtractor(ContentDetector(TikaConfig()), ImageAnalyzer())

  /**
   * Resolves the sample RAR archive from the classpath and returns the entries extracted from it,
   * passing [analyzeDimensions] through as the second argument of `getEntries`.
   */
  private fun extractSampleEntries(analyzeDimensions: Boolean) =
    rarExtractor.getEntries(ClassPathResource("archives/rar4.rar").file.toPath(), analyzeDimensions)

  @Test
  fun `given rar file when parsing for entries then returns all images`() {
    val pages = extractSampleEntries(true)

    Assertions.assertThat(pages).hasSize(3)

    val firstPage = pages.first()
    Assertions.assertThat(firstPage.name).isEqualTo("komga-1.png")
    Assertions.assertThat(firstPage.mediaType).isEqualTo("image/png")
    Assertions.assertThat(firstPage.dimension).isEqualTo(Dimension(48, 48))
    Assertions.assertThat(firstPage.fileSize).isEqualTo(3108)
  }

  @Test
  fun `given rar file when parsing for entries without analyzing dimensions then returns all images without dimensions`() {
    val pages = extractSampleEntries(false)

    Assertions.assertThat(pages).hasSize(3)

    val firstPage = pages.first()
    Assertions.assertThat(firstPage.name).isEqualTo("komga-1.png")
    Assertions.assertThat(firstPage.mediaType).isEqualTo("image/png")
    Assertions.assertThat(firstPage.dimension).isNull()
    // The file size is still expected when dimensions are not analyzed.
    Assertions.assertThat(firstPage.fileSize).isEqualTo(3108)
  }
}

View file

@ -0,0 +1,45 @@
package org.gotson.komga.infrastructure.mediacontainer
import org.apache.tika.config.TikaConfig
import org.assertj.core.api.Assertions.assertThat
import org.gotson.komga.domain.model.Dimension
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.junit.jupiter.api.Test
import org.springframework.core.io.ClassPathResource
/**
 * Verifies the entries [ZipExtractor] reports for the `archives/zip.zip` classpath resource.
 */
class ZipExtractorTest {
  private val zipExtractor = ZipExtractor(ContentDetector(TikaConfig()), ImageAnalyzer())

  /**
   * Resolves the sample ZIP archive from the classpath and returns the entries extracted from it,
   * passing [analyzeDimensions] through as the second argument of `getEntries`.
   */
  private fun extractSampleEntries(analyzeDimensions: Boolean) =
    zipExtractor.getEntries(ClassPathResource("archives/zip.zip").file.toPath(), analyzeDimensions)

  @Test
  fun `given zip file when parsing for entries then returns all images`() {
    val pages = extractSampleEntries(true)

    assertThat(pages).hasSize(1)

    val page = pages.first()
    assertThat(page.name).isEqualTo("komga.png")
    assertThat(page.mediaType).isEqualTo("image/png")
    assertThat(page.dimension).isEqualTo(Dimension(48, 48))
    assertThat(page.fileSize).isEqualTo(3108)
  }

  @Test
  fun `given zip file when parsing for entries without analyzing dimensions then returns all images without dimensions`() {
    val pages = extractSampleEntries(false)

    assertThat(pages).hasSize(1)

    val page = pages.first()
    assertThat(page.name).isEqualTo("komga.png")
    assertThat(page.mediaType).isEqualTo("image/png")
    assertThat(page.dimension).isNull()
    // The file size is still expected when dimensions are not analyzed.
    assertThat(page.fileSize).isEqualTo(3108)
  }
}

Binary file not shown.