fix(komga): do not cache PDF documents

Caching PDF documents can cause issues when multiple tasks are running in parallel.
This commit is contained in:
Gauthier Roebroeck 2023-10-19 15:53:50 +08:00
parent 3467d90bf7
commit 6d5d69532a

View file

@ -1,6 +1,5 @@
package org.gotson.komga.infrastructure.mediacontainer
import com.github.benmanes.caffeine.cache.Caffeine
import mu.KotlinLogging
import org.apache.pdfbox.io.MemoryUsageSetting
import org.apache.pdfbox.multipdf.PageExtractor
@ -15,7 +14,6 @@ import org.gotson.komga.domain.model.MediaType
import org.springframework.stereotype.Service
import java.io.ByteArrayOutputStream
import java.nio.file.Path
import java.util.concurrent.TimeUnit
import javax.imageio.ImageIO
import kotlin.math.roundToInt
@ -28,12 +26,6 @@ class PdfExtractor : MediaContainerRawExtractor {
// Image format name handed to ImageIO.write when encoding rendered pages.
private val imageIOFormat = "jpeg"
// Numerator of the render scale: getScale() divides this value by the shorter crop-box side of the page.
private val resolution = 1536F
// Cache of opened documents keyed by file path: at most 20 entries, each expiring
// 1 minute after its last access. The eviction listener closes the evicted document.
// NOTE(review): every caller asking for the same path receives the same PDDocument
// instance — confirm PDDocument tolerates concurrent use before relying on this.
private val cache = Caffeine.newBuilder()
.maximumSize(20)
.expireAfterAccess(1, TimeUnit.MINUTES)
.evictionListener { _: Path?, pdf: PDDocument?, _ -> pdf?.close() }
.build<Path, PDDocument>()
/** Returns the media types this extractor handles: only the PDF media type. */
override fun mediaTypes(): List<String> {
    val pdfType = MediaType.PDF.type
    return listOf(pdfType)
}
override fun getEntries(path: Path, analyzeDimensions: Boolean): List<MediaContainerEntry> =
@ -47,24 +39,26 @@ class PdfExtractor : MediaContainerRawExtractor {
}
/**
 * Renders one page of the PDF at [path] and returns it encoded in the
 * [imageIOFormat] image format.
 *
 * The document is opened for this call only and closed before returning, so no
 * [PDDocument] instance is shared between concurrent callers.
 *
 * @param path location of the PDF file
 * @param entryName page index as a decimal string; it is passed unchanged to
 *   [PDDocument.getPage] and to the renderer
 * @return the encoded image bytes
 * @throws NumberFormatException if [entryName] is not a valid integer
 */
override fun getEntryStream(path: Path, entryName: String): ByteArray =
    PDDocument.load(path.toFile(), MemoryUsageSetting.setupTempFileOnly()).use { pdf ->
        val pageNumber = entryName.toInt()
        val page = pdf.getPage(pageNumber)
        // Scale is derived from the page's crop box so the rendered size follows `resolution`.
        val image = PDFRenderer(pdf).renderImage(pageNumber, page.getScale(), ImageType.RGB)
        ByteArrayOutputStream().use { out ->
            ImageIO.write(image, imageIOFormat, out)
            out.toByteArray()
        }
    }
/**
 * Extracts a single page of the PDF at [path] as a standalone PDF document.
 *
 * Both the source document and the extracted single-page document are opened
 * for this call only and closed before returning.
 *
 * @param path location of the PDF file
 * @param entryName page index as a decimal string
 * @return the bytes of the single-page PDF, tagged with the PDF media type
 * @throws NumberFormatException if [entryName] is not a valid integer
 */
override fun getRawEntryStream(path: Path, entryName: String): BookPageContent =
    PDDocument.load(path.toFile(), MemoryUsageSetting.setupTempFileOnly()).use { pdf ->
        // The + 1 converts the entry index to the page numbering PageExtractor
        // expects (1-based per the PDFBox API documentation).
        val pageNumber = entryName.toInt() + 1
        val bytes = ByteArrayOutputStream().use { out ->
            // extract() returns a new PDDocument; close it once it has been saved.
            PageExtractor(pdf, pageNumber, pageNumber).extract().use { extracted ->
                extracted.save(out)
            }
            out.toByteArray()
        }
        BookPageContent(bytes, MediaType.PDF.type)
    }
/**
 * Computes the render scale for this page: [resolution] divided by the
 * shorter side of the page's crop box.
 */
private fun PDPage.getScale(): Float {
    val shortestSide = minOf(cropBox.width, cropBox.height)
    return resolution / shortestSide
}