perf: optimize task FindDuplicatePagesToDelete

This commit is contained in:
Gauthier Roebroeck 2022-11-02 15:39:34 +08:00
parent 61bed99c1e
commit 59a0048385
3 changed files with 29 additions and 22 deletions

View file

@ -1,5 +1,6 @@
package org.gotson.komga.domain.persistence
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.PageHash
import org.gotson.komga.domain.model.PageHashKnown
import org.gotson.komga.domain.model.PageHashMatch
@ -13,6 +14,7 @@ interface PageHashRepository {
/**
 * Returns one page of [PageHashUnknown] entries, as requested by [pageable].
 *
 * NOTE(review): what qualifies a hash as "unknown" is decided by the implementation,
 * which is not visible from this declaration.
 */
fun findAllUnknown(pageable: Pageable): Page<PageHashUnknown>
/**
 * Returns one page of [PageHashMatch] entries for the given [pageHash].
 *
 * @param pageHash the hash to look up matches for
 * @param libraryId optional library ID; presumably `null` means "do not restrict by library" — confirm against the implementation
 * @param pageable paging request for the result
 */
fun findMatchesByHash(pageHash: PageHash, libraryId: String?, pageable: Pageable): Page<PageHashMatch>
/**
 * Finds the book pages that match a known page hash whose action is one of [actions],
 * and returns them grouped by book ID.
 *
 * @param actions actions of the known hashes to match pages against
 * @param libraryId when not `null`, only pages of books belonging to this library are returned
 * @return map of book ID to the matching pages of that book
 */
fun findMatchesByKnownHashAction(actions: List<PageHashKnown.Action>?, libraryId: String?): Map<String, Collection<BookPageNumbered>>
/**
 * Returns the thumbnail bytes associated with the given known [pageHash].
 *
 * NOTE(review): presumably returns `null` when no thumbnail is stored for the hash — confirm
 * against the implementation.
 */
fun getKnownThumbnail(pageHash: PageHash): ByteArray?

View file

@ -46,28 +46,8 @@ class PageHashLifecycle(
return bookLifecycle.getBookPage(book, match.pageNumber, resizeTo = resizeTo)
}
/**
 * Collects, for the given [library], every book page matching a known hash flagged
 * [PageHashKnown.Action.DELETE_AUTO].
 *
 * One repository lookup is issued per known hash; the matches are then merged per book.
 *
 * @param library library whose books are searched for matching pages
 * @return map of book ID to the pages of that book that match an auto-delete hash
 */
fun getBookPagesToDeleteAutomatically(library: Library): Map<String, Collection<BookPageNumbered>> {
  val autoDeleteHashes = pageHashRepository
    .findAllKnown(listOf(PageHashKnown.Action.DELETE_AUTO), Pageable.unpaged())
    .content

  // Flatten every (hash, match) combination into a (bookId, page) pair, keeping hash order
  // first and match order second, so the grouping below yields the same ordering as
  // grouping per hash and merging afterwards.
  val pagesByBook = autoDeleteHashes
    .flatMap { known ->
      pageHashRepository.findMatchesByHash(known, library.id, Pageable.unpaged()).content.map { match ->
        match.bookId to BookPageNumbered(
          fileName = match.fileName,
          mediaType = known.mediaType,
          fileHash = known.hash,
          fileSize = known.size,
          pageNumber = match.pageNumber,
        )
      }
    }
    .groupBy({ it.first }, { it.second })

  return pagesByBook.filterValues { it.isNotEmpty() }
}
/**
 * Returns, for the given [library], the book pages matching a known hash flagged
 * [PageHashKnown.Action.DELETE_AUTO], grouped by book ID.
 *
 * The lookup is delegated to the repository as a single call.
 */
fun getBookPagesToDeleteAutomatically(library: Library): Map<String, Collection<BookPageNumbered>> {
  val autoDeleteActions = listOf(PageHashKnown.Action.DELETE_AUTO)
  return pageHashRepository.findMatchesByKnownHashAction(autoDeleteActions, library.id)
}
fun createOrUpdate(pageHash: PageHashKnown) {
if (pageHash.action == PageHashKnown.Action.DELETE_AUTO && pageHash.size == null) throw IllegalArgumentException("cannot create PageHash without size and Action.DELETE_AUTO")

View file

@ -1,5 +1,6 @@
package org.gotson.komga.infrastructure.jooq
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.PageHash
import org.gotson.komga.domain.model.PageHashKnown
import org.gotson.komga.domain.model.PageHashMatch
@ -164,6 +165,30 @@ class PageHashDao(
)
}
/**
 * Finds the book pages that match a known page hash whose action is one of [actions],
 * grouped by book ID, using a single query.
 *
 * A page matches a known hash when the file hash and media type are equal, and the sizes
 * are either both null, or both non-null and equal.
 *
 * NOTE(review): [actions] is declared nullable but is handed to `in` without a guard;
 * confirm how a `null` value should behave (no filter vs. no result) and handle it explicitly.
 *
 * @param actions actions of the known hashes to match pages against
 * @param libraryId when not `null`, the book table is joined and only pages of books in this library are returned
 * @return map of book ID to the matching pages of that book, in the order the rows were fetched
 */
override fun findMatchesByKnownHashAction(actions: List<PageHashKnown.Action>?, libraryId: String?): Map<String, Collection<BookPageNumbered>> =
  dsl.select(p.BOOK_ID, p.FILE_NAME, p.NUMBER, p.FILE_HASH, p.MEDIA_TYPE, p.FILE_SIZE)
    .from(p)
    .innerJoin(ph).on(
      p.FILE_HASH.eq(ph.HASH)
        .and(p.MEDIA_TYPE.eq(ph.MEDIA_TYPE))
        .and(
          // sizes must agree, treating "both null" as a match
          p.FILE_SIZE.isNull.and(ph.SIZE.isNull).or(p.FILE_SIZE.isNotNull.and(ph.SIZE.isNotNull).and(p.FILE_SIZE.eq(ph.SIZE)))
        )
    )
    // the book table is only needed when filtering by library
    .apply { libraryId?.let<String, Unit> { innerJoin(b).on(b.ID.eq(p.BOOK_ID)) } }
    .where(ph.ACTION.`in`(actions))
    .apply { libraryId?.let<String, Unit> { and(b.LIBRARY_ID.eq(it)) } }
    .fetch {
      it.value1() to BookPageNumbered(
        fileName = it.value2(),
        // the stored number is shifted by one (presumably zero-based in the table — confirm)
        pageNumber = it.value3() + 1,
        fileHash = it.value4(),
        mediaType = it.value5(),
        fileSize = it.value6(),
      )
    }
    // Single-pass grouping: folding with `acc + new` would copy the accumulated list
    // for every row, which is quadratic in the number of pages per book.
    .groupBy({ it.first }, { it.second })
override fun getKnownThumbnail(pageHash: PageHash): ByteArray? =
dsl.select(pht.THUMBNAIL)
.from(pht)