From b436e90a8c429749b5c10a52ae39821eb939d5a0 Mon Sep 17 00:00:00 2001 From: Gauthier Roebroeck Date: Thu, 17 Feb 2022 11:55:26 +0800 Subject: [PATCH] perf: hash and delete pages in a single scan --- .../gotson/komga/application/tasks/Task.kt | 11 +++++++++ .../komga/application/tasks/TaskHandler.kt | 24 +++++++++++++++---- .../komga/application/tasks/TaskReceiver.kt | 19 +++++++-------- .../komga/domain/service/PageHashLifecycle.kt | 11 ++++++++- 4 files changed, 49 insertions(+), 16 deletions(-) diff --git a/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt b/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt index 360c2b010..cb39a2e56 100644 --- a/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt +++ b/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt @@ -9,6 +9,7 @@ import java.io.Serializable const val HIGHEST_PRIORITY = 8 const val HIGH_PRIORITY = 6 const val DEFAULT_PRIORITY = 4 +const val LOW_PRIORITY = 2 const val LOWEST_PRIORITY = 0 sealed class Task(priority: Int = DEFAULT_PRIORITY, val groupId: String? = null) : Serializable { @@ -25,6 +26,16 @@ sealed class Task(priority: Int = DEFAULT_PRIORITY, val groupId: String? 
= null) override fun toString(): String = "FindBooksToConvert(libraryId='$libraryId', priority='$priority')" } + class FindBooksWithMissingPageHash(val libraryId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) { + override fun uniqueId() = "FIND_BOOKS_WITH_MISSING_PAGE_HASH_$libraryId" + override fun toString(): String = "FindBooksWithMissingPageHash(libraryId='$libraryId', priority='$priority')" + } + + class FindDuplicatePagesToDelete(val libraryId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) { + override fun uniqueId() = "FIND_DUPLICATE_PAGES_TO_DELETE_$libraryId" + override fun toString(): String = "FindDuplicatePagesToDelete(libraryId='$libraryId', priority='$priority')" + } + class EmptyTrash(val libraryId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) { override fun uniqueId() = "EMPTY_TRASH_$libraryId" override fun toString(): String = "EmptyTrash(libraryId='$libraryId', priority='$priority')" diff --git a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt index 9fbe985ba..80ed30d03 100644 --- a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt +++ b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt @@ -12,6 +12,7 @@ import org.gotson.komga.domain.service.BookMetadataLifecycle import org.gotson.komga.domain.service.BookPageEditor import org.gotson.komga.domain.service.LibraryContentLifecycle import org.gotson.komga.domain.service.LocalArtworkLifecycle +import org.gotson.komga.domain.service.PageHashLifecycle import org.gotson.komga.domain.service.SeriesLifecycle import org.gotson.komga.domain.service.SeriesMetadataLifecycle import org.gotson.komga.infrastructure.jms.QUEUE_FACTORY @@ -43,6 +44,7 @@ class TaskHandler( private val bookConverter: BookConverter, private val bookPageEditor: BookPageEditor, private val searchIndexLifecycle: SearchIndexLifecycle, + private val 
pageHashLifecycle: PageHashLifecycle, private val meterRegistry: MeterRegistry, ) { @@ -57,11 +59,11 @@ class TaskHandler( libraryRepository.findByIdOrNull(task.libraryId)?.let { library -> libraryContentLifecycle.scanRootFolder(library) taskReceiver.analyzeUnknownAndOutdatedBooks(library) - taskReceiver.hashBooksWithoutHash(library) - taskReceiver.hashBookPagesWithMissingHash(library) - taskReceiver.repairExtensions(library, LOWEST_PRIORITY) + taskReceiver.repairExtensions(library, LOW_PRIORITY) taskReceiver.findBooksToConvert(library, LOWEST_PRIORITY) - taskReceiver.removeDuplicatePages(library, LOWEST_PRIORITY) + taskReceiver.findBooksWithMissingPageHash(library, LOWEST_PRIORITY) + taskReceiver.findDuplicatePagesToDelete(library, LOWEST_PRIORITY) + taskReceiver.hashBooksWithoutHash(library) } ?: logger.warn { "Cannot execute task $task: Library does not exist" } is Task.FindBooksToConvert -> @@ -71,6 +73,20 @@ class TaskHandler( } } ?: logger.warn { "Cannot execute task $task: Library does not exist" } + is Task.FindBooksWithMissingPageHash -> + libraryRepository.findByIdOrNull(task.libraryId)?.let { library -> + pageHashLifecycle.getBookAndSeriesIdsWithMissingPageHash(library).forEach { + taskReceiver.hashBookPages(it.first, it.second, task.priority + 1) + } + } ?: logger.warn { "Cannot execute task $task: Library does not exist" } + + is Task.FindDuplicatePagesToDelete -> + libraryRepository.findByIdOrNull(task.libraryId)?.let { library -> + pageHashLifecycle.getBookPagesToDeleteAutomatically(library).forEach { (bookId, pages) -> + taskReceiver.removeDuplicatePages(bookId, pages, task.priority + 1) + } + } ?: logger.warn { "Cannot execute task $task: Library does not exist" } + is Task.EmptyTrash -> libraryRepository.findByIdOrNull(task.libraryId)?.let { library -> libraryContentLifecycle.emptyTrash(library) diff --git a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt 
b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt index e0f4c68d0..ee115bfe8 100644 --- a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt +++ b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt @@ -11,7 +11,6 @@ import org.gotson.komga.domain.model.Media import org.gotson.komga.domain.persistence.BookRepository import org.gotson.komga.domain.persistence.LibraryRepository import org.gotson.komga.domain.service.BookConverter -import org.gotson.komga.domain.service.PageHashLifecycle import org.gotson.komga.infrastructure.jms.JMS_PROPERTY_TYPE import org.gotson.komga.infrastructure.jms.QUEUE_TASKS import org.gotson.komga.infrastructure.jms.QUEUE_UNIQUE_ID @@ -30,7 +29,6 @@ class TaskReceiver( private val libraryRepository: LibraryRepository, private val bookRepository: BookRepository, private val bookConverter: BookConverter, - private val pageHashLifecycle: PageHashLifecycle, ) { private val jmsTemplates = (0..9).associateWith { @@ -71,11 +69,12 @@ class TaskReceiver( } } - fun hashBookPagesWithMissingHash(library: Library) { - if (library.hashPages) - pageHashLifecycle.getBookAndSeriesIdsWithMissingPageHash(library).forEach { - submitTask(Task.HashBookPages(it.first, LOWEST_PRIORITY, it.second)) - } + fun findBooksWithMissingPageHash(library: Library, priority: Int = DEFAULT_PRIORITY) { + submitTask(Task.FindBooksWithMissingPageHash(library.id, priority)) + } + + fun hashBookPages(bookId: String, seriesId: String, priority: Int = DEFAULT_PRIORITY) { + submitTask(Task.HashBookPages(bookId, priority, seriesId)) } fun findBooksToConvert(library: Library, priority: Int = DEFAULT_PRIORITY) { @@ -93,10 +92,8 @@ class TaskReceiver( } } - fun removeDuplicatePages(library: Library, priority: Int = DEFAULT_PRIORITY) { - pageHashLifecycle.getBookPagesToDeleteAutomatically(library).forEach { (bookId, pages) -> - removeDuplicatePages(bookId, pages, priority) - } + fun findDuplicatePagesToDelete(library: 
Library, priority: Int = DEFAULT_PRIORITY) { + submitTask(Task.FindDuplicatePagesToDelete(library.id, priority)) } fun removeDuplicatePages(bookId: String, pages: Collection<BookPageNumbered>, priority: Int = DEFAULT_PRIORITY) { diff --git a/komga/src/main/kotlin/org/gotson/komga/domain/service/PageHashLifecycle.kt b/komga/src/main/kotlin/org/gotson/komga/domain/service/PageHashLifecycle.kt index 425c01a23..c3001d92b 100644 --- a/komga/src/main/kotlin/org/gotson/komga/domain/service/PageHashLifecycle.kt +++ b/komga/src/main/kotlin/org/gotson/komga/domain/service/PageHashLifecycle.kt @@ -1,5 +1,6 @@ package org.gotson.komga.domain.service +import mu.KotlinLogging import org.gotson.komga.domain.model.BookPageContent import org.gotson.komga.domain.model.BookPageNumbered import org.gotson.komga.domain.model.Library @@ -13,6 +14,8 @@ import org.gotson.komga.infrastructure.configuration.KomgaProperties import org.springframework.data.domain.Pageable import org.springframework.stereotype.Service +private val logger = KotlinLogging.logger {} + @Service class PageHashLifecycle( private val pageHashRepository: PageHashRepository, @@ -28,7 +31,13 @@ class PageHashLifecycle( * @return a Collection of Pair of BookId/SeriesId */ fun getBookAndSeriesIdsWithMissingPageHash(library: Library): Collection<Pair<String, String>> = - mediaRepository.findAllBookAndSeriesIdsByLibraryIdAndMediaTypeAndWithMissingPageHash(library.id, hashableMediaTypes, komgaProperties.pageHashing) + if (library.hashPages) + mediaRepository.findAllBookAndSeriesIdsByLibraryIdAndMediaTypeAndWithMissingPageHash(library.id, hashableMediaTypes, komgaProperties.pageHashing) + .also { logger.info { "Found ${it.size} books with missing page hash" } } + else { + logger.info { "Page hashing is not enabled, skipping" } + emptyList() + } fun getPage(pageHash: PageHash, resizeTo: Int? = null): BookPageContent? { val match = pageHashRepository.findMatchesByHash(pageHash, null, Pageable.ofSize(1)).firstOrNull() ?: return null