// Review commentary for this patch. It sits ahead of the first "diff --git" header, where patch
// tools skip over non-diff text.
//
// Purpose: store a content hash for every book and compute it through a dedicated task.
//   - V20210706162229__file_hash.sql adds BOOK.FILE_HASH (varchar, NOT NULL, default '').
//   - Book gains `fileHash`, defaulting to ""; the empty string marks "not hashed yet"
//     (BookDao.findAllIdsByLibraryIdAndWithEmptyHash selects FILE_HASH = "").
//   - Task.HashBook(bookId) is a new task whose unique id is "HASH_BOOK_<bookId>".
//   - TaskReceiver.hashBooksWithoutHash(library) submits one HashBook task, at LOWEST_PRIORITY,
//     for each book id of the library that still has an empty hash.
//   - TaskHandler calls hashBooksWithoutHash right after scanRootFolder and
//     analyzeUnknownAndOutdatedBooks, and dispatches Task.HashBook to BookLifecycle.hashAndPersist
//     (logging a warning when the book no longer exists).
//   - BookLifecycle.hashAndPersist hashes and updates the book only when its current hash is blank;
//     otherwise it just logs that the book is skipped.
//   - LibraryContentLifecycle resets `fileHash` to "" when a book changed on disk, so the next
//     hashBooksWithoutHash call picks it up again.
//   - Hasher.computeHash reads the file in 4096-byte chunks into an XXHash32 seeded with 0 and
//     returns the resulting value rendered in base 36.
//   - BookDao writes and reads FILE_HASH in insert, update and toDomain; findAll now uses
//     selectFrom(b) instead of select(*b.fields()).from(b).
//   - Tests cover the new column and the hash reset; BookDaoTest also drops an unused logger and
//     a commented-out benchmark.
//
// NOTE(review): this copy of the patch had lost its line breaks and indentation. They were
//   reconstructed from the hunk line counts, assuming 2-space Kotlin indentation - TODO confirm
//   against the target files, or regenerate the patch, before applying it.
// NOTE(review): generic type arguments look stripped in this copy (e.g. `Collection` with no type
//   argument); presumably Collection<String> / Collection<Book> - verify against the real commit.
// NOTE(review): Hasher.computeHash does not catch I/O errors; whether the task runner handles
//   them is not visible in this patch.
// NOTE(review): the lifecycle test stubs two hasher results ("abc", "def") but verifies exactly
//   one computeHash call, and it drives side effects through `map` where `forEach` would state
//   the intent more clearly.
diff --git a/komga/src/flyway/resources/db/migration/sqlite/V20210706162229__file_hash.sql b/komga/src/flyway/resources/db/migration/sqlite/V20210706162229__file_hash.sql
new file mode 100644
index 000000000..ceb54edfc
--- /dev/null
+++ b/komga/src/flyway/resources/db/migration/sqlite/V20210706162229__file_hash.sql
@@ -0,0 +1,2 @@
+ALTER TABLE BOOK
+    ADD COLUMN FILE_HASH varchar NOT NULL DEFAULT '';
diff --git a/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt b/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt
index fd33d31fd..ae227b264 100644
--- a/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt
@@ -32,6 +32,11 @@ sealed class Task(priority: Int = DEFAULT_PRIORITY) : Serializable {
     override fun toString(): String = "RefreshBookMetadata(bookId='$bookId', capabilities=$capabilities, priority='$priority')"
   }
 
+  class HashBook(val bookId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
+    override fun uniqueId() = "HASH_BOOK_$bookId"
+    override fun toString(): String = "HashBook(bookId='$bookId', priority='$priority')"
+  }
+
   class RefreshSeriesMetadata(val seriesId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
     override fun uniqueId() = "REFRESH_SERIES_METADATA_$seriesId"
     override fun toString(): String = "RefreshSeriesMetadata(seriesId='$seriesId', priority='$priority')"
diff --git a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt
index d8f0eeaca..5d4687bb5 100644
--- a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskHandler.kt
@@ -44,6 +44,7 @@ class TaskHandler(
             libraryRepository.findByIdOrNull(task.libraryId)?.let { library ->
               libraryContentLifecycle.scanRootFolder(library)
               taskReceiver.analyzeUnknownAndOutdatedBooks(library)
+              taskReceiver.hashBooksWithoutHash(library)
               if (library.repairExtensions) taskReceiver.repairExtensions(library, LOWEST_PRIORITY)
               if (library.convertToCbz) taskReceiver.convertBooksToCbz(library, LOWEST_PRIORITY)
             } ?: logger.warn { "Cannot execute task $task: Library does not exist" }
@@ -103,6 +104,11 @@ class TaskHandler(
             bookRepository.findByIdOrNull(task.bookId)?.let { book ->
               bookConverter.repairExtension(book)
             } ?: logger.warn { "Cannot execute task $task: Book does not exist" }
+
+          is Task.HashBook ->
+            bookRepository.findByIdOrNull(task.bookId)?.let { book ->
+              bookLifecycle.hashAndPersist(book)
+            } ?: logger.warn { "Cannot execute task $task: Book does not exist" }
         }
       }.also {
         logger.info { "Task $task executed in $it" }
diff --git a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt
index ffb54fbc9..a2e1f3743 100644
--- a/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/application/tasks/TaskReceiver.kt
@@ -56,6 +56,12 @@ class TaskReceiver(
     }
   }
 
+  fun hashBooksWithoutHash(library: Library) {
+    bookRepository.findAllIdsByLibraryIdAndWithEmptyHash(library.id).forEach {
+      submitTask(Task.HashBook(it, LOWEST_PRIORITY))
+    }
+  }
+
   fun convertBooksToCbz(library: Library, priority: Int = DEFAULT_PRIORITY) {
     bookConverter.getConvertibleBookIds(library).forEach {
       submitTask(Task.ConvertBook(it, priority))
diff --git a/komga/src/main/kotlin/org/gotson/komga/domain/model/Book.kt b/komga/src/main/kotlin/org/gotson/komga/domain/model/Book.kt
index f92a3df90..4cd380a85 100644
--- a/komga/src/main/kotlin/org/gotson/komga/domain/model/Book.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/domain/model/Book.kt
@@ -13,6 +13,7 @@ data class Book(
   val url: URL,
   val fileLastModified: LocalDateTime,
   val fileSize: Long = 0,
+  val fileHash: String = "",
   val number: Int = 0,
 
   val id: String = TsidCreator.getTsid256().toString(),
diff --git a/komga/src/main/kotlin/org/gotson/komga/domain/persistence/BookRepository.kt b/komga/src/main/kotlin/org/gotson/komga/domain/persistence/BookRepository.kt
index 2325f25aa..e0a710711 100644
--- a/komga/src/main/kotlin/org/gotson/komga/domain/persistence/BookRepository.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/domain/persistence/BookRepository.kt
@@ -26,6 +26,7 @@ interface BookRepository {
   fun findAllIdsByLibraryId(libraryId: String): Collection
   fun findAllIdsByLibraryIdAndMediaTypes(libraryId: String, mediaTypes: Collection): Collection
   fun findAllIdsByLibraryIdAndMismatchedExtension(libraryId: String, mediaType: String, extension: String): Collection
+  fun findAllIdsByLibraryIdAndWithEmptyHash(libraryId: String): Collection
   fun findAllIds(bookSearch: BookSearch, sort: Sort): Collection
 
   fun insert(book: Book)
diff --git a/komga/src/main/kotlin/org/gotson/komga/domain/service/BookLifecycle.kt b/komga/src/main/kotlin/org/gotson/komga/domain/service/BookLifecycle.kt
index 578ec0bc7..f062ee2ce 100644
--- a/komga/src/main/kotlin/org/gotson/komga/domain/service/BookLifecycle.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/domain/service/BookLifecycle.kt
@@ -18,6 +18,7 @@ import org.gotson.komga.domain.persistence.MediaRepository
 import org.gotson.komga.domain.persistence.ReadListRepository
 import org.gotson.komga.domain.persistence.ReadProgressRepository
 import org.gotson.komga.domain.persistence.ThumbnailBookRepository
+import org.gotson.komga.infrastructure.hash.Hasher
 import org.gotson.komga.infrastructure.image.ImageConverter
 import org.gotson.komga.infrastructure.image.ImageType
 import org.springframework.stereotype.Service
@@ -41,6 +42,7 @@ class BookLifecycle(
   private val imageConverter: ImageConverter,
   private val eventPublisher: EventPublisher,
   private val transactionTemplate: TransactionTemplate,
+  private val hasher: Hasher,
 ) {
 
   fun analyzeAndPersist(book: Book): Boolean {
@@ -63,6 +65,16 @@ class BookLifecycle(
     return media.status == Media.Status.READY
   }
 
+  fun hashAndPersist(book: Book) {
+    logger.info { "Hash and persist book: $book" }
+    if (book.fileHash.isBlank()) {
+      val hash = hasher.computeHash(book.path)
+      bookRepository.update(book.copy(fileHash = hash))
+    } else {
+      logger.info { "Book already has a hash, skipping" }
+    }
+  }
+
   fun generateThumbnailAndPersist(book: Book) {
     logger.info { "Generate thumbnail and persist for book: $book" }
     try {
diff --git a/komga/src/main/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycle.kt b/komga/src/main/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycle.kt
index d4deade74..21734b3b4 100644
--- a/komga/src/main/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycle.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycle.kt
@@ -92,7 +92,8 @@ class LibraryContentLifecycle(
               logger.info { "Book changed on disk, update and reset media status: $existingBook" }
               val updatedBook = existingBook.copy(
                 fileLastModified = newBook.fileLastModified,
-                fileSize = newBook.fileSize
+                fileSize = newBook.fileSize,
+                fileHash = "",
               )
               transactionTemplate.executeWithoutResult {
                 mediaRepository.findById(existingBook.id).let {
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/hash/Hasher.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/hash/Hasher.kt
new file mode 100644
index 000000000..827461ee8
--- /dev/null
+++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/hash/Hasher.kt
@@ -0,0 +1,33 @@
+package org.gotson.komga.infrastructure.hash
+
+import mu.KotlinLogging
+import org.apache.commons.codec.digest.XXHash32
+import org.springframework.stereotype.Component
+import java.nio.file.Path
+import kotlin.io.path.inputStream
+
+private val logger = KotlinLogging.logger {}
+
+private const val DEFAULT_BUFFER_SIZE = 4096
+private const val SEED = 0
+
+@Component
+class Hasher {
+
+  fun computeHash(path: Path): String {
+    logger.info { "Hashing: $path" }
+    val hash = XXHash32(SEED)
+
+    path.inputStream().use {
+      val buffer = ByteArray(DEFAULT_BUFFER_SIZE)
+      var len: Int
+
+      do {
+        len = it.read(buffer)
+        if (len >= 0) hash.update(buffer, 0, len)
+      } while (len >= 0)
+    }
+
+    return hash.value.toString(36)
+  }
+}
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/jooq/BookDao.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/jooq/BookDao.kt
index 291d98a77..ed69a38c0 100644
--- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/jooq/BookDao.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/jooq/BookDao.kt
@@ -62,8 +62,7 @@ class BookDao(
       .map { it.toDomain() }
 
   override fun findAll(): Collection =
-    dsl.select(*b.fields())
-      .from(b)
+    dsl.selectFrom(b)
       .fetchInto(b)
       .map { it.toDomain() }
 
@@ -177,6 +176,13 @@ class BookDao(
       .and(b.URL.notLike("%.$extension"))
       .fetch(b.ID)
 
+  override fun findAllIdsByLibraryIdAndWithEmptyHash(libraryId: String): Collection =
+    dsl.select(b.ID)
+      .from(b)
+      .where(b.LIBRARY_ID.eq(libraryId))
+      .and(b.FILE_HASH.eq(""))
+      .fetch(b.ID)
+
   @Transactional
   override fun insert(book: Book) {
     insert(listOf(book))
@@ -194,9 +200,10 @@ class BookDao(
           b.NUMBER,
           b.FILE_LAST_MODIFIED,
           b.FILE_SIZE,
+          b.FILE_HASH,
           b.LIBRARY_ID,
           b.SERIES_ID
-        ).values(null as String?, null, null, null, null, null, null, null)
+        ).values(null as String?, null, null, null, null, null, null, null, null)
       ).also { step ->
         books.forEach {
           step.bind(
@@ -206,6 +213,7 @@ class BookDao(
             it.number,
             it.fileLastModified,
             it.fileSize,
+            it.fileHash,
             it.libraryId,
             it.seriesId
           )
@@ -231,6 +239,7 @@ class BookDao(
       .set(b.NUMBER, book.number)
       .set(b.FILE_LAST_MODIFIED, book.fileLastModified)
       .set(b.FILE_SIZE, book.fileSize)
+      .set(b.FILE_HASH, book.fileHash)
       .set(b.LIBRARY_ID, book.libraryId)
       .set(b.SERIES_ID, book.seriesId)
       .set(b.LAST_MODIFIED_DATE, LocalDateTime.now(ZoneId.of("Z")))
@@ -269,6 +278,7 @@ class BookDao(
       url = URL(url),
       fileLastModified = fileLastModified,
       fileSize = fileSize,
+      fileHash = fileHash,
       id = id,
       libraryId = libraryId,
       seriesId = seriesId,
diff --git a/komga/src/test/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycleTest.kt b/komga/src/test/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycleTest.kt
index 5b781618f..7b584cae1 100644
--- a/komga/src/test/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycleTest.kt
+++ b/komga/src/test/kotlin/org/gotson/komga/domain/service/LibraryContentLifecycleTest.kt
@@ -16,6 +16,7 @@ import org.gotson.komga.domain.persistence.BookRepository
 import org.gotson.komga.domain.persistence.LibraryRepository
 import org.gotson.komga.domain.persistence.MediaRepository
 import org.gotson.komga.domain.persistence.SeriesRepository
+import org.gotson.komga.infrastructure.hash.Hasher
 import org.junit.jupiter.api.AfterEach
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.extension.ExtendWith
@@ -42,6 +43,9 @@ class LibraryContentLifecycleTest(
   @MockkBean
   private lateinit var mockAnalyzer: BookAnalyzer
 
+  @MockkBean
+  private lateinit var mockHasher: Hasher
+
   @AfterEach
   fun `clear repositories`() {
     libraryRepository.findAll().forEach {
@@ -229,7 +233,7 @@ class LibraryContentLifecycleTest(
   }
 
   @Test
-  fun `given existing Book with different last modified date when rescanning then media is marked as outdated`() {
+  fun `given existing Book with different last modified date when rescanning then media is marked as outdated and hash is reset`() {
     // given
     val library = makeLibrary()
     libraryRepository.insert(library)
@@ -243,7 +247,12 @@ class LibraryContentLifecycleTest(
     libraryContentLifecycle.scanRootFolder(library)
 
     every { mockAnalyzer.analyze(any()) } returns Media(status = Media.Status.READY, mediaType = "application/zip", pages = mutableListOf(makeBookPage("1.jpg"), makeBookPage("2.jpg")), bookId = book1.id)
-    bookRepository.findAll().map { bookLifecycle.analyzeAndPersist(it) }
+    every { mockHasher.computeHash(any()) }.returnsMany("abc", "def")
+
+    bookRepository.findAll().map {
+      bookLifecycle.analyzeAndPersist(it)
+      bookLifecycle.hashAndPersist(it)
+    }
 
     // when
     libraryContentLifecycle.scanRootFolder(library)
@@ -251,9 +260,11 @@ class LibraryContentLifecycleTest(
     // then
     verify(exactly = 2) { mockScanner.scanRootFolder(any()) }
     verify(exactly = 1) { mockAnalyzer.analyze(any()) }
+    verify(exactly = 1) { mockHasher.computeHash(any()) }
 
     bookRepository.findAll().first().let { book ->
       assertThat(book.lastModifiedDate).isNotEqualTo(book.createdDate)
+      assertThat(book.fileHash).isEmpty()
 
       mediaRepository.findById(book.id).let { media ->
         assertThat(media.status).isEqualTo(Media.Status.OUTDATED)
diff --git a/komga/src/test/kotlin/org/gotson/komga/infrastructure/jooq/BookDaoTest.kt b/komga/src/test/kotlin/org/gotson/komga/infrastructure/jooq/BookDaoTest.kt
index 94eba3e4f..2b0b69a11 100644
--- a/komga/src/test/kotlin/org/gotson/komga/infrastructure/jooq/BookDaoTest.kt
+++ b/komga/src/test/kotlin/org/gotson/komga/infrastructure/jooq/BookDaoTest.kt
@@ -1,6 +1,5 @@
 package org.gotson.komga.infrastructure.jooq
 
-import mu.KotlinLogging
 import org.assertj.core.api.Assertions.assertThat
 import org.gotson.komga.domain.model.Book
 import org.gotson.komga.domain.model.BookSearch
@@ -57,6 +56,7 @@ class BookDaoTest(
       url = URL("file://book"),
       fileLastModified = now,
       fileSize = 3,
+      fileHash = "abc",
       seriesId = series.id,
       libraryId = library.id
     )
@@ -71,6 +71,7 @@ class BookDaoTest(
     assertThat(created.url).isEqualTo(book.url)
     assertThat(created.fileLastModified).isEqualToIgnoringNanos(book.fileLastModified)
     assertThat(created.fileSize).isEqualTo(book.fileSize)
+    assertThat(created.fileHash).isEqualTo(book.fileHash)
   }
 
   @Test
@@ -92,7 +93,8 @@ class BookDaoTest(
         name = "Updated",
         url = URL("file://updated"),
         fileLastModified = modificationDate,
-        fileSize = 5
+        fileSize = 5,
+        fileHash = "def",
       )
     }
 
@@ -108,6 +110,7 @@ class BookDaoTest(
     assertThat(modified.url).isEqualTo(URL("file://updated"))
     assertThat(modified.fileLastModified).isEqualToIgnoringNanos(modificationDate)
     assertThat(modified.fileSize).isEqualTo(5)
+    assertThat(modified.fileHash).isEqualTo("def")
   }
 
   @Test
@@ -188,34 +191,4 @@ class BookDaoTest(
 
     assertThat(bookDao.count()).isEqualTo(0)
   }
-
-  private val logger = KotlinLogging.logger {}
-
-//  @Test
-//  fun benchmark() {
-//    val books = (1..10000).map {
-//      makeBook(it.toString(), libraryId = library.id, seriesId = series.id)
-//    }
-//
-//    val single = measureTime {
-//      books.map { bookDao.insert(it) }
-//    }
-//    bookDao.deleteAll()
-//
-//    val singleBatch = measureTime {
-//      books.map { bookDao.insertBatch(it) }
-//    }
-//    bookDao.deleteAll()
-//
-//    val transaction = measureTime {
-//      bookDao.insertMany(books)
-//    }
-//    bookDao.deleteAll()
-//
-//    logger.info { "Single: $single" }
-//    logger.info { "SingleBatch: $singleBatch" }
-//    logger.info { "Transaction: $transaction" }
-//
-//    assertThat(single).isEqualTo(transaction)
-//  }
 }