mirror of
https://github.com/gotson/komga.git
synced 2025-12-20 15:34:17 +01:00
feat: generate file hash for books
This commit is contained in:
parent
d2309a5e79
commit
7ad738a645
12 changed files with 99 additions and 38 deletions
|
|
@ -0,0 +1,2 @@
|
|||
ALTER TABLE BOOK
|
||||
ADD COLUMN FILE_HASH varchar NOT NULL DEFAULT '';
|
||||
|
|
@ -32,6 +32,11 @@ sealed class Task(priority: Int = DEFAULT_PRIORITY) : Serializable {
|
|||
override fun toString(): String = "RefreshBookMetadata(bookId='$bookId', capabilities=$capabilities, priority='$priority')"
|
||||
}
|
||||
|
||||
/**
 * Task requesting that the file hash of a single book be computed.
 *
 * @property bookId identifier of the book to hash
 * @param priority scheduling priority handed to the parent [Task]
 */
class HashBook(val bookId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
  /** Identifier derived from the book id, so two hash tasks for the same book share the same id. */
  override fun uniqueId(): String {
    return "HASH_BOOK_$bookId"
  }

  /** Human-readable form used when the task is written to the logs. */
  override fun toString(): String {
    return "HashBook(bookId='$bookId', priority='$priority')"
  }
}
|
||||
|
||||
class RefreshSeriesMetadata(val seriesId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
|
||||
override fun uniqueId() = "REFRESH_SERIES_METADATA_$seriesId"
|
||||
override fun toString(): String = "RefreshSeriesMetadata(seriesId='$seriesId', priority='$priority')"
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ class TaskHandler(
|
|||
libraryRepository.findByIdOrNull(task.libraryId)?.let { library ->
|
||||
libraryContentLifecycle.scanRootFolder(library)
|
||||
taskReceiver.analyzeUnknownAndOutdatedBooks(library)
|
||||
taskReceiver.hashBooksWithoutHash(library)
|
||||
if (library.repairExtensions) taskReceiver.repairExtensions(library, LOWEST_PRIORITY)
|
||||
if (library.convertToCbz) taskReceiver.convertBooksToCbz(library, LOWEST_PRIORITY)
|
||||
} ?: logger.warn { "Cannot execute task $task: Library does not exist" }
|
||||
|
|
@ -103,6 +104,11 @@ class TaskHandler(
|
|||
bookRepository.findByIdOrNull(task.bookId)?.let { book ->
|
||||
bookConverter.repairExtension(book)
|
||||
} ?: logger.warn { "Cannot execute task $task: Book does not exist" }
|
||||
|
||||
is Task.HashBook ->
|
||||
bookRepository.findByIdOrNull(task.bookId)?.let { book ->
|
||||
bookLifecycle.hashAndPersist(book)
|
||||
} ?: logger.warn { "Cannot execute task $task: Book does not exist" }
|
||||
}
|
||||
}.also {
|
||||
logger.info { "Task $task executed in $it" }
|
||||
|
|
|
|||
|
|
@ -56,6 +56,12 @@ class TaskReceiver(
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Submits one lowest-priority [Task.HashBook] for every book of [library]
 * whose stored file hash is empty.
 */
fun hashBooksWithoutHash(library: Library) {
  val unhashedBookIds = bookRepository.findAllIdsByLibraryIdAndWithEmptyHash(library.id)
  for (bookId in unhashedBookIds) {
    submitTask(Task.HashBook(bookId, LOWEST_PRIORITY))
  }
}
|
||||
|
||||
fun convertBooksToCbz(library: Library, priority: Int = DEFAULT_PRIORITY) {
|
||||
bookConverter.getConvertibleBookIds(library).forEach {
|
||||
submitTask(Task.ConvertBook(it, priority))
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ data class Book(
|
|||
val url: URL,
|
||||
val fileLastModified: LocalDateTime,
|
||||
val fileSize: Long = 0,
|
||||
val fileHash: String = "",
|
||||
val number: Int = 0,
|
||||
|
||||
val id: String = TsidCreator.getTsid256().toString(),
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ interface BookRepository {
|
|||
fun findAllIdsByLibraryId(libraryId: String): Collection<String>
|
||||
fun findAllIdsByLibraryIdAndMediaTypes(libraryId: String, mediaTypes: Collection<String>): Collection<String>
|
||||
fun findAllIdsByLibraryIdAndMismatchedExtension(libraryId: String, mediaType: String, extension: String): Collection<String>
|
||||
fun findAllIdsByLibraryIdAndWithEmptyHash(libraryId: String): Collection<String>
|
||||
fun findAllIds(bookSearch: BookSearch, sort: Sort): Collection<String>
|
||||
|
||||
fun insert(book: Book)
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import org.gotson.komga.domain.persistence.MediaRepository
|
|||
import org.gotson.komga.domain.persistence.ReadListRepository
|
||||
import org.gotson.komga.domain.persistence.ReadProgressRepository
|
||||
import org.gotson.komga.domain.persistence.ThumbnailBookRepository
|
||||
import org.gotson.komga.infrastructure.hash.Hasher
|
||||
import org.gotson.komga.infrastructure.image.ImageConverter
|
||||
import org.gotson.komga.infrastructure.image.ImageType
|
||||
import org.springframework.stereotype.Service
|
||||
|
|
@ -41,6 +42,7 @@ class BookLifecycle(
|
|||
private val imageConverter: ImageConverter,
|
||||
private val eventPublisher: EventPublisher,
|
||||
private val transactionTemplate: TransactionTemplate,
|
||||
private val hasher: Hasher,
|
||||
) {
|
||||
|
||||
fun analyzeAndPersist(book: Book): Boolean {
|
||||
|
|
@ -63,6 +65,16 @@ class BookLifecycle(
|
|||
return media.status == Media.Status.READY
|
||||
}
|
||||
|
||||
/**
 * Computes the file hash of [book] and stores it, unless the book already carries one.
 *
 * A blank `fileHash` is treated as "not hashed yet"; any other value is left untouched.
 */
fun hashAndPersist(book: Book) {
  logger.info { "Hash and persist book: $book" }

  // Guard clause: nothing to do when a hash is already present.
  if (!book.fileHash.isBlank()) {
    logger.info { "Book already has a hash, skipping" }
    return
  }

  val computedHash = hasher.computeHash(book.path)
  val hashedBook = book.copy(fileHash = computedHash)
  bookRepository.update(hashedBook)
}
|
||||
|
||||
fun generateThumbnailAndPersist(book: Book) {
|
||||
logger.info { "Generate thumbnail and persist for book: $book" }
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -92,7 +92,8 @@ class LibraryContentLifecycle(
|
|||
logger.info { "Book changed on disk, update and reset media status: $existingBook" }
|
||||
val updatedBook = existingBook.copy(
|
||||
fileLastModified = newBook.fileLastModified,
|
||||
fileSize = newBook.fileSize
|
||||
fileSize = newBook.fileSize,
|
||||
fileHash = "",
|
||||
)
|
||||
transactionTemplate.executeWithoutResult {
|
||||
mediaRepository.findById(existingBook.id).let {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
package org.gotson.komga.infrastructure.hash
|
||||
|
||||
import mu.KotlinLogging
|
||||
import org.apache.commons.codec.digest.XXHash32
|
||||
import org.springframework.stereotype.Component
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.inputStream
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
private const val DEFAULT_BUFFER_SIZE = 4096
|
||||
private const val SEED = 0
|
||||
|
||||
/**
 * Computes a checksum-based hash of a file's content.
 */
@Component
class Hasher {

  /**
   * Streams the file at [path] through an [XXHash32] checksum seeded with `SEED`.
   *
   * The file is read in chunks of `DEFAULT_BUFFER_SIZE` bytes, so it is never loaded
   * into memory as a whole.
   *
   * @param path file to hash
   * @return the checksum value rendered in base 36
   */
  fun computeHash(path: Path): String {
    logger.info { "Hashing: $path" }
    val digest = XXHash32(SEED)

    path.inputStream().use { stream ->
      val chunk = ByteArray(DEFAULT_BUFFER_SIZE)

      // read() returns a negative count once the end of the stream is reached.
      var read = stream.read(chunk)
      while (read >= 0) {
        digest.update(chunk, 0, read)
        read = stream.read(chunk)
      }
    }

    return digest.value.toString(36)
  }
}
|
||||
|
|
@ -62,8 +62,7 @@ class BookDao(
|
|||
.map { it.toDomain() }
|
||||
|
||||
override fun findAll(): Collection<Book> =
|
||||
dsl.select(*b.fields())
|
||||
.from(b)
|
||||
dsl.selectFrom(b)
|
||||
.fetchInto(b)
|
||||
.map { it.toDomain() }
|
||||
|
||||
|
|
@ -177,6 +176,13 @@ class BookDao(
|
|||
.and(b.URL.notLike("%.$extension"))
|
||||
.fetch(b.ID)
|
||||
|
||||
/**
 * Returns the ids of all books in the given library whose `FILE_HASH` column is the empty string.
 */
override fun findAllIdsByLibraryIdAndWithEmptyHash(libraryId: String): Collection<String> {
  val inLibrary = b.LIBRARY_ID.eq(libraryId)
  val withoutHash = b.FILE_HASH.eq("")

  return dsl.select(b.ID)
    .from(b)
    .where(inLibrary)
    .and(withoutHash)
    .fetch(b.ID)
}
|
||||
|
||||
@Transactional
|
||||
override fun insert(book: Book) {
|
||||
insert(listOf(book))
|
||||
|
|
@ -194,9 +200,10 @@ class BookDao(
|
|||
b.NUMBER,
|
||||
b.FILE_LAST_MODIFIED,
|
||||
b.FILE_SIZE,
|
||||
b.FILE_HASH,
|
||||
b.LIBRARY_ID,
|
||||
b.SERIES_ID
|
||||
).values(null as String?, null, null, null, null, null, null, null)
|
||||
).values(null as String?, null, null, null, null, null, null, null, null)
|
||||
).also { step ->
|
||||
books.forEach {
|
||||
step.bind(
|
||||
|
|
@ -206,6 +213,7 @@ class BookDao(
|
|||
it.number,
|
||||
it.fileLastModified,
|
||||
it.fileSize,
|
||||
it.fileHash,
|
||||
it.libraryId,
|
||||
it.seriesId
|
||||
)
|
||||
|
|
@ -231,6 +239,7 @@ class BookDao(
|
|||
.set(b.NUMBER, book.number)
|
||||
.set(b.FILE_LAST_MODIFIED, book.fileLastModified)
|
||||
.set(b.FILE_SIZE, book.fileSize)
|
||||
.set(b.FILE_HASH, book.fileHash)
|
||||
.set(b.LIBRARY_ID, book.libraryId)
|
||||
.set(b.SERIES_ID, book.seriesId)
|
||||
.set(b.LAST_MODIFIED_DATE, LocalDateTime.now(ZoneId.of("Z")))
|
||||
|
|
@ -269,6 +278,7 @@ class BookDao(
|
|||
url = URL(url),
|
||||
fileLastModified = fileLastModified,
|
||||
fileSize = fileSize,
|
||||
fileHash = fileHash,
|
||||
id = id,
|
||||
libraryId = libraryId,
|
||||
seriesId = seriesId,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import org.gotson.komga.domain.persistence.BookRepository
|
|||
import org.gotson.komga.domain.persistence.LibraryRepository
|
||||
import org.gotson.komga.domain.persistence.MediaRepository
|
||||
import org.gotson.komga.domain.persistence.SeriesRepository
|
||||
import org.gotson.komga.infrastructure.hash.Hasher
|
||||
import org.junit.jupiter.api.AfterEach
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
|
|
@ -42,6 +43,9 @@ class LibraryContentLifecycleTest(
|
|||
@MockkBean
|
||||
private lateinit var mockAnalyzer: BookAnalyzer
|
||||
|
||||
@MockkBean
|
||||
private lateinit var mockHasher: Hasher
|
||||
|
||||
@AfterEach
|
||||
fun `clear repositories`() {
|
||||
libraryRepository.findAll().forEach {
|
||||
|
|
@ -229,7 +233,7 @@ class LibraryContentLifecycleTest(
|
|||
}
|
||||
|
||||
@Test
|
||||
fun `given existing Book with different last modified date when rescanning then media is marked as outdated`() {
|
||||
fun `given existing Book with different last modified date when rescanning then media is marked as outdated and hash is reset`() {
|
||||
// given
|
||||
val library = makeLibrary()
|
||||
libraryRepository.insert(library)
|
||||
|
|
@ -243,7 +247,12 @@ class LibraryContentLifecycleTest(
|
|||
libraryContentLifecycle.scanRootFolder(library)
|
||||
|
||||
every { mockAnalyzer.analyze(any()) } returns Media(status = Media.Status.READY, mediaType = "application/zip", pages = mutableListOf(makeBookPage("1.jpg"), makeBookPage("2.jpg")), bookId = book1.id)
|
||||
bookRepository.findAll().map { bookLifecycle.analyzeAndPersist(it) }
|
||||
every { mockHasher.computeHash(any()) }.returnsMany("abc", "def")
|
||||
|
||||
bookRepository.findAll().map {
|
||||
bookLifecycle.analyzeAndPersist(it)
|
||||
bookLifecycle.hashAndPersist(it)
|
||||
}
|
||||
|
||||
// when
|
||||
libraryContentLifecycle.scanRootFolder(library)
|
||||
|
|
@ -251,9 +260,11 @@ class LibraryContentLifecycleTest(
|
|||
// then
|
||||
verify(exactly = 2) { mockScanner.scanRootFolder(any()) }
|
||||
verify(exactly = 1) { mockAnalyzer.analyze(any()) }
|
||||
verify(exactly = 1) { mockHasher.computeHash(any()) }
|
||||
|
||||
bookRepository.findAll().first().let { book ->
|
||||
assertThat(book.lastModifiedDate).isNotEqualTo(book.createdDate)
|
||||
assertThat(book.fileHash).isEmpty()
|
||||
|
||||
mediaRepository.findById(book.id).let { media ->
|
||||
assertThat(media.status).isEqualTo(Media.Status.OUTDATED)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package org.gotson.komga.infrastructure.jooq
|
||||
|
||||
import mu.KotlinLogging
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.gotson.komga.domain.model.Book
|
||||
import org.gotson.komga.domain.model.BookSearch
|
||||
|
|
@ -57,6 +56,7 @@ class BookDaoTest(
|
|||
url = URL("file://book"),
|
||||
fileLastModified = now,
|
||||
fileSize = 3,
|
||||
fileHash = "abc",
|
||||
seriesId = series.id,
|
||||
libraryId = library.id
|
||||
)
|
||||
|
|
@ -71,6 +71,7 @@ class BookDaoTest(
|
|||
assertThat(created.url).isEqualTo(book.url)
|
||||
assertThat(created.fileLastModified).isEqualToIgnoringNanos(book.fileLastModified)
|
||||
assertThat(created.fileSize).isEqualTo(book.fileSize)
|
||||
assertThat(created.fileHash).isEqualTo(book.fileHash)
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -92,7 +93,8 @@ class BookDaoTest(
|
|||
name = "Updated",
|
||||
url = URL("file://updated"),
|
||||
fileLastModified = modificationDate,
|
||||
fileSize = 5
|
||||
fileSize = 5,
|
||||
fileHash = "def",
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -108,6 +110,7 @@ class BookDaoTest(
|
|||
assertThat(modified.url).isEqualTo(URL("file://updated"))
|
||||
assertThat(modified.fileLastModified).isEqualToIgnoringNanos(modificationDate)
|
||||
assertThat(modified.fileSize).isEqualTo(5)
|
||||
assertThat(modified.fileHash).isEqualTo("def")
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -188,34 +191,4 @@ class BookDaoTest(
|
|||
|
||||
assertThat(bookDao.count()).isEqualTo(0)
|
||||
}
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
// @Test
|
||||
// fun benchmark() {
|
||||
// val books = (1..10000).map {
|
||||
// makeBook(it.toString(), libraryId = library.id, seriesId = series.id)
|
||||
// }
|
||||
//
|
||||
// val single = measureTime {
|
||||
// books.map { bookDao.insert(it) }
|
||||
// }
|
||||
// bookDao.deleteAll()
|
||||
//
|
||||
// val singleBatch = measureTime {
|
||||
// books.map { bookDao.insertBatch(it) }
|
||||
// }
|
||||
// bookDao.deleteAll()
|
||||
//
|
||||
// val transaction = measureTime {
|
||||
// bookDao.insertMany(books)
|
||||
// }
|
||||
// bookDao.deleteAll()
|
||||
//
|
||||
// logger.info { "Single: $single" }
|
||||
// logger.info { "SingleBatch: $singleBatch" }
|
||||
// logger.info { "Transaction: $transaction" }
|
||||
//
|
||||
// assertThat(single).isEqualTo(transaction)
|
||||
// }
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue