feat: generate file hash for books

This commit is contained in:
Gauthier Roebroeck 2021-07-06 16:47:07 +08:00
parent d2309a5e79
commit 7ad738a645
12 changed files with 99 additions and 38 deletions

View file

@ -0,0 +1,2 @@
-- Add a file-hash column to BOOK; existing rows get the empty-string default,
-- which the application treats as "not hashed yet" (see findAllIdsByLibraryIdAndWithEmptyHash).
ALTER TABLE BOOK
ADD COLUMN FILE_HASH varchar NOT NULL DEFAULT '';

View file

@ -32,6 +32,11 @@ sealed class Task(priority: Int = DEFAULT_PRIORITY) : Serializable {
override fun toString(): String = "RefreshBookMetadata(bookId='$bookId', capabilities=$capabilities, priority='$priority')"
}
// Task requesting that the file hash of a single book be computed and persisted.
class HashBook(val bookId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
  // One hash task per book at most: the unique id is derived from the book id.
  override fun uniqueId(): String {
    return "HASH_BOOK_$bookId"
  }

  override fun toString(): String {
    return "HashBook(bookId='$bookId', priority='$priority')"
  }
}
class RefreshSeriesMetadata(val seriesId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
override fun uniqueId() = "REFRESH_SERIES_METADATA_$seriesId"
override fun toString(): String = "RefreshSeriesMetadata(seriesId='$seriesId', priority='$priority')"

View file

@ -44,6 +44,7 @@ class TaskHandler(
libraryRepository.findByIdOrNull(task.libraryId)?.let { library ->
libraryContentLifecycle.scanRootFolder(library)
taskReceiver.analyzeUnknownAndOutdatedBooks(library)
taskReceiver.hashBooksWithoutHash(library)
if (library.repairExtensions) taskReceiver.repairExtensions(library, LOWEST_PRIORITY)
if (library.convertToCbz) taskReceiver.convertBooksToCbz(library, LOWEST_PRIORITY)
} ?: logger.warn { "Cannot execute task $task: Library does not exist" }
@ -103,6 +104,11 @@ class TaskHandler(
bookRepository.findByIdOrNull(task.bookId)?.let { book ->
bookConverter.repairExtension(book)
} ?: logger.warn { "Cannot execute task $task: Book does not exist" }
is Task.HashBook ->
bookRepository.findByIdOrNull(task.bookId)?.let { book ->
bookLifecycle.hashAndPersist(book)
} ?: logger.warn { "Cannot execute task $task: Book does not exist" }
}
}.also {
logger.info { "Task $task executed in $it" }

View file

@ -56,6 +56,12 @@ class TaskReceiver(
}
}
// Queues a low-priority hash task for every book in the library
// whose FILE_HASH is still empty (i.e. never hashed).
fun hashBooksWithoutHash(library: Library) {
  val unhashedBookIds = bookRepository.findAllIdsByLibraryIdAndWithEmptyHash(library.id)
  unhashedBookIds.forEach { bookId ->
    submitTask(Task.HashBook(bookId, LOWEST_PRIORITY))
  }
}
fun convertBooksToCbz(library: Library, priority: Int = DEFAULT_PRIORITY) {
bookConverter.getConvertibleBookIds(library).forEach {
submitTask(Task.ConvertBook(it, priority))

View file

@ -13,6 +13,7 @@ data class Book(
val url: URL,
val fileLastModified: LocalDateTime,
val fileSize: Long = 0,
val fileHash: String = "",
val number: Int = 0,
val id: String = TsidCreator.getTsid256().toString(),

View file

@ -26,6 +26,7 @@ interface BookRepository {
fun findAllIdsByLibraryId(libraryId: String): Collection<String>
fun findAllIdsByLibraryIdAndMediaTypes(libraryId: String, mediaTypes: Collection<String>): Collection<String>
fun findAllIdsByLibraryIdAndMismatchedExtension(libraryId: String, mediaType: String, extension: String): Collection<String>
fun findAllIdsByLibraryIdAndWithEmptyHash(libraryId: String): Collection<String>
fun findAllIds(bookSearch: BookSearch, sort: Sort): Collection<String>
fun insert(book: Book)

View file

@ -18,6 +18,7 @@ import org.gotson.komga.domain.persistence.MediaRepository
import org.gotson.komga.domain.persistence.ReadListRepository
import org.gotson.komga.domain.persistence.ReadProgressRepository
import org.gotson.komga.domain.persistence.ThumbnailBookRepository
import org.gotson.komga.infrastructure.hash.Hasher
import org.gotson.komga.infrastructure.image.ImageConverter
import org.gotson.komga.infrastructure.image.ImageType
import org.springframework.stereotype.Service
@ -41,6 +42,7 @@ class BookLifecycle(
private val imageConverter: ImageConverter,
private val eventPublisher: EventPublisher,
private val transactionTemplate: TransactionTemplate,
private val hasher: Hasher,
) {
fun analyzeAndPersist(book: Book): Boolean {
@ -63,6 +65,16 @@ class BookLifecycle(
return media.status == Media.Status.READY
}
// Computes the file hash for [book] and persists it, unless the book
// already carries a non-blank hash (in which case nothing is written).
fun hashAndPersist(book: Book) {
  logger.info { "Hash and persist book: $book" }
  if (book.fileHash.isNotBlank()) {
    logger.info { "Book already has a hash, skipping" }
    return
  }
  val computedHash = hasher.computeHash(book.path)
  bookRepository.update(book.copy(fileHash = computedHash))
}
fun generateThumbnailAndPersist(book: Book) {
logger.info { "Generate thumbnail and persist for book: $book" }
try {

View file

@ -92,7 +92,8 @@ class LibraryContentLifecycle(
logger.info { "Book changed on disk, update and reset media status: $existingBook" }
val updatedBook = existingBook.copy(
fileLastModified = newBook.fileLastModified,
fileSize = newBook.fileSize
fileSize = newBook.fileSize,
fileHash = "",
)
transactionTemplate.executeWithoutResult {
mediaRepository.findById(existingBook.id).let {

View file

@ -0,0 +1,33 @@
package org.gotson.komga.infrastructure.hash
import mu.KotlinLogging
import org.apache.commons.codec.digest.XXHash32
import org.springframework.stereotype.Component
import java.nio.file.Path
import kotlin.io.path.inputStream
private val logger = KotlinLogging.logger {}
private const val DEFAULT_BUFFER_SIZE = 4096
private const val SEED = 0
@Component
class Hasher {
  /**
   * Computes the XXHash32 digest of the file at [path], streaming it in
   * [DEFAULT_BUFFER_SIZE]-byte chunks, and returns the hash value encoded
   * as a base-36 string.
   */
  fun computeHash(path: Path): String {
    logger.info { "Hashing: $path" }
    val digest = XXHash32(SEED)
    path.inputStream().use { stream ->
      val chunk = ByteArray(DEFAULT_BUFFER_SIZE)
      // Read until EOF (read() returns -1), folding each chunk into the digest.
      generateSequence { stream.read(chunk) }
        .takeWhile { bytesRead -> bytesRead >= 0 }
        .forEach { bytesRead -> digest.update(chunk, 0, bytesRead) }
    }
    return digest.value.toString(36)
  }
}

View file

@ -62,8 +62,7 @@ class BookDao(
.map { it.toDomain() }
override fun findAll(): Collection<Book> =
dsl.select(*b.fields())
.from(b)
dsl.selectFrom(b)
.fetchInto(b)
.map { it.toDomain() }
@ -177,6 +176,13 @@ class BookDao(
.and(b.URL.notLike("%.$extension"))
.fetch(b.ID)
// Ids of books in the given library whose FILE_HASH column is still the
// empty-string default, i.e. books that have not been hashed yet.
override fun findAllIdsByLibraryIdAndWithEmptyHash(libraryId: String): Collection<String> =
  dsl.select(b.ID)
    .from(b)
    .where(b.LIBRARY_ID.eq(libraryId).and(b.FILE_HASH.eq("")))
    .fetch(b.ID)
@Transactional
override fun insert(book: Book) {
insert(listOf(book))
@ -194,9 +200,10 @@ class BookDao(
b.NUMBER,
b.FILE_LAST_MODIFIED,
b.FILE_SIZE,
b.FILE_HASH,
b.LIBRARY_ID,
b.SERIES_ID
).values(null as String?, null, null, null, null, null, null, null)
).values(null as String?, null, null, null, null, null, null, null, null)
).also { step ->
books.forEach {
step.bind(
@ -206,6 +213,7 @@ class BookDao(
it.number,
it.fileLastModified,
it.fileSize,
it.fileHash,
it.libraryId,
it.seriesId
)
@ -231,6 +239,7 @@ class BookDao(
.set(b.NUMBER, book.number)
.set(b.FILE_LAST_MODIFIED, book.fileLastModified)
.set(b.FILE_SIZE, book.fileSize)
.set(b.FILE_HASH, book.fileHash)
.set(b.LIBRARY_ID, book.libraryId)
.set(b.SERIES_ID, book.seriesId)
.set(b.LAST_MODIFIED_DATE, LocalDateTime.now(ZoneId.of("Z")))
@ -269,6 +278,7 @@ class BookDao(
url = URL(url),
fileLastModified = fileLastModified,
fileSize = fileSize,
fileHash = fileHash,
id = id,
libraryId = libraryId,
seriesId = seriesId,

View file

@ -16,6 +16,7 @@ import org.gotson.komga.domain.persistence.BookRepository
import org.gotson.komga.domain.persistence.LibraryRepository
import org.gotson.komga.domain.persistence.MediaRepository
import org.gotson.komga.domain.persistence.SeriesRepository
import org.gotson.komga.infrastructure.hash.Hasher
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith
@ -42,6 +43,9 @@ class LibraryContentLifecycleTest(
@MockkBean
private lateinit var mockAnalyzer: BookAnalyzer
@MockkBean
private lateinit var mockHasher: Hasher
@AfterEach
fun `clear repositories`() {
libraryRepository.findAll().forEach {
@ -229,7 +233,7 @@ class LibraryContentLifecycleTest(
}
@Test
fun `given existing Book with different last modified date when rescanning then media is marked as outdated`() {
fun `given existing Book with different last modified date when rescanning then media is marked as outdated and hash is reset`() {
// given
val library = makeLibrary()
libraryRepository.insert(library)
@ -243,7 +247,12 @@ class LibraryContentLifecycleTest(
libraryContentLifecycle.scanRootFolder(library)
every { mockAnalyzer.analyze(any()) } returns Media(status = Media.Status.READY, mediaType = "application/zip", pages = mutableListOf(makeBookPage("1.jpg"), makeBookPage("2.jpg")), bookId = book1.id)
bookRepository.findAll().map { bookLifecycle.analyzeAndPersist(it) }
every { mockHasher.computeHash(any()) }.returnsMany("abc", "def")
bookRepository.findAll().map {
bookLifecycle.analyzeAndPersist(it)
bookLifecycle.hashAndPersist(it)
}
// when
libraryContentLifecycle.scanRootFolder(library)
@ -251,9 +260,11 @@ class LibraryContentLifecycleTest(
// then
verify(exactly = 2) { mockScanner.scanRootFolder(any()) }
verify(exactly = 1) { mockAnalyzer.analyze(any()) }
verify(exactly = 1) { mockHasher.computeHash(any()) }
bookRepository.findAll().first().let { book ->
assertThat(book.lastModifiedDate).isNotEqualTo(book.createdDate)
assertThat(book.fileHash).isEmpty()
mediaRepository.findById(book.id).let { media ->
assertThat(media.status).isEqualTo(Media.Status.OUTDATED)

View file

@ -1,6 +1,5 @@
package org.gotson.komga.infrastructure.jooq
import mu.KotlinLogging
import org.assertj.core.api.Assertions.assertThat
import org.gotson.komga.domain.model.Book
import org.gotson.komga.domain.model.BookSearch
@ -57,6 +56,7 @@ class BookDaoTest(
url = URL("file://book"),
fileLastModified = now,
fileSize = 3,
fileHash = "abc",
seriesId = series.id,
libraryId = library.id
)
@ -71,6 +71,7 @@ class BookDaoTest(
assertThat(created.url).isEqualTo(book.url)
assertThat(created.fileLastModified).isEqualToIgnoringNanos(book.fileLastModified)
assertThat(created.fileSize).isEqualTo(book.fileSize)
assertThat(created.fileHash).isEqualTo(book.fileHash)
}
@Test
@ -92,7 +93,8 @@ class BookDaoTest(
name = "Updated",
url = URL("file://updated"),
fileLastModified = modificationDate,
fileSize = 5
fileSize = 5,
fileHash = "def",
)
}
@ -108,6 +110,7 @@ class BookDaoTest(
assertThat(modified.url).isEqualTo(URL("file://updated"))
assertThat(modified.fileLastModified).isEqualToIgnoringNanos(modificationDate)
assertThat(modified.fileSize).isEqualTo(5)
assertThat(modified.fileHash).isEqualTo("def")
}
@Test
@ -188,34 +191,4 @@ class BookDaoTest(
assertThat(bookDao.count()).isEqualTo(0)
}
private val logger = KotlinLogging.logger {}
// @Test
// fun benchmark() {
// val books = (1..10000).map {
// makeBook(it.toString(), libraryId = library.id, seriesId = series.id)
// }
//
// val single = measureTime {
// books.map { bookDao.insert(it) }
// }
// bookDao.deleteAll()
//
// val singleBatch = measureTime {
// books.map { bookDao.insertBatch(it) }
// }
// bookDao.deleteAll()
//
// val transaction = measureTime {
// bookDao.insertMany(books)
// }
// bookDao.deleteAll()
//
// logger.info { "Single: $single" }
// logger.info { "SingleBatch: $singleBatch" }
// logger.info { "Transaction: $transaction" }
//
// assertThat(single).isEqualTo(transaction)
// }
}