feat: deletion of duplicate pages

This commit is contained in:
Gauthier Roebroeck 2022-02-07 18:54:28 +08:00
parent a96335dbee
commit c080f433af
18 changed files with 354 additions and 13 deletions

View file

@ -65,9 +65,15 @@
</v-container>
<v-card-actions>
<v-btn v-if="hash.action === PageHashAction.DELETE_MANUAL" color="primary" @click="deleteMatches">
<v-btn v-if="hash.action === PageHashAction.DELETE_MANUAL"
:color="deleteRequested ? 'success': 'primary'"
:disabled="matchCount === 0"
@click="deleteMatches"
>
<v-icon left v-if="deleteRequested">mdi-check</v-icon>
{{ $t('duplicate_pages.action_delete_matches') }}
</v-btn>
<v-btn v-if="hash.action !== PageHashAction.IGNORE" text @click="ignore">{{
$t('duplicate_pages.action_ignore')
}}
@ -102,6 +108,7 @@ export default Vue.extend({
getFileSize,
PageHashAction,
matchCount: undefined as number | undefined,
deleteRequested: false,
}
},
computed: {
@ -134,7 +141,11 @@ export default Vue.extend({
else
this.matchCount = undefined
},
deleteMatches() {
async deleteMatches() {
if(!this.deleteRequested) {
await this.$komgaPageHashes.performDelete(this.hash)
this.deleteRequested = true
}
},
ignore() {
this.updatePageHash(PageHashAction.IGNORE)

View file

@ -545,7 +545,8 @@
"matches_n": "No matches | 1 match | {count} matches",
"saved_size": "Saved {size}",
"title": "Duplicate pages",
"unknown_size": "Unknown size"
"unknown_size": "Unknown size",
"info": "Deleting duplicate pages will modify your files. Backup your files and use manual deletion before using automatic deletion."
},
"duplicates": {
"file_hash": "File hash",

View file

@ -81,4 +81,23 @@ export default class KomgaPageHashesService {
throw new Error(msg)
}
}
/**
 * Asks the server to delete all pages matching the given known page hash.
 * The server re-packages each matching book, so this modifies files on disk.
 *
 * @param pageHash the known page hash to act on
 * @throws Error carrying the server-provided message when the request fails
 */
async performDelete(pageHash: PageHashKnownDto) {
  try {
    const params = {
      media_type: pageHash.mediaType,
      // the endpoint requires a file size; -1 stands for "unknown size"
      file_size: pageHash.size || -1,
    }
    await this.http.post(`${API_PAGE_HASH}/${pageHash.hash}/perform-delete`, pageHash, {
      params: params,
      paramsSerializer: params => qs.stringify(params, {indices: false}),
    })
  } catch (e) {
    // interpolate the hash itself: templating the DTO would print '[object Object]'
    let msg = `An error occurred while trying to execute perform-delete on page hash ${pageHash.hash}`
    // e.response is undefined for network-level failures, so chain optionally
    if (e.response?.data?.message) {
      msg += `: ${e.response.data.message}`
    }
    throw new Error(msg)
  }
}
}

View file

@ -1,5 +1,9 @@
<template>
<v-container fluid class="pa-6">
<v-alert type="warning" dismissible text class="body-2">
<div>{{ $t('duplicate_pages.info') }}</div>
</v-alert>
<v-row align="center">
<v-col cols="auto">
<v-pagination

View file

@ -1,5 +1,9 @@
<template>
<v-container fluid class="pa-6">
<v-alert type="warning" dismissible text class="body-2">
<div>{{ $t('duplicate_pages.info') }}</div>
</v-alert>
<v-row align="center">
<v-col cols="auto">
<v-pagination
@ -142,11 +146,11 @@ export default Vue.extend({
paginationVisible(): number {
switch (this.$vuetify.breakpoint.name) {
case 'xs':
return 5
case 'sm':
case 'md':
return 10
return 5
case 'lg':
return 10
case 'xl':
default:
return 15

View file

@ -1,6 +1,7 @@
package org.gotson.komga.application.tasks
import org.gotson.komga.domain.model.BookMetadataPatchCapability
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.CopyMode
import org.gotson.komga.infrastructure.search.LuceneEntity
import java.io.Serializable
@ -85,6 +86,11 @@ sealed class Task(priority: Int = DEFAULT_PRIORITY, val groupId: String? = null)
override fun toString(): String = "RepairExtension(bookId='$bookId', priority='$priority')"
}
/**
 * Task requesting removal of the given [pages] from the book's archive file.
 *
 * NOTE(review): only the last character of [groupId] is kept as the task group
 * (callers pass the bookId) — presumably to spread page-removal work across a
 * small fixed set of task groups; confirm this is intentional.
 */
class RemoveHashedPages(val bookId: String, val pages: Collection<BookPageNumbered>, priority: Int = DEFAULT_PRIORITY, groupId: String) : Task(priority, groupId.takeLast(1)) {
// One unique id per book: a newer removal task for the same book supersedes an older one.
override fun uniqueId(): String = "REMOVE_HASHED_PAGES_$bookId"
override fun toString(): String = "RemoveHashedPages(bookId='$bookId', priority='$priority')"
}
class RebuildIndex(val entities: Set<LuceneEntity>?, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
override fun uniqueId() = "REBUILD_INDEX"
override fun toString(): String = "RebuildIndex(priority='$priority',entities='${entities?.map { it.type }}')"

View file

@ -8,6 +8,7 @@ import org.gotson.komga.domain.service.BookConverter
import org.gotson.komga.domain.service.BookImporter
import org.gotson.komga.domain.service.BookLifecycle
import org.gotson.komga.domain.service.BookMetadataLifecycle
import org.gotson.komga.domain.service.BookPageEditor
import org.gotson.komga.domain.service.LibraryContentLifecycle
import org.gotson.komga.domain.service.LocalArtworkLifecycle
import org.gotson.komga.domain.service.SeriesLifecycle
@ -37,6 +38,7 @@ class TaskHandler(
private val localArtworkLifecycle: LocalArtworkLifecycle,
private val bookImporter: BookImporter,
private val bookConverter: BookConverter,
private val bookPageEditor: BookPageEditor,
private val searchIndexLifecycle: SearchIndexLifecycle,
) {
@ -55,6 +57,7 @@ class TaskHandler(
taskReceiver.hashBookPagesWithMissingHash(library)
taskReceiver.repairExtensions(library, LOWEST_PRIORITY)
taskReceiver.convertBooksToCbz(library, LOWEST_PRIORITY)
taskReceiver.removeDuplicatePages(library, LOWEST_PRIORITY)
} ?: logger.warn { "Cannot execute task $task: Library does not exist" }
is Task.EmptyTrash ->
@ -118,6 +121,11 @@ class TaskHandler(
bookConverter.repairExtension(book)
} ?: logger.warn { "Cannot execute task $task: Book does not exist" }
is Task.RemoveHashedPages ->
bookRepository.findByIdOrNull(task.bookId)?.let { book ->
bookPageEditor.removeHashedPages(book, task.pages)
} ?: logger.warn { "Cannot execute task $task: Book does not exist" }
is Task.HashBook ->
bookRepository.findByIdOrNull(task.bookId)?.let { book ->
bookLifecycle.hashAndPersist(book)

View file

@ -3,6 +3,7 @@ package org.gotson.komga.application.tasks
import mu.KotlinLogging
import org.gotson.komga.domain.model.Book
import org.gotson.komga.domain.model.BookMetadataPatchCapability
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.BookSearch
import org.gotson.komga.domain.model.CopyMode
import org.gotson.komga.domain.model.Library
@ -93,6 +94,16 @@ class TaskReceiver(
}
}
/**
 * Submits one page-removal task per book in [library] that contains pages
 * whose hash is marked for automatic deletion (Action.DELETE_AUTO).
 */
fun removeDuplicatePages(library: Library, priority: Int = DEFAULT_PRIORITY) {
pageHashLifecycle.getBookPagesToDeleteAutomatically(library).forEach { (bookId, pages) ->
removeDuplicatePages(bookId, pages, priority)
}
}
/**
 * Submits a single task to remove [pages] from one book.
 * The bookId doubles as the task group id.
 */
fun removeDuplicatePages(bookId: String, pages: Collection<BookPageNumbered>, priority: Int = DEFAULT_PRIORITY) {
submitTask(Task.RemoveHashedPages(bookId, pages, priority, bookId))
}
fun analyzeBook(book: Book, priority: Int = DEFAULT_PRIORITY) {
submitTask(Task.AnalyzeBook(book.id, priority, book.seriesId))
}

View file

@ -1,9 +1,27 @@
package org.gotson.komga.domain.model
data class BookPage(
import java.io.Serializable
/**
 * A single page inside a book's media archive.
 *
 * Declared `open` (instead of a data class) so subclasses can add fields;
 * [toString] and [copy] are therefore written out by hand.
 */
open class BookPage(
  val fileName: String,
  val mediaType: String,
  val dimension: Dimension? = null,
  val fileHash: String = "",
  val fileSize: Long? = null,
) : Serializable {
  override fun toString(): String = "BookPage(fileName='$fileName', mediaType='$mediaType', dimension=$dimension, fileHash='$fileHash', fileSize=$fileSize)"

  /** Returns a new [BookPage], replacing only the fields provided by the caller. */
  fun copy(
    fileName: String = this.fileName,
    mediaType: String = this.mediaType,
    dimension: Dimension? = this.dimension,
    fileHash: String = this.fileHash,
    fileSize: Long? = this.fileSize,
  ) = BookPage(fileName, mediaType, dimension, fileHash, fileSize)
}

View file

@ -0,0 +1,21 @@
package org.gotson.komga.domain.model
import java.io.Serializable
/**
 * A [BookPage] that also carries its 1-based position ([pageNumber]) within the book.
 *
 * NOTE(review): listing [Serializable] here is redundant — BookPage already
 * implements it — but harmless.
 */
class BookPageNumbered(
fileName: String,
mediaType: String,
dimension: Dimension? = null,
fileHash: String = "",
fileSize: Long? = null,
val pageNumber: Int,
) : BookPage(
fileName = fileName,
mediaType = mediaType,
dimension = dimension,
fileHash = fileHash,
fileSize = fileSize,
),
Serializable {
override fun toString(): String = "BookPageNumbered(fileName='$fileName', mediaType='$mediaType', dimension=$dimension, fileHash='$fileHash', fileSize=$fileSize, pageNumber=$pageNumber)"
}

View file

@ -1,6 +1,8 @@
package org.gotson.komga.domain.model
import java.io.Serializable
data class Dimension(
val width: Int,
val height: Int,
)
) : Serializable

View file

@ -17,4 +17,18 @@ class PageHashKnown(
DELETE_MANUAL,
IGNORE,
}
/**
 * Returns a new PageHashKnown, replacing only the fields provided by the caller
 * (hand-written copy, since the enclosing class is not a data class).
 */
fun copy(
hash: String = this.hash,
mediaType: String = this.mediaType,
size: Long? = this.size,
action: Action = this.action,
deleteCount: Int = this.deleteCount,
) = PageHashKnown(
hash = hash,
mediaType = mediaType,
size = size,
action = action,
deleteCount = deleteCount,
)
}

View file

@ -12,7 +12,7 @@ interface PageHashRepository {
fun findAllKnown(actions: List<PageHashKnown.Action>?, pageable: Pageable): Page<PageHashKnown>
fun findAllUnknown(pageable: Pageable): Page<PageHashUnknown>
fun findMatchesByHash(pageHash: PageHash, pageable: Pageable): Page<PageHashMatch>
fun findMatchesByHash(pageHash: PageHash, libraryId: String?, pageable: Pageable): Page<PageHashMatch>
fun getKnownThumbnail(pageHash: PageHash): ByteArray?

View file

@ -55,6 +55,7 @@ class BookConverter(
bookRepository.findAllByLibraryIdAndMediaTypes(library.id, convertibleTypes)
fun convertToCbz(book: Book) {
// TODO: check if file has changed on disk before doing conversion
if (!libraryRepository.findById(book.libraryId).convertToCbz)
return logger.info { "Book conversion is disabled for the library, it may have changed since the task was submitted, skipping" }
@ -127,6 +128,7 @@ class BookConverter(
transactionTemplate.executeWithoutResult {
bookRepository.update(convertedBook)
// TODO: restore page hash from existing media
mediaRepository.update(convertedMedia)
}
}

View file

@ -0,0 +1,162 @@
package org.gotson.komga.domain.service
import mu.KotlinLogging
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream
import org.apache.commons.io.FilenameUtils
import org.gotson.komga.domain.model.Book
import org.gotson.komga.domain.model.BookConversionException
import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.BookWithMedia
import org.gotson.komga.domain.model.Media
import org.gotson.komga.domain.model.MediaNotReadyException
import org.gotson.komga.domain.model.MediaType
import org.gotson.komga.domain.model.MediaUnsupportedException
import org.gotson.komga.domain.model.PageHash
import org.gotson.komga.domain.persistence.BookRepository
import org.gotson.komga.domain.persistence.LibraryRepository
import org.gotson.komga.domain.persistence.MediaRepository
import org.gotson.komga.domain.persistence.PageHashRepository
import org.gotson.komga.infrastructure.language.notEquals
import org.springframework.stereotype.Service
import org.springframework.transaction.support.TransactionTemplate
import java.io.FileNotFoundException
import java.nio.file.Files
import java.util.zip.Deflater
import kotlin.io.path.deleteIfExists
import kotlin.io.path.moveTo
import kotlin.io.path.outputStream
private val logger = KotlinLogging.logger {}
// Temp archives are created in the book's own directory (see createTempFile below),
// presumably so the final replace is a same-volume move — confirm.
private const val TEMP_PREFIX = "komga_page_removal_"
private const val TEMP_SUFFIX = ".tmp"
@Service
/**
 * Rewrites a book's archive file to physically remove duplicate pages.
 *
 * Workflow of [removeHashedPages]: verify the file is unchanged on disk,
 * build a new zip without the targeted pages, analyze and sanity-check the
 * new file, replace the original, then persist the updated book/media and
 * bump the delete counters — the last step inside a single transaction.
 */
class BookPageEditor(
private val bookAnalyzer: BookAnalyzer,
private val fileSystemScanner: FileSystemScanner,
private val bookRepository: BookRepository,
private val mediaRepository: MediaRepository,
private val libraryRepository: LibraryRepository,
private val pageHashRepository: PageHashRepository,
private val transactionTemplate: TransactionTemplate,
) {
// Only plain zip archives can be rewritten.
private val convertibleTypes = listOf(MediaType.ZIP.value)
// Book ids whose removal failed once; used to avoid retrying them.
// NOTE(review): never reset and not synchronized — confirm tasks for this
// service are executed serially, and that a process restart is the intended reset.
private val failedPageRemoval = mutableListOf<String>()
/**
 * Removes [pagesToDelete] from [book]'s archive file on disk and updates the
 * persisted book/media accordingly.
 *
 * Returns silently (with a log) when preconditions fail softly; throws
 * FileNotFoundException, MediaUnsupportedException, MediaNotReadyException,
 * BookConversionException or IllegalStateException on hard failures.
 */
fun removeHashedPages(book: Book, pagesToDelete: Collection<BookPageNumbered>) {
// perform various checks
if (failedPageRemoval.contains(book.id))
return logger.info { "Book page removal already failed before, skipping" }
// the stored metadata must still match the file on disk, else pages may be stale
fileSystemScanner.scanFile(book.path)?.let { scannedBook ->
if (scannedBook.fileLastModified.notEquals(book.fileLastModified))
return logger.info { "Book has changed on disk, skipping" }
} ?: throw FileNotFoundException("File not found: ${book.path}")
val media = mediaRepository.findById(book.id)
if (!convertibleTypes.contains(media.mediaType))
throw MediaUnsupportedException("${media.mediaType} cannot be converted. Must be one of $convertibleTypes")
if (media.status != Media.Status.READY)
throw MediaNotReadyException()
// create a temp file with the pages removed
// a page is kept unless an exact candidate (hash + media type + name + 1-based position) asks for its removal
val pagesToKeep = media.pages.filterIndexed { index, page ->
pagesToDelete.find { candidate ->
candidate.fileHash == page.fileHash &&
candidate.mediaType == page.mediaType &&
candidate.fileName == page.fileName &&
candidate.pageNumber == index + 1
} == null
}
// every requested page must have matched exactly once, else abort softly
if (media.pages.size != (pagesToKeep.size + pagesToDelete.size))
return logger.info { "Should be removing ${pagesToDelete.size} pages from book, but count doesn't add up, skipping" }
logger.info { "Start removal of ${pagesToDelete.size} pages for book: $book" }
logger.debug { "Pages: ${media.pages}" }
logger.debug { "Pages to delete: $pagesToDelete" }
logger.debug { "Pages to keep: $pagesToKeep" }
// temp file lives next to the original so the later moveTo replaces it in place
val tempFile = Files.createTempFile(book.path.parent, TEMP_PREFIX, TEMP_SUFFIX)
logger.info { "Creating new file: $tempFile" }
ZipArchiveOutputStream(tempFile.outputStream()).use { zipStream ->
zipStream.setMethod(ZipArchiveOutputStream.DEFLATED)
zipStream.setLevel(Deflater.NO_COMPRESSION)
// copy kept pages plus every non-page file from the original archive
pagesToKeep.map { it.fileName }
.union(media.files)
.forEach { entry ->
zipStream.putArchiveEntry(ZipArchiveEntry(entry))
zipStream.write(bookAnalyzer.getFileContent(BookWithMedia(book, media), entry))
zipStream.closeArchiveEntry()
}
}
// perform checks on new file
val createdBook = fileSystemScanner.scanFile(tempFile)
?.copy(
id = book.id,
seriesId = book.seriesId,
libraryId = book.libraryId,
)
?: throw IllegalStateException("Newly created book could not be scanned: $tempFile")
val createdMedia = bookAnalyzer.analyze(createdBook, libraryRepository.findById(book.libraryId).analyzeDimensions)
try {
when {
createdMedia.status != Media.Status.READY
-> throw BookConversionException("Created file could not be analyzed, aborting page removal")
createdMedia.mediaType != MediaType.ZIP.value
-> throw BookConversionException("Created file is not a zip file, aborting page removal")
// NOTE(review): message says "aborting conversion", siblings say "aborting page removal" — likely copy/paste from BookConverter
!createdMedia.pages.map { FilenameUtils.getName(it.fileName) to it.mediaType }
.containsAll(pagesToKeep.map { FilenameUtils.getName(it.fileName) to it.mediaType })
-> throw BookConversionException("Created file does not contain all pages to keep from existing file, aborting conversion")
!createdMedia.files.map { FilenameUtils.getName(it) }
.containsAll(media.files.map { FilenameUtils.getName(it) })
-> throw BookConversionException("Created file does not contain all files from existing file, aborting page removal")
}
} catch (e: BookConversionException) {
// validation failed: clean up and remember this book so we never retry
tempFile.deleteIfExists()
failedPageRemoval += book.id
throw e
}
// replace the original file with the rewritten archive
tempFile.moveTo(book.path, true)
val newBook = fileSystemScanner.scanFile(book.path)
?.copy(
id = book.id,
seriesId = book.seriesId,
libraryId = book.libraryId,
)
?: throw IllegalStateException("Newly created book could not be scanned after replacing existing one: ${book.path}")
// carry over known page hashes so the kept pages don't need re-hashing
val mediaWithHashes = createdMedia.copy(pages = restorePageHash(createdMedia.pages, media.pages))
transactionTemplate.executeWithoutResult {
bookRepository.update(newBook)
mediaRepository.update(mediaWithHashes)
// increment the delete counter of each known hash that was acted upon
pagesToDelete
.mapNotNull { pageHashRepository.findKnown(PageHash(it.fileHash, it.mediaType, it.fileSize)) }
.forEach { pageHashRepository.update(it.copy(deleteCount = it.deleteCount + 1)) }
}
}
// Copies a non-blank fileHash from a matching old page (same size, media type
// and name) onto each new page; pages without a match are returned unchanged.
private fun restorePageHash(newPages: List<BookPage>, restoreFrom: List<BookPage>): List<BookPage> =
newPages.map { newPage ->
restoreFrom.find {
it.fileSize == newPage.fileSize &&
it.mediaType == newPage.mediaType &&
it.fileName == newPage.fileName &&
it.fileHash.isNotBlank()
}?.let { newPage.copy(fileHash = it.fileHash) }
?: newPage
}
}

View file

@ -1,6 +1,7 @@
package org.gotson.komga.domain.service
import org.gotson.komga.domain.model.BookPageContent
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.Library
import org.gotson.komga.domain.model.MediaType
import org.gotson.komga.domain.model.PageHash
@ -30,12 +31,35 @@ class PageHashLifecycle(
mediaRepository.findAllBookAndSeriesIdsByLibraryIdAndMediaTypeAndWithMissingPageHash(library.id, hashableMediaTypes, komgaProperties.pageHashing)
fun getPage(pageHash: PageHash, resizeTo: Int? = null): BookPageContent? {
val match = pageHashRepository.findMatchesByHash(pageHash, Pageable.ofSize(1)).firstOrNull() ?: return null
val match = pageHashRepository.findMatchesByHash(pageHash, null, Pageable.ofSize(1)).firstOrNull() ?: return null
val book = bookRepository.findByIdOrNull(match.bookId) ?: return null
return bookLifecycle.getBookPage(book, match.pageNumber, resizeTo = resizeTo)
}
/**
 * For [library], finds every page matching a hash marked Action.DELETE_AUTO and
 * returns them grouped by book id, as numbered pages ready to feed deletion tasks.
 */
fun getBookPagesToDeleteAutomatically(library: Library): Map<String, Collection<BookPageNumbered>> {
val hashesAutoDelete = pageHashRepository.findAllKnown(listOf(PageHashKnown.Action.DELETE_AUTO), Pageable.unpaged()).content
// one Map<bookId, pages> per auto-delete hash, restricted to this library
return hashesAutoDelete.map { hash ->
pageHashRepository.findMatchesByHash(hash, library.id, Pageable.unpaged()).content
.groupBy(
{ it.bookId },
{
BookPageNumbered(
fileName = it.fileName,
mediaType = hash.mediaType,
fileHash = hash.hash,
fileSize = hash.size,
pageNumber = it.pageNumber,
)
},
)
// merge the per-hash maps: one book can match several hashes
}.flatMap { it.entries }
.groupBy({ it.key }, { it.value })
.mapValues { it.value.flatten() }
.filter { it.value.isNotEmpty() }
}
fun createOrUpdate(pageHash: PageHashKnown) {
if (pageHash.action == PageHashKnown.Action.DELETE_AUTO && pageHash.size == null) throw IllegalArgumentException("cannot create PageHash without size and Action.DELETE_AUTO")
@ -43,7 +67,7 @@ class PageHashLifecycle(
if (existing == null) {
pageHashRepository.insert(pageHash, getPage(pageHash, 500)?.content)
} else {
pageHashRepository.update(pageHash)
pageHashRepository.update(existing.copy(action = pageHash.action))
}
}

View file

@ -128,7 +128,7 @@ class PageHashDao(
)
}
override fun findMatchesByHash(pageHash: PageHash, pageable: Pageable): Page<PageHashMatch> {
override fun findMatchesByHash(pageHash: PageHash, libraryId: String?, pageable: Pageable): Page<PageHashMatch> {
val query = dsl.select(p.BOOK_ID, b.URL, p.NUMBER, p.FILE_NAME)
.from(p)
.leftJoin(b).on(p.BOOK_ID.eq(b.ID))
@ -138,6 +138,7 @@ class PageHashDao(
if (pageHash.size == null) and(p.FILE_SIZE.isNull)
else and(p.FILE_SIZE.eq(pageHash.size))
}
.apply { libraryId?.let { and(b.LIBRARY_ID.eq(it)) } }
val count = dsl.fetchCount(query)
@ -196,6 +197,7 @@ class PageHashDao(
override fun update(pageHash: PageHashKnown) {
dsl.update(ph)
.set(ph.ACTION, pageHash.action.name)
.set(ph.DELETE_COUNT, pageHash.deleteCount)
.set(ph.LAST_MODIFIED_DATE, LocalDateTime.now(ZoneId.of("Z")))
.where(ph.HASH.eq(pageHash.hash))
.and(ph.MEDIA_TYPE.eq(pageHash.mediaType))

View file

@ -4,6 +4,8 @@ import io.swagger.v3.oas.annotations.Parameter
import io.swagger.v3.oas.annotations.media.Content
import io.swagger.v3.oas.annotations.media.Schema
import io.swagger.v3.oas.annotations.responses.ApiResponse
import org.gotson.komga.application.tasks.TaskReceiver
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.PageHash
import org.gotson.komga.domain.model.PageHashKnown
import org.gotson.komga.domain.model.ROLE_ADMIN
@ -24,6 +26,7 @@ import org.springframework.http.ResponseEntity
import org.springframework.security.access.prepost.PreAuthorize
import org.springframework.web.bind.annotation.GetMapping
import org.springframework.web.bind.annotation.PathVariable
import org.springframework.web.bind.annotation.PostMapping
import org.springframework.web.bind.annotation.PutMapping
import org.springframework.web.bind.annotation.RequestBody
import org.springframework.web.bind.annotation.RequestMapping
@ -39,6 +42,7 @@ import javax.validation.Valid
class PageHashController(
private val pageHashRepository: PageHashRepository,
private val pageHashLifecycle: PageHashLifecycle,
private val taskReceiver: TaskReceiver,
) {
@GetMapping
@ -76,6 +80,7 @@ class PageHashController(
): Page<PageHashMatchDto> =
pageHashRepository.findMatchesByHash(
PageHash(pageHash, mediaType, size),
null,
page,
).map { it.toDto() }
@ -98,7 +103,7 @@ class PageHashController(
@PutMapping
@ResponseStatus(HttpStatus.ACCEPTED)
fun createKnownPageHash(
fun createOrUpdateKnownPageHash(
@Valid @RequestBody pageHash: PageHashCreationDto,
) {
try {
@ -114,4 +119,31 @@ class PageHashController(
throw ResponseStatusException(HttpStatus.BAD_REQUEST, e.message)
}
}
/**
 * Triggers deletion of all pages matching the given known page hash.
 *
 * Deletion is asynchronous: one page-removal task is submitted per matching
 * book, and 202 Accepted is returned immediately. Responds 404 when the hash
 * is not a known page hash.
 */
@PostMapping("{pageHash}/perform-delete")
@ResponseStatus(HttpStatus.ACCEPTED)
fun performDelete(
@PathVariable pageHash: String,
@RequestParam("media_type") mediaType: String,
@RequestParam("file_size") size: Long,
) {
val hash = pageHashRepository.findKnown(PageHash(pageHash, mediaType, size))
?: throw ResponseStatusException(HttpStatus.NOT_FOUND)
// all matches across libraries (libraryId = null), grouped per book
val toRemove = pageHashRepository.findMatchesByHash(hash, null, Pageable.unpaged())
.groupBy(
{ it.bookId },
{
BookPageNumbered(
fileName = it.fileName,
mediaType = hash.mediaType,
fileHash = hash.hash,
fileSize = hash.size,
pageNumber = it.pageNumber,
)
},
)
toRemove.forEach { taskReceiver.removeDuplicatePages(it.key, it.value) }
}
}