diff --git a/ERRORCODES.md b/ERRORCODES.md
index 4ebdc635c..2e7bdefee 100644
--- a/ERRORCODES.md
+++ b/ERRORCODES.md
@@ -38,3 +38,8 @@
 | ERR_1032 | EPUB file has wrong media type |
 | ERR_1033 | Some entries are missing |
 | ERR_1034 | An API key with that comment already exists |
+| ERR_1035 | Error while getting EPUB TOC |
+| ERR_1036 | Error while getting EPUB Landmarks |
+| ERR_1037 | Error while getting EPUB page list |
+| ERR_1038 | Error while getting EPUB divina pages |
+| ERR_1039 | Error while getting EPUB positions |
diff --git a/komga-webui/src/locales/en.json b/komga-webui/src/locales/en.json
index c4147e813..e29d92c4c 100644
--- a/komga-webui/src/locales/en.json
+++ b/komga-webui/src/locales/en.json
@@ -827,7 +827,12 @@
     "ERR_1031": "ComicRack CBL Book is missing series or number",
     "ERR_1032": "EPUB file has wrong media type",
     "ERR_1033": "Some entries are missing",
-    "ERR_1034": "An API key with that comment already exists"
+    "ERR_1034": "An API key with that comment already exists",
+    "ERR_1035": "Error while getting EPUB TOC",
+    "ERR_1036": "Error while getting EPUB Landmarks",
+    "ERR_1037": "Error while getting EPUB page list",
+    "ERR_1038": "Error while getting EPUB divina pages",
+    "ERR_1039": "Error while getting EPUB positions"
   },
   "filter": {
     "age_rating": "age rating",
diff --git a/komga/src/main/kotlin/org/gotson/komga/domain/service/BookAnalyzer.kt b/komga/src/main/kotlin/org/gotson/komga/domain/service/BookAnalyzer.kt
index 10d14fafe..2dbf1ebb3 100644
--- a/komga/src/main/kotlin/org/gotson/komga/domain/service/BookAnalyzer.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/domain/service/BookAnalyzer.kt
@@ -23,6 +23,7 @@ import org.gotson.komga.infrastructure.image.ImageType
 import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
 import org.gotson.komga.infrastructure.mediacontainer.divina.DivinaExtractor
 import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
+import org.gotson.komga.infrastructure.mediacontainer.epub.epub
 import org.gotson.komga.infrastructure.mediacontainer.pdf.PdfExtractor
 import org.springframework.beans.factory.annotation.Qualifier
 import org.springframework.beans.factory.annotation.Value
@@ -143,29 +144,84 @@ class BookAnalyzer(
     book: Book,
     analyzeDimensions: Boolean,
   ): Media {
-    val manifest = epubExtractor.getManifest(book.path, analyzeDimensions)
-    val entriesErrorSummary =
-      manifest.missingResources
-        .map { it.fileName }
-        .ifEmpty { null }
-        ?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
-    return Media(
-      status = Media.Status.READY,
-      pages = manifest.divinaPages,
-      files = manifest.resources,
-      pageCount = manifest.pageCount,
-      epubDivinaCompatible = manifest.divinaPages.isNotEmpty(),
-      epubIsKepub = manifest.isKepub,
-      extension =
-        MediaExtensionEpub(
-          toc = manifest.toc,
-          landmarks = manifest.landmarks,
-          pageList = manifest.pageList,
-          isFixedLayout = manifest.isFixedLayout,
-          positions = manifest.positions,
-        ),
-      comment = entriesErrorSummary,
-    )
+    book.path.epub { epub ->
+      val (resources, missingResources) = epubExtractor.getResources(epub).partition { it.fileSize != null }
+      val isFixedLayout = epubExtractor.isFixedLayout(epub)
+      val pageCount = epubExtractor.computePageCount(epub)
+      val isKepub = epubExtractor.isKepub(epub, resources)
+
+      val errors = mutableListOf<String>()
+
+      val toc =
+        try {
+          epubExtractor.getToc(epub)
+        } catch (e: Exception) {
+          logger.error(e) { "Error while getting EPUB TOC" }
+          errors.add("ERR_1035")
+          emptyList()
+        }
+
+      val landmarks =
+        try {
+          epubExtractor.getLandmarks(epub)
+        } catch (e: Exception) {
+          logger.error(e) { "Error while getting EPUB Landmarks" }
+          errors.add("ERR_1036")
+          emptyList()
+        }
+
+      val pageList =
+        try {
+          epubExtractor.getPageList(epub)
+        } catch (e: Exception) {
+          logger.error(e) { "Error while getting EPUB page list" }
+          errors.add("ERR_1037")
+          emptyList()
+        }
+
+      val divinaPages =
+        try {
+          epubExtractor.getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions)
+        } catch (e: Exception) {
+          logger.error(e) { "Error while getting EPUB Divina pages" }
+          errors.add("ERR_1038")
+          emptyList()
+        }
+
+      val positions =
+        try {
+          epubExtractor.computePositions(epub, book.path, resources, isFixedLayout, isKepub)
+        } catch (e: Exception) {
+          logger.error(e) { "Error while getting EPUB positions" }
+          errors.add("ERR_1039")
+          emptyList()
+        }
+
+      val entriesErrorSummary =
+        missingResources
+          .map { it.fileName }
+          .ifEmpty { null }
+          ?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
+      val allErrors = (errors + entriesErrorSummary).joinToString(" ")
+
+      return Media(
+        status = Media.Status.READY,
+        pages = divinaPages,
+        files = resources,
+        pageCount = pageCount,
+        epubDivinaCompatible = divinaPages.isNotEmpty(),
+        epubIsKepub = isKepub,
+        extension =
+          MediaExtensionEpub(
+            toc = toc,
+            landmarks = landmarks,
+            pageList = pageList,
+            isFixedLayout = isFixedLayout,
+            positions = positions,
+          ),
+        comment = allErrors,
+      )
+    }
   }
 
   private fun analyzePdf(
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt
index ba88870a2..69089f3da 100644
--- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt
+++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt
@@ -81,30 +81,7 @@ class EpubExtractor(
     }
   }
 
-  fun getManifest(
-    path: Path,
-    analyzeDimensions: Boolean,
-  ): EpubManifest =
-    path.epub { epub ->
-      val (resources, missingResources) = getResources(epub).partition { it.fileSize != null }
-      val isFixedLayout = isFixedLayout(epub)
-      val pageCount = computePageCount(epub)
-      val isKepub = isKepub(epub, resources)
-      EpubManifest(
-        resources = resources,
-        missingResources = missingResources,
-        toc = getToc(epub),
-        landmarks = getLandmarks(epub),
-        pageList = getPageList(epub),
-        pageCount = pageCount,
-        isFixedLayout = isFixedLayout,
-        positions = computePositions(epub, path, resources, isFixedLayout, isKepub),
-        divinaPages = getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions),
-        isKepub = isKepub,
-      )
-    }
-
-  private fun getResources(epub: EpubPackage): List<MediaFile> {
+  fun getResources(epub: EpubPackage): List<MediaFile> {
     val spine =
       epub.opfDoc
         .select("spine > itemref")
@@ -135,7 +112,7 @@ class EpubExtractor(
     }
   }
 
-  private fun getDivinaPages(
+  fun getDivinaPages(
     epub: EpubPackage,
     isFixedLayout: Boolean,
     pageCount: Int,
@@ -146,72 +123,67 @@ class EpubExtractor(
       return emptyList()
     }
 
-    try {
-      val pagesWithImages =
-        epub.opfDoc
-          .select("spine > itemref")
-          .map { it.attr("idref") }
-          .mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
-          .map { pagePath ->
-            val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
+    val pagesWithImages =
+      epub.opfDoc
+        .select("spine > itemref")
+        .map { it.attr("idref") }
+        .mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
+        .map { pagePath ->
+          val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
 
-            // if a page has text over the threshold then the book is not divina compatible
-            if (doc.body().text().length > letterCountThreshold) return emptyList()
+          // if a page has text over the threshold then the book is not divina compatible
+          if (doc.body().text().length > letterCountThreshold) return emptyList()
 
-            val img =
-              doc
-                .getElementsByTag("img")
-                .map { it.attr("src") } // get the src, which can be a relative path
+          val img =
+            doc
+              .getElementsByTag("img")
+              .map { it.attr("src") } // get the src, which can be a relative path
 
-            val svg =
-              doc
-                .select("svg > image[xlink:href]")
-                .map { it.attr("xlink:href") } // get the source, which can be a relative path
+          val svg =
+            doc
+              .select("svg > image[xlink:href]")
+              .map { it.attr("xlink:href") } // get the source, which can be a relative path
 
-            (img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
-          }
-
-      if (pagesWithImages.size != pageCount) {
-        logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
-        return emptyList()
-      }
-      // Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
-      val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
-      if (imagesPath.size != pageCount) {
-        logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
-        return emptyList()
-      }
-
-      val divinaPages =
-        imagesPath.mapNotNull { imagePath ->
-          val mediaType =
-            epub.manifest.values
-              .firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
-              ?.mediaType ?: return@mapNotNull null
-          val zipEntry = epub.zip.getEntry(imagePath)
-          if (!contentDetector.isImage(mediaType)) return@mapNotNull null
-
-          val dimension =
-            if (analyzeDimensions)
-              epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
-            else
-              null
-          val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
-          BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
+          (img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
         }
 
-      if (divinaPages.size != pageCount) {
-        logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
-        return emptyList()
-      }
-      return divinaPages
-    } catch (e: Exception) {
-      logger.warn(e) { "Error while getting divina pages" }
+    if (pagesWithImages.size != pageCount) {
+      logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
       return emptyList()
     }
+    // Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
+    val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
+    if (imagesPath.size != pageCount) {
+      logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
+      return emptyList()
+    }
+
+    val divinaPages =
+      imagesPath.mapNotNull { imagePath ->
+        val mediaType =
+          epub.manifest.values
+            .firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
+            ?.mediaType ?: return@mapNotNull null
+        val zipEntry = epub.zip.getEntry(imagePath)
+        if (!contentDetector.isImage(mediaType)) return@mapNotNull null
+
+        val dimension =
+          if (analyzeDimensions)
+            epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
+          else
+            null
+        val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
+        BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
+      }
+
+    if (divinaPages.size != pageCount) {
+      logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
+      return emptyList()
+    }
+    return divinaPages
   }
 
-  private fun isKepub(
+  fun isKepub(
     epub: EpubPackage,
     resources: List<MediaFile>,
   ): Boolean {
@@ -228,7 +200,7 @@ class EpubExtractor(
     return false
   }
 
-  private fun computePageCount(epub: EpubPackage): Int {
+  fun computePageCount(epub: EpubPackage): Int {
     val spine =
       epub.opfDoc
         .select("spine > itemref")
@@ -241,11 +213,11 @@ class EpubExtractor(
       .sumOf { ceil(it.compressedSize / 1024.0).toInt() }
   }
 
-  private fun isFixedLayout(epub: EpubPackage) =
+  fun isFixedLayout(epub: EpubPackage) =
     epub.opfDoc.selectFirst("metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" ||
       epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"
 
-  private fun computePositions(
+  fun computePositions(
     epub: EpubPackage,
     path: Path,
     resources: List<MediaFile>,
     isFixedLayout: Boolean,
@@ -346,7 +318,7 @@ class EpubExtractor(
     }
   }
 
-  private fun getToc(epub: EpubPackage): List<EpubTocEntry> {
+  fun getToc(epub: EpubPackage): List<EpubTocEntry> {
     // Epub 3
     epub.getNavResource()?.let { return processNav(it, Epub3Nav.TOC) }
     // Epub 2
@@ -354,7 +326,7 @@ class EpubExtractor(
     return emptyList()
   }
 
-  private fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
+  fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
     // Epub 3
     epub.getNavResource()?.let { return processNav(it, Epub3Nav.PAGELIST) }
     // Epub 2
@@ -362,7 +334,7 @@ class EpubExtractor(
     return emptyList()
   }
 
-  private fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
+  fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
     // Epub 3
     epub.getNavResource()?.let { return processNav(it, Epub3Nav.LANDMARKS) }
 
diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubManifest.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubManifest.kt
deleted file mode 100644
index 862e1c799..000000000
--- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubManifest.kt
+++ /dev/null
@@ -1,19 +0,0 @@
-package org.gotson.komga.infrastructure.mediacontainer.epub
-
-import org.gotson.komga.domain.model.BookPage
-import org.gotson.komga.domain.model.EpubTocEntry
-import org.gotson.komga.domain.model.MediaFile
-import org.gotson.komga.domain.model.R2Locator
-
-data class EpubManifest(
-  val resources: List<MediaFile>,
-  val missingResources: List<MediaFile>,
-  val toc: List<EpubTocEntry>,
-  val landmarks: List<EpubTocEntry>,
-  val pageList: List<EpubTocEntry>,
-  val pageCount: Int,
-  val isFixedLayout: Boolean,
-  val positions: List<R2Locator>,
-  val divinaPages: List<BookPage>,
-  val isKepub: Boolean,
-)
diff --git
a/komga/src/test/kotlin/org/gotson/komga/domain/service/BookAnalyzerTest.kt b/komga/src/test/kotlin/org/gotson/komga/domain/service/BookAnalyzerTest.kt index 366dfaa1f..4bfe84efc 100644 --- a/komga/src/test/kotlin/org/gotson/komga/domain/service/BookAnalyzerTest.kt +++ b/komga/src/test/kotlin/org/gotson/komga/domain/service/BookAnalyzerTest.kt @@ -1,6 +1,7 @@ package org.gotson.komga.domain.service import com.ninjasquad.springmockk.SpykBean +import io.mockk.clearAllMocks import io.mockk.every import io.mockk.verify import org.assertj.core.api.Assertions.assertThat @@ -8,8 +9,12 @@ import org.gotson.komga.domain.model.Book import org.gotson.komga.domain.model.BookPage import org.gotson.komga.domain.model.BookWithMedia import org.gotson.komga.domain.model.Media +import org.gotson.komga.domain.model.MediaExtensionEpub import org.gotson.komga.domain.model.makeBook import org.gotson.komga.infrastructure.configuration.KomgaProperties +import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.Nested import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource @@ -32,194 +37,291 @@ class BookAnalyzerTest( @SpykBean private lateinit var bookAnalyzer: BookAnalyzer - @Test - fun `given rar4 archive when analyzing then media status is READY`() { - val file = ClassPathResource("archives/rar4.rar") - val book = Book("book", file.url, LocalDateTime.now()) + @SpykBean + private lateinit var epubExtractor: EpubExtractor - val media = bookAnalyzer.analyze(book, false) - - assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4") - assertThat(media.status).isEqualTo(Media.Status.READY) - assertThat(media.pages).hasSize(3) + @AfterEach + fun afterEach() { + clearAllMocks() } - @ParameterizedTest - @ValueSource( - strings = [ - "rar4-solid.rar", "rar4-encrypted.rar", - ], - ) - fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) { - val file = ClassPathResource("archives/rar4-solid.rar") - val book = Book("book", file.url, LocalDateTime.now()) + @Nested + inner class ArchiveFormats { + @Test + fun `given rar4 archive when analyzing then media status is READY`() { + val file = ClassPathResource("archives/rar4.rar") + val book = Book("book", file.url, LocalDateTime.now()) - val media = bookAnalyzer.analyze(book, false) + val media = bookAnalyzer.analyze(book, false) - assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4") - assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED) + assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.pages).hasSize(3) + } + + @ParameterizedTest + @ValueSource( + strings = [ + "rar4-solid.rar", "rar4-encrypted.rar", + ], + ) + fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) { + val file = ClassPathResource("archives/rar4-solid.rar") + val book = Book("book", file.url, LocalDateTime.now()) + + val media = bookAnalyzer.analyze(book, false) + + assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4") + assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED) + } + + @ParameterizedTest + @ValueSource( + strings = [ + "rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar", + ], + ) + fun `given rar5 archive when analyzing then 
media status is UNSUPPORTED`(fileName: String) { + val file = ClassPathResource("archives/$fileName") + val book = Book("book", file.url, LocalDateTime.now()) + + val media = bookAnalyzer.analyze(book, false) + + assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=5") + assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED) + } + + @ParameterizedTest + @ValueSource( + strings = [ + "7zip.7z", "7zip-encrypted.7z", + ], + ) + fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) { + val file = ClassPathResource("archives/$fileName") + val book = Book("book", file.url, LocalDateTime.now()) + + val media = bookAnalyzer.analyze(book, false) + + assertThat(media.mediaType).isEqualTo("application/x-7z-compressed") + assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED) + } + + @ParameterizedTest + @ValueSource( + strings = [ + "zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip", + ], + ) + fun `given zip archive when analyzing then media status is READY`(fileName: String) { + val file = ClassPathResource("archives/$fileName") + val book = Book("book", file.url, LocalDateTime.now()) + + val media = bookAnalyzer.analyze(book, false) + + assertThat(media.mediaType).isEqualTo("application/zip") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.pages).hasSize(1) + } + + @Test + fun `given zip encrypted archive when analyzing then media status is ERROR`() { + val file = ClassPathResource("archives/zip-encrypted.zip") + val book = Book("book", file.url, LocalDateTime.now()) + + val media = bookAnalyzer.analyze(book, false) + + assertThat(media.mediaType).isEqualTo("application/zip") + assertThat(media.status).isEqualTo(Media.Status.ERROR) + } + + @Test + fun `given epub archive when analyzing then media status is READY`() { + val file = ClassPathResource("archives/epub3.epub") + val book = Book("book", file.url, LocalDateTime.now()) + + val media = bookAnalyzer.analyze(book, false) + + assertThat(media.mediaType).isEqualTo("application/epub+zip") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.pages).hasSize(0) + } } - @ParameterizedTest - @ValueSource( - strings = [ - "rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar", - ], - ) - fun `given rar5 archive when analyzing then media status is UNSUPPORTED`(fileName: String) { - val file = ClassPathResource("archives/$fileName") - val book = Book("book", file.url, LocalDateTime.now()) + @Nested + inner class Epub { + @Test + fun `given broken epub archive when analyzing then media status is ERROR`() { + val file = ClassPathResource("archives/zip-as-epub.epub") + val book = Book("book", file.url, LocalDateTime.now()) - val media = bookAnalyzer.analyze(book, false) + val media = bookAnalyzer.analyze(book, false) - assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=5") - assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED) + assertThat(media.mediaType).isEqualTo("application/zip") + assertThat(media.status).isEqualTo(Media.Status.ERROR) + assertThat(media.pages).hasSize(0) + } + + @Test + fun `given epub archive when toc cannot be extracted then media status is READY with comments`() { + val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub") + val book = Book("book", file.url, LocalDateTime.now()) + + every { epubExtractor.getToc(any()) } throws Exception("mock exception") + + val media = bookAnalyzer.analyze(book, false) + val 
extension = media.extension as? MediaExtensionEpub + + assertThat(media.mediaType).isEqualTo("application/epub+zip") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.comment).contains("ERR_1035") + assertThat(extension).isNotNull + assertThat(extension!!.toc).isEmpty() + } + + @Test + fun `given epub archive when landmarks cannot be extracted then media status is READY with comments`() { + val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub") + val book = Book("book", file.url, LocalDateTime.now()) + + every { epubExtractor.getLandmarks(any()) } throws Exception("mock exception") + + val media = bookAnalyzer.analyze(book, false) + val extension = media.extension as? MediaExtensionEpub + + assertThat(media.mediaType).isEqualTo("application/epub+zip") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.comment).contains("ERR_1036") + assertThat(extension).isNotNull + assertThat(extension!!.landmarks).isEmpty() + } + + @Test + fun `given epub archive when page list cannot be extracted then media status is READY with comments`() { + val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub") + val book = Book("book", file.url, LocalDateTime.now()) + + every { epubExtractor.getPageList(any()) } throws Exception("mock exception") + + val media = bookAnalyzer.analyze(book, false) + val extension = media.extension as? MediaExtensionEpub + + assertThat(media.mediaType).isEqualTo("application/epub+zip") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.comment).contains("ERR_1037") + assertThat(extension).isNotNull + assertThat(extension!!.pageList).isEmpty() + } + + @Test + fun `given epub archive when divina pages cannot be extracted then media status is READY with comments`() { + val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub") + val book = Book("book", file.url, LocalDateTime.now()) + + every { epubExtractor.getDivinaPages(any(), any(), any(), any()) } throws Exception("mock exception") + + val media = bookAnalyzer.analyze(book, false) + + assertThat(media.mediaType).isEqualTo("application/epub+zip") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.comment).contains("ERR_1038") + assertThat(media.pages).isEmpty() + } + + @Test + fun `given epub archive when positions cannot be extracted then media status is READY with comments`() { + val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub") + val book = Book("book", file.url, LocalDateTime.now()) + + every { epubExtractor.computePositions(any(), any(), any(), any(), any()) } throws Exception("mock exception") + + val media = bookAnalyzer.analyze(book, false) + val extension = media.extension as? 
MediaExtensionEpub + + assertThat(media.mediaType).isEqualTo("application/epub+zip") + assertThat(media.status).isEqualTo(Media.Status.READY) + assertThat(media.comment).contains("ERR_1039") + assertThat(extension).isNotNull + assertThat(extension!!.positions).isEmpty() + } } - @ParameterizedTest - @ValueSource( - strings = [ - "7zip.7z", "7zip-encrypted.7z", - ], - ) - fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) { - val file = ClassPathResource("archives/$fileName") - val book = Book("book", file.url, LocalDateTime.now()) + @Nested + inner class PageHashing { + @Test + fun `given book with a single page when hashing then all pages are hashed`() { + val book = makeBook("book1") + val pages = listOf(BookPage("1.jpeg", "image/jpeg")) + val media = Media(Media.Status.READY, pages = pages) - val media = bookAnalyzer.analyze(book, false) + every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1) + every { bookAnalyzer.hashPage(any(), any()) } returns "hashed" - assertThat(media.mediaType).isEqualTo("application/x-7z-compressed") - assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED) - } + val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media)) - @ParameterizedTest - @ValueSource( - strings = [ - "zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip", - ], - ) - fun `given zip archive when analyzing then media status is READY`(fileName: String) { - val file = ClassPathResource("archives/$fileName") - val book = Book("book", file.url, LocalDateTime.now()) + assertThat(hashedMedia.pages).hasSize(1) + assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed") + } - val media = bookAnalyzer.analyze(book, false) + @Test + fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() { + val book = makeBook("book1") + val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") } + val media = Media(Media.Status.READY, pages = pages) - assertThat(media.mediaType).isEqualTo("application/zip") - assertThat(media.status).isEqualTo(Media.Status.READY) - assertThat(media.pages).hasSize(1) - } + every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1) + every { bookAnalyzer.hashPage(any(), any()) } returns "hashed" - @Test - fun `given zip encrypted archive when analyzing then media status is ERROR`() { - val file = ClassPathResource("archives/zip-encrypted.zip") - val book = Book("book", file.url, LocalDateTime.now()) + val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media)) - val media = bookAnalyzer.analyze(book, false) + assertThat(hashedMedia.pages).hasSize(30) + assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash }) + .hasSize(komgaProperties.pageHashing) + .containsOnly("hashed") + assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash }) + .hasSize(komgaProperties.pageHashing) + .containsOnly("hashed") + assertThat( + hashedMedia.pages + .drop(komgaProperties.pageHashing) + .dropLast(komgaProperties.pageHashing) + .map { it.fileHash }, + ).hasSize(30 - (komgaProperties.pageHashing * 2)) + .containsOnly("") + } - assertThat(media.mediaType).isEqualTo("application/zip") - assertThat(media.status).isEqualTo(Media.Status.ERROR) - } + @Test + fun `given book with already hashed pages when hashing then no hashing is done`() { + val book = makeBook("book1") + val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") } + val media = 
Media(Media.Status.READY, pages = pages) - @Test - fun `given epub archive when analyzing then media status is READY`() { - val file = ClassPathResource("archives/epub3.epub") - val book = Book("book", file.url, LocalDateTime.now()) + val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media)) - val media = bookAnalyzer.analyze(book, false) + verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) } + verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) } - assertThat(media.mediaType).isEqualTo("application/epub+zip") - assertThat(media.status).isEqualTo(Media.Status.READY) - assertThat(media.pages).hasSize(0) - } + assertThat(hashedMedia.pages.map { it.fileHash }) + .hasSize(30) + .containsOnly("hashed") + } - @Test - fun `given broken epub archive when analyzing then media status is ERROR`() { - val file = ClassPathResource("archives/zip-as-epub.epub") - val book = Book("book", file.url, LocalDateTime.now()) + @ParameterizedTest + @MethodSource("provideDirectoriesForPageHashing") + fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) { + val files = directory.listDirectoryEntries() + assertThat(files).hasSize(2) - val media = bookAnalyzer.analyze(book, false) + val mediaType = "image/${directory.fileName.extension}" - assertThat(media.mediaType).isEqualTo("application/zip") - assertThat(media.status).isEqualTo(Media.Status.ERROR) - assertThat(media.pages).hasSize(0) - } + val hashes = + files.map { + bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes()) + } - @Test - fun `given book with a single page when hashing then all pages are hashed`() { - val book = makeBook("book1") - val pages = listOf(BookPage("1.jpeg", "image/jpeg")) - val media = Media(Media.Status.READY, pages = pages) + assertThat(hashes.first()).isEqualTo(hashes.last()) + } - every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1) - every { bookAnalyzer.hashPage(any(), any()) } returns "hashed" - - val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media)) - - assertThat(hashedMedia.pages).hasSize(1) - assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed") - } - - @Test - fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() { - val book = makeBook("book1") - val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") } - val media = Media(Media.Status.READY, pages = pages) - - every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1) - every { bookAnalyzer.hashPage(any(), any()) } returns "hashed" - - val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media)) - - assertThat(hashedMedia.pages).hasSize(30) - assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash }) - .hasSize(komgaProperties.pageHashing) - .containsOnly("hashed") - assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash }) - .hasSize(komgaProperties.pageHashing) - .containsOnly("hashed") - assertThat( - hashedMedia.pages - .drop(komgaProperties.pageHashing) - .dropLast(komgaProperties.pageHashing) - .map { it.fileHash }, - ).hasSize(30 - (komgaProperties.pageHashing * 2)) - .containsOnly("") - } - - @Test - fun `given book with already hashed pages when hashing then no hashing is done`() { - val book = makeBook("book1") - val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") } - val media = Media(Media.Status.READY, pages = pages) - - val hashedMedia = 
bookAnalyzer.hashPages(BookWithMedia(book, media)) - - verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) } - verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) } - - assertThat(hashedMedia.pages.map { it.fileHash }) - .hasSize(30) - .containsOnly("hashed") - } - - @ParameterizedTest - @MethodSource("provideDirectoriesForPageHashing") - fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) { - val files = directory.listDirectoryEntries() - assertThat(files).hasSize(2) - - val mediaType = "image/${directory.fileName.extension}" - - val hashes = - files.map { - bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes()) - } - - assertThat(hashes.first()).isEqualTo(hashes.last()) - } - - companion object { - @JvmStatic - fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries() + private fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries() } }
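Note on the analyzer change above: each EPUB sub-extraction (TOC, landmarks, page list, divina pages, positions) is now attempted independently, so a failure in one step no longer fails the whole analysis — the step falls back to an empty result, the matching ERR_ code is appended to the media comment, and the media is still returned as READY. The sketch below is a minimal, self-contained illustration of that pattern only; the `attempt` helper, the sample values, and the println logging are hypothetical and not part of Komga, which inlines the try/catch per step exactly as shown in the diff.

```kotlin
// Minimal sketch of the per-step error capture used in analyzeEpub: every extraction
// step is attempted on its own, a failure is logged, mapped to an ERR_ code and
// replaced by an empty fallback, so analysis can still complete.
fun main() {
  val errors = mutableListOf<String>()

  // Runs one extraction step; on failure records the code and returns the fallback.
  fun <T> attempt(errorCode: String, fallback: T, block: () -> T): T =
    try {
      block()
    } catch (e: Exception) {
      println("$errorCode: ${e.message}")
      errors.add(errorCode)
      fallback
    }

  // One step succeeds, one throws (standing in for getToc / getLandmarks).
  val toc = attempt("ERR_1035", emptyList<String>()) { listOf("Chapter 1", "Chapter 2") }
  val landmarks = attempt("ERR_1036", emptyList<String>()) { error("nav document is malformed") }

  // Roughly mirrors how the accumulated codes end up in Media.comment.
  val comment = errors.joinToString(" ").ifBlank { null }

  println("toc=$toc landmarks=$landmarks comment=$comment")
  // -> ERR_1036: nav document is malformed
  // -> toc=[Chapter 1, Chapter 2] landmarks=[] comment=ERR_1036
}
```

The new tests above exercise the same behaviour end to end: each one makes a single EpubExtractor method throw and asserts that the media stays READY, that the corresponding list in MediaExtensionEpub is empty, and that the expected ERR_ code appears in media.comment.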