From 3ab21ff6aa119980fcbbb36e8e4c75c8a878a908 Mon Sep 17 00:00:00 2001 From: Gauthier Roebroeck Date: Wed, 20 Aug 2025 14:30:33 +0800 Subject: [PATCH] fix: ignore xml namespace in EPUB opf file Closes: #2043 --- .../mediacontainer/epub/EpubExtractor.kt | 14 +- .../infrastructure/mediacontainer/epub/Opf.kt | 6 +- .../metadata/epub/EpubMetadataProvider.kt | 26 ++-- .../metadata/epub/EpubMetadataProviderTest.kt | 26 +++- .../epub/Panik im Paradies - namespace.opf | 128 ++++++++++++++++++ 5 files changed, 171 insertions(+), 29 deletions(-) create mode 100644 komga/src/test/resources/epub/Panik im Paradies - namespace.opf diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt index 51098fa12..d2072e310 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/EpubExtractor.kt @@ -60,7 +60,7 @@ class EpubExtractor( manifest.values.firstOrNull { it.properties.contains("cover-image") } ?: // EPUB 2 - get cover from meta element with name="cover" opfDoc - .selectFirst("metadata > meta[name=cover]") + .selectFirst("*|metadata > *|meta[name=cover]") ?.attr("content") ?.ifBlank { null } ?.let { manifest[it] } @@ -84,7 +84,7 @@ class EpubExtractor( fun getResources(epub: EpubPackage): List { val spine = epub.opfDoc - .select("spine > itemref") + .select("*|spine > *|itemref") .map { it.attr("idref") } .mapNotNull { epub.manifest[it] } @@ -126,7 +126,7 @@ class EpubExtractor( run { val spine = epub.opfDoc - .select("spine > itemref") + .select("*|spine > *|itemref") .map { it.attr("idref") } .mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } } @@ -137,7 +137,7 @@ class EpubExtractor( val pagesWithImages = epub.opfDoc - .select("spine > itemref") + .select("*|spine > *|itemref") .map { it.attr("idref") } .mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } } .map { pagePath -> @@ -219,7 +219,7 @@ class EpubExtractor( fun computePageCount(epub: EpubPackage): Int { val spine = epub.opfDoc - .select("spine > itemref") + .select("*|spine > *|itemref") .map { it.attr("idref") } .mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } } @@ -230,8 +230,8 @@ class EpubExtractor( } fun isFixedLayout(epub: EpubPackage) = - epub.opfDoc.selectFirst("metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" || - epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true" + epub.opfDoc.selectFirst("*|metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" || + epub.opfDoc.selectFirst("*|metadata > *|meta[name=fixed-layout]")?.attr("content") == "true" fun computePositions( epub: EpubPackage, diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Opf.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Opf.kt index 2ce8daad2..be7f004d8 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Opf.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/mediacontainer/epub/Opf.kt @@ -8,7 +8,7 @@ import java.nio.file.Paths import kotlin.io.path.invariantSeparatorsPathString fun Document.getManifest() = - select("manifest > item").associate { + select("*|manifest > *|item").associate { it.attr("id") to ManifestItem( it.attr("id"), @@ -36,8 +36,8 @@ fun processOpfGuide( opf: Document, opfDir: Path?, ): List { - val guide = opf.selectFirst("guide") ?: return emptyList() - return guide.select("reference").map { ref -> + val guide = opf.selectFirst("*|guide") ?: return emptyList() + return guide.select("*|reference").map { ref -> EpubTocEntry( ref.attr("title"), ref.attr("href").ifBlank { null }?.let { normalizeHref(opfDir, URLDecoder.decode(it, Charsets.UTF_8)) }, diff --git a/komga/src/main/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProvider.kt b/komga/src/main/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProvider.kt index 4249f017f..4dacba0cc 100644 --- a/komga/src/main/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProvider.kt +++ b/komga/src/main/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProvider.kt @@ -51,22 +51,22 @@ class EpubMetadataProvider( getPackageFileContent(book.book.path)?.let { packageFile -> val opf = Jsoup.parse(packageFile, "", Parser.xmlParser()) - val title = opf.selectFirst("metadata > dc|title")?.text()?.ifBlank { null } + val title = opf.selectFirst("*|metadata > *|title")?.text()?.ifBlank { null } val description = opf - .selectFirst("metadata > dc|description") + .selectFirst("*|metadata > *|description") ?.text() ?.let { Jsoup.clean(it, Safelist.none()) } ?.ifBlank { null } - val date = opf.selectFirst("metadata > dc|date")?.text()?.let { parseDate(it) } + val date = opf.selectFirst("*|metadata > *|date")?.text()?.let { parseDate(it) } val authorRoles = opf - .select("metadata > *|meta[property=role][scheme=marc:relators]") + .select("*|metadata > *|meta[property=role][scheme=marc:relators]") .associate { it.attr("refines").removePrefix("#") to it.text() } val authors = opf - .select("metadata > dc|creator") + .select("*|metadata > *|creator") .mapNotNull { el -> val name = el.text().trim() if (name.isBlank()) { @@ -81,16 +81,16 @@ class EpubMetadataProvider( val isbn = opf - .select("metadata > dc|identifier") + .select("*|metadata > *|identifier") .map { it.text().lowercase().removePrefix("isbn:") } .firstNotNullOfOrNull { isbnValidator.validate(it) } val seriesIndex = opf - .selectFirst("metadata > *|meta[property=belongs-to-collection]") + .selectFirst("*|metadata > *|meta[property=belongs-to-collection]") ?.attr("id") ?.let { id -> - opf.selectFirst("metadata > *|meta[refines=#$id][property=group-position]") + opf.selectFirst("*|metadata > *|meta[refines=#$id][property=group-position]") }?.text() return BookMetadataPatch( @@ -116,18 +116,18 @@ class EpubMetadataProvider( getPackageFileContent(book.book.path)?.let { packageFile -> val opf = Jsoup.parse(packageFile, "", Parser.xmlParser()) - val series = opf.selectFirst("metadata > *|meta[property=belongs-to-collection]")?.text()?.ifBlank { null } - val publisher = opf.selectFirst("metadata > dc|publisher")?.text()?.ifBlank { null } - val language = opf.selectFirst("metadata > dc|language")?.text()?.ifBlank { null } + val series = opf.selectFirst("*|metadata > *|meta[property=belongs-to-collection]")?.text()?.ifBlank { null } + val publisher = opf.selectFirst("*|metadata > *|publisher")?.text()?.ifBlank { null } + val language = opf.selectFirst("*|metadata > *|language")?.text()?.ifBlank { null } val genres = opf - .select("metadata > dc|subject") + .select("*|metadata > *|subject") .mapNotNull { it.text().trim().ifBlank { null } } .toSet() .ifEmpty { null } val direction = - opf.getElementsByTag("spine").first()?.attr("page-progression-direction")?.let { + opf.selectFirst("*|spine")?.attr("page-progression-direction")?.let { when (it) { "rtl" -> SeriesMetadata.ReadingDirection.RIGHT_TO_LEFT "ltr" -> SeriesMetadata.ReadingDirection.LEFT_TO_RIGHT diff --git a/komga/src/test/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProviderTest.kt b/komga/src/test/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProviderTest.kt index 6499fcbd0..59285c754 100644 --- a/komga/src/test/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProviderTest.kt +++ b/komga/src/test/kotlin/org/gotson/komga/infrastructure/metadata/epub/EpubMetadataProviderTest.kt @@ -14,6 +14,8 @@ import org.gotson.komga.infrastructure.mediacontainer.epub.getPackageFileContent import org.junit.jupiter.api.AfterEach import org.junit.jupiter.api.Nested import org.junit.jupiter.api.Test +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.ValueSource import org.springframework.core.io.ClassPathResource import java.time.LocalDate @@ -37,9 +39,15 @@ class EpubMetadataProviderTest { @Nested inner class Book { - @Test - fun `given epub 3 opf when getting book metadata then metadata patch is valid`() { - val opf = ClassPathResource("epub/Panik im Paradies.opf") + @ParameterizedTest + @ValueSource( + strings = [ + "epub/Panik im Paradies.opf", + "epub/Panik im Paradies - namespace.opf", + ], + ) + fun `given epub 3 opf when getting book metadata then metadata patch is valid`(opfFile: String) { + val opf = ClassPathResource(opfFile) mockkStatic(::getPackageFileContent) every { getPackageFileContent(any()) } returns opf.file.readText() @@ -128,9 +136,15 @@ class EpubMetadataProviderTest { @Nested inner class Series { - @Test - fun `given epub 3 opf when getting series metadata then metadata patch is valid`() { - val opf = ClassPathResource("epub/Panik im Paradies.opf") + @ParameterizedTest + @ValueSource( + strings = [ + "epub/Panik im Paradies.opf", + "epub/Panik im Paradies - namespace.opf", + ], + ) + fun `given epub 3 opf when getting series metadata then metadata patch is valid`(opfFile: String) { + val opf = ClassPathResource(opfFile) mockkStatic(::getPackageFileContent) every { getPackageFileContent(any()) } returns opf.file.readText() diff --git a/komga/src/test/resources/epub/Panik im Paradies - namespace.opf b/komga/src/test/resources/epub/Panik im Paradies - namespace.opf new file mode 100644 index 000000000..64b755b59 --- /dev/null +++ b/komga/src/test/resources/epub/Panik im Paradies - namespace.opf @@ -0,0 +1,128 @@ + + + Panik im Paradies + Ulf Blanck + The Editor + goodreads:222735 + isbn:9783440077894 + calibre:255 + uuid:499def46-39dc-4e79-b474-d0ec12ea5dc5 + uuid:499def46-39dc-4e79-b474-d0ec12ea5dc5 + de + 1999-07-31T16:00:00+00:00 + <div> + <p>Bereits im ersten Band "Panik im Paradies" machen die drei berühmten Detektive ihrem Namen alle Ehre. Eigentlich haben sie ja gerade Ferien. Doch dann treffen sie auf diesen schrulligen Kapitän Larsson, der sich einen kleinen Privatzoo mit exotischen Tieren hält. Als plötzlich alle Tiere an rätselhaften Infektionen erkranken und die Besucher ausbleiben, werden Justus, Peter und Bob neugierig. Schon bald merken sie, daß da jemand ein düsteres Geheimnis hütet...</p></div> + Kosmos + Kinder- und Jugendbücher + main + Panik im Paradies + + 2020-08-09T08:40:58Z + 2021-06-19T08:20:33Z + aut + Blanck, Ulf + edt + Editor, The + 6 + Die drei ??? Kids + series + 1.5 + {"Ulf Blanck": ""} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +