mirror of
https://github.com/gotson/komga.git
synced 2025-12-06 08:32:25 +01:00
parent
4e7c49d5d8
commit
3ab21ff6aa
5 changed files with 171 additions and 29 deletions
|
|
@ -60,7 +60,7 @@ class EpubExtractor(
|
|||
manifest.values.firstOrNull { it.properties.contains("cover-image") }
|
||||
?: // EPUB 2 - get cover from meta element with name="cover"
|
||||
opfDoc
|
||||
.selectFirst("metadata > meta[name=cover]")
|
||||
.selectFirst("*|metadata > *|meta[name=cover]")
|
||||
?.attr("content")
|
||||
?.ifBlank { null }
|
||||
?.let { manifest[it] }
|
||||
|
|
@ -84,7 +84,7 @@ class EpubExtractor(
|
|||
fun getResources(epub: EpubPackage): List<MediaFile> {
|
||||
val spine =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
.select("*|spine > *|itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { epub.manifest[it] }
|
||||
|
||||
|
|
@ -126,7 +126,7 @@ class EpubExtractor(
|
|||
run {
|
||||
val spine =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
.select("*|spine > *|itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||
|
||||
|
|
@ -137,7 +137,7 @@ class EpubExtractor(
|
|||
|
||||
val pagesWithImages =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
.select("*|spine > *|itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||
.map { pagePath ->
|
||||
|
|
@ -219,7 +219,7 @@ class EpubExtractor(
|
|||
fun computePageCount(epub: EpubPackage): Int {
|
||||
val spine =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
.select("*|spine > *|itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||
|
||||
|
|
@ -230,8 +230,8 @@ class EpubExtractor(
|
|||
}
|
||||
|
||||
fun isFixedLayout(epub: EpubPackage) =
|
||||
epub.opfDoc.selectFirst("metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" ||
|
||||
epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"
|
||||
epub.opfDoc.selectFirst("*|metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" ||
|
||||
epub.opfDoc.selectFirst("*|metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"
|
||||
|
||||
fun computePositions(
|
||||
epub: EpubPackage,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import java.nio.file.Paths
|
|||
import kotlin.io.path.invariantSeparatorsPathString
|
||||
|
||||
fun Document.getManifest() =
|
||||
select("manifest > item").associate {
|
||||
select("*|manifest > *|item").associate {
|
||||
it.attr("id") to
|
||||
ManifestItem(
|
||||
it.attr("id"),
|
||||
|
|
@ -36,8 +36,8 @@ fun processOpfGuide(
|
|||
opf: Document,
|
||||
opfDir: Path?,
|
||||
): List<EpubTocEntry> {
|
||||
val guide = opf.selectFirst("guide") ?: return emptyList()
|
||||
return guide.select("reference").map { ref ->
|
||||
val guide = opf.selectFirst("*|guide") ?: return emptyList()
|
||||
return guide.select("*|reference").map { ref ->
|
||||
EpubTocEntry(
|
||||
ref.attr("title"),
|
||||
ref.attr("href").ifBlank { null }?.let { normalizeHref(opfDir, URLDecoder.decode(it, Charsets.UTF_8)) },
|
||||
|
|
|
|||
|
|
@ -51,22 +51,22 @@ class EpubMetadataProvider(
|
|||
getPackageFileContent(book.book.path)?.let { packageFile ->
|
||||
val opf = Jsoup.parse(packageFile, "", Parser.xmlParser())
|
||||
|
||||
val title = opf.selectFirst("metadata > dc|title")?.text()?.ifBlank { null }
|
||||
val title = opf.selectFirst("*|metadata > *|title")?.text()?.ifBlank { null }
|
||||
val description =
|
||||
opf
|
||||
.selectFirst("metadata > dc|description")
|
||||
.selectFirst("*|metadata > *|description")
|
||||
?.text()
|
||||
?.let { Jsoup.clean(it, Safelist.none()) }
|
||||
?.ifBlank { null }
|
||||
val date = opf.selectFirst("metadata > dc|date")?.text()?.let { parseDate(it) }
|
||||
val date = opf.selectFirst("*|metadata > *|date")?.text()?.let { parseDate(it) }
|
||||
|
||||
val authorRoles =
|
||||
opf
|
||||
.select("metadata > *|meta[property=role][scheme=marc:relators]")
|
||||
.select("*|metadata > *|meta[property=role][scheme=marc:relators]")
|
||||
.associate { it.attr("refines").removePrefix("#") to it.text() }
|
||||
val authors =
|
||||
opf
|
||||
.select("metadata > dc|creator")
|
||||
.select("*|metadata > *|creator")
|
||||
.mapNotNull { el ->
|
||||
val name = el.text().trim()
|
||||
if (name.isBlank()) {
|
||||
|
|
@ -81,16 +81,16 @@ class EpubMetadataProvider(
|
|||
|
||||
val isbn =
|
||||
opf
|
||||
.select("metadata > dc|identifier")
|
||||
.select("*|metadata > *|identifier")
|
||||
.map { it.text().lowercase().removePrefix("isbn:") }
|
||||
.firstNotNullOfOrNull { isbnValidator.validate(it) }
|
||||
|
||||
val seriesIndex =
|
||||
opf
|
||||
.selectFirst("metadata > *|meta[property=belongs-to-collection]")
|
||||
.selectFirst("*|metadata > *|meta[property=belongs-to-collection]")
|
||||
?.attr("id")
|
||||
?.let { id ->
|
||||
opf.selectFirst("metadata > *|meta[refines=#$id][property=group-position]")
|
||||
opf.selectFirst("*|metadata > *|meta[refines=#$id][property=group-position]")
|
||||
}?.text()
|
||||
|
||||
return BookMetadataPatch(
|
||||
|
|
@ -116,18 +116,18 @@ class EpubMetadataProvider(
|
|||
getPackageFileContent(book.book.path)?.let { packageFile ->
|
||||
val opf = Jsoup.parse(packageFile, "", Parser.xmlParser())
|
||||
|
||||
val series = opf.selectFirst("metadata > *|meta[property=belongs-to-collection]")?.text()?.ifBlank { null }
|
||||
val publisher = opf.selectFirst("metadata > dc|publisher")?.text()?.ifBlank { null }
|
||||
val language = opf.selectFirst("metadata > dc|language")?.text()?.ifBlank { null }
|
||||
val series = opf.selectFirst("*|metadata > *|meta[property=belongs-to-collection]")?.text()?.ifBlank { null }
|
||||
val publisher = opf.selectFirst("*|metadata > *|publisher")?.text()?.ifBlank { null }
|
||||
val language = opf.selectFirst("*|metadata > *|language")?.text()?.ifBlank { null }
|
||||
val genres =
|
||||
opf
|
||||
.select("metadata > dc|subject")
|
||||
.select("*|metadata > *|subject")
|
||||
.mapNotNull { it.text().trim().ifBlank { null } }
|
||||
.toSet()
|
||||
.ifEmpty { null }
|
||||
|
||||
val direction =
|
||||
opf.getElementsByTag("spine").first()?.attr("page-progression-direction")?.let {
|
||||
opf.selectFirst("*|spine")?.attr("page-progression-direction")?.let {
|
||||
when (it) {
|
||||
"rtl" -> SeriesMetadata.ReadingDirection.RIGHT_TO_LEFT
|
||||
"ltr" -> SeriesMetadata.ReadingDirection.LEFT_TO_RIGHT
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ import org.gotson.komga.infrastructure.mediacontainer.epub.getPackageFileContent
|
|||
import org.junit.jupiter.api.AfterEach
|
||||
import org.junit.jupiter.api.Nested
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.params.ParameterizedTest
|
||||
import org.junit.jupiter.params.provider.ValueSource
|
||||
import org.springframework.core.io.ClassPathResource
|
||||
import java.time.LocalDate
|
||||
|
||||
|
|
@ -37,9 +39,15 @@ class EpubMetadataProviderTest {
|
|||
|
||||
@Nested
|
||||
inner class Book {
|
||||
@Test
|
||||
fun `given epub 3 opf when getting book metadata then metadata patch is valid`() {
|
||||
val opf = ClassPathResource("epub/Panik im Paradies.opf")
|
||||
@ParameterizedTest
|
||||
@ValueSource(
|
||||
strings = [
|
||||
"epub/Panik im Paradies.opf",
|
||||
"epub/Panik im Paradies - namespace.opf",
|
||||
],
|
||||
)
|
||||
fun `given epub 3 opf when getting book metadata then metadata patch is valid`(opfFile: String) {
|
||||
val opf = ClassPathResource(opfFile)
|
||||
mockkStatic(::getPackageFileContent)
|
||||
every { getPackageFileContent(any()) } returns opf.file.readText()
|
||||
|
||||
|
|
@ -128,9 +136,15 @@ class EpubMetadataProviderTest {
|
|||
|
||||
@Nested
|
||||
inner class Series {
|
||||
@Test
|
||||
fun `given epub 3 opf when getting series metadata then metadata patch is valid`() {
|
||||
val opf = ClassPathResource("epub/Panik im Paradies.opf")
|
||||
@ParameterizedTest
|
||||
@ValueSource(
|
||||
strings = [
|
||||
"epub/Panik im Paradies.opf",
|
||||
"epub/Panik im Paradies - namespace.opf",
|
||||
],
|
||||
)
|
||||
fun `given epub 3 opf when getting series metadata then metadata patch is valid`(opfFile: String) {
|
||||
val opf = ClassPathResource(opfFile)
|
||||
mockkStatic(::getPackageFileContent)
|
||||
every { getPackageFileContent(any()) } returns opf.file.readText()
|
||||
|
||||
|
|
|
|||
128
komga/src/test/resources/epub/Panik im Paradies - namespace.opf
Normal file
128
komga/src/test/resources/epub/Panik im Paradies - namespace.opf
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
<myopf:package xmlns:myopf="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uuid_id" prefix="calibre: https://calibre-ebook.com">
|
||||
<myopf:metadata xmlns:mydc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:dcterms="http://purl.org/dc/terms/"
|
||||
xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata">
|
||||
<mydc:title id="id">Panik im Paradies</mydc:title>
|
||||
<mydc:creator id="id-1">Ulf Blanck</mydc:creator>
|
||||
<mydc:creator id="id-3">The Editor</mydc:creator>
|
||||
<mydc:identifier>goodreads:222735</mydc:identifier>
|
||||
<mydc:identifier>isbn:9783440077894</mydc:identifier>
|
||||
<mydc:identifier>calibre:255</mydc:identifier>
|
||||
<mydc:identifier>uuid:499def46-39dc-4e79-b474-d0ec12ea5dc5</mydc:identifier>
|
||||
<mydc:identifier id="uuid_id">uuid:499def46-39dc-4e79-b474-d0ec12ea5dc5</mydc:identifier>
|
||||
<mydc:language>de</mydc:language>
|
||||
<mydc:date>1999-07-31T16:00:00+00:00</mydc:date>
|
||||
<mydc:description><div>
|
||||
<p>Bereits im ersten Band "Panik im Paradies" machen die drei berühmten Detektive ihrem Namen alle Ehre. Eigentlich haben sie ja gerade Ferien. Doch dann treffen sie auf diesen schrulligen Kapitän Larsson, der sich einen kleinen Privatzoo mit exotischen Tieren hält. Als plötzlich alle Tiere an rätselhaften Infektionen erkranken und die Besucher ausbleiben, werden Justus, Peter und Bob neugierig. Schon bald merken sie, daß da jemand ein düsteres Geheimnis hütet...</p></div></mydc:description>
|
||||
<mydc:publisher>Kosmos</mydc:publisher>
|
||||
<mydc:subject>Kinder- und Jugendbücher</mydc:subject>
|
||||
<myopf:meta refines="#id" property="title-type">main</myopf:meta>
|
||||
<myopf:meta refines="#id" property="file-as">Panik im Paradies</myopf:meta>
|
||||
<myopf:meta name="cover" content="cover"/>
|
||||
<myopf:meta property="calibre:timestamp" scheme="dcterms:W3CDTF">2020-08-09T08:40:58Z</myopf:meta>
|
||||
<myopf:meta property="dcterms:modified" scheme="dcterms:W3CDTF">2021-06-19T08:20:33Z</myopf:meta>
|
||||
<myopf:meta refines="#id-1" property="role" scheme="marc:relators">aut</myopf:meta>
|
||||
<myopf:meta refines="#id-1" property="file-as">Blanck, Ulf</myopf:meta>
|
||||
<myopf:meta refines="#id-3" property="role" scheme="marc:relators">edt</myopf:meta>
|
||||
<myopf:meta refines="#id-3" property="file-as">Editor, The</myopf:meta>
|
||||
<myopf:meta property="calibre:rating">6</myopf:meta>
|
||||
<myopf:meta property="belongs-to-collection" id="id-2">Die drei ??? Kids</myopf:meta>
|
||||
<myopf:meta refines="#id-2" property="collection-type">series</myopf:meta>
|
||||
<myopf:meta refines="#id-2" property="group-position">1.5</myopf:meta>
|
||||
<myopf:meta property="calibre:author_link_map">{"Ulf Blanck": ""}</myopf:meta>
|
||||
</myopf:metadata>
|
||||
<myopf:manifest>
|
||||
<myopf:item id="titlepage" href="titlepage.xhtml" media-type="application/xhtml+xml" properties="svg calibre:title-page"/>
|
||||
<myopf:item id="TableOfContents_html" href="OPS/TableOfContents.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0001_html" href="OPS/section-0001.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0002_html" href="OPS/section-0002.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0003_html" href="OPS/section-0003.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0004_html" href="OPS/section-0004.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0005_html" href="OPS/section-0005.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0006_html" href="OPS/section-0006.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0007_html" href="OPS/section-0007.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0008_html" href="OPS/section-0008.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0009_html" href="OPS/section-0009.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0010_html" href="OPS/section-0010.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0011_html" href="OPS/section-0011.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0012_html" href="OPS/section-0012.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0013_html" href="OPS/section-0013.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0014_html" href="OPS/section-0014.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0015_html" href="OPS/section-0015.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0016_html" href="OPS/section-0016.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0017_html" href="OPS/section-0017.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0018_html" href="OPS/section-0018.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0019_html" href="OPS/section-0019.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0020_html" href="OPS/section-0020.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0021_html" href="OPS/section-0021.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0022_html" href="OPS/section-0022.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="section-0023_html" href="OPS/section-0023.html" media-type="application/xhtml+xml"/>
|
||||
<myopf:item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
|
||||
<myopf:item id="page_css" href="page_styles.css" media-type="text/css"/>
|
||||
<myopf:item id="css" href="stylesheet.css" media-type="text/css"/>
|
||||
<myopf:item id="cover" href="cover.jpeg" media-type="image/jpeg" properties="cover-image"/>
|
||||
<myopf:item id="image0_jpg" href="OPS/image0.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image1_jpg" href="OPS/image1.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image10_jpg" href="OPS/image10.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image11_jpg" href="OPS/image11.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image12_jpg" href="OPS/image12.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image13_jpg" href="OPS/image13.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image14_jpg" href="OPS/image14.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image15_jpg" href="OPS/image15.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image16_jpg" href="OPS/image16.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image17_jpg" href="OPS/image17.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image18_jpg" href="OPS/image18.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image19_jpg" href="OPS/image19.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image2_jpg" href="OPS/image2.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image20_jpg" href="OPS/image20.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image21_jpg" href="OPS/image21.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image22_jpg" href="OPS/image22.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image23_jpg" href="OPS/image23.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image24_jpg" href="OPS/image24.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image25_jpg" href="OPS/image25.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image26_jpg" href="OPS/image26.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image27_jpg" href="OPS/image27.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image28_jpg" href="OPS/image28.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image29_jpg" href="OPS/image29.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image3_jpg" href="OPS/image3.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image30_jpg" href="OPS/image30.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image31_jpg" href="OPS/image31.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image32_jpg" href="OPS/image32.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image33_jpg" href="OPS/image33.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image34_jpg" href="OPS/image34.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image35_jpg" href="OPS/image35.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image4_jpg" href="OPS/image4.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image5_jpg" href="OPS/image5.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image6_jpg" href="OPS/image6.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image7_jpg" href="OPS/image7.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image8_jpg" href="OPS/image8.jpg" media-type="image/jpeg"/>
|
||||
<myopf:item id="image9_jpg" href="OPS/image9.jpg" media-type="image/jpeg"/>
|
||||
</myopf:manifest>
|
||||
<myopf:spine page-progression-direction="rtl">
|
||||
<myopf:itemref idref="titlepage"/>
|
||||
<myopf:itemref idref="TableOfContents_html"/>
|
||||
<myopf:itemref idref="section-0001_html"/>
|
||||
<myopf:itemref idref="section-0002_html"/>
|
||||
<myopf:itemref idref="section-0003_html"/>
|
||||
<myopf:itemref idref="section-0004_html"/>
|
||||
<myopf:itemref idref="section-0005_html"/>
|
||||
<myopf:itemref idref="section-0006_html"/>
|
||||
<myopf:itemref idref="section-0007_html"/>
|
||||
<myopf:itemref idref="section-0008_html"/>
|
||||
<myopf:itemref idref="section-0009_html"/>
|
||||
<myopf:itemref idref="section-0010_html"/>
|
||||
<myopf:itemref idref="section-0011_html"/>
|
||||
<myopf:itemref idref="section-0012_html"/>
|
||||
<myopf:itemref idref="section-0013_html"/>
|
||||
<myopf:itemref idref="section-0014_html"/>
|
||||
<myopf:itemref idref="section-0015_html"/>
|
||||
<myopf:itemref idref="section-0016_html"/>
|
||||
<myopf:itemref idref="section-0017_html"/>
|
||||
<myopf:itemref idref="section-0018_html"/>
|
||||
<myopf:itemref idref="section-0019_html"/>
|
||||
<myopf:itemref idref="section-0020_html"/>
|
||||
<myopf:itemref idref="section-0021_html"/>
|
||||
<myopf:itemref idref="section-0022_html"/>
|
||||
<myopf:itemref idref="section-0023_html"/>
|
||||
</myopf:spine>
|
||||
</myopf:package>
|
||||
Loading…
Reference in a new issue