mirror of
https://github.com/gotson/komga.git
synced 2025-12-20 23:45:11 +01:00
fix: epub parsing namespace issue
This commit is contained in:
parent
55ec5a3478
commit
7a566326b0
3 changed files with 40 additions and 7 deletions
|
|
@@ -51,16 +51,19 @@ class EpubMetadataProvider(
|
|||
val description = opf.selectFirst("metadata > dc|description")?.text()?.let { Jsoup.clean(it, Whitelist.none()) }?.ifBlank { null }
|
||||
val date = opf.selectFirst("metadata > dc|date")?.text()?.let { parseDate(it) }
|
||||
|
||||
val creatorRefines = opf.select("metadata > meta[property=role][scheme=marc:relators]")
|
||||
val authorRoles = (
|
||||
opf.select("metadata > *|meta[property=role][scheme=marc:relators]") +
|
||||
opf.select("metadata > meta[property=role][scheme=marc:relators]")
|
||||
)
|
||||
.associate { it.attr("refines").removePrefix("#") to it.text() }
|
||||
val authors = opf.select("metadata > dc|creator")
|
||||
.mapNotNull { el ->
|
||||
val name = el.text()?.trim()
|
||||
if (name.isNullOrBlank()) null
|
||||
else {
|
||||
val opfRole = el.attr("opf|role").ifBlank { null }
|
||||
val opfRole = el.attr("opf:role").ifBlank { null }
|
||||
val id = el.attr("id").ifBlank { null }
|
||||
val refineRole = creatorRefines[id]?.ifBlank { null }
|
||||
val refineRole = authorRoles[id]?.ifBlank { null }
|
||||
Author(name, relators[opfRole ?: refineRole] ?: "writer")
|
||||
}
|
||||
}
|
||||
|
|
@@ -86,7 +89,10 @@ class EpubMetadataProvider(
|
|||
epubExtractor.getPackageFile(book.book.path)?.let { packageFile ->
|
||||
val opf = Jsoup.parse(packageFile)
|
||||
|
||||
val series = opf.selectFirst("metadata > *|meta[property=belongs-to-collection]")?.text()?.ifBlank { null }
|
||||
val series = (
|
||||
opf.selectFirst("metadata > meta[property=belongs-to-collection]")
|
||||
?: opf.selectFirst("metadata > *|meta[property=belongs-to-collection]")
|
||||
)?.text()?.ifBlank { null }
|
||||
val publisher = opf.selectFirst("metadata > dc|publisher")?.text()?.ifBlank { null }
|
||||
val language = opf.selectFirst("metadata > dc|language")?.text()?.ifBlank { null }
|
||||
val genre = opf.selectFirst("metadata > dc|subject")?.text()?.ifBlank { null }
|
||||
|
|
|
|||
|
|
@@ -3,13 +3,17 @@ package org.gotson.komga.infrastructure.metadata.epub
|
|||
import io.mockk.every
|
||||
import io.mockk.mockk
|
||||
import org.apache.commons.validator.routines.ISBNValidator
|
||||
import org.apache.tika.config.TikaConfig
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.gotson.komga.domain.model.Author
|
||||
import org.gotson.komga.domain.model.BookWithMedia
|
||||
import org.gotson.komga.domain.model.Media
|
||||
import org.gotson.komga.domain.model.SeriesMetadata
|
||||
import org.gotson.komga.domain.model.makeBook
|
||||
import org.gotson.komga.infrastructure.image.ImageAnalyzer
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||
import org.gotson.komga.infrastructure.mediacontainer.EpubExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ZipExtractor
|
||||
import org.junit.jupiter.api.Nested
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.springframework.core.io.ClassPathResource
|
||||
|
|
@@ -21,6 +25,10 @@ class EpubMetadataProviderTest {
|
|||
private val isbnValidator = ISBNValidator(true)
|
||||
private val epubMetadataProvider = EpubMetadataProvider(mockExtractor, isbnValidator)
|
||||
|
||||
private val contentDetector = ContentDetector(TikaConfig())
|
||||
private val imageAnalyzer = ImageAnalyzer()
|
||||
private val epubMetadataProviderProper = EpubMetadataProvider(EpubExtractor(ZipExtractor(contentDetector, imageAnalyzer), contentDetector, imageAnalyzer), ISBNValidator(true))
|
||||
|
||||
private val book = makeBook("book")
|
||||
private val media = Media(
|
||||
status = Media.Status.READY,
|
||||
|
|
@@ -42,13 +50,32 @@ class EpubMetadataProviderTest {
|
|||
assertThat(summary).isEqualTo("Bereits im ersten Band \"Panik im Paradies\" machen die drei berühmten Detektive ihrem Namen alle Ehre. Eigentlich haben sie ja gerade Ferien. Doch dann treffen sie auf diesen schrulligen Kapitän Larsson, der sich einen kleinen Privatzoo mit exotischen Tieren hält. Als plötzlich alle Tiere an rätselhaften Infektionen erkranken und die Besucher ausbleiben, werden Justus, Peter und Bob neugierig. Schon bald merken sie, daß da jemand ein düsteres Geheimnis hütet...")
|
||||
assertThat(releaseDate).isEqualTo(LocalDate.of(1999, 7, 31))
|
||||
assertThat(authors).containsExactlyInAnyOrder(
|
||||
Author("Blanck, Ulf", "writer"),
|
||||
Author("Editor, The", "editor"),
|
||||
Author("Ulf Blanck", "writer"),
|
||||
Author("The Editor", "editor"),
|
||||
)
|
||||
assertThat(isbn).isEqualTo("9783440077894")
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the provider against an EPUB file loaded from the test classpath and
// checks the fields of the resulting book metadata patch.
@Test
|
||||
fun `given real epub 3 when getting book metadata then metadata patch is valid`() {
|
||||
// EPUB fixture resolved from the classpath.
val epubResource = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
|
||||
// Book pointing at the fixture's URL, paired with the shared `media` value.
val epubBook = BookWithMedia(
|
||||
makeBook("Epub", url = epubResource.url),
|
||||
media,
|
||||
)
|
||||
|
||||
// Uses epubMetadataProviderProper (constructed with an EpubExtractor instance)
// instead of epubMetadataProvider (constructed with mockExtractor).
val patch = epubMetadataProviderProper.getBookMetadataFromBook(epubBook)
|
||||
|
||||
// `!!` makes the test fail right here if no patch was returned.
with(patch!!) {
|
||||
assertThat(title).isEqualTo("The Incomplete Theft")
|
||||
// No summary is expected for this fixture.
assertThat(summary).isNull()
|
||||
assertThat(releaseDate).isEqualTo(LocalDate.of(2021, 6, 20))
|
||||
assertThat(authors).containsExactlyInAnyOrder(Author("Ralph Burke", "writer"))
|
||||
// No ISBN is expected for this fixture.
assertThat(isbn).isNull()
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `given epub 2 opf when getting book metadata then metadata patch is valid`() {
|
||||
val opf = ClassPathResource("epub/1979.opf")
|
||||
|
|
@@ -62,7 +89,7 @@ class EpubMetadataProviderTest {
|
|||
assertThat(releaseDate).isEqualTo(LocalDate.of(101, 1, 1))
|
||||
assertThat(authors).containsExactlyInAnyOrder(
|
||||
Author("Kracht, Christian", "writer"),
|
||||
Author("Editor, The", "editor"),
|
||||
Author("The Editor", "editor"),
|
||||
)
|
||||
assertThat(isbn).isNull()
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Reference in a new issue