mirror of
https://github.com/gotson/komga.git
synced 2026-05-08 12:35:30 +02:00
feat: support epub with images in spine
This commit is contained in:
parent
a4958b001f
commit
525b37fce7
2 changed files with 43 additions and 35 deletions
|
|
@ -212,7 +212,7 @@ class BookAnalyzer(
|
|||
status = Media.Status.READY,
|
||||
pages = divinaPages,
|
||||
files = resources,
|
||||
pageCount = epubExtractor.computePageCount(epub),
|
||||
pageCount = if (divinaPages.isNotEmpty()) divinaPages.size else epubExtractor.computePageCount(epub),
|
||||
epubDivinaCompatible = divinaPages.isNotEmpty(),
|
||||
epubIsKepub = isKepub,
|
||||
extension =
|
||||
|
|
@ -265,27 +265,28 @@ class BookAnalyzer(
|
|||
|
||||
fun getPoster(book: BookWithMedia): TypedBytes? =
|
||||
when (book.media.profile) {
|
||||
MediaProfile.DIVINA ->
|
||||
divinaExtractors[book.media.mediaType]
|
||||
?.getEntryStream(
|
||||
book.book.path,
|
||||
book.media.pages
|
||||
.first()
|
||||
.fileName,
|
||||
)?.let {
|
||||
TypedBytes(
|
||||
it,
|
||||
book.media.pages
|
||||
.first()
|
||||
.mediaType,
|
||||
)
|
||||
}
|
||||
|
||||
MediaProfile.DIVINA -> divinaExtractors[book.media.mediaType]?.getPoster(book)
|
||||
MediaProfile.PDF -> pdfExtractor.getPageContentAsImage(book.book.path, 1)
|
||||
MediaProfile.EPUB -> epubExtractor.getCover(book.book.path)
|
||||
MediaProfile.EPUB -> epubExtractor.getCover(book.book.path) ?: if (book.media.epubDivinaCompatible) divinaExtractors[MediaType.ZIP.type]?.getPoster(book) else null
|
||||
null -> null
|
||||
}
|
||||
|
||||
/**
 * Builds the poster for [book] from the first entry of `book.media.pages`.
 *
 * The entry named by the first page's `fileName` is read from `book.book.path` through
 * `getEntryStream`, and the result is wrapped in a [TypedBytes] together with that same
 * page's `mediaType`.
 *
 * NOTE(review): `first()` is called without an emptiness check. If `pages` is a standard
 * Kotlin collection, this throws [NoSuchElementException] for a book without pages —
 * confirm that callers only invoke this when at least one page exists.
 *
 * @param book the book whose first page is used as the poster.
 * @return the content of the first page paired with that page's media type.
 */
private fun DivinaExtractor.getPoster(book: BookWithMedia): TypedBytes =
|
||||
this
|
||||
.getEntryStream(
|
||||
book.book.path,
|
||||
book.media.pages
|
||||
.first()
|
||||
|
||||
.fileName,
|
||||
// plain `.let` (not `?.let`): the result of getEntryStream is wrapped unconditionally
).let {
|
||||
TypedBytes(
|
||||
it,
|
||||
book.media.pages
|
||||
.first()
|
||||
|
||||
.mediaType,
|
||||
)
|
||||
}
|
||||
|
||||
@Throws(
|
||||
MediaNotReadyException::class,
|
||||
IndexOutOfBoundsException::class,
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class EpubExtractor(
|
|||
fun isEpub(path: Path): Boolean =
|
||||
try {
|
||||
getEntryStream(path, "mimetype").decodeToString().trim() == "application/epub+zip"
|
||||
} catch (e: Exception) {
|
||||
} catch (_: Exception) {
|
||||
false
|
||||
}
|
||||
|
||||
|
|
@ -139,24 +139,31 @@ class EpubExtractor(
|
|||
epub.opfDoc
|
||||
.select("*|spine > *|itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||
.map { pagePath ->
|
||||
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
|
||||
.mapNotNull { idref ->
|
||||
val manifestItem = epub.manifest[idref] ?: return@mapNotNull null
|
||||
normalizeHref(epub.opfDir, manifestItem.href) to manifestItem.mediaType
|
||||
}.map { (pagePath, mediaType) ->
|
||||
if (mediaType.startsWith("image", true)) {
|
||||
// image in spine
|
||||
listOf(Path(pagePath).normalize().invariantSeparatorsPathString)
|
||||
} else {
|
||||
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
|
||||
|
||||
// if a page has text over the threshold then the book is not divina compatible
|
||||
if (doc.body().text().length > letterCountThreshold) return emptyList()
|
||||
// if a page has text over the threshold then the book is not divina compatible
|
||||
if (doc.body().text().length > letterCountThreshold) return emptyList()
|
||||
|
||||
val img =
|
||||
doc
|
||||
.getElementsByTag("img")
|
||||
.map { it.attr("src") } // get the src, which can be a relative path
|
||||
val img =
|
||||
doc
|
||||
.getElementsByTag("img")
|
||||
.map { it.attr("src") } // get the src, which can be a relative path
|
||||
|
||||
val svg =
|
||||
doc
|
||||
.select("svg > image[xlink:href]")
|
||||
.map { it.attr("xlink:href") } // get the source, which can be a relative path
|
||||
val svg =
|
||||
doc
|
||||
.select("svg > image[xlink:href]")
|
||||
.map { it.attr("xlink:href") } // get the source, which can be a relative path
|
||||
|
||||
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
|
||||
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
|
||||
}
|
||||
}
|
||||
|
||||
if (pagesWithImages.size != pageCount) {
|
||||
|
|
@ -164,7 +171,7 @@ class EpubExtractor(
|
|||
return emptyList()
|
||||
}
|
||||
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
|
||||
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
|
||||
val imagesPath = pagesWithImages.flatMap { it.distinct() }
|
||||
if (imagesPath.size != pageCount) {
|
||||
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
|
||||
return emptyList()
|
||||
|
|
@ -259,7 +266,7 @@ class EpubExtractor(
|
|||
val positions = computePositionsFromKoboSpan(readingOrder) { filename -> getZipEntryBytes(kepub, filename).decodeToString() }
|
||||
kepub.deleteIfExists()
|
||||
positions
|
||||
} catch (e: Exception) {
|
||||
} catch (_: Exception) {
|
||||
logger.warn { "Could not convert to Kepub to compute positions: $path" }
|
||||
emptyMap()
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue