feat: support epub with images in spine

This commit is contained in:
Gauthier Roebroeck 2026-01-28 11:32:04 +08:00
parent a4958b001f
commit 525b37fce7
2 changed files with 43 additions and 35 deletions

View file

@ -212,7 +212,7 @@ class BookAnalyzer(
status = Media.Status.READY,
pages = divinaPages,
files = resources,
pageCount = epubExtractor.computePageCount(epub),
pageCount = if (divinaPages.isNotEmpty()) divinaPages.size else epubExtractor.computePageCount(epub),
epubDivinaCompatible = divinaPages.isNotEmpty(),
epubIsKepub = isKepub,
extension =
@ -265,27 +265,28 @@ class BookAnalyzer(
/**
 * Returns the poster image for [book], selected by the book's media profile.
 *
 * - DIVINA: uses the extractor registered in `divinaExtractors` for the book's media type and
 *   reads the entry of the first page.
 * - PDF: asks `pdfExtractor` for page 1 rendered as an image.
 * - EPUB: asks `epubExtractor` for the cover; one of the listed EPUB branches additionally falls
 *   back to the ZIP divina extractor when the media is flagged `epubDivinaCompatible`.
 * - no profile (`null`): returns `null`.
 *
 * NOTE(review): this listing contains two branches each for `MediaProfile.DIVINA` and
 * `MediaProfile.EPUB`. It looks like the removed and added sides of a diff shown together;
 * presumably only the single-line DIVINA branch and the EPUB branch with the fallback are meant
 * to remain. Confirm against the actual file.
 *
 * @param book the book together with its analyzed media
 * @return the poster bytes with their media type, or `null` when none can be produced
 */
fun getPoster(book: BookWithMedia): TypedBytes? =
when (book.media.profile) {
MediaProfile.DIVINA ->
divinaExtractors[book.media.mediaType]
?.getEntryStream(
book.book.path,
book.media.pages
.first()
.fileName,
)?.let {
TypedBytes(
it,
book.media.pages
.first()
.mediaType,
)
}
MediaProfile.DIVINA -> divinaExtractors[book.media.mediaType]?.getPoster(book)
MediaProfile.PDF -> pdfExtractor.getPageContentAsImage(book.book.path, 1)
MediaProfile.EPUB -> epubExtractor.getCover(book.book.path)
MediaProfile.EPUB -> epubExtractor.getCover(book.book.path) ?: if (book.media.epubDivinaCompatible) divinaExtractors[MediaType.ZIP.type]?.getPoster(book) else null
null -> null
}
/**
 * Builds a poster from the first page of [book] using this extractor.
 *
 * The entry named by the first page's `fileName` is read from the book's path, and the result is
 * paired with that page's `mediaType`.
 *
 * `pages.first()` throws [NoSuchElementException] when the book has no pages, so callers are
 * expected to invoke this only for media that has at least one page.
 *
 * @param book the book together with its analyzed media
 * @return the first page's content wrapped with its media type
 */
private fun DivinaExtractor.getPoster(book: BookWithMedia): TypedBytes {
    val archivePath = book.book.path
    // Look the first page up once; it provides both the entry name and the media type.
    val coverPage = book.media.pages.first()
    val content = getEntryStream(archivePath, coverPage.fileName)
    return TypedBytes(content, coverPage.mediaType)
}
@Throws(
MediaNotReadyException::class,
IndexOutOfBoundsException::class,

View file

@ -46,7 +46,7 @@ class EpubExtractor(
/**
 * Tells whether the archive at [path] declares itself as an EPUB.
 *
 * Reads the `mimetype` entry, decodes it to a string, trims it, and compares it with
 * `application/epub+zip`. An [Exception] raised while doing so is caught and reported as `false`
 * rather than propagated.
 *
 * NOTE(review): two `catch` lines are listed back to back (`e` and `_`). This looks like the
 * removed and added sides of a diff shown together; presumably only the `_` variant is meant to
 * remain. Confirm against the actual file.
 *
 * @param path location of the archive to inspect
 * @return `true` when the `mimetype` entry matches the EPUB media type, `false` otherwise
 */
fun isEpub(path: Path): Boolean =
try {
getEntryStream(path, "mimetype").decodeToString().trim() == "application/epub+zip"
} catch (e: Exception) {
} catch (_: Exception) {
false
}
@ -139,24 +139,31 @@ class EpubExtractor(
epub.opfDoc
.select("*|spine > *|itemref")
.map { it.attr("idref") }
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
.map { pagePath ->
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
.mapNotNull { idref ->
val manifestItem = epub.manifest[idref] ?: return@mapNotNull null
normalizeHref(epub.opfDir, manifestItem.href) to manifestItem.mediaType
}.map { (pagePath, mediaType) ->
if (mediaType.startsWith("image", true)) {
// image in spine
listOf(Path(pagePath).normalize().invariantSeparatorsPathString)
} else {
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
// if a page has text over the threshold then the book is not divina compatible
if (doc.body().text().length > letterCountThreshold) return emptyList()
// if a page has text over the threshold then the book is not divina compatible
if (doc.body().text().length > letterCountThreshold) return emptyList()
val img =
doc
.getElementsByTag("img")
.map { it.attr("src") } // get the src, which can be a relative path
val img =
doc
.getElementsByTag("img")
.map { it.attr("src") } // get the src, which can be a relative path
val svg =
doc
.select("svg > image[xlink:href]")
.map { it.attr("xlink:href") } // get the source, which can be a relative path
val svg =
doc
.select("svg > image[xlink:href]")
.map { it.attr("xlink:href") } // get the source, which can be a relative path
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
}
}
if (pagesWithImages.size != pageCount) {
@ -164,7 +171,7 @@ class EpubExtractor(
return emptyList()
}
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
val imagesPath = pagesWithImages.flatMap { it.distinct() }
if (imagesPath.size != pageCount) {
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
return emptyList()
@ -259,7 +266,7 @@ class EpubExtractor(
val positions = computePositionsFromKoboSpan(readingOrder) { filename -> getZipEntryBytes(kepub, filename).decodeToString() }
kepub.deleteIfExists()
positions
} catch (e: Exception) {
} catch (_: Exception) {
logger.warn { "Could not convert to Kepub to compute positions: $path" }
emptyMap()
}