feat: detect if epub is a kepub

This commit is contained in:
Gauthier Roebroeck 2024-09-19 15:42:55 +08:00
parent d0143bf979
commit 0fd783c9cd
9 changed files with 33 additions and 1 deletions

View file

@ -0,0 +1,2 @@
-- Adds the EPUB_IS_KEPUB flag to MEDIA. The column is NOT NULL, so rows that
-- already exist receive the default value 0 until the flag is written again.
alter table MEDIA
add column EPUB_IS_KEPUB boolean NOT NULL DEFAULT 0;

View file

@ -12,6 +12,7 @@ data class Media(
val extension: MediaExtension? = null,
val bookId: String = "",
val epubDivinaCompatible: Boolean = false,
val epubIsKepub: Boolean = false,
override val createdDate: LocalDateTime = LocalDateTime.now(),
override val lastModifiedDate: LocalDateTime = createdDate,
) : Auditable {

View file

@ -155,6 +155,7 @@ class BookAnalyzer(
files = manifest.resources,
pageCount = manifest.pageCount,
epubDivinaCompatible = manifest.divinaPages.isNotEmpty(),
epubIsKepub = manifest.isKepub,
extension =
MediaExtensionEpub(
toc = manifest.toc,

View file

@ -486,6 +486,7 @@ class BookDtoDao(
pagesCount = pageCount.toInt(),
comment = comment ?: "",
epubDivinaCompatible = epubDivinaCompatible,
epubIsKepub = epubIsKepub,
)
private fun BookMetadataRecord.toDto(

View file

@ -47,6 +47,7 @@ class KoboDtoDao(
sd.LANGUAGE,
b.FILE_SIZE,
b.ONESHOT,
m.EPUB_IS_KEPUB,
m.EXTENSION_CLASS,
m.EXTENSION_VALUE_BLOB,
).from(b)
@ -107,6 +108,7 @@ class KoboDtoDao(
null,
title = dr.title,
workId = dr.bookId,
isKepub = mr.epubIsKepub,
)
}
}

View file

@ -50,6 +50,7 @@ class MediaDao(
m.PAGE_COUNT,
m.EXTENSION_CLASS,
m.EPUB_DIVINA_COMPATIBLE,
m.EPUB_IS_KEPUB,
*p.fields(),
)
@ -136,9 +137,10 @@ class MediaDao(
m.COMMENT,
m.PAGE_COUNT,
m.EPUB_DIVINA_COMPATIBLE,
m.EPUB_IS_KEPUB,
m.EXTENSION_CLASS,
m.EXTENSION_VALUE_BLOB,
).values(null as String?, null, null, null, null, null, null, null),
).values(null as String?, null, null, null, null, null, null, null, null),
).also { step ->
chunk.forEach { media ->
step.bind(
@ -148,6 +150,7 @@ class MediaDao(
media.comment,
media.pageCount,
media.epubDivinaCompatible,
media.epubIsKepub,
media.extension?.let { if (it is ProxyExtension) null else it::class.qualifiedName },
media.extension?.let { if (it is ProxyExtension) null else mapper.serializeJsonGz(it) },
)
@ -232,6 +235,7 @@ class MediaDao(
.set(m.COMMENT, media.comment)
.set(m.PAGE_COUNT, media.pageCount)
.set(m.EPUB_DIVINA_COMPATIBLE, media.epubDivinaCompatible)
.set(m.EPUB_IS_KEPUB, media.epubIsKepub)
.apply {
if (media.extension != null && media.extension !is ProxyExtension) {
set(m.EXTENSION_CLASS, media.extension::class.qualifiedName)
@ -286,6 +290,7 @@ class MediaDao(
comment = comment,
bookId = bookId,
epubDivinaCompatible = epubDivinaCompatible,
epubIsKepub = epubIsKepub,
createdDate = createdDate.toCurrentTimeZone(),
lastModifiedDate = lastModifiedDate.toCurrentTimeZone(),
)

View file

@ -87,6 +87,7 @@ class EpubExtractor(
isFixedLayout = isFixedLayout,
positions = computePositions(resources, isFixedLayout),
divinaPages = getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions),
isKepub = isKepub(epub, resources)
)
}
@ -174,6 +175,23 @@ class EpubExtractor(
}
}
/**
 * Checks whether the EPUB content carries KEPUB markup.
 *
 * Every resource whose sub type is [MediaFile.SubType.EPUB_PAGE] is parsed in turn, and the
 * scan stops at the first document containing an element with the `koboSpan` class.
 *
 * This is a best-effort check: any exception raised while reading or parsing an entry is
 * logged as a warning and the result is `false`.
 *
 * @param epub the package whose zip entries are read
 * @param resources the resources to scan; only EPUB pages are inspected
 * @return `true` if at least one inspected page contains a `koboSpan` element, `false` otherwise
 */
private fun isKepub(
  epub: EpubPackage,
  resources: List<MediaFile>,
): Boolean =
  try {
    resources
      .filter { candidate -> candidate.subType == MediaFile.SubType.EPUB_PAGE }
      .any { page ->
        val parsedPage =
          epub.zip.getEntryInputStream(page.fileName).use { stream ->
            Jsoup.parse(stream, null, "")
          }
        val koboSpans = parsedPage.getElementsByClass("koboSpan")
        // `any` short-circuits, so later pages are not parsed once a match is found.
        !koboSpans.isNullOrEmpty()
      }
  } catch (e: Exception) {
    logger.warn(e) { "Error while checking if EPUB is KEPUB" }
    false
  }
private fun computePageCount(epub: EpubPackage): Int {
val spine =
epub.opfDoc.select("spine > itemref")

View file

@ -15,4 +15,5 @@ data class EpubManifest(
val isFixedLayout: Boolean,
val positions: List<R2Locator>,
val divinaPages: List<BookPage>,
val isKepub: Boolean,
)

View file

@ -40,6 +40,7 @@ data class MediaDto(
val pagesCount: Int,
val comment: String,
val epubDivinaCompatible: Boolean,
val epubIsKepub: Boolean,
) {
val mediaProfile: String by lazy { MediaType.fromMediaType(mediaType)?.profile?.name ?: "" }
}