fix: better unicode sorting

This commit is contained in:
Gauthier Roebroeck 2021-08-30 16:56:49 +08:00
parent 660ab8a5cf
commit 773858eddd
8 changed files with 56 additions and 36 deletions

View file

@ -91,11 +91,8 @@ dependencies {
implementation("com.github.ben-manes.caffeine:caffeine:2.9.0")
// While waiting for https://github.com/xerial/sqlite-jdbc/pull/491 and https://github.com/xerial/sqlite-jdbc/pull/494
// runtimeOnly("org.xerial:sqlite-jdbc:3.32.3.2")
// jooqGenerator("org.xerial:sqlite-jdbc:3.32.3.2")
implementation("com.github.gotson:sqlite-jdbc:3.32.3.8")
jooqGenerator("com.github.gotson:sqlite-jdbc:3.32.3.8")
implementation("org.xerial:sqlite-jdbc:3.36.0.3")
jooqGenerator("org.xerial:sqlite-jdbc:3.36.0.3")
testImplementation("org.springframework.boot:spring-boot-starter-test") {
exclude(module = "mockito-core")

View file

@ -1,8 +1,10 @@
package org.gotson.komga.infrastructure.datasource
import com.ibm.icu.text.Collator
import mu.KotlinLogging
import org.gotson.komga.infrastructure.language.stripAccents
import org.springframework.jdbc.datasource.SimpleDriverDataSource
import org.sqlite.Collation
import org.sqlite.Function
import org.sqlite.SQLiteConnection
import java.sql.Connection
@ -13,6 +15,7 @@ class SqliteUdfDataSource : SimpleDriverDataSource() {
companion object {
// SQL-side name of the accent-stripping function.
// NOTE(review): presumably registered by createUdfStripAccents, whose body is not visible here — confirm.
const val udfStripAccents = "UDF_STRIP_ACCENTS"
// Name under which createUnicode3Collation registers the custom collation on each connection;
// queries pass this same name to Field.collate(...) to sort with it.
const val collationUnicode3 = "COLLATION_UNICODE_3"
}
override fun getConnection(): Connection =
@ -24,6 +27,7 @@ class SqliteUdfDataSource : SimpleDriverDataSource() {
/**
 * Installs every custom SQL extension on [connection].
 *
 * The registrations run in a fixed order: regexp function, accent-stripping
 * function, then the Unicode collation.
 */
private fun addAllUdf(connection: SQLiteConnection) {
  val registrations = listOf(
    ::createUdfRegexp,
    ::createUdfStripAccents,
    ::createUnicode3Collation,
  )
  registrations.forEach { register -> register(connection) }
}
private fun createUdfRegexp(connection: SQLiteConnection) {
@ -54,4 +58,19 @@ class SqliteUdfDataSource : SimpleDriverDataSource() {
}
)
}
/**
 * Registers the [collationUnicode3] collation on [connection].
 *
 * String comparison is delegated to an ICU [Collator] obtained from
 * `Collator.getInstance()` (no explicit locale is passed), configured with
 * tertiary strength and canonical decomposition.
 */
private fun createUnicode3Collation(connection: SQLiteConnection) {
  log.debug { "Adding custom $collationUnicode3 collation" }

  // One collator per registered collation, i.e. per connection passed in.
  val icuCollator = Collator.getInstance()
  icuCollator.strength = Collator.TERTIARY
  icuCollator.decomposition = Collator.CANONICAL_DECOMPOSITION

  val unicodeCollation = object : Collation() {
    override fun xCompare(str1: String, str2: String): Int =
      icuCollator.compare(str1, str2)
  }

  Collation.create(connection, collationUnicode3, unicodeCollation)
}
}

View file

@ -2,6 +2,7 @@ package org.gotson.komga.infrastructure.jooq
import org.gotson.komga.domain.model.BookSearchWithReadProgress
import org.gotson.komga.domain.model.ReadStatus
import org.gotson.komga.infrastructure.datasource.SqliteUdfDataSource
import org.gotson.komga.infrastructure.search.LuceneEntity
import org.gotson.komga.infrastructure.search.LuceneHelper
import org.gotson.komga.infrastructure.web.toFilePath
@ -48,19 +49,19 @@ class BookDtoDao(
private val bt = Tables.BOOK_METADATA_TAG
private val sorts = mapOf(
"name" to lower(b.NAME.udfStripAccents()),
"name" to b.NAME.collate(SqliteUdfDataSource.collationUnicode3),
"created" to b.CREATED_DATE,
"createdDate" to b.CREATED_DATE,
"lastModified" to b.LAST_MODIFIED_DATE,
"lastModifiedDate" to b.LAST_MODIFIED_DATE,
"fileSize" to b.FILE_SIZE,
"size" to b.FILE_SIZE,
"url" to lower(b.URL),
"media.status" to lower(m.STATUS),
"media.comment" to lower(m.COMMENT),
"media.mediaType" to lower(m.MEDIA_TYPE),
"url" to b.URL.noCase(),
"media.status" to m.STATUS.noCase(),
"media.comment" to m.COMMENT.noCase(),
"media.mediaType" to m.MEDIA_TYPE.noCase(),
"metadata.title" to d.TITLE.collate(SqliteUdfDataSource.collationUnicode3),
"metadata.numberSort" to d.NUMBER_SORT,
"metadata.title" to lower(d.TITLE.udfStripAccents()),
"metadata.releaseDate" to d.RELEASE_DATE,
"readProgress.lastModified" to r.LAST_MODIFIED_DATE,
"readList.number" to rlb.NUMBER,

View file

@ -2,6 +2,7 @@ package org.gotson.komga.infrastructure.jooq
import org.gotson.komga.domain.model.ReadList
import org.gotson.komga.domain.persistence.ReadListRepository
import org.gotson.komga.infrastructure.datasource.SqliteUdfDataSource
import org.gotson.komga.infrastructure.search.LuceneEntity
import org.gotson.komga.infrastructure.search.LuceneHelper
import org.gotson.komga.jooq.Tables
@ -9,7 +10,6 @@ import org.gotson.komga.jooq.tables.records.ReadlistRecord
import org.jooq.DSLContext
import org.jooq.Record
import org.jooq.ResultQuery
import org.jooq.impl.DSL
import org.springframework.data.domain.Page
import org.springframework.data.domain.PageImpl
import org.springframework.data.domain.PageRequest
@ -32,7 +32,7 @@ class ReadListDao(
private val b = Tables.BOOK
private val sorts = mapOf(
"name" to DSL.lower(rl.NAME.udfStripAccents()),
"name" to rl.NAME.collate(SqliteUdfDataSource.collationUnicode3),
)
override fun findByIdOrNull(readListId: String): ReadList? =

View file

@ -2,12 +2,12 @@ package org.gotson.komga.infrastructure.jooq
import org.gotson.komga.domain.model.Author
import org.gotson.komga.domain.persistence.ReferentialRepository
import org.gotson.komga.infrastructure.datasource.SqliteUdfDataSource
import org.gotson.komga.infrastructure.language.stripAccents
import org.gotson.komga.jooq.Tables
import org.gotson.komga.jooq.tables.records.BookMetadataAggregationAuthorRecord
import org.gotson.komga.jooq.tables.records.BookMetadataAuthorRecord
import org.jooq.DSLContext
import org.jooq.impl.DSL.lower
import org.jooq.impl.DSL.noCondition
import org.jooq.impl.DSL.select
import org.springframework.data.domain.Page
@ -42,7 +42,7 @@ class ReferentialDao(
.apply { filterOnLibraryIds?.let { leftJoin(b).on(a.BOOK_ID.eq(b.ID)) } }
.where(a.NAME.udfStripAccents().containsIgnoreCase(search.stripAccents()))
.apply { filterOnLibraryIds?.let { and(b.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(a.NAME.udfStripAccents()), a.ROLE)
.orderBy(a.NAME.collate(SqliteUdfDataSource.collationUnicode3))
.fetchInto(a)
.map { it.toDomain() }
@ -53,7 +53,7 @@ class ReferentialDao(
.where(bmaa.NAME.udfStripAccents().containsIgnoreCase(search.stripAccents()))
.and(s.LIBRARY_ID.eq(libraryId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(bmaa.NAME.udfStripAccents()), bmaa.ROLE)
.orderBy(bmaa.NAME.collate(SqliteUdfDataSource.collationUnicode3))
.fetchInto(bmaa)
.map { it.toDomain() }
@ -65,7 +65,7 @@ class ReferentialDao(
.where(bmaa.NAME.udfStripAccents().containsIgnoreCase(search.stripAccents()))
.and(cs.COLLECTION_ID.eq(collectionId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(bmaa.NAME.udfStripAccents()), bmaa.ROLE)
.orderBy(bmaa.NAME.collate(SqliteUdfDataSource.collationUnicode3))
.fetchInto(bmaa)
.map { it.toDomain() }
@ -76,7 +76,7 @@ class ReferentialDao(
.where(bmaa.NAME.udfStripAccents().containsIgnoreCase(search.stripAccents()))
.and(bmaa.SERIES_ID.eq(seriesId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(bmaa.NAME.udfStripAccents()), bmaa.ROLE)
.orderBy(bmaa.NAME.collate(SqliteUdfDataSource.collationUnicode3))
.fetchInto(bmaa)
.map { it.toDomain() }
@ -138,7 +138,7 @@ class ReferentialDao(
}
val count = dsl.fetchCount(query)
val sort = lower(bmaa.NAME.udfStripAccents())
val sort = bmaa.NAME.collate(SqliteUdfDataSource.collationUnicode3)
val items = query
.orderBy(sort)
@ -161,7 +161,7 @@ class ReferentialDao(
.apply { filterOnLibraryIds?.let { leftJoin(b).on(a.BOOK_ID.eq(b.ID)) } }
.where(a.NAME.udfStripAccents().containsIgnoreCase(search.stripAccents()))
.apply { filterOnLibraryIds?.let { and(b.LIBRARY_ID.`in`(it)) } }
.orderBy(a.NAME.udfStripAccents())
.orderBy(a.NAME.collate(SqliteUdfDataSource.collationUnicode3))
.fetch(a.NAME)
override fun findAllAuthorsRoles(filterOnLibraryIds: Collection<String>?): List<String> =
@ -185,7 +185,7 @@ class ReferentialDao(
.where(s.LIBRARY_ID.`in`(it))
}
}
.orderBy(lower(g.GENRE.udfStripAccents()))
.orderBy(g.GENRE.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(g.GENRE)
override fun findAllGenresByLibrary(libraryId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -194,7 +194,7 @@ class ReferentialDao(
.leftJoin(s).on(g.SERIES_ID.eq(s.ID))
.where(s.LIBRARY_ID.eq(libraryId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(g.GENRE.udfStripAccents()))
.orderBy(g.GENRE.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(g.GENRE)
override fun findAllGenresByCollection(collectionId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -204,7 +204,7 @@ class ReferentialDao(
.apply { filterOnLibraryIds?.let { leftJoin(s).on(g.SERIES_ID.eq(s.ID)) } }
.where(cs.COLLECTION_ID.eq(collectionId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(g.GENRE.udfStripAccents()))
.orderBy(g.GENRE.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(g.GENRE)
override fun findAllSeriesAndBookTags(filterOnLibraryIds: Collection<String>?): Set<String> =
@ -265,7 +265,7 @@ class ReferentialDao(
.where(s.LIBRARY_ID.`in`(it))
}
}
.orderBy(lower(st.TAG.udfStripAccents()))
.orderBy(st.TAG.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(st.TAG)
override fun findAllSeriesTagsByLibrary(libraryId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -274,7 +274,7 @@ class ReferentialDao(
.leftJoin(s).on(st.SERIES_ID.eq(s.ID))
.where(s.LIBRARY_ID.eq(libraryId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(st.TAG.udfStripAccents()))
.orderBy(st.TAG.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(st.TAG)
override fun findAllBookTagsBySeries(seriesId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -283,7 +283,7 @@ class ReferentialDao(
.leftJoin(b).on(bt.BOOK_ID.eq(b.ID))
.where(b.SERIES_ID.eq(seriesId))
.apply { filterOnLibraryIds?.let { and(b.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(bt.TAG.udfStripAccents()))
.orderBy(bt.TAG.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(bt.TAG)
override fun findAllBookTagsByReadList(readListId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -293,7 +293,7 @@ class ReferentialDao(
.leftJoin(rb).on(bt.BOOK_ID.eq(rb.BOOK_ID))
.where(rb.READLIST_ID.eq(readListId))
.apply { filterOnLibraryIds?.let { and(b.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(bt.TAG.udfStripAccents()))
.orderBy(bt.TAG.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(bt.TAG)
override fun findAllSeriesTagsByCollection(collectionId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -303,7 +303,7 @@ class ReferentialDao(
.apply { filterOnLibraryIds?.let { leftJoin(s).on(st.SERIES_ID.eq(s.ID)) } }
.where(cs.COLLECTION_ID.eq(collectionId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(st.TAG.udfStripAccents()))
.orderBy(st.TAG.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(st.TAG)
override fun findAllBookTags(filterOnLibraryIds: Collection<String>?): Set<String> =
@ -315,7 +315,7 @@ class ReferentialDao(
.where(b.LIBRARY_ID.`in`(it))
}
}
.orderBy(lower(st.TAG.udfStripAccents()))
.orderBy(st.TAG.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(st.TAG)
override fun findAllLanguages(filterOnLibraryIds: Collection<String>?): Set<String> =
@ -354,7 +354,7 @@ class ReferentialDao(
.apply { filterOnLibraryIds?.let { leftJoin(s).on(sd.SERIES_ID.eq(s.ID)) } }
.where(sd.PUBLISHER.ne(""))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(sd.PUBLISHER.udfStripAccents()))
.orderBy(sd.PUBLISHER.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(sd.PUBLISHER)
override fun findAllPublishersByLibrary(libraryId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -364,7 +364,7 @@ class ReferentialDao(
.where(sd.PUBLISHER.ne(""))
.and(s.LIBRARY_ID.eq(libraryId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(sd.PUBLISHER.udfStripAccents()))
.orderBy(sd.PUBLISHER.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(sd.PUBLISHER)
override fun findAllPublishersByCollection(collectionId: String, filterOnLibraryIds: Collection<String>?): Set<String> =
@ -375,7 +375,7 @@ class ReferentialDao(
.where(sd.PUBLISHER.ne(""))
.and(cs.COLLECTION_ID.eq(collectionId))
.apply { filterOnLibraryIds?.let { and(s.LIBRARY_ID.`in`(it)) } }
.orderBy(lower(sd.PUBLISHER.udfStripAccents()))
.orderBy(sd.PUBLISHER.collate(SqliteUdfDataSource.collationUnicode3))
.fetchSet(sd.PUBLISHER)
override fun findAllAgeRatings(filterOnLibraryIds: Collection<String>?): Set<Int> =

View file

@ -2,6 +2,7 @@ package org.gotson.komga.infrastructure.jooq
import org.gotson.komga.domain.model.SeriesCollection
import org.gotson.komga.domain.persistence.SeriesCollectionRepository
import org.gotson.komga.infrastructure.datasource.SqliteUdfDataSource
import org.gotson.komga.infrastructure.search.LuceneEntity
import org.gotson.komga.infrastructure.search.LuceneHelper
import org.gotson.komga.jooq.Tables
@ -9,7 +10,6 @@ import org.gotson.komga.jooq.tables.records.CollectionRecord
import org.jooq.DSLContext
import org.jooq.Record
import org.jooq.ResultQuery
import org.jooq.impl.DSL
import org.springframework.data.domain.Page
import org.springframework.data.domain.PageImpl
import org.springframework.data.domain.PageRequest
@ -31,7 +31,7 @@ class SeriesCollectionDao(
private val s = Tables.SERIES
private val sorts = mapOf(
"name" to DSL.lower(c.NAME.udfStripAccents()),
"name" to c.NAME.collate(SqliteUdfDataSource.collationUnicode3),
)
override fun findByIdOrNull(collectionId: String): SeriesCollection? =

View file

@ -4,6 +4,7 @@ import mu.KotlinLogging
import org.gotson.komga.domain.model.ReadStatus
import org.gotson.komga.domain.model.SeriesSearch
import org.gotson.komga.domain.model.SeriesSearchWithReadProgress
import org.gotson.komga.infrastructure.datasource.SqliteUdfDataSource
import org.gotson.komga.infrastructure.search.LuceneEntity
import org.gotson.komga.infrastructure.search.LuceneHelper
import org.gotson.komga.infrastructure.web.toFilePath
@ -74,14 +75,14 @@ class SeriesDtoDao(
)
private val sorts = mapOf(
"metadata.titleSort" to lower(d.TITLE_SORT),
"metadata.titleSort" to d.TITLE_SORT.noCase(),
"createdDate" to s.CREATED_DATE,
"created" to s.CREATED_DATE,
"lastModifiedDate" to s.LAST_MODIFIED_DATE,
"lastModified" to s.LAST_MODIFIED_DATE,
"booksMetadata.releaseDate" to bma.RELEASE_DATE,
"collection.number" to cs.NUMBER,
"name" to lower(s.NAME.udfStripAccents()),
"name" to s.NAME.collate(SqliteUdfDataSource.collationUnicode3),
"booksCount" to s.BOOK_COUNT,
)

View file

@ -10,6 +10,8 @@ import java.time.LocalDateTime
import java.time.ZoneId
import java.time.ZoneOffset
/**
 * Returns this field with the `NOCASE` collation applied, for use in
 * case-insensitive ordering or comparison.
 *
 * NOTE(review): SQLite documents its built-in NOCASE as folding ASCII letters only —
 * confirm that is sufficient for the columns this is applied to.
 */
fun Field<String>.noCase() = collate("NOCASE")
/**
 * Reinterprets this date-time, taken as being in the system default time zone,
 * as the equivalent wall-clock date-time in UTC.
 *
 * @return the [LocalDateTime] denoting the same instant, expressed in UTC.
 */
fun LocalDateTime.toUTC(): LocalDateTime {
  // Attach the JVM's default zone to obtain an actual point on the timeline.
  val inSystemZone = atZone(ZoneId.systemDefault())
  // Shift to UTC while keeping the instant, then drop the zone again.
  val inUtc = inSystemZone.withZoneSameInstant(ZoneOffset.UTC)
  return inUtc.toLocalDateTime()
}