mirror of
https://github.com/stashapp/stash.git
synced 2025-12-15 04:44:28 +01:00
* Add collation to directory listings. Closes #1806 Introduce a new `locale` arg to the `Query.directory` field. Set "en" as the default for the field for backward compatibility. Use the given locale, sending it through a language matcher, and use `x/text` as the collation engine for the matched language. Augment the file `ListDirs` call to optionally take a Collator. If the Collator is given, sort file listings according to the collator's rules. While here, document the GraphQL schema a bit more. Add matchers by looking at the current front-end locales, and make sure each of these occurs in the matcher list. * Language matcher touchups * Avoid having `en-US` twice. * Introduce `en-AU`. * Pass IgnoreCase and Numeric collation Allow the collator to be configured with options. Pass the options IgnoreCase and Numeric to the collator.
239 lines
6.3 KiB
Go
239 lines
6.3 KiB
Go
// Copyright 2014 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package collate
|
|
|
|
import (
|
|
"sort"
|
|
|
|
"golang.org/x/text/internal/colltab"
|
|
"golang.org/x/text/language"
|
|
"golang.org/x/text/unicode/norm"
|
|
)
|
|
|
|
// newCollator creates a new collator with default options configured.
|
|
func newCollator(t colltab.Weighter) *Collator {
|
|
// Initialize a collator with default options.
|
|
c := &Collator{
|
|
options: options{
|
|
ignore: [colltab.NumLevels]bool{
|
|
colltab.Quaternary: true,
|
|
colltab.Identity: true,
|
|
},
|
|
f: norm.NFD,
|
|
t: t,
|
|
},
|
|
}
|
|
|
|
// TODO: store vt in tags or remove.
|
|
c.variableTop = t.Top()
|
|
|
|
return c
|
|
}
|
|
|
|
// An Option is used to change the behavior of a Collator. Options override the
|
|
// settings passed through the locale identifier.
|
|
type Option struct {
|
|
priority int
|
|
f func(o *options)
|
|
}
|
|
|
|
type prioritizedOptions []Option
|
|
|
|
func (p prioritizedOptions) Len() int {
|
|
return len(p)
|
|
}
|
|
|
|
func (p prioritizedOptions) Swap(i, j int) {
|
|
p[i], p[j] = p[j], p[i]
|
|
}
|
|
|
|
func (p prioritizedOptions) Less(i, j int) bool {
|
|
return p[i].priority < p[j].priority
|
|
}
|
|
|
|
type options struct {
|
|
// ignore specifies which levels to ignore.
|
|
ignore [colltab.NumLevels]bool
|
|
|
|
// caseLevel is true if there is an additional level of case matching
|
|
// between the secondary and tertiary levels.
|
|
caseLevel bool
|
|
|
|
// backwards specifies the order of sorting at the secondary level.
|
|
// This option exists predominantly to support reverse sorting of accents in French.
|
|
backwards bool
|
|
|
|
// numeric specifies whether any sequence of decimal digits (category is Nd)
|
|
// is sorted at a primary level with its numeric value.
|
|
// For example, "A-21" < "A-123".
|
|
// This option is set by wrapping the main Weighter with NewNumericWeighter.
|
|
numeric bool
|
|
|
|
// alternate specifies an alternative handling of variables.
|
|
alternate alternateHandling
|
|
|
|
// variableTop is the largest primary value that is considered to be
|
|
// variable.
|
|
variableTop uint32
|
|
|
|
t colltab.Weighter
|
|
|
|
f norm.Form
|
|
}
|
|
|
|
func (o *options) setOptions(opts []Option) {
|
|
sort.Sort(prioritizedOptions(opts))
|
|
for _, x := range opts {
|
|
x.f(o)
|
|
}
|
|
}
|
|
|
|
// OptionsFromTag extracts the BCP47 collation options from the tag and
|
|
// configures a collator accordingly. These options are set before any other
|
|
// option.
|
|
func OptionsFromTag(t language.Tag) Option {
|
|
return Option{0, func(o *options) {
|
|
o.setFromTag(t)
|
|
}}
|
|
}
|
|
|
|
func (o *options) setFromTag(t language.Tag) {
|
|
o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
|
|
o.backwards = ldmlBool(t, o.backwards, "kb")
|
|
o.numeric = ldmlBool(t, o.numeric, "kn")
|
|
|
|
// Extract settings from the BCP47 u extension.
|
|
switch t.TypeForKey("ks") { // strength
|
|
case "level1":
|
|
o.ignore[colltab.Secondary] = true
|
|
o.ignore[colltab.Tertiary] = true
|
|
case "level2":
|
|
o.ignore[colltab.Tertiary] = true
|
|
case "level3", "":
|
|
// The default.
|
|
case "level4":
|
|
o.ignore[colltab.Quaternary] = false
|
|
case "identic":
|
|
o.ignore[colltab.Quaternary] = false
|
|
o.ignore[colltab.Identity] = false
|
|
}
|
|
|
|
switch t.TypeForKey("ka") {
|
|
case "shifted":
|
|
o.alternate = altShifted
|
|
// The following two types are not official BCP47, but we support them to
|
|
// give access to this otherwise hidden functionality. The name blanked is
|
|
// derived from the LDML name blanked and posix reflects the main use of
|
|
// the shift-trimmed option.
|
|
case "blanked":
|
|
o.alternate = altBlanked
|
|
case "posix":
|
|
o.alternate = altShiftTrimmed
|
|
}
|
|
|
|
// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").
|
|
|
|
// Not used:
|
|
// - normalization ("kk", not necessary for this implementation)
|
|
// - hiraganaQuatenary ("kh", obsolete)
|
|
}
|
|
|
|
func ldmlBool(t language.Tag, old bool, key string) bool {
|
|
switch t.TypeForKey(key) {
|
|
case "true":
|
|
return true
|
|
case "false":
|
|
return false
|
|
default:
|
|
return old
|
|
}
|
|
}
|
|
|
|
var (
|
|
// IgnoreCase sets case-insensitive comparison.
|
|
IgnoreCase Option = ignoreCase
|
|
ignoreCase = Option{3, ignoreCaseF}
|
|
|
|
// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
|
|
IgnoreDiacritics Option = ignoreDiacritics
|
|
ignoreDiacritics = Option{3, ignoreDiacriticsF}
|
|
|
|
// IgnoreWidth causes full-width characters to match their half-width
|
|
// equivalents.
|
|
IgnoreWidth Option = ignoreWidth
|
|
ignoreWidth = Option{2, ignoreWidthF}
|
|
|
|
// Loose sets the collator to ignore diacritics, case and width.
|
|
Loose Option = loose
|
|
loose = Option{4, looseF}
|
|
|
|
// Force ordering if strings are equivalent but not equal.
|
|
Force Option = force
|
|
force = Option{5, forceF}
|
|
|
|
// Numeric specifies that numbers should sort numerically ("2" < "12").
|
|
Numeric Option = numeric
|
|
numeric = Option{5, numericF}
|
|
)
|
|
|
|
func ignoreWidthF(o *options) {
|
|
o.ignore[colltab.Tertiary] = true
|
|
o.caseLevel = true
|
|
}
|
|
|
|
func ignoreDiacriticsF(o *options) {
|
|
o.ignore[colltab.Secondary] = true
|
|
}
|
|
|
|
func ignoreCaseF(o *options) {
|
|
o.ignore[colltab.Tertiary] = true
|
|
o.caseLevel = false
|
|
}
|
|
|
|
func looseF(o *options) {
|
|
ignoreWidthF(o)
|
|
ignoreDiacriticsF(o)
|
|
ignoreCaseF(o)
|
|
}
|
|
|
|
func forceF(o *options) {
|
|
o.ignore[colltab.Identity] = false
|
|
}
|
|
|
|
// numericF implements Numeric: it turns on numeric ordering.
func numericF(o *options) {
	o.numeric = true
}
|
|
|
|
// Reorder overrides the pre-defined ordering of scripts and character sets.
|
|
func Reorder(s ...string) Option {
|
|
// TODO: need fractional weights to implement this.
|
|
panic("TODO: implement")
|
|
}
|
|
|
|
// TODO: consider making these public again. These options cannot be fully
|
|
// specified in BCP47, so an API interface seems warranted. Still a higher-level
|
|
// interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)
|
|
|
|
// alternateHandling identifies the various ways in which variables are
// handled. A rune with a primary weight lower than the variable top is
// considered a variable.
// See https://www.unicode.org/reports/tr10/#Variable_Weighting for details.
type alternateHandling int

const (
	// altNonIgnorable turns off special handling of variables. It is the
	// zero value of alternateHandling.
	altNonIgnorable = alternateHandling(iota)

	// altBlanked makes variables and all subsequent primary ignorables
	// ignorable at all levels. This is identical to removing all variables
	// and subsequent primary ignorables from the input.
	altBlanked

	// altShifted makes variables ignorable for levels one through three and
	// adds a fourth level based on the values of the ignored levels.
	altShifted

	// altShiftTrimmed is a slight variant of altShifted that is used to
	// emulate POSIX.
	altShiftTrimmed
)
|