stash/vendor/golang.org/x/text/collate/option.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package collate

import (
	"sort"

	"golang.org/x/text/internal/colltab"
	"golang.org/x/text/language"
	"golang.org/x/text/unicode/norm"
)

// newCollator creates a new collator with default options configured.
func newCollator(t colltab.Weighter) *Collator {
	// Initialize a collator with default options.
	c := &Collator{
		options: options{
			ignore: [colltab.NumLevels]bool{
				colltab.Quaternary: true,
				colltab.Identity:   true,
			},
			f: norm.NFD,
			t: t,
		},
	}

	// TODO: store vt in tags or remove.
	c.variableTop = t.Top()

	return c
}

// An Option is used to change the behavior of a Collator. Options override the
// settings passed through the locale identifier.
type Option struct {
	priority int
	f        func(o *options)
}

type prioritizedOptions []Option

func (p prioritizedOptions) Len() int {
	return len(p)
}

func (p prioritizedOptions) Swap(i, j int) {
	p[i], p[j] = p[j], p[i]
}

func (p prioritizedOptions) Less(i, j int) bool {
	return p[i].priority < p[j].priority
}

type options struct {
	// ignore specifies which levels to ignore.
	ignore [colltab.NumLevels]bool

	// caseLevel is true if there is an additional level of case matching
	// between the secondary and tertiary levels.
	caseLevel bool

	// backwards specifies the order of sorting at the secondary level.
	// This option exists predominantly to support reverse sorting of accents in French.
	backwards bool

	// numeric specifies whether any sequence of decimal digits (category is Nd)
	// is sorted at a primary level with its numeric value.
	// For example, "A-21" < "A-123".
	// This option is set by wrapping the main Weighter with NewNumericWeighter.
	numeric bool

	// alternate specifies an alternative handling of variables.
	alternate alternateHandling

	// variableTop is the largest primary value that is considered to be
	// variable.
	variableTop uint32

	t colltab.Weighter

	f norm.Form
}

func (o *options) setOptions(opts []Option) {
	sort.Sort(prioritizedOptions(opts))
	for _, x := range opts {
		x.f(o)
	}
}

// OptionsFromTag extracts the BCP47 collation options from the tag and
// configures a collator accordingly. These options are set before any other
// option.
func OptionsFromTag(t language.Tag) Option {
	return Option{0, func(o *options) {
		o.setFromTag(t)
	}}
}

func (o *options) setFromTag(t language.Tag) {
	o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
	o.backwards = ldmlBool(t, o.backwards, "kb")
	o.numeric = ldmlBool(t, o.numeric, "kn")

	// Extract settings from the BCP47 u extension.
	switch t.TypeForKey("ks") { // strength
	case "level1":
		o.ignore[colltab.Secondary] = true
		o.ignore[colltab.Tertiary] = true
	case "level2":
		o.ignore[colltab.Tertiary] = true
	case "level3", "":
		// The default.
	case "level4":
		o.ignore[colltab.Quaternary] = false
	case "identic":
		o.ignore[colltab.Quaternary] = false
		o.ignore[colltab.Identity] = false
	}

	switch t.TypeForKey("ka") {
	case "shifted":
		o.alternate = altShifted
	// The following two types are not official BCP47, but we support them to
	// give access to this otherwise hidden functionality. The name blanked is
	// derived from the LDML name blanked and posix reflects the main use of
	// the shift-trimmed option.
	case "blanked":
		o.alternate = altBlanked
	case "posix":
		o.alternate = altShiftTrimmed
	}

	// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").

	// Not used:
	// - normalization ("kk", not necessary for this implementation)
	// - hiraganaQuatenary ("kh", obsolete)
}

func ldmlBool(t language.Tag, old bool, key string) bool {
	switch t.TypeForKey(key) {
	case "true":
		return true
	case "false":
		return false
	default:
		return old
	}
}

var (
	// IgnoreCase sets case-insensitive comparison.
	IgnoreCase Option = ignoreCase
	ignoreCase        = Option{3, ignoreCaseF}

	// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
	IgnoreDiacritics Option = ignoreDiacritics
	ignoreDiacritics        = Option{3, ignoreDiacriticsF}

	// IgnoreWidth causes full-width characters to match their half-width
	// equivalents.
	IgnoreWidth Option = ignoreWidth
	ignoreWidth        = Option{2, ignoreWidthF}

	// Loose sets the collator to ignore diacritics, case and width.
	Loose Option = loose
	loose        = Option{4, looseF}

	// Force ordering if strings are equivalent but not equal.
	Force Option = force
	force        = Option{5, forceF}

	// Numeric specifies that numbers should sort numerically ("2" < "12").
	Numeric Option = numeric
	numeric        = Option{5, numericF}
)

func ignoreWidthF(o *options) {
	o.ignore[colltab.Tertiary] = true
	o.caseLevel = true
}

func ignoreDiacriticsF(o *options) {
	o.ignore[colltab.Secondary] = true
}

func ignoreCaseF(o *options) {
	o.ignore[colltab.Tertiary] = true
	o.caseLevel = false
}

func looseF(o *options) {
	ignoreWidthF(o)
	ignoreDiacriticsF(o)
	ignoreCaseF(o)
}

func forceF(o *options) {
	o.ignore[colltab.Identity] = false
}

func numericF(o *options) { o.numeric = true }

// Reorder overrides the pre-defined ordering of scripts and character sets.
func Reorder(s ...string) Option {
	// TODO: need fractional weights to implement this.
	panic("TODO: implement")
}

// TODO: consider making these public again. These options cannot be fully
// specified in BCP47, so an API interface seems warranted. Still a higher-level
// interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)

// alternateHandling identifies the various ways in which variables are handled.
// A rune with a primary weight lower than the variable top is considered a
// variable.
// See https://www.unicode.org/reports/tr10/#Variable_Weighting for details.
type alternateHandling int

const (
	// altNonIgnorable turns off special handling of variables.
	altNonIgnorable alternateHandling = iota

	// altBlanked sets variables and all subsequent primary ignorables to be
	// ignorable at all levels. This is identical to removing all variables
	// and subsequent primary ignorables from the input.
	altBlanked

	// altShifted sets variables to be ignorable for levels one through three and
	// adds a fourth level based on the values of the ignored levels.
	altShifted

	// altShiftTrimmed is a slight variant of altShifted that is used to
	// emulate POSIX.
	altShiftTrimmed
)