go/src/golang.org/x/text/internal/number/pattern.go - third_party - Git at Google

 // Copyright 2015 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package number

 import (
 	"errors"
 	"unicode/utf8"
 )

 // This file contains a parser for the CLDR number patterns as described in
 // http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
 //
 // The following BNF is derived from this standard.
 //
 // pattern    := subpattern (';' subpattern)?
 // subpattern := affix? number exponent? affix?
 // number     := decimal | sigDigits
 // decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
 // fraction   := '0'* '#'*
 // sigDigits  := '#'* '@' '@'* '#'*
 // exponent   := 'E' '+'? '0'* '0'
 // padSpec    := '*' \L
 //
 // Notes:
 // - An affix pattern may contain any runes, but runes with special meaning
 //   should be escaped.
 // - Sequences of digits, '#', and '@' in decimal and sigDigits may have
 //   interstitial commas.

 // TODO: replace special characters in affixes (-, +, ¤) with control codes.

 // Format holds information for formatting numbers. It is designed to hold
 // information from CLDR number patterns.
 //
 // This pattern is precompiled for all patterns for all languages. Even though
 // the number of patterns is not very large, we want to keep this small.
 //
 // This type is only intended for internal use.
 type Format struct {
 	// TODO: this struct can be packed a lot better than it is now. Should be
 	// possible to make it 32 bytes.

 	// Affix is a sequence of length-prefixed strings: the prefix and suffix
 	// of the positive sub pattern, followed by the prefix and suffix of the
 	// negative sub pattern when one was given. ParsePattern leaves it empty
 	// when all affixes are empty.
 	Affix     string // includes prefix and suffix. First byte is prefix length.
 	// NOTE(review): Offset is not assigned by the parser in this file.
 	Offset    uint16 // Offset into Affix for prefix and suffix
 	NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.

 	// Multiplier is set to 100 when an affix contains '%' and to 1000 when
 	// it contains '‰'; otherwise it stays zero.
 	Multiplier     uint32
 	// RoundIncrement accumulates the decimal digits written in the integer
 	// and fraction parts of the pattern.
 	RoundIncrement uint32 // Use Min*Digits to determine scale
 	// PadRune is the rune that followed '*' in the pattern, or zero when the
 	// pattern has no pad specifier.
 	PadRune        rune

 	// FormatWidth is incremented for every rune accepted by the affix and
 	// number state machines.
 	FormatWidth uint16

 	// GroupingSize[0] is the size of the most recently closed digit group,
 	// GroupingSize[1] the size of the group closed before it.
 	GroupingSize [2]uint8
 	Flags        FormatFlag

 	// Number of digits.
 	MinIntegerDigits     uint8
 	MaxIntegerDigits     uint8
 	MinFractionDigits    uint8
 	MaxFractionDigits    uint8
 	MinSignificantDigits uint8
 	MaxSignificantDigits uint8
 	MinExponentDigits    uint8
 }

 // A FormatFlag is a bit mask for the flag field of a Format.
 type FormatFlag uint8

 // Flag bits stored in Format.Flags. The first four are independent switches;
 // PadAfterNumber and PadAfterAffix together encode one of four pad positions,
 // which can be isolated with PadMask.
 const (
 	// NOTE(review): AlwaysSign, AlwaysDecimalSeparator and
 	// ParenthesisForNegative are not set by the parser in this file.
 	AlwaysSign FormatFlag = 1 << iota
 	// AlwaysExpSign is set when the exponent is written with a '+'.
 	AlwaysExpSign
 	AlwaysDecimalSeparator
 	ParenthesisForNegative // Common pattern. Saves space.

 	PadAfterNumber
 	PadAfterAffix

 	// Pad positions, expressed as combinations of the two bits above.
 	PadBeforePrefix = 0 // Default
 	PadAfterPrefix  = PadAfterAffix
 	PadBeforeSuffix = PadAfterNumber
 	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
 	PadMask         = PadAfterNumber | PadAfterAffix
 )

 // parser holds the state of a single sub pattern parse. It embeds the Format
 // being filled in so that state functions can update its fields directly.
 type parser struct {
 	*Format

 	// leadingSharps counts the '#' runes seen at the start of the number.
 	leadingSharps int

 	// NOTE(review): pos is not referenced by any code in this file.
 	pos            int
 	// err is the first error recorded through setError.
 	err            error
 	// doNotTerminate is set by a state function to signal that the input may
 	// not end after the current rune; parse reports errUnexpectedEnd if it does.
 	doNotTerminate bool
 	// groupingCount is the number of digit positions seen since the last
 	// call to updateGrouping.
 	groupingCount  uint
 	// hasGroup becomes true after the first call to updateGrouping.
 	hasGroup       bool
 	// buf accumulates the length-prefixed affixes that end up in Format.Affix.
 	buf            []byte
 }

 // setError records err unless an earlier error has already been recorded, so
 // that the first problem found in a pattern is the one reported.
 func (p *parser) setError(err error) {
 	if p.err != nil {
 		return
 	}
 	p.err = err
 }

 // updateGrouping closes the current digit group. If a group boundary was seen
 // before and the count fits in a byte, the previous primary size moves to
 // GroupingSize[1] and the current count becomes GroupingSize[0]. The counter
 // is reset in every case.
 func (p *parser) updateGrouping() {
 	if n := p.groupingCount; p.hasGroup && n < 255 {
 		p.GroupingSize[1], p.GroupingSize[0] = p.GroupingSize[0], uint8(n)
 	}
 	p.hasGroup = true
 	p.groupingCount = 0
 }

 // Errors reported by the pattern parser.
 var (
 	// TODO: more sensible and localizeable error messages.
 	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
 	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
 	errInvalidQuote          = errors.New("format: invalid quote")
 	// The affix length is stored in a single byte, so the largest affix that
 	// can be represented is 255 bytes long.
 	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 255 bytes")
 	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
 	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
 	errUnexpectedEnd         = errors.New("format: unexpected end of pattern")
 )

 // ParsePattern extracts formatting information from a CLDR number pattern.
 //
 // A pattern is a positive sub pattern optionally followed by ';' and a
 // negative sub pattern. Only the affixes of the negative sub pattern are
 // kept: they are appended to Affix and NegOffset records where they start.
 // If parsing fails, the returned Format is nil and err is the first error
 // that was recorded.
 //
 // See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
 func ParsePattern(s string) (f *Format, err error) {
 	p := parser{Format: &Format{}}

 	s = p.parseSubPattern(s)

 	if s != "" {
 		// Parse negative sub pattern.
 		if s[0] != ';' {
 			p.setError(errors.New("format: error parsing first sub pattern"))
 			return nil, p.err
 		}
 		neg := parser{Format: &Format{}} // just for extracting the affixes.
 		s = neg.parseSubPattern(s[len(";"):])
 		// The negative sub pattern has its own parser, so its errors must be
 		// carried over explicitly. Without this, an oversized negative affix
 		// would be stored with a truncated length byte.
 		if neg.err != nil {
 			p.setError(neg.err)
 		}
 		p.NegOffset = uint16(len(p.buf))
 		p.buf = append(p.buf, neg.buf...)
 	}
 	if s != "" {
 		p.setError(errors.New("format: spurious characters at end of pattern"))
 	}
 	if p.err != nil {
 		return nil, p.err
 	}
 	if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
 		// No prefix or suffixes.
 		p.NegOffset = 0
 	} else {
 		p.Affix = affix
 	}
 	return p.Format, nil
 }

 // parseSubPattern parses one sub pattern: an optional pad specifier, the
 // prefix, the number, the suffix, with further optional pad specifiers in
 // between. It returns the part of s that was not consumed.
 func (p *parser) parseSubPattern(s string) string {
 	s = p.parsePad(s, PadBeforePrefix)
 	s = p.parseAffix(s)
 	s = p.parsePad(s, PadAfterPrefix)

 	s = p.parse(p.number, s)
 	// The state functions only close a digit group when they see the rune
 	// that follows it. A group that is still open here (the pattern ended,
 	// or the number consisted of '#' only) would otherwise never be recorded,
 	// so "#,##0" would end up without a grouping size.
 	if p.groupingCount > 0 {
 		p.updateGrouping()
 	}

 	s = p.parsePad(s, PadBeforeSuffix)
 	s = p.parseAffix(s)
 	s = p.parsePad(s, PadAfterSuffix)
 	return s
 }

 // parsePad consumes a pad specifier ('*' followed by the pad rune) at the
 // start of s, if there is one, and returns the remainder. The first pad
 // specifier stores its rune in PadRune and ORs f into Flags to record the pad
 // position. Any further pad specifier is consumed as well but reported as
 // errMultiplePadSpecifiers.
 func (p *parser) parsePad(s string, f FormatFlag) (tail string) {
 	if len(s) < 2 || s[0] != '*' {
 		return s
 	}
 	r, sz := utf8.DecodeRuneInString(s[1:])
 	if p.PadRune != 0 {
 		// Go through setError so that an error recorded earlier in the
 		// pattern is not overwritten.
 		p.setError(errMultiplePadSpecifiers)
 	} else {
 		p.Flags |= f
 		p.PadRune = r
 	}
 	return s[1+sz:]
 }

 // parseAffix parses a prefix or suffix from the start of s and appends it to
 // p.buf, preceded by a single byte holding its length. It returns the part of
 // s that was not consumed. An affix longer than 255 bytes is reported as
 // errAffixTooLarge.
 func (p *parser) parseAffix(s string) string {
 	lenIdx := len(p.buf)
 	p.buf = append(p.buf, 0) // reserve the length byte; filled in below

 	rest := p.parse(p.affix, s)

 	size := len(p.buf) - (lenIdx + 1)
 	if size > 0xFF {
 		p.setError(errAffixTooLarge)
 	}
 	p.buf[lenIdx] = uint8(size)
 	return rest
 }

 // state implements a state transition. It is called with the next rune of the
 // input and returns the state to use for the rune after that, or nil when the
 // rune is not part of the construct being parsed. A state function may set an
 // error on the parser or may simply return on an incorrect token and let the
 // next phase fail.
 type state func(r rune) state

 // parse feeds the runes of s to fn, replacing fn with the state it returns
 // after each rune. It stops at the first rune for which the state function
 // returns nil or after which an error is recorded, and returns the input from
 // that rune onwards. FormatWidth is incremented for every rune that was
 // accepted. If the input runs out while the last state asked not to
 // terminate, errUnexpectedEnd is recorded.
 func (p *parser) parse(fn state, s string) (tail string) {
 	for off, c := range s {
 		p.doNotTerminate = false
 		fn = fn(c)
 		if fn == nil || p.err != nil {
 			return s[off:]
 		}
 		p.FormatWidth++
 	}
 	if !p.doNotTerminate {
 		return ""
 	}
 	p.setError(errUnexpectedEnd)
 	return ""
 }

 // affix is the state for an unquoted part of a prefix or suffix. Digits and
 // the pattern characters '#', '@', '.', '*', ',' and ';' end the affix. A
 // single quote switches to the escape state. Every other rune is appended to
 // the affix buffer; '%' and '‰' additionally set the multiplier and report
 // an error if a multiplier was already set.
 func (p *parser) affix(r rune) state {
 	switch {
 	case '0' <= r && r <= '9',
 		r == '#', r == '@', r == '.', r == '*', r == ',', r == ';':
 		return nil
 	case r == '\'':
 		return p.escape
 	case r == '%':
 		if p.Multiplier != 0 {
 			p.setError(errDuplicatePercentSign)
 		}
 		p.Multiplier = 100
 	case r == '\u2030': // ‰ Per mille
 		if p.Multiplier != 0 {
 			p.setError(errDuplicatePermilleSign)
 		}
 		p.Multiplier = 1000
 		// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
 	}
 	p.buf = append(p.buf, string(r)...)
 	return p.affix
 }

 // escape is the state inside a quoted part of an affix. Every rune is copied
 // to the affix buffer unchanged until the closing single quote, which returns
 // to the affix state without being copied.
 func (p *parser) escape(r rune) state {
 	if r == '\'' {
 		return p.affix
 	}
 	p.buf = append(p.buf, string(r)...)
 	return p.escape
 }

 // number parses a number. The BNF says the integer part should always have
 // a '0', but that does not appear to be the case according to the rest of the
 // documentation. We will allow having only '#' numbers.
 //
 // This is the initial state of the number part. It consumes leading '#' and
 // ',' runes and hands over to the integer, sigDigits, fraction or exponent
 // state on the first rune that belongs to one of them. Any other rune ends
 // the number.
 func (p *parser) number(r rune) state {
 	switch r {
 	case '#':
 		p.groupingCount++
 		p.leadingSharps++
 	case '@':
 		// sigDigits adds this '@' to the current group itself. Counting it
 		// here too would make the group one larger than written.
 		p.leadingSharps = 0
 		return p.sigDigits(r)
 	case ',':
 		if p.leadingSharps == 0 { // no leading commas
 			return nil
 		}
 		p.updateGrouping()
 	case 'E':
 		p.MaxIntegerDigits = uint8(p.leadingSharps)
 		return p.exponent
 	case '.': // allow ".##" etc.
 		p.updateGrouping()
 		return p.fraction
 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 		return p.integer(r)
 	default:
 		return nil
 	}
 	return p.number
 }

 // integer is the state for the digits of the integer part. Each digit is
 // added to RoundIncrement and counted towards MinIntegerDigits and the
 // current group. A non-digit closes the current group: ',' is followed by
 // more integer digits, '.' starts the fraction, 'E' starts the exponent, and
 // any other rune ends the number.
 func (p *parser) integer(r rune) state {
 	if !('0' <= r && r <= '9') {
 		var next state
 		switch r {
 		case 'E':
 			if p.leadingSharps > 0 {
 				p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
 			}
 			next = p.exponent
 		case '.':
 			next = p.fraction
 		case ',':
 			// Digit sequences may contain interstitial commas, as in
 			// "0,000"; keep reading integer digits after the separator.
 			next = p.integer
 		}
 		p.updateGrouping()
 		return next
 	}
 	p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
 	p.groupingCount++
 	p.MinIntegerDigits++
 	return p.integer
 }

 // sigDigits is the state for the '@' runes of a significant-digits pattern.
 // Each '@' raises both the minimum and the maximum number of significant
 // digits. A '#' is handed to sigDigitsFinal. Any other rune closes the
 // current group; 'E' then continues with the exponent and everything else
 // ends the number.
 func (p *parser) sigDigits(r rune) state {
 	if r == '@' {
 		p.groupingCount++
 		p.MaxSignificantDigits++
 		p.MinSignificantDigits++
 		return p.sigDigits
 	}
 	if r == '#' {
 		return p.sigDigitsFinal(r)
 	}
 	p.updateGrouping()
 	if r == 'E' {
 		return p.normalizeSigDigitsWithExponent()
 	}
 	return nil
 }

 // sigDigitsFinal is the state for the '#' runes that trail the '@' runes of
 // a significant-digits pattern. Each '#' raises only the maximum number of
 // significant digits. Any other rune closes the current group; 'E' then
 // continues with the exponent and everything else ends the number.
 func (p *parser) sigDigitsFinal(r rune) state {
 	if r == '#' {
 		p.groupingCount++
 		p.MaxSignificantDigits++
 		return p.sigDigitsFinal
 	}
 	p.updateGrouping()
 	if r != 'E' {
 		return nil
 	}
 	return p.normalizeSigDigitsWithExponent()
 }

 // normalizeSigDigitsWithExponent rewrites a significant-digits specification
 // that is followed by an exponent as exactly one integer digit plus one
 // fraction digit fewer than the significant-digit counts, clears the
 // significant-digit counts, and returns the exponent state.
 func (p *parser) normalizeSigDigitsWithExponent() state {
 	minSig, maxSig := p.MinSignificantDigits, p.MaxSignificantDigits
 	p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
 	p.MinFractionDigits, p.MaxFractionDigits = minSig-1, maxSig-1
 	p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
 	return p.exponent
 }

 // fraction is the state for the runes after the decimal separator. A digit
 // is added to RoundIncrement and raises both fraction-digit counts; a '#'
 // raises only the maximum. 'E' continues with the exponent and any other
 // rune ends the number.
 func (p *parser) fraction(r rune) state {
 	switch {
 	case '0' <= r && r <= '9':
 		p.MinFractionDigits++
 		p.MaxFractionDigits++
 		p.RoundIncrement = 10*p.RoundIncrement + uint32(r-'0')
 		return p.fraction
 	case r == '#':
 		p.MaxFractionDigits++
 		return p.fraction
 	case r == 'E':
 		if p.leadingSharps > 0 {
 			p.MaxIntegerDigits = p.MinIntegerDigits + uint8(p.leadingSharps)
 		}
 		return p.exponent
 	}
 	return nil
 }

 // exponent is the state for the runes after 'E'. A single '+' is accepted
 // before the first digit; it sets AlwaysExpSign and marks the input as not
 // allowed to end here. Each '0' raises MinExponentDigits. Any other rune ends
 // the exponent, which is an error if no '0' has been seen.
 func (p *parser) exponent(r rune) state {
 	if r == '0' {
 		p.MinExponentDigits++
 		return p.exponent
 	}
 	// Only the first '+', and only before any digit, belongs to the exponent.
 	if r == '+' && p.Flags&AlwaysExpSign == 0 && p.MinExponentDigits == 0 {
 		p.Flags |= AlwaysExpSign
 		p.doNotTerminate = true
 		return p.exponent
 	}
 	// termination condition
 	if p.MinExponentDigits == 0 {
 		p.setError(errors.New("format: need at least one digit"))
 	}
 	return nil
 }
	// Copyright 2015 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package number

	import (
	"errors"
	"unicode/utf8"
	)

	// This file contains a parser for the CLDR number patterns as described in
	// http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
	//
	// The following BNF is derived from this standard.
	//
	// pattern := subpattern (';' subpattern)?
	// subpattern := affix? number exponent? affix?
	// number := decimal | sigDigits
	// decimal := '#'* '0'* ('.' fraction)? | '#' | '0'
	// fraction := '0'* '#'*
	// sigDigits := '#'* '@' '@'* '#'*
	// exponent := 'E' '+'? '0'* '0'
	// padSpec := '*' \L
	//
	// Notes:
	// - An affix pattern may contain any runes, but runes with special meaning
	// should be escaped.
	// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
	// interstitial commas.

	// TODO: replace special characters in affixes (-, +, ¤) with control codes.

	// Format holds information for formatting numbers. It is designed to hold
	// information from CLDR number patterns.
	//
	// This pattern is precompiled for all patterns for all languages. Even though
	// the number of patterns is not very large, we want to keep this small.
	//
	// This type is only intended for internal use.
	type Format struct {
	// TODO: this struct can be packed a lot better than it is now. Should be
	// possible to make it 32 bytes.

	// Affix is a sequence of length-prefixed strings: the prefix and suffix
	// of the positive sub pattern, followed by the prefix and suffix of the
	// negative sub pattern when one was given. ParsePattern leaves it empty
	// when all affixes are empty.
	Affix string // includes prefix and suffix. First byte is prefix length.
	// NOTE(review): Offset is not assigned by the parser in this file.
	Offset uint16 // Offset into Affix for prefix and suffix
	NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.

	// Multiplier is set to 100 when an affix contains '%' and to 1000 when
	// it contains '‰'; otherwise it stays zero.
	Multiplier uint32
	// RoundIncrement accumulates the decimal digits written in the integer
	// and fraction parts of the pattern.
	RoundIncrement uint32 // Use Min*Digits to determine scale
	// PadRune is the rune that followed '*' in the pattern, or zero when the
	// pattern has no pad specifier.
	PadRune rune

	// FormatWidth is incremented for every rune accepted by the affix and
	// number state machines.
	FormatWidth uint16

	// GroupingSize[0] is the size of the most recently closed digit group,
	// GroupingSize[1] the size of the group closed before it.
	GroupingSize [2]uint8
	Flags FormatFlag

	// Number of digits.
	MinIntegerDigits uint8
	MaxIntegerDigits uint8
	MinFractionDigits uint8
	MaxFractionDigits uint8
	MinSignificantDigits uint8
	MaxSignificantDigits uint8
	MinExponentDigits uint8
	}

	// A FormatFlag is a bit mask for the flag field of a Format.
	type FormatFlag uint8

	// Flag bits stored in Format.Flags. The first four are independent switches;
	// PadAfterNumber and PadAfterAffix together encode one of four pad positions,
	// which can be isolated with PadMask.
	const (
		AlwaysSign FormatFlag = 1 << iota
		AlwaysExpSign
		AlwaysDecimalSeparator
		ParenthesisForNegative // Common pattern. Saves space.

		PadAfterNumber
		PadAfterAffix

		// Pad positions, expressed as combinations of the two bits above.
		PadBeforePrefix = 0 // Default
		PadAfterPrefix  = PadAfterAffix
		PadBeforeSuffix = PadAfterNumber
		PadAfterSuffix  = PadAfterNumber | PadAfterAffix
		PadMask         = PadAfterNumber | PadAfterAffix
	)

	// parser holds the state of a single sub pattern parse. It embeds the Format
	// being filled in so that state functions can update its fields directly.
	type parser struct {
	*Format

	// leadingSharps counts the '#' runes seen at the start of the number.
	leadingSharps int

	// NOTE(review): pos is not referenced by any code in this file.
	pos int
	// err is the first error recorded through setError.
	err error
	// doNotTerminate is set by a state function to signal that the input may
	// not end after the current rune; parse reports errUnexpectedEnd if it does.
	doNotTerminate bool
	// groupingCount is the number of digit positions seen since the last
	// call to updateGrouping.
	groupingCount uint
	// hasGroup becomes true after the first call to updateGrouping.
	hasGroup bool
	// buf accumulates the length-prefixed affixes that end up in Format.Affix.
	buf []byte
	}

	// setError records err unless an earlier error has already been recorded, so
	// that the first problem found in a pattern is the one reported.
	func (p *parser) setError(err error) {
		if p.err != nil {
			return
		}
		p.err = err
	}

	// updateGrouping closes the current digit group. If a group boundary was seen
	// before and the count fits in a byte, the previous primary size moves to
	// GroupingSize[1] and the current count becomes GroupingSize[0]. The counter
	// is reset in every case.
	func (p *parser) updateGrouping() {
		if n := p.groupingCount; p.hasGroup && n < 255 {
			p.GroupingSize[1], p.GroupingSize[0] = p.GroupingSize[0], uint8(n)
		}
		p.hasGroup = true
		p.groupingCount = 0
	}

	// Errors reported by the pattern parser.
	var (
		// TODO: more sensible and localizeable error messages.
		errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
		errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
		errInvalidQuote          = errors.New("format: invalid quote")
		// The affix length is stored in a single byte, so the largest affix that
		// can be represented is 255 bytes long.
		errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 255 bytes")
		errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
		errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
		errUnexpectedEnd         = errors.New("format: unexpected end of pattern")
	)

	// ParsePattern extracts formatting information from a CLDR number pattern.
	//
	// See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
	func ParsePattern(s string) (f *Format, err error) {
	p := parser{Format: &Format{}}

	s = p.parseSubPattern(s)

	if s != "" {
	// Parse negative sub pattern.
	if s[0] != ';' {
	p.setError(errors.New("format: error parsing first sub pattern"))
	return nil, p.err
	}
	neg := parser{Format: &Format{}} // just for extracting the affixes.
	s = neg.parseSubPattern(s[len(";"):])
	p.NegOffset = uint16(len(p.buf))
	p.buf = append(p.buf, neg.buf...)
	}
	if s != "" {
	p.setError(errors.New("format: spurious characters at end of pattern"))
	}
	if p.err != nil {
	return nil, p.err
	}
	if affix := string(p.buf); affix == "\x00\x00" \|\| affix == "\x00\x00\x00\x00" {
	// No prefix or suffixes.
	p.NegOffset = 0
	} else {
	p.Affix = affix
	}
	return p.Format, nil
	}

	// parseSubPattern parses one sub pattern: an optional pad specifier, the
	// prefix, the number, the suffix, with further optional pad specifiers in
	// between. It returns the part of s that was not consumed.
	func (p *parser) parseSubPattern(s string) string {
		s = p.parsePad(s, PadBeforePrefix)
		s = p.parseAffix(s)
		s = p.parsePad(s, PadAfterPrefix)

		s = p.parse(p.number, s)
		// The state functions only close a digit group when they see the rune
		// that follows it. A group that is still open here (the pattern ended,
		// or the number consisted of '#' only) would otherwise never be recorded,
		// so "#,##0" would end up without a grouping size.
		if p.groupingCount > 0 {
			p.updateGrouping()
		}

		s = p.parsePad(s, PadBeforeSuffix)
		s = p.parseAffix(s)
		s = p.parsePad(s, PadAfterSuffix)
		return s
	}

	func (p *parser) parsePad(s string, f FormatFlag) (tail string) {
	if len(s) >= 2 && s[0] == '*' {
	r, sz := utf8.DecodeRuneInString(s[1:])
	if p.PadRune != 0 {
	p.err = errMultiplePadSpecifiers
	} else {
	p.Flags \|= f
	p.PadRune = r
	}
	return s[1+sz:]
	}
	return s
	}

	// parseAffix parses a prefix or suffix from the start of s and appends it to
	// p.buf, preceded by a single byte holding its length. It returns the part of
	// s that was not consumed. An affix longer than 255 bytes is reported as
	// errAffixTooLarge.
	func (p *parser) parseAffix(s string) string {
		lenIdx := len(p.buf)
		p.buf = append(p.buf, 0) // reserve the length byte; filled in below

		rest := p.parse(p.affix, s)

		size := len(p.buf) - (lenIdx + 1)
		if size > 0xFF {
			p.setError(errAffixTooLarge)
		}
		p.buf[lenIdx] = uint8(size)
		return rest
	}

	// state implements a state transition. It is called with the next rune of the
	// input and returns the state to use for the rune after that, or nil when the
	// rune is not part of the construct being parsed. A state function may set an
	// error on the parser or may simply return on an incorrect token and let the
	// next phase fail.
	type state func(r rune) state

	// parse repeatedly applies a state function on the given string until a
	// termination condition is reached.
	func (p *parser) parse(fn state, s string) (tail string) {
	for i, r := range s {
	p.doNotTerminate = false
	if fn = fn(r); fn == nil \|\| p.err != nil {
	return s[i:]
	}
	p.FormatWidth++
	}
	if p.doNotTerminate {
	p.setError(errUnexpectedEnd)
	}
	return ""
	}

	// affix is the state for an unquoted part of a prefix or suffix. Digits and
	// the pattern characters '#', '@', '.', '*', ',' and ';' end the affix. A
	// single quote switches to the escape state. Every other rune is appended to
	// the affix buffer; '%' and '‰' additionally set the multiplier and report
	// an error if a multiplier was already set.
	func (p *parser) affix(r rune) state {
		switch {
		case '0' <= r && r <= '9',
			r == '#', r == '@', r == '.', r == '*', r == ',', r == ';':
			return nil
		case r == '\'':
			return p.escape
		case r == '%':
			if p.Multiplier != 0 {
				p.setError(errDuplicatePercentSign)
			}
			p.Multiplier = 100
		case r == '\u2030': // ‰ Per mille
			if p.Multiplier != 0 {
				p.setError(errDuplicatePermilleSign)
			}
			p.Multiplier = 1000
			// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
		}
		p.buf = append(p.buf, string(r)...)
		return p.affix
	}

	// escape is the state inside a quoted part of an affix. Every rune is copied
	// to the affix buffer unchanged until the closing single quote, which returns
	// to the affix state without being copied.
	func (p *parser) escape(r rune) state {
		if r == '\'' {
			return p.affix
		}
		p.buf = append(p.buf, string(r)...)
		return p.escape
	}

	// number parses a number. The BNF says the integer part should always have
	// a '0', but that does not appear to be the case according to the rest of the
	// documentation. We will allow having only '#' numbers.
	//
	// This is the initial state of the number part. It consumes leading '#' and
	// ',' runes and hands over to the integer, sigDigits, fraction or exponent
	// state on the first rune that belongs to one of them. Any other rune ends
	// the number.
	func (p *parser) number(r rune) state {
		switch r {
		case '#':
			p.groupingCount++
			p.leadingSharps++
		case '@':
			// sigDigits adds this '@' to the current group itself. Counting it
			// here too would make the group one larger than written.
			p.leadingSharps = 0
			return p.sigDigits(r)
		case ',':
			if p.leadingSharps == 0 { // no leading commas
				return nil
			}
			p.updateGrouping()
		case 'E':
			p.MaxIntegerDigits = uint8(p.leadingSharps)
			return p.exponent
		case '.': // allow ".##" etc.
			p.updateGrouping()
			return p.fraction
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return p.integer(r)
		default:
			return nil
		}
		return p.number
	}

	// integer is the state for the digits of the integer part. Each digit is
	// added to RoundIncrement and counted towards MinIntegerDigits and the
	// current group. A non-digit closes the current group: ',' is followed by
	// more integer digits, '.' starts the fraction, 'E' starts the exponent, and
	// any other rune ends the number.
	func (p *parser) integer(r rune) state {
		if !('0' <= r && r <= '9') {
			var next state
			switch r {
			case 'E':
				if p.leadingSharps > 0 {
					p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
				}
				next = p.exponent
			case '.':
				next = p.fraction
			case ',':
				// Digit sequences may contain interstitial commas, as in
				// "0,000"; keep reading integer digits after the separator.
				next = p.integer
			}
			p.updateGrouping()
			return next
		}
		p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
		p.groupingCount++
		p.MinIntegerDigits++
		return p.integer
	}

	// sigDigits is the state for the '@' runes of a significant-digits pattern.
	// Each '@' raises both the minimum and the maximum number of significant
	// digits. A '#' is handed to sigDigitsFinal. Any other rune closes the
	// current group; 'E' then continues with the exponent and everything else
	// ends the number.
	func (p *parser) sigDigits(r rune) state {
		if r == '@' {
			p.groupingCount++
			p.MaxSignificantDigits++
			p.MinSignificantDigits++
			return p.sigDigits
		}
		if r == '#' {
			return p.sigDigitsFinal(r)
		}
		p.updateGrouping()
		if r == 'E' {
			return p.normalizeSigDigitsWithExponent()
		}
		return nil
	}

	// sigDigitsFinal is the state for the '#' runes that trail the '@' runes of
	// a significant-digits pattern. Each '#' raises only the maximum number of
	// significant digits. Any other rune closes the current group; 'E' then
	// continues with the exponent and everything else ends the number.
	func (p *parser) sigDigitsFinal(r rune) state {
		if r == '#' {
			p.groupingCount++
			p.MaxSignificantDigits++
			return p.sigDigitsFinal
		}
		p.updateGrouping()
		if r != 'E' {
			return nil
		}
		return p.normalizeSigDigitsWithExponent()
	}

	// normalizeSigDigitsWithExponent rewrites a significant-digits specification
	// that is followed by an exponent as exactly one integer digit plus one
	// fraction digit fewer than the significant-digit counts, clears the
	// significant-digit counts, and returns the exponent state.
	func (p *parser) normalizeSigDigitsWithExponent() state {
		minSig, maxSig := p.MinSignificantDigits, p.MaxSignificantDigits
		p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
		p.MinFractionDigits, p.MaxFractionDigits = minSig-1, maxSig-1
		p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
		return p.exponent
	}

	// fraction is the state for the runes after the decimal separator. A digit
	// is added to RoundIncrement and raises both fraction-digit counts; a '#'
	// raises only the maximum. 'E' continues with the exponent and any other
	// rune ends the number.
	func (p *parser) fraction(r rune) state {
		switch {
		case '0' <= r && r <= '9':
			p.MinFractionDigits++
			p.MaxFractionDigits++
			p.RoundIncrement = 10*p.RoundIncrement + uint32(r-'0')
			return p.fraction
		case r == '#':
			p.MaxFractionDigits++
			return p.fraction
		case r == 'E':
			if p.leadingSharps > 0 {
				p.MaxIntegerDigits = p.MinIntegerDigits + uint8(p.leadingSharps)
			}
			return p.exponent
		}
		return nil
	}

	func (p *parser) exponent(r rune) state {
	switch r {
	case '+':
	// Set mode and check it wasn't already set.
	if p.Flags&AlwaysExpSign != 0 \|\| p.MinExponentDigits > 0 {
	break
	}
	p.Flags \|= AlwaysExpSign
	p.doNotTerminate = true
	return p.exponent
	case '0':
	p.MinExponentDigits++
	return p.exponent
	}
	// termination condition
	if p.MinExponentDigits == 0 {
	p.setError(errors.New("format: need at least one digit"))
	}
	return nil
	}