Update toml dependency

This commit is contained in:
Frank Denis 2021-08-08 10:21:25 +02:00
parent 1052fa6323
commit c9d5d81e6a
43 changed files with 1455 additions and 825 deletions

View file

@ -1,12 +1,14 @@
package toml
import (
"errors"
"fmt"
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"
"github.com/BurntSushi/toml/internal"
)
type parser struct {
@ -14,39 +16,54 @@ type parser struct {
types map[string]tomlType
lx *lexer
// A list of keys in the order that they appear in the TOML data.
ordered []Key
// the full key for the current hash in scope
context Key
// the base key name for everything except hashes
currentKey string
// rough approximation of line number
approxLine int
// A map of 'key.group.names' to whether they were created implicitly.
implicits map[string]bool
ordered []Key // List of keys in the order that they appear in the TOML data.
context Key // Full key for the current hash in scope.
currentKey string // Base key name for everything except hashes.
approxLine int // Rough approximation of line number
implicits map[string]bool // Record implied keys (e.g. 'key.group.names').
}
type parseError string
// ParseError is used when a file can't be parsed: for example invalid integer
// literals, duplicate keys, etc.
type ParseError struct {
Message string
Line int
LastKey string
}
func (pe parseError) Error() string {
return string(pe)
func (pe ParseError) Error() string {
return fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
pe.Line, pe.LastKey, pe.Message)
}
func parse(data string) (p *parser, err error) {
defer func() {
if r := recover(); r != nil {
var ok bool
if err, ok = r.(parseError); ok {
if err, ok = r.(ParseError); ok {
return
}
panic(r)
}
}()
// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
// which mangles stuff.
if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") {
data = data[2:]
}
// Examine first few bytes for NULL bytes; this probably means it's a UTF-16
// file (second byte in surrogate pair being NULL). Again, do this here to
// avoid having to deal with UTF-8/16 stuff in the lexer.
ex := 6
if len(data) < 6 {
ex = len(data)
}
if strings.ContainsRune(data[:ex], 0) {
return nil, errors.New("files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8")
}
p = &parser{
mapping: make(map[string]interface{}),
types: make(map[string]tomlType),
@ -66,13 +83,17 @@ func parse(data string) (p *parser, err error) {
}
func (p *parser) panicf(format string, v ...interface{}) {
msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
p.approxLine, p.current(), fmt.Sprintf(format, v...))
panic(parseError(msg))
msg := fmt.Sprintf(format, v...)
panic(ParseError{
Message: msg,
Line: p.approxLine,
LastKey: p.current(),
})
}
func (p *parser) next() item {
it := p.lx.nextItem()
//fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.line, it.val)
if it.typ == itemError {
p.panicf("%s", it.val)
}
@ -97,44 +118,63 @@ func (p *parser) assertEqual(expected, got itemType) {
func (p *parser) topLevel(item item) {
switch item.typ {
case itemCommentStart:
case itemCommentStart: // # ..
p.approxLine = item.line
p.expect(itemText)
case itemTableStart:
kg := p.next()
p.approxLine = kg.line
case itemTableStart: // [ .. ]
name := p.next()
p.approxLine = name.line
var key Key
for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
key = append(key, p.keyString(kg))
for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
key = append(key, p.keyString(name))
}
p.assertEqual(itemTableEnd, kg.typ)
p.assertEqual(itemTableEnd, name.typ)
p.establishContext(key, false)
p.addContext(key, false)
p.setType("", tomlHash)
p.ordered = append(p.ordered, key)
case itemArrayTableStart:
kg := p.next()
p.approxLine = kg.line
case itemArrayTableStart: // [[ .. ]]
name := p.next()
p.approxLine = name.line
var key Key
for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
key = append(key, p.keyString(kg))
for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
key = append(key, p.keyString(name))
}
p.assertEqual(itemArrayTableEnd, kg.typ)
p.assertEqual(itemArrayTableEnd, name.typ)
p.establishContext(key, true)
p.addContext(key, true)
p.setType("", tomlArrayHash)
p.ordered = append(p.ordered, key)
case itemKeyStart:
kname := p.next()
p.approxLine = kname.line
p.currentKey = p.keyString(kname)
case itemKeyStart: // key = ..
outerContext := p.context
/// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
k := p.next()
p.approxLine = k.line
var key Key
for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
key = append(key, p.keyString(k))
}
p.assertEqual(itemKeyEnd, k.typ)
val, typ := p.value(p.next())
p.setValue(p.currentKey, val)
p.setType(p.currentKey, typ)
/// The current key is the last part.
p.currentKey = key[len(key)-1]
/// All the other parts (if any) are the context; need to set each part
/// as implicit.
context := key[:len(key)-1]
for i := range context {
p.addImplicitContext(append(p.context, context[i:i+1]...))
}
/// Set value.
val, typ := p.value(p.next(), false)
p.set(p.currentKey, val, typ)
p.ordered = append(p.ordered, p.context.add(p.currentKey))
/// Remove the context we added (preserving any context from [tbl] lines).
p.context = outerContext
p.currentKey = ""
default:
p.bug("Unexpected type at top level: %s", item.typ)
@ -148,180 +188,253 @@ func (p *parser) keyString(it item) string {
return it.val
case itemString, itemMultilineString,
itemRawString, itemRawMultilineString:
s, _ := p.value(it)
s, _ := p.value(it, false)
return s.(string)
default:
p.bug("Unexpected key type: %s", it.typ)
panic("unreachable")
}
panic("unreachable")
}
var datetimeRepl = strings.NewReplacer(
"z", "Z",
"t", "T",
" ", "T")
// value translates an expected value from the lexer into a Go value wrapped
// as an empty interface.
func (p *parser) value(it item) (interface{}, tomlType) {
func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
switch it.typ {
case itemString:
return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
case itemMultilineString:
trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
return p.replaceEscapes(stripFirstNewline(stripEscapedNewlines(it.val))), p.typeOfPrimitive(it)
case itemRawString:
return it.val, p.typeOfPrimitive(it)
case itemRawMultilineString:
return stripFirstNewline(it.val), p.typeOfPrimitive(it)
case itemInteger:
return p.valueInteger(it)
case itemFloat:
return p.valueFloat(it)
case itemBool:
switch it.val {
case "true":
return true, p.typeOfPrimitive(it)
case "false":
return false, p.typeOfPrimitive(it)
default:
p.bug("Expected boolean value, but got '%s'.", it.val)
}
p.bug("Expected boolean value, but got '%s'.", it.val)
case itemInteger:
if !numUnderscoresOK(it.val) {
p.panicf("Invalid integer %q: underscores must be surrounded by digits",
it.val)
}
val := strings.Replace(it.val, "_", "", -1)
num, err := strconv.ParseInt(val, 10, 64)
if err != nil {
// Distinguish integer values. Normally, it'd be a bug if the lexer
// provides an invalid integer, but it's possible that the number is
// out of range of valid values (which the lexer cannot determine).
// So mark the former as a bug but the latter as a legitimate user
// error.
if e, ok := err.(*strconv.NumError); ok &&
e.Err == strconv.ErrRange {
p.panicf("Integer '%s' is out of the range of 64-bit "+
"signed integers.", it.val)
} else {
p.bug("Expected integer value, but got '%s'.", it.val)
}
}
return num, p.typeOfPrimitive(it)
case itemFloat:
parts := strings.FieldsFunc(it.val, func(r rune) bool {
switch r {
case '.', 'e', 'E':
return true
}
return false
})
for _, part := range parts {
if !numUnderscoresOK(part) {
p.panicf("Invalid float %q: underscores must be "+
"surrounded by digits", it.val)
}
}
if !numPeriodsOK(it.val) {
// As a special case, numbers like '123.' or '1.e2',
// which are valid as far as Go/strconv are concerned,
// must be rejected because TOML says that a fractional
// part consists of '.' followed by 1+ digits.
p.panicf("Invalid float %q: '.' must be followed "+
"by one or more digits", it.val)
}
val := strings.Replace(it.val, "_", "", -1)
num, err := strconv.ParseFloat(val, 64)
if err != nil {
if e, ok := err.(*strconv.NumError); ok &&
e.Err == strconv.ErrRange {
p.panicf("Float '%s' is out of the range of 64-bit "+
"IEEE-754 floating-point numbers.", it.val)
} else {
p.panicf("Invalid float value: %q", it.val)
}
}
return num, p.typeOfPrimitive(it)
case itemDatetime:
var t time.Time
var ok bool
var err error
for _, format := range []string{
"2006-01-02T15:04:05Z07:00",
"2006-01-02T15:04:05",
"2006-01-02",
} {
t, err = time.ParseInLocation(format, it.val, time.Local)
if err == nil {
ok = true
break
}
}
if !ok {
p.panicf("Invalid TOML Datetime: %q.", it.val)
}
return t, p.typeOfPrimitive(it)
return p.valueDatetime(it)
case itemArray:
array := make([]interface{}, 0)
types := make([]tomlType, 0)
for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
val, typ := p.value(it)
array = append(array, val)
types = append(types, typ)
}
return array, p.typeOfArray(types)
return p.valueArray(it)
case itemInlineTableStart:
var (
hash = make(map[string]interface{})
outerContext = p.context
outerKey = p.currentKey
)
p.context = append(p.context, p.currentKey)
p.currentKey = ""
for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
if it.typ != itemKeyStart {
p.bug("Expected key start but instead found %q, around line %d",
it.val, p.approxLine)
}
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
// retrieve key
k := p.next()
p.approxLine = k.line
kname := p.keyString(k)
// retrieve value
p.currentKey = kname
val, typ := p.value(p.next())
// make sure we keep metadata up to date
p.setType(kname, typ)
p.ordered = append(p.ordered, p.context.add(p.currentKey))
hash[kname] = val
}
p.context = outerContext
p.currentKey = outerKey
return hash, tomlHash
return p.valueInlineTable(it, parentIsArray)
default:
p.bug("Unexpected value type: %s", it.typ)
}
p.bug("Unexpected value type: %s", it.typ)
panic("unreachable")
}
func (p *parser) valueInteger(it item) (interface{}, tomlType) {
if !numUnderscoresOK(it.val) {
p.panicf("Invalid integer %q: underscores must be surrounded by digits", it.val)
}
if numHasLeadingZero(it.val) {
p.panicf("Invalid integer %q: cannot have leading zeroes", it.val)
}
num, err := strconv.ParseInt(it.val, 0, 64)
if err != nil {
// Distinguish integer values. Normally, it'd be a bug if the lexer
// provides an invalid integer, but it's possible that the number is
// out of range of valid values (which the lexer cannot determine).
// So mark the former as a bug but the latter as a legitimate user
// error.
if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
p.panicf("Integer '%s' is out of the range of 64-bit signed integers.", it.val)
} else {
p.bug("Expected integer value, but got '%s'.", it.val)
}
}
return num, p.typeOfPrimitive(it)
}
func (p *parser) valueFloat(it item) (interface{}, tomlType) {
parts := strings.FieldsFunc(it.val, func(r rune) bool {
switch r {
case '.', 'e', 'E':
return true
}
return false
})
for _, part := range parts {
if !numUnderscoresOK(part) {
p.panicf("Invalid float %q: underscores must be surrounded by digits", it.val)
}
}
if len(parts) > 0 && numHasLeadingZero(parts[0]) {
p.panicf("Invalid float %q: cannot have leading zeroes", it.val)
}
if !numPeriodsOK(it.val) {
// As a special case, numbers like '123.' or '1.e2',
// which are valid as far as Go/strconv are concerned,
// must be rejected because TOML says that a fractional
// part consists of '.' followed by 1+ digits.
p.panicf("Invalid float %q: '.' must be followed by one or more digits", it.val)
}
val := strings.Replace(it.val, "_", "", -1)
if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does.
val = "nan"
}
num, err := strconv.ParseFloat(val, 64)
if err != nil {
if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
p.panicf("Float '%s' is out of the range of 64-bit IEEE-754 floating-point numbers.", it.val)
} else {
p.panicf("Invalid float value: %q", it.val)
}
}
return num, p.typeOfPrimitive(it)
}
var dtTypes = []struct {
fmt string
zone *time.Location
}{
{time.RFC3339Nano, time.Local},
{"2006-01-02T15:04:05.999999999", internal.LocalDatetime},
{"2006-01-02", internal.LocalDate},
{"15:04:05.999999999", internal.LocalTime},
}
func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
it.val = datetimeRepl.Replace(it.val)
var (
t time.Time
ok bool
err error
)
for _, dt := range dtTypes {
t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
if err == nil {
ok = true
break
}
}
if !ok {
p.panicf("Invalid TOML Datetime: %q.", it.val)
}
return t, p.typeOfPrimitive(it)
}
func (p *parser) valueArray(it item) (interface{}, tomlType) {
p.setType(p.currentKey, tomlArray)
// p.setType(p.currentKey, typ)
var (
array []interface{}
types []tomlType
)
for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
val, typ := p.value(it, true)
array = append(array, val)
types = append(types, typ)
}
return array, tomlArray
}
func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
var (
hash = make(map[string]interface{})
outerContext = p.context
outerKey = p.currentKey
)
p.context = append(p.context, p.currentKey)
prevContext := p.context
p.currentKey = ""
p.addImplicit(p.context)
p.addContext(p.context, parentIsArray)
/// Loop over all table key/value pairs.
for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
/// Read all key parts.
k := p.next()
p.approxLine = k.line
var key Key
for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
key = append(key, p.keyString(k))
}
p.assertEqual(itemKeyEnd, k.typ)
/// The current key is the last part.
p.currentKey = key[len(key)-1]
/// All the other parts (if any) are the context; need to set each part
/// as implicit.
context := key[:len(key)-1]
for i := range context {
p.addImplicitContext(append(p.context, context[i:i+1]...))
}
/// Set the value.
val, typ := p.value(p.next(), false)
p.set(p.currentKey, val, typ)
p.ordered = append(p.ordered, p.context.add(p.currentKey))
hash[p.currentKey] = val
/// Restore context.
p.context = prevContext
}
p.context = outerContext
p.currentKey = outerKey
return hash, tomlHash
}
// numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
// +/- signs, and base prefixes.
func numHasLeadingZero(s string) bool {
if len(s) > 1 && s[0] == '0' && isDigit(rune(s[1])) { // >1 to allow "0" and isDigit to allow 0x
return true
}
if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
return true
}
return false
}
// numUnderscoresOK checks whether each underscore in s is surrounded by
// characters that are not underscores.
func numUnderscoresOK(s string) bool {
switch s {
case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
return true
}
accept := false
for _, r := range s {
if r == '_' {
if !accept {
return false
}
accept = false
continue
}
accept = true
// isHexadecimal is a superset of all the permissable characters
// surrounding an underscore.
accept = isHexadecimal(r)
}
return accept
}
@ -338,13 +451,12 @@ func numPeriodsOK(s string) bool {
return !period
}
// establishContext sets the current context of the parser,
// where the context is either a hash or an array of hashes. Which one is
// set depends on the value of the `array` parameter.
// Set the current context of the parser, where the context is either a hash or
// an array of hashes, depending on the value of the `array` parameter.
//
// Establishing the context also makes sure that the key isn't a duplicate, and
// will create implicit hashes automatically.
func (p *parser) establishContext(key Key, array bool) {
func (p *parser) addContext(key Key, array bool) {
var ok bool
// Always start at the top level and drill down for our context.
@ -383,7 +495,7 @@ func (p *parser) establishContext(key Key, array bool) {
// list of tables for it.
k := key[len(key)-1]
if _, ok := hashContext[k]; !ok {
hashContext[k] = make([]map[string]interface{}, 0, 5)
hashContext[k] = make([]map[string]interface{}, 0, 4)
}
// Add a new table. But make sure the key hasn't already been used
@ -391,8 +503,7 @@ func (p *parser) establishContext(key Key, array bool) {
if hash, ok := hashContext[k].([]map[string]interface{}); ok {
hashContext[k] = append(hash, make(map[string]interface{}))
} else {
p.panicf("Key '%s' was already created and cannot be used as "+
"an array.", keyContext)
p.panicf("Key '%s' was already created and cannot be used as an array.", keyContext)
}
} else {
p.setValue(key[len(key)-1], make(map[string]interface{}))
@ -400,15 +511,22 @@ func (p *parser) establishContext(key Key, array bool) {
p.context = append(p.context, key[len(key)-1])
}
// set calls setValue and setType.
func (p *parser) set(key string, val interface{}, typ tomlType) {
p.setValue(p.currentKey, val)
p.setType(p.currentKey, typ)
}
// setValue sets the given key to the given value in the current context.
// It will make sure that the key hasn't already been defined, account for
// implicit key groups.
func (p *parser) setValue(key string, value interface{}) {
var tmpHash interface{}
var ok bool
hash := p.mapping
keyContext := make(Key, 0)
var (
tmpHash interface{}
ok bool
hash = p.mapping
keyContext Key
)
for _, k := range p.context {
keyContext = append(keyContext, k)
if tmpHash, ok = hash[k]; !ok {
@ -422,24 +540,26 @@ func (p *parser) setValue(key string, value interface{}) {
case map[string]interface{}:
hash = t
default:
p.bug("Expected hash to have type 'map[string]interface{}', but "+
"it has '%T' instead.", tmpHash)
p.panicf("Key '%s' has already been defined.", keyContext)
}
}
keyContext = append(keyContext, key)
if _, ok := hash[key]; ok {
// Typically, if the given key has already been set, then we have
// to raise an error since duplicate keys are disallowed. However,
// it's possible that a key was previously defined implicitly. In this
// case, it is allowed to be redefined concretely. (See the
// `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
// Normally redefining keys isn't allowed, but the key could have been
// defined implicitly and it's allowed to be redefined concretely. (See
// the `valid/implicit-and-explicit-after.toml` in toml-test)
//
// But we have to make sure to stop marking it as an implicit. (So that
// another redefinition provokes an error.)
//
// Note that since it has already been defined (as a hash), we don't
// want to overwrite it. So our business is done.
if p.isArray(keyContext) {
p.removeImplicit(keyContext)
hash[key] = value
return
}
if p.isImplicit(keyContext) {
p.removeImplicit(keyContext)
return
@ -449,6 +569,7 @@ func (p *parser) setValue(key string, value interface{}) {
// key, which is *always* wrong.
p.panicf("Key '%s' has already been defined.", keyContext)
}
hash[key] = value
}
@ -468,21 +589,15 @@ func (p *parser) setType(key string, typ tomlType) {
p.types[keyContext.String()] = typ
}
// addImplicit sets the given Key as having been created implicitly.
func (p *parser) addImplicit(key Key) {
p.implicits[key.String()] = true
}
// removeImplicit stops tagging the given key as having been implicitly
// created.
func (p *parser) removeImplicit(key Key) {
p.implicits[key.String()] = false
}
// isImplicit returns true if the key group pointed to by the key was created
// implicitly.
func (p *parser) isImplicit(key Key) bool {
return p.implicits[key.String()]
// Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
// "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = true }
func (p *parser) removeImplicit(key Key) { p.implicits[key.String()] = false }
func (p *parser) isImplicit(key Key) bool { return p.implicits[key.String()] }
func (p *parser) isArray(key Key) bool { return p.types[key.String()] == tomlArray }
func (p *parser) addImplicitContext(key Key) {
p.addImplicit(key)
p.addContext(key, false)
}
// current returns the full key name of the current context.
@ -497,20 +612,54 @@ func (p *parser) current() string {
}
func stripFirstNewline(s string) string {
if len(s) == 0 || s[0] != '\n' {
return s
if len(s) > 0 && s[0] == '\n' {
return s[1:]
}
return s[1:]
if len(s) > 1 && s[0] == '\r' && s[1] == '\n' {
return s[2:]
}
return s
}
func stripEscapedWhitespace(s string) string {
esc := strings.Split(s, "\\\n")
if len(esc) > 1 {
for i := 1; i < len(esc); i++ {
esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
// Remove newlines inside triple-quoted strings if a line ends with "\".
func stripEscapedNewlines(s string) string {
split := strings.Split(s, "\n")
if len(split) < 1 {
return s
}
escNL := false // Keep track of the last non-blank line was escaped.
for i, line := range split {
line = strings.TrimRight(line, " \t\r")
if len(line) == 0 || line[len(line)-1] != '\\' {
split[i] = strings.TrimRight(split[i], "\r")
if !escNL && i != len(split)-1 {
split[i] += "\n"
}
continue
}
escBS := true
for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- {
escBS = !escBS
}
if escNL {
line = strings.TrimLeft(line, " \t\r")
}
escNL = !escBS
if escBS {
split[i] += "\n"
continue
}
split[i] = line[:len(line)-1] // Remove \
if len(split)-1 > i {
split[i+1] = strings.TrimLeft(split[i+1], " \t\r")
}
}
return strings.Join(esc, "")
return strings.Join(split, "")
}
func (p *parser) replaceEscapes(str string) string {
@ -533,6 +682,9 @@ func (p *parser) replaceEscapes(str string) string {
default:
p.bug("Expected valid escape code after \\, but got %q.", s[r])
return ""
case ' ', '\t':
p.panicf("invalid escape: '\\%c'", s[r])
return ""
case 'b':
replaced = append(replaced, rune(0x0008))
r += 1
@ -585,8 +737,3 @@ func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
}
return rune(hex)
}
func isStringType(ty itemType) bool {
return ty == itemString || ty == itemMultilineString ||
ty == itemRawString || ty == itemRawMultilineString
}