matterbridge/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go

1246 lines
26 KiB
Go
Raw Normal View History

2023-01-28 22:57:53 +01:00
package unstable
2022-04-25 23:50:10 +02:00
import (
"bytes"
2023-01-28 22:57:53 +01:00
"fmt"
2022-04-25 23:50:10 +02:00
"unicode"
2023-01-28 22:57:53 +01:00
"github.com/pelletier/go-toml/v2/internal/characters"
2022-04-25 23:50:10 +02:00
"github.com/pelletier/go-toml/v2/internal/danger"
)
2023-01-28 22:57:53 +01:00
// ParserError reports a problem tied to a location in the parsed document.
//
// A ParserError must not outlive the Parser instance it refers to, and using
// it after that parser has been reset may cause panics.
type ParserError struct {
	// Highlight is the part of the document the error refers to. It is
	// expected to be a subslice of the parser input (see NewParserError).
	Highlight []byte
	// Message is the text returned by Error.
	Message string
	// Key is optional.
	Key []string
}
// Error implements the error interface. It returns the Message field
// verbatim; Highlight and Key are not part of the returned text.
func (e *ParserError) Error() string {
	return e.Message
}
// NewParserError builds a *ParserError whose message is produced from format
// and args, and returns it as an error. The Key field is left unset.
//
// Warning: highlight needs to be a subslice of Parser.data, so only slices
// returned by Parser.Raw are valid candidates.
func NewParserError(highlight []byte, format string, args ...interface{}) error {
	message := fmt.Errorf(format, args...).Error()

	perr := new(ParserError)
	perr.Highlight = highlight
	perr.Message = message

	return perr
}
// Parser scans over a TOML-encoded document and generates an iterative AST.
//
// To prime the Parser, first reset it with the contents of a TOML document.
// Then, process all top-level expressions sequentially. See Example.
//
// Don't forget to check Error() after you're done parsing.
//
// Each top-level expression needs to be fully processed before calling
// NextExpression() again. Otherwise, calls to various Node methods may panic if
// the parser has moved on the next expression.
//
// For performance reasons, go-toml doesn't make a copy of the input bytes to
// the parser. Make sure to copy all the bytes you need to outlive the slice
// given to the parser.
type Parser struct {
2022-04-25 23:50:10 +02:00
data []byte
2023-01-28 22:57:53 +01:00
builder builder
ref reference
2022-04-25 23:50:10 +02:00
left []byte
err error
first bool
KeepComments bool
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
// Data returns the slice provided to the last call to Reset. The slice is
// returned as-is, without copying.
func (p *Parser) Data() []byte {
	return p.data
}
// Range returns a range description that corresponds to a given slice of the
// input. If the argument is not a subslice of the parser input, this function
// panics.
func (p *Parser) Range(b []byte) Range {
return Range{
2022-04-25 23:50:10 +02:00
Offset: uint32(danger.SubsliceOffset(p.data, b)),
Length: uint32(len(b)),
}
}
2023-01-28 22:57:53 +01:00
// Raw returns the slice corresponding to the bytes in the given range.
func (p *Parser) Raw(raw Range) []byte {
2022-04-25 23:50:10 +02:00
return p.data[raw.Offset : raw.Offset+raw.Length]
}
2023-01-28 22:57:53 +01:00
// Reset brings the parser to its initial state for a given input. It wipes an
// reuses internal storage to reduce allocation.
func (p *Parser) Reset(b []byte) {
2022-04-25 23:50:10 +02:00
p.builder.Reset()
2023-01-28 22:57:53 +01:00
p.ref = invalidReference
2022-04-25 23:50:10 +02:00
p.data = b
p.left = b
p.err = nil
p.first = true
}
2023-01-28 22:57:53 +01:00
// NextExpression parses the next top-level expression. If an expression was
// successfully parsed, it returns true. If the parser is at the end of the
// document or an error occurred, it returns false.
//
// Retrieve the parsed expression with Expression().
func (p *Parser) NextExpression() bool {
2022-04-25 23:50:10 +02:00
if len(p.left) == 0 || p.err != nil {
return false
}
p.builder.Reset()
2023-01-28 22:57:53 +01:00
p.ref = invalidReference
2022-04-25 23:50:10 +02:00
for {
if len(p.left) == 0 || p.err != nil {
return false
}
if !p.first {
p.left, p.err = p.parseNewline(p.left)
}
if len(p.left) == 0 || p.err != nil {
return false
}
p.ref, p.left, p.err = p.parseExpression(p.left)
if p.err != nil {
return false
}
p.first = false
if p.ref.Valid() {
return true
}
}
}
2023-01-28 22:57:53 +01:00
// Expression returns a pointer to the node representing the last successfully
// parsed expression.
2023-01-28 22:57:53 +01:00
func (p *Parser) Expression() *Node {
2022-04-25 23:50:10 +02:00
return p.builder.NodeAt(p.ref)
}
// Error returns any error that has occurred during parsing.
2023-01-28 22:57:53 +01:00
func (p *Parser) Error() error {
2022-04-25 23:50:10 +02:00
return p.err
}
// Position describes a single location in the input.
type Position struct {
	// Offset is the number of bytes from the beginning of the input.
	Offset int
	// Line is the line number, starting at 1.
	Line int
	// Column is the column number, starting at 1.
	Column int
}
// Shape describes where a range of the input begins and ends.
type Shape struct {
	// Start is the position of the beginning of the range, and End the
	// position just past its last byte.
	Start, End Position
}
// position converts the start of b, which must be a subslice of the parser
// input, into an offset / line / column triple. Lines and columns start at 1;
// columns are counted in bytes since the last newline.
func (p *Parser) position(b []byte) Position {
	newline := []byte{'\n'}

	off := danger.SubsliceOffset(p.data, b)
	prefix := p.data[:off]

	line := 1 + bytes.Count(prefix, newline)
	// LastIndex yields -1 when there is no newline, which makes the first
	// byte of the document column 1.
	column := len(prefix) - bytes.LastIndex(prefix, newline)

	return Position{Offset: off, Line: line, Column: column}
}
// Shape returns the shape of the given range in the input: the position of
// its first byte and the position just past its last byte. Will panic if the
// range is not a subslice of the input.
//
// Both positions are computed from the numeric offsets stored in r rather
// than from the address of a re-sliced buffer. Slicing down to a
// zero-capacity slice (a range ending exactly at the capacity of the input)
// does not advance the slice's data pointer, so an address-based lookup would
// report End equal to Start in that case.
func (p *Parser) Shape(r Range) Shape {
	start := int(r.Offset)
	end := start + int(r.Length)
	if end > len(p.data) {
		panic("toml: range is not a subslice of the parser input")
	}

	newline := []byte{'\n'}
	// at builds the 1-based line/column position of a byte offset.
	at := func(offset int) Position {
		lead := p.data[:offset]
		return Position{
			Offset: offset,
			Line:   bytes.Count(lead, newline) + 1,
			// LastIndex is -1 without a newline, giving column 1 at offset 0.
			Column: len(lead) - bytes.LastIndex(lead, newline),
		}
	}

	return Shape{
		Start: at(start),
		End:   at(end),
	}
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
2022-04-25 23:50:10 +02:00
if b[0] == '\n' {
return b[1:], nil
}
if b[0] == '\r' {
_, rest, err := scanWindowsNewline(b)
return rest, err
}
2023-01-28 22:57:53 +01:00
return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
2022-04-25 23:50:10 +02:00
}
// parseComment scans the comment at the start of b. A Comment node is pushed
// only when KeepComments is set; otherwise the returned reference is
// invalidReference.
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
	data, rest, err := scanComment(b)
	if err != nil || !p.KeepComments {
		return invalidReference, rest, err
	}

	node := Node{
		Kind: Comment,
		Raw:  p.Range(data),
		Data: data,
	}
	return p.builder.Push(node), rest, nil
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// expression = ws [ comment ]
// expression =/ ws keyval ws [ comment ]
// expression =/ ws table ws [ comment ]
2023-01-28 22:57:53 +01:00
ref := invalidReference
2022-04-25 23:50:10 +02:00
b = p.parseWhitespace(b)
if len(b) == 0 {
return ref, b, nil
}
if b[0] == '#' {
ref, rest, err := p.parseComment(b)
2022-04-25 23:50:10 +02:00
return ref, rest, err
}
if b[0] == '\n' || b[0] == '\r' {
return ref, b, nil
}
var err error
if b[0] == '[' {
ref, b, err = p.parseTable(b)
} else {
ref, b, err = p.parseKeyval(b)
}
if err != nil {
return ref, nil, err
}
b = p.parseWhitespace(b)
if len(b) > 0 && b[0] == '#' {
cref, rest, err := p.parseComment(b)
if cref != invalidReference {
p.builder.Chain(ref, cref)
}
2022-04-25 23:50:10 +02:00
return ref, rest, err
}
return ref, b, nil
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// table = std-table / array-table
if len(b) > 1 && b[1] == '[' {
return p.parseArrayTable(b)
}
return p.parseStdTable(b)
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// array-table = array-table-open key array-table-close
// array-table-open = %x5B.5B ws ; [[ Double left square bracket
// array-table-close = ws %x5D.5D ; ]] Double right square bracket
2023-01-28 22:57:53 +01:00
ref := p.builder.Push(Node{
Kind: ArrayTable,
2022-04-25 23:50:10 +02:00
})
b = b[2:]
b = p.parseWhitespace(b)
k, b, err := p.parseKey(b)
if err != nil {
return ref, nil, err
}
p.builder.AttachChild(ref, k)
b = p.parseWhitespace(b)
b, err = expect(']', b)
if err != nil {
return ref, nil, err
}
b, err = expect(']', b)
return ref, b, err
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// std-table = std-table-open key std-table-close
// std-table-open = %x5B ws ; [ Left square bracket
// std-table-close = ws %x5D ; ] Right square bracket
2023-01-28 22:57:53 +01:00
ref := p.builder.Push(Node{
Kind: Table,
2022-04-25 23:50:10 +02:00
})
b = b[1:]
b = p.parseWhitespace(b)
key, b, err := p.parseKey(b)
if err != nil {
return ref, nil, err
}
p.builder.AttachChild(ref, key)
b = p.parseWhitespace(b)
b, err = expect(']', b)
return ref, b, err
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// keyval = key keyval-sep val
2023-01-28 22:57:53 +01:00
ref := p.builder.Push(Node{
Kind: KeyValue,
2022-04-25 23:50:10 +02:00
})
key, b, err := p.parseKey(b)
if err != nil {
2023-01-28 22:57:53 +01:00
return invalidReference, nil, err
2022-04-25 23:50:10 +02:00
}
// keyval-sep = ws %x3D ws ; =
b = p.parseWhitespace(b)
if len(b) == 0 {
2023-01-28 22:57:53 +01:00
return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
2022-04-25 23:50:10 +02:00
}
b, err = expect('=', b)
if err != nil {
2023-01-28 22:57:53 +01:00
return invalidReference, nil, err
2022-04-25 23:50:10 +02:00
}
b = p.parseWhitespace(b)
valRef, b, err := p.parseVal(b)
if err != nil {
return ref, b, err
}
p.builder.Chain(valRef, key)
p.builder.AttachChild(ref, valRef)
return ref, b, err
}
//nolint:cyclop,funlen
2023-01-28 22:57:53 +01:00
func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// val = string / boolean / array / inline-table / date-time / float / integer
2023-01-28 22:57:53 +01:00
ref := invalidReference
2022-04-25 23:50:10 +02:00
if len(b) == 0 {
2023-01-28 22:57:53 +01:00
return ref, nil, NewParserError(b, "expected value, not eof")
2022-04-25 23:50:10 +02:00
}
var err error
c := b[0]
switch c {
case '"':
var raw []byte
var v []byte
if scanFollowsMultilineBasicStringDelimiter(b) {
raw, v, b, err = p.parseMultilineBasicString(b)
} else {
raw, v, b, err = p.parseBasicString(b)
}
if err == nil {
2023-01-28 22:57:53 +01:00
ref = p.builder.Push(Node{
Kind: String,
2022-04-25 23:50:10 +02:00
Raw: p.Range(raw),
Data: v,
})
}
return ref, b, err
case '\'':
var raw []byte
var v []byte
if scanFollowsMultilineLiteralStringDelimiter(b) {
raw, v, b, err = p.parseMultilineLiteralString(b)
} else {
raw, v, b, err = p.parseLiteralString(b)
}
if err == nil {
2023-01-28 22:57:53 +01:00
ref = p.builder.Push(Node{
Kind: String,
2022-04-25 23:50:10 +02:00
Raw: p.Range(raw),
Data: v,
})
}
return ref, b, err
case 't':
if !scanFollowsTrue(b) {
2023-01-28 22:57:53 +01:00
return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
ref = p.builder.Push(Node{
Kind: Bool,
2022-04-25 23:50:10 +02:00
Data: b[:4],
})
return ref, b[4:], nil
case 'f':
if !scanFollowsFalse(b) {
2023-01-28 22:57:53 +01:00
return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
ref = p.builder.Push(Node{
Kind: Bool,
2022-04-25 23:50:10 +02:00
Data: b[:5],
})
return ref, b[5:], nil
case '[':
return p.parseValArray(b)
case '{':
return p.parseInlineTable(b)
default:
return p.parseIntOrFloatOrDateTime(b)
}
}
// atmost returns the first n bytes of b, or all of b when it holds n bytes
// or fewer. The result aliases b.
func atmost(b []byte, n int) []byte {
	if n < len(b) {
		return b[:n]
	}
	return b
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
2022-04-25 23:50:10 +02:00
v, rest, err := scanLiteralString(b)
if err != nil {
return nil, nil, nil, err
}
return v, v[1 : len(v)-1], rest, nil
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
// inline-table-open = %x7B ws ; {
// inline-table-close = ws %x7D ; }
// inline-table-sep = ws %x2C ws ; , Comma
// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
2023-01-28 22:57:53 +01:00
parent := p.builder.Push(Node{
Kind: InlineTable,
Raw: p.Range(b[:1]),
2022-04-25 23:50:10 +02:00
})
first := true
2023-01-28 22:57:53 +01:00
var child reference
2022-04-25 23:50:10 +02:00
b = b[1:]
var err error
for len(b) > 0 {
previousB := b
b = p.parseWhitespace(b)
if len(b) == 0 {
2023-01-28 22:57:53 +01:00
return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
2022-04-25 23:50:10 +02:00
}
if b[0] == '}' {
break
}
if !first {
b, err = expect(',', b)
if err != nil {
return parent, nil, err
}
b = p.parseWhitespace(b)
}
2023-01-28 22:57:53 +01:00
var kv reference
2022-04-25 23:50:10 +02:00
kv, b, err = p.parseKeyval(b)
if err != nil {
return parent, nil, err
}
if first {
p.builder.AttachChild(parent, kv)
} else {
p.builder.Chain(child, kv)
}
child = kv
first = false
}
rest, err := expect('}', b)
return parent, rest, err
}
//nolint:funlen,cyclop
2023-01-28 22:57:53 +01:00
func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// array = array-open [ array-values ] ws-comment-newline array-close
// array-open = %x5B ; [
// array-close = %x5D ; ]
// array-values = ws-comment-newline val ws-comment-newline array-sep array-values
// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
// array-sep = %x2C ; , Comma
// ws-comment-newline = *( wschar / [ comment ] newline )
arrayStart := b
b = b[1:]
2023-01-28 22:57:53 +01:00
parent := p.builder.Push(Node{
Kind: Array,
2022-04-25 23:50:10 +02:00
})
// First indicates whether the parser is looking for the first element
// (non-comment) of the array.
2022-04-25 23:50:10 +02:00
first := true
lastChild := invalidReference
addChild := func(valueRef reference) {
if lastChild == invalidReference {
p.builder.AttachChild(parent, valueRef)
} else {
p.builder.Chain(lastChild, valueRef)
}
lastChild = valueRef
}
2022-04-25 23:50:10 +02:00
var err error
for len(b) > 0 {
cref := invalidReference
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
2022-04-25 23:50:10 +02:00
if err != nil {
return parent, nil, err
}
if cref != invalidReference {
addChild(cref)
}
2022-04-25 23:50:10 +02:00
if len(b) == 0 {
2023-01-28 22:57:53 +01:00
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
2022-04-25 23:50:10 +02:00
}
if b[0] == ']' {
break
}
if b[0] == ',' {
if first {
2023-01-28 22:57:53 +01:00
return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
2022-04-25 23:50:10 +02:00
}
b = b[1:]
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
2022-04-25 23:50:10 +02:00
if err != nil {
return parent, nil, err
}
if cref != invalidReference {
addChild(cref)
}
2022-04-25 23:50:10 +02:00
} else if !first {
2023-01-28 22:57:53 +01:00
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
2022-04-25 23:50:10 +02:00
}
// TOML allows trailing commas in arrays.
if len(b) > 0 && b[0] == ']' {
break
}
2023-01-28 22:57:53 +01:00
var valueRef reference
2022-04-25 23:50:10 +02:00
valueRef, b, err = p.parseVal(b)
if err != nil {
return parent, nil, err
}
addChild(valueRef)
2022-04-25 23:50:10 +02:00
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
2022-04-25 23:50:10 +02:00
if err != nil {
return parent, nil, err
}
if cref != invalidReference {
addChild(cref)
}
2022-04-25 23:50:10 +02:00
first = false
}
rest, err := expect(']', b)
return parent, rest, err
}
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
rootCommentRef := invalidReference
latestCommentRef := invalidReference
addComment := func(ref reference) {
if rootCommentRef == invalidReference {
rootCommentRef = ref
} else if latestCommentRef == invalidReference {
p.builder.AttachChild(rootCommentRef, ref)
latestCommentRef = ref
} else {
p.builder.Chain(latestCommentRef, ref)
latestCommentRef = ref
}
}
2022-04-25 23:50:10 +02:00
for len(b) > 0 {
var err error
b = p.parseWhitespace(b)
if len(b) > 0 && b[0] == '#' {
var ref reference
ref, b, err = p.parseComment(b)
2022-04-25 23:50:10 +02:00
if err != nil {
return invalidReference, nil, err
}
if ref != invalidReference {
addComment(ref)
2022-04-25 23:50:10 +02:00
}
}
if len(b) == 0 {
break
}
if b[0] == '\n' || b[0] == '\r' {
b, err = p.parseNewline(b)
if err != nil {
return invalidReference, nil, err
2022-04-25 23:50:10 +02:00
}
} else {
break
}
}
return rootCommentRef, b, nil
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
2022-04-25 23:50:10 +02:00
token, rest, err := scanMultilineLiteralString(b)
if err != nil {
return nil, nil, nil, err
}
i := 3
// skip the immediate new line
if token[i] == '\n' {
i++
} else if token[i] == '\r' && token[i+1] == '\n' {
i += 2
}
return token, token[i : len(token)-3], rest, err
}
//nolint:funlen,gocognit,cyclop
2023-01-28 22:57:53 +01:00
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
2022-04-25 23:50:10 +02:00
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
// ml-basic-string-delim
// ml-basic-string-delim = 3quotation-mark
// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
//
// mlb-content = mlb-char / newline / mlb-escaped-nl
// mlb-char = mlb-unescaped / escaped
// mlb-quotes = 1*2quotation-mark
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
// mlb-escaped-nl = escape ws newline *( wschar / newline )
token, escaped, rest, err := scanMultilineBasicString(b)
if err != nil {
return nil, nil, nil, err
}
i := 3
// skip the immediate new line
if token[i] == '\n' {
i++
} else if token[i] == '\r' && token[i+1] == '\n' {
i += 2
}
// fast path
startIdx := i
endIdx := len(token) - len(`"""`)
if !escaped {
str := token[startIdx:endIdx]
2023-01-28 22:57:53 +01:00
verr := characters.Utf8TomlValidAlreadyEscaped(str)
2022-04-25 23:50:10 +02:00
if verr.Zero() {
return token, str, rest, nil
}
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
2022-04-25 23:50:10 +02:00
}
var builder bytes.Buffer
// The scanner ensures that the token starts and ends with quotes and that
// escapes are balanced.
for i < len(token)-3 {
c := token[i]
//nolint:nestif
if c == '\\' {
// When the last non-whitespace character on a line is an unescaped \,
// it will be trimmed along with all whitespace (including newlines) up
// to the next non-whitespace character or closing delimiter.
isLastNonWhitespaceOnLine := false
j := 1
findEOLLoop:
for ; j < len(token)-3-i; j++ {
switch token[i+j] {
case ' ', '\t':
continue
case '\r':
if token[i+j+1] == '\n' {
continue
}
case '\n':
isLastNonWhitespaceOnLine = true
}
break findEOLLoop
}
if isLastNonWhitespaceOnLine {
i += j
for ; i < len(token)-3; i++ {
c := token[i]
if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
i--
break
}
}
i++
continue
}
// handle escaping
i++
c = token[i]
switch c {
case '"', '\\':
builder.WriteByte(c)
case 'b':
builder.WriteByte('\b')
case 'f':
builder.WriteByte('\f')
case 'n':
builder.WriteByte('\n')
case 'r':
builder.WriteByte('\r')
case 't':
builder.WriteByte('\t')
case 'e':
builder.WriteByte(0x1B)
case 'u':
x, err := hexToRune(atmost(token[i+1:], 4), 4)
if err != nil {
return nil, nil, nil, err
}
builder.WriteRune(x)
i += 4
case 'U':
x, err := hexToRune(atmost(token[i+1:], 8), 8)
if err != nil {
return nil, nil, nil, err
}
builder.WriteRune(x)
i += 8
default:
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
2022-04-25 23:50:10 +02:00
}
i++
} else {
2023-01-28 22:57:53 +01:00
size := characters.Utf8ValidNext(token[i:])
2022-04-25 23:50:10 +02:00
if size == 0 {
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
2022-04-25 23:50:10 +02:00
}
builder.Write(token[i : i+size])
i += size
}
}
return token, builder.Bytes(), rest, nil
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// key = simple-key / dotted-key
// simple-key = quoted-key / unquoted-key
//
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
// quoted-key = basic-string / literal-string
// dotted-key = simple-key 1*( dot-sep simple-key )
//
// dot-sep = ws %x2E ws ; . Period
raw, key, b, err := p.parseSimpleKey(b)
if err != nil {
2023-01-28 22:57:53 +01:00
return invalidReference, nil, err
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
ref := p.builder.Push(Node{
Kind: Key,
2022-04-25 23:50:10 +02:00
Raw: p.Range(raw),
Data: key,
})
for {
b = p.parseWhitespace(b)
if len(b) > 0 && b[0] == '.' {
b = p.parseWhitespace(b[1:])
raw, key, b, err = p.parseSimpleKey(b)
if err != nil {
return ref, nil, err
}
2023-01-28 22:57:53 +01:00
p.builder.PushAndChain(Node{
Kind: Key,
2022-04-25 23:50:10 +02:00
Raw: p.Range(raw),
Data: key,
})
} else {
break
}
}
return ref, b, nil
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
2022-04-25 23:50:10 +02:00
if len(b) == 0 {
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(b, "expected key but found none")
2022-04-25 23:50:10 +02:00
}
// simple-key = quoted-key / unquoted-key
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
// quoted-key = basic-string / literal-string
switch {
case b[0] == '\'':
return p.parseLiteralString(b)
case b[0] == '"':
return p.parseBasicString(b)
case isUnquotedKeyChar(b[0]):
key, rest = scanUnquotedKey(b)
return key, key, rest, nil
default:
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
2022-04-25 23:50:10 +02:00
}
}
//nolint:funlen,cyclop
2023-01-28 22:57:53 +01:00
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
2022-04-25 23:50:10 +02:00
// basic-string = quotation-mark *basic-char quotation-mark
// quotation-mark = %x22 ; "
// basic-char = basic-unescaped / escaped
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
// escaped = escape escape-seq-char
// escape-seq-char = %x22 ; " quotation mark U+0022
// escape-seq-char =/ %x5C ; \ reverse solidus U+005C
// escape-seq-char =/ %x62 ; b backspace U+0008
// escape-seq-char =/ %x66 ; f form feed U+000C
// escape-seq-char =/ %x6E ; n line feed U+000A
// escape-seq-char =/ %x72 ; r carriage return U+000D
// escape-seq-char =/ %x74 ; t tab U+0009
// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
token, escaped, rest, err := scanBasicString(b)
if err != nil {
return nil, nil, nil, err
}
startIdx := len(`"`)
endIdx := len(token) - len(`"`)
// Fast path. If there is no escape sequence, the string should just be
// an UTF-8 encoded string, which is the same as Go. In that case,
// validate the string and return a direct reference to the buffer.
if !escaped {
str := token[startIdx:endIdx]
2023-01-28 22:57:53 +01:00
verr := characters.Utf8TomlValidAlreadyEscaped(str)
2022-04-25 23:50:10 +02:00
if verr.Zero() {
return token, str, rest, nil
}
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
2022-04-25 23:50:10 +02:00
}
i := startIdx
var builder bytes.Buffer
// The scanner ensures that the token starts and ends with quotes and that
// escapes are balanced.
for i < len(token)-1 {
c := token[i]
if c == '\\' {
i++
c = token[i]
switch c {
case '"', '\\':
builder.WriteByte(c)
case 'b':
builder.WriteByte('\b')
case 'f':
builder.WriteByte('\f')
case 'n':
builder.WriteByte('\n')
case 'r':
builder.WriteByte('\r')
case 't':
builder.WriteByte('\t')
case 'e':
builder.WriteByte(0x1B)
case 'u':
x, err := hexToRune(token[i+1:len(token)-1], 4)
if err != nil {
return nil, nil, nil, err
}
builder.WriteRune(x)
i += 4
case 'U':
x, err := hexToRune(token[i+1:len(token)-1], 8)
if err != nil {
return nil, nil, nil, err
}
builder.WriteRune(x)
i += 8
default:
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
2022-04-25 23:50:10 +02:00
}
i++
} else {
2023-01-28 22:57:53 +01:00
size := characters.Utf8ValidNext(token[i:])
2022-04-25 23:50:10 +02:00
if size == 0 {
2023-01-28 22:57:53 +01:00
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
2022-04-25 23:50:10 +02:00
}
builder.Write(token[i : i+size])
i += size
}
}
return token, builder.Bytes(), rest, nil
}
func hexToRune(b []byte, length int) (rune, error) {
if len(b) < length {
2023-01-28 22:57:53 +01:00
return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
2022-04-25 23:50:10 +02:00
}
b = b[:length]
var r uint32
for i, c := range b {
d := uint32(0)
switch {
case '0' <= c && c <= '9':
d = uint32(c - '0')
case 'a' <= c && c <= 'f':
d = uint32(c - 'a' + 10)
case 'A' <= c && c <= 'F':
d = uint32(c - 'A' + 10)
default:
2023-01-28 22:57:53 +01:00
return -1, NewParserError(b[i:i+1], "non-hex character")
2022-04-25 23:50:10 +02:00
}
r = r*16 + d
}
if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
2023-01-28 22:57:53 +01:00
return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
2022-04-25 23:50:10 +02:00
}
return rune(r), nil
}
2023-01-28 22:57:53 +01:00
func (p *Parser) parseWhitespace(b []byte) []byte {
2022-04-25 23:50:10 +02:00
// ws = *wschar
// wschar = %x20 ; Space
// wschar =/ %x09 ; Horizontal tab
_, rest := scanWhitespace(b)
return rest
}
//nolint:cyclop
2023-01-28 22:57:53 +01:00
func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
switch b[0] {
case 'i':
if !scanFollowsInf(b) {
2023-01-28 22:57:53 +01:00
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
return p.builder.Push(Node{
Kind: Float,
2022-04-25 23:50:10 +02:00
Data: b[:3],
Raw: p.Range(b[:3]),
2022-04-25 23:50:10 +02:00
}), b[3:], nil
case 'n':
if !scanFollowsNan(b) {
2023-01-28 22:57:53 +01:00
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
return p.builder.Push(Node{
Kind: Float,
2022-04-25 23:50:10 +02:00
Data: b[:3],
Raw: p.Range(b[:3]),
2022-04-25 23:50:10 +02:00
}), b[3:], nil
case '+', '-':
return p.scanIntOrFloat(b)
}
if len(b) < 3 {
return p.scanIntOrFloat(b)
}
s := 5
if len(b) < s {
s = len(b)
}
for idx, c := range b[:s] {
if isDigit(c) {
continue
}
if idx == 2 && c == ':' || (idx == 4 && c == '-') {
return p.scanDateTime(b)
}
break
}
return p.scanIntOrFloat(b)
}
2023-01-28 22:57:53 +01:00
func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
// followed by a digit.
hasDate := false
hasTime := false
hasTz := false
seenSpace := false
i := 0
byteLoop:
for ; i < len(b); i++ {
c := b[i]
switch {
case isDigit(c):
case c == '-':
hasDate = true
const minOffsetOfTz = 8
if i >= minOffsetOfTz {
hasTz = true
}
case c == 'T' || c == 't' || c == ':' || c == '.':
hasTime = true
case c == '+' || c == '-' || c == 'Z' || c == 'z':
hasTz = true
case c == ' ':
if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
i += 2
// Avoid reaching past the end of the document in case the time
// is malformed. See TestIssue585.
if i >= len(b) {
i--
}
seenSpace = true
hasTime = true
} else {
break byteLoop
}
default:
break byteLoop
}
}
2023-01-28 22:57:53 +01:00
var kind Kind
2022-04-25 23:50:10 +02:00
if hasTime {
if hasDate {
if hasTz {
2023-01-28 22:57:53 +01:00
kind = DateTime
2022-04-25 23:50:10 +02:00
} else {
2023-01-28 22:57:53 +01:00
kind = LocalDateTime
2022-04-25 23:50:10 +02:00
}
} else {
2023-01-28 22:57:53 +01:00
kind = LocalTime
2022-04-25 23:50:10 +02:00
}
} else {
2023-01-28 22:57:53 +01:00
kind = LocalDate
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
return p.builder.Push(Node{
2022-04-25 23:50:10 +02:00
Kind: kind,
Data: b[:i],
}), b[i:], nil
}
//nolint:funlen,gocognit,cyclop
2023-01-28 22:57:53 +01:00
func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
2022-04-25 23:50:10 +02:00
i := 0
if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
var isValidRune validRuneFn
switch b[1] {
case 'x':
isValidRune = isValidHexRune
case 'o':
isValidRune = isValidOctalRune
case 'b':
isValidRune = isValidBinaryRune
default:
i++
}
if isValidRune != nil {
i += 2
for ; i < len(b); i++ {
if !isValidRune(b[i]) {
break
}
}
}
2023-01-28 22:57:53 +01:00
return p.builder.Push(Node{
Kind: Integer,
2022-04-25 23:50:10 +02:00
Data: b[:i],
Raw: p.Range(b[:i]),
2022-04-25 23:50:10 +02:00
}), b[i:], nil
}
isFloat := false
for ; i < len(b); i++ {
c := b[i]
if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
continue
}
if c == '.' || c == 'e' || c == 'E' {
isFloat = true
continue
}
if c == 'i' {
if scanFollowsInf(b[i:]) {
2023-01-28 22:57:53 +01:00
return p.builder.Push(Node{
Kind: Float,
2022-04-25 23:50:10 +02:00
Data: b[:i+3],
Raw: p.Range(b[:i+3]),
2022-04-25 23:50:10 +02:00
}), b[i+3:], nil
}
2023-01-28 22:57:53 +01:00
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
2022-04-25 23:50:10 +02:00
}
if c == 'n' {
if scanFollowsNan(b[i:]) {
2023-01-28 22:57:53 +01:00
return p.builder.Push(Node{
Kind: Float,
2022-04-25 23:50:10 +02:00
Data: b[:i+3],
Raw: p.Range(b[:i+3]),
2022-04-25 23:50:10 +02:00
}), b[i+3:], nil
}
2023-01-28 22:57:53 +01:00
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
2022-04-25 23:50:10 +02:00
}
break
}
if i == 0 {
2023-01-28 22:57:53 +01:00
return invalidReference, b, NewParserError(b, "incomplete number")
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
kind := Integer
2022-04-25 23:50:10 +02:00
if isFloat {
2023-01-28 22:57:53 +01:00
kind = Float
2022-04-25 23:50:10 +02:00
}
2023-01-28 22:57:53 +01:00
return p.builder.Push(Node{
2022-04-25 23:50:10 +02:00
Kind: kind,
Data: b[:i],
Raw: p.Range(b[:i]),
2022-04-25 23:50:10 +02:00
}), b[i:], nil
}
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return '0' <= r && r <= '9'
}
// validRuneFn reports whether a byte may appear in the digits of a prefixed
// integer; scanIntOrFloat picks one per base (hex, octal, binary).
type validRuneFn func(r byte) bool
// isValidHexRune reports whether r is a hexadecimal digit (either case) or
// the underscore separator.
func isValidHexRune(r byte) bool {
	switch {
	case '0' <= r && r <= '9',
		'a' <= r && r <= 'f',
		'A' <= r && r <= 'F':
		return true
	}
	return r == '_'
}
// isValidOctalRune reports whether r is an octal digit or the underscore
// separator.
func isValidOctalRune(r byte) bool {
	if r == '_' {
		return true
	}
	return '0' <= r && r <= '7'
}
// isValidBinaryRune reports whether r is a binary digit or the underscore
// separator.
func isValidBinaryRune(r byte) bool {
	switch r {
	case '0', '1', '_':
		return true
	default:
		return false
	}
}
func expect(x byte, b []byte) ([]byte, error) {
if len(b) == 0 {
2023-01-28 22:57:53 +01:00
return nil, NewParserError(b, "expected character %c but the document ended here", x)
2022-04-25 23:50:10 +02:00
}
if b[0] != x {
2023-01-28 22:57:53 +01:00
return nil, NewParserError(b[0:1], "expected character %c", x)
2022-04-25 23:50:10 +02:00
}
return b[1:], nil
}