diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fc64851..5ae3228 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,23 +9,18 @@ jobs: - name: Checkout uses: actions/checkout@v4 with: - path: gopath/github.com/katydid/parser-go - - name: Checkout testsuite - uses: actions/checkout@v4 - with: - repository: katydid/testsuite - path: gopath/github.com/katydid/testsuite + path: gopath/github.com/katydid/parser-go-yaml - name: Setup Go uses: actions/setup-go@v3 with: go-version: 1.23 - name: Fetch dependencies run: | - cd gopath/github.com/katydid/parser-go + cd gopath/github.com/katydid/parser-go-yaml go mod download -x - name: Install tools run: | - cd gopath/github.com/katydid/parser-go + cd gopath/github.com/katydid/parser-go-yaml go install -x github.com/awalterschulze/checklicense - name: env run: env && go env && pwd @@ -33,17 +28,17 @@ jobs: run: tree - name: Build run: | - cd gopath/github.com/katydid/parser-go + cd gopath/github.com/katydid/parser-go-yaml make build - name: Test run: | - cd gopath/github.com/katydid/parser-go + cd gopath/github.com/katydid/parser-go-yaml make test - name: Checklicencse run: | - cd gopath/github.com/katydid/parser-go + cd gopath/github.com/katydid/parser-go-yaml make checklicense - name: Diff run: | - cd gopath/github.com/katydid/parser-go + cd gopath/github.com/katydid/parser-go-yaml make diff diff --git a/COPIED_FROM_GO b/COPIED_FROM_GO deleted file mode 100644 index 6aa5cfd..0000000 --- a/COPIED_FROM_GO +++ /dev/null @@ -1,29 +0,0 @@ -Parts of the code in parser/xml and parser/json have been copied from the go standard library - -Copyright (c) 2012 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/Makefile b/Makefile index 66af65f..ac94f71 100644 --- a/Makefile +++ b/Makefile @@ -19,13 +19,10 @@ all: nuke build test checklicense: go get github.com/awalterschulze/checklicense checklicense . \ - bnf \ doc.go \ tools/tools.go \ .svg \ - .txt \ - COPIED_FROM_GO \ - parser/yaml/issues.md + .txt test: go test ./... diff --git a/README.md b/README.md index dd41b8f..5f1d4fe 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,13 @@ -## parser-go +## parser-go-yaml -Parser interface for Go. +Parser for Yaml in Go. -This includes tools for developing implementations of the parser interface, for example the `debug` package. 
+## Known Issues +The YAML parser implementation is in progress and has the following known limitations: +- Array elements with more than 1 key are parsed incorrectly +- Comments are not supported +- The file delimiter "---" is not supported +- JSON syntax is not yet supported +- Quotes are currently included as part of strings +- Mixing tabs and spaces in indentation is not supported \ No newline at end of file diff --git a/go.mod b/go.mod index edafcc5..af0b470 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,7 @@ -module github.com/katydid/parser-go +module github.com/katydid/parser-go-yaml go 1.23 +require github.com/katydid/parser-go v0.5.0 + require github.com/awalterschulze/checklicense v1.0.0 diff --git a/go.sum b/go.sum index 9467359..07f4fa1 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,4 @@ github.com/awalterschulze/checklicense v1.0.0 h1:SiRilt26Q+2M238VbXw+e5826mxPypvsj/xgglHDHW8= github.com/awalterschulze/checklicense v1.0.0/go.mod h1:oUHvoD4crryzAwDwtdQqCnaZRznAvZE64edH9ukb1K4= +github.com/katydid/parser-go v0.5.0 h1:47Q2bnGzEwRTdb1q8IjC3t7FGtxHd/LDF9q7ASuk7as= +github.com/katydid/parser-go v0.5.0/go.mod h1:1BwRfDlyRgDlAaURZ4FXi7IzHqWKTsrCdsstLsFeQho= diff --git a/parser/debug/doc.go b/parser/debug/doc.go deleted file mode 100644 index 5d2cb1f..0000000 --- a/parser/debug/doc.go +++ /dev/null @@ -1,2 +0,0 @@ -//The debug package is great for testing and debugging of parser.Interface implementations. -package debug diff --git a/parser/debug/example.go b/parser/debug/example.go deleted file mode 100644 index 5bcf7f4..0000000 --- a/parser/debug/example.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -// Input is a sample instance of the Debug struct. -var Input = &Debug{ - A: int64(1), - B: []string{"b2", "b3"}, - C: &Debug{ - A: int64(2), - D: ptr(int32(3)), - E: []*Debug{ - { - B: []string{"b4"}, - }, - { - B: []string{"b5"}, - }, - }, - }, - D: ptr(int32(4)), - F: []uint32{5}, -} - -// Output is a sample instance of Nodes that repesents the Input variable after it has been parsed by Walk. -var Output = Nodes{ - Field(`A`, `1`), - Nested(`B`, - Field(`0`, `b2`), - Field(`1`, `b3`), - ), - Nested(`C`, - Field(`A`, `2`), - Field(`D`, `3`), - Nested(`E`, - Nested(`0`, - Field(`A`, `0`), - Nested(`B`, - Field(`0`, `b4`), - ), - ), - Nested(`1`, - Field(`A`, `0`), - Nested(`B`, - Field(`0`, `b5`), - ), - ), - ), - ), - Field(`D`, `4`), - Nested(`F`, - Field(`0`, `5`), - ), -} - -func ptr[A any](a A) *A { - return &a -} diff --git a/parser/debug/log.go b/parser/debug/log.go deleted file mode 100644 index c24aa35..0000000 --- a/parser/debug/log.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -import ( - "log" - "os" - "path/filepath" - "runtime" - "strconv" - "time" - - "github.com/katydid/parser-go/parser" -) - -// Logger is an interface for a type that is made to log debug info. -type Logger interface { - Printf(format string, v ...interface{}) -} - -// NewLineLogger returns a logger that logs the line at which the Printf method was called to stderr. -func NewLineLogger() Logger { - return &line{log.New(os.Stderr, "", 0)} -} - -type line struct { - l Logger -} - -func (l *line) Printf(format string, v ...interface{}) { - _, thisfile, _, ok := runtime.Caller(0) - if !ok { - l.l.Printf(":0: "+format, v...) - return - } - i := 0 - for { - i++ - _, file, line, ok := runtime.Caller(i) - if !ok { - l.l.Printf(":"+strconv.Itoa(i)+": "+format, v...) - return - } - if file == thisfile { - continue - } - _, name := filepath.Split(file) - l.l.Printf(name+":"+strconv.Itoa(line)+": "+format, v...) - return - } -} - -// NewDelayLogger returns a logger that sleeps after every log. -// This is useful for debugging infinite loops. -func NewDelayLogger(delay time.Duration) Logger { - return &d{ - delay: delay, - log: NewLineLogger(), - } -} - -type d struct { - log Logger - delay time.Duration -} - -func (d *d) Printf(format string, v ...interface{}) { - d.log.Printf(format, v...) - time.Sleep(d.delay) -} - -type l struct { - name string - s parser.Interface - l Logger - copies int -} - -// NewLogger returns a parser that when called returns and logs the value returned by the argument parser to the argument logger. 
-func NewLogger(s parser.Interface, logger Logger) parser.Interface { - return &l{"parser", s, logger, 0} -} - -func (l *l) Double() (float64, error) { - v, err := l.s.Double() - l.l.Printf(l.name+".Double() (%v, %v)", v, err) - return v, err -} - -func (l *l) Int() (int64, error) { - v, err := l.s.Int() - l.l.Printf(l.name+".Int() (%v, %v)", v, err) - return v, err -} - -func (l *l) Uint() (uint64, error) { - v, err := l.s.Uint() - l.l.Printf(l.name+".Uint() (%v, %v)", v, err) - return v, err -} - -func (l *l) Bool() (bool, error) { - v, err := l.s.Bool() - l.l.Printf(l.name+".Bool() (%v, %v)", v, err) - return v, err -} - -func (l *l) String() (string, error) { - v, err := l.s.String() - l.l.Printf(l.name+".String() (%v, %v)", v, err) - return v, err -} - -func (l *l) Bytes() ([]byte, error) { - v, err := l.s.Bytes() - l.l.Printf(l.name+".Bytes() (%v, %v)", v, err) - return v, err -} - -func (l *l) Next() error { - err := l.s.Next() - l.l.Printf(l.name+".Next() (%v)", err) - return err -} - -func (l *l) IsLeaf() bool { - v := l.s.IsLeaf() - l.l.Printf(l.name+".IsLeaf() (%v)", v) - return v -} - -func (l *l) Up() { - l.s.Up() - l.l.Printf(l.name + ".Up()") - return -} - -func (l *l) Down() { - l.s.Down() - l.l.Printf(l.name + ".Down()") - return -} diff --git a/parser/debug/node.go b/parser/debug/node.go deleted file mode 100644 index 268ccf2..0000000 --- a/parser/debug/node.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -// Field is a helper function for creating a Node with a label and one child label. -// This is how a field with a value is typically represented. -func Field(name string, value string) Node { - return Node{ - Label: name, - Children: Nodes{ - Node{ - Label: value, - }, - }, - } -} - -// Nested is a helper function for creating a Node. -func Nested(name string, fs ...Node) Node { - return Node{ - Label: name, - Children: Nodes(fs), - } -} diff --git a/parser/debug/type.go b/parser/debug/type.go deleted file mode 100644 index ade7711..0000000 --- a/parser/debug/type.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -type Debug struct { - A int64 - B []string `json:"B,omitempty"` - C *Debug `json:"C,omitempty"` - D *int32 `json:"D,omitempty"` - E []*Debug `json:"E,omitempty"` - F []uint32 `json:"F,omitempty"` - G *float64 `json:"G,omitempty"` -} diff --git a/parser/debug/value.go b/parser/debug/value.go deleted file mode 100644 index c9683e0..0000000 --- a/parser/debug/value.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -import ( - "github.com/katydid/parser-go/parser" -) - -type errValue struct{} - -func (*errValue) Double() (float64, error) { - return 0, parser.ErrNotDouble -} - -func (*errValue) Bytes() ([]byte, error) { - return nil, parser.ErrNotBytes -} - -func (*errValue) Int() (int64, error) { - return 0, parser.ErrNotInt -} - -func (*errValue) Bool() (bool, error) { - return false, parser.ErrNotBool -} - -func (*errValue) Uint() (uint64, error) { - return 0, parser.ErrNotUint -} - -func (*errValue) String() (string, error) { - return "", parser.ErrNotString -} - -type doubleValue struct { - *errValue - v float64 -} - -// NewDoubleValue wraps a native go type into a parser.Value. -func NewDoubleValue(v float64) parser.Value { - return &doubleValue{&errValue{}, v} -} - -func (v *doubleValue) Double() (float64, error) { - return v.v, nil -} - -type intValue struct { - *errValue - v int64 -} - -// NewIntValue wraps a native go type into a parser.Value. -func NewIntValue(v int64) parser.Value { - return &intValue{&errValue{}, v} -} - -func (v *intValue) Int() (int64, error) { - return v.v, nil -} - -type uintValue struct { - *errValue - v uint64 -} - -// NewUintValue wraps a native go type into a parser.Value. -func NewUintValue(v uint64) parser.Value { - return &uintValue{&errValue{}, v} -} - -func (v *uintValue) Uint() (uint64, error) { - return v.v, nil -} - -type boolValue struct { - *errValue - v bool -} - -// NewBoolValue wraps a native go type into a parser.Value. 
-func NewBoolValue(v bool) parser.Value { - return &boolValue{&errValue{}, v} -} - -func (v *boolValue) Bool() (bool, error) { - return v.v, nil -} - -type stringValue struct { - *errValue - v string -} - -// NewStringValue wraps a native go type into a parser.Value. -func NewStringValue(v string) parser.Value { - return &stringValue{&errValue{}, v} -} - -func (v *stringValue) String() (string, error) { - return v.v, nil -} - -type bytesValue struct { - *errValue - v []byte -} - -// NewBytesValue wraps a native go type into a parser.Value. -func NewBytesValue(v []byte) parser.Value { - return &bytesValue{&errValue{}, v} -} - -func (v *bytesValue) Bytes() ([]byte, error) { - return v.v, nil -} diff --git a/parser/debug/walk.go b/parser/debug/walk.go deleted file mode 100644 index 01e36b6..0000000 --- a/parser/debug/walk.go +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package debug - -import ( - "fmt" - "io" - "math/rand" - "strings" - "time" - - "github.com/katydid/parser-go/parser" -) - -func getValue(p parser.Interface) interface{} { - var v interface{} - var err error - v, err = p.Int() - if err == nil { - return v - } - v, err = p.Uint() - if err == nil { - return v - } - v, err = p.Double() - if err == nil { - return v - } - v, err = p.Bool() - if err == nil { - return v - } - v, err = p.String() - if err == nil { - return v - } - v, err = p.Bytes() - if err == nil { - return v - } - return nil -} - -// Node is a type that represents a node in a tree. -// It has a label an children nodes. -type Node struct { - Label string - Children Nodes -} - -// String returns a string representation of Node. -func (this Node) String() string { - if len(this.Children) == 0 { - return this.Label - } - return this.Label + ":" + this.Children.String() -} - -// Equal returns whether two Nodes are the same. -func (this Node) Equal(that Node) bool { - if this.Label != that.Label { - return false - } - if !this.Children.Equal(that.Children) { - return false - } - return true -} - -// Nodes is a list of Node. -type Nodes []Node - -// String returns a string representation of Nodes. -func (this Nodes) String() string { - ss := make([]string, len(this)) - for i := range this { - ss[i] = this[i].String() - } - return "{" + strings.Join(ss, ",") + "}" -} - -// Equal returns whether two Node lists are equal. -func (this Nodes) Equal(that Nodes) bool { - if len(this) != len(that) { - return false - } - for i := range this { - if !this[i].Equal(that[i]) { - return false - } - } - return true -} - -// Walk walks through the whole parser in a top down manner and records the values into a Nodes structute. 
-func Walk(p parser.Interface) Nodes { - a := make(Nodes, 0) - for { - if err := p.Next(); err != nil { - if err == io.EOF { - break - } else { - panic(err) - } - } - value := getValue(p) - if p.IsLeaf() { - a = append(a, Node{fmt.Sprintf("%v", value), nil}) - } else { - name := fmt.Sprintf("%v", value) - p.Down() - v := Walk(p) - p.Up() - a = append(a, Node{name, v}) - } - } - return a -} - -// NewRand returns a random integer generator, that can be used with RandomWalk. -// Its seed is the current time. -func NewRand() Rand { - return rand.New(rand.NewSource(time.Now().UnixNano())) -} - -// Rand is a subset of the interface that is implemented by math/rand representing only the methods used by the RandomWalk function. -type Rand interface { - Intn(n int) int -} - -// RandomWalk does a random walk of the parser, given two probabilities. -// -// next is passed to r.Intn and when a zero value is returned the Next method on the parser is called. -// down is passed to r.Intn and when a non zero value is returned the Down method on the parser is called. -// -// RandomWalk is great for testing that the implemented parser can handle random skipping of parts of the tree. 
-func RandomWalk(p parser.Interface, r Rand, next, down int) Nodes { - a := make(Nodes, 0) - for { - if r.Intn(next) == 0 { - break - } - if err := p.Next(); err != nil { - if err == io.EOF { - break - } else { - panic(err) - } - } - value := getValue(p) - if p.IsLeaf() { - a = append(a, Node{fmt.Sprintf("%#v", value), nil}) - } else { - name := fmt.Sprintf("%#v", value) - var v Nodes - if r.Intn(down) != 0 { - p.Down() - v = RandomWalk(p, r, next, down) - p.Up() - } - a = append(a, Node{name, v}) - } - } - return a -} diff --git a/parser/errors.go b/parser/errors.go deleted file mode 100644 index 9bd7d69..0000000 --- a/parser/errors.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2013 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package parser - -import ( - "fmt" -) - -//ErrNotDouble is an error that represents a type error. -var ErrNotDouble = fmt.Errorf("value is not a double") - -//ErrNotInt is an error that represents a type error. -var ErrNotInt = fmt.Errorf("value is not a int") - -//ErrNotUint is an error that represents a type error. -var ErrNotUint = fmt.Errorf("value is not a uint") - -//ErrNotBool is an error that represents a type error. -var ErrNotBool = fmt.Errorf("value is not a bool") - -//ErrNotString is an error that represents a type error. -var ErrNotString = fmt.Errorf("value is not a string") - -//ErrNotBytes is an error that represents a type error. 
-var ErrNotBytes = fmt.Errorf("value is not a bytes") diff --git a/parser/json/json.go b/parser/json/json.go deleted file mode 100644 index 421cb6a..0000000 --- a/parser/json/json.go +++ /dev/null @@ -1,627 +0,0 @@ -// Copyright 2013 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package json contains the implementation of a JSON parser. -package json - -import ( - "bytes" - "fmt" - "io" - "strconv" - - "github.com/katydid/parser-go/parser" -) - -// ErrUnquote returns an error that resulted from trying to unquote a string. 
-var ErrUnquote = fmt.Errorf("json: error unquoting string") - -func errInString(buf []byte) error { - return fmt.Errorf("katydid/json error in json string: %s", string(buf)) -} - -func isString(buf []byte) bool { - return buf[0] == '"' -} - -func scanString(buf []byte) (int, error) { - escaped := false - udigits := -1 - if buf[0] != '"' { - return 0, errInString(buf) - } - for i, c := range buf[1:] { - if escaped { - switch c { - case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': - escaped = false - continue - case 'u': - udigits = 0 - continue - } - return 0, errInString(buf) - } - if udigits >= 0 { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - udigits++ - } else { - return 0, errInString(buf) - } - if udigits == 4 { - udigits = -1 - } - continue - } - if c == '"' { - return i + 2, nil - } - if c == '\\' { - escaped = true - continue - } - if c < 0x20 { - return 0, errInString(buf) - } - } - panic("unreachable") -} - -func isNumber(c byte) bool { - return (c == '-') || ((c >= '0') && (c <= '9')) -} - -func isSpace(c byte) bool { - return (c == ' ') || (c == '\n') || (c == '\r') || (c == '\t') -} - -func skipSpace(buf []byte) int { - for i, c := range buf { - if !isSpace(c) { - return i - } - } - return len(buf) -} - -func (s *jsonParser) expected(expected string) error { - return fmt.Errorf("katydid/json/parser: expected '%s' at offset %d, but got '%c'", expected, s.offset, s.buf[s.offset]) -} - -func (s *jsonParser) scanOpenObject() error { - if s.buf[s.offset] == '{' { - s.offset++ - } else { - return s.expected("{") - } - return s.skipSpace() -} - -func (s *jsonParser) scanOpenArray() error { - if s.buf[s.offset] == '[' { - s.offset++ - } else { - return s.expected("[") - } - return s.skipSpace() -} - -func (s *jsonParser) scanString() error { - s.startValueOffset = s.offset - n, err := scanString(s.buf[s.offset:]) - if err != nil { - return err - } - s.offset += n - if s.offset >= len(s.buf) { - s.offset = len(s.buf) - } - 
s.endValueOffset = s.offset - return s.skipSpace() -} - -func (s *jsonParser) scanName() error { - startOffset := s.offset - n, err := scanString(s.buf[s.offset:]) - if err != nil { - return err - } - s.offset += n - var ok bool - s.name, ok = unquote(s.buf[startOffset:s.offset]) - if !ok { - return ErrUnquote - } - return s.skipSpace() -} - -func (s *jsonParser) skipSpace() error { - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - s.offset += skipSpace(s.buf[s.offset:]) - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - return nil -} - -func (s *jsonParser) scanTrue() error { - if s.offset+4 > len(s.buf) { - return io.ErrShortBuffer - } - if !bytes.Equal(s.buf[s.offset:s.offset+4], []byte("true")) { - return s.expected("true") - } - s.startValueOffset = s.offset - s.endValueOffset = s.offset + 4 - s.offset += 4 - return s.skipSpace() -} - -func (s *jsonParser) scanFalse() error { - if s.offset+5 > len(s.buf) { - return io.ErrShortBuffer - } - if !bytes.Equal(s.buf[s.offset:s.offset+5], []byte("false")) { - return s.expected("false") - } - s.startValueOffset = s.offset - s.endValueOffset = s.offset + 5 - s.offset += 5 - return s.skipSpace() -} - -func (s *jsonParser) scanNull() error { - if s.offset+4 > len(s.buf) { - return io.ErrShortBuffer - } - if !bytes.Equal(s.buf[s.offset:s.offset+4], []byte("null")) { - return s.expected("null") - } - s.startValueOffset = s.offset - s.endValueOffset = s.offset + 4 - s.offset += 4 - return s.skipSpace() -} - -func (s *jsonParser) scanArray() error { - count := 0 - index := 0 - for i, c := range s.buf[s.offset:] { - if c == '[' { - count++ - } - if c == ']' { - count-- - } - if count == 0 { - index = i - break - } - } - if count != 0 { - return s.expected("]") - } - s.startValueOffset = s.offset - s.endValueOffset = s.offset + index + 1 - s.offset += index + 1 - s.isValueArray = true - return s.skipSpace() -} - -func (s *jsonParser) scanObject() error { - count := 0 - index := 0 - for i, c := range 
s.buf[s.offset:] { - if c == '{' { - count++ - } - if c == '}' { - count-- - } - if count == 0 { - index = i - break - } - } - if count != 0 { - return s.expected("}") - } - s.startValueOffset = s.offset - s.endValueOffset = s.offset + index + 1 - s.offset += index + 1 - s.isValueObject = true - return s.skipSpace() -} - -func isDigit(c byte) bool { - return c >= '0' && c <= '9' -} - -func isDigit19(c byte) bool { - return c >= '1' && c <= '9' -} - -func (s *jsonParser) scanNumber() error { - s.startValueOffset = s.offset - if s.buf[s.offset] == '-' { - s.offset++ - if s.offset >= len(s.buf) { - return io.ErrShortBuffer - } - } - if s.buf[s.offset] == '0' { - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - } else if isDigit19(s.buf[s.offset]) { - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - for s.offset < len(s.buf) && isDigit(s.buf[s.offset]) { - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - } - } - if s.offset < len(s.buf) && s.buf[s.offset] == '.' 
{ - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - for s.offset < len(s.buf) && isDigit(s.buf[s.offset]) { - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - } - } - if s.offset < len(s.buf) && - (s.buf[s.offset] == 'e' || s.buf[s.offset] == 'E') { - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - if s.offset < len(s.buf) { - if s.buf[s.offset] == '+' || s.buf[s.offset] == '-' { - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - } - } - for s.offset < len(s.buf) && isDigit(s.buf[s.offset]) { - s.offset++ - if s.offset > len(s.buf) { - return io.ErrShortBuffer - } - } - } - s.endValueOffset = s.offset - return nil -} - -func (s *jsonParser) scanValue() error { - c := s.buf[s.offset] - if isNumber(c) { - return s.scanNumber() - } - switch c { - case '"': - return s.scanString() - case '{': - return s.scanObject() - case '[': - return s.scanArray() - case 't': - return s.scanTrue() - case 'f': - return s.scanFalse() - case 'n': - return s.scanNull() - } - return s.expected("value") -} - -func (s *jsonParser) scanColon() error { - if s.buf[s.offset] != ':' { - return s.expected(":") - } - s.offset++ - return s.skipSpace() -} - -func (s *jsonParser) scanCloseObject() error { - if s.buf[s.offset] == '}' { - return io.EOF - } - return s.expected("}") -} - -func (s *jsonParser) scanCloseArray() error { - if s.buf[s.offset] == ']' { - return io.EOF - } - return s.expected("]") -} - -func (s *jsonParser) scanComma() error { - if s.buf[s.offset] != ',' { - return s.expected(",") - } - s.offset++ - return s.skipSpace() -} - -func (s *jsonParser) nextValueInArray() error { - if s.firstArrayValue { - if err := s.scanOpenArray(); err != nil { - return err - } - s.firstArrayValue = false - } else { - if s.buf[s.offset] == ',' { - if err := s.scanComma(); err != nil { - return err - } - } else { - return s.scanCloseArray() - } - } - if s.buf[s.offset] == ']' { - return 
s.scanCloseArray() - } - return s.scanValue() -} - -func (s *jsonParser) nextValueInObject() error { - if s.firstObjectValue { - if err := s.scanOpenObject(); err != nil { - return err - } - s.firstObjectValue = false - } else { - if s.buf[s.offset] == ',' { - if err := s.scanComma(); err != nil { - return err - } - } else { - if err := s.scanCloseObject(); err != nil { - return err - } - } - } - if isString(s.buf[s.offset:]) { - if err := s.scanName(); err != nil { - return err - } - if err := s.scanColon(); err != nil { - return err - } - if err := s.scanValue(); err != nil { - return err - } - return nil - } - return s.scanCloseObject() -} - -func (s *jsonParser) Next() error { - if s.isLeaf { - if s.firstObjectValue { - s.firstObjectValue = false - return nil - } - return io.EOF - } - s.isValueObject = false - s.isValueArray = false - if err := s.skipSpace(); err != nil { - return err - } - if s.inArray { - if !s.firstArrayValue { - s.arrayIndex++ - } - return s.nextValueInArray() - } - return s.nextValueInObject() -} - -func (s *jsonParser) IsLeaf() bool { - return s.isLeaf -} - -func (s *jsonParser) Value() []byte { - return s.buf[s.startValueOffset:s.endValueOffset] -} - -func (s *jsonParser) Double() (float64, error) { - if s.isLeaf { - v := string(s.Value()) - i, err := strconv.ParseFloat(v, 64) - return i, err - } - return 0, parser.ErrNotDouble -} - -func (s *jsonParser) Int() (int64, error) { - if s.isLeaf { - v := string(s.Value()) - i, err := strconv.ParseInt(v, 10, 64) - if err != nil { - f, ferr := strconv.ParseFloat(v, 64) - if ferr != nil { - return i, err - } - if float64(int64(f)) == f { - return int64(f), nil - } - } - return i, err - } - if s.inArray { - return int64(s.arrayIndex), nil - } - return 0, parser.ErrNotInt -} - -func (s *jsonParser) Uint() (uint64, error) { - if s.isLeaf { - v := string(s.Value()) - i, err := strconv.ParseUint(v, 10, 64) - return uint64(i), err - } - return 0, parser.ErrNotUint -} - -func (s *jsonParser) Bool() 
(bool, error) { - if s.isLeaf { - v := string(s.Value()) - if v == "true" { - return true, nil - } - if v == "false" { - return false, nil - } - } - return false, parser.ErrNotBool -} - -func (s *jsonParser) String() (string, error) { - if s.isLeaf { - v := s.Value() - if v[0] != '"' { - return "", parser.ErrNotString - } - res, ok := unquote(v) - if !ok { - return "", ErrUnquote - } - return res, nil - } - if s.inArray { - return "", parser.ErrNotString - } - return s.name, nil -} - -func (s *jsonParser) Bytes() ([]byte, error) { - return nil, parser.ErrNotBytes -} - -// JsonParser is a parser for JSON -type JsonParser interface { - parser.Interface - //Init initialises the parser with a byte buffer containing JSON. - Init(buf []byte) error - Reset() error -} - -// NewJsonParser returns a new JSON parser. -func NewJsonParser() JsonParser { - return &jsonParser{ - state: state{ - firstObjectValue: true, - }, - stack: make([]state, 0, 10), - } -} - -func (s *jsonParser) Init(buf []byte) error { - s.state = state{ - firstObjectValue: true, - buf: buf, - } - s.stack = s.stack[:0] - if err := s.skipSpace(); err != nil { - return err - } - if s.buf[s.offset] == '{' { - //do nothing - } else if s.buf[s.offset] == '[' { - if err := s.scanValue(); err != nil { - return err - } - s.inArray = true - s.firstArrayValue = true - s.buf = s.buf[s.startValueOffset:s.endValueOffset] - s.offset = 0 - } else { - if err := s.scanValue(); err != nil { - return err - } - s.state.isLeaf = true - s.state.firstObjectValue = true - } - return nil -} - -func (s *jsonParser) Reset() error { - if len(s.stack) > 0 { - return s.Init(s.stack[0].buf) - } - return s.Init(s.buf) -} - -type jsonParser struct { - state - stack []state -} - -type state struct { - buf []byte - offset int - name string - startValueOffset int - endValueOffset int - inArray bool - firstObjectValue bool - firstArrayValue bool - isValueObject bool - isValueArray bool - isLeaf bool - arrayIndex int -} - -func (s *jsonParser) 
Up() { - top := len(s.stack) - 1 - s.state = s.stack[top] - s.stack = s.stack[:top] -} - -func (s *jsonParser) Down() { - if s.isValueObject { - s.stack = append(s.stack, s.state) - s.state = state{ - buf: s.buf[s.startValueOffset:s.endValueOffset], - firstObjectValue: true, - } - } else if s.isValueArray { - s.stack = append(s.stack, s.state) - s.state = state{ - buf: s.buf[s.startValueOffset:s.endValueOffset], - firstArrayValue: true, - inArray: true, - } - } else { - s.stack = append(s.stack, s.state) - s.state.isLeaf = true - s.state.firstObjectValue = true - } -} diff --git a/parser/json/json_test.go b/parser/json/json_test.go deleted file mode 100644 index 866e342..0000000 --- a/parser/json/json_test.go +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package json_test - -import ( - "encoding/json" - "testing" - - "github.com/katydid/parser-go/parser/debug" - sjson "github.com/katydid/parser-go/parser/json" -) - -func TestDebug(t *testing.T) { - p := sjson.NewJsonParser() - data, err := json.Marshal(debug.Input) - if err != nil { - t.Fatal(err) - } - if err := p.Init(data); err != nil { - t.Fatal(err) - } - m := debug.Walk(p) - if !m.Equal(debug.Output) { - t.Fatalf("expected %s but got %s", debug.Output, m) - } -} - -func TestRandomDebug(t *testing.T) { - p := sjson.NewJsonParser() - data, err := json.Marshal(debug.Input) - if err != nil { - t.Fatal(err) - } - for i := 0; i < 10; i++ { - if err := p.Init(data); err != nil { - t.Fatal(err) - } - //l := debug.NewLogger(p, debug.NewLineLogger()) - debug.RandomWalk(p, debug.NewRand(), 10, 3) - //t.Logf("original %v vs random %v", debug.Output, m) - } -} - -func TestEscapedChar(t *testing.T) { - j := map[string][]interface{}{ - `a\"`: {1}, - } - data, err := json.Marshal(j) - if err != nil { - t.Fatal(err) - } - t.Logf("%s", string(data)) - parser := sjson.NewJsonParser() - if err := parser.Init(data); err != nil { - t.Fatal(err) - } - m := debug.Walk(parser) - name := m[0].Label - if name != `a\"` { - t.Fatalf("wrong escaped name %s", name) - } -} - -func TestMultiLineArray(t *testing.T) { - s := `{ - "A":[1] - }` - parser := sjson.NewJsonParser() - if err := parser.Init([]byte(s)); err != nil { - t.Fatal(err) - } - jout := debug.Walk(parser) - t.Logf("%v", jout) -} - -func TestIntWithExponent(t *testing.T) { - s := `{"A":1e+08}` - parser := sjson.NewJsonParser() - if err := parser.Init([]byte(s)); err != nil { - t.Fatal(err) - } - if err := parser.Next(); err != nil { - t.Fatal(err) - } - parser.Down() - if err := parser.Next(); err != nil { - t.Fatal(err) - } - if !parser.IsLeaf() { - t.Fatal("incorrect walk, please adjust the path above") - } - if i, err := parser.Int(); err != nil { - t.Fatalf("did not expect error %v", err) - } else if i != 1e+08 { - 
t.Fatalf("got %d", i) - } -} - -func testValue(t *testing.T, input, output string) { - parser := sjson.NewJsonParser() - if err := parser.Init([]byte(input)); err != nil { - t.Fatalf("init error: %v", err) - } - jout := debug.Walk(parser) - if len(jout) != 1 { - t.Fatalf("expected one node") - } - if len(jout[0].Children) != 0 { - t.Fatalf("did not expected any children") - } - if jout[0].Label != output { - t.Fatalf("expected %s got %s", output, jout[0].Label) - } -} - -func TestValues(t *testing.T) { - testValue(t, "0", "0") - testValue(t, "1", "1") - testValue(t, "-1", "-1") - testValue(t, "123", "123") - testValue(t, "1.1", "1.1") - testValue(t, "1.123", "1.123") - testValue(t, "1.1e1", "11") - testValue(t, "1.1e-1", "0.11") - testValue(t, "1.1e10", "11000000000") - testValue(t, "1.1e+10", "11000000000") - testValue(t, `"a"`, "a") - testValue(t, `"abc"`, "abc") - testValue(t, `""`, "") - testValue(t, `"\b"`, "\b") - testValue(t, `true`, "true") - testValue(t, `false`, "false") - testValue(t, `null`, "") -} - -func testArray(t *testing.T, s string) { - parser := sjson.NewJsonParser() - if err := parser.Init([]byte(s)); err != nil { - t.Fatal(err) - } - jout := debug.Walk(parser) - t.Logf("%v", jout) -} - -func TestArray(t *testing.T) { - testArray(t, `[1]`) - testArray(t, `[1,2.3e5]`) - testArray(t, `[1,"a"]`) - testArray(t, `[true, false, null]`) - testArray(t, `[{"a": true, "b": [1,2]}]`) -} diff --git a/parser/json/unquote.go b/parser/json/unquote.go deleted file mode 100644 index c9b21de..0000000 --- a/parser/json/unquote.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// This code has been copied from https://golang.org/src/encoding/json/decode.go - -package json - -import ( - "strconv" - "unicode" - "unicode/utf16" - "unicode/utf8" -) - -// getu4 decodes \uXXXX from the beginning of s, returning the hex value, -// or it returns -1. -func getu4(s []byte) rune { - if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { - return -1 - } - r, err := strconv.ParseUint(string(s[2:6]), 16, 64) - if err != nil { - return -1 - } - return rune(r) -} - -// unquote converts a quoted JSON string literal s into an actual string t. -// The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) - t = string(s) - return -} - -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { - return - } - s = s[1 : len(s)-1] - - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { - return s, true - } - - b := make([]byte, len(s)+2*utf8.UTFMax) - w := copy(b, s[0:r]) - for r < len(s) { - // Out of room? Can only happen if s is full of - // malformed UTF-8 and we're replacing each - // byte with RuneError. 
- if w >= len(b)-2*utf8.UTFMax { - nb := make([]byte, (len(b)+utf8.UTFMax)*2) - copy(nb, b[0:w]) - b = nb - } - switch c := s[r]; { - case c == '\\': - r++ - if r >= len(s) { - return - } - switch s[r] { - default: - return - case '"', '\\', '/', '\'': - b[w] = s[r] - r++ - w++ - case 'b': - b[w] = '\b' - r++ - w++ - case 'f': - b[w] = '\f' - r++ - w++ - case 'n': - b[w] = '\n' - r++ - w++ - case 'r': - b[w] = '\r' - r++ - w++ - case 't': - b[w] = '\t' - r++ - w++ - case 'u': - r-- - rr := getu4(s[r:]) - if rr < 0 { - return - } - r += 6 - if utf16.IsSurrogate(rr) { - rr1 := getu4(s[r:]) - if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { - // A valid pair; consume. - r += 6 - w += utf8.EncodeRune(b[w:], dec) - break - } - // Invalid surrogate; fall back to replacement rune. - rr = unicode.ReplacementChar - } - w += utf8.EncodeRune(b[w:], rr) - } - - // Quote, control characters are invalid. - case c == '"', c < ' ': - return - - // ASCII - case c < utf8.RuneSelf: - b[w] = c - r++ - w++ - - // Coerce to well-formed UTF-8. - default: - rr, size := utf8.DecodeRune(s[r:]) - r += size - w += utf8.EncodeRune(b[w:], rr) - } - } - return b[0:w], true -} diff --git a/parser/parser.go b/parser/parser.go deleted file mode 100644 index ee2bff8..0000000 --- a/parser/parser.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2013 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//Package parser represents the parser.Interface and some errors for implementing parsers. -package parser - -import "fmt" - -//A type conforming to the parser.Interface interface, abstracts away the implementation details of a parser. -type Interface interface { - Next() error - IsLeaf() bool - Up() - Down() - Value -} - -//A type confirming to the parser.Value interface, repesents one native value, tree node label (field name) or some repesentation a node label. -//Typically only one of the methods returns a value without an error, but more than one method can return without an error. -//For example a positive json number can return an errorless value for the Double, Int and Uint methods. -type Value interface { - Double() (float64, error) - Int() (int64, error) - Uint() (uint64, error) - Bool() (bool, error) - String() (string, error) - Bytes() ([]byte, error) -} - -//Sprint returns a value printed as a string. -func Sprint(value Value) string { - return fmt.Sprintf("%#v", getValue(value)) -} - -func getValue(value Value) interface{} { - var v interface{} - var err error - v, err = value.Bool() - if err == nil { - return v - } - v, err = value.Bytes() - if err == nil { - return v - } - v, err = value.String() - if err == nil { - return v - } - v, err = value.Int() - if err == nil { - return v - } - v, err = value.Uint() - if err == nil { - return v - } - v, err = value.Double() - if err == nil { - return v - } - return nil -} diff --git a/parser/reflect/reflect.go b/parser/reflect/reflect.go deleted file mode 100644 index 16f0966..0000000 --- a/parser/reflect/reflect.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2013 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package reflect contains an implementation of a parser for a reflected go structure. -package reflect - -import ( - "io" - "reflect" - - "github.com/katydid/parser-go/parser" -) - -type state struct { - parent reflect.Value - typ reflect.StructField - value reflect.Value - field int - maxField int - isLeaf bool - isArray bool -} - -type reflectParser struct { - state - stack []state -} - -func deref(v reflect.Value) reflect.Value { - if v.Kind() == reflect.Ptr { - return v.Elem() - } - return v -} - -func newState(val reflect.Value) state { - value := deref(val) - if value.Kind() == reflect.Struct { - return state{ - parent: value, - maxField: value.NumField(), - } - } - if isSlice(value) { - return state{ - parent: value, - maxField: value.Len(), - isArray: true, - } - } - return state{ - value: val, - isLeaf: true, - maxField: 1, - } -} - -func isSlice(v reflect.Value) bool { - return v.Kind() == reflect.Slice && v.Type().Elem().Kind() != reflect.Uint8 -} - -// ReflectParser is a parser for a reflected go structure. -type ReflectParser interface { - parser.Interface - //Init initialises the parser with a value of reflected go structure. - Init(value reflect.Value) ReflectParser -} - -// NewReflectParser returns a new reflect parser. 
-func NewReflectParser() ReflectParser { - return &reflectParser{stack: make([]state, 0, 10)} -} - -func (s *reflectParser) Init(value reflect.Value) ReflectParser { - s.state = newState(value) - return s -} - -func (s *reflectParser) Next() error { - if s.field >= s.maxField { - return io.EOF - } - if !s.isLeaf && !s.isArray { - s.typ = s.parent.Type().Field(s.field) - s.value = s.parent.Field(s.field) - if s.value.Kind() == reflect.Ptr || s.value.Kind() == reflect.Slice { - if s.value.IsNil() { - s.field++ - return s.Next() - } - } - } - s.field++ - return nil -} - -func (s *reflectParser) IsLeaf() bool { - return s.isLeaf -} - -func (s *reflectParser) getValue() reflect.Value { - return deref(s.value) -} - -func (s *reflectParser) Double() (float64, error) { - if s.isLeaf { - value := s.getValue() - switch value.Kind() { - case reflect.Float64, reflect.Float32: - return value.Float(), nil - } - } - return 0, parser.ErrNotDouble -} - -func (s *reflectParser) Int() (int64, error) { - if s.isArray { - return int64(s.field - 1), nil - } - if s.isLeaf { - value := s.getValue() - switch value.Kind() { - case reflect.Int64, reflect.Int32: - return value.Int(), nil - } - } - return 0, parser.ErrNotInt -} - -func (s *reflectParser) Uint() (uint64, error) { - if s.isLeaf { - value := s.getValue() - switch value.Kind() { - case reflect.Uint64, reflect.Uint32: - return value.Uint(), nil - } - } - return 0, parser.ErrNotUint -} - -func (s *reflectParser) Bool() (bool, error) { - if s.isLeaf { - value := s.getValue() - switch value.Kind() { - case reflect.Bool: - return value.Bool(), nil - } - } - return false, parser.ErrNotBool -} - -func (s *reflectParser) String() (string, error) { - if !s.isLeaf { - return s.typ.Name, nil - } - value := s.getValue() - switch value.Kind() { - case reflect.String: - return value.String(), nil - } - return "", parser.ErrNotString -} - -func (s *reflectParser) Bytes() ([]byte, error) { - if s.isLeaf { - value := s.getValue() - switch 
value.Kind() { - case reflect.Slice, reflect.Uint8, reflect.Int8: - return value.Bytes(), nil - } - } - return nil, parser.ErrNotBytes -} - -func (s *reflectParser) Up() { - top := len(s.stack) - 1 - s.state = s.stack[top] - s.stack = s.stack[:top] -} - -func (s *reflectParser) Down() { - s.stack = append(s.stack, s.state) - if s.isArray { - s.state = newState(s.state.parent.Index(s.field - 1)) - } else { - s.state = newState(s.state.value) - } -} diff --git a/parser/reflect/reflect_test.go b/parser/reflect/reflect_test.go deleted file mode 100644 index b19f139..0000000 --- a/parser/reflect/reflect_test.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package reflect - -import ( - "reflect" - "testing" - - "github.com/katydid/parser-go/parser/debug" -) - -func TestDebug(t *testing.T) { - p := NewReflectParser() - p.Init(reflect.ValueOf(debug.Input)) - m := debug.Walk(p) - if !m.Equal(debug.Output) { - t.Fatalf("expected %s but got %s", debug.Output, m) - } -} - -func TestRandomDebug(t *testing.T) { - p := NewReflectParser() - for i := 0; i < 10; i++ { - p.Init(reflect.ValueOf(debug.Input)) - //l := debug.NewLogger(p, debug.NewLineLogger()) - debug.RandomWalk(p, debug.NewRand(), 10, 3) - //t.Logf("original %v vs random %v", debug.Output, m) - } -} diff --git a/parser/xml/bytes_decl.go b/parser/xml/bytes_decl.go deleted file mode 100644 index fd0afe3..0000000 --- a/parser/xml/bytes_decl.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package xml - -import ( - "bytes" -) - -// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s. -func IndexByte(s []byte, c byte) int { - return bytes.IndexByte(s, c) -} - -// Equal returns a boolean reporting whether a and b -// are the same length and contain the same bytes. -// A nil argument is equivalent to an empty slice. -func Equal(a, b []byte) bool { - return bytes.Equal(a, b) -} - -// Compare returns an integer comparing two byte slices lexicographically. -// The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 
-// A nil argument is equivalent to an empty slice. -func Compare(a, b []byte) int { - return bytes.Compare(a, b) -} diff --git a/parser/xml/marshal.go b/parser/xml/marshal.go deleted file mode 100644 index 05b5542..0000000 --- a/parser/xml/marshal.go +++ /dev/null @@ -1,1131 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import ( - "bufio" - "bytes" - "encoding" - "errors" - "fmt" - "io" - "reflect" - "strconv" - "strings" -) - -const ( - // Header is a generic XML header suitable for use with the output of [Marshal]. - // This is not automatically added to any output of this package, - // it is provided as a convenience. - Header = `` + "\n" -) - -// Marshal returns the XML encoding of v. -// -// Marshal handles an array or slice by marshaling each of the elements. -// Marshal handles a pointer by marshaling the value it points at or, if the -// pointer is nil, by writing nothing. Marshal handles an interface value by -// marshaling the value it contains or, if the interface value is nil, by -// writing nothing. Marshal handles all other data by writing one or more XML -// elements containing the data. -// -// The name for the XML elements is taken from, in order of preference: -// - the tag on the XMLName field, if the data is a struct -// - the value of the XMLName field of type [Name] -// - the tag of the struct field used to obtain the data -// - the name of the struct field used to obtain the data -// - the name of the marshaled type -// -// The XML element for a struct contains marshaled elements for each of the -// exported fields of the struct, with these exceptions: -// - the XMLName field, described above, is omitted. -// - a field with tag "-" is omitted. -// - a field with tag "name,attr" becomes an attribute with -// the given name in the XML element. 
-// - a field with tag ",attr" becomes an attribute with the -// field name in the XML element. -// - a field with tag ",chardata" is written as character data, -// not as an XML element. -// - a field with tag ",cdata" is written as character data -// wrapped in one or more tags, not as an XML element. -// - a field with tag ",innerxml" is written verbatim, not subject -// to the usual marshaling procedure. -// - a field with tag ",comment" is written as an XML comment, not -// subject to the usual marshaling procedure. It must not contain -// the "--" string within it. -// - a field with a tag including the "omitempty" option is omitted -// if the field value is empty. The empty values are false, 0, any -// nil pointer or interface value, and any array, slice, map, or -// string of length zero. -// - an anonymous struct field is handled as if the fields of its -// value were part of the outer struct. -// - a field implementing [Marshaler] is written by calling its MarshalXML -// method. -// - a field implementing [encoding.TextMarshaler] is written by encoding the -// result of its MarshalText method as text. -// -// If a field uses a tag "a>b>c", then the element c will be nested inside -// parent elements a and b. Fields that appear next to each other that name -// the same parent will be enclosed in one XML element. -// -// If the XML name for a struct field is defined by both the field tag and the -// struct's XMLName field, the names must match. -// -// See [MarshalIndent] for an example. -// -// Marshal will return an error if asked to marshal a channel, function, or map. -func Marshal(v any) ([]byte, error) { - var b bytes.Buffer - enc := NewEncoder(&b) - if err := enc.Encode(v); err != nil { - return nil, err - } - if err := enc.Close(); err != nil { - return nil, err - } - return b.Bytes(), nil -} - -// Marshaler is the interface implemented by objects that can marshal -// themselves into valid XML elements. 
-// -// MarshalXML encodes the receiver as zero or more XML elements. -// By convention, arrays or slices are typically encoded as a sequence -// of elements, one per entry. -// Using start as the element tag is not required, but doing so -// will enable [Unmarshal] to match the XML elements to the correct -// struct field. -// One common implementation strategy is to construct a separate -// value with a layout corresponding to the desired XML and then -// to encode it using e.EncodeElement. -// Another common strategy is to use repeated calls to e.EncodeToken -// to generate the XML output one token at a time. -// The sequence of encoded tokens must make up zero or more valid -// XML elements. -type Marshaler interface { - MarshalXML(e *Encoder, start StartElement) error -} - -// MarshalerAttr is the interface implemented by objects that can marshal -// themselves into valid XML attributes. -// -// MarshalXMLAttr returns an XML attribute with the encoded value of the receiver. -// Using name as the attribute name is not required, but doing so -// will enable [Unmarshal] to match the attribute to the correct -// struct field. -// If MarshalXMLAttr returns the zero attribute [Attr]{}, no attribute -// will be generated in the output. -// MarshalXMLAttr is used only for struct fields with the -// "attr" option in the field tag. -type MarshalerAttr interface { - MarshalXMLAttr(name Name) (Attr, error) -} - -// MarshalIndent works like [Marshal], but each XML element begins on a new -// indented line that starts with prefix and is followed by one or more -// copies of indent according to the nesting depth. -func MarshalIndent(v any, prefix, indent string) ([]byte, error) { - var b bytes.Buffer - enc := NewEncoder(&b) - enc.Indent(prefix, indent) - if err := enc.Encode(v); err != nil { - return nil, err - } - if err := enc.Close(); err != nil { - return nil, err - } - return b.Bytes(), nil -} - -// An Encoder writes XML data to an output stream. 
-type Encoder struct { - p printer -} - -// NewEncoder returns a new encoder that writes to w. -func NewEncoder(w io.Writer) *Encoder { - e := &Encoder{printer{w: bufio.NewWriter(w)}} - e.p.encoder = e - return e -} - -// Indent sets the encoder to generate XML in which each element -// begins on a new indented line that starts with prefix and is followed by -// one or more copies of indent according to the nesting depth. -func (enc *Encoder) Indent(prefix, indent string) { - enc.p.prefix = prefix - enc.p.indent = indent -} - -// Encode writes the XML encoding of v to the stream. -// -// See the documentation for [Marshal] for details about the conversion -// of Go values to XML. -// -// Encode calls [Encoder.Flush] before returning. -func (enc *Encoder) Encode(v any) error { - err := enc.p.marshalValue(reflect.ValueOf(v), nil, nil) - if err != nil { - return err - } - return enc.p.w.Flush() -} - -// EncodeElement writes the XML encoding of v to the stream, -// using start as the outermost tag in the encoding. -// -// See the documentation for [Marshal] for details about the conversion -// of Go values to XML. -// -// EncodeElement calls [Encoder.Flush] before returning. -func (enc *Encoder) EncodeElement(v any, start StartElement) error { - err := enc.p.marshalValue(reflect.ValueOf(v), nil, &start) - if err != nil { - return err - } - return enc.p.w.Flush() -} - -var ( - begComment = []byte("") - endProcInst = []byte("?>") -) - -// EncodeToken writes the given XML token to the stream. -// It returns an error if [StartElement] and [EndElement] tokens are not properly matched. -// -// EncodeToken does not call [Encoder.Flush], because usually it is part of a larger operation -// such as [Encoder.Encode] or [Encoder.EncodeElement] (or a custom [Marshaler]'s MarshalXML invoked -// during those), and those will call Flush when finished. 
-// Callers that create an Encoder and then invoke EncodeToken directly, without -// using Encode or EncodeElement, need to call Flush when finished to ensure -// that the XML is written to the underlying writer. -// -// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token -// in the stream. -func (enc *Encoder) EncodeToken(t Token) error { - - p := &enc.p - switch t := t.(type) { - case StartElement: - if err := p.writeStart(&t); err != nil { - return err - } - case EndElement: - if err := p.writeEnd(t.Name); err != nil { - return err - } - case CharData: - escapeText(p, t, false) - case Comment: - if bytes.Contains(t, endComment) { - return fmt.Errorf("xml: EncodeToken of Comment containing --> marker") - } - p.WriteString("") - return p.cachedWriteError() - case ProcInst: - // First token to be encoded which is also a ProcInst with target of xml - // is the xml declaration. The only ProcInst where target of xml is allowed. - if t.Target == "xml" && p.w.Buffered() != 0 { - return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded") - } - if !isNameString(t.Target) { - return fmt.Errorf("xml: EncodeToken of ProcInst with invalid Target") - } - if bytes.Contains(t.Inst, endProcInst) { - return fmt.Errorf("xml: EncodeToken of ProcInst containing ?> marker") - } - p.WriteString(" 0 { - p.WriteByte(' ') - p.Write(t.Inst) - } - p.WriteString("?>") - case Directive: - if !isValidDirective(t) { - return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers") - } - p.WriteString("") - default: - return fmt.Errorf("xml: EncodeToken of invalid token type") - - } - return p.cachedWriteError() -} - -// isValidDirective reports whether dir is a valid directive text, -// meaning angle brackets are matched, ignoring comments and strings. 
-func isValidDirective(dir Directive) bool { - var ( - depth int - inquote uint8 - incomment bool - ) - for i, c := range dir { - switch { - case incomment: - if c == '>' { - if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) { - incomment = false - } - } - // Just ignore anything in comment - case inquote != 0: - if c == inquote { - inquote = 0 - } - // Just ignore anything within quotes - case c == '\'' || c == '"': - inquote = c - case c == '<': - if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) { - incomment = true - } else { - depth++ - } - case c == '>': - if depth == 0 { - return false - } - depth-- - } - } - return depth == 0 && inquote == 0 && !incomment -} - -// Flush flushes any buffered XML to the underlying writer. -// See the [Encoder.EncodeToken] documentation for details about when it is necessary. -func (enc *Encoder) Flush() error { - return enc.p.w.Flush() -} - -// Close the Encoder, indicating that no more data will be written. It flushes -// any buffered XML to the underlying writer and returns an error if the -// written XML is invalid (e.g. by containing unclosed elements). -func (enc *Encoder) Close() error { - return enc.p.Close() -} - -type printer struct { - w *bufio.Writer - encoder *Encoder - seq int - indent string - prefix string - depth int - indentedIn bool - putNewline bool - attrNS map[string]string // map prefix -> name space - attrPrefix map[string]string // map name space -> prefix - prefixes []string - tags []Name - closed bool - err error -} - -// createAttrPrefix finds the name space prefix attribute to use for the given name space, -// defining a new prefix if necessary. It returns the prefix. -func (p *printer) createAttrPrefix(url string) string { - if prefix := p.attrPrefix[url]; prefix != "" { - return prefix - } - - // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml" - // and must be referred to that way. 
- // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", - // but users should not be trying to use that one directly - that's our job.) - if url == xmlURL { - return xmlPrefix - } - - // Need to define a new name space. - if p.attrPrefix == nil { - p.attrPrefix = make(map[string]string) - p.attrNS = make(map[string]string) - } - - // Pick a name. We try to use the final element of the path - // but fall back to _. - prefix := strings.TrimRight(url, "/") - if i := strings.LastIndex(prefix, "/"); i >= 0 { - prefix = prefix[i+1:] - } - if prefix == "" || !isName([]byte(prefix)) || strings.Contains(prefix, ":") { - prefix = "_" - } - // xmlanything is reserved and any variant of it regardless of - // case should be matched, so: - // (('X'|'x') ('M'|'m') ('L'|'l')) - // See Section 2.3 of https://www.w3.org/TR/REC-xml/ - if len(prefix) >= 3 && strings.EqualFold(prefix[:3], "xml") { - prefix = "_" + prefix - } - if p.attrNS[prefix] != "" { - // Name is taken. Find a better one. - for p.seq++; ; p.seq++ { - if id := prefix + "_" + strconv.Itoa(p.seq); p.attrNS[id] == "" { - prefix = id - break - } - } - } - - p.attrPrefix[url] = prefix - p.attrNS[prefix] = url - - p.WriteString(`xmlns:`) - p.WriteString(prefix) - p.WriteString(`="`) - EscapeText(p, []byte(url)) - p.WriteString(`" `) - - p.prefixes = append(p.prefixes, prefix) - - return prefix -} - -// deleteAttrPrefix removes an attribute name space prefix. 
-func (p *printer) deleteAttrPrefix(prefix string) { - delete(p.attrPrefix, p.attrNS[prefix]) - delete(p.attrNS, prefix) -} - -func (p *printer) markPrefix() { - p.prefixes = append(p.prefixes, "") -} - -func (p *printer) popPrefix() { - for len(p.prefixes) > 0 { - prefix := p.prefixes[len(p.prefixes)-1] - p.prefixes = p.prefixes[:len(p.prefixes)-1] - if prefix == "" { - break - } - p.deleteAttrPrefix(prefix) - } -} - -var ( - marshalerType = reflect.TypeFor[Marshaler]() - marshalerAttrType = reflect.TypeFor[MarshalerAttr]() - textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() -) - -// marshalValue writes one or more XML elements representing val. -// If val was obtained from a struct field, finfo must have its details. -func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplate *StartElement) error { - if startTemplate != nil && startTemplate.Name.Local == "" { - return fmt.Errorf("xml: EncodeElement of StartElement with missing name") - } - - if !val.IsValid() { - return nil - } - if finfo != nil && finfo.flags&fOmitEmpty != 0 && isEmptyValue(val) { - return nil - } - - // Drill into interfaces and pointers. - // This can turn into an infinite loop given a cyclic chain, - // but it matches the Go 1 behavior. - for val.Kind() == reflect.Interface || val.Kind() == reflect.Pointer { - if val.IsNil() { - return nil - } - val = val.Elem() - } - - kind := val.Kind() - typ := val.Type() - - // Check for marshaler. - if val.CanInterface() && typ.Implements(marshalerType) { - return p.marshalInterface(val.Interface().(Marshaler), defaultStart(typ, finfo, startTemplate)) - } - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(marshalerType) { - return p.marshalInterface(pv.Interface().(Marshaler), defaultStart(pv.Type(), finfo, startTemplate)) - } - } - - // Check for text marshaler. 
- if val.CanInterface() && typ.Implements(textMarshalerType) { - return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), defaultStart(typ, finfo, startTemplate)) - } - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { - return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), defaultStart(pv.Type(), finfo, startTemplate)) - } - } - - // Slices and arrays iterate over the elements. They do not have an enclosing tag. - if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { - for i, n := 0, val.Len(); i < n; i++ { - if err := p.marshalValue(val.Index(i), finfo, startTemplate); err != nil { - return err - } - } - return nil - } - - tinfo, err := getTypeInfo(typ) - if err != nil { - return err - } - - // Create start element. - // Precedence for the XML element name is: - // 0. startTemplate - // 1. XMLName field in underlying struct; - // 2. field name/tag in the struct field; and - // 3. type name - var start StartElement - - if startTemplate != nil { - start.Name = startTemplate.Name - start.Attr = append(start.Attr, startTemplate.Attr...) - } else if tinfo.xmlname != nil { - xmlname := tinfo.xmlname - if xmlname.name != "" { - start.Name.Space, start.Name.Local = xmlname.xmlns, xmlname.name - } else { - fv := xmlname.value(val, dontInitNilPointers) - if v, ok := fv.Interface().(Name); ok && v.Local != "" { - start.Name = v - } - } - } - if start.Name.Local == "" && finfo != nil { - start.Name.Space, start.Name.Local = finfo.xmlns, finfo.name - } - if start.Name.Local == "" { - name := typ.Name() - if i := strings.IndexByte(name, '['); i >= 0 { - // Truncate generic instantiation name. See issue 48318. 
- name = name[:i] - } - if name == "" { - return &UnsupportedTypeError{typ} - } - start.Name.Local = name - } - - // Attributes - for i := range tinfo.fields { - finfo := &tinfo.fields[i] - if finfo.flags&fAttr == 0 { - continue - } - fv := finfo.value(val, dontInitNilPointers) - - if finfo.flags&fOmitEmpty != 0 && (!fv.IsValid() || isEmptyValue(fv)) { - continue - } - - if fv.Kind() == reflect.Interface && fv.IsNil() { - continue - } - - name := Name{Space: finfo.xmlns, Local: finfo.name} - if err := p.marshalAttr(&start, name, fv); err != nil { - return err - } - } - - // If an empty name was found, namespace is overridden with an empty space - if tinfo.xmlname != nil && start.Name.Space == "" && - tinfo.xmlname.xmlns == "" && tinfo.xmlname.name == "" && - len(p.tags) != 0 && p.tags[len(p.tags)-1].Space != "" { - start.Attr = append(start.Attr, Attr{Name{"", xmlnsPrefix}, ""}) - } - if err := p.writeStart(&start); err != nil { - return err - } - - if val.Kind() == reflect.Struct { - err = p.marshalStruct(tinfo, val) - } else { - s, b, err1 := p.marshalSimple(typ, val) - if err1 != nil { - err = err1 - } else if b != nil { - EscapeText(p, b) - } else { - p.EscapeString(s) - } - } - if err != nil { - return err - } - - if err := p.writeEnd(start.Name); err != nil { - return err - } - - return p.cachedWriteError() -} - -// marshalAttr marshals an attribute with the given name and value, adding to start.Attr. 
-func (p *printer) marshalAttr(start *StartElement, name Name, val reflect.Value) error { - if val.CanInterface() && val.Type().Implements(marshalerAttrType) { - attr, err := val.Interface().(MarshalerAttr).MarshalXMLAttr(name) - if err != nil { - return err - } - if attr.Name.Local != "" { - start.Attr = append(start.Attr, attr) - } - return nil - } - - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(marshalerAttrType) { - attr, err := pv.Interface().(MarshalerAttr).MarshalXMLAttr(name) - if err != nil { - return err - } - if attr.Name.Local != "" { - start.Attr = append(start.Attr, attr) - } - return nil - } - } - - if val.CanInterface() && val.Type().Implements(textMarshalerType) { - text, err := val.Interface().(encoding.TextMarshaler).MarshalText() - if err != nil { - return err - } - start.Attr = append(start.Attr, Attr{name, string(text)}) - return nil - } - - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { - text, err := pv.Interface().(encoding.TextMarshaler).MarshalText() - if err != nil { - return err - } - start.Attr = append(start.Attr, Attr{name, string(text)}) - return nil - } - } - - // Dereference or skip nil pointer, interface values. - switch val.Kind() { - case reflect.Pointer, reflect.Interface: - if val.IsNil() { - return nil - } - val = val.Elem() - } - - // Walk slices. 
- if val.Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { - n := val.Len() - for i := 0; i < n; i++ { - if err := p.marshalAttr(start, name, val.Index(i)); err != nil { - return err - } - } - return nil - } - - if val.Type() == attrType { - start.Attr = append(start.Attr, val.Interface().(Attr)) - return nil - } - - s, b, err := p.marshalSimple(val.Type(), val) - if err != nil { - return err - } - if b != nil { - s = string(b) - } - start.Attr = append(start.Attr, Attr{name, s}) - return nil -} - -// defaultStart returns the default start element to use, -// given the reflect type, field info, and start template. -func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement { - var start StartElement - // Precedence for the XML element name is as above, - // except that we do not look inside structs for the first field. - if startTemplate != nil { - start.Name = startTemplate.Name - start.Attr = append(start.Attr, startTemplate.Attr...) - } else if finfo != nil && finfo.name != "" { - start.Name.Local = finfo.name - start.Name.Space = finfo.xmlns - } else if typ.Name() != "" { - start.Name.Local = typ.Name() - } else { - // Must be a pointer to a named type, - // since it has the Marshaler methods. - start.Name.Local = typ.Elem().Name() - } - return start -} - -// marshalInterface marshals a Marshaler interface value. -func (p *printer) marshalInterface(val Marshaler, start StartElement) error { - // Push a marker onto the tag stack so that MarshalXML - // cannot close the XML tags that it did not open. - p.tags = append(p.tags, Name{}) - n := len(p.tags) - - err := val.MarshalXML(p.encoder, start) - if err != nil { - return err - } - - // Make sure MarshalXML closed all its tags. p.tags[n-1] is the mark. 
- if len(p.tags) > n { - return fmt.Errorf("xml: %s.MarshalXML wrote invalid XML: <%s> not closed", receiverType(val), p.tags[len(p.tags)-1].Local) - } - p.tags = p.tags[:n-1] - return nil -} - -// marshalTextInterface marshals a TextMarshaler interface value. -func (p *printer) marshalTextInterface(val encoding.TextMarshaler, start StartElement) error { - if err := p.writeStart(&start); err != nil { - return err - } - text, err := val.MarshalText() - if err != nil { - return err - } - EscapeText(p, text) - return p.writeEnd(start.Name) -} - -// writeStart writes the given start element. -func (p *printer) writeStart(start *StartElement) error { - if start.Name.Local == "" { - return fmt.Errorf("xml: start tag with no name") - } - - p.tags = append(p.tags, start.Name) - p.markPrefix() - - p.writeIndent(1) - p.WriteByte('<') - p.WriteString(start.Name.Local) - - if start.Name.Space != "" { - p.WriteString(` xmlns="`) - p.EscapeString(start.Name.Space) - p.WriteByte('"') - } - - // Attributes - for _, attr := range start.Attr { - name := attr.Name - if name.Local == "" { - continue - } - p.WriteByte(' ') - if name.Space != "" { - p.WriteString(p.createAttrPrefix(name.Space)) - p.WriteByte(':') - } - p.WriteString(name.Local) - p.WriteString(`="`) - p.EscapeString(attr.Value) - p.WriteByte('"') - } - p.WriteByte('>') - return nil -} - -func (p *printer) writeEnd(name Name) error { - if name.Local == "" { - return fmt.Errorf("xml: end tag with no name") - } - if len(p.tags) == 0 || p.tags[len(p.tags)-1].Local == "" { - return fmt.Errorf("xml: end tag without start tag", name.Local) - } - if top := p.tags[len(p.tags)-1]; top != name { - if top.Local != name.Local { - return fmt.Errorf("xml: end tag does not match start tag <%s>", name.Local, top.Local) - } - return fmt.Errorf("xml: end tag in namespace %s does not match start tag <%s> in namespace %s", name.Local, name.Space, top.Local, top.Space) - } - p.tags = p.tags[:len(p.tags)-1] - - p.writeIndent(-1) - 
p.WriteByte('<') - p.WriteByte('/') - p.WriteString(name.Local) - p.WriteByte('>') - p.popPrefix() - return nil -} - -func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) (string, []byte, error) { - switch val.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return strconv.FormatInt(val.Int(), 10), nil, nil - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return strconv.FormatUint(val.Uint(), 10), nil, nil - case reflect.Float32, reflect.Float64: - return strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()), nil, nil - case reflect.String: - return val.String(), nil, nil - case reflect.Bool: - return strconv.FormatBool(val.Bool()), nil, nil - case reflect.Array: - if typ.Elem().Kind() != reflect.Uint8 { - break - } - // [...]byte - var bytes []byte - if val.CanAddr() { - bytes = val.Bytes() - } else { - bytes = make([]byte, val.Len()) - reflect.Copy(reflect.ValueOf(bytes), val) - } - return "", bytes, nil - case reflect.Slice: - if typ.Elem().Kind() != reflect.Uint8 { - break - } - // []byte - return "", val.Bytes(), nil - } - return "", nil, &UnsupportedTypeError{typ} -} - -var ddBytes = []byte("--") - -// indirect drills into interfaces and pointers, returning the pointed-at value. -// If it encounters a nil interface or pointer, indirect returns that nil value. -// This can turn into an infinite loop given a cyclic chain, -// but it matches the Go 1 behavior. 
-func indirect(vf reflect.Value) reflect.Value { - for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Pointer { - if vf.IsNil() { - return vf - } - vf = vf.Elem() - } - return vf -} - -func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { - s := parentStack{p: p} - for i := range tinfo.fields { - finfo := &tinfo.fields[i] - if finfo.flags&fAttr != 0 { - continue - } - vf := finfo.value(val, dontInitNilPointers) - if !vf.IsValid() { - // The field is behind an anonymous struct field that's - // nil. Skip it. - continue - } - - switch finfo.flags & fMode { - case fCDATA, fCharData: - emit := EscapeText - if finfo.flags&fMode == fCDATA { - emit = emitCDATA - } - if err := s.trim(finfo.parents); err != nil { - return err - } - if vf.CanInterface() && vf.Type().Implements(textMarshalerType) { - data, err := vf.Interface().(encoding.TextMarshaler).MarshalText() - if err != nil { - return err - } - if err := emit(p, data); err != nil { - return err - } - continue - } - if vf.CanAddr() { - pv := vf.Addr() - if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { - data, err := pv.Interface().(encoding.TextMarshaler).MarshalText() - if err != nil { - return err - } - if err := emit(p, data); err != nil { - return err - } - continue - } - } - - var scratch [64]byte - vf = indirect(vf) - switch vf.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if err := emit(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)); err != nil { - return err - } - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - if err := emit(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10)); err != nil { - return err - } - case reflect.Float32, reflect.Float64: - if err := emit(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits())); err != nil { - return err - } - case reflect.Bool: - if err := emit(p, strconv.AppendBool(scratch[:0], vf.Bool())); err != nil { - 
return err - } - case reflect.String: - if err := emit(p, []byte(vf.String())); err != nil { - return err - } - case reflect.Slice: - if elem, ok := vf.Interface().([]byte); ok { - if err := emit(p, elem); err != nil { - return err - } - } - } - continue - - case fComment: - if err := s.trim(finfo.parents); err != nil { - return err - } - vf = indirect(vf) - k := vf.Kind() - if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) { - return fmt.Errorf("xml: bad type for comment field of %s", val.Type()) - } - if vf.Len() == 0 { - continue - } - p.writeIndent(0) - p.WriteString("" is invalid grammar. Make it "- -->" - p.WriteByte(' ') - } - p.WriteString("-->") - continue - - case fInnerXML: - vf = indirect(vf) - iface := vf.Interface() - switch raw := iface.(type) { - case []byte: - p.Write(raw) - continue - case string: - p.WriteString(raw) - continue - } - - case fElement, fElement | fAny: - if err := s.trim(finfo.parents); err != nil { - return err - } - if len(finfo.parents) > len(s.stack) { - if vf.Kind() != reflect.Pointer && vf.Kind() != reflect.Interface || !vf.IsNil() { - if err := s.push(finfo.parents[len(s.stack):]); err != nil { - return err - } - } - } - } - if err := p.marshalValue(vf, finfo, nil); err != nil { - return err - } - } - s.trim(nil) - return p.cachedWriteError() -} - -// Write implements io.Writer -func (p *printer) Write(b []byte) (n int, err error) { - if p.closed && p.err == nil { - p.err = errors.New("use of closed Encoder") - } - if p.err == nil { - n, p.err = p.w.Write(b) - } - return n, p.err -} - -// WriteString implements io.StringWriter -func (p *printer) WriteString(s string) (n int, err error) { - if p.closed && p.err == nil { - p.err = errors.New("use of closed Encoder") - } - if p.err == nil { - n, p.err = p.w.WriteString(s) - } - return n, p.err -} - -// WriteByte implements io.ByteWriter -func (p *printer) WriteByte(c byte) error { - if p.closed && p.err == nil { - p.err = 
errors.New("use of closed Encoder") - } - if p.err == nil { - p.err = p.w.WriteByte(c) - } - return p.err -} - -// Close the Encoder, indicating that no more data will be written. It flushes -// any buffered XML to the underlying writer and returns an error if the -// written XML is invalid (e.g. by containing unclosed elements). -func (p *printer) Close() error { - if p.closed { - return nil - } - p.closed = true - if err := p.w.Flush(); err != nil { - return err - } - if len(p.tags) > 0 { - return fmt.Errorf("unclosed tag <%s>", p.tags[len(p.tags)-1].Local) - } - return nil -} - -// return the bufio Writer's cached write error -func (p *printer) cachedWriteError() error { - _, err := p.Write(nil) - return err -} - -func (p *printer) writeIndent(depthDelta int) { - if len(p.prefix) == 0 && len(p.indent) == 0 { - return - } - if depthDelta < 0 { - p.depth-- - if p.indentedIn { - p.indentedIn = false - return - } - p.indentedIn = false - } - if p.putNewline { - p.WriteByte('\n') - } else { - p.putNewline = true - } - if len(p.prefix) > 0 { - p.WriteString(p.prefix) - } - if len(p.indent) > 0 { - for i := 0; i < p.depth; i++ { - p.WriteString(p.indent) - } - } - if depthDelta > 0 { - p.depth++ - p.indentedIn = true - } -} - -type parentStack struct { - p *printer - stack []string -} - -// trim updates the XML context to match the longest common prefix of the stack -// and the given parents. A closing tag will be written for every parent -// popped. Passing a zero slice or nil will close all the elements. -func (s *parentStack) trim(parents []string) error { - split := 0 - for ; split < len(parents) && split < len(s.stack); split++ { - if parents[split] != s.stack[split] { - break - } - } - for i := len(s.stack) - 1; i >= split; i-- { - if err := s.p.writeEnd(Name{Local: s.stack[i]}); err != nil { - return err - } - } - s.stack = s.stack[:split] - return nil -} - -// push adds parent elements to the stack and writes open tags. 
-func (s *parentStack) push(parents []string) error { - for i := 0; i < len(parents); i++ { - if err := s.p.writeStart(&StartElement{Name: Name{Local: parents[i]}}); err != nil { - return err - } - } - s.stack = append(s.stack, parents...) - return nil -} - -// UnsupportedTypeError is returned when [Marshal] encounters a type -// that cannot be converted into XML. -type UnsupportedTypeError struct { - Type reflect.Type -} - -func (e *UnsupportedTypeError) Error() string { - return "xml: unsupported type: " + e.Type.String() -} - -func isEmptyValue(v reflect.Value) bool { - switch v.Kind() { - case reflect.Array, reflect.Map, reflect.Slice, reflect.String: - return v.Len() == 0 - case reflect.Bool, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, - reflect.Float32, reflect.Float64, - reflect.Interface, reflect.Pointer: - return v.IsZero() - } - return false -} diff --git a/parser/xml/min.go b/parser/xml/min.go deleted file mode 100644 index efcc336..0000000 --- a/parser/xml/min.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package xml - -func min(i, j int) int { - if i < j { - return i - } else { - return j - } -} diff --git a/parser/xml/parser.go b/parser/xml/parser.go deleted file mode 100644 index 5ba9a77..0000000 --- a/parser/xml/parser.go +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package xml contains a parser for XML. -package xml - -import ( - "bytes" - "fmt" - "io" - "regexp" - "strconv" - "strings" - - "github.com/katydid/parser-go/parser" -) - -type xmlParser struct { - buf []byte - dec *Decoder - tok Token - attrs []Attr - attrIndex int - attrValue bool - attrFirst bool - attrPrefix string - elemPrefix string - textPrefix string -} - -// XMLParser is an xml parser. -type XMLParser interface { - parser.Interface - //Init intialises the parser with a byte buffer containing xml. - Init([]byte) error - Reset() error -} - -// NewXMLParser returns a new xml parser. -func NewXMLParser(options ...Option) XMLParser { - x := &xmlParser{} - for _, option := range options { - option(x) - } - return x -} - -// Option is used set options when creating a new XMLParser -type Option func(x *xmlParser) - -// WithAttrPrefix specifies the prefix which will be added to attributes returned by the parser. -func WithAttrPrefix(a string) func(x *xmlParser) { - return func(x *xmlParser) { - x.attrPrefix = a - } -} - -// WithElemPrefix specifies the prefix which will be added to elements returned by the parser. 
-func WithElemPrefix(e string) func(x *xmlParser) { - return func(x *xmlParser) { - x.elemPrefix = e - } -} - -// WithTextPrefix specifies the prefix which will be added to text returned by the parser. -func WithTextPrefix(e string) func(x *xmlParser) { - return func(x *xmlParser) { - x.textPrefix = e - } -} - -var procInstPattern = regexp.MustCompile(`<\?.*\?>`) - -func (p *xmlParser) Init(buf []byte) error { - buf = procInstPattern.ReplaceAll(buf, []byte{}) - buf = bytes.TrimSpace(buf) - p.buf = buf - p.dec = NewDecoder(bytes.NewBuffer(buf)) - p.dec.Strict = false - return nil -} - -func (s *xmlParser) Reset() error { - return s.Init(s.buf) -} - -func hasContent(c CharData) bool { - return len(string(c)) > 0 -} - -func (p *xmlParser) Next() (err error) { - if p.attrValue { - if p.attrFirst { - p.attrFirst = false - return nil - } else { - return io.EOF - } - } - if p.tok == nil { - if p.attrs != nil { - p.attrIndex++ - if p.attrIndex < len(p.attrs) { - return nil - } - } - } - if p.tok != nil { - for { - if _, ok := p.tok.(StartElement); ok { - //fmt.Printf("Skipping %s\n", s.Name) - if err := p.dec.Skip(); err != nil { - //fmt.Printf("Skip err = %v\n", err) - return err - } - break - } else if _, ok := p.tok.(EndElement); ok { - return io.EOF - } else if c, ok := p.tok.(CharData); ok { - if hasContent(c) { - break - } - } else if _, ok := p.tok.(Comment); ok { - p.tok, err = p.dec.Token() - //fmt.Printf("Comment Next Token %#v, err = %v\n", p.tok, err) - if err != nil { - return err - } - } else { - panic(fmt.Sprintf("unknown token %T", p.tok)) - } - } - } - p.tok, err = p.dec.Token() - //fmt.Printf("Next Token %#v, err %v\n", p.tok, err) - for err == nil { - if _, ok := p.tok.(StartElement); ok { - break - } else if c, ok := p.tok.(CharData); ok { - if hasContent(c) { - break - } - } else if _, ok := p.tok.(EndElement); ok { - return io.EOF - } - p.tok, err = p.dec.Token() - //fmt.Printf("Next Next Token %#v, err = %v\n", p.tok, err) - } - return err -} - -func 
(p *xmlParser) IsLeaf() bool { - if p.tok == nil { - if p.attrValue { - return true - } - return false - } - _, ok := p.tok.(CharData) - //fmt.Printf("IsLeaf %#v\n", p.tok) - return ok -} - -func (p *xmlParser) getValue() string { - if p.tok == nil && p.attrValue { - return p.attrs[p.attrIndex].Value - } - if c, ok := p.tok.(CharData); ok { - return string(c) - } - return "" -} - -func (p *xmlParser) Double() (float64, error) { - return strconv.ParseFloat(p.getValue(), 64) -} - -func (p *xmlParser) Int() (int64, error) { - i, err := strconv.ParseInt(p.getValue(), 10, 64) - return int64(i), err -} - -func (p *xmlParser) Uint() (uint64, error) { - i, err := strconv.ParseUint(p.getValue(), 10, 64) - return uint64(i), err -} - -func (p *xmlParser) Bool() (bool, error) { - return strconv.ParseBool(strings.TrimSpace(p.getValue())) -} - -func (p *xmlParser) String() (string, error) { - if p.tok == nil && p.attrIndex < len(p.attrs) { - if p.attrValue { - return p.textPrefix + p.attrs[p.attrIndex].Value, nil - } else { - return p.attrPrefix + p.attrs[p.attrIndex].Name.Local, nil - } - } - if s, ok := p.tok.(StartElement); ok { - return p.elemPrefix + s.Name.Local, nil - } - if c, ok := p.tok.(CharData); ok { - return p.textPrefix + string(c), nil - } - return "", parser.ErrNotString -} - -func (p *xmlParser) Bytes() ([]byte, error) { - if c, ok := p.tok.(CharData); ok { - return []byte(c), nil - } - return nil, parser.ErrNotBytes -} - -func (p *xmlParser) Up() { - if p.tok == nil { - if p.attrValue { - p.attrValue = false - p.attrFirst = false - return - } - } - if _, ok := p.tok.(EndElement); ok { - p.tok = nil - p.attrs = nil - p.attrIndex = 0 - return - } - if err := p.dec.Skip(); err != nil { - if err != io.EOF { - panic(err) - } - } -} - -func (p *xmlParser) Down() { - if p.tok == nil { - if p.attrIndex < len(p.attrs) { - p.attrValue = true - p.attrFirst = true - return - } - } - if s, ok := p.tok.(StartElement); ok { - p.tok = nil - p.attrs = s.Attr - p.attrIndex = -1 
- return - } - panic(fmt.Sprintf("not a start element %T", p.tok)) -} diff --git a/parser/xml/parser_test.go b/parser/xml/parser_test.go deleted file mode 100644 index 2a00749..0000000 --- a/parser/xml/parser_test.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2015 Walter Schulze -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package xml - -import ( - "encoding/json" - "testing" - - "github.com/katydid/parser-go/parser/debug" -) - -func testXML(t *testing.T, s string) { - x := NewXMLParser() - if err := x.Init([]byte(s)); err != nil { - t.Fatal(err) - } - m := debug.Walk(debug.NewLogger(x, debug.NewLineLogger())) - data, err := json.Marshal(m) - if err != nil { - t.Fatal(err) - } - t.Logf(string(data)) -} - -func TestExample(t *testing.T) { - example := ` - - Katydid - - true - - - ` - testXML(t, example) -} - -func TestPudding(t *testing.T) { - pudding := ` - - ab - 2 - - 1 - - ` - testXML(t, pudding) -} - -func TestPerson(t *testing.T) { - person := ` - Robert - - 456 - TheStreet - - 0127897897 - - ` - testXML(t, person) -} - -func TestAB(t *testing.T) { - testXML(t, `B`) -} diff --git a/parser/xml/read.go b/parser/xml/read.go deleted file mode 100644 index 3cc4968..0000000 --- a/parser/xml/read.go +++ /dev/null @@ -1,777 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package xml - -import ( - "bytes" - "encoding" - "errors" - "fmt" - "reflect" - "runtime" - "strconv" - "strings" -) - -// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: -// an XML element is an order-dependent collection of anonymous -// values, while a data structure is an order-independent collection -// of named values. -// See [encoding/json] for a textual representation more suitable -// to data structures. - -// Unmarshal parses the XML-encoded data and stores the result in -// the value pointed to by v, which must be an arbitrary struct, -// slice, or string. Well-formed data that does not fit into v is -// discarded. -// -// Because Unmarshal uses the reflect package, it can only assign -// to exported (upper case) fields. Unmarshal uses a case-sensitive -// comparison to match XML element names to tag values and struct -// field names. -// -// Unmarshal maps an XML element to a struct using the following rules. -// In the rules, the tag of a field refers to the value associated with the -// key 'xml' in the struct field's tag (see the example above). -// -// - If the struct has a field of type []byte or string with tag -// ",innerxml", Unmarshal accumulates the raw XML nested inside the -// element in that field. The rest of the rules still apply. -// -// - If the struct has a field named XMLName of type Name, -// Unmarshal records the element name in that field. -// -// - If the XMLName field has an associated tag of the form -// "name" or "namespace-URL name", the XML element must have -// the given name (and, optionally, name space) or else Unmarshal -// returns an error. -// -// - If the XML element has an attribute whose name matches a -// struct field name with an associated tag containing ",attr" or -// the explicit name in a struct field tag of the form "name,attr", -// Unmarshal records the attribute value in that field. 
-// -// - If the XML element has an attribute not handled by the previous -// rule and the struct has a field with an associated tag containing -// ",any,attr", Unmarshal records the attribute value in the first -// such field. -// -// - If the XML element contains character data, that data is -// accumulated in the first struct field that has tag ",chardata". -// The struct field may have type []byte or string. -// If there is no such field, the character data is discarded. -// -// - If the XML element contains comments, they are accumulated in -// the first struct field that has tag ",comment". The struct -// field may have type []byte or string. If there is no such -// field, the comments are discarded. -// -// - If the XML element contains a sub-element whose name matches -// the prefix of a tag formatted as "a" or "a>b>c", unmarshal -// will descend into the XML structure looking for elements with the -// given names, and will map the innermost elements to that struct -// field. A tag starting with ">" is equivalent to one starting -// with the field name followed by ">". -// -// - If the XML element contains a sub-element whose name matches -// a struct field's XMLName tag and the struct field has no -// explicit name tag as per the previous rule, unmarshal maps -// the sub-element to that struct field. -// -// - If the XML element contains a sub-element whose name matches a -// field without any mode flags (",attr", ",chardata", etc), Unmarshal -// maps the sub-element to that struct field. -// -// - If the XML element contains a sub-element that hasn't matched any -// of the above rules and the struct has a field with tag ",any", -// unmarshal maps the sub-element to that struct field. -// -// - An anonymous struct field is handled as if the fields of its -// value were part of the outer struct. -// -// - A struct field with tag "-" is never unmarshaled into. 
-// -// If Unmarshal encounters a field type that implements the Unmarshaler -// interface, Unmarshal calls its UnmarshalXML method to produce the value from -// the XML element. Otherwise, if the value implements -// [encoding.TextUnmarshaler], Unmarshal calls that value's UnmarshalText method. -// -// Unmarshal maps an XML element to a string or []byte by saving the -// concatenation of that element's character data in the string or -// []byte. The saved []byte is never nil. -// -// Unmarshal maps an attribute value to a string or []byte by saving -// the value in the string or slice. -// -// Unmarshal maps an attribute value to an [Attr] by saving the attribute, -// including its name, in the Attr. -// -// Unmarshal maps an XML element or attribute value to a slice by -// extending the length of the slice and mapping the element or attribute -// to the newly created value. -// -// Unmarshal maps an XML element or attribute value to a bool by -// setting it to the boolean value represented by the string. Whitespace -// is trimmed and ignored. -// -// Unmarshal maps an XML element or attribute value to an integer or -// floating-point field by setting the field to the result of -// interpreting the string value in decimal. There is no check for -// overflow. Whitespace is trimmed and ignored. -// -// Unmarshal maps an XML element to a Name by recording the element -// name. -// -// Unmarshal maps an XML element to a pointer by setting the pointer -// to a freshly allocated value and then mapping the element to that value. -// -// A missing element or empty attribute value will be unmarshaled as a zero value. -// If the field is a slice, a zero value will be appended to the field. Otherwise, the -// field will be set to its zero value. -func Unmarshal(data []byte, v any) error { - return NewDecoder(bytes.NewReader(data)).Decode(v) -} - -// Decode works like [Unmarshal], except it reads the decoder -// stream to find the start element. 
-func (d *Decoder) Decode(v any) error { - return d.DecodeElement(v, nil) -} - -// DecodeElement works like [Unmarshal] except that it takes -// a pointer to the start XML element to decode into v. -// It is useful when a client reads some raw XML tokens itself -// but also wants to defer to [Unmarshal] for some elements. -func (d *Decoder) DecodeElement(v any, start *StartElement) error { - val := reflect.ValueOf(v) - if val.Kind() != reflect.Pointer { - return errors.New("non-pointer passed to Unmarshal") - } - - if val.IsNil() { - return errors.New("nil pointer passed to Unmarshal") - } - return d.unmarshal(val.Elem(), start, 0) -} - -// An UnmarshalError represents an error in the unmarshaling process. -type UnmarshalError string - -func (e UnmarshalError) Error() string { return string(e) } - -// Unmarshaler is the interface implemented by objects that can unmarshal -// an XML element description of themselves. -// -// UnmarshalXML decodes a single XML element -// beginning with the given start element. -// If it returns an error, the outer call to Unmarshal stops and -// returns that error. -// UnmarshalXML must consume exactly one XML element. -// One common implementation strategy is to unmarshal into -// a separate value with a layout matching the expected XML -// using d.DecodeElement, and then to copy the data from -// that value into the receiver. -// Another common strategy is to use d.Token to process the -// XML object one token at a time. -// UnmarshalXML may not use d.RawToken. -type Unmarshaler interface { - UnmarshalXML(d *Decoder, start StartElement) error -} - -// UnmarshalerAttr is the interface implemented by objects that can unmarshal -// an XML attribute description of themselves. -// -// UnmarshalXMLAttr decodes a single XML attribute. -// If it returns an error, the outer call to [Unmarshal] stops and -// returns that error. -// UnmarshalXMLAttr is used only for struct fields with the -// "attr" option in the field tag. 
-type UnmarshalerAttr interface { - UnmarshalXMLAttr(attr Attr) error -} - -// receiverType returns the receiver type to use in an expression like "%s.MethodName". -func receiverType(val any) string { - t := reflect.TypeOf(val) - if t.Name() != "" { - return t.String() - } - return "(" + t.String() + ")" -} - -// unmarshalInterface unmarshals a single XML element into val. -// start is the opening tag of the element. -func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { - // Record that decoder must stop at end tag corresponding to start. - d.pushEOF() - - d.unmarshalDepth++ - err := val.UnmarshalXML(d, *start) - d.unmarshalDepth-- - if err != nil { - d.popEOF() - return err - } - - if !d.popEOF() { - return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local) - } - - return nil -} - -// unmarshalTextInterface unmarshals a single XML element into val. -// The chardata contained in the element (but not its children) -// is passed to the text unmarshaler. -func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { - var buf []byte - depth := 1 - for depth > 0 { - t, err := d.Token() - if err != nil { - return err - } - switch t := t.(type) { - case CharData: - if depth == 1 { - buf = append(buf, t...) - } - case StartElement: - depth++ - case EndElement: - depth-- - } - } - return val.UnmarshalText(buf) -} - -// unmarshalAttr unmarshals a single XML attribute into val. -func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { - if val.Kind() == reflect.Pointer { - if val.IsNil() { - val.Set(reflect.New(val.Type().Elem())) - } - val = val.Elem() - } - if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) { - // This is an unmarshaler with a non-pointer receiver, - // so it's likely to be incorrect, but we do what we're told. 
- return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) - } - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) { - return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) - } - } - - // Not an UnmarshalerAttr; try encoding.TextUnmarshaler. - if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { - // This is an unmarshaler with a non-pointer receiver, - // so it's likely to be incorrect, but we do what we're told. - return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) - } - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { - return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) - } - } - - if val.Type().Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { - // Slice of element values. - // Grow slice. - n := val.Len() - val.Grow(1) - val.SetLen(n + 1) - - // Recur to read element into slice. - if err := d.unmarshalAttr(val.Index(n), attr); err != nil { - val.SetLen(n) - return err - } - return nil - } - - if val.Type() == attrType { - val.Set(reflect.ValueOf(attr)) - return nil - } - - return copyValue(val, []byte(attr.Value)) -} - -var ( - attrType = reflect.TypeFor[Attr]() - unmarshalerType = reflect.TypeFor[Unmarshaler]() - unmarshalerAttrType = reflect.TypeFor[UnmarshalerAttr]() - textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() -) - -const ( - maxUnmarshalDepth = 10000 - maxUnmarshalDepthWasm = 5000 // go.dev/issue/56498 -) - -var errUnmarshalDepth = errors.New("exceeded max depth") - -// Unmarshal a single XML element into val. -func (d *Decoder) unmarshal(val reflect.Value, start *StartElement, depth int) error { - if depth >= maxUnmarshalDepth || runtime.GOARCH == "wasm" && depth >= maxUnmarshalDepthWasm { - return errUnmarshalDepth - } - // Find start element if we need it. 
- if start == nil { - for { - tok, err := d.Token() - if err != nil { - return err - } - if t, ok := tok.(StartElement); ok { - start = &t - break - } - } - } - - // Load value from interface, but only if the result will be - // usefully addressable. - if val.Kind() == reflect.Interface && !val.IsNil() { - e := val.Elem() - if e.Kind() == reflect.Pointer && !e.IsNil() { - val = e - } - } - - if val.Kind() == reflect.Pointer { - if val.IsNil() { - val.Set(reflect.New(val.Type().Elem())) - } - val = val.Elem() - } - - if val.CanInterface() && val.Type().Implements(unmarshalerType) { - // This is an unmarshaler with a non-pointer receiver, - // so it's likely to be incorrect, but we do what we're told. - return d.unmarshalInterface(val.Interface().(Unmarshaler), start) - } - - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(unmarshalerType) { - return d.unmarshalInterface(pv.Interface().(Unmarshaler), start) - } - } - - if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { - return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler)) - } - - if val.CanAddr() { - pv := val.Addr() - if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { - return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler)) - } - } - - var ( - data []byte - saveData reflect.Value - comment []byte - saveComment reflect.Value - saveXML reflect.Value - saveXMLIndex int - saveXMLData []byte - saveAny reflect.Value - sv reflect.Value - tinfo *typeInfo - err error - ) - - switch v := val; v.Kind() { - default: - return errors.New("unknown type " + v.Type().String()) - - case reflect.Interface: - // TODO: For now, simply ignore the field. In the near - // future we may choose to unmarshal the start - // element on it, if not nil. - return d.Skip() - - case reflect.Slice: - typ := v.Type() - if typ.Elem().Kind() == reflect.Uint8 { - // []byte - saveData = v - break - } - - // Slice of element values. 
- // Grow slice. - n := v.Len() - v.Grow(1) - v.SetLen(n + 1) - - // Recur to read element into slice. - if err := d.unmarshal(v.Index(n), start, depth+1); err != nil { - v.SetLen(n) - return err - } - return nil - - case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: - saveData = v - - case reflect.Struct: - typ := v.Type() - if typ == nameType { - v.Set(reflect.ValueOf(start.Name)) - break - } - - sv = v - tinfo, err = getTypeInfo(typ) - if err != nil { - return err - } - - // Validate and assign element name. - if tinfo.xmlname != nil { - finfo := tinfo.xmlname - if finfo.name != "" && finfo.name != start.Name.Local { - return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") - } - if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { - e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " - if start.Name.Space == "" { - e += "no name space" - } else { - e += start.Name.Space - } - return UnmarshalError(e) - } - fv := finfo.value(sv, initNilPointers) - if _, ok := fv.Interface().(Name); ok { - fv.Set(reflect.ValueOf(start.Name)) - } - } - - // Assign attributes. 
- for _, a := range start.Attr { - handled := false - any := -1 - for i := range tinfo.fields { - finfo := &tinfo.fields[i] - switch finfo.flags & fMode { - case fAttr: - strv := finfo.value(sv, initNilPointers) - if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { - if err := d.unmarshalAttr(strv, a); err != nil { - return err - } - handled = true - } - - case fAny | fAttr: - if any == -1 { - any = i - } - } - } - if !handled && any >= 0 { - finfo := &tinfo.fields[any] - strv := finfo.value(sv, initNilPointers) - if err := d.unmarshalAttr(strv, a); err != nil { - return err - } - } - } - - // Determine whether we need to save character data or comments. - for i := range tinfo.fields { - finfo := &tinfo.fields[i] - switch finfo.flags & fMode { - case fCDATA, fCharData: - if !saveData.IsValid() { - saveData = finfo.value(sv, initNilPointers) - } - - case fComment: - if !saveComment.IsValid() { - saveComment = finfo.value(sv, initNilPointers) - } - - case fAny, fAny | fElement: - if !saveAny.IsValid() { - saveAny = finfo.value(sv, initNilPointers) - } - - case fInnerXML: - if !saveXML.IsValid() { - saveXML = finfo.value(sv, initNilPointers) - if d.saved == nil { - saveXMLIndex = 0 - d.saved = new(bytes.Buffer) - } else { - saveXMLIndex = d.savedOffset() - } - } - } - } - } - - // Find end element. - // Process sub-elements along the way. -Loop: - for { - var savedOffset int - if saveXML.IsValid() { - savedOffset = d.savedOffset() - } - tok, err := d.Token() - if err != nil { - return err - } - switch t := tok.(type) { - case StartElement: - consumed := false - if sv.IsValid() { - // unmarshalPath can call unmarshal, so we need to pass the depth through so that - // we can continue to enforce the maximum recursion limit. 
- consumed, err = d.unmarshalPath(tinfo, sv, nil, &t, depth) - if err != nil { - return err - } - if !consumed && saveAny.IsValid() { - consumed = true - if err := d.unmarshal(saveAny, &t, depth+1); err != nil { - return err - } - } - } - if !consumed { - if err := d.Skip(); err != nil { - return err - } - } - - case EndElement: - if saveXML.IsValid() { - saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset] - if saveXMLIndex == 0 { - d.saved = nil - } - } - break Loop - - case CharData: - if saveData.IsValid() { - data = append(data, t...) - } - - case Comment: - if saveComment.IsValid() { - comment = append(comment, t...) - } - } - } - - if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) { - if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { - return err - } - saveData = reflect.Value{} - } - - if saveData.IsValid() && saveData.CanAddr() { - pv := saveData.Addr() - if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { - if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { - return err - } - saveData = reflect.Value{} - } - } - - if err := copyValue(saveData, data); err != nil { - return err - } - - switch t := saveComment; t.Kind() { - case reflect.String: - t.SetString(string(comment)) - case reflect.Slice: - t.Set(reflect.ValueOf(comment)) - } - - switch t := saveXML; t.Kind() { - case reflect.String: - t.SetString(string(saveXMLData)) - case reflect.Slice: - if t.Type().Elem().Kind() == reflect.Uint8 { - t.Set(reflect.ValueOf(saveXMLData)) - } - } - - return nil -} - -func copyValue(dst reflect.Value, src []byte) (err error) { - dst0 := dst - - if dst.Kind() == reflect.Pointer { - if dst.IsNil() { - dst.Set(reflect.New(dst.Type().Elem())) - } - dst = dst.Elem() - } - - // Save accumulated data. - switch dst.Kind() { - case reflect.Invalid: - // Probably a comment. 
- default: - return errors.New("cannot unmarshal into " + dst0.Type().String()) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if len(src) == 0 { - dst.SetInt(0) - return nil - } - itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) - if err != nil { - return err - } - dst.SetInt(itmp) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - if len(src) == 0 { - dst.SetUint(0) - return nil - } - utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) - if err != nil { - return err - } - dst.SetUint(utmp) - case reflect.Float32, reflect.Float64: - if len(src) == 0 { - dst.SetFloat(0) - return nil - } - ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits()) - if err != nil { - return err - } - dst.SetFloat(ftmp) - case reflect.Bool: - if len(src) == 0 { - dst.SetBool(false) - return nil - } - value, err := strconv.ParseBool(strings.TrimSpace(string(src))) - if err != nil { - return err - } - dst.SetBool(value) - case reflect.String: - dst.SetString(string(src)) - case reflect.Slice: - if len(src) == 0 { - // non-nil to flag presence - src = []byte{} - } - dst.SetBytes(src) - } - return nil -} - -// unmarshalPath walks down an XML structure looking for wanted -// paths, and calls unmarshal on them. -// The consumed result tells whether XML elements have been consumed -// from the Decoder until start's matching end element, or if it's -// still untouched because start is uninteresting for sv's fields. 
-func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement, depth int) (consumed bool, err error) { - recurse := false -Loop: - for i := range tinfo.fields { - finfo := &tinfo.fields[i] - if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { - continue - } - for j := range parents { - if parents[j] != finfo.parents[j] { - continue Loop - } - } - if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { - // It's a perfect match, unmarshal the field. - return true, d.unmarshal(finfo.value(sv, initNilPointers), start, depth+1) - } - if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { - // It's a prefix for the field. Break and recurse - // since it's not ok for one field path to be itself - // the prefix for another field path. - recurse = true - - // We can reuse the same slice as long as we - // don't try to append to it. - parents = finfo.parents[:len(parents)+1] - break - } - } - if !recurse { - // We have no business with this element. - return false, nil - } - // The element is not a perfect match for any field, but one - // or more fields have the path to this element as a parent - // prefix. Recurse and attempt to match these. - for { - var tok Token - tok, err = d.Token() - if err != nil { - return true, err - } - switch t := tok.(type) { - case StartElement: - // the recursion depth of unmarshalPath is limited to the path length specified - // by the struct field tag, so we don't increment the depth here. - consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t, depth) - if err != nil { - return true, err - } - if !consumed2 { - if err := d.Skip(); err != nil { - return true, err - } - } - case EndElement: - return true, nil - } - } -} - -// Skip reads tokens until it has consumed the end element -// matching the most recent start element already consumed, -// skipping nested structures. 
-// It returns nil if it finds an end element matching the start -// element; otherwise it returns an error describing the problem. -func (d *Decoder) Skip() error { - var depth int64 - for { - tok, err := d.Token() - if err != nil { - return err - } - switch tok.(type) { - case StartElement: - depth++ - case EndElement: - if depth == 0 { - return nil - } - depth-- - } - } -} diff --git a/parser/xml/typeinfo.go b/parser/xml/typeinfo.go deleted file mode 100644 index b18ed28..0000000 --- a/parser/xml/typeinfo.go +++ /dev/null @@ -1,367 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import ( - "fmt" - "reflect" - "strings" - "sync" -) - -// typeInfo holds details for the xml representation of a type. -type typeInfo struct { - xmlname *fieldInfo - fields []fieldInfo -} - -// fieldInfo holds details for the xml representation of a single field. -type fieldInfo struct { - idx []int - name string - xmlns string - flags fieldFlags - parents []string -} - -type fieldFlags int - -const ( - fElement fieldFlags = 1 << iota - fAttr - fCDATA - fCharData - fInnerXML - fComment - fAny - - fOmitEmpty - - fMode = fElement | fAttr | fCDATA | fCharData | fInnerXML | fComment | fAny - - xmlName = "XMLName" -) - -var tinfoMap sync.Map // map[reflect.Type]*typeInfo - -var nameType = reflect.TypeFor[Name]() - -// getTypeInfo returns the typeInfo structure with details necessary -// for marshaling and unmarshaling typ. -func getTypeInfo(typ reflect.Type) (*typeInfo, error) { - if ti, ok := tinfoMap.Load(typ); ok { - return ti.(*typeInfo), nil - } - - tinfo := &typeInfo{} - if typ.Kind() == reflect.Struct && typ != nameType { - n := typ.NumField() - for i := 0; i < n; i++ { - f := typ.Field(i) - if (!f.IsExported() && !f.Anonymous) || f.Tag.Get("xml") == "-" { - continue // Private field - } - - // For embedded structs, embed its fields. 
- if f.Anonymous { - t := f.Type - if t.Kind() == reflect.Pointer { - t = t.Elem() - } - if t.Kind() == reflect.Struct { - inner, err := getTypeInfo(t) - if err != nil { - return nil, err - } - if tinfo.xmlname == nil { - tinfo.xmlname = inner.xmlname - } - for _, finfo := range inner.fields { - finfo.idx = append([]int{i}, finfo.idx...) - if err := addFieldInfo(typ, tinfo, &finfo); err != nil { - return nil, err - } - } - continue - } - } - - finfo, err := structFieldInfo(typ, &f) - if err != nil { - return nil, err - } - - if f.Name == xmlName { - tinfo.xmlname = finfo - continue - } - - // Add the field if it doesn't conflict with other fields. - if err := addFieldInfo(typ, tinfo, finfo); err != nil { - return nil, err - } - } - } - - ti, _ := tinfoMap.LoadOrStore(typ, tinfo) - return ti.(*typeInfo), nil -} - -// structFieldInfo builds and returns a fieldInfo for f. -func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, error) { - finfo := &fieldInfo{idx: f.Index} - - // Split the tag from the xml namespace if necessary. - tag := f.Tag.Get("xml") - if ns, t, ok := strings.Cut(tag, " "); ok { - finfo.xmlns, tag = ns, t - } - - // Parse flags. - tokens := strings.Split(tag, ",") - if len(tokens) == 1 { - finfo.flags = fElement - } else { - tag = tokens[0] - for _, flag := range tokens[1:] { - switch flag { - case "attr": - finfo.flags |= fAttr - case "cdata": - finfo.flags |= fCDATA - case "chardata": - finfo.flags |= fCharData - case "innerxml": - finfo.flags |= fInnerXML - case "comment": - finfo.flags |= fComment - case "any": - finfo.flags |= fAny - case "omitempty": - finfo.flags |= fOmitEmpty - } - } - - // Validate the flags used. 
- valid := true - switch mode := finfo.flags & fMode; mode { - case 0: - finfo.flags |= fElement - case fAttr, fCDATA, fCharData, fInnerXML, fComment, fAny, fAny | fAttr: - if f.Name == xmlName || tag != "" && mode != fAttr { - valid = false - } - default: - // This will also catch multiple modes in a single field. - valid = false - } - if finfo.flags&fMode == fAny { - finfo.flags |= fElement - } - if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 { - valid = false - } - if !valid { - return nil, fmt.Errorf("xml: invalid tag in field %s of type %s: %q", - f.Name, typ, f.Tag.Get("xml")) - } - } - - // Use of xmlns without a name is not allowed. - if finfo.xmlns != "" && tag == "" { - return nil, fmt.Errorf("xml: namespace without name in field %s of type %s: %q", - f.Name, typ, f.Tag.Get("xml")) - } - - if f.Name == xmlName { - // The XMLName field records the XML element name. Don't - // process it as usual because its name should default to - // empty rather than to the field name. - finfo.name = tag - return finfo, nil - } - - if tag == "" { - // If the name part of the tag is completely empty, get - // default from XMLName of underlying struct if feasible, - // or field name otherwise. - if xmlname := lookupXMLName(f.Type); xmlname != nil { - finfo.xmlns, finfo.name = xmlname.xmlns, xmlname.name - } else { - finfo.name = f.Name - } - return finfo, nil - } - - // Prepare field name and parents. 
- parents := strings.Split(tag, ">") - if parents[0] == "" { - parents[0] = f.Name - } - if parents[len(parents)-1] == "" { - return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ) - } - finfo.name = parents[len(parents)-1] - if len(parents) > 1 { - if (finfo.flags & fElement) == 0 { - return nil, fmt.Errorf("xml: %s chain not valid with %s flag", tag, strings.Join(tokens[1:], ",")) - } - finfo.parents = parents[:len(parents)-1] - } - - // If the field type has an XMLName field, the names must match - // so that the behavior of both marshaling and unmarshaling - // is straightforward and unambiguous. - if finfo.flags&fElement != 0 { - ftyp := f.Type - xmlname := lookupXMLName(ftyp) - if xmlname != nil && xmlname.name != finfo.name { - return nil, fmt.Errorf("xml: name %q in tag of %s.%s conflicts with name %q in %s.XMLName", - finfo.name, typ, f.Name, xmlname.name, ftyp) - } - } - return finfo, nil -} - -// lookupXMLName returns the fieldInfo for typ's XMLName field -// in case it exists and has a valid xml field tag, otherwise -// it returns nil. -func lookupXMLName(typ reflect.Type) (xmlname *fieldInfo) { - for typ.Kind() == reflect.Pointer { - typ = typ.Elem() - } - if typ.Kind() != reflect.Struct { - return nil - } - for i, n := 0, typ.NumField(); i < n; i++ { - f := typ.Field(i) - if f.Name != xmlName { - continue - } - finfo, err := structFieldInfo(typ, &f) - if err == nil && finfo.name != "" { - return finfo - } - // Also consider errors as a non-existent field tag - // and let getTypeInfo itself report the error. - break - } - return nil -} - -// addFieldInfo adds finfo to tinfo.fields if there are no -// conflicts, or if conflicts arise from previous fields that were -// obtained from deeper embedded structures than finfo. In the latter -// case, the conflicting entries are dropped. -// A conflict occurs when the path (parent + name) to a field is -// itself a prefix of another path, or when two paths match exactly. 
-// It is okay for field paths to share a common, shorter prefix. -func addFieldInfo(typ reflect.Type, tinfo *typeInfo, newf *fieldInfo) error { - var conflicts []int -Loop: - // First, figure all conflicts. Most working code will have none. - for i := range tinfo.fields { - oldf := &tinfo.fields[i] - if oldf.flags&fMode != newf.flags&fMode { - continue - } - if oldf.xmlns != "" && newf.xmlns != "" && oldf.xmlns != newf.xmlns { - continue - } - minl := min(len(newf.parents), len(oldf.parents)) - for p := 0; p < minl; p++ { - if oldf.parents[p] != newf.parents[p] { - continue Loop - } - } - if len(oldf.parents) > len(newf.parents) { - if oldf.parents[len(newf.parents)] == newf.name { - conflicts = append(conflicts, i) - } - } else if len(oldf.parents) < len(newf.parents) { - if newf.parents[len(oldf.parents)] == oldf.name { - conflicts = append(conflicts, i) - } - } else { - if newf.name == oldf.name && newf.xmlns == oldf.xmlns { - conflicts = append(conflicts, i) - } - } - } - // Without conflicts, add the new field and return. - if conflicts == nil { - tinfo.fields = append(tinfo.fields, *newf) - return nil - } - - // If any conflict is shallower, ignore the new field. - // This matches the Go field resolution on embedding. - for _, i := range conflicts { - if len(tinfo.fields[i].idx) < len(newf.idx) { - return nil - } - } - - // Otherwise, if any of them is at the same depth level, it's an error. - for _, i := range conflicts { - oldf := &tinfo.fields[i] - if len(oldf.idx) == len(newf.idx) { - f1 := typ.FieldByIndex(oldf.idx) - f2 := typ.FieldByIndex(newf.idx) - return &TagPathError{typ, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")} - } - } - - // Otherwise, the new field is shallower, and thus takes precedence, - // so drop the conflicting fields from tinfo and append the new one. 
- for c := len(conflicts) - 1; c >= 0; c-- { - i := conflicts[c] - copy(tinfo.fields[i:], tinfo.fields[i+1:]) - tinfo.fields = tinfo.fields[:len(tinfo.fields)-1] - } - tinfo.fields = append(tinfo.fields, *newf) - return nil -} - -// A TagPathError represents an error in the unmarshaling process -// caused by the use of field tags with conflicting paths. -type TagPathError struct { - Struct reflect.Type - Field1, Tag1 string - Field2, Tag2 string -} - -func (e *TagPathError) Error() string { - return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) -} - -const ( - initNilPointers = true - dontInitNilPointers = false -) - -// value returns v's field value corresponding to finfo. -// It's equivalent to v.FieldByIndex(finfo.idx), but when passed -// initNilPointers, it initializes and dereferences pointers as necessary. -// When passed dontInitNilPointers and a nil pointer is reached, the function -// returns a zero reflect.Value. -func (finfo *fieldInfo) value(v reflect.Value, shouldInitNilPointers bool) reflect.Value { - for i, x := range finfo.idx { - if i > 0 { - t := v.Type() - if t.Kind() == reflect.Pointer && t.Elem().Kind() == reflect.Struct { - if v.IsNil() { - if !shouldInitNilPointers { - return reflect.Value{} - } - v.Set(reflect.New(v.Type().Elem())) - } - v = v.Elem() - } - } - v = v.Field(x) - } - return v -} diff --git a/parser/xml/xml.go b/parser/xml/xml.go deleted file mode 100644 index 951676d..0000000 --- a/parser/xml/xml.go +++ /dev/null @@ -1,2076 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package xml implements a simple XML 1.0 parser that -// understands XML name spaces. 
-package xml - -// References: -// Annotated XML spec: https://www.xml.com/axml/testaxml.htm -// XML name spaces: https://www.w3.org/TR/REC-xml-names/ - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "io" - "strconv" - "strings" - "unicode" - "unicode/utf8" -) - -// A SyntaxError represents a syntax error in the XML input stream. -type SyntaxError struct { - Msg string - Line int -} - -func (e *SyntaxError) Error() string { - return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg -} - -// A Name represents an XML name (Local) annotated -// with a name space identifier (Space). -// In tokens returned by [Decoder.Token], the Space identifier -// is given as a canonical URL, not the short prefix used -// in the document being parsed. -type Name struct { - Space, Local string -} - -// An Attr represents an attribute in an XML element (Name=Value). -type Attr struct { - Name Name - Value string -} - -// A Token is an interface holding one of the token types: -// [StartElement], [EndElement], [CharData], [Comment], [ProcInst], or [Directive]. -type Token any - -// A StartElement represents an XML start element. -type StartElement struct { - Name Name - Attr []Attr -} - -// Copy creates a new copy of StartElement. -func (e StartElement) Copy() StartElement { - attrs := make([]Attr, len(e.Attr)) - copy(attrs, e.Attr) - e.Attr = attrs - return e -} - -// End returns the corresponding XML end element. -func (e StartElement) End() EndElement { - return EndElement{e.Name} -} - -// An EndElement represents an XML end element. -type EndElement struct { - Name Name -} - -// A CharData represents XML character data (raw text), -// in which XML escape sequences have been replaced by -// the characters they represent. -type CharData []byte - -// Copy creates a new copy of CharData. -func (c CharData) Copy() CharData { return CharData(bytes.Clone(c)) } - -// A Comment represents an XML comment of the form . -// The bytes do not include the comment markers. 
-type Comment []byte - -// Copy creates a new copy of Comment. -func (c Comment) Copy() Comment { return Comment(bytes.Clone(c)) } - -// A ProcInst represents an XML processing instruction of the form -type ProcInst struct { - Target string - Inst []byte -} - -// Copy creates a new copy of ProcInst. -func (p ProcInst) Copy() ProcInst { - p.Inst = bytes.Clone(p.Inst) - return p -} - -// A Directive represents an XML directive of the form . -// The bytes do not include the markers. -type Directive []byte - -// Copy creates a new copy of Directive. -func (d Directive) Copy() Directive { return Directive(bytes.Clone(d)) } - -// CopyToken returns a copy of a Token. -func CopyToken(t Token) Token { - switch v := t.(type) { - case CharData: - return v.Copy() - case Comment: - return v.Copy() - case Directive: - return v.Copy() - case ProcInst: - return v.Copy() - case StartElement: - return v.Copy() - } - return t -} - -// A TokenReader is anything that can decode a stream of XML tokens, including a -// [Decoder]. -// -// When Token encounters an error or end-of-file condition after successfully -// reading a token, it returns the token. It may return the (non-nil) error from -// the same call or return the error (and a nil token) from a subsequent call. -// An instance of this general case is that a TokenReader returning a non-nil -// token at the end of the token stream may return either io.EOF or a nil error. -// The next Read should return nil, [io.EOF]. -// -// Implementations of Token are discouraged from returning a nil token with a -// nil error. Callers should treat a return of nil, nil as indicating that -// nothing happened; in particular it does not indicate EOF. -type TokenReader interface { - Token() (Token, error) -} - -// A Decoder represents an XML parser reading a particular input stream. -// The parser assumes that its input is encoded in UTF-8. 
-type Decoder struct { - // Strict defaults to true, enforcing the requirements - // of the XML specification. - // If set to false, the parser allows input containing common - // mistakes: - // * If an element is missing an end tag, the parser invents - // end tags as necessary to keep the return values from Token - // properly balanced. - // * In attribute values and character data, unknown or malformed - // character entities (sequences beginning with &) are left alone. - // - // Setting: - // - // d.Strict = false - // d.AutoClose = xml.HTMLAutoClose - // d.Entity = xml.HTMLEntity - // - // creates a parser that can handle typical HTML. - // - // Strict mode does not enforce the requirements of the XML name spaces TR. - // In particular it does not reject name space tags using undefined prefixes. - // Such tags are recorded with the unknown prefix as the name space URL. - Strict bool - - // When Strict == false, AutoClose indicates a set of elements to - // consider closed immediately after they are opened, regardless - // of whether an end element is present. - AutoClose []string - - // Entity can be used to map non-standard entity names to string replacements. - // The parser behaves as if these standard mappings are present in the map, - // regardless of the actual map content: - // - // "lt": "<", - // "gt": ">", - // "amp": "&", - // "apos": "'", - // "quot": `"`, - Entity map[string]string - - // CharsetReader, if non-nil, defines a function to generate - // charset-conversion readers, converting from the provided - // non-UTF-8 charset into UTF-8. If CharsetReader is nil or - // returns an error, parsing stops with an error. One of the - // CharsetReader's result values must be non-nil. - CharsetReader func(charset string, input io.Reader) (io.Reader, error) - - // DefaultSpace sets the default name space used for unadorned tags, - // as if the entire XML stream were wrapped in an element containing - // the attribute xmlns="DefaultSpace". 
- DefaultSpace string - - r io.ByteReader - t TokenReader - buf bytes.Buffer - saved *bytes.Buffer - stk *stack - free *stack - needClose bool - toClose Name - nextToken Token - nextByte int - ns map[string]string - err error - line int - linestart int64 - offset int64 - unmarshalDepth int -} - -// NewDecoder creates a new XML parser reading from r. -// If r does not implement [io.ByteReader], NewDecoder will -// do its own buffering. -func NewDecoder(r io.Reader) *Decoder { - d := &Decoder{ - ns: make(map[string]string), - nextByte: -1, - line: 1, - Strict: true, - } - d.switchToReader(r) - return d -} - -// NewTokenDecoder creates a new XML parser using an underlying token stream. -func NewTokenDecoder(t TokenReader) *Decoder { - // Is it already a Decoder? - if d, ok := t.(*Decoder); ok { - return d - } - d := &Decoder{ - ns: make(map[string]string), - t: t, - nextByte: -1, - line: 1, - Strict: true, - } - return d -} - -// Token returns the next XML token in the input stream. -// At the end of the input stream, Token returns nil, [io.EOF]. -// -// Slices of bytes in the returned token data refer to the -// parser's internal buffer and remain valid only until the next -// call to Token. To acquire a copy of the bytes, call [CopyToken] -// or the token's Copy method. -// -// Token expands self-closing elements such as
-// into separate start and end elements returned by successive calls. -// -// Token guarantees that the [StartElement] and [EndElement] -// tokens it returns are properly nested and matched: -// if Token encounters an unexpected end element -// or EOF before all expected end elements, -// it will return an error. -// -// If [Decoder.CharsetReader] is called and returns an error, -// the error is wrapped and returned. -// -// Token implements XML name spaces as described by -// https://www.w3.org/TR/REC-xml-names/. Each of the -// [Name] structures contained in the Token has the Space -// set to the URL identifying its name space when known. -// If Token encounters an unrecognized name space prefix, -// it uses the prefix as the Space rather than report an error. -func (d *Decoder) Token() (Token, error) { - var t Token - var err error - if d.stk != nil && d.stk.kind == stkEOF { - return nil, io.EOF - } - if d.nextToken != nil { - t = d.nextToken - d.nextToken = nil - } else { - if t, err = d.rawToken(); t == nil && err != nil { - if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF { - err = d.syntaxError("unexpected EOF") - } - return nil, err - } - // We still have a token to process, so clear any - // errors (e.g. EOF) and proceed. - err = nil - } - if !d.Strict { - if t1, ok := d.autoClose(t); ok { - d.nextToken = t - t = t1 - } - } - switch t1 := t.(type) { - case StartElement: - // In XML name spaces, the translations listed in the - // attributes apply to the element name and - // to the other attribute names, so process - // the translations first. 
- for _, a := range t1.Attr { - if a.Name.Space == xmlnsPrefix { - v, ok := d.ns[a.Name.Local] - d.pushNs(a.Name.Local, v, ok) - d.ns[a.Name.Local] = a.Value - } - if a.Name.Space == "" && a.Name.Local == xmlnsPrefix { - // Default space for untagged names - v, ok := d.ns[""] - d.pushNs("", v, ok) - d.ns[""] = a.Value - } - } - - d.pushElement(t1.Name) - d.translate(&t1.Name, true) - for i := range t1.Attr { - d.translate(&t1.Attr[i].Name, false) - } - t = t1 - - case EndElement: - if !d.popElement(&t1) { - return nil, d.err - } - t = t1 - } - return t, err -} - -const ( - xmlURL = "http://www.w3.org/XML/1998/namespace" - xmlnsPrefix = "xmlns" - xmlPrefix = "xml" -) - -// Apply name space translation to name n. -// The default name space (for Space=="") -// applies only to element names, not to attribute names. -func (d *Decoder) translate(n *Name, isElementName bool) { - switch { - case n.Space == xmlnsPrefix: - return - case n.Space == "" && !isElementName: - return - case n.Space == xmlPrefix: - n.Space = xmlURL - case n.Space == "" && n.Local == xmlnsPrefix: - return - } - if v, ok := d.ns[n.Space]; ok { - n.Space = v - } else if n.Space == "" { - n.Space = d.DefaultSpace - } -} - -func (d *Decoder) switchToReader(r io.Reader) { - // Get efficient byte at a time reader. - // Assume that if reader has its own - // ReadByte, it's efficient enough. - // Otherwise, use bufio. - if rb, ok := r.(io.ByteReader); ok { - d.r = rb - } else { - d.r = bufio.NewReader(r) - } -} - -// Parsing state - stack holds old name space translations -// and the current set of open elements. The translations to pop when -// ending a given tag are *below* it on the stack, which is -// more work but forced on us by XML. 
-type stack struct { - next *stack - kind int - name Name - ok bool -} - -const ( - stkStart = iota - stkNs - stkEOF -) - -func (d *Decoder) push(kind int) *stack { - s := d.free - if s != nil { - d.free = s.next - } else { - s = new(stack) - } - s.next = d.stk - s.kind = kind - d.stk = s - return s -} - -func (d *Decoder) pop() *stack { - s := d.stk - if s != nil { - d.stk = s.next - s.next = d.free - d.free = s - } - return s -} - -// Record that after the current element is finished -// (that element is already pushed on the stack) -// Token should return EOF until popEOF is called. -func (d *Decoder) pushEOF() { - // Walk down stack to find Start. - // It might not be the top, because there might be stkNs - // entries above it. - start := d.stk - for start.kind != stkStart { - start = start.next - } - // The stkNs entries below a start are associated with that - // element too; skip over them. - for start.next != nil && start.next.kind == stkNs { - start = start.next - } - s := d.free - if s != nil { - d.free = s.next - } else { - s = new(stack) - } - s.kind = stkEOF - s.next = start.next - start.next = s -} - -// Undo a pushEOF. -// The element must have been finished, so the EOF should be at the top of the stack. -func (d *Decoder) popEOF() bool { - if d.stk == nil || d.stk.kind != stkEOF { - return false - } - d.pop() - return true -} - -// Record that we are starting an element with the given name. -func (d *Decoder) pushElement(name Name) { - s := d.push(stkStart) - s.name = name -} - -// Record that we are changing the value of ns[local]. -// The old value is url, ok. -func (d *Decoder) pushNs(local string, url string, ok bool) { - s := d.push(stkNs) - s.name.Local = local - s.name.Space = url - s.ok = ok -} - -// Creates a SyntaxError with the current line number. -func (d *Decoder) syntaxError(msg string) error { - return &SyntaxError{Msg: msg, Line: d.line} -} - -// Record that we are ending an element with the given name. 
-// The name must match the record at the top of the stack, -// which must be a pushElement record. -// After popping the element, apply any undo records from -// the stack to restore the name translations that existed -// before we saw this element. -func (d *Decoder) popElement(t *EndElement) bool { - s := d.pop() - name := t.Name - switch { - case s == nil || s.kind != stkStart: - d.err = d.syntaxError("unexpected end element ") - return false - case s.name.Local != name.Local: - if !d.Strict { - d.needClose = true - d.toClose = t.Name - t.Name = s.name - return true - } - d.err = d.syntaxError("element <" + s.name.Local + "> closed by ") - return false - case s.name.Space != name.Space: - ns := name.Space - if name.Space == "" { - ns = `""` - } - d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + - " closed by in space " + ns) - return false - } - - d.translate(&t.Name, true) - - // Pop stack until a Start or EOF is on the top, undoing the - // translations that were associated with the element we just closed. - for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF { - s := d.pop() - if s.ok { - d.ns[s.name.Local] = s.name.Space - } else { - delete(d.ns, s.name.Local) - } - } - - return true -} - -// If the top element on the stack is autoclosing and -// t is not the end tag, invent the end tag. -func (d *Decoder) autoClose(t Token) (Token, bool) { - if d.stk == nil || d.stk.kind != stkStart { - return nil, false - } - for _, s := range d.AutoClose { - if strings.EqualFold(s, d.stk.name.Local) { - // This one should be auto closed if t doesn't close it. 
- et, ok := t.(EndElement) - if !ok || !strings.EqualFold(et.Name.Local, d.stk.name.Local) { - return EndElement{d.stk.name}, true - } - break - } - } - return nil, false -} - -var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method") - -// RawToken is like [Decoder.Token] but does not verify that -// start and end elements match and does not translate -// name space prefixes to their corresponding URLs. -func (d *Decoder) RawToken() (Token, error) { - if d.unmarshalDepth > 0 { - return nil, errRawToken - } - return d.rawToken() -} - -func (d *Decoder) rawToken() (Token, error) { - if d.t != nil { - return d.t.Token() - } - if d.err != nil { - return nil, d.err - } - if d.needClose { - // The last element we read was self-closing and - // we returned just the StartElement half. - // Return the EndElement half now. - d.needClose = false - return EndElement{d.toClose}, nil - } - - b, ok := d.getc() - if !ok { - return nil, d.err - } - - if b != '<' { - // Text section. 
- d.ungetc(b) - data := d.text(-1, false) - if data == nil { - return nil, d.err - } - return CharData(data), nil - } - - if b, ok = d.mustgetc(); !ok { - return nil, d.err - } - switch b { - case '/': - // ' { - d.err = d.syntaxError("invalid characters between ") - return nil, d.err - } - return EndElement{name}, nil - - case '?': - // ' { - break - } - b0 = b - } - data := d.buf.Bytes() - data = data[0 : len(data)-2] // chop ?> - - if target == "xml" { - content := string(data) - ver := procInst("version", content) - if ver != "" && ver != "1.0" { - d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver) - return nil, d.err - } - enc := procInst("encoding", content) - if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") { - if d.CharsetReader == nil { - d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc) - return nil, d.err - } - newr, err := d.CharsetReader(enc, d.r.(io.Reader)) - if err != nil { - d.err = fmt.Errorf("xml: opening charset %q: %w", enc, err) - return nil, d.err - } - if newr == nil { - panic("CharsetReader returned a nil Reader for charset " + enc) - } - d.switchToReader(newr) - } - } - return ProcInst{target, data}, nil - - case '!': - // ' { - d.err = d.syntaxError( - `invalid sequence "--" not allowed in comments`) - return nil, d.err - } - break - } - b0, b1 = b1, b - } - data := d.buf.Bytes() - data = data[0 : len(data)-3] // chop --> - return Comment(data), nil - - case '[': // . - data := d.text(-1, true) - if data == nil { - return nil, d.err - } - return CharData(data), nil - } - - // Probably a directive: , , etc. - // We don't care, but accumulate for caller. Quoted angle - // brackets do not count for nesting. 
- d.buf.Reset() - d.buf.WriteByte(b) - inquote := uint8(0) - depth := 0 - for { - if b, ok = d.mustgetc(); !ok { - return nil, d.err - } - if inquote == 0 && b == '>' && depth == 0 { - break - } - HandleB: - d.buf.WriteByte(b) - switch { - case b == inquote: - inquote = 0 - - case inquote != 0: - // in quotes, no special action - - case b == '\'' || b == '"': - inquote = b - - case b == '>' && inquote == 0: - depth-- - - case b == '<' && inquote == 0: - // Look for