From 307bb562d5877156b6e86fb78ee65cf55b4c3411 Mon Sep 17 00:00:00 2001 From: itchyny Date: Sun, 9 Apr 2023 18:45:25 +0900 Subject: [PATCH] stop number normalization for concurrent execution with same data (fix #212) --- README.md | 2 +- cli/encoder.go | 3 ++ cli/test.yaml | 11 +++++- compare.go | 3 +- compiler.go | 9 +---- encoder.go | 15 +++++--- encoder_test.go | 5 ++- func.go | 42 +++++++++++++++++---- normalize.go | 84 ----------------------------------------- operator.go | 14 +++++++ option.go | 2 +- option_function_test.go | 3 ++ option_test.go | 2 +- preview.go | 6 +-- preview_test.go | 9 +++++ query.go | 1 - query_test.go | 29 -------------- type.go | 6 ++- 18 files changed, 100 insertions(+), 146 deletions(-) delete mode 100644 normalize.go diff --git a/README.md b/README.md index 6370e440..d3b76b69 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ func main() { - Firstly, use [`gojq.Parse(string) (*Query, error)`](https://pkg.go.dev/github.com/itchyny/gojq#Parse) to get the query from a string. - Secondly, get the result iterator - using [`query.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Query.Run) or [`query.RunWithContext`](https://pkg.go.dev/github.com/itchyny/gojq#Query.RunWithContext) - - or alternatively, compile the query using [`gojq.Compile`](https://pkg.go.dev/github.com/itchyny/gojq#Compile) and then [`code.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Code.Run) or [`code.RunWithContext`](https://pkg.go.dev/github.com/itchyny/gojq#Code.RunWithContext). You can reuse the `*Code` against multiple inputs to avoid compilation of the same query. But for arguments of `code.Run`, do not give values sharing same data between multiple calls. + - or alternatively, compile the query using [`gojq.Compile`](https://pkg.go.dev/github.com/itchyny/gojq#Compile) and then [`code.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Code.Run) or [`code.RunWithContext`](https://pkg.go.dev/github.com/itchyny/gojq#Code.RunWithContext). 
You can reuse the `*Code` against multiple inputs to avoid compilation of the same query. - In either case, you cannot use custom type values as the query input. The type should be `[]any` for an array and `map[string]any` for a map (just like decoded to an `any` using the [encoding/json](https://golang.org/pkg/encoding/json/) package). You can't use `[]int` or `map[string]string`, for example. If you want to query your custom struct, marshal to JSON, unmarshal to `any` and use it as the query input. - Thirdly, iterate through the results using [`iter.Next() (any, bool)`](https://pkg.go.dev/github.com/itchyny/gojq#Iter). The iterator can emit an error so make sure to handle it. The method returns `true` with results, and `false` when the iterator terminates. - The return type is not `(any, error)` because iterators can emit multiple errors and you can continue after an error. It is difficult for the iterator to tell the termination in this situation. diff --git a/cli/encoder.go b/cli/encoder.go index 8841baac..7f8cf378 100644 --- a/cli/encoder.go +++ b/cli/encoder.go @@ -2,6 +2,7 @@ package cli import ( "bytes" + "encoding/json" "fmt" "io" "math" @@ -56,6 +57,8 @@ func (e *encoder) encode(v any) error { e.encodeFloat64(v) case *big.Int: e.write(v.Append(e.buf[:0], 10), numberColor) + case json.Number: + e.write([]byte(v.String()), numberColor) case string: e.encodeString(v, stringColor) case []any: diff --git a/cli/test.yaml b/cli/test.yaml index ee70767f..db019cb4 100644 --- a/cli/test.yaml +++ b/cli/test.yaml @@ -19,9 +19,11 @@ - name: number input args: - '.' 
- input: '128' + input: '128 3.14 10.0' expected: | 128 + 3.14 + 10.0 - name: number query args: @@ -282,6 +284,13 @@ expected: | 4722366482869645213696 +- name: object indexing with floating-point number with trailing zeros + args: + - '.foo' + input: '{"foo": 100.000000}' + expected: | + 100.000000 + - name: object indexing by keywords args: - '.and,.or,.try' diff --git a/compare.go b/compare.go index e70c1fbb..9c8b3986 100644 --- a/compare.go +++ b/compare.go @@ -1,6 +1,7 @@ package gojq import ( + "encoding/json" "math" "math/big" ) @@ -88,7 +89,7 @@ func typeIndex(v any) int { return 1 } return 2 - case int, float64, *big.Int: + case int, float64, *big.Int, json.Number: return 3 case string: return 4 diff --git a/compiler.go b/compiler.go index 135387fa..e377d397 100644 --- a/compiler.go +++ b/compiler.go @@ -34,7 +34,6 @@ type Code struct { // a result iterator. // // It is safe to call this method in goroutines, to reuse a compiled [*Code]. -// But for arguments, do not give values sharing same data between goroutines. func (c *Code) Run(v any, values ...any) Iter { return c.RunWithContext(context.Background(), v, values...) } @@ -46,10 +45,7 @@ func (c *Code) RunWithContext(ctx context.Context, v any, values ...any) Iter { } else if len(values) < len(c.variables) { return NewIter(&expectedVariableError{c.variables[len(values)]}) } - for i, v := range values { - values[i] = normalizeNumbers(v) - } - return newEnv(ctx).execute(c, normalizeNumbers(v), values...) + return newEnv(ctx).execute(c, v, values...) 
} type scopeinfo struct { @@ -160,7 +156,6 @@ func (c *compiler) compileImport(i *Import) error { } else { return fmt.Errorf("module not found: %q", path) } - vals = normalizeNumbers(vals) c.append(&code{op: oppush, v: vals}) c.append(&code{op: opstore, v: c.pushVariable(alias)}) c.append(&code{op: oppush, v: vals}) @@ -1166,7 +1161,7 @@ func (c *compiler) funcInput(any, []any) any { if !ok { return errors.New("break") } - return normalizeNumbers(v) + return v } func (c *compiler) funcModulemeta(v any, _ []any) any { diff --git a/encoder.go b/encoder.go index 3233e8a9..d4e7dfa3 100644 --- a/encoder.go +++ b/encoder.go @@ -2,6 +2,7 @@ package gojq import ( "bytes" + "encoding/json" "fmt" "io" "math" @@ -15,12 +16,12 @@ import ( // Marshal returns the jq-flavored JSON encoding of v. // // This method accepts only limited types (nil, bool, int, float64, *big.Int, -// string, []any, and map[string]any) because these are the possible types a -// gojq iterator can emit. This method marshals NaN to null, truncates -// infinities to (+|-) math.MaxFloat64, uses \b and \f in strings, and does not -// escape '<', '>', '&', '\u2028', and '\u2029'. These behaviors are based on -// the marshaler of jq command, and different from json.Marshal in the Go -// standard library. Note that the result is not safe to embed in HTML. +// json.Number, string, []any, and map[string]any) because these are the +// possible types a gojq iterator can emit. This method marshals NaN to null, +// truncates infinities to (+|-) math.MaxFloat64, uses \b and \f in strings, +// and does not escape '<', '>', '&', '\u2028', and '\u2029'. These behaviors +// are based on the marshaler of jq command, and different from json.Marshal in +// the Go standard library. Note that the result is not safe to embed in HTML. 
func Marshal(v any) ([]byte, error) { var b bytes.Buffer (&encoder{w: &b}).encode(v) @@ -62,6 +63,8 @@ func (e *encoder) encode(v any) { e.encodeFloat64(v) case *big.Int: e.w.Write(v.Append(e.buf[:0], 10)) + case json.Number: + e.w.WriteString(v.String()) case string: e.encodeString(v) case []any: diff --git a/encoder_test.go b/encoder_test.go index 1d1247eb..68adc68c 100644 --- a/encoder_test.go +++ b/encoder_test.go @@ -1,6 +1,7 @@ package gojq_test import ( + "encoding/json" "fmt" "math" "math/big" @@ -24,10 +25,10 @@ func TestMarshal(t *testing.T) { }, { value: []any{ - 42, 3.14, 1e-6, 1e-7, -1e-9, 1e-10, math.NaN(), math.Inf(1), math.Inf(-1), + 42, 3.14, 1e-6, 1e-7, -1e-9, 1e-10, math.NaN(), math.Inf(1), math.Inf(-1), json.Number("42"), new(big.Int).SetBytes([]byte("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff")), }, - expected: "[42,3.14,0.000001,1e-7,-1e-9,1e-10,null,1.7976931348623157e+308,-1.7976931348623157e+308,340282366920938463463374607431768211455]", + expected: "[42,3.14,0.000001,1e-7,-1e-9,1e-10,null,1.7976931348623157e+308,-1.7976931348623157e+308,42,340282366920938463463374607431768211455]", }, { value: []any{"", "abcde", "foo\x00\x1f\r\n\t\f\b<=>!\"#$%'& \\\x7fbar"}, diff --git a/func.go b/func.go index d94a7a47..fbd03708 100644 --- a/func.go +++ b/func.go @@ -288,6 +288,11 @@ func funcLength(v any) any { return v } return new(big.Int).Abs(v) + case json.Number: + if strings.HasPrefix(v.String(), "-") { + return v[1:] + } + return v case string: return len([]rune(v)) case []any: @@ -490,7 +495,7 @@ func funcAdd(v any) any { func funcToNumber(v any) any { switch v := v.(type) { - case int, float64, *big.Int: + case int, float64, *big.Int, json.Number: return v case string: if !newLexer(v).validNumber() { @@ -503,7 +508,7 @@ func funcToNumber(v any) any { } func toNumber(v string) any { - return normalizeNumber(json.Number(v)) + return parseNumber(json.Number(v)) } func funcToString(v any) any { @@ -794,7 +799,7 @@ func 
funcFromJSON(v any) any { if _, err := dec.Token(); err != io.EOF { return &funcTypeError{"fromjson", v} } - return normalizeNumbers(w) + return w } func funcFormat(v, x any) any { @@ -930,7 +935,7 @@ func funcIndex2(_, v, x any) any { default: return &expectedObjectError{v} } - case int, float64, *big.Int: + case int, float64, *big.Int, json.Number: i, _ := toInt(x) switch v := v.(type) { case nil: @@ -1133,7 +1138,7 @@ func (iter *rangeIter) Next() (any, bool) { func funcRange(_ any, xs []any) any { for _, x := range xs { switch x.(type) { - case int, float64, *big.Int: + case int, float64, *big.Int, json.Number: default: return &funcTypeError{"range", x} } @@ -1310,7 +1315,7 @@ func funcJoin(v, x any) any { } else { ss[i] = "false" } - case int, float64, *big.Int: + case int, float64, *big.Int, json.Number: ss[i] = jsonMarshal(v) default: return &joinTypeError{v} @@ -1527,7 +1532,7 @@ func update(v any, path []any, n any, a allocator) (any, error) { default: return nil, &expectedObjectError{v} } - case int, float64, *big.Int: + case int, float64, *big.Int, json.Number: i, _ := toInt(p) switch v := v.(type) { case nil: @@ -2063,6 +2068,8 @@ func toInt(x any) (int, bool) { return math.MaxInt, true } return math.MinInt, true + case json.Number: + return toInt(parseNumber(x)) default: return 0, false } @@ -2086,6 +2093,9 @@ func toFloat(x any) (float64, bool) { return x, true case *big.Int: return bigToFloat(x), true + case json.Number: + v, _ := x.Float64() + return v, true default: return 0.0, false } @@ -2100,3 +2110,21 @@ func bigToFloat(x *big.Int) float64 { } return math.Inf(x.Sign()) } + +func parseNumber(v json.Number) any { + if i, err := v.Int64(); err == nil && math.MinInt <= i && i <= math.MaxInt { + return int(i) + } + if strings.ContainsAny(v.String(), ".eE") { + if f, err := v.Float64(); err == nil { + return f + } + } + if bi, ok := new(big.Int).SetString(v.String(), 10); ok { + return bi + } + if strings.HasPrefix(v.String(), "-") { + return 
math.Inf(-1) + } + return math.Inf(1) +} diff --git a/normalize.go b/normalize.go deleted file mode 100644 index 2bfcd215..00000000 --- a/normalize.go +++ /dev/null @@ -1,84 +0,0 @@ -package gojq - -import ( - "encoding/json" - "math" - "math/big" - "strings" -) - -func normalizeNumber(v json.Number) any { - if i, err := v.Int64(); err == nil && math.MinInt <= i && i <= math.MaxInt { - return int(i) - } - if strings.ContainsAny(v.String(), ".eE") { - if f, err := v.Float64(); err == nil { - return f - } - } - if bi, ok := new(big.Int).SetString(v.String(), 10); ok { - return bi - } - if strings.HasPrefix(v.String(), "-") { - return math.Inf(-1) - } - return math.Inf(1) -} - -func normalizeNumbers(v any) any { - switch v := v.(type) { - case json.Number: - return normalizeNumber(v) - case *big.Int: - if v.IsInt64() { - if i := v.Int64(); math.MinInt <= i && i <= math.MaxInt { - return int(i) - } - } - return v - case int64: - if math.MinInt <= v && v <= math.MaxInt { - return int(v) - } - return big.NewInt(v) - case int32: - return int(v) - case int16: - return int(v) - case int8: - return int(v) - case uint: - if v <= math.MaxInt { - return int(v) - } - return new(big.Int).SetUint64(uint64(v)) - case uint64: - if v <= math.MaxInt { - return int(v) - } - return new(big.Int).SetUint64(v) - case uint32: - if uint64(v) <= math.MaxInt { - return int(v) - } - return new(big.Int).SetUint64(uint64(v)) - case uint16: - return int(v) - case uint8: - return int(v) - case float32: - return float64(v) - case []any: - for i, x := range v { - v[i] = normalizeNumbers(x) - } - return v - case map[string]any: - for k, x := range v { - v[k] = normalizeNumbers(x) - } - return v - default: - return v - } -} diff --git a/operator.go b/operator.go index 73a548e0..d5427442 100644 --- a/operator.go +++ b/operator.go @@ -1,6 +1,7 @@ package gojq import ( + "encoding/json" "math" "math/big" "strings" @@ -216,6 +217,12 @@ func binopTypeSwitch( callbackArrays func(_, _ []any) any, callbackMaps 
func(_, _ map[string]any) any, fallback func(_, _ any) any) any { + if n, ok := l.(json.Number); ok { + l = parseNumber(n) + } + if n, ok := r.(json.Number); ok { + r = parseNumber(n) + } switch l := l.(type) { case int: switch r := r.(type) { @@ -284,6 +291,8 @@ func funcOpPlus(v any) any { return v case *big.Int: return v + case json.Number: + return v default: return &unaryTypeError{"plus", v} } @@ -297,6 +306,11 @@ func funcOpNegate(v any) any { return -v case *big.Int: return new(big.Int).Neg(v) + case json.Number: + if strings.HasPrefix(v.String(), "-") { + return v[1:] + } + return "-" + v default: return &unaryTypeError{"negate", v} } diff --git a/option.go b/option.go index f1a110fa..ebc6c0c9 100644 --- a/option.go +++ b/option.go @@ -33,7 +33,7 @@ func WithVariables(variables []string) CompilerOption { // WithFunction is a compiler option for adding a custom internal function. // Specify the minimum and maximum count of the function arguments. These // values should satisfy 0 <= minarity <= maxarity <= 30, otherwise panics. -// On handling numbers, you should take account to int, float64 and *big.Int. +// On handling numbers, take into account int, float64, *big.Int, and json.Number. // These are the number types you are allowed to return, so do not return int64. // Refer to [ValueError] to return a value error just like built-in error // function. 
If you want to emit multiple values, call the empty function, diff --git a/option_function_test.go b/option_function_test.go index fe13ff9a..17046e41 100644 --- a/option_function_test.go +++ b/option_function_test.go @@ -19,6 +19,9 @@ func toFloat(x any) (float64, bool) { case *big.Int: f, err := strconv.ParseFloat(x.String(), 64) return f, err == nil + case json.Number: + f, err := x.Float64() + return f, err == nil default: return 0.0, false } diff --git a/option_test.go b/option_test.go index 26f43b7f..d94a8bdc 100644 --- a/option_test.go +++ b/option_test.go @@ -115,7 +115,7 @@ func TestWithModuleLoader_JSON(t *testing.T) { if !ok { break } - if expected := []any{[]any{1.0, 42, 123}, 5166}; !reflect.DeepEqual(got, expected) { + if expected := []any{[]any{1.0, 42, json.Number("123")}, 5166}; !reflect.DeepEqual(got, expected) { t.Errorf("expected: %v, got: %v", expected, got) } } diff --git a/preview.go b/preview.go index e082eb56..92f1a54a 100644 --- a/preview.go +++ b/preview.go @@ -7,9 +7,9 @@ import "unicode/utf8" // by 30 bytes, and more efficient than truncating the result of [Marshal]. // // This method is used by error messages of built-in operators and functions, -// and accepts only limited types (nil, bool, int, float64, *big.Int, string, -// []any, and map[string]any). Note that the maximum width and trailing strings -// on truncation may be changed in the future. +// and accepts only limited types (nil, bool, int, float64, *big.Int, +// json.Number, string, []any, and map[string]any). Note that the maximum width +// and trailing strings on truncation may be changed in the future. 
func Preview(v any) string { bs := jsonLimitedMarshal(v, 32) if l := 30; len(bs) > l { diff --git a/preview_test.go b/preview_test.go index 5b6c878f..8d39793c 100644 --- a/preview_test.go +++ b/preview_test.go @@ -1,6 +1,7 @@ package gojq_test import ( + "encoding/json" "fmt" "math" "math/big" @@ -58,6 +59,14 @@ func TestPreview(t *testing.T) { new(big.Int).SetBytes([]byte("\x0c\x9f\x2c\x9c\xd0\x46\x74\xed\xea\x40\x00\x00\x00")), "10000000000000000000000000 ...", }, + { + json.Number("42"), + "42", + }, + { + json.Number("10000000000000000000000000000000000000000"), + "10000000000000000000000000 ...", + }, { "0 1 2 3 4 5 6 7 8 9 10 11 12", `"0 1 2 3 4 5 6 7 8 9 10 11 12"`, diff --git a/query.go b/query.go index 5f20b4ff..e4ba167f 100644 --- a/query.go +++ b/query.go @@ -20,7 +20,6 @@ type Query struct { // Run the query. // // It is safe to call this method in goroutines, to reuse a parsed [*Query]. -// But for arguments, do not give values sharing same data between goroutines. func (e *Query) Run(v any) Iter { return e.RunWithContext(context.Background(), v) } diff --git a/query_test.go b/query_test.go index 32df5fec..6253d0b3 100644 --- a/query_test.go +++ b/query_test.go @@ -2,11 +2,8 @@ package gojq_test import ( "context" - "encoding/json" "fmt" "log" - "math" - "math/big" "os" "reflect" "strconv" @@ -205,32 +202,6 @@ func TestQueryRun_Strings(t *testing.T) { } } -func TestQueryRun_NumericTypes(t *testing.T) { - query, err := gojq.Parse(".[] + 0 != 0") - if err != nil { - t.Fatal(err) - } - iter := query.Run([]any{ - int64(1), int32(1), int16(1), int8(1), uint64(1), uint32(1), uint16(1), uint8(1), uint(math.MaxUint), - int64(math.MaxInt64), int64(math.MinInt64), uint64(math.MaxUint64), uint32(math.MaxUint32), - new(big.Int).SetUint64(math.MaxUint64), new(big.Int).SetUint64(math.MaxUint32), - json.Number(fmt.Sprint(uint64(math.MaxInt64))), json.Number(fmt.Sprint(uint64(math.MaxInt32))), - float64(1.0), float32(1.0), - }) - for { - v, ok := iter.Next() - if !ok 
{ - break - } - if err, ok := v.(error); ok { - t.Fatal(err) - } - if expected := true; expected != v { - t.Errorf("expected: %v, got: %v", expected, v) - } - } -} - func TestQueryRun_Input(t *testing.T) { query, err := gojq.Parse("input") if err != nil { diff --git a/type.go b/type.go index bb388e20..89ccda0e 100644 --- a/type.go +++ b/type.go @@ -1,6 +1,7 @@ package gojq import ( + "encoding/json" "fmt" "math/big" ) @@ -8,14 +9,15 @@ import ( // TypeOf returns the jq-flavored type name of v. // // This method is used by built-in type/0 function, and accepts only limited -// types (nil, bool, int, float64, *big.Int, string, []any, and map[string]any). +// types (nil, bool, int, float64, *big.Int, json.Number, string, []any, and +// map[string]any). func TypeOf(v any) string { switch v.(type) { case nil: return "null" case bool: return "boolean" - case int, float64, *big.Int: + case int, float64, *big.Int, json.Number: return "number" case string: return "string"