From 3339bc08c5029ee34ab18504d5f2736ad7e9c302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E7=8E=AE=E6=96=87?= Date: Mon, 10 Feb 2025 00:55:12 +0800 Subject: [PATCH 1/4] Do not set value on float overflow This is consistent with int overflow and json v1 --- arshal_any.go | 6 +++--- arshal_default.go | 2 +- arshal_test.go | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arshal_any.go b/arshal_any.go index 5ff4bf9..64882bb 100644 --- a/arshal_any.go +++ b/arshal_any.go @@ -85,7 +85,7 @@ func unmarshalValueAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (any, error) } fv, ok := jsonwire.ParseFloat(val, 64) if !ok { - return fv, newUnmarshalErrorAfterWithValue(dec, float64Type, strconv.ErrRange) + return nil, newUnmarshalErrorAfterWithValue(dec, float64Type, strconv.ErrRange) } return fv, nil default: @@ -196,13 +196,13 @@ func unmarshalObjectAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (map[string] } val, err := unmarshalValueAny(dec, uo) - obj[name] = val if err != nil { if isFatalError(err, uo.Flags) { return obj, err } errUnmarshal = cmp.Or(err, errUnmarshal) } + obj[name] = val } if _, err := dec.ReadToken(); err != nil { return obj, err @@ -266,13 +266,13 @@ func unmarshalArrayAny(dec *jsontext.Decoder, uo *jsonopts.Struct) ([]any, error var errUnmarshal error for dec.PeekKind() != ']' { val, err := unmarshalValueAny(dec, uo) - arr = append(arr, val) if err != nil { if isFatalError(err, uo.Flags) { return arr, err } errUnmarshal = cmp.Or(errUnmarshal, err) } + arr = append(arr, val) } if _, err := dec.ReadToken(); err != nil { return arr, err diff --git a/arshal_default.go b/arshal_default.go index a6777ff..f23b472 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -680,10 +680,10 @@ func makeFloatArshaler(t reflect.Type) *arshaler { break } fv, ok := jsonwire.ParseFloat(val, bits) - va.SetFloat(fv) if !ok { return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrRange) } + va.SetFloat(fv) return nil } return 
newUnmarshalErrorAfter(dec, t, nil) diff --git a/arshal_test.go b/arshal_test.go index 8379e22..1a4fbe3 100644 --- a/arshal_test.go +++ b/arshal_test.go @@ -5434,7 +5434,7 @@ func TestUnmarshal(t *testing.T) { name: jsontest.Name("Floats/Float32/Overflow"), inBuf: `-1e1000`, inVal: addr(float32(32.32)), - want: addr(float32(-math.MaxFloat32)), + want: addr(float32(32.32)), wantErr: EU(strconv.ErrRange).withVal(`-1e1000`).withType('0', T[float32]()), }, { name: jsontest.Name("Floats/Float64/Pi"), @@ -5450,13 +5450,13 @@ func TestUnmarshal(t *testing.T) { name: jsontest.Name("Floats/Float64/Overflow"), inBuf: `-1e1000`, inVal: addr(float64(64.64)), - want: addr(float64(-math.MaxFloat64)), + want: addr(float64(64.64)), wantErr: EU(strconv.ErrRange).withVal(`-1e1000`).withType('0', T[float64]()), }, { name: jsontest.Name("Floats/Any/Overflow"), inBuf: `1e1000`, inVal: new(any), - want: addr(any(float64(math.MaxFloat64))), + want: new(any), wantErr: EU(strconv.ErrRange).withVal(`1e1000`).withType('0', T[float64]()), }, { name: jsontest.Name("Floats/Named"), From f357195309025a98c2ffd47429ba4e1ea667f03b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E7=8E=AE=E6=96=87?= Date: Mon, 10 Feb 2025 01:13:56 +0800 Subject: [PATCH 2/4] Refactor number parsing complexity to jsonwire package These can be reused in jsontext. Note that jsonwire.ParseFloat now returns inf for overflow. But this should not impact user-visible behavior, because we don't set value when overflow. 
--- arshal_any.go | 7 +- arshal_default.go | 45 +++------- arshal_time.go | 45 +++++----- internal/jsonwire/decode.go | 71 +++++++++------ internal/jsonwire/decode_test.go | 143 ++++++++++++++++--------------- jsontext/token.go | 4 +- jsontext/token_test.go | 4 +- 7 files changed, 158 insertions(+), 161 deletions(-) diff --git a/arshal_any.go b/arshal_any.go index 64882bb..d1171a1 100644 --- a/arshal_any.go +++ b/arshal_any.go @@ -7,7 +7,6 @@ package json import ( "cmp" "reflect" - "strconv" "github.com/go-json-experiment/json/internal" "github.com/go-json-experiment/json/internal/jsonflags" @@ -83,9 +82,9 @@ func unmarshalValueAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (any, error) if uo.Flags.Get(jsonflags.UnmarshalAnyWithRawNumber) { return internal.RawNumberOf(val), nil } - fv, ok := jsonwire.ParseFloat(val, 64) - if !ok { - return nil, newUnmarshalErrorAfterWithValue(dec, float64Type, strconv.ErrRange) + fv, err := jsonwire.ParseFloat(val, 64) + if err != nil { + return nil, newUnmarshalErrorAfterWithValue(dec, float64Type, err) } return fv, nil default: diff --git a/arshal_default.go b/arshal_default.go index f23b472..ac95fa8 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -479,28 +479,11 @@ func makeIntArshaler(t reflect.Type) *arshaler { if stringify && k == '0' { break } - var negOffset int - neg := len(val) > 0 && val[0] == '-' - if neg { - negOffset = 1 - } - n, ok := jsonwire.ParseUint(val[negOffset:]) - maxInt := uint64(1) << (bits - 1) - overflow := (neg && n > maxInt) || (!neg && n > maxInt-1) - if !ok { - if n != math.MaxUint64 { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrSyntax) - } - overflow = true - } - if overflow { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrRange) - } - if neg { - va.SetInt(int64(-n)) - } else { - va.SetInt(int64(+n)) + n, err := jsonwire.ParseInt(val, bits) + if err != nil { + return newUnmarshalErrorAfterWithValue(dec, t, err) } + va.SetInt(n) return nil } return 
newUnmarshalErrorAfter(dec, t, nil) @@ -566,17 +549,9 @@ func makeUintArshaler(t reflect.Type) *arshaler { if stringify && k == '0' { break } - n, ok := jsonwire.ParseUint(val) - maxUint := uint64(1) << bits - overflow := n > maxUint-1 - if !ok { - if n != math.MaxUint64 { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrSyntax) - } - overflow = true - } - if overflow { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrRange) + n, err := jsonwire.ParseUint(val, bits) + if err != nil { + return newUnmarshalErrorAfterWithValue(dec, t, err) } va.SetUint(n) return nil @@ -679,9 +654,9 @@ func makeFloatArshaler(t reflect.Type) *arshaler { if stringify && k == '0' { break } - fv, ok := jsonwire.ParseFloat(val, bits) - if !ok { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrRange) + fv, err := jsonwire.ParseFloat(val, bits) + if err != nil { + return newUnmarshalErrorAfterWithValue(dec, t, err) } va.SetFloat(fv) return nil diff --git a/arshal_time.go b/arshal_time.go index 9cb8e80..6dce5ca 100644 --- a/arshal_time.go +++ b/arshal_time.go @@ -419,18 +419,19 @@ func appendDurationBase10(b []byte, d time.Duration, pow10 uint64) []byte { func parseDurationBase10(b []byte, pow10 uint64) (time.Duration, error) { suffix, neg := consumeSign(b) // consume sign wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields - whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow + whole, err := jsonwire.ParseUint(wholeBytes, 64) // parse whole field; may overflow frac, okFrac := parseFracBase10(fracBytes, pow10) // parse frac field hi, lo := bits.Mul64(whole, uint64(pow10)) // overflow if hi > 0 sum, co := bits.Add64(lo, uint64(frac), 0) // overflow if co > 0 switch d := mayApplyDurationSign(sum, neg); { // overflow if neg != (d < 0) - case (!okWhole && whole != math.MaxUint64) || !okFrac: - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrSyntax) - case !okWhole || hi > 0 || co > 
0 || neg != (d < 0): - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrRange) - default: + case !okFrac: + err = strconv.ErrSyntax + case hi > 0 || co > 0 || neg != (d < 0): + err = strconv.ErrRange + case err == nil: return d, nil } + return 0, fmt.Errorf("invalid duration %q: %w", b, err) } // appendDurationBase60 appends d formatted with H:MM:SS.SSS notation. @@ -455,7 +456,7 @@ func parseDurationBase60(b []byte) (time.Duration, error) { hourBytes, suffix := bytesCutByte(suffix, ':', false) // consume hour field minBytes, suffix := bytesCutByte(suffix, ':', false) // consume min field secBytes, nsecBytes := bytesCutByte(suffix, '.', true) // consume sec and nsec fields - hour, okHour := jsonwire.ParseUint(hourBytes) // parse hour field; may overflow + hour, err := jsonwire.ParseUint(hourBytes, 64) // parse hour field; may overflow min := parseDec2(minBytes) // parse min field sec := parseDec2(secBytes) // parse sec field nsec, okNsec := parseFracBase10(nsecBytes, 1e9) // parse nsec field @@ -463,13 +464,14 @@ func parseDurationBase60(b []byte) (time.Duration, error) { hi, lo := bits.Mul64(hour, 60*60*1e9) // overflow if hi > 0 sum, co := bits.Add64(lo, n, 0) // overflow if co > 0 switch d := mayApplyDurationSign(sum, neg); { // overflow if neg != (d < 0) - case (!okHour && hour != math.MaxUint64) || !checkBase60(minBytes) || !checkBase60(secBytes) || !okNsec: - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrSyntax) - case !okHour || hi > 0 || co > 0 || neg != (d < 0): - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrRange) - default: + case !checkBase60(minBytes) || !checkBase60(secBytes) || !okNsec: + err = strconv.ErrSyntax + case hi > 0 || co > 0 || neg != (d < 0): + err = strconv.ErrRange + case err == nil: return d, nil } + return 0, fmt.Errorf("invalid duration %q: %w", b, err) } // mayAppendDurationSign appends a negative sign if n is negative. 
@@ -517,19 +519,19 @@ func appendTimeUnix(b []byte, t time.Time, pow10 uint64) []byte { func parseTimeUnix(b []byte, pow10 uint64) (time.Time, error) { suffix, neg := consumeSign(b) // consume sign wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields - whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow + whole, err := jsonwire.ParseUint(wholeBytes, 64) // parse whole field; may overflow frac, okFrac := parseFracBase10(fracBytes, 1e9/pow10) // parse frac field var sec, nsec int64 switch { case pow10 == 1e0: // fast case where units is in seconds sec = int64(whole) // check overflow later after negation nsec = int64(frac) // cannot overflow - case okWhole: // intermediate case where units is not seconds, but no overflow + case err == nil: // intermediate case where units is not seconds, but no overflow sec = int64(whole / pow10) // check overflow later after negation nsec = int64((whole%pow10)*(1e9/pow10) + frac) // cannot overflow - case !okWhole && whole == math.MaxUint64: // slow case where units is not seconds and overflow occurred + case err == strconv.ErrRange: // slow case where units is not seconds and overflow occurred width := int(math.Log10(float64(pow10))) // compute len(strconv.Itoa(pow10-1)) - whole, okWhole = jsonwire.ParseUint(wholeBytes[:len(wholeBytes)-width]) // parse the upper whole field + whole, err = jsonwire.ParseUint(wholeBytes[:len(wholeBytes)-width], 64) // parse the upper whole field mid, _ := parsePaddedBase10(wholeBytes[len(wholeBytes)-width:], pow10) // parse the lower whole field sec = int64(whole) // check overflow later after negation nsec = int64(mid*(1e9/pow10) + frac) // cannot overflow @@ -538,13 +540,14 @@ func parseTimeUnix(b []byte, pow10 uint64) (time.Time, error) { sec, nsec = negateSecNano(sec, nsec) } switch t := time.Unix(sec, nsec).UTC(); { - case (!okWhole && whole != math.MaxUint64) || !okFrac: - return time.Time{}, fmt.Errorf("invalid time %q: %w", 
b, strconv.ErrSyntax) - case !okWhole || neg != (t.Unix() < 0): - return time.Time{}, fmt.Errorf("invalid time %q: %w", b, strconv.ErrRange) - default: + case !okFrac: + err = strconv.ErrSyntax + case neg != (t.Unix() < 0): + err = strconv.ErrRange + case err == nil: return t, nil } + return time.Time{}, fmt.Errorf("invalid time %q: %w", b, err) } // negateSecNano negates a Unix timestamp, where nsec must be within [0, 1e9). diff --git a/internal/jsonwire/decode.go b/internal/jsonwire/decode.go index 0278771..cd5652b 100644 --- a/internal/jsonwire/decode.go +++ b/internal/jsonwire/decode.go @@ -6,7 +6,6 @@ package jsonwire import ( "io" - "math" "slices" "strconv" "unicode/utf16" @@ -586,42 +585,62 @@ func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) { // ParseUint parses b as a decimal unsigned integer according to // a strict subset of the JSON number grammar, returning the value if valid. -// It returns (0, false) if there is a syntax error and -// returns (math.MaxUint64, false) if there is an overflow. -func ParseUint(b []byte) (v uint64, ok bool) { +// It returns (0, strconv.ErrSyntax) if there is a syntax error and +// returns (max, strconv.ErrRange) if there is an overflow. 
+func ParseUint(b []byte, bits int) (uint64, error) { const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64))) var n int + var v uint64 for ; len(b) > n && ('0' <= b[n] && b[n] <= '9'); n++ { v = 10*v + uint64(b[n]-'0') } + + max := uint64(1)<= unsafeWidth && (b[0] != '1' || v < 1e19 || n > unsafeWidth): - return math.MaxUint64, false + return max, strconv.ErrRange + case v > max: + return max, strconv.ErrRange + } + return v, nil +} + +func ParseInt(b []byte, bits int) (int64, error) { + negOffset := 0 + neg := len(b) > 0 && b[0] == '-' + if neg { + negOffset = 1 + } + n, err := ParseUint(b[negOffset:], bits) + if err != nil && n == 0 { + return 0, err + } + + maxInt := uint64(1) << (bits - 1) + if neg && n > maxInt { + return -int64(maxInt), strconv.ErrRange + } else if !neg && n > maxInt-1 { + return int64(maxInt - 1), strconv.ErrRange + } + + if neg { + return int64(-n), nil + } else { + return int64(+n), nil } - return v, true } // ParseFloat parses a floating point number according to the Go float grammar. -// Note that the JSON number grammar is a strict subset. -// -// If the number overflows the finite representation of a float, -// then we return MaxFloat since any finite value will always be infinitely -// more accurate at representing another finite value than an infinite value. -func ParseFloat(b []byte, bits int) (v float64, ok bool) { - fv, err := strconv.ParseFloat(string(b), bits) - if math.IsInf(fv, 0) { - switch { - case bits == 32 && math.IsInf(fv, +1): - fv = +math.MaxFloat32 - case bits == 64 && math.IsInf(fv, +1): - fv = +math.MaxFloat64 - case bits == 32 && math.IsInf(fv, -1): - fv = -math.MaxFloat32 - case bits == 64 && math.IsInf(fv, -1): - fv = -math.MaxFloat64 - } +func ParseFloat(b []byte, bits int) (float64, error) { + // Note that the JSON number grammar is a strict subset. 
+ // We have ensured the input is a valid json number in [ConsumeNumberResumable], + // So we may take advantage of the simpler grammar and + // replace this with a more efficient implementation in the future. + v, err := strconv.ParseFloat(string(b), bits) + if err != nil { + err = err.(*strconv.NumError).Err } - return fv, err == nil + return v, err } diff --git a/internal/jsonwire/decode_test.go b/internal/jsonwire/decode_test.go index 1748b59..e3801f5 100644 --- a/internal/jsonwire/decode_test.go +++ b/internal/jsonwire/decode_test.go @@ -9,6 +9,7 @@ import ( "io" "math" "reflect" + "strconv" "strings" "testing" ) @@ -342,55 +343,55 @@ func TestParseHexUint16(t *testing.T) { func TestParseUint(t *testing.T) { tests := []struct { - in string - want uint64 - wantOk bool + in string + want uint64 + wantErr error }{ - {"", 0, false}, - {"0", 0, true}, - {"1", 1, true}, - {"-1", 0, false}, - {"1f", 0, false}, - {"00", 0, false}, - {"01", 0, false}, - {"10", 10, true}, - {"10.9", 0, false}, - {" 10", 0, false}, - {"10 ", 0, false}, - {"123456789", 123456789, true}, - {"123456789d", 0, false}, - {"18446744073709551614", math.MaxUint64 - 1, true}, - {"18446744073709551615", math.MaxUint64, true}, - {"18446744073709551616", math.MaxUint64, false}, - {"18446744073709551620", math.MaxUint64, false}, - {"18446744073709551700", math.MaxUint64, false}, - {"18446744073709552000", math.MaxUint64, false}, - {"18446744073709560000", math.MaxUint64, false}, - {"18446744073709600000", math.MaxUint64, false}, - {"18446744073710000000", math.MaxUint64, false}, - {"18446744073800000000", math.MaxUint64, false}, - {"18446744074000000000", math.MaxUint64, false}, - {"18446744080000000000", math.MaxUint64, false}, - {"18446744100000000000", math.MaxUint64, false}, - {"18446745000000000000", math.MaxUint64, false}, - {"18446750000000000000", math.MaxUint64, false}, - {"18446800000000000000", math.MaxUint64, false}, - {"18447000000000000000", math.MaxUint64, false}, - 
{"18450000000000000000", math.MaxUint64, false}, - {"18500000000000000000", math.MaxUint64, false}, - {"19000000000000000000", math.MaxUint64, false}, - {"19999999999999999999", math.MaxUint64, false}, - {"20000000000000000000", math.MaxUint64, false}, - {"100000000000000000000", math.MaxUint64, false}, - {"99999999999999999999999999999999", math.MaxUint64, false}, - {"99999999999999999999999999999999f", 0, false}, + {"", 0, strconv.ErrSyntax}, + {"0", 0, nil}, + {"1", 1, nil}, + {"-1", 0, strconv.ErrSyntax}, + {"1f", 0, strconv.ErrSyntax}, + {"00", 0, strconv.ErrSyntax}, + {"01", 0, strconv.ErrSyntax}, + {"10", 10, nil}, + {"10.9", 0, strconv.ErrSyntax}, + {" 10", 0, strconv.ErrSyntax}, + {"10 ", 0, strconv.ErrSyntax}, + {"123456789", 123456789, nil}, + {"123456789d", 0, strconv.ErrSyntax}, + {"18446744073709551614", math.MaxUint64 - 1, nil}, + {"18446744073709551615", math.MaxUint64, nil}, + {"18446744073709551616", math.MaxUint64, strconv.ErrRange}, + {"18446744073709551620", math.MaxUint64, strconv.ErrRange}, + {"18446744073709551700", math.MaxUint64, strconv.ErrRange}, + {"18446744073709552000", math.MaxUint64, strconv.ErrRange}, + {"18446744073709560000", math.MaxUint64, strconv.ErrRange}, + {"18446744073709600000", math.MaxUint64, strconv.ErrRange}, + {"18446744073710000000", math.MaxUint64, strconv.ErrRange}, + {"18446744073800000000", math.MaxUint64, strconv.ErrRange}, + {"18446744074000000000", math.MaxUint64, strconv.ErrRange}, + {"18446744080000000000", math.MaxUint64, strconv.ErrRange}, + {"18446744100000000000", math.MaxUint64, strconv.ErrRange}, + {"18446745000000000000", math.MaxUint64, strconv.ErrRange}, + {"18446750000000000000", math.MaxUint64, strconv.ErrRange}, + {"18446800000000000000", math.MaxUint64, strconv.ErrRange}, + {"18447000000000000000", math.MaxUint64, strconv.ErrRange}, + {"18450000000000000000", math.MaxUint64, strconv.ErrRange}, + {"18500000000000000000", math.MaxUint64, strconv.ErrRange}, + {"19000000000000000000", 
math.MaxUint64, strconv.ErrRange}, + {"19999999999999999999", math.MaxUint64, strconv.ErrRange}, + {"20000000000000000000", math.MaxUint64, strconv.ErrRange}, + {"100000000000000000000", math.MaxUint64, strconv.ErrRange}, + {"99999999999999999999999999999999", math.MaxUint64, strconv.ErrRange}, + {"99999999999999999999999999999999f", 0, strconv.ErrSyntax}, } for _, tt := range tests { t.Run("", func(t *testing.T) { - got, gotOk := ParseUint([]byte(tt.in)) - if got != tt.want || gotOk != tt.wantOk { - t.Errorf("ParseUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotOk, tt.want, tt.wantOk) + got, gotErr := ParseUint([]byte(tt.in), 64) + if got != tt.want || gotErr != tt.wantErr { + t.Errorf("ParseUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) } }) } @@ -398,43 +399,43 @@ func TestParseUint(t *testing.T) { func TestParseFloat(t *testing.T) { tests := []struct { - in string - want32 float64 - want64 float64 - wantOk bool + in string + want32 float64 + want64 float64 + wantErr error }{ - {"0", 0, 0, true}, - {"-1", -1, -1, true}, - {"1", 1, 1, true}, + {"0", 0, 0, nil}, + {"-1", -1, -1, nil}, + {"1", 1, 1, nil}, - {"-16777215", -16777215, -16777215, true}, // -(1<<24 - 1) - {"16777215", 16777215, 16777215, true}, // +(1<<24 - 1) - {"-16777216", -16777216, -16777216, true}, // -(1<<24) - {"16777216", 16777216, 16777216, true}, // +(1<<24) - {"-16777217", -16777216, -16777217, true}, // -(1<<24 + 1) - {"16777217", 16777216, 16777217, true}, // +(1<<24 + 1) + {"-16777215", -16777215, -16777215, nil}, // -(1<<24 - 1) + {"16777215", 16777215, 16777215, nil}, // +(1<<24 - 1) + {"-16777216", -16777216, -16777216, nil}, // -(1<<24) + {"16777216", 16777216, 16777216, nil}, // +(1<<24) + {"-16777217", -16777216, -16777217, nil}, // -(1<<24 + 1) + {"16777217", 16777216, 16777217, nil}, // +(1<<24 + 1) - {"-9007199254740991", -9007199254740992, -9007199254740991, true}, // -(1<<53 - 1) - {"9007199254740991", 9007199254740992, 9007199254740991, 
true}, // +(1<<53 - 1) - {"-9007199254740992", -9007199254740992, -9007199254740992, true}, // -(1<<53) - {"9007199254740992", 9007199254740992, 9007199254740992, true}, // +(1<<53) - {"-9007199254740993", -9007199254740992, -9007199254740992, true}, // -(1<<53 + 1) - {"9007199254740993", 9007199254740992, 9007199254740992, true}, // +(1<<53 + 1) + {"-9007199254740991", -9007199254740992, -9007199254740991, nil}, // -(1<<53 - 1) + {"9007199254740991", 9007199254740992, 9007199254740991, nil}, // +(1<<53 - 1) + {"-9007199254740992", -9007199254740992, -9007199254740992, nil}, // -(1<<53) + {"9007199254740992", 9007199254740992, 9007199254740992, nil}, // +(1<<53) + {"-9007199254740993", -9007199254740992, -9007199254740992, nil}, // -(1<<53 + 1) + {"9007199254740993", 9007199254740992, 9007199254740992, nil}, // +(1<<53 + 1) - {"-1e1000", -math.MaxFloat32, -math.MaxFloat64, false}, - {"1e1000", +math.MaxFloat32, +math.MaxFloat64, false}, + {"-1e1000", math.Inf(-1), math.Inf(-1), strconv.ErrRange}, + {"1e1000", math.Inf(+1), math.Inf(+1), strconv.ErrRange}, } for _, tt := range tests { t.Run("", func(t *testing.T) { - got32, gotOk32 := ParseFloat([]byte(tt.in), 32) - if got32 != tt.want32 || gotOk32 != tt.wantOk { - t.Errorf("ParseFloat(%q, 32) = (%v, %v), want (%v, %v)", tt.in, got32, gotOk32, tt.want32, tt.wantOk) + got32, gotErr32 := ParseFloat([]byte(tt.in), 32) + if got32 != tt.want32 || gotErr32 != tt.wantErr { + t.Errorf("ParseFloat(%q, 32) = (%v, %v), want (%v, %v)", tt.in, got32, gotErr32, tt.want32, tt.wantErr) } - got64, gotOk64 := ParseFloat([]byte(tt.in), 64) - if got64 != tt.want64 || gotOk64 != tt.wantOk { - t.Errorf("ParseFloat(%q, 64) = (%v, %v), want (%v, %v)", tt.in, got64, gotOk64, tt.want64, tt.wantOk) + got64, gotErr64 := ParseFloat([]byte(tt.in), 64) + if got64 != tt.want64 || gotErr64 != tt.wantErr { + t.Errorf("ParseFloat(%q, 64) = (%v, %v), want (%v, %v)", tt.in, got64, gotErr64, tt.want64, tt.wantErr) } }) } diff --git a/jsontext/token.go 
b/jsontext/token.go index b389fc0..70ae346 100644 --- a/jsontext/token.go +++ b/jsontext/token.go @@ -357,7 +357,7 @@ func (t Token) Int() int64 { if len(buf) > 0 && buf[0] == '-' { neg, buf = true, buf[1:] } - if numAbs, ok := jsonwire.ParseUint(buf); ok { + if numAbs, err := jsonwire.ParseUint(buf, 64); err == nil { if neg { if numAbs > -minInt64 { return minInt64 @@ -418,7 +418,7 @@ func (t Token) Uint() uint64 { if len(buf) > 0 && buf[0] == '-' { neg, buf = true, buf[1:] } - if num, ok := jsonwire.ParseUint(buf); ok { + if num, err := jsonwire.ParseUint(buf, 64); err == nil { if neg { return minUint64 } diff --git a/jsontext/token_test.go b/jsontext/token_test.go index 2180b6a..276e329 100644 --- a/jsontext/token_test.go +++ b/jsontext/token_test.go @@ -73,8 +73,8 @@ func TestTokenAccessors(t *testing.T) { {Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}}, {Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, {rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`1e1000`), token{String: "1e1000", Float: math.MaxFloat64, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, - {rawToken(`-1e1000`), token{String: "-1e1000", Float: -math.MaxFloat64, Int: minInt64, Uint: minUint64, Kind: '0'}}, + {rawToken(`1e1000`), token{String: "1e1000", Float: math.Inf(+1), Int: maxInt64, Uint: maxUint64, Kind: '0'}}, + {rawToken(`-1e1000`), token{String: "-1e1000", Float: math.Inf(-1), Int: minInt64, Uint: minUint64, Kind: '0'}}, {rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}}, {rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}}, {rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}}, From 547d5f76d2563507ea7e61fdd5f44e76d587b3e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E7=8E=AE=E6=96=87?= Date: 
Mon, 10 Feb 2025 00:43:38 +0800 Subject: [PATCH 3/4] Introduce RawToken API Decoder now returns RawToken instead of Token. RawToken exposes a new set of methods: ParseFloat(bits int) (float64, error) ParseInt(bits int) (int64, error) ParseUint(bits int) (uint64, error) They reuse the same logic as the arshalers and are intended to offer Decoder users a more efficient and convenient way to parse numbers. Compared with the original Token.Float, these methods properly return errors for overflow, etc. Compared with strconv, these methods may take advantage of the fact that JSON numbers have a simpler syntax than Go numbers. Compared with json.UnmarshalDecode, these methods avoid the cost of arshaler lookup and reflection. New jsontext.Raw and Token.Raw funcs are added to convert between RawToken and Token. The original Float, Int, Uint methods now only support the case where the Token was created by the corresponding constructor, e.g., Int(1).Uint() will panic. This should ensure we never get inaccurate values. 
--- bench_test.go | 8 +- jsontext/coder_test.go | 13 +- jsontext/decode.go | 66 ++--- jsontext/decode_test.go | 10 +- jsontext/encode_test.go | 7 +- jsontext/example_test.go | 5 +- jsontext/fuzz_test.go | 8 +- jsontext/token.go | 527 +++++++++++++++++++-------------------- jsontext/token_test.go | 115 ++++++--- v1/stream.go | 2 +- 10 files changed, 404 insertions(+), 357 deletions(-) diff --git a/bench_test.go b/bench_test.go index db1abbc..0d22031 100644 --- a/bench_test.go +++ b/bench_test.go @@ -407,9 +407,13 @@ func mustDecodeTokens(t testing.TB, data []byte) []jsontext.Token { case '"': tokens = append(tokens, jsontext.String(tok.String())) case '0': - tokens = append(tokens, jsontext.Float(tok.Float())) + v, err := tok.ParseFloat(64) + if err != nil { + t.Fatalf("ParseFloat error: %v", err) + } + tokens = append(tokens, jsontext.Float(v)) default: - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, jsontext.Raw(tok.Clone())) } } return tokens diff --git a/jsontext/coder_test.go b/jsontext/coder_test.go index 8c34721..073c0b0 100644 --- a/jsontext/coder_test.go +++ b/jsontext/coder_test.go @@ -144,9 +144,10 @@ var coderTestdata = []coderTestdataEntry{{ 0, -0, 0.0, -0.0, 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001, 1e1000, -5e-324, 1e+100, 1.7976931348623157e+308, 9007199254740990, 9007199254740991, 9007199254740992, 9007199254740993, 9007199254740994, + "Infinity", "-Infinity", "NaN", -9223372036854775808, 9223372036854775807, 0, 18446744073709551615 ] `, - outCompacted: "[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,-9223372036854775808,9223372036854775807,0,18446744073709551615]", + outCompacted: 
`[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,"Infinity","-Infinity","NaN",-9223372036854775808,9223372036854775807,0,18446744073709551615]`, outIndented: `[ 0, -0, @@ -162,22 +163,26 @@ var coderTestdata = []coderTestdataEntry{{ 9007199254740992, 9007199254740993, 9007199254740994, + "Infinity", + "-Infinity", + "NaN", -9223372036854775808, 9223372036854775807, 0, 18446744073709551615 ]`, - outCanonicalized: `[0,0,0,0,1,1.7976931348623157e+308,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740992,9007199254740994,-9223372036854776000,9223372036854776000,0,18446744073709552000]`, + outCanonicalized: `[0,0,0,0,1,1.7976931348623157e+308,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740992,9007199254740994,"Infinity","-Infinity","NaN",-9223372036854776000,9223372036854776000,0,18446744073709552000]`, tokens: []Token{ ArrayStart, Float(0), Float(math.Copysign(0, -1)), rawToken(`0.0`), rawToken(`-0.0`), rawToken(`1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001`), rawToken(`1e1000`), Float(-5e-324), Float(1e100), Float(1.7976931348623157e+308), Float(9007199254740990), Float(9007199254740991), Float(9007199254740992), rawToken(`9007199254740993`), rawToken(`9007199254740994`), + Float(math.Inf(+1)), Float(math.Inf(-1)), Float(math.NaN()), Int(minInt64), Int(maxInt64), Uint(minUint64), Uint(maxUint64), ArrayEnd, }, pointers: []Pointer{ - "", "/0", "/1", "/2", "/3", "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/14", "/15", "/16", "/17", "", + "", "/0", "/1", "/2", "/3", "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/14", "/15", "/16", "/17", "/18", "/19", "/20", "", }, }, { name: 
jsontest.Name("ObjectN0"), @@ -473,7 +478,7 @@ func testCoderInterleaved(t *testing.T, where jsontest.CasePos, modeName string, } t.Fatalf("%s: Decoder.ReadToken error: %v", where, err) } - if err := enc.WriteToken(tok); err != nil { + if err := enc.WriteToken(Raw(tok)); err != nil { t.Fatalf("%s: Encoder.WriteToken error: %v", where, err) } } else { diff --git a/jsontext/decode.go b/jsontext/decode.go index 6f14095..da35475 100644 --- a/jsontext/decode.go +++ b/jsontext/decode.go @@ -437,13 +437,13 @@ func (d *decoderState) SkipUntil(depth int, length int64) error { return nil } -// ReadToken reads the next [Token], advancing the read offset. +// ReadToken reads the next [RawToken], advancing the read offset. // The returned token is only valid until the next Peek, Read, or Skip call. // It returns [io.EOF] if there are no more tokens. -func (d *Decoder) ReadToken() (Token, error) { +func (d *Decoder) ReadToken() (RawToken, error) { return d.s.ReadToken() } -func (d *decoderState) ReadToken() (Token, error) { +func (d *decoderState) ReadToken() (RawToken, error) { // Determine the next kind. 
var err error var next Kind @@ -453,7 +453,7 @@ func (d *decoderState) ReadToken() (Token, error) { if d.peekErr != nil { err := d.peekErr d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error - return Token{}, err + return RawToken{}, err } next = Kind(d.buf[pos]).normalize() d.peekPos = 0 // reset cache @@ -468,7 +468,7 @@ func (d *decoderState) ReadToken() (Token, error) { if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { err = io.EOF // EOF possibly if no Tokens present after top-level value } - return Token{}, wrapSyntacticError(d, err, pos, 0) + return RawToken{}, wrapSyntacticError(d, err, pos, 0) } } @@ -481,13 +481,13 @@ func (d *decoderState) ReadToken() (Token, error) { if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { err = wrapSyntacticError(d, err, pos, 0) - return Token{}, d.checkDelimBeforeIOError(delim, err) + return RawToken{}, d.checkDelimBeforeIOError(delim, err) } } } next = Kind(d.buf[pos]).normalize() if d.Tokens.needDelim(next) != delim { - return Token{}, d.checkDelim(delim, next) + return RawToken{}, d.checkDelim(delim, next) } } @@ -498,46 +498,46 @@ func (d *decoderState) ReadToken() (Token, error) { if jsonwire.ConsumeNull(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "null") if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += len("null") } if err = d.Tokens.appendLiteral(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-len("null"), +1) // report position at start of literal + return RawToken{}, wrapSyntacticError(d, err, pos-len("null"), +1) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos - return Null, nil + return Null.raw, nil case 'f': if jsonwire.ConsumeFalse(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "false") if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } 
else { pos += len("false") } if err = d.Tokens.appendLiteral(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-len("false"), +1) // report position at start of literal + return RawToken{}, wrapSyntacticError(d, err, pos-len("false"), +1) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos - return False, nil + return False.raw, nil case 't': if jsonwire.ConsumeTrue(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "true") if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += len("true") } if err = d.Tokens.appendLiteral(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-len("true"), +1) // report position at start of literal + return RawToken{}, wrapSyntacticError(d, err, pos-len("true"), +1) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos - return True, nil + return True.raw, nil case '"': var flags jsonwire.ValueFlags // TODO: Preserve this in Token? 
@@ -547,7 +547,7 @@ func (d *decoderState) ReadToken() (Token, error) { newAbsPos := d.baseOffset + int64(pos) n = int(newAbsPos - oldAbsPos) if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += n @@ -555,20 +555,20 @@ func (d *decoderState) ReadToken() (Token, error) { if d.Tokens.Last.NeedObjectName() { if !d.Flags.Get(jsonflags.AllowDuplicateNames) { if !d.Tokens.Last.isValidNamespace() { - return Token{}, wrapSyntacticError(d, errInvalidNamespace, pos-n, +1) + return RawToken{}, wrapSyntacticError(d, errInvalidNamespace, pos-n, +1) } if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) { err = wrapWithObjectName(ErrDuplicateName, d.buf[pos-n:pos]) - return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string + return RawToken{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string } } d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds } if err = d.Tokens.appendString(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string + return RawToken{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string } d.prevStart, d.prevEnd = pos-n, pos - return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil + return RawToken{dBuf: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil case '0': // NOTE: Since JSON numbers are not self-terminating, @@ -579,20 +579,20 @@ func (d *decoderState) ReadToken() (Token, error) { newAbsPos := d.baseOffset + int64(pos) n = int(newAbsPos - oldAbsPos) if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += n } if err = d.Tokens.appendNumber(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report 
position at start of number + return RawToken{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of number } d.prevStart, d.prevEnd = pos-n, pos - return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil + return RawToken{dBuf: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil case '{': if err = d.Tokens.pushObject(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } d.Names.push() if !d.Flags.Get(jsonflags.AllowDuplicateNames) { @@ -600,11 +600,11 @@ func (d *decoderState) ReadToken() (Token, error) { } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ObjectStart, nil + return ObjectStart.raw, nil case '}': if err = d.Tokens.popObject(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } d.Names.pop() if !d.Flags.Get(jsonflags.AllowDuplicateNames) { @@ -612,27 +612,27 @@ func (d *decoderState) ReadToken() (Token, error) { } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ObjectEnd, nil + return ObjectEnd.raw, nil case '[': if err = d.Tokens.pushArray(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ArrayStart, nil + return ArrayStart.raw, nil case ']': if err = d.Tokens.popArray(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ArrayEnd, nil + return ArrayEnd.raw, nil default: err = jsonwire.NewInvalidCharacterError(d.buf[pos:], "at start of value") - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } diff --git a/jsontext/decode_test.go b/jsontext/decode_test.go index 80f235d..98fe7e8 100644 --- a/jsontext/decode_test.go +++ 
b/jsontext/decode_test.go @@ -59,7 +59,7 @@ func testDecoder(t *testing.T, where jsontest.CasePos, typeName string, td coder } t.Fatalf("%s: Decoder.ReadToken error: %v", where, err) } - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, Raw(tok.Clone())) if td.pointers != nil { pointers = append(pointers, dec.StackPointer()) } @@ -94,7 +94,7 @@ func testDecoder(t *testing.T, where jsontest.CasePos, typeName string, td coder } t.Fatalf("%s: Decoder.ReadToken error: %v", where, err) } - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, Raw(tok.Clone())) default: val, err := dec.ReadValue() if err != nil { @@ -148,7 +148,7 @@ func testFaultyDecoder(t *testing.T, where jsontest.CasePos, typeName string, td } continue } - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, Raw(tok.Clone())) } if !equalTokens(tokens, td.tokens) { t.Fatalf("%s: tokens mismatch:\ngot %s\nwant %s", where, tokens, td.tokens) @@ -1007,9 +1007,9 @@ func testDecoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, in var gotErr error switch wantOut := call.wantOut.(type) { case Token: - var gotOut Token + var gotOut RawToken gotOut, gotErr = dec.ReadToken() - if gotOut.String() != wantOut.String() { + if Raw(gotOut).String() != wantOut.String() { t.Fatalf("%s: %d: Decoder.ReadToken = %v, want %v", where, i, gotOut, wantOut) } case Value: diff --git a/jsontext/encode_test.go b/jsontext/encode_test.go index fe8af3e..c5dd71d 100644 --- a/jsontext/encode_test.go +++ b/jsontext/encode_test.go @@ -74,8 +74,11 @@ func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName stri } default: val := Value(tok.String()) - if tok.Kind() == '"' { - val, _ = jsonwire.AppendQuote(nil, tok.String(), &jsonflags.Flags{}) + switch tok.Kind() { + case '"': + val, _ = tok.appendString(nil, &jsonflags.Flags{}) + case '0': + val, _ = tok.appendNumber(nil, &jsonflags.Flags{}) } if err := enc.WriteValue(val); err != nil { t.Fatalf("%s: Encoder.WriteValue 
error: %v", where, err) diff --git a/jsontext/example_test.go b/jsontext/example_test.go index 3ab3e2d..6f547ae 100644 --- a/jsontext/example_test.go +++ b/jsontext/example_test.go @@ -42,6 +42,7 @@ func Example_stringReplace() { for { // Read a token from the input. tok, err := dec.ReadToken() + tokW := jsontext.Raw(tok) if err != nil { if err == io.EOF { break @@ -53,11 +54,11 @@ func Example_stringReplace() { // replace each occurrence with "Go" instead. if tok.Kind() == '"' && strings.Contains(tok.String(), "Golang") { replacements = append(replacements, dec.StackPointer()) - tok = jsontext.String(strings.ReplaceAll(tok.String(), "Golang", "Go")) + tokW = jsontext.String(strings.ReplaceAll(tok.String(), "Golang", "Go")) } // Write the (possibly modified) token to the output. - if err := enc.WriteToken(tok); err != nil { + if err := enc.WriteToken(tokW); err != nil { log.Fatal(err) } } diff --git a/jsontext/fuzz_test.go b/jsontext/fuzz_test.go index 055eed4..7de95e1 100644 --- a/jsontext/fuzz_test.go +++ b/jsontext/fuzz_test.go @@ -70,8 +70,8 @@ func FuzzCoder(f *testing.F) { enc := NewEncoder(dst) for _, tokVal := range tokVals { switch tokVal := tokVal.(type) { - case Token: - if err := enc.WriteToken(tokVal); err != nil { + case RawToken: + if err := enc.WriteToken(Raw(tokVal)); err != nil { t.Fatalf("Encoder.WriteToken error: %v", err) } case Value: @@ -88,14 +88,14 @@ func FuzzCoder(f *testing.F) { if err != nil { t.Fatalf("Decoder.ReadToken error: %v", err) } - got = append(got, tok.Clone()) + got = append(got, Raw(tok.Clone())) } for dec := NewDecoder(dst); dec.PeekKind() > 0; { tok, err := dec.ReadToken() if err != nil { t.Fatalf("Decoder.ReadToken error: %v", err) } - want = append(want, tok.Clone()) + want = append(want, Raw(tok.Clone())) } if !equalTokens(got, want) { t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want) diff --git a/jsontext/token.go b/jsontext/token.go index 70ae346..e22b06f 100644 --- a/jsontext/token.go +++ 
b/jsontext/token.go @@ -16,16 +16,41 @@ import ( // NOTE: Token is analogous to v1 json.Token. -const ( - maxInt64 = math.MaxInt64 - minInt64 = math.MinInt64 - maxUint64 = math.MaxUint64 - minUint64 = 0 // for consistency and readability purposes +var errInvalidToken = errors.New("invalid jsontext.Token") + +func tokenTypeTag() *decodeBuffer { + return &decodeBuffer{} +} - invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call" +// these special tags will have nil buf +var ( + strTag = tokenTypeTag() + uintTag = tokenTypeTag() + intTag = tokenTypeTag() + floatTag = tokenTypeTag() ) -var errInvalidToken = errors.New("invalid jsontext.Token") +// RawToken is like [Token], and is returned by [Decoder.ReadToken]. +// +// Use [Raw] to convert it to [Token] for [Encoder.WriteToken]. +type RawToken struct { + nonComparable + + // dBuf contains a reference to the dBuf decode buffer. + // It is only valid if num == dBuf.previousOffsetStart(). + dBuf *decodeBuffer + num uint64 +} + +func (t RawToken) isRaw() bool { + return t.dBuf.buf != nil +} + +func (t RawToken) ensureValid() { + if uint64(t.dBuf.previousOffsetStart()) != t.num { + panic("invalid jsontext.Token; it has been voided by a subsequent json.Decoder call") + } +} // Token represents a lexical JSON token, which may be one of the following: // - a JSON literal (i.e., null, true, or false) @@ -42,49 +67,21 @@ type Token struct { // Tokens can exist in either a "raw" or an "exact" form. // Tokens produced by the Decoder are in the "raw" form. - // Tokens returned by constructors are usually in the "exact" form. + // Tokens returned by constructors are in the "exact" form. // The Encoder accepts Tokens in either the "raw" or "exact" form. + + // raw may contain a valid RawToken if raw.dBuf.buf is non-nil. 
// - // The following chart shows the possible values for each Token type: - // ╔═════════════════╦════════════╤════════════╤════════════╗ - // ║ Token type ║ raw field │ str field │ num field ║ - // ╠═════════════════╬════════════╪════════════╪════════════╣ - // ║ null (raw) ║ "null" │ "" │ 0 ║ - // ║ false (raw) ║ "false" │ "" │ 0 ║ - // ║ true (raw) ║ "true" │ "" │ 0 ║ - // ║ string (raw) ║ non-empty │ "" │ offset ║ - // ║ string (string) ║ nil │ non-empty │ 0 ║ - // ║ number (raw) ║ non-empty │ "" │ offset ║ - // ║ number (float) ║ nil │ "f" │ non-zero ║ - // ║ number (int64) ║ nil │ "i" │ non-zero ║ - // ║ number (uint64) ║ nil │ "u" │ non-zero ║ - // ║ object (delim) ║ "{" or "}" │ "" │ 0 ║ - // ║ array (delim) ║ "[" or "]" │ "" │ 0 ║ - // ╚═════════════════╩════════════╧════════════╧════════════╝ + // If raw.dBuf equals to floatTag, intTag, or, uintTag, + // the token is a JSON number in the "exact" form and + // raw.num should be interpreted as a float64, int64, or uint64, respectively. // - // Notes: - // - For tokens stored in "raw" form, the num field contains the - // absolute offset determined by raw.previousOffsetStart(). - // The buffer itself is stored in raw.previousBuffer(). - // - JSON literals and structural characters are always in the "raw" form. - // - JSON strings and numbers can be in either "raw" or "exact" forms. - // - The exact zero value of JSON strings and numbers in the "exact" forms - // have ambiguous representation. Thus, they are always represented - // in the "raw" form. - - // raw contains a reference to the raw decode buffer. - // If non-nil, then its value takes precedence over str and num. - // It is only valid if num == raw.previousOffsetStart(). - raw *decodeBuffer - - // str is the unescaped JSON string if num is zero. - // Otherwise, it is "f", "i", or "u" if num should be interpreted - // as a float64, int64, or uint64, respectively. 
- str string + // If raw.dBuf equals to strTag, the token is a JSON string in the "string" form and + // str is the unescaped JSON string. + raw RawToken - // num is a float64, int64, or uint64 stored as a uint64 value. - // It is non-zero for any JSON number in the "exact" form. - num uint64 + // str is the unescaped JSON string + str string } // TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues? @@ -99,16 +96,15 @@ var ( ArrayStart Token = rawToken("[") ArrayEnd Token = rawToken("]") - zeroString Token = rawToken(`""`) - zeroNumber Token = rawToken(`0`) - - nanString Token = String("NaN") - pinfString Token = String("Infinity") - ninfString Token = String("-Infinity") + nanString = "NaN" + pinfString = "Infinity" + ninfString = "-Infinity" ) func rawToken(s string) Token { - return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}} + return Token{raw: RawToken{ + dBuf: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}, + }} } // Bool constructs a Token representing a JSON boolean. @@ -123,98 +119,109 @@ func Bool(b bool) Token { // The provided string should contain valid UTF-8, otherwise invalid characters // may be mangled as the Unicode replacement character. func String(s string) Token { - if len(s) == 0 { - return zeroString + return Token{ + raw: RawToken{dBuf: strTag}, + str: s, } - return Token{str: s} } // Float constructs a Token representing a JSON number. // The values NaN, +Inf, and -Inf will be represented -// as a JSON string with the values "NaN", "Infinity", and "-Infinity". +// as a JSON string with the values "NaN", "Infinity", and "-Infinity", +// but still has kind '0' and cannot be used as object keys. 
func Float(n float64) Token { - switch { - case math.Float64bits(n) == 0: - return zeroNumber - case math.IsNaN(n): - return nanString - case math.IsInf(n, +1): - return pinfString - case math.IsInf(n, -1): - return ninfString + return Token{ + raw: RawToken{dBuf: floatTag, num: math.Float64bits(n)}, } - return Token{str: "f", num: math.Float64bits(n)} } // Int constructs a Token representing a JSON number from an int64. func Int(n int64) Token { - if n == 0 { - return zeroNumber + return Token{ + raw: RawToken{dBuf: intTag, num: uint64(n)}, } - return Token{str: "i", num: uint64(n)} } // Uint constructs a Token representing a JSON number from a uint64. func Uint(n uint64) Token { - if n == 0 { - return zeroNumber + return Token{ + raw: RawToken{dBuf: uintTag, num: n}, } - return Token{str: "u", num: uint64(n)} +} + +func Raw(t RawToken) Token { + return Token{raw: t} } // Clone makes a copy of the Token such that its value remains valid // even after a subsequent [Decoder.Read] call. -func (t Token) Clone() Token { +func (t RawToken) Clone() RawToken { + if t.dBuf == nil { + return t // zero value + } // TODO: Allow caller to avoid any allocations? - if raw := t.raw; raw != nil { - // Avoid copying globals. - if t.raw.prevStart == 0 { - switch t.raw { - case Null.raw: - return Null - case False.raw: - return False - case True.raw: - return True - case ObjectStart.raw: - return ObjectStart - case ObjectEnd.raw: - return ObjectEnd - case ArrayStart.raw: - return ArrayStart - case ArrayEnd.raw: - return ArrayEnd - } + // Avoid copying globals. 
+ if t.dBuf.prevStart == 0 { + switch t.dBuf { + case Null.raw.dBuf: + return Null.raw + case False.raw.dBuf: + return False.raw + case True.raw.dBuf: + return True.raw + case ObjectStart.raw.dBuf: + return ObjectStart.raw + case ObjectEnd.raw.dBuf: + return ObjectEnd.raw + case ArrayStart.raw.dBuf: + return ArrayStart.raw + case ArrayEnd.raw.dBuf: + return ArrayEnd.raw } + } - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - buf := bytes.Clone(raw.previousBuffer()) - return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} + t.ensureValid() + buf := bytes.Clone(t.dBuf.previousBuffer()) + return RawToken{dBuf: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} + +} + +// Clone makes a copy of the Token such that its value remains valid +// even after a subsequent [Decoder.Read] call. +func (t Token) Clone() Token { + if t.raw.dBuf == nil { + return t // zero value } - return t + if t.raw.isRaw() { + return Token{raw: t.raw.Clone()} + } + return t // exact form. } // Bool returns the value for a JSON boolean. // It panics if the token kind is not a JSON boolean. -func (t Token) Bool() bool { - switch t.raw { - case True.raw: +func (t RawToken) Bool() bool { + switch t.dBuf { + case True.raw.dBuf: return true - case False.raw: + case False.raw.dBuf: return false default: panic("invalid JSON token kind: " + t.Kind().String()) } } +func (t Token) Bool() bool { + return t.raw.Bool() +} + // appendString appends a JSON string to dst and returns it. // It panics if t is not a JSON string. func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) { - if raw := t.raw; raw != nil { + if t.raw.isRaw() { + // TODO: ensure valid? // Handle raw string value. 
- buf := raw.previousBuffer() + buf := t.raw.dBuf.previousBuffer() if Kind(buf[0]) == '"' { if jsonwire.ConsumeSimpleString(buf) == len(buf) { return append(dst, buf...), nil @@ -222,7 +229,7 @@ func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) dst, _, err := jsonwire.ReformatString(dst, buf, flags) return dst, err } - } else if len(t.str) != 0 && t.num == 0 { + } else if t.raw.dBuf == strTag { // Handle exact string value. return jsonwire.AppendQuote(dst, t.str, flags) } @@ -230,6 +237,27 @@ func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) panic("invalid JSON token kind: " + t.Kind().String()) } +// String returns the unescaped string value for a JSON string. +// For other JSON kinds, this returns the raw JSON representation. +func (t RawToken) String() string { + return string(t.bytes()) +} + +func (t RawToken) bytes() []byte { + if t.dBuf == nil { + return []byte("") + } + t.ensureValid() + buf := t.dBuf.previousBuffer() + if buf[0] == '"' { + // TODO: Preserve ValueFlags in Token? + isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) + return jsonwire.UnquoteMayCopy(buf, isVerbatim) + } + // Handle tokens that are not JSON strings for fmt.Stringer. + return buf +} + // String returns the unescaped string value for a JSON string. // For other JSON kinds, this returns the raw JSON representation. func (t Token) String() string { @@ -243,33 +271,33 @@ func (t Token) String() string { } return s } + func (t Token) string() (string, []byte) { - if raw := t.raw; raw != nil { - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - buf := raw.previousBuffer() - if buf[0] == '"' { - // TODO: Preserve ValueFlags in Token? - isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) - return "", jsonwire.UnquoteMayCopy(buf, isVerbatim) - } - // Handle tokens that are not JSON strings for fmt.Stringer. 
- return "", buf - } - if len(t.str) != 0 && t.num == 0 { - return t.str, nil - } // Handle tokens that are not JSON strings for fmt.Stringer. - if t.num > 0 { - switch t.str[0] { - case 'f': - return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil - case 'i': - return strconv.FormatInt(int64(t.num), 10), nil - case 'u': - return strconv.FormatUint(uint64(t.num), 10), nil + switch t.raw.dBuf { + case strTag: + return t.str, nil + case nil: + return "", nil + case floatTag: + v := math.Float64frombits(t.raw.num) + switch { + case math.IsNaN(v): + return nanString, nil + case math.IsInf(v, +1): + return pinfString, nil + case math.IsInf(v, -1): + return ninfString, nil + default: + return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.raw.num), 64)), nil } + case intTag: + return strconv.FormatInt(int64(t.raw.num), 10), nil + case uintTag: + return strconv.FormatUint(uint64(t.raw.num), 10), nil + } + if t.raw.isRaw() { + return "", t.raw.bytes() } return "", nil } @@ -277,193 +305,140 @@ func (t Token) string() (string, []byte) { // appendNumber appends a JSON number to dst and returns it. // It panics if t is not a JSON number. func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) { - if raw := t.raw; raw != nil { + if t.raw.isRaw() { // Handle raw number value. - buf := raw.previousBuffer() + buf := t.raw.dBuf.previousBuffer() if Kind(buf[0]).normalize() == '0' { dst, _, err := jsonwire.ReformatNumber(dst, buf, flags) return dst, err } - } else if t.num != 0 { + } else { // Handle exact number value. 
- switch t.str[0] { - case 'f': - return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil - case 'i': - return strconv.AppendInt(dst, int64(t.num), 10), nil - case 'u': - return strconv.AppendUint(dst, uint64(t.num), 10), nil + switch t.raw.dBuf { + case floatTag: + v := math.Float64frombits(t.raw.num) + switch { + case math.IsNaN(v): + return jsonwire.AppendQuote(dst, nanString, flags) + case math.IsInf(v, +1): + return jsonwire.AppendQuote(dst, pinfString, flags) + case math.IsInf(v, -1): + return jsonwire.AppendQuote(dst, ninfString, flags) + default: + return jsonwire.AppendFloat(dst, v, 64), nil + } + case intTag: + return strconv.AppendInt(dst, int64(t.raw.num), 10), nil + case uintTag: + return strconv.AppendUint(dst, uint64(t.raw.num), 10), nil } } panic("invalid JSON token kind: " + t.Kind().String()) } -// Float returns the floating-point value for a JSON number. +var ErrUnexpectedKind = errors.New("unexpected JSON token kind") + +// ParseFloat parses the floating-point value for a JSON number. // It returns a NaN, +Inf, or -Inf value for any JSON string // with the values "NaN", "Infinity", or "-Infinity". -// It panics for all other cases. -func (t Token) Float() float64 { - if raw := t.raw; raw != nil { - // Handle raw number value. - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - buf := raw.previousBuffer() - if Kind(buf[0]).normalize() == '0' { - fv, _ := jsonwire.ParseFloat(buf, 64) - return fv - } - } else if t.num != 0 { - // Handle exact number value. - switch t.str[0] { - case 'f': - return math.Float64frombits(t.num) - case 'i': - return float64(int64(t.num)) - case 'u': - return float64(uint64(t.num)) - } +func (t RawToken) ParseFloat(bits int) (float64, error) { + t.ensureValid() + buf := t.dBuf.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + return jsonwire.ParseFloat(buf, bits) } - // Handle string values with "NaN", "Infinity", or "-Infinity". 
- if t.Kind() == '"' { + if buf[0] == '"' { switch t.String() { - case "NaN": - return math.NaN() - case "Infinity": - return math.Inf(+1) - case "-Infinity": - return math.Inf(-1) + case nanString: + return math.NaN(), nil + case pinfString: + return math.Inf(+1), nil + case ninfString: + return math.Inf(-1), nil } } - panic("invalid JSON token kind: " + t.Kind().String()) + return 0., ErrUnexpectedKind +} + +// Float returns the floating-point value for a JSON number. +// It panics if the token is not created with [Float]. +func (t Token) Float() float64 { + if t.raw.dBuf == floatTag { + return math.Float64frombits(t.raw.num) + } + panic("JSON token not created with Float") +} + +func (t RawToken) ParseInt(bits int) (int64, error) { + t.ensureValid() + buf := t.dBuf.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + return jsonwire.ParseInt(buf, bits) + } + return 0, ErrUnexpectedKind } // Int returns the signed integer value for a JSON number. -// The fractional component of any number is ignored (truncation toward zero). -// Any number beyond the representation of an int64 will be saturated -// to the closest representable value. -// It panics if the token kind is not a JSON number. +// It panics if the token is not created with [Int]. func (t Token) Int() int64 { - if raw := t.raw; raw != nil { - // Handle raw integer value. - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - neg := false - buf := raw.previousBuffer() - if len(buf) > 0 && buf[0] == '-' { - neg, buf = true, buf[1:] - } - if numAbs, err := jsonwire.ParseUint(buf, 64); err == nil { - if neg { - if numAbs > -minInt64 { - return minInt64 - } - return -1 * int64(numAbs) - } else { - if numAbs > +maxInt64 { - return maxInt64 - } - return +1 * int64(numAbs) - } - } - } else if t.num != 0 { - // Handle exact integer value. 
- switch t.str[0] { - case 'i': - return int64(t.num) - case 'u': - if t.num > maxInt64 { - return maxInt64 - } - return int64(t.num) - } + if t.raw.dBuf == intTag { + return int64(t.raw.num) } + panic("JSON token not created with Int") +} - // Handle JSON number that is a floating-point value. - if t.Kind() == '0' { - switch fv := t.Float(); { - case fv >= maxInt64: - return maxInt64 - case fv <= minInt64: - return minInt64 - default: - return int64(fv) // truncation toward zero - } +func (t RawToken) ParseUint(bits int) (uint64, error) { + t.ensureValid() + buf := t.dBuf.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + return jsonwire.ParseUint(buf, bits) } - - panic("invalid JSON token kind: " + t.Kind().String()) + return 0, ErrUnexpectedKind } // Uint returns the unsigned integer value for a JSON number. -// The fractional component of any number is ignored (truncation toward zero). -// Any number beyond the representation of an uint64 will be saturated -// to the closest representable value. -// It panics if the token kind is not a JSON number. +// It panics if the token is not created with [Uint]. func (t Token) Uint() uint64 { - // NOTE: This accessor returns 0 for any negative JSON number, - // which might be surprising, but is at least consistent with the behavior - // of saturating out-of-bounds numbers to the closest representable number. - - if raw := t.raw; raw != nil { - // Handle raw integer value. - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - neg := false - buf := raw.previousBuffer() - if len(buf) > 0 && buf[0] == '-' { - neg, buf = true, buf[1:] - } - if num, err := jsonwire.ParseUint(buf, 64); err == nil { - if neg { - return minUint64 - } - return num - } - } else if t.num != 0 { - // Handle exact integer value. 
- switch t.str[0] { - case 'u': - return t.num - case 'i': - if int64(t.num) < minUint64 { - return minUint64 - } - return uint64(int64(t.num)) - } + if t.raw.dBuf == uintTag { + return t.raw.num } + panic("JSON token not created with Uint") +} - // Handle JSON number that is a floating-point value. - if t.Kind() == '0' { - switch fv := t.Float(); { - case fv >= maxUint64: - return maxUint64 - case fv <= minUint64: - return minUint64 - default: - return uint64(fv) // truncation toward zero - } +// Raw returns the RawToken embedded. +// It panics if the token is not created with [Raw]. +func (t Token) Raw() RawToken { + if t.raw.isRaw() { + return t.raw } + panic("JSON token not created with Raw") +} - panic("invalid JSON token kind: " + t.Kind().String()) +// Kind returns the token kind. +func (t RawToken) Kind() Kind { + if t.dBuf == nil { // for zero value RawToken + return invalidKind + } + t.ensureValid() + return Kind(t.dBuf.buf[t.dBuf.prevStart]).normalize() } // Kind returns the token kind. func (t Token) Kind() Kind { switch { - case t.raw != nil: - raw := t.raw - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - return Kind(t.raw.buf[raw.prevStart]).normalize() - case t.num != 0: + case t.raw.dBuf == nil: + return invalidKind // zero value Token + case t.raw.isRaw(): + return t.raw.Kind() + case t.raw.dBuf == intTag || t.raw.dBuf == uintTag || t.raw.dBuf == floatTag: + // For NaN and Inf, we still return '0' as the Kind + // even if it will be encoded as a string. + // We don't want to use this for object key, right? 
return '0' - case len(t.str) != 0: + case t.raw.dBuf == strTag: return '"' default: return invalidKind diff --git a/jsontext/token_test.go b/jsontext/token_test.go index 276e329..3c6bea4 100644 --- a/jsontext/token_test.go +++ b/jsontext/token_test.go @@ -7,9 +7,17 @@ package jsontext import ( "math" "reflect" + "strconv" "testing" ) +const ( + maxInt64 = math.MaxInt64 + minInt64 = math.MinInt64 + maxUint64 = math.MaxUint64 + minUint64 = 0 // for consistency and readability purposes +) + func TestTokenStringAllocations(t *testing.T) { if testing.CoverMode() != "" { t.Skip("coverage mode breaks the compiler optimization this depends on") @@ -56,35 +64,23 @@ func TestTokenAccessors(t *testing.T) { {String(""), token{String: "", Kind: '"'}}, {String("hello, world!"), token{String: "hello, world!", Kind: '"'}}, {rawToken(`"hello, world!"`), token{String: "hello, world!", Kind: '"'}}, - {Float(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}}, + {Float(0), token{String: "0", Float: 0, Kind: '0'}}, + {Float(1.2), token{String: "1.2", Float: 1.2, Kind: '0'}}, {Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, - {Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '"'}}, - {Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '"'}}, - {Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '"'}}, - {Int(minInt64), token{String: "-9223372036854775808", Float: minInt64, Int: minInt64, Uint: minUint64, Kind: '0'}}, - {Int(minInt64 + 1), token{String: "-9223372036854775807", Float: minInt64 + 1, Int: minInt64 + 1, Uint: minUint64, Kind: '0'}}, - {Int(-1), token{String: "-1", Float: -1, Int: -1, Uint: minUint64, Kind: '0'}}, - {Int(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}}, - {Int(+1), token{String: "1", Float: +1, Int: +1, Uint: +1, Kind: '0'}}, - {Int(maxInt64 - 1), token{String: "9223372036854775806", Float: maxInt64 
- 1, Int: maxInt64 - 1, Uint: maxInt64 - 1, Kind: '0'}}, - {Int(maxInt64), token{String: "9223372036854775807", Float: maxInt64, Int: maxInt64, Uint: maxInt64, Kind: '0'}}, + {Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '0'}}, + {Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '0'}}, + {Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '0'}}, + {Int(minInt64), token{String: "-9223372036854775808", Int: minInt64, Uint: minUint64, Kind: '0'}}, + {Int(minInt64 + 1), token{String: "-9223372036854775807", Int: minInt64 + 1, Kind: '0'}}, + {Int(-1), token{String: "-1", Int: -1, Kind: '0'}}, + {Int(0), token{String: "0", Int: 0, Kind: '0'}}, + {Int(+1), token{String: "1", Int: +1, Kind: '0'}}, + {Int(maxInt64 - 1), token{String: "9223372036854775806", Int: maxInt64 - 1, Kind: '0'}}, + {Int(maxInt64), token{String: "9223372036854775807", Int: maxInt64, Kind: '0'}}, {Uint(minUint64), token{String: "0", Kind: '0'}}, - {Uint(minUint64 + 1), token{String: "1", Float: minUint64 + 1, Int: minUint64 + 1, Uint: minUint64 + 1, Kind: '0'}}, - {Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}}, - {Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, - {rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`1e1000`), token{String: "1e1000", Float: math.Inf(+1), Int: maxInt64, Uint: maxUint64, Kind: '0'}}, - {rawToken(`-1e1000`), token{String: "-1e1000", Float: math.Inf(-1), Int: minInt64, Uint: minUint64, Kind: '0'}}, - {rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`1.1`), token{String: "1.1", 
Float: 1.1, Int: 1, Uint: 1, Kind: '0'}}, - {rawToken(`-0.1`), token{String: "-0.1", Float: -0.1, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`-0.5`), token{String: "-0.5", Float: -0.5, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`-0.9`), token{String: "-0.9", Float: -0.9, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`-1.1`), token{String: "-1.1", Float: -1.1, Int: -1, Uint: 0, Kind: '0'}}, - {rawToken(`99999999999999999999`), token{String: "99999999999999999999", Float: 1e20 - 1, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, - {rawToken(`-99999999999999999999`), token{String: "-99999999999999999999", Float: -1e20 - 1, Int: minInt64, Uint: minUint64, Kind: '0'}}, + {Uint(minUint64 + 1), token{String: "1", Uint: minUint64 + 1, Kind: '0'}}, + {Uint(maxUint64 - 1), token{String: "18446744073709551614", Uint: maxUint64 - 1, Kind: '0'}}, + {Uint(maxUint64), token{String: "18446744073709551615", Uint: maxUint64, Kind: '0'}}, } for _, tt := range tests { @@ -132,6 +128,69 @@ func TestTokenAccessors(t *testing.T) { } } +func TestTokenAccessorRaw(t *testing.T) { + if !reflect.DeepEqual(False, Raw(False.Raw())) { + t.Error("False != Raw(False.Raw())") + } + + raw := func() *RawToken { + defer func() { recover() }() + raw := Float(0.).Raw() + return &raw + }() + if raw != nil { + t.Error("Float(0.).Raw() should panic") + } +} + +func TestTokenParseFloat(t *testing.T) { + tests := []struct { + in string + want float64 + err error + }{ + {`-0`, math.Copysign(0, -1), nil}, + {`1e1000`, math.Inf(+1), strconv.ErrRange}, + {`"Infinity"`, math.Inf(+1), nil}, + {`"-Infinity"`, math.Inf(-1), nil}, + {`"NaN"`, math.NaN(), nil}, + {`"anything"`, 0, ErrUnexpectedKind}, + } + + for _, tt := range tests { + t.Run(tt.in, func(t *testing.T) { + gotV, gotErr := rawToken(tt.in).raw.ParseFloat(64) + if math.Float64bits(gotV) != math.Float64bits(tt.want) { + t.Errorf("RawToken.ParseFloat(64) = %v, want %v", gotV, tt.want) + } + if gotErr != tt.err { + t.Errorf("RawToken.ParseFloat(64) error = %v, want 
%v", gotErr, tt.err) + } + }) + } +} + +func assertParse[T comparable](t *testing.T, s string, parse func(t RawToken, bits int) (T, error), wantV T, wantErr error) { + t.Helper() + gotV, gotErr := parse(rawToken(s).raw, 64) + if gotV != wantV { + t.Errorf("RawToken.ParseXXX(64) = %v, want %v", gotV, wantV) + } + if gotErr != wantErr { + t.Errorf("RawToken.ParseXXX(64) error = %v, want %v", gotErr, wantErr) + } +} + +func TestTokenParseInt(t *testing.T) { + assertParse(t, "123", RawToken.ParseInt, 123, nil) + assertParse(t, "99999999999999999999", RawToken.ParseInt, math.MaxInt64, strconv.ErrRange) + assertParse(t, "false", RawToken.ParseInt, 0, ErrUnexpectedKind) + + assertParse(t, "123", RawToken.ParseUint, 123, nil) + assertParse(t, "-1", RawToken.ParseUint, 0, strconv.ErrSyntax) + assertParse(t, "false", RawToken.ParseUint, 0, ErrUnexpectedKind) +} + func TestTokenClone(t *testing.T) { tests := []struct { in Token @@ -157,7 +216,7 @@ func TestTokenClone(t *testing.T) { if !reflect.DeepEqual(got, tt.in) { t.Errorf("Token(%s) == Token(%s).Clone() = false, want true", tt.in, tt.in) } - gotExactRaw := got.raw == tt.in.raw + gotExactRaw := got.raw.dBuf == tt.in.raw.dBuf if gotExactRaw != tt.wantExactRaw { t.Errorf("Token(%s).raw == Token(%s).Clone().raw = %v, want %v", tt.in, tt.in, gotExactRaw, tt.wantExactRaw) } diff --git a/v1/stream.go b/v1/stream.go index 03c12e1..4cf3c5e 100644 --- a/v1/stream.go +++ b/v1/stream.go @@ -206,7 +206,7 @@ func (dec *Decoder) Token() (Token, error) { if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); useNumber { return Number(tok.String()), nil } - return tok.Float(), nil + return tok.ParseFloat(64) case '{', '}', '[', ']': return Delim(k), nil default: From 76003806597b4b2b9a1da6b9662e02ed2423dd1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E7=8E=AE=E6=96=87?= Date: Wed, 12 Feb 2025 11:31:24 +0800 Subject: [PATCH 4/4] jsontext: remove support for write Float(+Inf) as "Infinity" Similarly, Parsing 
"Infinity" as +Inf is also removed This feature is not standard and should not be enabled by default. If this is really desired, user can still write a wrapper that invokes Float or String conditionally. Instead of embedding the maybe undesired branches in jsontext. It is harder to get consistent API design with the new RawToken refactor. Now Token.Float() should only process tokens returned by jsontext.Float, we need to differentate infinity float and infinity string. So what kind should the infinity float be? - if '0': Good for that it cannot be used as object key, but can make Token.String() confusing; also confusing for that a '0' token actually encodes to string. - if '"': Need many extra branching in Token.Kind and Token.appendString. And the kind returned by jsontext.Float() is not consistent. So, just remove these. --- arshal_default.go | 17 ++++++++++++++- jsontext/coder_test.go | 11 +++------- jsontext/encode_test.go | 6 ++++++ jsontext/token.go | 47 ++++++----------------------------------- jsontext/token_test.go | 37 ++++++-------------------------- 5 files changed, 39 insertions(+), 79 deletions(-) diff --git a/arshal_default.go b/arshal_default.go index ac95fa8..1ba09dd 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -43,6 +43,10 @@ var ( bytesType = reflect.TypeFor[[]byte]() emptyStructType = reflect.TypeFor[struct{}]() + + nanString = jsontext.String("NaN") + pinfString = jsontext.String("Infinity") + ninfString = jsontext.String("-Infinity") ) const startDetectingCyclesAfter = 1000 @@ -581,7 +585,18 @@ func makeFloatArshaler(t reflect.Type) *arshaler { err := fmt.Errorf("unsupported value: %v", fv) return newMarshalErrorBefore(enc, t, err) } - return enc.WriteToken(jsontext.Float(fv)) + var token jsontext.Token + switch { + case math.IsInf(fv, 1): + token = pinfString + case math.IsInf(fv, -1): + token = ninfString + case math.IsNaN(fv): + token = nanString + default: + panic("unreachable") + } + return enc.WriteToken(token) } // Optimize 
for marshaling without preceding whitespace or string escaping. diff --git a/jsontext/coder_test.go b/jsontext/coder_test.go index 073c0b0..60e3d0e 100644 --- a/jsontext/coder_test.go +++ b/jsontext/coder_test.go @@ -144,10 +144,9 @@ var coderTestdata = []coderTestdataEntry{{ 0, -0, 0.0, -0.0, 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001, 1e1000, -5e-324, 1e+100, 1.7976931348623157e+308, 9007199254740990, 9007199254740991, 9007199254740992, 9007199254740993, 9007199254740994, - "Infinity", "-Infinity", "NaN", -9223372036854775808, 9223372036854775807, 0, 18446744073709551615 ] `, - outCompacted: `[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,"Infinity","-Infinity","NaN",-9223372036854775808,9223372036854775807,0,18446744073709551615]`, + outCompacted: `[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,-9223372036854775808,9223372036854775807,0,18446744073709551615]`, outIndented: `[ 0, -0, @@ -163,26 +162,22 @@ var coderTestdata = []coderTestdataEntry{{ 9007199254740992, 9007199254740993, 9007199254740994, - "Infinity", - "-Infinity", - "NaN", -9223372036854775808, 9223372036854775807, 0, 18446744073709551615 ]`, - outCanonicalized: `[0,0,0,0,1,1.7976931348623157e+308,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740992,9007199254740994,"Infinity","-Infinity","NaN",-9223372036854776000,9223372036854776000,0,18446744073709552000]`, + outCanonicalized: 
`[0,0,0,0,1,1.7976931348623157e+308,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740992,9007199254740994,-9223372036854776000,9223372036854776000,0,18446744073709552000]`, tokens: []Token{ ArrayStart, Float(0), Float(math.Copysign(0, -1)), rawToken(`0.0`), rawToken(`-0.0`), rawToken(`1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001`), rawToken(`1e1000`), Float(-5e-324), Float(1e100), Float(1.7976931348623157e+308), Float(9007199254740990), Float(9007199254740991), Float(9007199254740992), rawToken(`9007199254740993`), rawToken(`9007199254740994`), - Float(math.Inf(+1)), Float(math.Inf(-1)), Float(math.NaN()), Int(minInt64), Int(maxInt64), Uint(minUint64), Uint(maxUint64), ArrayEnd, }, pointers: []Pointer{ - "", "/0", "/1", "/2", "/3", "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/14", "/15", "/16", "/17", "/18", "/19", "/20", "", + "", "/0", "/1", "/2", "/3", "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/14", "/15", "/16", "/17", "", }, }, { name: jsontest.Name("ObjectN0"), diff --git a/jsontext/encode_test.go b/jsontext/encode_test.go index c5dd71d..fb68b45 100644 --- a/jsontext/encode_test.go +++ b/jsontext/encode_test.go @@ -8,6 +8,7 @@ import ( "bytes" "errors" "io" + "math" "path" "slices" "testing" @@ -244,6 +245,11 @@ var encoderErrorTestdata = []struct { calls: []encoderMethodCall{ {Value(`0.e`), newInvalidCharacterError("e", "in number (expecting digit)").withPos(`0.`, ""), ""}, }, +}, { + name: jsontest.Name("InfinityNumber"), + calls: []encoderMethodCall{ + {Float(math.Inf(+1)), E(errors.New("unsupported value: +Inf")), ""}, + }, }, { name: jsontest.Name("TruncatedObject/AfterStart"), calls: []encoderMethodCall{ diff --git a/jsontext/token.go b/jsontext/token.go index e22b06f..599bd6c 100644 --- a/jsontext/token.go +++ b/jsontext/token.go @@ -7,6 +7,7 @@ package jsontext import ( "bytes" "errors" + "fmt" 
"math" "strconv" @@ -95,10 +96,6 @@ var ( ObjectEnd Token = rawToken("}") ArrayStart Token = rawToken("[") ArrayEnd Token = rawToken("]") - - nanString = "NaN" - pinfString = "Infinity" - ninfString = "-Infinity" ) func rawToken(s string) Token { @@ -125,10 +122,8 @@ func String(s string) Token { } } -// Float constructs a Token representing a JSON number. -// The values NaN, +Inf, and -Inf will be represented -// as a JSON string with the values "NaN", "Infinity", and "-Infinity", -// but still has kind '0' and cannot be used as object keys. +// Float constructs a Token representing a JSON number from a float64. +// The values NaN, +Inf, and -Inf will result in error if passed to [Encoder.WriteToken] func Float(n float64) Token { return Token{ raw: RawToken{dBuf: floatTag, num: math.Float64bits(n)}, @@ -280,17 +275,7 @@ func (t Token) string() (string, []byte) { case nil: return "", nil case floatTag: - v := math.Float64frombits(t.raw.num) - switch { - case math.IsNaN(v): - return nanString, nil - case math.IsInf(v, +1): - return pinfString, nil - case math.IsInf(v, -1): - return ninfString, nil - default: - return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.raw.num), 64)), nil - } + return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.raw.num), 64)), nil case intTag: return strconv.FormatInt(int64(t.raw.num), 10), nil case uintTag: @@ -317,16 +302,10 @@ func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) switch t.raw.dBuf { case floatTag: v := math.Float64frombits(t.raw.num) - switch { - case math.IsNaN(v): - return jsonwire.AppendQuote(dst, nanString, flags) - case math.IsInf(v, +1): - return jsonwire.AppendQuote(dst, pinfString, flags) - case math.IsInf(v, -1): - return jsonwire.AppendQuote(dst, ninfString, flags) - default: - return jsonwire.AppendFloat(dst, v, 64), nil + if math.IsInf(v, 0) || math.IsNaN(v) { + return nil, fmt.Errorf("unsupported value: %v", v) } + return jsonwire.AppendFloat(dst, v, 64), nil 
case intTag: return strconv.AppendInt(dst, int64(t.raw.num), 10), nil case uintTag: @@ -348,18 +327,6 @@ func (t RawToken) ParseFloat(bits int) (float64, error) { if Kind(buf[0]).normalize() == '0' { return jsonwire.ParseFloat(buf, bits) } - - if buf[0] == '"' { - switch t.String() { - case nanString: - return math.NaN(), nil - case pinfString: - return math.Inf(+1), nil - case ninfString: - return math.Inf(-1), nil - } - } - return 0., ErrUnexpectedKind } diff --git a/jsontext/token_test.go b/jsontext/token_test.go index 3c6bea4..146a7bb 100644 --- a/jsontext/token_test.go +++ b/jsontext/token_test.go @@ -68,8 +68,8 @@ func TestTokenAccessors(t *testing.T) { {Float(1.2), token{String: "1.2", Float: 1.2, Kind: '0'}}, {Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, {Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '0'}}, - {Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '0'}}, - {Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '0'}}, + {Float(math.Inf(+1)), token{String: "+Inf", Float: math.Inf(+1), Kind: '0'}}, + {Float(math.Inf(-1)), token{String: "-Inf", Float: math.Inf(-1), Kind: '0'}}, {Int(minInt64), token{String: "-9223372036854775808", Int: minInt64, Uint: minUint64, Kind: '0'}}, {Int(minInt64 + 1), token{String: "-9223372036854775807", Int: minInt64 + 1, Kind: '0'}}, {Int(-1), token{String: "-1", Int: -1, Kind: '0'}}, @@ -143,33 +143,6 @@ func TestTokenAccessorRaw(t *testing.T) { } } -func TestTokenParseFloat(t *testing.T) { - tests := []struct { - in string - want float64 - err error - }{ - {`-0`, math.Copysign(0, -1), nil}, - {`1e1000`, math.Inf(+1), strconv.ErrRange}, - {`"Infinity"`, math.Inf(+1), nil}, - {`"-Infinity"`, math.Inf(-1), nil}, - {`"NaN"`, math.NaN(), nil}, - {`"anything"`, 0, ErrUnexpectedKind}, - } - - for _, tt := range tests { - t.Run(tt.in, func(t *testing.T) { - gotV, gotErr := 
rawToken(tt.in).raw.ParseFloat(64) - if math.Float64bits(gotV) != math.Float64bits(tt.want) { - t.Errorf("RawToken.ParseFloat(64) = %v, want %v", gotV, tt.want) - } - if gotErr != tt.err { - t.Errorf("RawToken.ParseFloat(64) error = %v, want %v", gotErr, tt.err) - } - }) - } -} - func assertParse[T comparable](t *testing.T, s string, parse func(t RawToken, bits int) (T, error), wantV T, wantErr error) { t.Helper() gotV, gotErr := parse(rawToken(s).raw, 64) @@ -181,7 +154,11 @@ func assertParse[T comparable](t *testing.T, s string, parse func(t RawToken, bi } } -func TestTokenParseInt(t *testing.T) { +func TestTokenParseNumber(t *testing.T) { + assertParse(t, `1.23`, RawToken.ParseFloat, 1.23, nil) + assertParse(t, `1e1000`, RawToken.ParseFloat, math.Inf(+1), strconv.ErrRange) + assertParse(t, `"anything"`, RawToken.ParseFloat, 0, ErrUnexpectedKind) + assertParse(t, "123", RawToken.ParseInt, 123, nil) assertParse(t, "99999999999999999999", RawToken.ParseInt, math.MaxInt64, strconv.ErrRange) assertParse(t, "false", RawToken.ParseInt, 0, ErrUnexpectedKind)