diff --git a/buff.go b/buff.go
index d1d7d24..211c842 100644
--- a/buff.go
+++ b/buff.go
@@ -3,6 +3,7 @@ package fixedwidth
 import (
 	"bytes"
 	"errors"
+	"strings"
 	"unicode/utf8"
 )
 
@@ -183,6 +184,54 @@ type rawValue struct {
 	codepointIndices []int
 }
 
+func (r rawValue) trimLeft(cutset string) rawValue {
+	newData := strings.TrimLeft(r.data, cutset)
+	leftRemovedBytes := len(r.data) - len(newData)
+
+	if r.codepointIndices == nil {
+		return rawValue{data: newData}
+	}
+
+	newIndices := r.trimCodepointIndices(leftRemovedBytes, 0)
+	return rawValue{data: newData, codepointIndices: newIndices}
+}
+
+func (r rawValue) trimRight(cutset string) rawValue {
+	newData := strings.TrimRight(r.data, cutset)
+	rightRemovedBytes := len(r.data) - len(newData)
+
+	if r.codepointIndices == nil {
+		return rawValue{data: newData}
+	}
+
+	newIndices := r.trimCodepointIndices(0, rightRemovedBytes)
+	return rawValue{data: newData, codepointIndices: newIndices}
+}
+
+func (r rawValue) trim(cutset string) rawValue {
+	leftTrimmed := strings.TrimLeft(r.data, cutset)
+	leftRemovedBytes := len(r.data) - len(leftTrimmed)
+	bothTrimmed := strings.TrimRight(leftTrimmed, cutset)
+	rightRemovedBytes := len(leftTrimmed) - len(bothTrimmed)
+
+	if r.codepointIndices == nil {
+		return rawValue{data: bothTrimmed}
+	}
+
+	newIndices := r.trimCodepointIndices(leftRemovedBytes, rightRemovedBytes)
+	return rawValue{data: bothTrimmed, codepointIndices: newIndices}
+}
+
+func (r rawValue) trimCodepointIndices(leftRemovedBytes int, rightRemovedBytes int) []int {
+	newIndices := make([]int, 0, len(r.codepointIndices))
+	for _, idx := range r.codepointIndices {
+		if idx >= leftRemovedBytes && idx < len(r.data)-rightRemovedBytes {
+			newIndices = append(newIndices, idx-leftRemovedBytes)
+		}
+	}
+	return newIndices
+}
+
 func newRawValue(data string, useCodepointIndices bool) (rawValue, error) {
 	value := rawValue{
 		data: data,
diff --git a/decode.go b/decode.go
index bf8162c..b43ba20 100644
--- a/decode.go
+++ b/decode.go
@@ -8,7 +8,6 @@ import (
 	"io"
 	"reflect"
 	"strconv"
-	"strings"
 )
 
 var (
@@ -197,20 +196,20 @@ func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) {
 }
 
 func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawValue {
-	var trimFunc func(string) string
+	var trimFunc func(r rawValue) rawValue
 
 	switch format.alignment {
-	case left:
-		trimFunc = func(s string) string {
-			return strings.TrimRight(s, string(format.padChar))
+	case left: // Aligned left, so trim from right side.
+		trimFunc = func(r rawValue) rawValue {
+			return r.trimRight(string(format.padChar))
 		}
-	case right:
-		trimFunc = func(s string) string {
-			return strings.TrimLeft(s, string(format.padChar))
+	case right: // Aligned right, so trim from left side.
+		trimFunc = func(r rawValue) rawValue {
+			return r.trimLeft(string(format.padChar))
 		}
 	default:
-		trimFunc = func(s string) string {
-			return strings.Trim(s, string(format.padChar))
+		trimFunc = func(r rawValue) rawValue {
+			return r.trim(string(format.padChar))
 		}
 	}
 
@@ -227,10 +226,19 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
 			relevantIndices = value.codepointIndices[startPos-1 : endPos]
 			lineData = value.data[relevantIndices[0]:value.codepointIndices[endPos]]
 		}
-		return rawValue{
-			data:             trimFunc(lineData),
-			codepointIndices: relevantIndices,
+
+		newIndices := relevantIndices
+		if relevantIndices[0] > 0 {
+			// We trimmed data from the front of the string.
+			// We need to adjust the codepoint indices to reflect this, as they have shifted.
+			removedFromFront := relevantIndices[0]
+			newIndices = make([]int, 0, len(relevantIndices))
+			for _, idx := range relevantIndices {
+				newIndices = append(newIndices, idx-removedFromFront)
+			}
 		}
+
+		return trimFunc(rawValue{data: lineData, codepointIndices: newIndices})
 	} else {
 		if len(value.data) == 0 || startPos > len(value.data) {
 			return rawValue{data: ""}
@@ -238,9 +246,7 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
 		if endPos > len(value.data) {
 			endPos = len(value.data)
 		}
-		return rawValue{
-			data: trimFunc(value.data[startPos-1 : endPos]),
-		}
+		return trimFunc(rawValue{data: value.data[startPos-1 : endPos]})
 	}
 }
 
diff --git a/decode_test.go b/decode_test.go
index 4afc2f3..b5597cd 100644
--- a/decode_test.go
+++ b/decode_test.go
@@ -386,6 +386,122 @@ func TestDecodeSetUseCodepointIndices(t *testing.T) {
 
 }
 
+func TestDecodeSetUseCodepointIndices_Nested(t *testing.T) {
+	type Nested struct {
+		First  string `fixed:"1,3"`
+		Second string `fixed:"4,6"`
+	}
+
+	type Test struct {
+		First  string `fixed:"1,3"`
+		Second Nested `fixed:"4,9"`
+		Third  string `fixed:"10,12"`
+		Fourth Nested `fixed:"13,18"`
+		Fifth  string `fixed:"19,21"`
+	}
+
+	for _, tt := range []struct {
+		name     string
+		raw      []byte
+		expected Test
+	}{
+		{
+			name: "All ASCII characters",
+			raw:  []byte("123ABC456DEF789GHI012\n"),
+			expected: Test{
+				First:  "123",
+				Second: Nested{First: "ABC", Second: "456"},
+				Third:  "DEF",
+				Fourth: Nested{First: "789", Second: "GHI"},
+				Fifth:  "012",
+			},
+		},
+		{
+			name: "Multi-byte characters",
+			raw:  []byte("123x☃x456x☃x789x☃x012\n"),
+			expected: Test{
+				First:  "123",
+				Second: Nested{First: "x☃x", Second: "456"},
+				Third:  "x☃x",
+				Fourth: Nested{First: "789", Second: "x☃x"},
+				Fifth:  "012",
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			d := NewDecoder(bytes.NewReader(tt.raw))
+			d.SetUseCodepointIndices(true)
+			var s Test
+			err := d.Decode(&s)
+			if err != nil {
+				t.Errorf("Unexpected err: %v", err)
+			}
+			if !reflect.DeepEqual(tt.expected, s) {
+				t.Errorf("Decode(%v) want %v, have %v", tt.raw, tt.expected, s)
+			}
+		})
+	}
+}
+
+func TestDecodeSetUseCodepointIndices_PaddingTrimmed(t *testing.T) {
+	type Nested struct {
+		First  int64  `fixed:"1,2,right,0"`
+		Second string `fixed:"3,4"`
+		Third  string `fixed:"5,6"`
+		Fourth string `fixed:"7,8"`
+	}
+	type Test struct {
+		First  Nested `fixed:"1,8"`
+		Second string `fixed:"9,10"`
+	}
+
+	for _, tt := range []struct {
+		name     string
+		raw      []byte
+		expected Test
+	}{
+		{
+			name: "All ASCII characters",
+			raw:  []byte("00      11"),
+			expected: Test{
+				First: Nested{
+					First:  0,
+					Second: "",
+					Third:  "",
+					Fourth: "",
+				},
+				Second: "11",
+			},
+		},
+		{
+			name: "Multi-byte characters",
+			raw:  []byte("00      ☃☃"),
+			expected: Test{
+				First: Nested{
+					First:  0,
+					Second: "",
+					Third:  "",
+					Fourth: "",
+				},
+				Second: "☃☃",
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			d := NewDecoder(bytes.NewReader(tt.raw))
+			d.SetUseCodepointIndices(true)
+			var s Test
+			err := d.Decode(&s)
+			if err != nil {
+				t.Errorf("Unexpected err: %v", err)
+			}
+			if !reflect.DeepEqual(tt.expected, s) {
+				t.Errorf("Decode(%v) want %v, have %v", tt.raw, tt.expected, s)
+			}
+		})
+	}
+}
+
 // Verify the behavior of Decoder.Decode at the end of a file. See
 // https://github.com/ianlopshire/go-fixedwidth/issues/6 for more details.
 func TestDecode_EOF(t *testing.T) {