ianlopshire · ianlopshire · Feb 8, 2024 · Feb 8, 2024 · Feb 8, 2024 · Feb 8, 2024
diff --git a/buff.go b/buff.go
@@ -3,6 +3,7 @@ package fixedwidth
 import (
 	"bytes"
 	"errors"
+	"strings"
 	"unicode/utf8"
 )
 
@@ -183,6 +184,54 @@ type rawValue struct {
 	codepointIndices []int
 }
 
+// trimLeft returns a copy of r whose data has every leading rune found in
+// cutset removed. If r carries codepoint indices, the returned value carries
+// the surviving indices, shifted to be relative to the trimmed data.
+func (r rawValue) trimLeft(cutset string) rawValue {
+	out := rawValue{data: strings.TrimLeft(r.data, cutset)}
+	if r.codepointIndices != nil {
+		removed := len(r.data) - len(out.data)
+		out.codepointIndices = r.trimCodepointIndices(removed, 0)
+	}
+	return out
+}
+
+// trimRight returns a copy of r whose data has every trailing rune found in
+// cutset removed. If r carries codepoint indices, those that pointed into the
+// removed tail are dropped; the remaining ones are kept as they were.
+func (r rawValue) trimRight(cutset string) rawValue {
+	out := rawValue{data: strings.TrimRight(r.data, cutset)}
+	if r.codepointIndices != nil {
+		removed := len(r.data) - len(out.data)
+		out.codepointIndices = r.trimCodepointIndices(0, removed)
+	}
+	return out
+}
+
+// trim returns a copy of r whose data has every leading and trailing rune
+// found in cutset removed. If r carries codepoint indices, the returned value
+// carries only those inside the kept span, shifted to the new start.
+func (r rawValue) trim(cutset string) rawValue {
+	noLead := strings.TrimLeft(r.data, cutset)
+	out := rawValue{data: strings.TrimRight(noLead, cutset)}
+	if r.codepointIndices != nil {
+		// Measure each side separately so the indices can be rebased.
+		head, tail := len(r.data)-len(noLead), len(noLead)-len(out.data)
+		out.codepointIndices = r.trimCodepointIndices(head, tail)
+	}
+	return out
+}
+
+func (r rawValue) trimCodepointIndices(leftRemovedBytes int, rightRemovedBytes int) []int {
+	kept := make([]int, 0, len(r.codepointIndices)) // non-nil even when nothing survives
+	for _, offset := range r.codepointIndices {
+		if leftRemovedBytes <= offset && offset < len(r.data)-rightRemovedBytes {
+			kept = append(kept, offset-leftRemovedBytes) // rebase onto the new start
+		}
+	}
+	return kept
+}
+
 func newRawValue(data string, useCodepointIndices bool) (rawValue, error) {
 	value := rawValue{
 		data: data,

diff --git a/decode.go b/decode.go
@@ -8,7 +8,6 @@ import (
 	"io"
 	"reflect"
 	"strconv"
-	"strings"
 )
 
 var (
@@ -197,20 +196,20 @@ func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) {
 }
 
 func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawValue {
-	var trimFunc func(string) string
+	var trimFunc func(r rawValue) rawValue
 
 	switch format.alignment {
-	case left:
-		trimFunc = func(s string) string {
-			return strings.TrimRight(s, string(format.padChar))
+	case left: // Aligned left, so trim from right side.
+		trimFunc = func(r rawValue) rawValue {
+			return r.trimRight(string(format.padChar))
 		}
-	case right:
-		trimFunc = func(s string) string {
-			return strings.TrimLeft(s, string(format.padChar))
+	case right: // Aligned right, so trim from left side.
+		trimFunc = func(r rawValue) rawValue {
+			return r.trimLeft(string(format.padChar))
 		}
 	default:
-		trimFunc = func(s string) string {
-			return strings.Trim(s, string(format.padChar))
+		trimFunc = func(r rawValue) rawValue {
+			return r.trim(string(format.padChar))
 		}
 	}
 
@@ -227,20 +226,27 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
 			relevantIndices = value.codepointIndices[startPos-1 : endPos]
 			lineData = value.data[relevantIndices[0]:value.codepointIndices[endPos]]
 		}
-		return rawValue{
-			data:             trimFunc(lineData),
-			codepointIndices: relevantIndices,
+
+		newIndices := relevantIndices
+		if relevantIndices[0] > 0 {
+			// We trimmed data from the front of the string.
+			// We need to adjust the codepoint indices to reflect this, as they have shifted.
+			removedFromFront := relevantIndices[0]
+			newIndices = make([]int, 0, len(relevantIndices))
+			for _, idx := range relevantIndices {
+				newIndices = append(newIndices, idx-removedFromFront)
+			}
 		}
+
+		return trimFunc(rawValue{data: lineData, codepointIndices: newIndices})
 	} else {
 		if len(value.data) == 0 || startPos > len(value.data) {
 			return rawValue{data: ""}
 		}
 		if endPos > len(value.data) {
 			endPos = len(value.data)
 		}
-		return rawValue{
-			data: trimFunc(value.data[startPos-1 : endPos]),
-		}
+		return trimFunc(rawValue{data: value.data[startPos-1 : endPos]})
 	}
 }
 

diff --git a/decode_test.go b/decode_test.go
@@ -386,6 +386,122 @@ func TestDecodeSetUseCodepointIndices(t *testing.T) {
 
 }
 
+// TestDecodeSetUseCodepointIndices_Nested decodes a line into a struct that mixes
+// string fields with nested struct fields while codepoint indices are enabled.
+func TestDecodeSetUseCodepointIndices_Nested(t *testing.T) {
+	type Nested struct {
+		First  string `fixed:"1,3"`
+		Second string `fixed:"4,6"`
+	}
+	type Test struct {
+		First  string `fixed:"1,3"`
+		Second Nested `fixed:"4,9"`
+		Third  string `fixed:"10,12"`
+		Fourth Nested `fixed:"13,18"`
+		Fifth  string `fixed:"19,21"`
+	}
+
+	for _, tc := range []struct {
+		label string
+		input []byte
+		want  Test
+	}{
+		{
+			label: "All ASCII characters",
+			input: []byte("123ABC456DEF789GHI012\n"),
+			want: Test{
+				First:  "123",
+				Second: Nested{First: "ABC", Second: "456"},
+				Third:  "DEF",
+				Fourth: Nested{First: "789", Second: "GHI"},
+				Fifth:  "012",
+			},
+		},
+		{
+			label: "Multi-byte characters",
+			input: []byte("123x☃x456x☃x789x☃x012\n"),
+			want: Test{
+				First:  "123",
+				Second: Nested{First: "x☃x", Second: "456"},
+				Third:  "x☃x",
+				Fourth: Nested{First: "789", Second: "x☃x"},
+				Fifth:  "012",
+			},
+		},
+	} {
+		t.Run(tc.label, func(t *testing.T) {
+			dec := NewDecoder(bytes.NewReader(tc.input))
+			dec.SetUseCodepointIndices(true)
+			var got Test
+			if err := dec.Decode(&got); err != nil {
+				t.Errorf("Unexpected err: %v", err)
+			}
+			if !reflect.DeepEqual(tc.want, got) {
+				t.Errorf("Decode(%v) want %v, have %v", tc.input, tc.want, got)
+			}
+		})
+	}
+}
+
+// TestDecodeSetUseCodepointIndices_PaddingTrimmed decodes an all-zero/empty nested struct with codepoint indices enabled.
+func TestDecodeSetUseCodepointIndices_PaddingTrimmed(t *testing.T) {
+	type Nested struct {
+		First  int64  `fixed:"1,2,right,0"`
+		Second string `fixed:"3,4"`
+		Third  string `fixed:"5,6"`
+		Fourth string `fixed:"7,8"`
+	}
+	type Test struct {
+		First  Nested `fixed:"1,8"`
+		Second string `fixed:"9,10"`
+	}
+
+	for _, tc := range []struct {
+		label string
+		input []byte
+		want  Test
+	}{
+		{
+			label: "All ASCII characters",
+			input: []byte("00      11"),
+			want: Test{
+				First: Nested{
+					First:  0,
+					Second: "",
+					Third:  "",
+					Fourth: "",
+				},
+				Second: "11",
+			},
+		},
+		{
+			label: "Multi-byte characters",
+			input: []byte("00      ☃☃"),
+			want: Test{
+				First: Nested{
+					First:  0,
+					Second: "",
+					Third:  "",
+					Fourth: "",
+				},
+				Second: "☃☃",
+			},
+		},
+	} {
+		t.Run(tc.label, func(t *testing.T) {
+			dec := NewDecoder(bytes.NewReader(tc.input))
+			dec.SetUseCodepointIndices(true)
+			var got Test
+			if err := dec.Decode(&got); err != nil {
+				t.Errorf("Unexpected err: %v", err)
+			}
+			if !reflect.DeepEqual(tc.want, got) {
+				t.Errorf("Decode(%v) want %v, have %v", tc.input, tc.want, got)
+			}
+		})
+	}
+}
+
 // Verify the behavior of Decoder.Decode at the end of a file. See
 // https://github.com/ianlopshire/go-fixedwidth/issues/6 for more details.
 func TestDecode_EOF(t *testing.T) {