Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

internal/colltab: improve numeric sorting #50

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package collate

import (
"bytes"
"strings"
"testing"

"golang.org/x/text/internal/colltab"
Expand Down Expand Up @@ -473,7 +474,20 @@ func TestNumeric(t *testing.T) {
{"A-1", "A-2", -1},
{"A-2", "A-12", -1},
{"A-12", "A-2", 1},
{"A-0001", "A-1", 0},
{"A-0001", "A-1", 1},
{"0000-", "1-", -1},
{"00001", "1", 1},
{"00", "00", 0},
{"0", "00", -1},
{"00", "0", 1},
{"01", "001", -1},
{"01", "1", 1},
{"1", "01", -1},
{"9-A", "0-A", 1},
{"99-A", "0-A", 1},
{"9-A", "1-A", 1},
{"99-A", "1-A", 1},
{strings.Repeat("9", 270)+"-A", "1-A", 1},
} {
if got := c.CompareString(tt.a, tt.b); got != tt.want {
t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
Expand Down
21 changes: 19 additions & 2 deletions internal/colltab/numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,20 @@ func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
}
// ce might have been grown already, so take it instead of buf.
nc.init(ce, len(buf), isZero)
oldIndex := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
nc.b = s
n += sz
if !nc.update(ce) {
break
}
oldIndex = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[oldIndex+1:], nc.elems[oldIndex:])
nc.elems[oldIndex], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -105,14 +111,20 @@ func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n
return ce, n
}
nc.init(ce, len(buf), isZero)
oldIndex := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
nc.s = s
n += sz
if !nc.update(ce) {
break
}
oldIndex = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[oldIndex+1:], nc.elems[oldIndex:])
nc.elems[oldIndex], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -122,6 +134,7 @@ type numberConverter struct {
elems []Elem
nDigits int
lenIndex int
zero int

s string // set if the input was of type string
b []byte // set if the input was of type []byte
Expand All @@ -133,6 +146,7 @@ func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
// Insert a marker indicating the start of a number and a placeholder
// for the number of digits.
if isZero {
nc.zero++
elems = append(elems[:oldLen], nc.w.numberStart, 0)
} else {
elems = append(elems, 0, 0)
Expand Down Expand Up @@ -217,20 +231,23 @@ const maxDigits = 1<<maxPrimaryBits - 1
func (nc *numberConverter) update(elems []Elem) bool {
isZero, ok := nc.checkNextDigit(elems)
if nc.nDigits == 0 && isZero {
if nc.zero+1 < maxDigits {
nc.zero++
}
return true
}
nc.elems = elems
if !ok {
return false
}
nc.nDigits++
return nc.nDigits < maxDigits
return nc.nDigits+1 < maxDigits
}

// result fills in the length element for the digit sequence and returns the
// completed collation elements.
func (nc *numberConverter) result() []Elem {
e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
e, _ := MakeElem(nc.nDigits+1, defaultSecondary, defaultTertiary, 0)
nc.elems[nc.lenIndex] = e
return nc.elems
}
57 changes: 38 additions & 19 deletions internal/colltab/numeric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,34 +78,37 @@ func TestNumericAppendNext(t *testing.T) {
{"a", p(5)},
{"klm", p(99)},
{"aa", p(5, 5)},
{"1", p(120, 1, 101)},
{"0", p(120, 0)},
{"01", p(120, 1, 101)},
{"0001", p(120, 1, 101)},
{"10", p(120, 2, 101, 100)},
{"99", p(120, 2, 119, 119)},
{"9999", p(120, 4, 119, 119, 119, 119)},
{"1a", p(120, 1, 101, 5)},
{"0b", p(120, 0, 6)},
{"01c", p(120, 1, 101, 8, 2)},
{"10x", p(120, 2, 101, 100, 200)},
{"99y", p(120, 2, 119, 119, 201)},
{"9999nop", p(120, 4, 119, 119, 119, 119, 121)},
{"1", p(120, 2, 101, 1)},
{"0", p(120, 1, 2)},
{"00", p(120, 1, 3)},
{"01", p(120, 2, 101, 2)},
{"0001", p(120, 2, 101, 4)},
{"02", p(120, 2, 102, 2)},
{"10", p(120, 3, 101, 100, 1)},
{"99", p(120, 3, 119, 119, 1)},
{"9999", p(120, 5, 119, 119, 119, 119, 1)},
{"1a", p(120, 2, 101, 1, 5)},
{"0b", p(120, 1, 2, 6)},
{"01c", p(120, 2, 101, 2, 8, 2)},
{"10x", p(120, 3, 101, 100, 1, 200)},
{"99y", p(120, 3, 119, 119, 1, 201)},
{"9999nop", p(120, 5, 119, 119, 119, 119, 1, 121)},

// Allow follow-up collation elements if they have a zero non-primary.
{"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
{"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3, e(1)}},
{
"129",
[]Elem{
e(120), e(3),
e(120), e(4),
e(101, digSec, digTert+1),
e(102, digSec, digTert+3),
e(119, digSec, digTert+1),
e(1),
},
},

// Ensure AppendNext* adds to the given buffer.
{"a10", p(5, 120, 2, 101, 100)},
{"a10", p(5, 120, 3, 101, 100, 1)},
} {
nw := NewNumericWeighter(numWeighter)

Expand Down Expand Up @@ -137,12 +140,28 @@ func TestNumericOverflow(t *testing.T) {

got, n := nw.AppendNextString(nil, manyDigits)

if n != maxDigits {
t.Errorf("n: got %d; want %d", n, maxDigits)
if n != maxDigits-1 { // A digit is lost because elem.Primary() == 0 has odd ordering properties and is skipped
t.Errorf("n: got %d; want %d", n, maxDigits-1)
}

if got[1].Primary() != maxDigits {
t.Errorf("primary(e[1]): got %d; want %d", n, maxDigits)
t.Errorf("primary(e[1]): got %d; want %d", got[1].Primary(), maxDigits)
}
}

func TestNumericZeroOverflow(t *testing.T) {
manyDigits := strings.Repeat("0", maxDigits+1) + "a"

nw := NewNumericWeighter(numWeighter)

got, n := nw.AppendNextString(nil, manyDigits)

if n != maxDigits+2 { // Zeros after maxDigits-1 are ignored but are still consumed so that a number with leading zeros is ordered after a number with less leading zeros
t.Errorf("n: got %d; want %d", n, maxDigits+2)
}

if got[2].Primary() != maxDigits {
t.Errorf("primary(e[2]): got %d; want %d", got[1].Primary(), maxDigits)
}
}

Expand Down