Tokens by value (#187)
This results in about an 8% improvement in speed.
eloff authored and alecthomas committed Nov 3, 2018
1 parent 5a47317 commit 9c3abea
Showing 26 changed files with 2,536 additions and 98 deletions.
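
For context, here is a minimal, self-contained sketch (not part of the commit) of the pattern being applied throughout: tokens become small comparable values rather than pointers, and a zero-value EOF sentinel replaces nil as the end-of-stream marker. The int-typed Type field below is a simplified stand-in for chroma's TokenType.

package main

import "fmt"

// Token stands in for chroma's Token: a small, comparable struct.
// Returning it by value avoids a heap allocation per token and lets
// iterators signal end-of-stream with a zero-value sentinel.
type Token struct {
	Type  int
	Value string
}

// EOF is the zero Token, used as the end-of-stream sentinel.
var EOF Token

// Iterator yields successive tokens by value, then EOF.
type Iterator func() Token

func main() {
	tokens := []Token{{1, "hello"}, {2, " "}, {1, "world"}}
	it := Iterator(func() Token {
		if len(tokens) == 0 {
			return EOF
		}
		t := tokens[0]
		tokens = tokens[1:]
		return t
	})
	for t := it(); t != EOF; t = it() {
		fmt.Print(t.Value)
	}
	fmt.Println()
}
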
2 changes: 1 addition & 1 deletion cmd/chroma/main.go
@@ -259,7 +259,7 @@ func format(w io.Writer, style *chroma.Style, it chroma.Iterator) {

func check(filename string, it chroma.Iterator) {
line, col := 1, 0
-for token := it(); token != nil; token = it() {
+for token := it(); token != chroma.EOF; token = it() {
if token.Type == chroma.Error {
fmt.Printf("%s:%d:%d %q\n", filename, line, col, token.String())
}
10 changes: 5 additions & 5 deletions coalesce.go
@@ -6,17 +6,17 @@ func Coalesce(lexer Lexer) Lexer { return &coalescer{lexer} }
type coalescer struct{ Lexer }

func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
-var prev *Token
+var prev Token
it, err := d.Lexer.Tokenise(options, text)
if err != nil {
return nil, err
}
-return func() *Token {
-for token := it(); token != nil; token = it() {
+return func() Token {
+for token := it(); token != EOF; token = it() {
if len(token.Value) == 0 {
continue
}
-if prev == nil {
+if prev == EOF {
prev = token
} else {
if prev.Type == token.Type && len(prev.Value) < 8192 {
@@ -29,7 +29,7 @@ func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, e
}
}
out := prev
-prev = nil
+prev = EOF
return out
}, nil
}
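
The coalescer above uses EOF as its "nothing pending" marker: prev starts at EOF, buffers a token, and is reset to EOF once the buffered token is emitted. A self-contained sketch of that buffering pattern over a slice (simplified Token as in the first sketch; the 8192-byte cap is omitted):

package main

import "fmt"

type Token struct {
	Type  int
	Value string
}

var EOF Token

// coalesce merges adjacent tokens of the same type, using EOF the way
// the coalescer above does: prev == EOF means "no token pending".
func coalesce(tokens []Token) []Token {
	var out []Token
	prev := EOF
	for _, t := range tokens {
		switch {
		case t.Value == "":
			// drop empty tokens, as the coalescer does
		case prev == EOF:
			prev = t
		case prev.Type == t.Type:
			prev.Value += t.Value // prev is an independent value, not shared state
		default:
			out = append(out, prev)
			prev = t
		}
	}
	if prev != EOF {
		out = append(out, prev)
	}
	return out
}

func main() {
	in := []Token{{1, "!"}, {1, "@"}, {1, "#"}, {2, " "}}
	fmt.Println(coalesce(in)) // [{1 !@#} {2  }]
}
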
2 changes: 1 addition & 1 deletion coalesce_test.go
@@ -14,6 +14,6 @@ func TestCoalesce(t *testing.T) {
}))
actual, err := Tokenise(lexer, nil, "!@#$")
assert.NoError(t, err)
-expected := []*Token{{Punctuation, "!@#$"}}
+expected := []Token{{Punctuation, "!@#$"}}
assert.Equal(t, expected, actual)
}
34 changes: 17 additions & 17 deletions delegate.go
@@ -31,7 +31,7 @@ func (d *delegatingLexer) Config() *Config {
// An insertion is the character range where language tokens should be inserted.
type insertion struct {
start, end int
-tokens []*Token
+tokens []Token
}

func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
@@ -44,15 +44,15 @@ func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Itera
insertions := []*insertion{}
var insert *insertion
offset := 0
-var last *Token
+var last Token
for _, t := range tokens {
if t.Type == Other {
-if last != nil && insert != nil && last.Type != Other {
+if last != EOF && insert != nil && last.Type != Other {
insert.end = offset
}
others.WriteString(t.Value)
} else {
-if last == nil || last.Type == Other {
+if last == EOF || last.Type == Other {
insert = &insertion{start: offset}
insertions = append(insertions, insert)
}
@@ -73,12 +73,12 @@ }
}

// Interleave the two sets of tokens.
-out := []*Token{}
+var out []Token
offset = 0 // Offset into text.
tokenIndex := 0
-nextToken := func() *Token {
+nextToken := func() Token {
if tokenIndex >= len(rootTokens) {
-return nil
+return EOF
}
t := rootTokens[tokenIndex]
tokenIndex++
@@ -95,18 +95,18 @@ }
}
t := nextToken()
i := nextInsertion()
-for t != nil || i != nil {
+for t != EOF || i != nil {
// fmt.Printf("%d->%d:%q %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
-if t == nil || (i != nil && i.start < offset+len(t.Value)) {
-var l *Token
+if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
+var l Token
l, t = splitToken(t, i.start-offset)
-if l != nil {
+if l != EOF {
out = append(out, l)
offset += len(l.Value)
}
out = append(out, i.tokens...)
offset += i.end - i.start
-if t == nil {
+if t == EOF {
t = nextToken()
}
i = nextInsertion()
@@ -119,15 +119,15 @@ func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Itera
return Literator(out...), nil
}

-func splitToken(t *Token, offset int) (l *Token, r *Token) {
-if t == nil {
-return nil, nil
+func splitToken(t Token, offset int) (l Token, r Token) {
+if t == EOF {
+return EOF, EOF
}
if offset == 0 {
-return nil, t
+return EOF, t
}
if offset == len(t.Value) {
-return t, nil
+return t, EOF
}
l = t.Clone()
r = t.Clone()
12 changes: 7 additions & 5 deletions delegate_test.go
@@ -1,6 +1,7 @@
package chroma

import (
"fmt"
"testing"

"github.com/alecthomas/assert"
@@ -31,9 +32,9 @@ func TestDelegate(t *testing.T) {
testdata := []struct {
name string
source string
-expected []*Token
+expected []Token
}{
{"SourceInMiddle", `hello world <? what ?> there`, []*Token{
{"SourceInMiddle", `hello world <? what ?> there`, []Token{
{Keyword, "hello"},
{TextWhitespace, " "},
{Name, "world"},
@@ -48,7 +49,7 @@ func TestDelegate(t *testing.T) {
{TextWhitespace, " "},
{Name, "there"},
}},
{"SourceBeginning", `<? what ?> hello world there`, []*Token{
{"SourceBeginning", `<? what ?> hello world there`, []Token{
{CommentPreproc, "<?"},
{TextWhitespace, " "},
{Keyword, "what"},
@@ -61,7 +62,7 @@ func TestDelegate(t *testing.T) {
{TextWhitespace, " "},
{Name, "there"},
}},
{"SourceEnd", `hello world <? what there`, []*Token{
{"SourceEnd", `hello world <? what there`, []Token{
{Keyword, "hello"},
{TextWhitespace, " "},
{Name, "world"},
@@ -73,7 +74,7 @@ func TestDelegate(t *testing.T) {
{TextWhitespace, " "},
{Error, "there"},
}},
{"SourceMultiple", "hello world <? what ?> hello there <? what ?> hello", []*Token{
{"SourceMultiple", "hello world <? what ?> hello there <? what ?> hello", []Token{
{Keyword, "hello"},
{TextWhitespace, " "},
{Name, "world"},
@@ -104,6 +105,7 @@ func TestDelegate(t *testing.T) {
it, err := delegate.Tokenise(nil, test.source)
assert.NoError(t, err)
actual := it.Tokens()
+fmt.Println(actual)
assert.Equal(t, test.expected, actual)
})
}
2 changes: 1 addition & 1 deletion formatters/api.go
@@ -11,7 +11,7 @@ import (
var (
// NoOp formatter.
NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, iterator chroma.Iterator) error {
-for t := iterator(); t != nil; t = iterator() {
+for t := iterator(); t != chroma.EOF; t = iterator() {
if _, err := io.WriteString(w, t.Value); err != nil {
return err
}
6 changes: 3 additions & 3 deletions formatters/html/html.go
@@ -129,7 +129,7 @@ func (f *Formatter) restyle(style *chroma.Style) (*chroma.Style, error) {
// We deliberately don't use html/template here because it is two orders of magnitude slower (benchmarked).
//
// OTOH we need to be super careful about correct escaping...
-func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []*chroma.Token) (err error) { // nolint: gocyclo
+func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []chroma.Token) (err error) { // nolint: gocyclo
style, err = f.restyle(style)
if err != nil {
return err
@@ -391,8 +391,8 @@ func compressStyle(s string) string {
return strings.Join(out, ";")
}

-func splitTokensIntoLines(tokens []*chroma.Token) (out [][]*chroma.Token) {
-line := []*chroma.Token{}
+func splitTokensIntoLines(tokens []chroma.Token) (out [][]chroma.Token) {
+var line []chroma.Token
for _, token := range tokens {
for strings.Contains(token.Value, "\n") {
parts := strings.SplitAfterN(token.Value, "\n", 2)
6 changes: 3 additions & 3 deletions formatters/html/html_test.go
@@ -32,11 +32,11 @@ func BenchmarkHTMLFormatter(b *testing.B) {
}

func TestSplitTokensIntoLines(t *testing.T) {
-in := []*chroma.Token{
+in := []chroma.Token{
{Value: "hello", Type: chroma.NameKeyword},
{Value: " world\nwhat?\n", Type: chroma.NameKeyword},
}
-expected := [][]*chroma.Token{
+expected := [][]chroma.Token{
{
{Type: chroma.NameKeyword, Value: "hello"},
{Type: chroma.NameKeyword, Value: " world\n"},
@@ -53,7 +53,7 @@ func TestSplitTokensIntoLines(t *testing.T) {
}

func TestIteratorPanicRecovery(t *testing.T) {
-it := func() *chroma.Token {
+it := func() chroma.Token {
panic(errors.New("bad"))
}
err := New().Format(ioutil.Discard, styles.Fallback, it)
2 changes: 1 addition & 1 deletion formatters/json.go
@@ -12,7 +12,7 @@ import (
var JSON = Register("json", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
fmt.Fprintln(w, "[")
i := 0
-for t := it(); t != nil; t = it() {
+for t := it(); t != chroma.EOF; t = it() {
if i > 0 {
fmt.Fprintln(w, ",")
}
2 changes: 1 addition & 1 deletion formatters/tokens.go
@@ -9,7 +9,7 @@ import (

// Tokens formatter outputs the raw token structures.
var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
-for t := it(); t != nil; t = it() {
+for t := it(); t != chroma.EOF; t = it() {
if _, err := fmt.Fprintln(w, t.GoString()); err != nil {
return err
}
2 changes: 1 addition & 1 deletion formatters/tty_indexed.go
@@ -216,7 +216,7 @@ func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style, it chroma
}
}()
theme := styleToEscapeSequence(c.table, style)
-for token := it(); token != nil; token = it() {
+for token := it(); token != chroma.EOF; token = it() {
// TODO: Cache token lookups?
clr, ok := theme[token.Type]
if !ok {
2 changes: 1 addition & 1 deletion formatters/tty_truecolour.go
@@ -11,7 +11,7 @@ import (
var TTY16m = Register("terminal16m", chroma.FormatterFunc(trueColourFormatter))

func trueColourFormatter(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
-for token := it(); token != nil; token = it() {
+for token := it(); token != chroma.EOF; token = it() {
entry := style.Get(token.Type)
if !entry.IsZero() {
out := ""
20 changes: 10 additions & 10 deletions iterator.go
@@ -5,36 +5,36 @@ package chroma
// nil will be returned at the end of the Token stream.
//
// If an error occurs within an Iterator, it may propagate this in a panic. Formatters should recover.
-type Iterator func() *Token
+type Iterator func() Token

// Tokens consumes all tokens from the iterator and returns them as a slice.
-func (i Iterator) Tokens() []*Token {
-out := []*Token{}
-for t := i(); t != nil; t = i() {
+func (i Iterator) Tokens() []Token {
+var out []Token
+for t := i(); t != EOF; t = i() {
out = append(out, t)
}
return out
}

// Concaterator concatenates tokens from a series of iterators.
func Concaterator(iterators ...Iterator) Iterator {
-return func() *Token {
+return func() Token {
for len(iterators) > 0 {
t := iterators[0]()
-if t != nil {
+if t != EOF {
return t
}
iterators = iterators[1:]
}
-return nil
+return EOF
}
}

// Literator converts a sequence of literal Tokens into an Iterator.
-func Literator(tokens ...*Token) Iterator {
-return func() (out *Token) {
+func Literator(tokens ...Token) Iterator {
+return func() Token {
if len(tokens) == 0 {
-return nil
+return EOF
}
token := tokens[0]
tokens = tokens[1:]
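
With EOF in place, the iterator helpers compose without nil checks. A hedged usage sketch against the post-commit API (assuming the Literator/Concaterator signatures shown above and chroma's exported token types):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// Concatenate two literal iterators; the combined stream still ends
	// with a single chroma.EOF sentinel.
	it := chroma.Concaterator(
		chroma.Literator(chroma.Token{Type: chroma.Keyword, Value: "hello"}),
		chroma.Literator(
			chroma.Token{Type: chroma.TextWhitespace, Value: " "},
			chroma.Token{Type: chroma.Name, Value: "world"},
		),
	)
	for t := it(); t != chroma.EOF; t = it() {
		fmt.Printf("%s:%q ", t.Type, t.Value)
	}
	fmt.Println()
}
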
8 changes: 4 additions & 4 deletions lexer.go
@@ -66,12 +66,12 @@ type Token struct {
func (t *Token) String() string { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

-func (t *Token) Clone() *Token {
-clone := &Token{}
-*clone = *t
-return clone
+func (t *Token) Clone() Token {
+return *t
}

+var EOF Token

type TokeniseOptions struct {
// State to start tokenisation in. Defaults to "root".
State string
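
Declaring EOF as var EOF Token makes the sentinel the zero value of Token, so end-of-stream checks are plain value comparisons; this works because Token contains only comparable fields. One caveat, shown below with the simplified Token from the first sketch: any token whose fields are all zero compares equal to EOF, so a lexer must never emit a genuine zero-valued token.

package main

import "fmt"

// Simplified stand-in for chroma's Token (see the first sketch).
type Token struct {
	Type  int
	Value string
}

// The zero Token doubles as the end-of-stream sentinel.
var EOF Token

func main() {
	t := Token{Type: 1, Value: "x"}
	fmt.Println(t == EOF)       // false: a real token
	fmt.Println(Token{} == EOF) // true: a zero token reads as end-of-stream
}
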
2 changes: 1 addition & 1 deletion lexer_test.go
@@ -35,7 +35,7 @@ func TestSimpleLexer(t *testing.T) {
a = 10
`)
assert.NoError(t, err)
-expected := []*Token{
+expected := []Token{
{Whitespace, "\n\t"},
{Comment, "; this is a comment"},
{Whitespace, "\n\t"},