diff --git a/fixer_v2/engine.go b/fixer_v2/engine.go
new file mode 100644
index 0000000..edf4d60
--- /dev/null
+++ b/fixer_v2/engine.go
@@ -0,0 +1,136 @@
+package fixerv2
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+
+	parser "github.com/gnolang/tlin/fixer_v2/query"
+)
+
+// Pattern represents a pattern-rewrite pair for code transformation.
+type Pattern struct {
+	Match   string
+	Rewrite string
+}
+
+var (
+	whitespaceRegex = regexp.MustCompile(`\s+`)
+	openBraceRegex  = regexp.MustCompile(`\s*{\s*`)
+	closeBraceRegex = regexp.MustCompile(`\s*}\s*`)
+)
+
+// normalizePattern replaces runs of consecutive whitespace with a single space
+// and standardizes the spacing around curly braces.
+// Then it trims any leading or trailing whitespace.
+// This helps unify the style of the pattern for regex generation.
+//
+// Note: this function is currently only used in tests.
+func normalizePattern(pattern string) string {
+	pattern = whitespaceRegex.ReplaceAllString(pattern, " ")
+	pattern = openBraceRegex.ReplaceAllString(pattern, " { ")
+	pattern = closeBraceRegex.ReplaceAllString(pattern, " } ")
+	return strings.TrimSpace(pattern)
+}
+
+// buildRegexFromAST builds a regex pattern from the parsed AST.
+func buildRegexFromAST(node parser.Node) Option[Result] {
+	var sb strings.Builder
+	captures := make(map[string]int)
+	groupCount := 1
+
+	var processNode func(parser.Node)
+	processNode = func(n parser.Node) {
+		switch v := n.(type) {
+		case *parser.TextNode:
+			// treat text nodes as literals and convert whitespace runs to \s+
+			escaped := regexp.QuoteMeta(v.Content)
+			processed := whitespaceRegex.ReplaceAllString(escaped, `\s+`)
+			sb.WriteString(processed)
+
+		case *parser.HoleNode:
+			// record the capture group index for this hole name
+			captures[v.Name] = groupCount
+			groupCount++
+			sb.WriteString(`([^{}]+?)`)
+
+		case *parser.BlockNode:
+			// block nodes emit curly braces and process their inner nodes
+			sb.WriteString(`\s*{\s*`)
+			for _, child := range v.Content {
+				processNode(child)
+			}
+			sb.WriteString(`\s*}\s*`)
+
+		case *parser.PatternNode:
+			// pattern nodes traverse all child nodes
+			for _, child := range v.Children {
+				processNode(child)
+			}
+		}
+	}
+
+	processNode(node)
+
+	regex, err := regexp.Compile(sb.String())
+	return createOption(Result{regex: regex, captures: captures}, err)
+}
+
+// patternToRegex converts the pattern string to a compiled *regexp.Regexp
+// and returns a Result containing the regex and a map that correlates each
+// placeholder name with its capture group index.
+func patternToRegex(pattern string) Option[Result] {
+	lexer := parser.NewLexer(pattern)
+	tokens := lexer.Tokenize()
+
+	p := parser.NewParser(tokens)
+	ast := p.Parse()
+
+	return buildRegexFromAST(ast)
+}
+
+// rewrite replaces placeholders in the rewrite pattern with the captured values in 'env'.
+//
+// For each placeholder name, we look for :[[name]] or :[name] in rewritePattern
+// and substitute it with the corresponding 'env[name]' value.
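+//
+// A minimal illustrative example:
+//
+//	env := map[string]string{"cond": "x > 0"}
+//	rewrite("return :[[cond]]", env) // yields "return x > 0"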
+func rewrite(rewritePattern string, env map[string]string) string {
+	lexer := parser.NewLexer(rewritePattern)
+	tokens := lexer.Tokenize()
+
+	p := parser.NewParser(tokens)
+	ast := p.Parse()
+
+	var result strings.Builder
+
+	var processNode func(parser.Node)
+	processNode = func(n parser.Node) {
+		switch v := n.(type) {
+		case *parser.TextNode:
+			result.WriteString(v.Content)
+
+		case *parser.HoleNode:
+			// replace the hole with the corresponding value in 'env'
+			if value, ok := env[v.Name]; ok {
+				result.WriteString(value)
+			} else {
+				// if no value is found, re-emit the hole (in its short form)
+				result.WriteString(fmt.Sprintf(":[%s]", v.Name))
+			}
+
+		case *parser.BlockNode:
+			result.WriteString("{")
+			for _, child := range v.Content {
+				processNode(child)
+			}
+			result.WriteString("}")
+
+		case *parser.PatternNode:
+			for _, child := range v.Children {
+				processNode(child)
+			}
+		}
+	}
+
+	processNode(ast)
+	return result.String()
+}
diff --git a/fixer_v2/engine_test.go b/fixer_v2/engine_test.go
new file mode 100644
index 0000000..e727924
--- /dev/null
+++ b/fixer_v2/engine_test.go
@@ -0,0 +1,153 @@
+package fixerv2
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+type TestResult struct {
+	vars    map[string]string
+	rewrite string
+}
+
+type TestCase struct {
+	name       string
+	pattern    Pattern
+	input      string
+	wantMatch  bool
+	wantResult TestResult
+}
+
+func TestPatternMatching(t *testing.T) {
+	tests := []TestCase{
+		{
+			name: "basic if-else to return",
+			pattern: Pattern{
+				Match: `if :[[cond]] {
+					return true
+				} else {
+					return false
+				}`,
+				Rewrite: "return :[[cond]]",
+			},
+			input: `
+			func example() bool {
+				if x > 0 {
+					return true
+				} else {
+					return false
+				}
+			}`,
+			wantMatch: true,
+			wantResult: TestResult{
+				vars: map[string]string{
+					"cond": "x > 0",
+				},
+				rewrite: "return x > 0",
+			},
+		},
+		{
+			name: "no match for different pattern",
+			pattern: Pattern{
+				Match: `if :[[cond]] {
+					return true
+				} else {
+					return false
+				}`,
+				Rewrite: "return :[[cond]]",
+			},
+			input: `
+			func example() bool {
+				if x > 0 {
+					return true
+				}
+				return false
+			}`,
+			wantMatch: false,
+			wantResult: TestResult{
+				vars:    nil,
+				rewrite: "",
+			},
+		},
+		{
+			name: "match with nested conditions",
+			pattern: Pattern{
+				Match:   "if :[[outer]] { if :[[inner]] { :[[body]] } }",
+				Rewrite: "if :[[outer]] && :[[inner]] { :[[body]] }",
+			},
+			input: `
+			func example() {
+				if x > 0 { if y < 10 { doSomething() } }
+			}`,
+			wantMatch: true,
+			wantResult: TestResult{
+				vars: map[string]string{
+					"outer": "x > 0",
+					"inner": "y < 10",
+					"body":  "doSomething()",
+				},
+				rewrite: "if x > 0 && y < 10 { doSomething() }",
+			},
+		},
+		{
+			name: "match with short syntax",
+			pattern: Pattern{
+				Match:   "func :[name]() :[ret] { :[body] }",
+				Rewrite: "func :[name]() :[ret] {\n // Added comment\n :[body]\n}",
+			},
+			input: `
+			func example() bool { return true }`,
+			wantMatch: true,
+			wantResult: TestResult{
+				vars: map[string]string{
+					"name": "example",
+					"ret":  "bool",
+					"body": "return true",
+				},
+				rewrite: "func example() bool {\n // Added comment\n return true\n}",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			resultOpt := patternToRegex(tt.pattern.Match)
+			assert.NoError(t, resultOpt.err, "patternToRegex should not return error")
+
+			if resultOpt.err != nil {
+				return
+			}
+
+			result := resultOpt.value
+			normalizedInput := normalizePattern(tt.input)
+			matches := result.regex.FindAllStringSubmatch(normalizedInput, -1)
+
+			if tt.wantMatch {
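+				// a match is expected: check the captured variables and the rewrite result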
+				assert.NotEmpty(t, matches, "expected to find matches")
+				if len(matches) > 0 {
+					env := extractEnvironment(t, matches[0], result.captures)
+					assert.Equal(t, tt.wantResult.vars, env, "captured variables should match")
+
+					rewritten := rewrite(tt.pattern.Rewrite, env)
+					assert.Equal(t, tt.wantResult.rewrite, rewritten, "rewritten code should match")
+				}
+			} else {
+				assert.Empty(t, matches, "expected no matches")
+			}
+		})
+	}
+}
+
+// extractEnvironment is a helper function to extract captured variables.
+func extractEnvironment(t *testing.T, match []string, captures map[string]int) map[string]string {
+	t.Helper()
+	env := make(map[string]string)
+	for name, idx := range captures {
+		if idx < len(match) {
+			env[name] = strings.TrimSpace(match[idx])
+		}
+	}
+	return env
+}
diff --git a/fixer_v2/monad.go b/fixer_v2/monad.go
new file mode 100644
index 0000000..80868a9
--- /dev/null
+++ b/fixer_v2/monad.go
@@ -0,0 +1,37 @@
+package fixerv2
+
+import "regexp"
+
+// Option represents a container type for handling
+// values with potential errors.
+type Option[T any] struct {
+	value T
+	err   error
+}
+
+// Result holds the compiled regex and its capture group mappings.
+type Result struct {
+	regex    *regexp.Regexp
+	captures map[string]int
+}
+
+// createOption creates a new Option.
+func createOption[T any](value T, err error) Option[T] {
+	return Option[T]{value: value, err: err}
+}
+
+// Map applies a function to the Option value.
+func (o Option[T]) Map(f func(T) T) Option[T] {
+	if o.err != nil {
+		return o
+	}
+	return createOption(f(o.value), nil)
+}
+
+// Bind chains Option operations while handling potential errors.
+func (o Option[T]) Bind(f func(T) Option[T]) Option[T] {
+	if o.err != nil {
+		return o
+	}
+	return f(o.value)
+}
diff --git a/fixer_v2/query/doc.go b/fixer_v2/query/doc.go
new file mode 100644
index 0000000..bf683c3
--- /dev/null
+++ b/fixer_v2/query/doc.go
@@ -0,0 +1,100 @@
+/*
+Package query provides a lexer and parser for processing Comby-style metavariable expressions
+used in pattern matching and rewriting code.
+
+# Overview
+
+The query package implements a parser for Comby's metavariable syntax, which is used for
+pattern matching and rewriting source code. It serves as the first parsing phase before
+the main syntax parsing, handling metavariable expressions that can match arbitrary code
+patterns.
+
+# Metavariable Syntax
+
+Metavariables in patterns are expressed using two forms:
+
+ 1. Short form: :[identifier]
+    Example: :[var]
+
+ 2. Long form: :[[identifier]]
+    Example: :[[function]]
+
+These metavariables can be used in both match and rewrite patterns. When a pattern
+is matched against source code, metavariables capture the corresponding text and can
+be referenced in the rewrite pattern.
+
+# Token Types
+
+The lexer recognizes the following token types:
+
+  - TokenText: Plain text content
+    Example: "if", "return", etc.
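+    (A ':' that does not begin a hole pattern is also emitted as TokenText.)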
+
+  - TokenHole: Metavariable placeholders
+    Format: ":[name]" or ":[[name]]"
+    Example: ":[condition]", ":[[body]]"
+
+  - TokenLBrace: Opening curly brace "{"
+    Used for block structure
+
+  - TokenRBrace: Closing curly brace "}"
+    Used for block structure
+
+  - TokenWhitespace: Spaces, tabs, newlines
+    Preserved for accurate source mapping
+
+  - TokenEOF: End of input marker
+
+# AST Node Types
+
+The parser produces an AST with the following node types:
+
+  - PatternNode: Root node containing the entire pattern
+    Children can be any other node type
+
+  - HoleNode: Represents a metavariable
+    Contains the identifier name
+
+  - TextNode: Contains literal text content
+    Includes whitespace when significant
+
+  - BlockNode: Represents a curly brace enclosed block
+    Contains child nodes between braces
+
+# Usage Example
+
+Basic usage of the lexer and parser:
+
+	// Create a new lexer with input
+	lexer := NewLexer("if :[condition] { :[[body]] }")
+
+	// Tokenize the input
+	tokens := lexer.Tokenize()
+
+	// Create parser and generate AST
+	parser := NewParser(tokens)
+	ast := parser.Parse()
+
+# Pattern Matching Rules
+
+ 1. Metavariables match any text but cannot cross block boundaries
+    Example: :[x] in "if :[x] {" will match everything up to the brace
+
+ 2. Block matching respects nested structures
+    Matched content preserves all whitespace and formatting
+
+ 3. Whitespace is normalized in pattern matching
+    Multiple spaces are treated as a single space
+
+ 4. Block boundaries must be explicit
+    Curly braces must be present in the pattern to match blocks
+
+This package is designed to work as the first phase of a multi-phase parsing system
+where metavariable expressions are processed before deeper syntactic analysis.
+It provides the foundation for implementing Comby-style pattern matching and
+rewriting functionality.
+
+For more details about Comby's pattern matching syntax, visit:
+https://comby.dev/docs/syntax-reference
+*/
+package query
diff --git a/fixer_v2/query/lexer.go b/fixer_v2/query/lexer.go
new file mode 100644
index 0000000..e7ab89e
--- /dev/null
+++ b/fixer_v2/query/lexer.go
@@ -0,0 +1,172 @@
+package query
+
+import (
+	"unicode"
+)
+
+// Lexer is responsible for scanning the input string and producing tokens.
+type Lexer struct {
+	input    string // the entire input to tokenize
+	position int    // current reading position in input
+	tokens   []Token
+}
+
+// NewLexer returns a new Lexer with the given input and initializes its state.
+func NewLexer(input string) *Lexer {
+	return &Lexer{
+		input:    input,
+		position: 0,
+		tokens:   make([]Token, 0),
+	}
+}
+
+// Tokenize processes the entire input and produces the list of tokens.
+func (l *Lexer) Tokenize() []Token {
+	for l.position < len(l.input) {
+		currentPos := l.position
+		switch c := l.input[l.position]; {
+		// ':' might indicate a placeholder like :[name] or :[[name]]
+		case c == ':':
+			if l.matchHole() {
+				// If matchHole returns true, we found :[something] or :[[something]],
+				// and the position has been updated. Skip the default token creation.
+				continue
+			}
+			// If matchHole fails, we treat ':' as just a regular text token.
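+			// (e.g. the ':' in "}:test" becomes a single-character text token)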
+			l.addToken(TokenText, string(c), currentPos)
+			l.position++
+
+		case c == '{':
+			l.addToken(TokenLBrace, "{", currentPos)
+			l.position++
+
+		case c == '}':
+			l.addToken(TokenRBrace, "}", currentPos)
+			l.position++
+
+		case isWhitespace(c):
+			l.lexWhitespace(currentPos)
+			l.position++
+
+		default:
+			// position incrementing is handled inside `lexText`
+			l.lexText(currentPos)
+		}
+	}
+
+	// At the end, add an EOF token to indicate we're done.
+	l.addToken(TokenEOF, "", l.position)
+	return l.tokens
+}
+
+// matchHole checks whether the current position indeed indicates a hole
+// like :[name] or :[[name]]. If it does, it produces a TokenHole token and
+// returns true. Otherwise, it returns false and doesn't modify the token list.
+func (l *Lexer) matchHole() bool {
+	// First, ensure there's enough room for ":[" at least.
+	if l.position+1 >= len(l.input) {
+		return false
+	}
+	startPos := l.position
+
+	// Check if the next char is '['
+	if l.input[l.position+1] == '[' {
+		// If it's "[[", we consider it a "long form": :[[name]]
+		isLongForm := (l.position+2 < len(l.input) && l.input[l.position+2] == '[')
+		end := l.findHoleEnd(isLongForm)
+		if end > 0 {
+			// We found a valid closing bracket sequence
+			l.addToken(TokenHole, l.input[l.position:end], startPos)
+			// Move the lexer position just past the hole. The caller `continue`s
+			// without incrementing, so the main loop resumes at the right place.
+			l.position = end
+			return true
+		}
+	}
+	return false
+}
+
+// lexWhitespace scans consecutive whitespace and produces one TokenWhitespace.
+func (l *Lexer) lexWhitespace(startPos int) {
+	start := l.position
+	for l.position < len(l.input) && isWhitespace(l.input[l.position]) {
+		l.position++
+	}
+	// The substring from start..l.position is all whitespace
+	l.addToken(TokenWhitespace, l.input[start:l.position], startPos)
+	// Move back one so that the main loop can increment it again.
+	l.position--
+}
+
+// lexText scans consecutive non-special, non-whitespace characters to produce TokenText.
+func (l *Lexer) lexText(startPos int) {
+	start := l.position
+	for l.position < len(l.input) {
+		c := l.input[l.position]
+
+		// does a hole pattern `:[` start here?
+		if c == ':' && l.position+1 < len(l.input) && l.input[l.position+1] == '[' {
+			break
+		}
+
+		if c == '{' || c == '}' || isWhitespace(c) {
+			break
+		}
+
+		l.position++
+	}
+
+	if l.position > start {
+		l.addToken(TokenText, l.input[start:l.position], startPos)
+	}
+}
+
+// findHoleEnd tries to locate the matching ']' or ']]' depending on whether it's a long form :[[...]].
+// Returns the index just AFTER the closing bracket(s), or -1 if no match is found.
+func (l *Lexer) findHoleEnd(isLongForm bool) int {
+	// If it is a long form :[[ name ]], we look for "]]"
+	if isLongForm {
+		// Start searching from l.position+3 since we already have ":[["
+		for i := l.position + 3; i < len(l.input)-1; i++ {
+			if l.input[i] == ']' && l.input[i+1] == ']' {
+				return i + 2
+			}
+		}
+	} else {
+		// Else, we look for a single ']'
+		// Start from l.position+2 because we have ":["
+		for i := l.position + 2; i < len(l.input); i++ {
+			if l.input[i] == ']' {
+				return i + 1
+			}
+		}
+	}
+	return -1
+}
+
+// addToken is a helper to append a new token to the lexer's token list.
+func (l *Lexer) addToken(tokenType TokenType, value string, pos int) {
+	l.tokens = append(l.tokens, Token{
+		Type:     tokenType,
+		Value:    value,
+		Position: pos,
+	})
+}
+
+// isWhitespace checks if the given byte is a space, tab, newline, etc. using unicode.IsSpace.
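+// Note: the byte-to-rune conversion is only reliable for ASCII input; bytes
+// that are part of a multi-byte UTF-8 sequence are not decoded as full runes.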
+func isWhitespace(c byte) bool {
+	return unicode.IsSpace(rune(c))
+}
+
+// extractHoleName extracts the hole name from a string like ":[name]" or ":[[name]]".
+// For example, ":[[cond]]" -> "cond", ":[cond]" -> "cond".
+// Make sure the token value is well-formed before calling this function.
+func extractHoleName(tokenValue string) string {
+	// We expect tokenValue to start with :[ or :[[, e.g. :[[cond]]
+	if len(tokenValue) > 4 && tokenValue[:3] == ":[[" {
+		// :[[ ... ]]
+		return tokenValue[3 : len(tokenValue)-2]
+	}
+	// :[ ... ]
+	return tokenValue[2 : len(tokenValue)-1]
+}
diff --git a/fixer_v2/query/meta_variable_test.go b/fixer_v2/query/meta_variable_test.go
new file mode 100644
index 0000000..3459ca4
--- /dev/null
+++ b/fixer_v2/query/meta_variable_test.go
@@ -0,0 +1,286 @@
+package query
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+	"unicode"
+)
+
+func TestLexer(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected []Token
+	}{
+		{
+			name:  "empty input",
+			input: "",
+			expected: []Token{
+				{Type: TokenEOF, Value: "", Position: 0},
+			},
+		},
+		{
+			name:  "only whitespace",
+			input: "  \t\n   ",
+			expected: []Token{
+				{Type: TokenWhitespace, Value: "  \t\n   ", Position: 0},
+				{Type: TokenEOF, Value: "", Position: 7},
+			},
+		},
+		{
+			name:  "adjacent holes",
+			input: ":[a]:[b]:[[c]]",
+			expected: []Token{
+				{Type: TokenHole, Value: ":[a]", Position: 0},
+				{Type: TokenHole, Value: ":[b]", Position: 4},
+				{Type: TokenHole, Value: ":[[c]]", Position: 8},
+				{Type: TokenEOF, Value: "", Position: 14},
+			},
+		},
+		{
+			name:  "incomplete hole pattern",
+			input: ":[test",
+			expected: []Token{
+				{Type: TokenText, Value: ":", Position: 0},
+				{Type: TokenText, Value: "[test", Position: 1},
+				{Type: TokenEOF, Value: "", Position: 6},
+			},
+		},
+		{
+			name:  "simple if condition",
+			input: "if :[[cond]] { return true }",
+			expected: []Token{
+				{Type: TokenText, Value: "if", Position: 0},
+				{Type: TokenWhitespace, Value: " ", Position: 2},
+				{Type: TokenHole, Value: ":[[cond]]", Position: 3},
+				{Type: TokenWhitespace, Value: " ", Position: 12},
+				{Type: TokenLBrace, Value: "{", Position: 13},
+				{Type: TokenWhitespace, Value: " ", Position: 14},
+				{Type: TokenText, Value: "return", Position: 15},
+				{Type: TokenWhitespace, Value: " ", Position: 21},
+				{Type: TokenText, Value: "true", Position: 22},
+				{Type: TokenWhitespace, Value: " ", Position: 26},
+				{Type: TokenRBrace, Value: "}", Position: 27},
+				{Type: TokenEOF, Value: "", Position: 28},
+			},
+		},
+		{
+			name:  "simple hole pattern",
+			input: ":[name]",
+			expected: []Token{
+				{Type: TokenHole, Value: ":[name]", Position: 0},
+				{Type: TokenEOF, Value: "", Position: 7},
+			},
+		},
+		{
+			name:  "double bracket hole pattern",
+			input: ":[[variable]]",
+			expected: []Token{
+				{Type: TokenHole, Value: ":[[variable]]", Position: 0},
+				{Type: TokenEOF, Value: "", Position: 13},
+			},
+		},
+		{
+			name:  "multiple holes",
+			input: "test :[a] :[b] :[[c]]",
+			expected: []Token{
+				{Type: TokenText, Value: "test", Position: 0},
+				{Type: TokenWhitespace, Value: " ", Position: 4},
+				{Type: TokenHole, Value: ":[a]", Position: 5},
+				{Type: TokenWhitespace, Value: " ", Position: 9},
+				{Type: TokenHole, Value: ":[b]", Position: 10},
+				{Type: TokenWhitespace, Value: " ", Position: 14},
+				{Type: TokenHole, Value: ":[[c]]", Position: 15},
+				{Type: TokenEOF, Value: "", Position: 21},
+			},
+		},
+		{
+			name:  "special characters between text",
+			input: "hello:{world}:test",
+			expected: []Token{
+				{Type: TokenText, Value: "hello:", Position: 0},
+				{Type: TokenLBrace, Value: "{", Position: 6},
+				{Type: TokenText, Value: "world", Position: 7},
+				{Type: TokenRBrace, Value: "}", Position: 12},
+				{Type: TokenText, Value: ":", Position: 13},
+				{Type: TokenText, Value: "test", Position: 14},
+				{Type: TokenEOF, Value: "", Position: 18},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			lexer := NewLexer(tt.input)
+			tokens := lexer.Tokenize()
+
+			if !reflect.DeepEqual(tokens, tt.expected) {
+				t.Errorf("Lexer.Tokenize() got = %v, want %v", tokens, tt.expected)
+			}
+		})
+	}
+}
+
+func TestParser(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "empty input",
+			input:    "",
+			expected: `PatternNode(0 children):`,
+		},
+		{
+			name:  "multiple adjacent holes",
+			input: ":[a]:[b]:[[c]]",
+			expected: `PatternNode(3 children):
+  0: HoleNode(a)
+  1: HoleNode(b)
+  2: HoleNode(c)`,
+		},
+		{
+			name:  "if statement with condition",
+			input: "if :[[cond]] { return true }",
+			expected: `PatternNode(5 children):
+  0: TextNode(if)
+  1: TextNode( )
+  2: HoleNode(cond)
+  3: TextNode( )
+  4: BlockNode(5 children):
+    0: TextNode( )
+    1: TextNode(return)
+    2: TextNode( )
+    3: TextNode(true)
+    4: TextNode( )`,
+		},
+		{
+			name:  "simple hole",
+			input: "test :[name] here",
+			expected: `PatternNode(5 children):
+  0: TextNode(test)
+  1: TextNode( )
+  2: HoleNode(name)
+  3: TextNode( )
+  4: TextNode(here)`,
+		},
+		{
+			name:  "nested blocks",
+			input: "if { if { return } }",
+			expected: `PatternNode(3 children):
+  0: TextNode(if)
+  1: TextNode( )
+  2: BlockNode(5 children):
+    0: TextNode( )
+    1: TextNode(if)
+    2: TextNode( )
+    3: BlockNode(3 children):
+      0: TextNode( )
+      1: TextNode(return)
+      2: TextNode( )
+    4: TextNode( )`,
+		},
+		{
+			name:  "complex nested blocks",
+			input: "if :[[cond]] { try { :[code] } catch { :[handler] } }",
+			expected: `PatternNode(5 children):
+  0: TextNode(if)
+  1: TextNode( )
+  2: HoleNode(cond)
+  3: TextNode( )
+  4: BlockNode(9 children):
+    0: TextNode( )
+    1: TextNode(try)
+    2: TextNode( )
+    3: BlockNode(3 children):
+      0: TextNode( )
+      1: HoleNode(code)
+      2: TextNode( )
+    4: TextNode( )
+    5: TextNode(catch)
+    6: TextNode( )
+    7: BlockNode(3 children):
+      0: TextNode( )
+      1: HoleNode(handler)
+      2: TextNode( )
+    8: TextNode( )`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			lexer := NewLexer(tt.input)
+			tokens := lexer.Tokenize()
+			parser := NewParser(tokens)
+			ast := parser.Parse()
+
+			got := removeWhitespace(t, ast.String())
+			want := removeWhitespace(t, tt.expected)
+
+			if got != want {
+				t.Errorf("Parser.Parse()\ngot =\n%v\nwant =\n%v", ast.String(), tt.expected)
+			}
+		})
+	}
+}
+
+// removeWhitespace strips all whitespace so AST string comparisons
+// are not sensitive to indentation.
+func removeWhitespace(t *testing.T, s string) string {
+	t.Helper()
+	var result strings.Builder
+	for _, ch := range s {
+		if !unicode.IsSpace(ch) {
+			result.WriteRune(ch)
+		}
+	}
+	return result.String()
+}
+
+func TestHoleExtraction(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "empty hole name",
+			input:    ":[[]]",
+			expected: "",
+		},
+		{
+			name:     "simple hole",
+			input:    ":[name]",
+			expected: "name",
+		},
+		{
+			name:     "double bracket hole",
+			input:    ":[[variable]]",
+			expected: "variable",
+		},
+		{
+			name:     "hole with numbers",
+			input:    ":[test123]",
+			expected: "test123",
+		},
+		{
+			name:     "hole with underscores",
+			input:    ":[test_var_123]",
+			expected: "test_var_123",
+		},
+		{
+			name:     "hole with special characters",
+			input:    ":[[test-var]]",
+			expected: "test-var",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := extractHoleName(tt.input)
+			if result != tt.expected {
+				t.Errorf("extractHoleName() got = %v, want %v", result, tt.expected)
+			}
+		})
+	}
+}
diff --git a/fixer_v2/query/parser.go b/fixer_v2/query/parser.go
new file mode 100644
index 0000000..46cfe61
--- /dev/null
+++ b/fixer_v2/query/parser.go
@@ -0,0 +1,100 @@
+package query
+
+// Parser consumes tokens produced by the lexer and builds an AST.
+type Parser struct {
+	tokens  []Token
+	current int
+	holes   map[string]int // hole name -> position (optional usage)
+}
+
+// NewParser creates a new Parser instance.
+func NewParser(tokens []Token) *Parser {
+	return &Parser{
+		tokens:  tokens,
+		current: 0,
+		holes:   make(map[string]int),
+	}
+}
+
+// Parse processes all tokens and builds an AST.
+func (p *Parser) Parse() Node {
+	rootNode := &PatternNode{pos: 0}
+
+	for p.current < len(p.tokens) {
+		if p.tokens[p.current].Type == TokenEOF {
+			break
+		}
+
+		node := p.parseNode()
+		if node != nil {
+			rootNode.Children = append(rootNode.Children, node)
+		}
+	}
+
+	return rootNode
+}
+
+// parseNode parses a single node based on the current token.
+func (p *Parser) parseNode() Node {
+	token := p.tokens[p.current]
+
+	switch token.Type {
+	case TokenText, TokenWhitespace:
+		p.current++
+		return &TextNode{
+			Content: token.Value,
+			pos:     token.Position,
+		}
+	case TokenHole:
+		p.current++
+		holeName := extractHoleName(token.Value)
+		// record the hole expression's position
+		p.holes[holeName] = token.Position
+		return &HoleNode{
+			Name: holeName,
+			pos:  token.Position,
+		}
+	case TokenLBrace:
+		return p.parseBlock()
+	default:
+		p.current++
+		return nil
+	}
+}
+
+// parseBlock parses a block enclosed by '{' and '}'.
+func (p *Parser) parseBlock() Node {
+	openPos := p.tokens[p.current].Position
+	p.current++
+
+	blockNode := &BlockNode{
+		Content: make([]Node, 0),
+		pos:     openPos,
+	}
+
+	// parse nodes until we find the matching '}'
+	for p.current < len(p.tokens) {
+		if p.tokens[p.current].Type == TokenRBrace {
+			p.current++
+			return blockNode
+		}
+
+		node := p.parseNode()
+		if node != nil {
+			blockNode.Content = append(blockNode.Content, node)
+		}
+	}
+
+	// if we get here, the closing brace is missing
+	// TODO: error handling
+	return blockNode
+}
+
+// peek peeks at the next token.
+// TODO: commented out for now
+// func (p *Parser) peek() Token {
+//	if p.current+1 >= len(p.tokens) {
+//		return Token{Type: TokenEOF, Value: "", Position: -1}
+//	}
+//	return p.tokens[p.current+1]
+// }
diff --git a/fixer_v2/query/type.go b/fixer_v2/query/type.go
new file mode 100644
index 0000000..e27ca80
--- /dev/null
+++ b/fixer_v2/query/type.go
@@ -0,0 +1,111 @@
+package query
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// TokenType defines different types of tokens that can be produced by the lexer.
+type TokenType int
+
+const (
+	TokenText       TokenType = iota // Plain text
+	TokenHole                        // :[name] or :[[name]]
+	TokenLBrace                      // '{'
+	TokenRBrace                      // '}'
+	TokenWhitespace                  // spaces, tabs, newlines, etc.
+	TokenEOF                         // End of input
+)
+
+// Token represents a single lexical token with type, value, and position.
+type Token struct {
+	Type     TokenType // type of this token
+	Value    string    // the literal string for this token
+	Position int       // the starting position in the original input
+}
+
+// NodeType defines the different node types for AST construction.
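+// The parser maps tokens to nodes roughly as follows:
+//
+//	TokenText / TokenWhitespace -> TextNode
+//	TokenHole                   -> HoleNode
+//	TokenLBrace ... TokenRBrace -> BlockNode (inner tokens parsed recursively)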
+type NodeType int
+
+const (
+	NodePattern NodeType = iota
+	NodeHole
+	NodeText
+	NodeBlock
+)
+
+// Node is an interface that any AST node must implement.
+type Node interface {
+	Type() NodeType // returns the node type
+	String() string // for debugging and printing
+	Position() int  // where the node starts in the input
+}
+
+var (
+	_ Node = (*PatternNode)(nil)
+	_ Node = (*HoleNode)(nil)
+	_ Node = (*TextNode)(nil)
+	_ Node = (*BlockNode)(nil)
+)
+
+// PatternNode is the top-level AST node that can contain multiple child nodes.
+type PatternNode struct {
+	Children []Node
+	pos      int
+}
+
+func (p *PatternNode) Type() NodeType { return NodePattern }
+func (p *PatternNode) String() string {
+	result := fmt.Sprintf("PatternNode(%d children):\n", len(p.Children))
+	for i, child := range p.Children {
+		childStr := strings.ReplaceAll(child.String(), "\n", "\n  ")
+		result += fmt.Sprintf("  %d: %s\n", i, childStr)
+	}
+	return strings.TrimRight(result, "\n")
+}
+func (p *PatternNode) Position() int { return p.pos }
+
+// HoleNode represents a placeholder in the pattern like :[name] or :[[name]].
+type HoleNode struct {
+	Name string
+	pos  int
+}
+
+func (h *HoleNode) Type() NodeType { return NodeHole }
+func (h *HoleNode) String() string {
+	return fmt.Sprintf("HoleNode(%s)", h.Name)
+}
+func (h *HoleNode) Position() int { return h.pos }
+
+// TextNode represents normal text in the pattern.
+type TextNode struct {
+	Content string
+	pos     int
+}
+
+func (t *TextNode) Type() NodeType { return NodeText }
+func (t *TextNode) String() string {
+	escaped := strconv.Quote(t.Content)
+	return fmt.Sprintf("TextNode(%s)", escaped[1:len(escaped)-1])
+}
+
+func (t *TextNode) Position() int { return t.pos }
+
+// BlockNode represents a block enclosed by '{' and '}'.
+type BlockNode struct {
+	Content []Node
+	pos     int
+}
+
+func (b *BlockNode) Type() NodeType { return NodeBlock }
+func (b *BlockNode) String() string {
+	result := fmt.Sprintf("BlockNode(%d children):\n", len(b.Content))
+	for i, child := range b.Content {
+		// apply indentation to child nodes
+		childStr := strings.ReplaceAll(child.String(), "\n", "\n  ")
+		result += fmt.Sprintf("  %d: %s\n", i, childStr)
+	}
+	return strings.TrimRight(result, "\n")
+}
+func (b *BlockNode) Position() int { return b.pos }