diff --git a/fixer_v2/engine.go b/fixer_v2/engine.go index edf4d60..a663f49 100644 --- a/fixer_v2/engine.go +++ b/fixer_v2/engine.go @@ -50,7 +50,7 @@ func buildRegexFromAST(node parser.Node) Option[Result] { case *parser.HoleNode: // convert hole name to capture group name - captures[v.Name] = groupCount + captures[v.Name()] = groupCount groupCount++ sb.WriteString(`([^{}]+?)`) @@ -110,11 +110,11 @@ func rewrite(rewritePattern string, env map[string]string) string { case *parser.HoleNode: // replace hole name with the corresponding value in 'env' - if value, ok := env[v.Name]; ok { + if value, ok := env[v.Name()]; ok { result.WriteString(value) } else { // if value is not found, keep the original hole expression - result.WriteString(fmt.Sprintf(":[%s]", v.Name)) + result.WriteString(fmt.Sprintf(":[%s]", v.Name())) } case *parser.BlockNode: diff --git a/fixer_v2/query/hole.go b/fixer_v2/query/hole.go new file mode 100644 index 0000000..f545076 --- /dev/null +++ b/fixer_v2/query/hole.go @@ -0,0 +1,222 @@ +package query + +import ( + "fmt" + "strings" +) + +// HoleType defines the type of hole pattern +type HoleType int + +const ( + HoleAny HoleType = iota // :[[name]] or :[name] + HoleIdentifier // :[[id:identifier]] + HoleBlock // :[[block:block]] + HoleWhitespace // :[[ws:whitespace]] + HoleExpression // :[[expr:expression]] +) + +func (h HoleType) String() string { + switch h { + case HoleAny: + return "any" + case HoleIdentifier: + return "identifier" + case HoleBlock: + return "block" + case HoleWhitespace: + return "whitespace" + case HoleExpression: + return "expression" + default: + return "unknown" + } +} + +// Quantifier defines repetition patterns +type Quantifier int + +const ( + QuantNone Quantifier = iota // No quantifier (exactly once) + QuantZeroOrMore // * (zero or more times) + QuantOneOrMore // + (one or more times) + QuantZeroOrOne // ? (zero or one time) +) + +func (q Quantifier) String() string { + switch q { + case QuantNone: + return "" + case QuantZeroOrMore: + return "*" + case QuantOneOrMore: + return "+" + case QuantZeroOrOne: + return "?" + default: + return "unknown" + } +} + +// ParseHolePattern parses a hole pattern string and returns a HoleConfig +// Format: :[[name:type]] or :[[name:type]]* +func ParseHolePattern(pattern string) (*HoleConfig, error) { + // Skip : and opening brackets + start := 1 + if pattern[1] == '[' && pattern[2] == '[' { + start = 3 + } else if pattern[1] == '[' { + start = 2 + } else { + return nil, fmt.Errorf("invalid hole pattern: %s", pattern) + } + + // Find the end of the pattern + // Find end excluding quantifier and closing brackets + end := len(pattern) - 1 + + // Check for quantifier + hasQuantifier := end >= 0 && (pattern[end] == '*' || pattern[end] == '+' || pattern[end] == '?') + if hasQuantifier { + end-- + } + + // Remove closing brackets + if end >= 1 && pattern[end-1:end+1] == "]]" { + end -= 2 + } else if end >= 0 && pattern[end] == ']' { + end-- + } + + if end < start { + return nil, fmt.Errorf("invalid hole pattern: %s", pattern) + } + + // Parse name and type + content := pattern[start : end+1] + parts := strings.Split(content, ":") + config := &HoleConfig{ + Name: parts[0], + Type: HoleAny, + Quantifier: QuantNone, + } + + // Parse type if specified + if len(parts) > 1 { + switch parts[1] { + case "identifier": + config.Type = HoleIdentifier + case "block": + config.Type = HoleBlock + case "whitespace": + config.Type = HoleWhitespace + case "expression": + config.Type = HoleExpression + default: + return nil, fmt.Errorf("unknown hole type: %s", parts[1]) + } + } + + // Set quantifier if found earlier + if hasQuantifier { + switch pattern[len(pattern)-1] { + case '*': + config.Quantifier = QuantZeroOrMore + case '+': + config.Quantifier = QuantOneOrMore + case '?': + config.Quantifier = QuantZeroOrOne + } + } + + return config, nil +} + +func (l *Lexer) matchHole() bool { + if l.position+1 >= len(l.input) { + return false + } + startPos := l.position + + if l.input[l.position+1] == '[' { + isLongForm := (l.position+2 < len(l.input) && l.input[l.position+2] == '[') + end := l.findHoleEnd(isLongForm) + if end > 0 { + // Check for quantifier + if end < len(l.input) && isQuantifier(l.input[end]) { + end++ + } + + value := l.input[l.position:end] + config, err := ParseHolePattern(value) + if err != nil { + // If parsing fails, try to extract at least the name and create a basic config + basicName := extractHoleName(value) + basicConfig := HoleConfig{ + Name: basicName, + Type: HoleAny, + Quantifier: QuantNone, + } + l.addTokenWithHoleConfig(TokenHole, value, startPos, basicConfig) + } else { + // Create a token with the parsed configuration + l.addTokenWithHoleConfig(TokenHole, value, startPos, *config) + } + l.position = end + return true + } + } + return false +} + +func (l *Lexer) addTokenWithHoleConfig(tokenType TokenType, value string, pos int, config HoleConfig) { + l.tokens = append(l.tokens, Token{ + Type: tokenType, + Value: value, + Position: pos, + HoleConfig: &config, + }) +} + +// isQuantifier checks if a character is a valid quantifier +func isQuantifier(c byte) bool { + return c == '*' || c == '+' || c == '?' +} + +func (l *Lexer) findHoleEnd(isLongForm bool) int { + if isLongForm { + for i := l.position + 3; i < len(l.input)-1; i++ { + if l.input[i] == ']' && l.input[i+1] == ']' { + // Check if there's a quantifier after the closing brackets + if i+2 < len(l.input) && isQuantifier(l.input[i+2]) { + return i + 3 + } + return i + 2 + } + } + } else { + for i := l.position + 2; i < len(l.input); i++ { + if l.input[i] == ']' { + // Check if there's a quantifier after the closing bracket + if i+1 < len(l.input) && isQuantifier(l.input[i+1]) { + return i + 2 + } + return i + 1 + } + } + } + return -1 +} + +// extractHoleName extracts the hole name from a string like ":[name]" or ":[[name]]". +// For example, ":[[cond]]" -> "cond", ":[cond]" -> "cond". +// Make sure the token value is well-formed before calling this function. +func extractHoleName(tokenValue string) string { + // We expect tokenValue to start with :[ or :[[, e.g. :[[cond]] + if len(tokenValue) > 4 && tokenValue[:3] == ":[[" { + // :[[ ... ]] + return tokenValue[3 : len(tokenValue)-2] + } + // :[ ... ] + return tokenValue[2 : len(tokenValue)-1] +} diff --git a/fixer_v2/query/hole_test.go b/fixer_v2/query/hole_test.go new file mode 100644 index 0000000..60d3622 --- /dev/null +++ b/fixer_v2/query/hole_test.go @@ -0,0 +1,265 @@ +package query + +import ( + "testing" +) + +func TestParseHolePattern(t *testing.T) { + tests := []struct { + name string + pattern string + wantConfig *HoleConfig + wantErr bool + }{ + { + name: "simple hole", + pattern: ":[var]", + wantConfig: &HoleConfig{ + Name: "var", + Type: HoleAny, + Quantifier: QuantNone, + }, + }, + { + name: "identifier hole", + pattern: ":[[name:identifier]]", + wantConfig: &HoleConfig{ + Name: "name", + Type: HoleIdentifier, + Quantifier: QuantNone, + }, + }, + { + name: "block hole with quantifier", + pattern: ":[[block:block]]*", + wantConfig: &HoleConfig{ + Name: "block", + Type: HoleBlock, + Quantifier: QuantZeroOrMore, + }, + }, + { + name: "expression with plus quantifier", + pattern: ":[[expr:expression]]+", + wantConfig: &HoleConfig{ + Name: "expr", + Type: HoleExpression, + Quantifier: QuantOneOrMore, + }, + }, + { + name: "whitespace with optional quantifier", + pattern: ":[[ws:whitespace]]?", + wantConfig: &HoleConfig{ + Name: "ws", + Type: HoleWhitespace, + Quantifier: QuantZeroOrOne, + }, + }, + { + name: "invalid hole type", + pattern: ":[[var:invalid]]", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseHolePattern(tt.pattern) + if (err != nil) != tt.wantErr { + t.Errorf("ParseHolePattern() error = %v, wantErr %v", err, tt.wantErr) + return + } + if err == nil { + if got.Name != tt.wantConfig.Name { + t.Errorf("Name = %v, want %v", got.Name, tt.wantConfig.Name) + } + if got.Type != tt.wantConfig.Type { + t.Errorf("Type = %v, want %v", got.Type, tt.wantConfig.Type) + } + if got.Quantifier != tt.wantConfig.Quantifier { + t.Errorf("Quantifier = %v, want %v", got.Quantifier, tt.wantConfig.Quantifier) + } + } + }) + } +} + +func TestMatchHoleWithConfig(t *testing.T) { + tests := []struct { + name string + input string + startPos int + wantMatch bool + wantToken Token + wantPos int + }{ + { + name: "basic hole", + input: ":[var] rest", + startPos: 0, + wantMatch: true, + wantToken: Token{ + Type: TokenHole, + Value: ":[var]", + Position: 0, + HoleConfig: &HoleConfig{ + Name: "var", + Type: HoleAny, + Quantifier: QuantNone, + }, + }, + wantPos: 6, + }, + { + name: "typed hole", + input: ":[[expr:expression]] rest", + startPos: 0, + wantMatch: true, + wantToken: Token{ + Type: TokenHole, + Value: ":[[expr:expression]]", + Position: 0, + HoleConfig: &HoleConfig{ + Name: "expr", + Type: HoleExpression, + Quantifier: QuantNone, + }, + }, + wantPos: 20, + }, + { + name: "hole with quantifier", + input: ":[[stmts:block]]* {", + startPos: 0, + wantMatch: true, + wantToken: Token{ + Type: TokenHole, + Value: ":[[stmts:block]]*", + Position: 0, + HoleConfig: &HoleConfig{ + Name: "stmts", + Type: HoleBlock, + Quantifier: QuantZeroOrMore, + }, + }, + wantPos: 17, + }, + { + name: "whitespace hole with optional", + input: ":[[ws:whitespace]]? rest", + startPos: 0, + wantMatch: true, + wantToken: Token{ + Type: TokenHole, + Value: ":[[ws:whitespace]]?", + Position: 0, + HoleConfig: &HoleConfig{ + Name: "ws", + Type: HoleWhitespace, + Quantifier: QuantZeroOrOne, + }, + }, + wantPos: 19, + }, + { + name: "identifier with one or more", + input: ":[[ids:identifier]]+ rest", + startPos: 0, + wantMatch: true, + wantToken: Token{ + Type: TokenHole, + Value: ":[[ids:identifier]]+", + Position: 0, + HoleConfig: &HoleConfig{ + Name: "ids", + Type: HoleIdentifier, + Quantifier: QuantOneOrMore, + }, + }, + wantPos: 20, + }, + { + name: "invalid hole format", + input: ":[invalid rest", + startPos: 0, + wantMatch: false, + wantPos: 0, + }, + { + name: "invalid type", + input: ":[[x:invalid]] rest", + startPos: 0, + wantMatch: true, // should match but create basic hole + wantToken: Token{ + Type: TokenHole, + Value: ":[[x:invalid]]", + Position: 0, + HoleConfig: &HoleConfig{ + Name: "x:invalid", + Type: HoleAny, + Quantifier: QuantNone, + }, + }, + wantPos: 14, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + l := NewLexer(tt.input) + l.position = tt.startPos + + got := l.matchHole() + if got != tt.wantMatch { + t.Errorf("matchHole() match = %v, want %v", got, tt.wantMatch) + return + } + + if !tt.wantMatch { + return + } + + if len(l.tokens) != 1 { + t.Errorf("matchHole() produced %d tokens, want 1", len(l.tokens)) + return + } + + gotToken := l.tokens[0] + + if gotToken.Type != tt.wantToken.Type { + t.Errorf("Token.Type = %v, want %v", gotToken.Type, tt.wantToken.Type) + } + if gotToken.Value != tt.wantToken.Value { + t.Errorf("Token.Value = %v, want %v", gotToken.Value, tt.wantToken.Value) + } + if gotToken.Position != tt.wantToken.Position { + t.Errorf("Token.Position = %v, want %v", gotToken.Position, tt.wantToken.Position) + } + + // Compare HoleConfig if present + if tt.wantToken.HoleConfig != nil { + if gotToken.HoleConfig == nil { + t.Errorf("Token.HoleConfig is nil, want %+v", tt.wantToken.HoleConfig) + } else { + if gotToken.HoleConfig.Name != tt.wantToken.HoleConfig.Name { + t.Errorf("HoleConfig.Name = %v, want %v", + gotToken.HoleConfig.Name, tt.wantToken.HoleConfig.Name) + } + if gotToken.HoleConfig.Type != tt.wantToken.HoleConfig.Type { + t.Errorf("HoleConfig.Type = %v, want %v", + gotToken.HoleConfig.Type, tt.wantToken.HoleConfig.Type) + } + if gotToken.HoleConfig.Quantifier != tt.wantToken.HoleConfig.Quantifier { + t.Errorf("HoleConfig.Quantifier = %v, want %v", + gotToken.HoleConfig.Quantifier, tt.wantToken.HoleConfig.Quantifier) + } + } + } + + if l.position != tt.wantPos { + t.Errorf("Lexer position = %v, want %v", l.position, tt.wantPos) + } + }) + } +} diff --git a/fixer_v2/query/lexer.go b/fixer_v2/query/lexer.go index e7ab89e..55e1b40 100644 --- a/fixer_v2/query/lexer.go +++ b/fixer_v2/query/lexer.go @@ -59,33 +59,6 @@ func (l *Lexer) Tokenize() []Token { return l.tokens } -// matchHole checks if the current position indeed indicates a hole -// like :[name] or :[[name]]. If it does, it produces a TokenHole token and -// returns true. Otherwise, it returns false and doesn't modify token list. -func (l *Lexer) matchHole() bool { - // First, ensure there's enough room for ":[" at least. - if l.position+1 >= len(l.input) { - return false - } - startPos := l.position - - // Check if the next char is '[' - if l.input[l.position+1] == '[' { - // If it's "[[", we consider it a "long form": :[[name]] - isLongForm := (l.position+2 < len(l.input) && l.input[l.position+2] == '[') - end := l.findHoleEnd(isLongForm) - if end > 0 { - // We found a valid closing bracket sequence - l.addToken(TokenHole, l.input[l.position:end], startPos) - // Move lexer position so that the main loop will continue from the right place. - // We do -1 because the main loop increments position once more. - l.position = end - return true - } - } - return false -} - // lexWhitespace scans consecutive whitespace and produces one TokenWhitespace. func (l *Lexer) lexWhitespace(startPos int) { start := l.position @@ -121,29 +94,6 @@ func (l *Lexer) lexText(startPos int) { } } -// findHoleEnd tries to locate the matching ']' or ']]' depending on whether it's a long form :[[...]]. -// Returns the index just AFTER the closing bracket(s), or -1 if no match is found. -func (l *Lexer) findHoleEnd(isLongForm bool) int { - // If it is a long form :[[ name ]], we look for "]]" - if isLongForm { - // Start searching from l.position+3 since we already have ":[[" - for i := l.position + 3; i < len(l.input)-1; i++ { - if l.input[i] == ']' && l.input[i+1] == ']' { - return i + 2 - } - } - } else { - // Else, we look for a single ']' - // Start from l.position+2 because we have ":[" - for i := l.position + 2; i < len(l.input); i++ { - if l.input[i] == ']' { - return i + 1 - } - } - } - return -1 -} - // addToken is a helper to append a new token to the lexer's token list. func (l *Lexer) addToken(tokenType TokenType, value string, pos int) { l.tokens = append(l.tokens, Token{ @@ -157,16 +107,3 @@ func (l *Lexer) addToken(tokenType TokenType, value string, pos int) { func isWhitespace(c byte) bool { return unicode.IsSpace(rune(c)) } - -// extractHoleName extracts the hole name from a string like ":[name]" or ":[[name]]". -// For example, ":[[cond]]" -> "cond", ":[cond]" -> "cond". -// Make sure the token value is well-formed before calling this function. -func extractHoleName(tokenValue string) string { - // We expect tokenValue to start with :[ or :[[, e.g. :[[cond]] - if len(tokenValue) > 4 && tokenValue[:3] == ":[[" { - // :[[ ... ]] - return tokenValue[3 : len(tokenValue)-2] - } - // :[ ... ] - return tokenValue[2 : len(tokenValue)-1] -} diff --git a/fixer_v2/query/meta_variable_test.go b/fixer_v2/query/meta_variable_test.go index 3459ca4..ce3d992 100644 --- a/fixer_v2/query/meta_variable_test.go +++ b/fixer_v2/query/meta_variable_test.go @@ -1,7 +1,6 @@ package query import ( - "reflect" "strings" "testing" "unicode" @@ -115,8 +114,20 @@ func TestLexer(t *testing.T) { lexer := NewLexer(tt.input) tokens := lexer.Tokenize() - if !reflect.DeepEqual(tokens, tt.expected) { - t.Errorf("Lexer.Tokenize() got = %v, want %v", tokens, tt.expected) + if len(tokens) != len(tt.expected) { + t.Errorf("Lexer.Tokenize() got %d tokens, want %d tokens", len(tokens), len(tt.expected)) + return + } + + for i, got := range tokens { + want := tt.expected[i] + if got.Type != want.Type || + got.Value != want.Value || + got.Position != want.Position { + t.Errorf("Token[%d] = {Type: %v, Value: %q, Position: %d}, want {Type: %v, Value: %q, Position: %d}", + i, got.Type, got.Value, got.Position, + want.Type, want.Value, want.Position) + } } }) } diff --git a/fixer_v2/query/parser.go b/fixer_v2/query/parser.go index 46cfe61..db3996f 100644 --- a/fixer_v2/query/parser.go +++ b/fixer_v2/query/parser.go @@ -34,6 +34,7 @@ func (p *Parser) Parse() Node { return rootNode } +// parseNode parses a single node based on the current token // parseNode parses a single node based on the current token func (p *Parser) parseNode() Node { token := p.tokens[p.current] @@ -47,12 +48,18 @@ func (p *Parser) parseNode() Node { } case TokenHole: p.current++ - holeName := extractHoleName(token.Value) - // update hole expr's position - p.holes[holeName] = token.Position - return &HoleNode{ - Name: holeName, - pos: token.Position, + if token.HoleConfig != nil { + // token has already been parsed with a HoleConfig + p.holes[token.HoleConfig.Name] = token.Position + return &HoleNode{ + Config: *token.HoleConfig, + pos: token.Position, + } + } else { + // for backward compatibility + holeName := extractHoleName(token.Value) + p.holes[holeName] = token.Position + return NewHoleNode(holeName, token.Position) } case TokenLBrace: return p.parseBlock() @@ -89,12 +96,3 @@ func (p *Parser) parseBlock() Node { // TODO: error handling return blockNode } - -// peek peeks at the next token -// TODO: commented out for now -// func (p *Parser) peek() Token { -// if p.current+1 >= len(p.tokens) { -// return Token{Type: TokenEOF, Value: "", Position: -1} -// } -// return p.tokens[p.current+1] -// } diff --git a/fixer_v2/query/type.go b/fixer_v2/query/type.go index e27ca80..77ae06c 100644 --- a/fixer_v2/query/type.go +++ b/fixer_v2/query/type.go @@ -20,9 +20,10 @@ const ( // Token represents a single lexical token with type, value, and position. type Token struct { - Type TokenType // type of this token - Value string // the literal string for this token - Position int // the starting position in the original input + Type TokenType // type of this token + Value string // the literal string for this token + Position int // the starting position in the original input + HoleConfig *HoleConfig // configuration for hole tokens (nil for non-hole tokens) } // NodeType defines different node types for AST construction. @@ -66,17 +67,41 @@ func (p *PatternNode) String() string { } func (p *PatternNode) Position() int { return p.pos } +// HoleConfig stores configuration for a hole pattern +type HoleConfig struct { + Type HoleType + Quantifier Quantifier + Name string +} + // HoleNode represents a placeholder in the pattern like :[name] or :[[name]]. type HoleNode struct { - Name string - pos int + Config HoleConfig + pos int +} + +func NewHoleNode(name string, pos int) *HoleNode { + return &HoleNode{ + Config: HoleConfig{ + Name: name, + Type: HoleAny, + Quantifier: QuantNone, + }, + pos: pos, + } } func (h *HoleNode) Type() NodeType { return NodeHole } + func (h *HoleNode) String() string { - return fmt.Sprintf("HoleNode(%s)", h.Name) + if h.Config.Type == HoleAny && h.Config.Quantifier == QuantNone { + return fmt.Sprintf("HoleNode(%s)", h.Config.Name) + } + return fmt.Sprintf("HoleNode(%s:%s)%s", h.Config.Name, h.Config.Type, h.Config.Quantifier) } + func (h *HoleNode) Position() int { return h.pos } +func (h *HoleNode) Name() string { return h.Config.Name } // TextNode represents normal text in the pattern. type TextNode struct {