Commit
commonize jsx and tsx
terryyin committed Jan 31, 2025
1 parent a342798 commit 0e6e534
Showing 4 changed files with 64 additions and 30 deletions.
52 changes: 41 additions & 11 deletions lizard_languages/js_style_regex_expression.py
@@ -6,18 +6,48 @@


def js_style_regex_expression(func):
def generate_tokens_with_regex(source_code, _=""):
def generate_tokens_with_regex(source_code, addition='', token_class=None):
regx_regx = r"\/(\S*?[^\s\\]\/)+?(igm)*"
regx_pattern = re.compile(regx_regx)
word_pattern = re.compile(r'\w+')
tokens = func(source_code, r"|"+regx_regx)
leading_by_word = False
for token in tokens:
if leading_by_word and regx_pattern.match(token):
for subtoken in func(token, _):
yield subtoken
tokens = list(func(source_code, addition, token_class))
result = []
i = 0
while i < len(tokens):
token = tokens[i]
if token == '/':
# Check if this could be a regex pattern
is_regex = False
if i == 0:
is_regex = True
elif i > 0:
prev_token = tokens[i-1].strip()
if prev_token and prev_token[-1] in '=,({[?:!&|;':
is_regex = True

if is_regex:
# This is likely a regex pattern start
regex_tokens = [token]
i += 1
while i < len(tokens) and not tokens[i].endswith('/'):
regex_tokens.append(tokens[i])
i += 1
if i < len(tokens):
regex_tokens.append(tokens[i])
i += 1
# Check for regex flags
if i < len(tokens) and re.match(r'^[igm]+$', tokens[i]):
regex_tokens.append(tokens[i])
i += 1
combined = ''.join(regex_tokens)
if regx_pattern.match(combined):
result.append(combined)
else:
result.extend(regex_tokens)
else:
# This is a division operator
result.append(token)
else:
yield token
if not token.isspace():
leading_by_word = word_pattern.match(token)
result.append(token)
i += 1
return result
return generate_tokens_with_regex
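
For context, a minimal usage sketch of the decorator above (not part of the commit; the wrapper function and the input string are illustrative, and it assumes the lizard source tree is importable). It shows the intended effect: a JavaScript regex literal is reassembled into a single token, while an ordinary division stays untouched.

from lizard_languages.code_reader import CodeReader
from lizard_languages.js_style_regex_expression import js_style_regex_expression


@js_style_regex_expression
def tokenize(source_code, addition='', token_class=None):
    # Delegate to the generic tokenizer; the decorator post-processes its output.
    return CodeReader.generate_tokens(source_code, addition, token_class)


# The first '/' follows '=', so the tokens up to the closing '/' (plus the 'g'
# flag) should be re-joined into one regex token; 'a/b' stays as division.
print(list(tokenize("x=/\\d+/g; y=a/b;")))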
22 changes: 15 additions & 7 deletions lizard_languages/jsx.py
Expand Up @@ -5,15 +5,11 @@
from .javascript import JavaScriptReader, JSTokenizer
from .code_reader import CodeReader
from .js_style_regex_expression import js_style_regex_expression
from .javascript import Tokenizer # Added import for base Tokenizer class
from .javascript import Tokenizer


class JSXReader(JavaScriptReader):
# pylint: disable=R0903

ext = ['jsx']
language_names = ['jsx']

class JSXMixin:
'''Base mixin class for JSX/TSX shared functionality'''
@staticmethod
@js_style_regex_expression
def generate_tokens(source_code, addition='', token_class=None):
@@ -28,6 +24,18 @@ def generate_tokens(source_code, addition='', token_class=None):
yield tok


class JSXReader(JavaScriptReader, JSXMixin):
# pylint: disable=R0903

ext = ['jsx']
language_names = ['jsx']

@staticmethod
@js_style_regex_expression
def generate_tokens(source_code, addition='', token_class=None):
return JSXMixin.generate_tokens(source_code, addition, token_class)


class XMLTagWithAttrTokenizer(Tokenizer):
def __init__(self):
super(XMLTagWithAttrTokenizer, self).__init__()
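
A quick way to sanity-check the shared path (a usage sketch, not part of the commit; the file name is invented): lizard resolves the reader by file extension, and after this change both JSXReader and TSXReader delegate tokenization to JSXMixin.generate_tokens.

import lizard

# Analyze a JSX source file; lizard picks JSXReader from the .jsx extension.
info = lizard.analyze_file("Component.jsx")  # hypothetical path
for fn in info.function_list:
    print(fn.long_name, fn.cyclomatic_complexity, fn.nloc)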
14 changes: 3 additions & 11 deletions lizard_languages/tsx.py
@@ -4,12 +4,12 @@

from .typescript import TypeScriptReader, TypeScriptStates
from .javascript import JSTokenizer
from .jsx import XMLTagWithAttrTokenizer, isidentifier
from .jsx import JSXMixin
from .code_reader import CodeReader
from .js_style_regex_expression import js_style_regex_expression


class TSXReader(TypeScriptReader):
class TSXReader(TypeScriptReader, JSXMixin):
# pylint: disable=R0903

ext = ['tsx']
@@ -18,15 +18,7 @@ class TSXReader(TypeScriptReader):
@staticmethod
@js_style_regex_expression
def generate_tokens(source_code, addition='', token_class=None):
addition = addition +\
r"|(?:\$\w+)" + \
r"|(?:\<\/\w+\>)" + \
r"|`.*?`"
js_tokenizer = JSTokenizer()
for token in CodeReader.generate_tokens(
source_code, addition, token_class):
for tok in js_tokenizer(token):
yield tok
return JSXMixin.generate_tokens(source_code, addition, token_class)

def __init__(self, context):
super(TSXReader, self).__init__(context)
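
The same pattern would extend to any further JS-flavoured dialect: pair the matching base reader with JSXMixin and delegate token generation, mirroring JSXReader and TSXReader above. A hypothetical sketch (class name and extension are invented for illustration):

from lizard_languages.typescript import TypeScriptReader
from lizard_languages.jsx import JSXMixin
from lizard_languages.js_style_regex_expression import js_style_regex_expression


class HypotheticalDialectReader(TypeScriptReader, JSXMixin):
    # pylint: disable=R0903

    ext = ['xtsx']                 # invented extension, for illustration only
    language_names = ['xtsx']

    @staticmethod
    @js_style_regex_expression
    def generate_tokens(source_code, addition='', token_class=None):
        # Delegate to the shared JSX/TSX tokenizer, as the real readers do.
        return JSXMixin.generate_tokens(source_code, addition, token_class)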
6 changes: 5 additions & 1 deletion prompt-history.md
@@ -43,4 +43,8 @@ run all new test and fix if the test fail. Add temporary debug info to understan

---------

It looks like the XMLTagWithAttrTokenizer from @javascript.py belongs to @jsx.py , isn't it? Please move it out.

---------

@tsx.py @jsx.py should be doing very similar things. please refactor to remove the duplicate, including conceptually duplicate code.
