Skip to content

Commit

Permalink
move jsx code out
Browse files Browse the repository at this point in the history
  • Loading branch information
terryyin committed Jan 31, 2025
1 parent 8bc3067 commit 23b2f32
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 83 deletions.
82 changes: 1 addition & 81 deletions lizard_languages/javascript.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __init__(self):

def process_token(self, token):
if token == "<":
from .jsx import XMLTagWithAttrTokenizer # Import only when needed
self.sub_tokenizer = XMLTagWithAttrTokenizer()
return
if token == "{":
Expand All @@ -70,84 +71,3 @@ def process_token(self, token):
self.stop()
return
yield token


class XMLTagWithAttrTokenizer(Tokenizer):
    """State machine that consumes the tokens of one XML/JSX-style tag.

    Incoming tokens are buffered in ``self.cache`` (seeded with the ``'<'``
    that caused this tokenizer to be created).  ``self.state`` holds the bound
    method that handles the next non-whitespace token.  A handler returns
    ``None`` to keep consuming silently, or a list of tokens to emit.

    ``stop()`` and the ``sub_tokenizer`` attribute come from the ``Tokenizer``
    base class, which is defined elsewhere.
    """

    def __init__(self):
        super(XMLTagWithAttrTokenizer, self).__init__()
        self.tag = None
        self.state = self._global_state
        self.cache = ['<']

    def process_token(self, token):
        """Buffer *token*; let the current state react unless it is blank."""
        self.cache.append(token)
        if token.isspace():
            # Whitespace is kept in the buffer but never drives the machine.
            return ()
        emitted = self.state(token)
        return () if emitted is None else emitted

    def abort(self):
        """Stop this tokenizer and hand back the buffered tokens one by one."""
        self.stop()
        return self.cache

    def flush(self):
        """Empty the buffer and return its content joined into one token."""
        pending = self.cache
        self.cache = []
        return [''.join(pending)]

    def _global_state(self, token):
        # First token after '<': must be the tag name, otherwise give up.
        if isidentifier(token):
            self.tag = token
            self.state = self._after_tag
            return None
        return self.abort()

    def _after_tag(self, token):
        # After the tag name or a complete attribute: the tag closes, starts
        # self-closing, or another attribute name follows.
        if token == '>':
            next_state = self._body
        elif token == "/":
            next_state = self._expecting_self_closing
        elif isidentifier(token):
            next_state = self._expecting_equal_sign
        else:
            return self.abort()
        self.state = next_state
        return None

    def _expecting_self_closing(self, token):
        # A '/' was seen; only '>' may follow it.
        if token != ">":
            return self.abort()
        self.stop()
        return self.flush()

    def _expecting_equal_sign(self, token):
        # An attribute name was seen; only '=' may follow it.
        if token != '=':
            return self.abort()
        self.state = self._expecting_value
        return None

    def _expecting_value(self, token):
        # Attribute value: a quoted string, or a '{' expression that is handed
        # to a JSTokenizer sub-tokenizer.
        # NOTE(review): any other token leaves the state unchanged instead of
        # aborting like the sibling states do -- confirm this is intended.
        is_quoted = token[0] in ("'", '"')
        if is_quoted or token == "{":
            if not is_quoted:
                # The closing brace is buffered up front, before the
                # sub-tokenizer is installed.
                self.cache.append("}")
                self.sub_tokenizer = JSTokenizer()
            self.state = self._after_tag
        return None

    def _body(self, token):
        # Inside the element body: react to a nested tag, a closing tag or an
        # embedded '{' expression; everything else just stays buffered.
        if token == "<":
            self.sub_tokenizer = XMLTagWithAttrTokenizer()
            # Drop the '<' just buffered; the nested tokenizer seeds its own.
            self.cache.pop()
        elif token.startswith("</"):
            self.stop()
        elif token == '{':
            self.sub_tokenizer = JSTokenizer()
        else:
            return None
        return self.flush()


def isidentifier(token):
    """Return whether *token* looks like an identifier.

    Uses ``token.isidentifier()`` when the object provides it.  Objects
    without that method (for example ``bytes``) fall back to a looser check:
    the first byte of the UTF-8 encoded token must be alphabetic.

    Args:
        token: the token text to classify.

    Returns:
        bool: True if the token qualifies as an identifier, else False.
        An empty token is never an identifier.
    """
    try:
        return token.isidentifier()
    except AttributeError:
        # Values that are already bytes must not be encoded again: bytes has
        # no ``encode`` on Python 3.
        raw = token if isinstance(token, bytes) else token.encode(encoding='UTF-8')
        # Slice instead of indexing: indexing bytes yields an int on Python 3
        # (no ``isalpha``) and raises IndexError on an empty token.
        return raw[:1].isalpha()
84 changes: 83 additions & 1 deletion lizard_languages/jsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
Language parser for JSX
'''

from .javascript import JavaScriptReader, JSTokenizer, XMLTagWithAttrTokenizer, isidentifier
from .javascript import JavaScriptReader, JSTokenizer
from .code_reader import CodeReader
from .js_style_regex_expression import js_style_regex_expression
from .javascript import Tokenizer # Added import for base Tokenizer class


class JSXReader(JavaScriptReader):
Expand All @@ -25,3 +26,84 @@ def generate_tokens(source_code, addition='', token_class=None):
source_code, addition, token_class):
for tok in js_tokenizer(token):
yield tok


class XMLTagWithAttrTokenizer(Tokenizer):
    """State machine that consumes the tokens of one XML/JSX-style tag.

    Incoming tokens are buffered in ``self.cache`` (seeded with the ``'<'``
    that caused this tokenizer to be created).  ``self.state`` holds the bound
    method that handles the next non-whitespace token.  A handler returns
    ``None`` to keep consuming silently, or a list of tokens to emit.

    ``stop()`` and the ``sub_tokenizer`` attribute come from the ``Tokenizer``
    base class, which is defined elsewhere.
    """

    def __init__(self):
        super(XMLTagWithAttrTokenizer, self).__init__()
        self.tag = None
        self.state = self._global_state
        self.cache = ['<']

    def process_token(self, token):
        """Buffer *token*; let the current state react unless it is blank."""
        self.cache.append(token)
        if token.isspace():
            # Whitespace is kept in the buffer but never drives the machine.
            return ()
        emitted = self.state(token)
        return () if emitted is None else emitted

    def abort(self):
        """Stop this tokenizer and hand back the buffered tokens one by one."""
        self.stop()
        return self.cache

    def flush(self):
        """Empty the buffer and return its content joined into one token."""
        pending = self.cache
        self.cache = []
        return [''.join(pending)]

    def _global_state(self, token):
        # First token after '<': must be the tag name, otherwise give up.
        if isidentifier(token):
            self.tag = token
            self.state = self._after_tag
            return None
        return self.abort()

    def _after_tag(self, token):
        # After the tag name or a complete attribute: the tag closes, starts
        # self-closing, or another attribute name follows.
        if token == '>':
            next_state = self._body
        elif token == "/":
            next_state = self._expecting_self_closing
        elif isidentifier(token):
            next_state = self._expecting_equal_sign
        else:
            return self.abort()
        self.state = next_state
        return None

    def _expecting_self_closing(self, token):
        # A '/' was seen; only '>' may follow it.
        if token != ">":
            return self.abort()
        self.stop()
        return self.flush()

    def _expecting_equal_sign(self, token):
        # An attribute name was seen; only '=' may follow it.
        if token != '=':
            return self.abort()
        self.state = self._expecting_value
        return None

    def _expecting_value(self, token):
        # Attribute value: a quoted string, or a '{' expression that is handed
        # to a JSTokenizer sub-tokenizer.
        # NOTE(review): any other token leaves the state unchanged instead of
        # aborting like the sibling states do -- confirm this is intended.
        is_quoted = token[0] in ("'", '"')
        if is_quoted or token == "{":
            if not is_quoted:
                # The closing brace is buffered up front, before the
                # sub-tokenizer is installed.
                self.cache.append("}")
                self.sub_tokenizer = JSTokenizer()
            self.state = self._after_tag
        return None

    def _body(self, token):
        # Inside the element body: react to a nested tag, a closing tag or an
        # embedded '{' expression; everything else just stays buffered.
        if token == "<":
            self.sub_tokenizer = XMLTagWithAttrTokenizer()
            # Drop the '<' just buffered; the nested tokenizer seeds its own.
            self.cache.pop()
        elif token.startswith("</"):
            self.stop()
        elif token == '{':
            self.sub_tokenizer = JSTokenizer()
        else:
            return None
        return self.flush()


def isidentifier(token):
    """Return whether *token* looks like an identifier.

    Uses ``token.isidentifier()`` when the object provides it.  Objects
    without that method (for example ``bytes``) fall back to a looser check:
    the first byte of the UTF-8 encoded token must be alphabetic.

    Args:
        token: the token text to classify.

    Returns:
        bool: True if the token qualifies as an identifier, else False.
        An empty token is never an identifier.
    """
    try:
        return token.isidentifier()
    except AttributeError:
        # Values that are already bytes must not be encoded again: bytes has
        # no ``encode`` on Python 3.
        raw = token if isinstance(token, bytes) else token.encode(encoding='UTF-8')
        # Slice instead of indexing: indexing bytes yields an int on Python 3
        # (no ``isalpha``) and raises IndexError on an empty token.
        return raw[:1].isalpha()
3 changes: 2 additions & 1 deletion lizard_languages/tsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
'''

from .typescript import TypeScriptReader, TypeScriptStates
from .javascript import JSTokenizer, XMLTagWithAttrTokenizer, isidentifier
from .javascript import JSTokenizer
from .jsx import XMLTagWithAttrTokenizer, isidentifier
from .code_reader import CodeReader
from .js_style_regex_expression import js_style_regex_expression

Expand Down

0 comments on commit 23b2f32

Please sign in to comment.