diff --git a/PLATFORM/C/LIB/regex.lm b/PLATFORM/C/LIB/regex.lm
index cf94a160..3d743f5f 100644
--- a/PLATFORM/C/LIB/regex.lm
+++ b/PLATFORM/C/LIB/regex.lm
@@ -23,6 +23,24 @@ atom suffix Regex _rgx;
       (==( (as status U64) 0_u64 ))
 ) U64);
 
+== := λ(: text String)(: rgx Regex). (: (
+   (let matches (: __uninitialized C_regmatch__t_[1]))
+   (let status (regexec(
+      (as (& rgx) C_regex__t_*_)
+      (as text C_char_*)
+      (as 1_u64 C_size__t_)
+      (as matches C_regmatch__t_*)
+      (as 0_u64 C_int)
+   )))
+   (&&(
+      (==( (as status U64) 0_u64 ))
+      (&&(
+         (==( (.rm_so([]( matches 0_u64 ))) 0_u64 ))
+         (==( (.rm_eo([]( matches 0_u64 ))) (.length text) ))
+      ))
+   ))
+) U64);
+
 .remove-prefix := λ(: text SmartString)(: rgx Regex). (: (
    (let matches (: __uninitialized C_regmatch__t_[1]))
    (let status (regexec(
diff --git a/PLUGINS/FRONTEND/C/c-parse.lsts b/PLUGINS/FRONTEND/C/c-parse.lsts
index e9066b2e..baea8a5b 100644
--- a/PLUGINS/FRONTEND/C/c-parse.lsts
+++ b/PLUGINS/FRONTEND/C/c-parse.lsts
@@ -6,12 +6,89 @@
 # However, there are also a large number of compiler-specific extensions that are parsed but mostly ignored
 # Example: __extension__ ( f, g )
 
+# A C constant tagged by its lexical category; value carries the token text.
+type CConstant = CConstantInteger{value:CString}
+               | CConstantCharacter{value:CString}
+               | CConstantFloating{value:CString}
+               | CConstantEnumeration{value:CString};
+
+# Order constants by $".0" first (presumably the variant tag), then by value within the same variant.
+let cmp(l: CConstant, r: CConstant): Ord = (
+   if $".0"(l) != $".0"(r) then cmp($".0"(l), $".0"(r))
+   else match Tuple{l, r} {
+      Tuple{ first:CConstantInteger{lv=value}, second:CConstantInteger{rv=value} } => cmp(lv, rv);
+      Tuple{ first:CConstantCharacter{lv=value}, second:CConstantCharacter{rv=value} } => cmp(lv, rv);
+      Tuple{ first:CConstantFloating{lv=value}, second:CConstantFloating{rv=value} } => cmp(lv, rv);
+      Tuple{ first:CConstantEnumeration{lv=value}, second:CConstantEnumeration{rv=value} } => cmp(lv, rv);
+   }
+);
+
 let std-c-parse(tokens: List): Nil = (
    # while non-zero(tokens) { tokens = std-c-parse-external-declaration(tokens); }
 );
 
+# True when tk is one of the C keywords listed below; such tokens are never identifiers.
+let std-c-is-reserved-word(tk: CString): U64 = (
+   let reserved = false;
+   if tk == c"auto" then (reserved = true);
+   if tk == c"double" then (reserved = true);
+   if tk == c"int" then (reserved = true);
+   if tk == c"struct" then (reserved = true);
+   if tk == c"break" then (reserved = true);
+   if tk == c"else" then (reserved = true);
+   if tk == c"long" then (reserved = true);
+   if tk == c"switch" then (reserved = true);
+   if tk == c"case" then (reserved = true);
+   if tk == c"enum" then (reserved = true);
+   if tk == c"register" then (reserved = true);
+   if tk == c"typedef" then (reserved = true);
+   if tk == c"char" then (reserved = true);
+   if tk == c"extern" then (reserved = true);
+   if tk == c"return" then (reserved = true);
+   if tk == c"union" then (reserved = true);
+   if tk == c"const" then (reserved = true);
+   if tk == c"float" then (reserved = true);
+   if tk == c"short" then (reserved = true);
+   if tk == c"unsigned" then (reserved = true);
+   if tk == c"continue" then (reserved = true);
+   if tk == c"for" then (reserved = true);
+   if tk == c"signed" then (reserved = true);
+   if tk == c"void" then (reserved = true);
+   if tk == c"default" then (reserved = true);
+   if tk == c"goto" then (reserved = true);
+   if tk == c"sizeof" then (reserved = true);
+   if tk == c"volatile" then (reserved = true);
+   if tk == c"do" then (reserved = true);
+   if tk == c"if" then (reserved = true);
+   if tk == c"static" then (reserved = true);
+   if tk == c"while" then (reserved = true);
+   if tk == c"_Bool" then (reserved = true);
+   if tk == c"_Imaginary" then (reserved = true);
+   if tk == c"restrict" then (reserved = true);
+   if tk == c"_Complex" then (reserved = true);
+   if tk == c"inline" then (reserved = true);
+   if tk == c"_Alignas" then (reserved = true);
+   if tk == c"_Generic" then (reserved = true);
+   if tk == c"_Thread_local" then (reserved = true);
+   if tk == c"_Alignof" then (reserved = true);
+   if tk == c"_Noreturn" then (reserved = true);
+   if tk == c"_Atomic" then (reserved = true);
+   if tk == c"_Static_assert" then (reserved = true);
+   reserved
+);
+
+# Classify a token: "identifier", "integer" and "character" are lexical classes checked by regex;
+# any other cls is compared against the token text directly.
 let std-c-has-class(tk: CString, cls: String): U64 = (
-   tk == cls
+   match cls {
+      "identifier" => tk == r/^[a-zA-Z_][a-zA-Z0-9_]*/ && not(std-c-is-reserved-word(tk));
+      "integer" => tk == r/^[0-9]+([uU]|[lL]|wb|WB)*/ # decimal constant
+                || tk == r/^[0][0-7]+([uU]|[lL]|wb|WB)*/ # octal constant
+                || tk == r/^[0][xX][0-9a-fA-F]+([uU]|[lL]|wb|WB)*/ # hexadecimal constant (0x or 0X prefix)
+                || tk == r/^[0][bB][01]+([uU]|[lL]|wb|WB)*/; # binary constant
+      "character" => tk == r/^(u8|u|U|L)?[']([^'\\]|([\\].))+[']/; # character constant; a backslash always consumes the next char
+      _ => tk == cls;
+   }
 );
 
 let std-c-can-take(tokens: List, cls: String): U64 = (
@@ -315,9 +392,6 @@ let std-c-take-maybe(tokens: List, cls: String): List = (
 #struct-or-union-specifier = struct-or-union, '{', struct-declaration-list, '}'
 #                          | struct-or-union, identifier, ['{', struct-declaration-list, '}'];
 
-#struct-or-union = 'struct'
-#                | 'union';
-
 #struct-declaration-list = struct-declaration, {struct-declaration};
 
 #struct-declaration = specifier-qualifier-list, ';' (* for anonymous struct/union *)
@@ -383,6 +457,51 @@ let std-c-parse-assignment-operator(tokens: List): Tuple,L
    else Tuple{ no, tokens }
 );
 
+# unary-operator = '&' | '*' | '+' | '-' | '~' | '!'; on a match the head token is consumed.
+let std-c-parse-unary-operator(tokens: List): Tuple,List> = (
+   let no = None :: Maybe;
+   if not(non-zero(tokens)) then Tuple{ no, tokens }
+   else if head(tokens).key == c"&" then Tuple{ Some{c"&"}, tail(tokens) }
+   else if head(tokens).key == c"*" then Tuple{ Some{c"*"}, tail(tokens) }
+   else if head(tokens).key == c"+" then Tuple{ Some{c"+"}, tail(tokens) }
+   else if head(tokens).key == c"-" then Tuple{ Some{c"-"}, tail(tokens) }
+   else if head(tokens).key == c"~" then Tuple{ Some{c"~"}, tail(tokens) }
+   else if head(tokens).key == c"!" then Tuple{ Some{c"!"}, tail(tokens) }
+   else Tuple{ no, tokens }
+);
+
+# struct-or-union = 'struct' | 'union'; on a match the head token is consumed.
+let std-c-parse-struct-or-union(tokens: List): Tuple,List> = (
+   let no = None :: Maybe;
+   if not(non-zero(tokens)) then Tuple{ no, tokens }
+   else if head(tokens).key == c"struct" then Tuple{ Some{c"struct"}, tail(tokens) }
+   else if head(tokens).key == c"union" then Tuple{ Some{c"union"}, tail(tokens) }
+   else Tuple{ no, tokens }
+);
+
+# identifier: takes the head token when std-c-can-take accepts it as class "identifier".
+let std-c-parse-identifier(tokens: List): Tuple,List> = (
+   let no = None :: Maybe;
+   if std-c-can-take(tokens, "identifier") then Tuple{ Some{head(tokens).key}, tail(tokens) }
+   else Tuple{ no, tokens }
+);
+
+# constant: integer or character constant; floating and enumeration constants are not handled yet.
+let std-c-parse-constant(tokens: List): Tuple,List> = (
+   let no = None :: Maybe;
+   if std-c-can-take(tokens, "integer") then Tuple{ Some{CConstantInteger{head(tokens).key}}, tail(tokens) }
+   else if std-c-can-take(tokens, "character") then Tuple{ Some{CConstantCharacter{head(tokens).key}}, tail(tokens) }
+   #else if std-c-can-take(tokens, "floating") then Tuple{ Some{CConstantFloating{head(tokens).key}}, tail(tokens) }
+   #else if std-c-can-take(tokens, "enumeration") then Tuple{ Some{CConstantEnumeration{head(tokens).key}}, tail(tokens) }
+   else Tuple{ no, tokens }
+);
+
+#constant = integer-constant
+#         | character-constant
+#         | floating-constant
+#         | enumeration-constant;
+
+
 #parameter-list = parameter-declaration, {',', parameter-declaration};
 
 #parameter-declaration = declaration-specifiers, [declarator | abstract-declarator];
@@ -431,12 +550,6 @@ let std-c-parse-assignment-operator(tokens: List): Tuple,L
 #                   | postfix-expression, ('++' | '--')
 #                   | '(', type-name, ')', '{', initializer-list, [','], '}';
 
-#unary-operator = '&'
-#               | '*'
-#               | '+'
-#               | '-'
-#               | '~'
-#               | '!';
 
 #primary-expression = identifier
 #                   | constant
@@ -446,10 +559,6 @@ let std-c-parse-assignment-operator(tokens: List): Tuple,L
 
 #argument-expression-list = assignment-expression, {',', assignment-expression};
 
-#constant = integer-constant
-#         | character-constant
-#         | floating-constant
-#         | enumeration-constant;
 
 #string = string-literal
 #       | '__func__';
diff --git a/PLUGINS/FRONTEND/C/c-smart-tokenize.lsts b/PLUGINS/FRONTEND/C/c-smart-tokenize.lsts
index 42fbc5c3..ee44b50a 100644
--- a/PLUGINS/FRONTEND/C/c-smart-tokenize.lsts
+++ b/PLUGINS/FRONTEND/C/c-smart-tokenize.lsts
@@ -59,6 +59,10 @@ let std-c-tokenize-string(file-path: String, text: String): List = (
          (lit=r/^["]([^"\\]|([\\].))*["]/).. rest => (
             tokens = cons(text[:lit.length], tokens); text = rest;
          );
+         (cl=r/^(u8|u|U|L)?[']([^'\\]|([\\].))+[']/).. rest => (
+            # character constant; escapes are consumed as backslash+char pairs, same as the string rule
+            tokens = cons(text[:cl.length], tokens); text = rest;
+         );
          (id=r/^[a-zA-Z0-9_]+/).. rest => (
             tokens = cons(text[:id.length], tokens);
             text = rest;
diff --git a/tests/c/c-parse.lsts b/tests/c/c-parse.lsts
index bd6ede71..c4001328 100644
--- a/tests/c/c-parse.lsts
+++ b/tests/c/c-parse.lsts
@@ -70,3 +70,121 @@ if true then {
       assert( std-c-parse-assignment-operator(tokens).first == Some{c"|="} );
    };
 };
+
+if true then {
+   let abc = std-c-tokenize-string("abc", "abc");
+   assert( std-c-parse-unary-operator(abc).first == None :: Maybe );
+   if true then {
+      let tokens = std-c-tokenize-string("[&]", "&");
+      assert( std-c-parse-unary-operator(tokens).first == Some{c"&"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[*]", "*");
+      assert( std-c-parse-unary-operator(tokens).first == Some{c"*"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[+]", "+");
+      assert( std-c-parse-unary-operator(tokens).first == Some{c"+"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[-]", "-");
+      assert( std-c-parse-unary-operator(tokens).first == Some{c"-"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[~]", "~");
+      assert( std-c-parse-unary-operator(tokens).first == Some{c"~"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[!]", "!");
+      assert( std-c-parse-unary-operator(tokens).first == Some{c"!"} );
+   };
+};
+
+if true then {
+   let abc = std-c-tokenize-string("abc", "abc");
+   assert( std-c-parse-struct-or-union(abc).first == None :: Maybe );
+   if true then {
+      let tokens = std-c-tokenize-string("[struct]", "struct");
+      assert( std-c-parse-struct-or-union(tokens).first == Some{c"struct"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[union]", "union");
+      assert( std-c-parse-struct-or-union(tokens).first == Some{c"union"} );
+   };
+};
+
+if true then {
+   if true then {
+      let tokens = std-c-tokenize-string("[abc]", "abc");
+      assert( std-c-parse-identifier(tokens).first == Some{c"abc"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[ABC]", "ABC");
+      assert( std-c-parse-identifier(tokens).first == Some{c"ABC"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("_0", "_0");
+      assert( std-c-parse-identifier(tokens).first == Some{c"_0"} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[int]", "int");
+      assert( std-c-parse-identifier(tokens).first == None :: Maybe );
+   };
+};
+
+if true then {
+   let abc = std-c-tokenize-string("abc", "abc");
+   assert( std-c-parse-constant(abc).first == None :: Maybe );
+   if true then {
+      let tokens = std-c-tokenize-string("[0123456789]", "0123456789");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0123456789"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[0x0123456789aAbBcCdDeEfF]", "0x0123456789aAbBcCdDeEfF");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0x0123456789aAbBcCdDeEfF"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[0XFF]", "0XFF");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0XFF"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[0b01]", "0b01");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0b01"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[0ulwb]", "0ulwb");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0ulwb"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[0UllWB]", "0UllWB");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0UllWB"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[0ULLWB]", "0ULLWB");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0ULLWB"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("[0ULWB]", "0ULWB");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0ULWB"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("'a'", "'a'");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"'a'"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("'\\\\'", "'\\\\'");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"'\\\\'"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("u8'\\''", "u8'\\''");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"u8'\\''"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("L'\\0'", "L'\\0'");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"L'\\0'"}} );
+   };
+   if true then {
+      let tokens = std-c-tokenize-string("u'\\u123'", "u'\\u123'");
+      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"u'\\u123'"}} );
+   };
+};