From e1b8c2f5e57436b515001a70e717bba455b1da80 Mon Sep 17 00:00:00 2001 From: Will Wray Date: Thu, 24 Nov 2022 07:09:15 -0400 Subject: [PATCH] Add PP_NUMBER, remove CPP_INTEGER, CPP_FLOAT A simple proof of concept change that fixes #79. With it, pcpp can do codegen using the IREPEAT library. I believe it's conceptually correct, but my Python may not be; please test this against your suite and review the method (hack) carefully. There's not much code! Mostly deletions. The change removes CPP_INTEGER, effectively replacing it with PP_NUMBER, and entirely removes CPP_FLOAT as superfluous for preprocessing purposes. pp-number is sufficient for preprocessing to stage 4. The pp-number regex in the issue is incorrect, lifted from unpublished WG21 https://isocpp.org/files/papers/D2180R0.html "pp-number makes cpp dumber" (best proposal title ever). Instead, I crafted a regex based on the latest C++ draft https://eel.is/c++draft/lex.ppnumber#ntref:pp-number which accepts character ' as digit separator: regex string r'\.?\d(\.|[\w_]|\'[\w_]|[eEpP][-+])*' (also admits binary literals, with digit separator, of course, so they can now be added to the Value parsing code) Only the conditional evaluator is required to interpret the numbers as integer constant expressions. This is achieved by hacky means: def p_expression_number(p): 'expression : PP_NUMBER' try: p[0] = Value(p[1]) except: p[0] = p[1] The idea is that if the parsed string p[1] can be interpreted as an integer constant-expression Value(p[1]) then do so, otherwise simply pass through the string for possible further pasting and processing. A robust method might check p[1] against the CPP_INTEGER regex (removed in this commit) for a full match, consuming all input. On the other hand, relying on Value to validate the input while parsing and to raise an exception on failure may be Pythonic. 
It seems that pp-number itself is a hack in the standard; I see no way to incorporate pp-number alongside INTEGER and FLOAT tokens meaningful in C; but then there's no need to. Happy Thanksgiving! --- pcpp/evaluator.py | 10 +++++++--- pcpp/parser.py | 13 +++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pcpp/evaluator.py b/pcpp/evaluator.py index 38abe20..498ec1f 100644 --- a/pcpp/evaluator.py +++ b/pcpp/evaluator.py @@ -357,7 +357,7 @@ def __gt__(self, other): # The subset of tokens from Preprocessor used in preprocessor expressions tokens = ( - 'CPP_ID', 'CPP_INTEGER', 'CPP_CHAR', 'CPP_STRING', + 'CPP_ID', 'PP_NUMBER', 'CPP_CHAR', 'CPP_STRING', 'CPP_PLUS', 'CPP_MINUS', 'CPP_STAR', 'CPP_FSLASH', 'CPP_PERCENT', 'CPP_BAR', 'CPP_AMPERSAND', 'CPP_TILDE', 'CPP_HAT', 'CPP_LESS', 'CPP_GREATER', 'CPP_EXCLAMATION', 'CPP_QUESTION', 'CPP_LPAREN', 'CPP_RPAREN', @@ -394,8 +394,12 @@ def p_error(p): raise SyntaxError("at EOF") def p_expression_number(p): - 'expression : CPP_INTEGER' - p[0] = Value(p[1]) + 'expression : PP_NUMBER' + try: + p[0] = Value(p[1]) + except: + p[0] = p[1] + def p_expression_character(p): 'expression : CPP_CHAR' diff --git a/pcpp/parser.py b/pcpp/parser.py index 8e6a10a..6e55987 100644 --- a/pcpp/parser.py +++ b/pcpp/parser.py @@ -27,7 +27,7 @@ # ----------------------------------------------------------------------------- tokens = ( - 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_LINECONT', 'CPP_COMMENT1', 'CPP_COMMENT2', + 'CPP_ID', 'PP_NUMBER', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_LINECONT', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND', 'CPP_PLUS', 'CPP_MINUS', 'CPP_STAR', 'CPP_FSLASH', 'CPP_PERCENT', 'CPP_BAR', 'CPP_AMPERSAND', 'CPP_TILDE', 'CPP_HAT', 'CPP_LESS', 'CPP_GREATER', 'CPP_EQUAL', 'CPP_EXCLAMATION', 'CPP_QUESTION', 'CPP_LPAREN', 'CPP_RPAREN', 'CPP_LBRACKET', 'CPP_RBRACKET', 'CPP_LCURLY', 'CPP_RCURLY', @@ -111,15 +111,12 @@ def t_CPP_LINECONT(t): # Identifier 
t_CPP_ID = r'[A-Za-z_][\w_]*' -# Integer literal -def CPP_INTEGER(t): - r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' +# Preprocessor number +def PP_NUMBER(t): + r'\.?\d(\.|[\w_]|\'[\w_]|[eEpP][-+])*' return t -t_CPP_INTEGER = CPP_INTEGER - -# Floating literal -t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))?|(\d+)e(\+|-)?(\d+))([lL]|[fF])?' +t_PP_NUMBER = PP_NUMBER # String literal def t_CPP_STRING(t):