From 2cc489c89934822ee9abb2715182777f2f505f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Brochet?= Date: Tue, 15 Dec 2015 16:53:09 +0100 Subject: [PATCH 1/3] Require boost --- histFactory/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/histFactory/CMakeLists.txt b/histFactory/CMakeLists.txt index 0a88cd6..bef45e4 100644 --- a/histFactory/CMakeLists.txt +++ b/histFactory/CMakeLists.txt @@ -28,6 +28,11 @@ if(IN_CMSSW) execute_process(COMMAND scram tool tag python LIB OUTPUT_VARIABLE PYTHON_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) set(PYTHON_LIBRARY "${PYTHON_LIBRARY_PATH}/lib${PYTHON_LIBRARY}.so") execute_process(COMMAND scram tool tag python INCLUDE OUTPUT_VARIABLE PYTHON_INCLUDE_DIR OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Boost + execute_process(COMMAND scram tool tag boost BOOST_BASE OUTPUT_VARIABLE + BOOST_ROOT OUTPUT_STRIP_TRAILING_WHITESPACE) + set(Boost_NO_SYSTEM_PATHS ON) else() execute_process(COMMAND python-config --prefix OUTPUT_VARIABLE PYTHON_PREFIX OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -35,6 +40,10 @@ else() list(APPEND CMAKE_INCLUDE_PATH "${PYTHON_PREFIX}/include") endif() +set(Boost_NO_BOOST_CMAKE ON) +find_package(Boost REQUIRED) +include_directories(${Boost_INCLUDE_DIRS}) + find_package(PythonLibs REQUIRED) include_directories(${PYTHON_INCLUDE_PATH}) From f616c0c2181bf2ff0409e9934ee2bf0b407ffcb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Brochet?= Date: Tue, 15 Dec 2015 16:57:38 +0100 Subject: [PATCH 2/3] Get rid of TTreeFormula for parsing expressions Introduce a small parser instead of using TTreeFormula for parsing expressions. This has some benefits: - It's (a lot) faster - It handles all valid C++ expressions - It does not complain about unknown identifiers The only drawback is that compilation time for the `createPlotter` tool is now a bit longer than before. 
--- histFactory/CMakeLists.txt | 7 +- histFactory/src/createPlotter.cpp | 88 +++---- histFactory/src/formula_parser.cpp | 16 ++ histFactory/src/formula_parser.h | 368 +++++++++++++++++++++++++++++ 4 files changed, 433 insertions(+), 46 deletions(-) create mode 100644 histFactory/src/formula_parser.cpp create mode 100644 histFactory/src/formula_parser.h diff --git a/histFactory/CMakeLists.txt b/histFactory/CMakeLists.txt index bef45e4..1205499 100644 --- a/histFactory/CMakeLists.txt +++ b/histFactory/CMakeLists.txt @@ -11,7 +11,7 @@ include(CMSSW) include(CheckCXXCompilerFlag) CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) if(COMPILER_SUPPORTS_CXX0X) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -g") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++0x -g") else() message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") endif() @@ -47,6 +47,8 @@ include_directories(${Boost_INCLUDE_DIRS}) find_package(PythonLibs REQUIRED) include_directories(${PYTHON_INCLUDE_PATH}) +include_directories(src) + # Configure external include(BuildExternals) @@ -61,7 +63,7 @@ configure_file(scripts/createPlotter.sh.in createPlotter.sh @ONLY NEWLINE_STYLE # plotter sources set(PLOTTER_SOURCES src/createPlotter.cpp - ${EXTERNAL_SRC_DIR}/jsoncpp.cpp + src/formula_parser.cpp ) # Multidraw sources @@ -112,7 +114,6 @@ set_target_properties(count PROPERTIES OUTPUT_NAME "createProcessedEvents.exe") # Link libraries target_link_libraries(plotter ${ROOT_LIBRARIES}) -target_link_libraries(plotter ${ROOT_TREEPLAYER_LIBRARY}) target_link_libraries(plotter ${PYTHON_LIBRARY}) target_link_libraries(plotter "uuid") diff --git a/histFactory/src/createPlotter.cpp b/histFactory/src/createPlotter.cpp index e8ada04..ee8fa35 100644 --- a/histFactory/src/createPlotter.cpp +++ b/histFactory/src/createPlotter.cpp @@ -8,16 +8,15 @@ #include #include #include -#include #include +#include #include +#include +#include #include -// Ugly 
hack to access list of leaves in the formula -#define protected public -#include -#undef protected +#include #include @@ -278,56 +277,54 @@ bool execute(const std::string& skeleton, const std::string& config_file, std::s plot.name = uuid; } + parser::parser parser; + std::unique_ptr t(new TChain("t")); t->Add(skeleton.c_str()); - std::vector branches; - std::function getBranches = [&branches, &getBranches](TTreeFormula* f) { - if (!f) - return; - - for (size_t i = 0; i < f->GetNcodes(); i++) { - TLeaf* leaf = f->GetLeaf(i); - if (! leaf) - continue; - - TBranch* p_branch = getTopBranch(leaf->GetBranch()); + // Get list of all branches + std::unordered_map tree_branches; + TObjArray* root_tree_branches = t->GetListOfBranches(); + for (size_t i = 0; i < static_cast(root_tree_branches->GetEntries()); i++) { + TBranch* b = static_cast(root_tree_branches->UncheckedAt(i)); - Branch branch; - branch.name = p_branch->GetName(); - if (std::find_if(branches.begin(), branches.end(), [&branch](const Branch& b) { return b.name == branch.name; }) == branches.end()) { - branch.type = p_branch->GetClassName(); - if (branch.type.empty()) - branch.type = leaf->GetTypeName(); + Branch branch; + branch.name = b->GetName(); + branch.type = b->GetClassName(); - branches.push_back(branch); - } - - for (size_t j = 0; j < f->fNdimensions[i]; j++) { - if (f->fVarIndexes[i][j]) - getBranches(f->fVarIndexes[i][j]); + if (branch.type.empty()) { + TLeaf* leaf = b->GetLeaf(branch.name.c_str()); + if (! 
leaf) { + std::cerr << "Error: can't deduce type for branch '" << branch.name << "'" << std::endl; + continue; } + branch.type = leaf->GetTypeName(); } - for (size_t i = 0; i < f->fAliases.GetEntriesFast(); i++) { - getBranches((TTreeFormula*) f->fAliases.UncheckedAt(i)); - } - }; + tree_branches.emplace(branch.name, branch); + } std::string hists_declaration; std::string text_plots; + std::set identifiers; + size_t index = 0; for (auto& p: plots) { - // Create formulas - std::shared_ptr selector(new TTreeFormula("selector", p.cut.c_str(), t.get())); - std::shared_ptr weight(new TTreeFormula("weight", p.weight.c_str(), t.get())); - getBranches(selector.get()); - getBranches(weight.get()); + if ((index % 200) == 0) + std::cout << "Parsing plot #" << index << " / " << plots.size() << std::endl; + + index++; + + // Create formulas + if (! parser.parse(p.cut, identifiers)) + std::cerr << "Warning: " << p.cut << " failed to parse." << std::endl; + if (! parser.parse(p.weight, identifiers)) + std::cerr << "Warning: " << p.weight << " failed to parse." << std::endl; std::vector splitted_variables = split(p.variable, ":::"); for (const std::string& variable: splitted_variables) { - std::shared_ptr var(new TTreeFormula("var", variable.c_str(), t.get())); - getBranches(var.get()); + if (!parser.parse(variable, identifiers)) + std::cerr << "Warning: " << variable << " failed to parse." << std::endl; } std::string binning = p.binning; @@ -360,10 +357,15 @@ bool execute(const std::string& skeleton, const std::string& config_file, std::s ctemplate::ExpandTemplate(getTemplate("Plot"), ctemplate::DO_NOT_STRIP, &plot, &text_plots); } - // Sort alphabetically - std::sort(branches.begin(), branches.end(), [](const Branch& a, const Branch& b) { - return a.name < b.name; - }); + // Everything is parsed. 
Collect the list of branches used by the formula + std::vector branches; + for (const auto& id: identifiers) { + auto branch = tree_branches.find(id); + if (branch == tree_branches.end()) + continue; + + branches.push_back(branch->second); + } std::string text_branches; for (const auto& branch: branches) { diff --git a/histFactory/src/formula_parser.cpp b/histFactory/src/formula_parser.cpp new file mode 100644 index 0000000..bb8beb8 --- /dev/null +++ b/histFactory/src/formula_parser.cpp @@ -0,0 +1,16 @@ +#include + +namespace parser +{ + bool parser::parse(const std::string& line, std::set& identifiers) { + m_grammar.set_identifiers(identifiers); + + bool result = qi::phrase_parse( + line.begin(), + line.end(), + m_grammar, + ascii::space); + + return result; + } +} diff --git a/histFactory/src/formula_parser.h b/histFactory/src/formula_parser.h new file mode 100644 index 0000000..a017e6d --- /dev/null +++ b/histFactory/src/formula_parser.h @@ -0,0 +1,368 @@ +#include +#include +#include + +#include +#include + +namespace parser +{ + namespace qi = boost::spirit::qi; + namespace phoenix = boost::phoenix; + namespace ascii = boost::spirit::ascii; + + template + struct grammar: qi::grammar { + + public: + grammar(): grammar::base_type(translation_unit), + ELLIPSIS("..."), RIGHT_ASSIGN(">>="), LEFT_ASSIGN("<<="), + ADD_ASSIGN("+="), SUB_ASSIGN("-="), MUL_ASSIGN("*="), + DIV_ASSIGN("/="), MOD_ASSIGN("%="), AND_ASSIGN("&="), + XOR_ASSIGN("^="), OR_ASSIGN("|="), RIGHT_OP(">>"), LEFT_OP("<<"), + INC_OP("++"), DEC_OP("--"), PTR_OP("->"), AND_OP("&&"), + OR_OP("||"), LE_OP("<="), GE_OP(">="), EQ_OP("=="), NE_OP("!="), + SEMICOLON(';'), + COMMA(','), COLON(':'), ASSIGN('='), LEFT_PAREN('('), + RIGHT_PAREN(')'), DOT('.'), ADDROF('&'), BANG('!'), TILDE('~'), + MINUS('-'), PLUS('+'), STAR('*'), SLASH('/'), PERCENT('%'), + LT_OP('<'), GT_OP('>'), XOR('^'), OR('|'), QUEST('?') + { + + keywords = + "auto", "break", "case", "char", "const", "continue", "default", + "do", "double", 
"else", "enum", "extern", "float", "for", + "goto", "if", "int", "long", "register", "return", "short", + "signed", "sizeof", "static", "struct", "switch", "typedef", + "union", "unsigned", "void", "volatile", "while"; + + LEFT_BRACE = qi::lit('{') | qi::lit("<%"); + RIGHT_BRACE = qi::lit('}') | qi::lit("%>"); + LEFT_BRACKET = qi::lit('[') | qi::lit("<:"); + RIGHT_BRACKET = qi::lit(']') | qi::lit(":>"); + + CHAR = qi::lit("char"); + CONST = qi::lit("const"); + DOUBLE = qi::lit("double"); + FLOAT = qi::lit("float"); + INT = qi::lit("int"); + LONG = qi::lit("long"); + SHORT = qi::lit("short"); + SIGNED = qi::lit("signed"); + VOID = qi::lit("void"); + VOLATILE = qi::lit("volatile"); + + using qi::eps; + using qi::double_; + using qi::float_; + using qi::int_; + using ascii::char_; + using ascii::alpha; + using ascii::alnum; + using qi::lexeme; + using phoenix::push_back; + + IDENTIFIER = qi::as_string[((alpha | char_('_') | char_('$')) >> *(alnum | char_('_') | char_('$'))) + - (keywords >> (char_ - (alnum | '_' | '$'))) + ][phoenix::bind(&grammar::new_id, this, qi::_1)]; + + QUOTED_STRING %= lexeme['"' >> +(char_ - '"') >> '"']; + + primary_expression + = IDENTIFIER + | double_ + | float_ + | int_ + | QUOTED_STRING + | LEFT_PAREN >> expression >> RIGHT_PAREN + ; + + + postfix_expression + = primary_expression >> postfix_expression_helper + ; + + postfix_expression_helper + = ( + LEFT_BRACKET >> expression >> RIGHT_BRACKET + | LEFT_PAREN >> -argument_expression_list >> RIGHT_PAREN + | DOT >> IDENTIFIER + | PTR_OP >> IDENTIFIER + ) >> + postfix_expression_helper + | eps + ; + + argument_expression_list + = assignment_expression >> *(COMMA >> assignment_expression) + ; + + unary_expression + = postfix_expression.alias() + ; + + cast_expression + = LEFT_PAREN >> type_name >> RIGHT_PAREN >> cast_expression + | unary_expression + ; + + multiplicative_expression + = cast_expression >> multiplicative_expression_helper + ; + + multiplicative_expression_helper + = ( + STAR >> 
cast_expression + | SLASH >> cast_expression + | PERCENT >> cast_expression + ) >> + multiplicative_expression_helper + | eps + ; + + additive_expression + = multiplicative_expression >> additive_expression_helper + ; + + additive_expression_helper + = ( + PLUS >> multiplicative_expression + | MINUS >> multiplicative_expression + ) >> + additive_expression_helper + | eps + ; + + shift_expression + = additive_expression >> shift_expression_helper + ; + + shift_expression_helper + = ( + LEFT_OP >> additive_expression + | RIGHT_OP >> additive_expression + ) >> + shift_expression_helper + | eps + ; + + relational_expression + = shift_expression >> relational_expression_helper + ; + + relational_expression_helper + = ( + LT_OP >> shift_expression + | GT_OP >> shift_expression + | LE_OP >> shift_expression + | GE_OP >> shift_expression + ) >> + relational_expression_helper + | eps + ; + + equality_expression + = relational_expression >> equality_expression_helper + ; + + equality_expression_helper + = ( + EQ_OP >> relational_expression + | NE_OP >> relational_expression + ) >> + equality_expression_helper + | eps + ; + + and_expression + = equality_expression >> and_expression_helper + ; + + and_expression_helper + = ADDROF >> equality_expression >> and_expression_helper + | eps + ; + + exclusive_or_expression + = and_expression >> exclusive_or_expression_helper + ; + + exclusive_or_expression_helper + = XOR >> and_expression >> exclusive_or_expression_helper + | eps + ; + + inclusive_or_expression + = exclusive_or_expression >> inclusive_or_expression_helper + ; + + inclusive_or_expression_helper + = OR >> exclusive_or_expression >> inclusive_or_expression_helper + | eps + ; + + logical_and_expression + = inclusive_or_expression >> logical_and_expression_helper + ; + + logical_and_expression_helper + = AND_OP >> inclusive_or_expression >> logical_and_expression_helper + | eps + ; + + logical_or_expression + = logical_and_expression >> logical_or_expression_helper + ; + 
+ logical_or_expression_helper + = OR_OP >> logical_and_expression >> logical_or_expression_helper + | eps + ; + + conditional_expression + = logical_or_expression >> conditional_expression_helper + ; + + conditional_expression_helper + = QUEST >> expression >> COLON + >> conditional_expression >> conditional_expression_helper + | eps + ; + + assignment_expression + = conditional_expression.alias() + ; + + expression + = assignment_expression >> expression_helper + ; + + expression_helper + = COMMA >> assignment_expression >> expression_helper + | eps + ; + + constant_expression + = conditional_expression + ; + + type_specifier + = VOID + | CHAR + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + | SIGNED + | UNSIGNED + ; + + specifier_qualifier_list + = ( + type_specifier + | type_qualifier + ) >> + -specifier_qualifier_list + ; + + type_qualifier + = CONST + | VOLATILE + ; + + pointer + = STAR >> -(type_qualifier_list || pointer) + ; + + type_qualifier_list + = +type_qualifier + ; + + identifier_list + = IDENTIFIER >> *(COMMA >> IDENTIFIER) + ; + + type_name + = specifier_qualifier_list >> -abstract_declarator + ; + + // parser start symbol + translation_unit + = expression.alias() + ; + } + + void new_id(const std::string& id) { + m_identifiers->emplace(id); + } + + void set_identifiers(std::set& ids) { + m_identifiers = &ids; + } + + private: + qi::rule IDENTIFIER; + qi::rule QUOTED_STRING; + + qi::rule parameter; + qi::rule function_call; + + // Operators + std::string + ELLIPSIS, RIGHT_ASSIGN, LEFT_ASSIGN, ADD_ASSIGN, SUB_ASSIGN, + MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, AND_ASSIGN, XOR_ASSIGN, + OR_ASSIGN, RIGHT_OP, LEFT_OP, INC_OP, DEC_OP, PTR_OP, AND_OP, + OR_OP, LE_OP, GE_OP, EQ_OP, NE_OP; + + char + SEMICOLON, COMMA, COLON, ASSIGN, LEFT_PAREN, RIGHT_PAREN, + DOT, ADDROF, BANG, TILDE, MINUS, PLUS, STAR, SLASH, PERCENT, + LT_OP, GT_OP, XOR, OR, QUEST; + + qi::symbols<> keywords; + + qi::rule + LEFT_BRACE, RIGHT_BRACE, LEFT_BRACKET, RIGHT_BRACKET; + + qi::rule + 
CHAR, CONST, DOUBLE, FLOAT, INT, LONG, SHORT, SIGNED, SIZEOF, UNSIGNED, VOID, VOLATILE; + + qi::rule + primary_expression, postfix_expression, postfix_expression_helper, + argument_expression_list, unary_expression, + cast_expression, + multiplicative_expression, multiplicative_expression_helper, + additive_expression, additive_expression_helper, + shift_expression, shift_expression_helper, + relational_expression, relational_expression_helper, + equality_expression, equality_expression_helper, + and_expression, and_expression_helper, + exclusive_or_expression, exclusive_or_expression_helper, + inclusive_or_expression, inclusive_or_expression_helper, + logical_and_expression, logical_and_expression_helper, + logical_or_expression, logical_or_expression_helper, + conditional_expression, conditional_expression_helper, + assignment_expression, assignment_operator, + expression, expression_helper, constant_expression, declaration, + declaration_specifiers, init_declarator_list, init_declarator, + type_specifier, specifier_qualifier_list, type_qualifier, declarator, + pointer, type_qualifier_list, identifier_list, type_name, + abstract_declarator, + direct_abstract_declarator, direct_abstract_declarator_helper, + statement; + + qi::rule + translation_unit; + + std::set* m_identifiers = nullptr; + }; + + class parser { + public: + bool parse(const std::string& line, std::set& identifiers); + + private: + grammar m_grammar; + }; +} From db3be269f0943f7b7ed13bbf0f3e906e564bc626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Brochet?= Date: Thu, 17 Dec 2015 10:04:50 +0100 Subject: [PATCH 3/3] Add boost to travis configuration --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 7a48a76..8d5775e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,7 @@ addons: sources: - ubuntu-toolchain-r-test - kalakris-cmake + - boost-latest packages: - gcc-4.9 - g++-4.9 @@ -18,6 +19,7 @@ addons: - make - cmake - uuid-dev + - 
libboost-all-dev install: - wget http://sbrochet.web.cern.ch/sbrochet/public/ROOT-${ROOT_VERSION}_Python-2.7_Ubuntu-12.04_gcc4.9.tar.xz