From 0ece5aa8304d99c265241f2bd68907f83fb31c0e Mon Sep 17 00:00:00 2001 From: Krzysztof Bieganski Date: Fri, 28 Apr 2023 16:24:15 +0200 Subject: [PATCH] Format preprocessed token stream in multiple passes Currently, the formatter doesn't handle many scenarios involving preprocessor `ifdef`s/`endif`s interleaved with `begin`s, module headers, etc. (#228, #241, #267) This patch attempts to solve this problem by performing multiple passes of the formatting on preprocessed variants of the source. Each of these variants has a different set of preprocessor branches enabled. Together, they should cover the entire source (though that doesn't work in all cases yet). After several formatting passes for different variants of the AST, a correct and properly formatted file is produced. This is still work in progress, so not everything works, and the code isn't very clean. I'd love to get some early feedback on this. Signed-off-by: Krzysztof Bieganski --- verilog/formatting/formatter.cc | 226 +++++++++++--------- verilog/formatting/token_annotator.cc | 2 +- verilog/formatting/tree_unwrapper.cc | 24 ++- verilog/preprocessor/verilog_preprocess.cc | 197 ++++++++---------- verilog/preprocessor/verilog_preprocess.h | 231 +++++++++++++++------ 5 files changed, 406 insertions(+), 274 deletions(-) diff --git a/verilog/formatting/formatter.cc b/verilog/formatting/formatter.cc index dafe89a799..2929e965c0 100644 --- a/verilog/formatting/formatter.cc +++ b/verilog/formatting/formatter.cc @@ -20,6 +20,7 @@ #include #include +#include "absl/algorithm/container.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "common/formatting/format_token.h" @@ -53,6 +54,7 @@ #include "verilog/formatting/format_style.h" #include "verilog/formatting/token_annotator.h" #include "verilog/formatting/tree_unwrapper.h" +#include "verilog/parser/verilog_lexer.h" #include "verilog/parser/verilog_token_enum.h" #include "verilog/preprocessor/verilog_preprocess.h" @@ -76,25 +78,25 @@ using partition_node_type = VectorTree>; // Takes a TextStructureView and FormatStyle, and formats UnwrappedLines. class Formatter { public: - Formatter(const verible::TextStructureView& text_structure, - const FormatStyle& style) + Formatter(const verible::TextStructureView &text_structure, + const FormatStyle &style) : text_structure_(text_structure), style_(style) {} // Formats the source code - Status Format(const ExecutionControl&); + Status Format(const ExecutionControl &); Status Format() { return Format(ExecutionControl()); } - void SelectLines(const LineNumberSet& lines); + void SelectLines(const LineNumberSet &lines); // Outputs all of the FormattedExcerpt lines to stream. // If "include_disabled" is false, does not contain the disabled ranges. - void Emit(bool include_disabled, std::ostream& stream) const; + void Emit(bool include_disabled, std::ostream &stream) const; private: // Contains structural information about the code to format, such as // TokenSequence from lexing, and ConcreteSyntaxTree from parsing - const verible::TextStructureView& text_structure_; + const verible::TextStructureView &text_structure_; // The style configuration for the formatter FormatStyle style_; @@ -107,7 +109,7 @@ class Formatter { }; // TODO(b/148482625): make this public/re-usable for general content comparison. -Status VerifyFormatting(const verible::TextStructureView& text_structure, +Status VerifyFormatting(const verible::TextStructureView &text_structure, absl::string_view formatted_output, absl::string_view filename) { // Verify that the formatted output creates the same lexical @@ -122,7 +124,7 @@ Status VerifyFormatting(const verible::TextStructureView& text_structure, const auto reparse_status = reanalyzer->ParseStatus(); if (!relex_status.ok() || !reparse_status.ok()) { - const auto& token_errors = reanalyzer->TokenErrorMessages(); + const auto &token_errors = reanalyzer->TokenErrorMessages(); // Only print the first error. if (!token_errors.empty()) { return absl::DataLossError( @@ -158,9 +160,9 @@ Status VerifyFormatting(const verible::TextStructureView& text_structure, static Status ReformatVerilogIncrementally(absl::string_view original_text, absl::string_view formatted_text, absl::string_view filename, - const FormatStyle& style, - std::ostream& reformat_stream, - const ExecutionControl& control) { + const FormatStyle &style, + std::ostream &reformat_stream, + const ExecutionControl &control) { // Differences from the first formatting. const verible::LineDiffs formatting_diffs(original_text, formatted_text); // Added lines will be re-applied to incremental re-formatting. @@ -179,10 +181,10 @@ static Status ReformatVerilogIncrementally(absl::string_view original_text, static Status ReformatVerilog(absl::string_view original_text, absl::string_view formatted_text, absl::string_view filename, - const FormatStyle& style, - std::ostream& reformat_stream, - const LineNumberSet& lines, - const ExecutionControl& control) { + const FormatStyle &style, + std::ostream &reformat_stream, + const LineNumberSet &lines, + const ExecutionControl &control) { // Disable reformat check to terminate recursion. ExecutionControl convergence_control(control); convergence_control.verify_convergence = false; @@ -199,10 +201,10 @@ static Status ReformatVerilog(absl::string_view original_text, } static absl::StatusOr> ParseWithStatus( - absl::string_view text, absl::string_view filename) { + absl::string_view text, absl::string_view filename, + const verilog::VerilogPreprocess::Config &preprocess_config = {}) { std::unique_ptr analyzer = - VerilogAnalyzer::AnalyzeAutomaticMode( - text, filename, verilog::VerilogPreprocess::Config()); + VerilogAnalyzer::AnalyzeAutomaticMode(text, filename, preprocess_config); { // Lex and parse code. Exit on failure. const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus(); @@ -212,7 +214,7 @@ static absl::StatusOr> ParseWithStatus( constexpr bool with_diagnostic_context = false; const std::vector syntax_error_messages( analyzer->LinterTokenErrorMessages(with_diagnostic_context)); - for (const auto& message : syntax_error_messages) { + for (const auto &message : syntax_error_messages) { errstream << message << std::endl; } // Don't bother printing original code @@ -222,11 +224,11 @@ static absl::StatusOr> ParseWithStatus( return analyzer; } -absl::Status FormatVerilog(const verible::TextStructureView& text_structure, - absl::string_view filename, const FormatStyle& style, - std::string* formatted_text, - const verible::LineNumberSet& lines, - const ExecutionControl& control) { +absl::Status FormatVerilog(const verible::TextStructureView &text_structure, + absl::string_view filename, const FormatStyle &style, + std::string *formatted_text, + const verible::LineNumberSet &lines, + const ExecutionControl &control) { Formatter fmt(text_structure, style); fmt.SelectLines(lines); @@ -262,19 +264,39 @@ absl::Status FormatVerilog(const verible::TextStructureView& text_structure, } Status FormatVerilog(absl::string_view text, absl::string_view filename, - const FormatStyle& style, std::ostream& formatted_stream, - const LineNumberSet& lines, - const ExecutionControl& control) { - const auto analyzer = ParseWithStatus(text, filename); - if (!analyzer.ok()) return analyzer.status(); + const FormatStyle &style, std::ostream &formatted_stream, + const LineNumberSet &lines, + const ExecutionControl &control) { + VerilogPreprocess preprocessor( + VerilogPreprocess::Config{.gather_branches = true}); + verilog::VerilogPreprocessData preprocessed_data; + { + VerilogAnalyzer analyzer(text, filename); + absl::Status tokenize_status = analyzer.Tokenize(); + if (!tokenize_status.ok()) return tokenize_status; + preprocessed_data = + preprocessor.ScanStream(analyzer.Data().GetTokenStreamView()); + } + std::string text_to_format{text}; + for (const auto &define_set : preprocessed_data.define_variants) { + VerilogPreprocess::Config config{.filter_branches = true}; + for (auto &define : define_set) { + config.macro_definitions.emplace(define, std::nullopt); + } + + const auto analyzer = ParseWithStatus(text_to_format, filename, config); + if (!analyzer.ok()) return analyzer.status(); - const verible::TextStructureView& text_structure = analyzer->get()->Data(); - std::string formatted_text; - Status format_status = FormatVerilog(text_structure, filename, style, - &formatted_text, lines, control); - // Commit formatted text to the output stream independent of status. + const verible::TextStructureView &text_structure = analyzer->get()->Data(); + std::string formatted_text; + Status format_status = FormatVerilog(text_structure, filename, style, + &formatted_text, lines, control); + // Commit formatted text to the output stream independent of status. + if (!format_status.ok()) return format_status; + text_to_format = formatted_text; + } + const absl::string_view formatted_text = text_to_format; formatted_stream << formatted_text; - if (!format_status.ok()) return format_status; // When formatting whole-file (no --lines are specified), ensure that // the formatting transformation is convergent after one iteration. @@ -287,18 +309,18 @@ Status FormatVerilog(absl::string_view text, absl::string_view filename, !reformat_status.ok()) { return reformat_status; } - const std::string& reformatted_text(reformat_stream.str()); + const std::string &reformatted_text(reformat_stream.str()); return verible::ReformatMustMatch(text, lines, formatted_text, reformatted_text); } - return format_status; + return absl::OkStatus(); } -absl::Status FormatVerilogRange(const verible::TextStructureView& structure, - const FormatStyle& style, - std::string* formatted_text, - const verible::Interval& line_range, - const ExecutionControl& control) { +absl::Status FormatVerilogRange(const verible::TextStructureView &structure, + const FormatStyle &style, + std::string *formatted_text, + const verible::Interval &line_range, + const ExecutionControl &control) { if (line_range.empty()) { return absl::OkStatus(); } @@ -327,7 +349,7 @@ absl::Status FormatVerilogRange(const verible::TextStructureView& structure, // beginning of the original range, erase it in the formatted output. // TODO(hzeller): This can go when whitespace handling is revisited. // (Emit(), FormatWhitespaceWithDisabledByteRanges()) - const auto& text_lines = structure.Lines(); + const auto &text_lines = structure.Lines(); const char unformatted_begin = *text_lines[line_range.min - 1].begin(); if (!formatted_text->empty() && (*formatted_text)[0] == '\n' && unformatted_begin != '\n') { @@ -343,10 +365,10 @@ absl::Status FormatVerilogRange(const verible::TextStructureView& structure, absl::Status FormatVerilogRange(absl::string_view full_content, absl::string_view filename, - const FormatStyle& style, - std::string* formatted_text, - const verible::Interval& line_range, - const ExecutionControl& control) { + const FormatStyle &style, + std::string *formatted_text, + const verible::Interval &line_range, + const ExecutionControl &control) { const auto analyzer = ParseWithStatus(full_content, filename); if (!analyzer.ok()) return analyzer.status(); return FormatVerilogRange(analyzer->get()->Data(), style, formatted_text, @@ -365,12 +387,12 @@ static verible::Interval DisableByteOffsetRange( // Decided at each node in UnwrappedLine partition tree whether or not // it should be expanded or unexpanded. static void DeterminePartitionExpansion( - partition_node_type* node, - std::vector* preformatted_tokens, - absl::string_view full_text, const ByteOffsetSet& disabled_ranges, - const FormatStyle& style) { - auto& node_view = node->Value(); - const UnwrappedLine& uwline = node_view.Value(); + partition_node_type *node, + std::vector *preformatted_tokens, + absl::string_view full_text, const ByteOffsetSet &disabled_ranges, + const FormatStyle &style) { + auto &node_view = node->Value(); + const UnwrappedLine &uwline = node_view.Value(); VLOG(3) << "unwrapped line: " << uwline; const verible::FormatTokenRange ftoken_range(uwline.TokensRange()); const auto partition_policy = uwline.PartitionPolicy(); @@ -404,9 +426,9 @@ static void DeterminePartitionExpansion( // If any children are expanded, then this node must be expanded, // regardless of the UnwrappedLine's chosen policy. // Thus, this function must be executed with a post-order traversal. - const auto& children = node->Children(); + const auto &children = node->Children(); if (std::any_of(children.begin(), children.end(), - [](const partition_node_type& child) { + [](const partition_node_type &child) { return child.Value().IsExpanded(); })) { VLOG(3) << "Child forces parent to expand."; @@ -452,8 +474,8 @@ static void DeterminePartitionExpansion( // Check whether the whole function call fits on one line. If possible, // unexpand and fit into one line. Otherwise expand argument list with // tabular alignment. - auto& node_view_parent = node->Parent()->Value(); - const UnwrappedLine& uwline_parent = node_view_parent.Value(); + auto &node_view_parent = node->Parent()->Value(); + const UnwrappedLine &uwline_parent = node_view_parent.Value(); if (verible::FitsOnLine(uwline_parent, style).fits) { node_view.Unexpand(); break; @@ -508,10 +530,10 @@ static void DeterminePartitionExpansion( // value = function_name(8'hA, signal, // signal_1234); // end - const auto& children_tmp = node->Children(); - auto look_for_arglist = [](const partition_node_type& child) { - const auto& node_view_child = child.Value(); - const UnwrappedLine& uwline_child = node_view_child.Value(); + const auto &children_tmp = node->Children(); + auto look_for_arglist = [](const partition_node_type &child) { + const auto &node_view_child = child.Value(); + const UnwrappedLine &uwline_child = node_view_child.Value(); return (uwline_child.Origin() && uwline_child.Origin()->Kind() == verible::SymbolKind::kNode && verible::SymbolCastToNode(*uwline_child.Origin()) @@ -550,10 +572,10 @@ static void DeterminePartitionExpansion( // Produce a worklist of independently formattable UnwrappedLines. static std::vector MakeUnwrappedLinesWorklist( - const FormatStyle& style, absl::string_view full_text, - const ByteOffsetSet& disabled_ranges, - const TokenPartitionTree& format_tokens_partitions, - std::vector* preformatted_tokens) { + const FormatStyle &style, absl::string_view full_text, + const ByteOffsetSet &disabled_ranges, + const TokenPartitionTree &format_tokens_partitions, + std::vector *preformatted_tokens) { // Initialize a tree view that treats partitions as fully-expanded. ExpandableTreeView format_tokens_partition_view( format_tokens_partitions); @@ -563,7 +585,7 @@ static std::vector MakeUnwrappedLinesWorklist( // so must all of its parents (and transitively, ancestors). format_tokens_partition_view.ApplyPostOrder( [&full_text, &disabled_ranges, &style, - preformatted_tokens](partition_node_type& node) { + preformatted_tokens](partition_node_type &node) { DeterminePartitionExpansion(&node, preformatted_tokens, full_text, disabled_ranges, style); }); @@ -578,15 +600,15 @@ static std::vector MakeUnwrappedLinesWorklist( } static void PrintLargestPartitions( - std::ostream& stream, const TokenPartitionTree& token_partitions, - size_t max_partitions, const verible::LineColumnMap& line_column_map, + std::ostream &stream, const TokenPartitionTree &token_partitions, + size_t max_partitions, const verible::LineColumnMap &line_column_map, absl::string_view base_text) { stream << "Showing the " << max_partitions << " largest (leaf) token partitions:" << std::endl; const auto ranked_partitions = FindLargestPartitions(token_partitions, max_partitions); const verible::Spacer hline(80, '='); - for (const auto& partition : ranked_partitions) { + for (const auto &partition : ranked_partitions) { stream << hline << "\n[" << partition->Size() << " tokens"; if (!partition->IsEmpty()) { stream << ", starting at line:col " @@ -599,11 +621,11 @@ static void PrintLargestPartitions( stream << hline << std::endl; } -std::ostream& ExecutionControl::Stream() const { +std::ostream &ExecutionControl::Stream() const { return (stream != nullptr) ? *stream : std::cout; } -void Formatter::SelectLines(const LineNumberSet& lines) { +void Formatter::SelectLines(const LineNumberSet &lines) { disabled_ranges_ = EnabledLinesToDisabledByteRanges( lines, text_structure_.GetLineColumnMap()); } @@ -612,9 +634,9 @@ void Formatter::SelectLines(const LineNumberSet& lines) { // of text from formatting. This provides an easy way to preserve spacing on // selected syntax subtrees to reduce formatter harm while allowing // development to progress. -static void DisableSyntaxBasedRanges(ByteOffsetSet* disabled_ranges, - const verible::Symbol& root, - const FormatStyle& style, +static void DisableSyntaxBasedRanges(ByteOffsetSet *disabled_ranges, + const verible::Symbol &root, + const FormatStyle &style, absl::string_view full_text) { /** // Basic template: @@ -648,7 +670,7 @@ class ContinuationCommentAligner { static constexpr int kMaxColumnDifference = 1; public: - ContinuationCommentAligner(const verible::LineColumnMap& line_column_map, + ContinuationCommentAligner(const verible::LineColumnMap &line_column_map, const absl::string_view base_text) : line_column_map_(line_column_map), base_text_(base_text) {} @@ -661,8 +683,8 @@ class ContinuationCommentAligner { // Return value informs whether the line has been formatted and added // to already_formatted_lines. bool HandleLine( - const UnwrappedLine& uwline, - std::vector* already_formatted_lines) { + const UnwrappedLine &uwline, + std::vector *already_formatted_lines) { VLOG(4) << __FUNCTION__ << ": " << uwline; if (already_formatted_lines->empty()) { @@ -679,7 +701,7 @@ class ContinuationCommentAligner { return false; } - const auto& previous_line = already_formatted_lines->back(); + const auto &previous_line = already_formatted_lines->back(); VLOG(4) << __FUNCTION__ << ": previous line: " << previous_line; if (original_column_ == kInvalidColumn) { if (previous_line.Tokens().size() <= 1) { @@ -687,7 +709,7 @@ class ContinuationCommentAligner { << "too few tokens in previous line."; return false; } - const auto* previous_comment = previous_line.Tokens().back().token; + const auto *previous_comment = previous_line.Tokens().back().token; if (previous_comment->token_enum() != verilog_tokentype::TK_EOL_COMMENT) { VLOG(4) << "Not a continuation comment line: " << "no EOL comment in previous line."; @@ -696,7 +718,7 @@ class ContinuationCommentAligner { original_column_ = GetTokenColumn(previous_comment); } - const auto* comment = uwline.TokensRange().back().token; + const auto *comment = uwline.TokensRange().back().token; const int comment_column = GetTokenColumn(comment); VLOG(4) << "Original column: " << original_column_ << " vs. " @@ -721,7 +743,7 @@ class ContinuationCommentAligner { } private: - int GetTokenColumn(const verible::TokenInfo* token) { + int GetTokenColumn(const verible::TokenInfo *token) { CHECK_NOTNULL(token); const int column = line_column_map_.GetLineColAtOffset(base_text_, token->left(base_text_)) @@ -731,7 +753,7 @@ class ContinuationCommentAligner { } static void AdjustColumnUsingTokenSpacing( - const verible::FormattedToken& token, int* column) { + const verible::FormattedToken &token, int *column) { switch (token.before.action) { case verible::SpacingDecision::kPreserve: { if (token.before.preserved_space_start != nullptr) { @@ -751,9 +773,9 @@ class ContinuationCommentAligner { } } - static int CalculateEolCommentColumn(const verible::FormattedExcerpt& line) { + static int CalculateEolCommentColumn(const verible::FormattedExcerpt &line) { int column = 0; - const auto& front = line.Tokens().front(); + const auto &front = line.Tokens().front(); if (front.before.action != verible::SpacingDecision::kPreserve) { column += line.IndentationSpaces(); @@ -763,7 +785,7 @@ class ContinuationCommentAligner { } column += front.token->text().length(); - for (const auto& ftoken : verible::make_range(line.Tokens().begin() + 1, + for (const auto &ftoken : verible::make_range(line.Tokens().begin() + 1, line.Tokens().end() - 1)) { AdjustColumnUsingTokenSpacing(ftoken, &column); column += ftoken.token->text().length(); @@ -774,7 +796,7 @@ class ContinuationCommentAligner { return column; } - const verible::LineColumnMap& line_column_map_; + const verible::LineColumnMap &line_column_map_; const absl::string_view base_text_; // Used when the most recenly handled line can't have a continuation comment. @@ -786,9 +808,9 @@ class ContinuationCommentAligner { int formatted_column_ = kInvalidColumn; }; -Status Formatter::Format(const ExecutionControl& control) { +Status Formatter::Format(const ExecutionControl &control) { const absl::string_view full_text(text_structure_.Contents()); - const auto& token_stream(text_structure_.TokenStream()); + const auto &token_stream(text_structure_.TokenStream()); // Initialize auxiliary data needed for TreeUnwrapper. UnwrapperData unwrapper_data(token_stream); @@ -797,7 +819,7 @@ Status Formatter::Format(const ExecutionControl& control) { TreeUnwrapper tree_unwrapper(text_structure_, style_, unwrapper_data.preformatted_tokens); - const TokenPartitionTree* format_tokens_partitions = nullptr; + const TokenPartitionTree *format_tokens_partitions = nullptr; // TODO(fangism): The following block could be parallelized because // full-partitioning does not depend on format annotations. { @@ -813,7 +835,7 @@ Status Formatter::Format(const ExecutionControl& control) { // Find disabled formatting ranges for specific syntax tree node types. // These are typically temporary workarounds for sections that users // habitually prefer to format themselves. - if (const auto& root = text_structure_.SyntaxTree()) { + if (const auto &root = text_structure_.SyntaxTree()) { DisableSyntaxBasedRanges(&disabled_ranges_, *root, style_, full_text); } @@ -846,8 +868,8 @@ Status Formatter::Format(const ExecutionControl& control) { { // In this pass, perform additional modifications to the partitions and // spacings. - tree_unwrapper.ApplyPreOrder([&](TokenPartitionTree& node) { - const auto& uwline = node.Value(); + tree_unwrapper.ApplyPreOrder([&](TokenPartitionTree &node) { + const auto &uwline = node.Value(); const auto partition_policy = uwline.PartitionPolicy(); switch (partition_policy) { @@ -877,7 +899,7 @@ Status Formatter::Format(const ExecutionControl& control) { // Apply token spacing from partitions to tokens. This is permanent, so it // must be done after all reshaping is done. { - auto* root = tree_unwrapper.CurrentTokenPartition(); + auto *root = tree_unwrapper.CurrentTokenPartition(); auto node_iter = VectorTreeLeavesIterator(&LeftmostDescendant(*root)); const auto end = ++VectorTreeLeavesIterator(&RightmostDescendant(*root)); @@ -890,7 +912,7 @@ Status Formatter::Format(const ExecutionControl& control) { verible::ApplyAlreadyFormattedPartitionPropertiesToTokens( &(*node_iter), &unwrapper_data.preformatted_tokens); } else if (partition_policy == PartitionPolicyEnum::kInline) { - auto* parent = node_iter->Parent(); + auto *parent = node_iter->Parent(); CHECK_NOTNULL(parent); CHECK_EQ(parent->Value().PartitionPolicy(), PartitionPolicyEnum::kAlreadyFormatted); @@ -912,11 +934,11 @@ Status Formatter::Format(const ExecutionControl& control) { // For each UnwrappedLine: minimize total penalty of wrap/break decisions. // TODO(fangism): This could be parallelized if results are written // to their own 'slots'. - std::vector partially_formatted_lines; + std::vector partially_formatted_lines; formatted_lines_.reserve(unwrapped_lines.size()); ContinuationCommentAligner continuation_comment_aligner( text_structure_.GetLineColumnMap(), text_structure_.Contents()); - for (const auto& uwline : unwrapped_lines) { + for (const auto &uwline : unwrapped_lines) { // TODO(fangism): Use different formatting strategies depending on // uwline.PartitionPolicy(). if (continuation_comment_aligner.HandleLine(uwline, &formatted_lines_)) { @@ -949,7 +971,7 @@ Status Formatter::Format(const ExecutionControl& control) { err_stream << "*** Some token partitions failed to complete within the " "search limit:" << std::endl; - for (const auto* line : partially_formatted_lines) { + for (const auto *line : partially_formatted_lines) { err_stream << *line << std::endl; } err_stream << "*** end of partially formatted partition list" << std::endl; @@ -960,19 +982,19 @@ Status Formatter::Format(const ExecutionControl& control) { return absl::OkStatus(); } -void Formatter::Emit(bool include_disabled, std::ostream& stream) const { +void Formatter::Emit(bool include_disabled, std::ostream &stream) const { const absl::string_view full_text(text_structure_.Contents()); - std::function include_token_p; + std::function include_token_p; if (include_disabled) { - include_token_p = [](const verible::TokenInfo&) { return true; }; + include_token_p = [](const verible::TokenInfo &) { return true; }; } else { - include_token_p = [this, &full_text](const verible::TokenInfo& tok) { + include_token_p = [this, &full_text](const verible::TokenInfo &tok) { return !disabled_ranges_.Contains(tok.left(full_text)); }; } int position = 0; // tracks with the position in the original full_text - for (const verible::FormattedExcerpt& line : formatted_lines_) { + for (const verible::FormattedExcerpt &line : formatted_lines_) { // TODO(fangism): The handling of preserved spaces before tokens is messy: // some of it is handled here, some of it is inside FormattedToken. // TODO(mglb): Test empty line handling when this method becomes testable. diff --git a/verilog/formatting/token_annotator.cc b/verilog/formatting/token_annotator.cc index 7e2ac5ff70..4e60736a8c 100644 --- a/verilog/formatting/token_annotator.cc +++ b/verilog/formatting/token_annotator.cc @@ -938,7 +938,7 @@ void AnnotateFormatToken(const FormatStyle& style, const auto p = SpacesRequiredBetween(style, prev_token, *curr_token, prev_context, curr_context); curr_token->before.spaces_required = p.spaces_required; - if (p.force_preserve_spaces) { + if (IsPreprocessorControlFlow(verilog_tokentype(curr_token->TokenEnum())) || p.force_preserve_spaces) { // forego all inter-token calculations curr_token->before.break_decision = SpacingOptions::kPreserve; } else { diff --git a/verilog/formatting/tree_unwrapper.cc b/verilog/formatting/tree_unwrapper.cc index 346542985e..7079053b5f 100644 --- a/verilog/formatting/tree_unwrapper.cc +++ b/verilog/formatting/tree_unwrapper.cc @@ -198,7 +198,7 @@ class TreeUnwrapper::TokenScanner { // Calls TransitionState using current_state_ and the tranition token_Type void UpdateState(verilog_tokentype token_type) { current_state_ = TransitionState(current_state_, token_type); - seen_any_nonspace_ |= IsComment(token_type); + seen_any_nonspace_ |= !IsWhitespace(token_type); } // Returns true if this is a state that should start a new token partition. @@ -242,10 +242,16 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState( VLOG(4) << "state transition on: " << old_state << ", token: " << verilog_symbol_name(token_type); State new_state = old_state; + //if (IsPreprocessorControlToken(token_type)) return kEndNoNewline; + //if (token_type == PP_Identifier) return kNewPartition; switch (old_state) { case kStart: { if (IsNewlineOrEOF(token_type)) { new_state = kHaveNewline; + } else if (IsPreprocessorControlFlow(token_type)) { + new_state = kNewPartition; + } else if (token_type == verilog_tokentype::PP_Identifier) { + new_state = kStart; } else if (IsComment(token_type)) { new_state = kStart; // same state } else { @@ -256,6 +262,10 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState( case kHaveNewline: { if (IsNewlineOrEOF(token_type)) { new_state = kHaveNewline; // same state + } else if (IsPreprocessorControlFlow(token_type)) { + new_state = kNewPartition; + } else if (token_type == verilog_tokentype::PP_Identifier) { + new_state = kStart; } else if (IsComment(token_type)) { new_state = kNewPartition; } else { @@ -266,6 +276,10 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState( case kNewPartition: { if (IsNewlineOrEOF(token_type)) { new_state = kHaveNewline; + } else if (IsPreprocessorControlFlow(token_type)) { + new_state = kNewPartition; + } else if (token_type == verilog_tokentype::PP_Identifier) { + new_state = kStart; } else if (IsComment(token_type)) { new_state = kStart; } else { @@ -274,6 +288,13 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState( break; } case kEndWithNewline: + if (IsPreprocessorControlFlow(token_type)) { + new_state = kNewPartition; + } else if (token_type == verilog_tokentype::PP_Identifier) { + new_state = kStart; + } else if (!IsComment(token_type)) { + new_state = kStart; + } case kEndNoNewline: { // Terminal states. Must Reset() next. break; @@ -2966,6 +2987,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( // TODO(fangism): always,initial,final should be handled the same way case NodeEnum::kInitialStatement: case NodeEnum::kFinalStatement: { + break; // TODO: maybe not needed? // Check if 'initial' is separated from 'begin' by EOL_COMMENT. If so, // adjust indentation and do not merge leaf into previous leaf. const verible::UnwrappedLine& unwrapped_line = partition.Value(); diff --git a/verilog/preprocessor/verilog_preprocess.cc b/verilog/preprocessor/verilog_preprocess.cc index b8e2b21f40..704a2d0b0a 100644 --- a/verilog/preprocessor/verilog_preprocess.cc +++ b/verilog/preprocessor/verilog_preprocess.cc @@ -44,23 +44,24 @@ namespace verilog { using verible::TokenGenerator; using verible::TokenStreamView; using verible::container::FindOrNull; +using verible::container::FindWithDefault; using verible::container::InsertOrUpdate; -VerilogPreprocess::VerilogPreprocess(const Config& config) +VerilogPreprocess::VerilogPreprocess(const Config &config) : VerilogPreprocess(config, nullptr) {} -VerilogPreprocess::VerilogPreprocess(const Config& config, FileOpener opener) +VerilogPreprocess::VerilogPreprocess(const Config &config, FileOpener opener) : config_(config), file_opener_(std::move(opener)) { // To avoid having to check at every place if the stack is empty, we always // place a toplevel 'conditional' that is always selected. // Thus we only need to test in `else and `endif to see if we underrun due // to unbalanced statements. - conditional_block_.push( - BranchBlock(true, true, verible::TokenInfo::EOFToken())); + conditional_block_.push(block_tree.get()); + preprocess_data_.macro_definitions = config.macro_definitions; } TokenStreamView::const_iterator VerilogPreprocess::GenerateBypassWhiteSpaces( - const StreamIteratorGenerator& generator) { + const StreamIteratorGenerator &generator) { auto iterator = generator(); // iterator should be pointing to a non-whitespace token; while (verilog::VerilogLexer::KeepSyntaxTreeTokens(**iterator) == 0) { @@ -70,7 +71,7 @@ TokenStreamView::const_iterator VerilogPreprocess::GenerateBypassWhiteSpaces( } absl::StatusOr -VerilogPreprocess::ExtractMacroName(const StreamIteratorGenerator& generator) { +VerilogPreprocess::ExtractMacroName(const StreamIteratorGenerator &generator) { // Next token to expect is macro definition name. TokenStreamView::const_iterator token_iter = GenerateBypassWhiteSpaces(generator); @@ -79,7 +80,7 @@ VerilogPreprocess::ExtractMacroName(const StreamIteratorGenerator& generator) { **token_iter, "unexpected EOF where expecting macro name"); return absl::InvalidArgumentError("Unexpected EOF"); } - const auto& macro_name = *token_iter; + const auto ¯o_name = *token_iter; if (macro_name->token_enum() != PP_Identifier) { preprocess_data_.errors.emplace_back( **token_iter, @@ -94,7 +95,7 @@ VerilogPreprocess::ExtractMacroName(const StreamIteratorGenerator& generator) { // Assumes that the last token of a definition is the un-lexed definition body. // Tokens are copied from the 'generator' into 'define_tokens'. absl::Status VerilogPreprocess::ConsumeMacroDefinition( - const StreamIteratorGenerator& generator, TokenStreamView* define_tokens) { + const StreamIteratorGenerator &generator, TokenStreamView *define_tokens) { auto macro_name_extract = ExtractMacroName(generator); if (!macro_name_extract.ok()) { return macro_name_extract.status(); @@ -124,9 +125,9 @@ absl::Status VerilogPreprocess::ConsumeMacroDefinition( // Interprets a single macro definition parameter. // Tokens are scanned by advancing the token_scan iterator (by-reference). std::unique_ptr VerilogPreprocess::ParseMacroParameter( - TokenStreamView::const_iterator* token_scan, - MacroParameterInfo* macro_parameter) { - auto advance = [](TokenStreamView::const_iterator* scan) { return *++*scan; }; + TokenStreamView::const_iterator *token_scan, + MacroParameterInfo *macro_parameter) { + auto advance = [](TokenStreamView::const_iterator *scan) { return *++*scan; }; auto token_iter = **token_scan; // Extract macro name. if (token_iter->token_enum() != PP_Identifier) { @@ -185,7 +186,7 @@ std::unique_ptr VerilogPreprocess::ParseMacroParameter( // The span of tokens that covers a macro definition is expected to // be in define_tokens. std::unique_ptr VerilogPreprocess::ParseMacroDefinition( - const TokenStreamView& define_tokens, MacroDefinition* macro_definition) { + const TokenStreamView &define_tokens, MacroDefinition *macro_definition) { auto token_scan = define_tokens.begin() + 2; // skip `define and the name auto token_iter = *token_scan; if (token_iter->token_enum() == '(') { @@ -223,8 +224,8 @@ std::unique_ptr VerilogPreprocess::ParseMacroDefinition( // Parses a callable macro actual parameters, and saves it into a MacroCall absl::Status VerilogPreprocess::ConsumeAndParseMacroCall( TokenStreamView::const_iterator iter, - const StreamIteratorGenerator& generator, verible::MacroCall* macro_call, - const verible::MacroDefinition& macro_definition) { + const StreamIteratorGenerator &generator, verible::MacroCall *macro_call, + const verible::MacroDefinition ¯o_definition) { // Parsing the macro . const absl::string_view macro_name_str = (*iter)->text().substr(1); verible::TokenInfo macro_name_token(MacroCallId, macro_name_str); @@ -282,14 +283,14 @@ absl::Status VerilogPreprocess::HandleMacroIdentifier( const TokenStreamView::const_iterator iter // points to `MACROIDENTIFIER token , - const StreamIteratorGenerator& generator, bool forward = true) { + const StreamIteratorGenerator &generator, bool forward = true) { // Note: since this function is called we know that config_.expand_macros is // true. // Finding the macro definition. const absl::string_view sv = (*iter)->text(); - const auto* found = - FindOrNull(preprocess_data_.macro_definitions, sv.substr(1)); + const auto found = FindWithDefault(preprocess_data_.macro_definitions, + sv.substr(1), std::nullopt); if (!found) { preprocess_data_.errors.emplace_back( **iter, @@ -302,21 +303,17 @@ absl::Status VerilogPreprocess::HandleMacroIdentifier( verible::MacroCall macro_call; RETURN_IF_ERROR( ConsumeAndParseMacroCall(iter, generator, ¯o_call, *found)); - RETURN_IF_ERROR(ExpandMacro(macro_call, found)); + RETURN_IF_ERROR(ExpandMacro(macro_call, *found)); } - auto& lexed = preprocess_data_.lexed_macros_backup.back(); + auto &lexed = preprocess_data_.lexed_macros_backup.back(); if (!forward) return absl::OkStatus(); auto iter_generator = verible::MakeConstIteratorStreamer(lexed); - const auto it_end = lexed.end(); - for (auto it = iter_generator(); it != it_end; it++) { - preprocess_data_.preprocessed_token_stream.push_back(it); - } return absl::OkStatus(); } // Stores a macro definition for later use. void VerilogPreprocess::RegisterMacroDefinition( - const MacroDefinition& definition) { + const MacroDefinition &definition) { // For now, unconditionally register the macro definition, keeping the last // definition if macro is re-defined. const bool inserted = InsertOrUpdate(&preprocess_data_.macro_definitions, @@ -332,7 +329,7 @@ void VerilogPreprocess::RegisterMacroDefinition( // preprocess_data_.lexed_macros_backup Can be accessed directly after expansion // as: preprocess_data_.lexed_macros_backup.back() absl::Status VerilogPreprocess::ExpandText( - const absl::string_view& definition_text) { + const absl::string_view &definition_text) { VerilogLexer lexer(definition_text); verible::TokenSequence lexed_sequence; verible::TokenSequence expanded_lexed_sequence; @@ -350,7 +347,7 @@ absl::Status VerilogPreprocess::ExpandText( // Token-pulling loop. for (auto iter = iter_generator(); iter != end; iter = iter_generator()) { - auto& last_token = **iter; + auto &last_token = **iter; // TODO: handle lexical error if (lexer.GetLastToken().token_enum() == TK_SPACE) { continue; // don't forward spaces @@ -364,8 +361,8 @@ absl::Status VerilogPreprocess::ExpandText( last_token.token_enum() == MacroCallId) { RETURN_IF_ERROR(HandleMacroIdentifier(iter, iter_generator, false)); // merge the expanded macro tokens into 'expanded_lexed_sequence' - auto& expanded_child = preprocess_data_.lexed_macros_backup.back(); - for (auto& u : expanded_child) expanded_lexed_sequence.push_back(u); + auto &expanded_child = preprocess_data_.lexed_macros_backup.back(); + for (auto &u : expanded_child) expanded_lexed_sequence.push_back(u); continue; } expanded_lexed_sequence.push_back(last_token); @@ -377,17 +374,17 @@ absl::Status VerilogPreprocess::ExpandText( // This method expands a callable macro call, that follows this form: // `MACRO([param1],[param2],...) absl::Status VerilogPreprocess::ExpandMacro( - const verible::MacroCall& macro_call, - const verible::MacroDefinition* macro_definition) { - const auto& actual_parameters = macro_call.positional_arguments; + const verible::MacroCall ¯o_call, + const verible::MacroDefinition ¯o_definition) { + const auto &actual_parameters = macro_call.positional_arguments; std::map subs_map; - if (macro_definition->IsCallable()) { - RETURN_IF_ERROR(macro_definition->PopulateSubstitutionMap(actual_parameters, - &subs_map)); + if (macro_definition.IsCallable()) { + RETURN_IF_ERROR( + macro_definition.PopulateSubstitutionMap(actual_parameters, &subs_map)); } - VerilogLexer lexer(macro_definition->DefinitionText().text()); + VerilogLexer lexer(macro_definition.DefinitionText().text()); verible::TokenSequence lexed_sequence; verible::TokenSequence expanded_lexed_sequence; // Populating the lexed token sequence. @@ -405,7 +402,7 @@ absl::Status VerilogPreprocess::ExpandMacro( // Token-pulling loop. for (auto iter = iter_generator(); iter != end; iter = iter_generator()) { // TODO: handle lexical error - auto& last_token = **iter; + auto &last_token = **iter; if (last_token.token_enum() == TK_SPACE) continue; // don't forward spaces // If the expanded token is another macro identifier that needs to be // expanded. @@ -416,18 +413,18 @@ absl::Status VerilogPreprocess::ExpandMacro( last_token.token_enum() == MacroCallId) { RETURN_IF_ERROR(HandleMacroIdentifier(iter, iter_generator, false)); // merge the expanded macro tokens into 'expanded_lexed_sequence' - auto& expanded_child = preprocess_data_.lexed_macros_backup.back(); - for (auto& u : expanded_child) expanded_lexed_sequence.push_back(u); + auto &expanded_child = preprocess_data_.lexed_macros_backup.back(); + for (auto &u : expanded_child) expanded_lexed_sequence.push_back(u); continue; } - if (macro_definition->IsCallable()) { + if (macro_definition.IsCallable()) { // Check if the last token is a formal parameter - const auto* replacement = FindOrNull(subs_map, last_token.text()); + const auto *replacement = FindOrNull(subs_map, last_token.text()); if (replacement) { RETURN_IF_ERROR(ExpandText(replacement->text())); // merge the expanded macro tokens into 'expanded_lexed_sequence' - auto& expanded_child = preprocess_data_.lexed_macros_backup.back(); - for (auto& u : expanded_child) expanded_lexed_sequence.push_back(u); + auto &expanded_child = preprocess_data_.lexed_macros_backup.back(); + for (auto &u : expanded_child) expanded_lexed_sequence.push_back(u); continue; } } @@ -441,7 +438,7 @@ absl::Status VerilogPreprocess::ExpandMacro( // for use within the same file. absl::Status VerilogPreprocess::HandleDefine( const TokenStreamView::const_iterator iter, // points to `define token - const StreamIteratorGenerator& generator) { + const StreamIteratorGenerator &generator) { TokenStreamView define_tokens; define_tokens.push_back(*iter); RETURN_IF_ERROR(ConsumeMacroDefinition(generator, &define_tokens)); @@ -458,90 +455,69 @@ absl::Status VerilogPreprocess::HandleDefine( return absl::InvalidArgumentError("Error parsing macro definition."); } - // Parsing showed that things are syntatically correct. - // But let's only emit things if we're in an active preprocessing branch. - if (conditional_block_.top().InSelectedBranch()) { - RegisterMacroDefinition(macro_definition); - - // For now, forward all definition tokens. - for (const auto& token : define_tokens) { - preprocess_data_.preprocessed_token_stream.push_back(token); - } - } - + // FIXME: RegisterMacroDefinition(macro_definition); return absl::OkStatus(); } absl::Status VerilogPreprocess::HandleUndef( TokenStreamView::const_iterator undef_it, - const StreamIteratorGenerator& generator) { + const StreamIteratorGenerator &generator) { auto macro_name_extract = ExtractMacroName(generator); if (!macro_name_extract.ok()) { return macro_name_extract.status(); } - const auto& macro_name = *macro_name_extract.value(); + const auto ¯o_name = *macro_name_extract.value(); preprocess_data_.macro_definitions.erase(macro_name->text()); - // For now, forward all `undef tokens. - if (conditional_block_.top().InSelectedBranch()) { - preprocess_data_.preprocessed_token_stream.push_back(*undef_it); - preprocess_data_.preprocessed_token_stream.push_back(macro_name); - } + // FIXME: Unregister macro definition return absl::OkStatus(); } absl::Status VerilogPreprocess::HandleIf( const TokenStreamView::const_iterator ifpos, // `ifdef, `ifndef, `elseif - const StreamIteratorGenerator& generator) { - if (!config_.filter_branches) { // nothing to do. - preprocess_data_.preprocessed_token_stream.push_back(*ifpos); - return absl::OkStatus(); - } - + const StreamIteratorGenerator &generator) { auto macro_name_extract = ExtractMacroName(generator); if (!macro_name_extract.ok()) { return macro_name_extract.status(); } - const auto& macro_name = *macro_name_extract.value(); - const bool negative_if = (*ifpos)->token_enum() == PP_ifndef; - const auto& defs = preprocess_data_.macro_definitions; - const bool name_is_defined = defs.find(macro_name->text()) != defs.end(); - const bool condition_met = (name_is_defined ^ negative_if); + const auto ¯o_name = *macro_name_extract.value(); if ((*ifpos)->token_enum() == PP_elsif) { if (conditional_block_.size() <= 1) { preprocess_data_.errors.emplace_back(**ifpos, "Unmatched `elsif"); return absl::InvalidArgumentError("Unmatched `else"); } - if (!conditional_block_.top().UpdateCondition(**ifpos, condition_met)) { + if (auto *block = + conditional_block_.top()->StartElse(ifpos, macro_name->text())) { + conditional_block_.pop(); + conditional_block_.push(block); + } else { preprocess_data_.errors.emplace_back(**ifpos, "`elsif after `else"); - preprocess_data_.errors.emplace_back(conditional_block_.top().token(), + preprocess_data_.errors.emplace_back(conditional_block_.top()->token(), "Previous `else started here."); return absl::InvalidArgumentError("Duplicate `else"); } } else { // A new, nested if-branch. - const bool scope_enabled = conditional_block_.top().InSelectedBranch(); - conditional_block_.push(BranchBlock(scope_enabled, condition_met, **ifpos)); + auto &block = conditional_block_.top()->StartIf(ifpos, macro_name->text()); + conditional_block_.push(&block); } return absl::OkStatus(); } absl::Status VerilogPreprocess::HandleElse( TokenStreamView::const_iterator else_pos) { - if (!config_.filter_branches) { // nothing to do. - preprocess_data_.preprocessed_token_stream.push_back(*else_pos); - return absl::OkStatus(); - } - if (conditional_block_.size() <= 1) { preprocess_data_.errors.emplace_back(**else_pos, "Unmatched `else"); return absl::InvalidArgumentError("Unmatched `else"); } - if (!conditional_block_.top().StartElse(**else_pos)) { + if (auto *block = conditional_block_.top()->StartElse(else_pos)) { + conditional_block_.pop(); + conditional_block_.push(block); + } else { preprocess_data_.errors.emplace_back(**else_pos, "Duplicate `else"); - preprocess_data_.errors.emplace_back(conditional_block_.top().token(), + preprocess_data_.errors.emplace_back(conditional_block_.top()->token(), "Previous `else started here."); return absl::InvalidArgumentError("Duplicate `else"); } @@ -550,15 +526,11 @@ absl::Status VerilogPreprocess::HandleElse( absl::Status VerilogPreprocess::HandleEndif( TokenStreamView::const_iterator endif_pos) { - if (!config_.filter_branches) { // nothing to do. - preprocess_data_.preprocessed_token_stream.push_back(*endif_pos); - return absl::OkStatus(); - } - if (conditional_block_.size() <= 1) { preprocess_data_.errors.emplace_back(**endif_pos, "Unmatched `endif"); return absl::InvalidArgumentError("Unmatched `endif"); } + conditional_block_.top()->End(endif_pos); conditional_block_.pop(); return absl::OkStatus(); } @@ -579,7 +551,7 @@ absl::Status VerilogPreprocess::HandleEndif( absl::Status VerilogPreprocess::HandleInclude( TokenStreamView::const_iterator iter, - const StreamIteratorGenerator& generator) { + const StreamIteratorGenerator &generator) { if (!file_opener_) { return absl::FailedPreconditionError("file_opener_ is not defined"); } @@ -596,7 +568,7 @@ absl::Status VerilogPreprocess::HandleInclude( return absl::InvalidArgumentError("Expected a path to a SV file."); } // Currently the file path looks like "path", we need to remove "" or <> - const auto& token_text = file_token_iter->text(); + const auto &token_text = file_token_iter->text(); std::filesystem::path file_path = std::string(token_text.substr(1, token_text.size() - 2)); @@ -621,11 +593,11 @@ absl::Status VerilogPreprocess::HandleInclude( // TODO(karimtera): limit number of nested includes, detect cycles? maybe. preprocess_data_.included_text_structure.emplace_back( new verible::TextStructure(source_contents)); - verible::TextStructure& included_structure = + verible::TextStructure &included_structure = *preprocess_data_.included_text_structure.back(); // "included_sequence" should contain the lexed token sequence. - verible::TokenSequence& included_sequence = + verible::TokenSequence &included_sequence = included_structure.MutableData().MutableTokenStream(); // Lexing the included file content, and storing it in "included_sequence". @@ -652,15 +624,11 @@ absl::Status VerilogPreprocess::HandleInclude( // Need to move the text structures of the child preprocessor to avoid // destruction. - for (auto& u : child_preprocessed_data.included_text_structure) { + for (auto &u : child_preprocessed_data.included_text_structure) { preprocess_data_.included_text_structure.push_back(std::move(u)); } - // Forwarding the included preprocessed view. - for (const auto& u : child_preprocessed_data.preprocessed_token_stream) { - preprocess_data_.preprocessed_token_stream.push_back(u); - } - + // FIXME: Forwarding the included preprocessed view. return absl::OkStatus(); } @@ -668,7 +636,7 @@ absl::Status VerilogPreprocess::HandleInclude( // object and possibly transform the input token stream. absl::Status VerilogPreprocess::HandleTokenIterator( TokenStreamView::const_iterator iter, - const StreamIteratorGenerator& generator) { + const StreamIteratorGenerator &generator) { switch ((*iter)->token_enum()) { case PP_define: return HandleDefine(iter, generator); @@ -693,21 +661,15 @@ absl::Status VerilogPreprocess::HandleTokenIterator( if (config_.include_files && (*iter)->token_enum() == PP_include) { return HandleInclude(iter, generator); } - - // If not return'ed above, any other tokens are passed through unmodified - // unless filtered by a branch. - if (conditional_block_.top().InSelectedBranch()) { - preprocess_data_.preprocessed_token_stream.push_back(*iter); - } return absl::OkStatus(); } void VerilogPreprocess::setPreprocessingInfo( - const verilog::FileList::PreprocessingInfo& preprocess_info) { + const verilog::FileList::PreprocessingInfo &preprocess_info) { preprocess_info_ = preprocess_info; // Adding defines. - for (const auto& define : preprocess_info_.defines) { + for (const auto &define : preprocess_info_.defines) { // manually create the tokens to save them into a MacroDefinition. verible::TokenInfo macro_directive(PP_define, "`define"); verible::TokenInfo macro_name(PP_Identifier, define.name); @@ -723,10 +685,11 @@ void VerilogPreprocess::setPreprocessingInfo( } VerilogPreprocessData VerilogPreprocess::ScanStream( - const TokenStreamView& token_stream) { + const TokenStreamView &token_stream) { preprocess_data_.preprocessed_token_stream.reserve(token_stream.size()); + block_tree->actual_begin = block_tree->begin = token_stream.begin(); + const auto end = block_tree->end = token_stream.end(); auto iter_generator = verible::MakeConstIteratorStreamer(token_stream); - const auto end = token_stream.end(); // Token-pulling loop. for (auto iter = iter_generator(); iter != end; iter = iter_generator()) { const auto status = HandleTokenIterator(iter, iter_generator); @@ -739,10 +702,24 @@ VerilogPreprocessData VerilogPreprocess::ScanStream( if (conditional_block_.size() > 1 && preprocess_data_.errors.empty()) { // Only report if not followup-error preprocess_data_.errors.emplace_back( - conditional_block_.top().token(), + conditional_block_.top()->token(), "Unterminated preprocessing conditional here, but never completed at " "end of file."); } + + if (config_.filter_branches) { + block_tree->Emit( + preprocess_data_.preprocessed_token_stream, [this](const auto &block) { + return block.ConditionMet(preprocess_data_.macro_definitions); + }); + } else { + block_tree->Emit(preprocess_data_.preprocessed_token_stream); + } + + if (config_.gather_branches) { + preprocess_data_.define_variants = block_tree->GatherBranches(); + } + return std::move(preprocess_data_); } diff --git a/verilog/preprocessor/verilog_preprocess.h b/verilog/preprocessor/verilog_preprocess.h index 69d9759835..559f5b7f0c 100644 --- a/verilog/preprocessor/verilog_preprocess.h +++ b/verilog/preprocessor/verilog_preprocess.h @@ -41,7 +41,9 @@ #include #include #include +#include +#include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" @@ -50,6 +52,7 @@ #include "common/text/token_info.h" #include "common/text/token_stream_view.h" #include "verilog/analysis/verilog_filelist.h" +#include "verilog/parser/verilog_token_enum.h" namespace verilog { @@ -61,15 +64,16 @@ struct VerilogPreprocessError { verible::TokenInfo token_info; // offending token std::string error_message; - VerilogPreprocessError(const verible::TokenInfo& token, - const std::string& message) + VerilogPreprocessError(const verible::TokenInfo &token, + const std::string &message) : token_info(token), error_message(message) {} }; +using verible::MacroDefinition; +using MacroDefinitionRegistry = std::map>; + // Information that results from preprocessing. struct VerilogPreprocessData { - using MacroDefinition = verible::MacroDefinition; - using MacroDefinitionRegistry = std::map; using TokenSequence = std::vector; // Resulting token stream after preprocessing @@ -87,6 +91,8 @@ struct VerilogPreprocessData { // are two separate vectors. std::vector errors; std::vector warnings; + + std::vector> define_variants; }; // VerilogPreprocess transforms a TokenStreamView. @@ -116,10 +122,13 @@ class VerilogPreprocess { // Expand macro definition bodies, this will relexes the macro body. bool expand_macros = false; // TODO(hzeller): Provide a map of command-line provided +define+'s + + bool gather_branches = false; + MacroDefinitionRegistry macro_definitions; }; - explicit VerilogPreprocess(const Config& config); - VerilogPreprocess(const Config& config, FileOpener opener); + explicit VerilogPreprocess(const Config &config); + VerilogPreprocess(const Config &config, FileOpener opener); // Initialize preprocessing with safe default options // TODO(hzeller): remove this constructor once all places using the @@ -129,14 +138,14 @@ class VerilogPreprocess { // ScanStream reads in a stream of tokens returns the result as a move // of preprocessor_data_. preprocessor_data_ should not be accessed // after this returns. - VerilogPreprocessData ScanStream(const TokenStreamView& token_stream); + VerilogPreprocessData ScanStream(const TokenStreamView &token_stream); // TODO(fangism): ExpandMacro, ExpandMacroCall // TODO(b/111544845): ExpandEvalStringLiteral // Sets the preprocessing information containing defines and incdirs. void setPreprocessingInfo( - const verilog::FileList::PreprocessingInfo& preprocess_info); + const verilog::FileList::PreprocessingInfo &preprocess_info); private: using StreamIteratorGenerator = @@ -146,49 +155,48 @@ class VerilogPreprocess { // iterator of macro name or a failure status. // Updates error messages on failure. absl::StatusOr ExtractMacroName( - const StreamIteratorGenerator&); + const StreamIteratorGenerator &); absl::Status HandleTokenIterator(TokenStreamView::const_iterator, - const StreamIteratorGenerator&); + const StreamIteratorGenerator &); absl::Status HandleMacroIdentifier(TokenStreamView::const_iterator, - const StreamIteratorGenerator&, + const StreamIteratorGenerator &, bool forward); absl::Status HandleDefine(TokenStreamView::const_iterator, - const StreamIteratorGenerator&); + const StreamIteratorGenerator &); absl::Status HandleUndef(TokenStreamView::const_iterator, - const StreamIteratorGenerator&); + const StreamIteratorGenerator &); absl::Status HandleIf(TokenStreamView::const_iterator ifpos, - const StreamIteratorGenerator&); + const StreamIteratorGenerator &); absl::Status HandleElse(TokenStreamView::const_iterator else_pos); absl::Status HandleEndif(TokenStreamView::const_iterator endif_pos); - static absl::Status ConsumeAndParseMacroCall(TokenStreamView::const_iterator, - const StreamIteratorGenerator&, - verible::MacroCall*, - const verible::MacroDefinition&); + static absl::Status ConsumeAndParseMacroCall( + TokenStreamView::const_iterator, const StreamIteratorGenerator &, + verible::MacroCall *, const verible::MacroDefinition &); // The following functions return nullptr when there is no error: - absl::Status ConsumeMacroDefinition(const StreamIteratorGenerator&, - TokenStreamView*); + absl::Status ConsumeMacroDefinition(const StreamIteratorGenerator &, + TokenStreamView *); static std::unique_ptr ParseMacroDefinition( - const TokenStreamView&, MacroDefinition*); + const TokenStreamView &, MacroDefinition *); static std::unique_ptr ParseMacroParameter( - TokenStreamView::const_iterator*, MacroParameterInfo*); + TokenStreamView::const_iterator *, MacroParameterInfo *); - void RegisterMacroDefinition(const MacroDefinition&); - absl::Status ExpandText(const absl::string_view&); - absl::Status ExpandMacro(const verible::MacroCall&, - const verible::MacroDefinition*); + void RegisterMacroDefinition(const MacroDefinition &); + absl::Status ExpandText(const absl::string_view &); + absl::Status ExpandMacro(const verible::MacroCall &, + const verible::MacroDefinition &); absl::Status HandleInclude(TokenStreamView::const_iterator, - const StreamIteratorGenerator&); + const StreamIteratorGenerator &); // Generate a const_iterator to a non-whitespace token. static TokenStreamView::const_iterator GenerateBypassWhiteSpaces( - const StreamIteratorGenerator&); + const StreamIteratorGenerator &); const Config config_; @@ -197,52 +205,155 @@ class VerilogPreprocess { // matches will be selected - or the else block if none before matched. class BranchBlock { public: - BranchBlock(bool is_enabled, bool condition, - const verible::TokenInfo& token) - : outer_scope_enabled_(is_enabled), branch_token_(token) { - UpdateCondition(token, condition); + using TokenStreamView = verible::TokenStreamView; + + explicit BranchBlock(TokenStreamView::const_iterator pos = {}, + std::optional macro = {}) + : begin(pos), actual_begin(pos), macro_name(macro) { + if (actual_begin != TokenStreamView::const_iterator{}) { + actual_begin++; + if (macro) actual_begin++; + } } - // Is the current block selected, i.e. should tokens pass through. - bool InSelectedBranch() const { - return outer_scope_enabled_ && current_branch_condition_met_; + BranchBlock &StartIf(TokenStreamView::const_iterator pos, + absl::string_view macro) { + children.emplace_back(BranchBlock{pos, macro}); + return children.back(); } - // Update condition e.g. in `elsif. Return 'false' if already in an - // else clause. - bool UpdateCondition(const verible::TokenInfo& token, bool condition) { - if (in_else_) return false; - branch_token_ = token; - // Only the first in a row of matching conditions will select block. - current_branch_condition_met_ = !any_branch_matched_ && condition; - any_branch_matched_ |= condition; - return true; + BranchBlock *StartElse(TokenStreamView::const_iterator pos, + std::optional if_macro = {}) { + end = pos; + if (!macro_name) return nullptr; + else_branch = std::make_unique(pos, if_macro); + return else_branch.get(); + } + + void End(TokenStreamView::const_iterator pos) { end = pos; } + TokenStreamView::const_iterator GetEnd() const { + return else_branch ? else_branch->GetEnd() : end; + } + + void Emit(TokenStreamView &token_stream) const { + token_stream.insert(token_stream.end(), actual_begin, begin); + if (else_branch) { + else_branch->Emit(token_stream); + } + } + + void Emit(TokenStreamView &token_stream, + const std::function &pred) const { + if (pred(*this)) { + auto it = actual_begin; + for (const auto &child : children) { + token_stream.insert(token_stream.end(), it, child.begin); + child.Emit(token_stream, pred); + it = child.GetEnd() + 1; + } + token_stream.insert(token_stream.end(), it, end); + } else if (else_branch) { + else_branch->Emit(token_stream, pred); + } } - // Start an `else block. Uses its internal state to determine if - // this will put is InSelectedBranch(). - // Returns 'false' if already in an else block. - bool StartElse(const verible::TokenInfo& token) { - if (in_else_) return false; - in_else_ = true; - branch_token_ = token; - current_branch_condition_met_ = !any_branch_matched_; + bool ConditionMet(const MacroDefinitionRegistry &defs) const { + if (macro_name) { + const bool negative_if = (*begin)->token_enum() == PP_ifndef; + const bool name_is_defined = defs.find(*macro_name) != defs.end(); + return name_is_defined ^ negative_if; + } return true; } - const verible::TokenInfo& token() const { return branch_token_; } + std::vector> GatherBranches() { + absl::flat_hash_set visited; + std::vector> define_variants; + std::vector token_stream_variants; // For + // debugging + const auto was_visited = [&](const BranchBlock &block) { + return visited.contains(&block); + }; + + // To keep from looping forever + // (in case of something like `ifdef FOO `ifndef FOO), + // introduce a special termination condition + absl::flat_hash_set all_defines; + int define_count = -1; + + while (!EachInBothBranches(was_visited) && + define_count != static_cast(all_defines.size())) { + define_count = all_defines.size(); + absl::flat_hash_set defines; + token_stream_variants.emplace_back(); + Emit(token_stream_variants.back(), [&](const auto &block) { + // FIXME: handle `ifndef branches. Currently they're all treated like + // `ifdef here + const bool do_first_branch = + // We already took this branch, we must do it again + (block.macro_name && defines.contains(*block.macro_name)) || + // ...Or it's the first time we see this macro, so take this + // branch + !block.EachInFirstBranch(was_visited) || + // ...Else only take it if there's no else and there's no + // condition + (!block.else_branch && !block.macro_name); + if (do_first_branch) { + visited.insert(&block); + if (block.macro_name) { + defines.insert(*block.macro_name); + } + } + return do_first_branch; + }); + all_defines.merge(defines); + define_variants.push_back(std::move(defines)); + } + + VLOG(0) << "Number of preprocess variants: " + << token_stream_variants.size(); + for (const auto &variant : token_stream_variants) { + std::ostringstream ss; + for (const auto &token : variant) { + ss << *token; + } + VLOG(0) << "Preprocess variant:\n" << ss.str() << std::endl; + } + return define_variants; + } + + bool EachInFirstBranch( + const std::function &pred) const { + return pred(*this) && std::all_of(children.begin(), children.end(), + [&pred](const auto &child) { + return child.EachInBothBranches(pred); + }); + } + + bool EachInBothBranches( + const std::function &pred) const { + return EachInFirstBranch(pred) && + (!else_branch || else_branch->EachInBothBranches(pred)); + } - private: - const bool outer_scope_enabled_; - verible::TokenInfo branch_token_; // FYI for error reporting. - bool any_branch_matched_ = false; // only if no branch, `else will - bool in_else_ = false; - bool current_branch_condition_met_; + const verible::TokenInfo &token() const { return **begin; } + + std::list children; // FIXME: use a better container. It's a + // list for now to keep pointers valid + std::unique_ptr else_branch; + TokenStreamView::const_iterator begin; + TokenStreamView::const_iterator actual_begin; // First token after + // branch token(s) + TokenStreamView::const_iterator end; + std::optional macro_name; + std::vector defines; + std::vector undefs; }; // State of nested conditional blocks. For code simplicity, this always has // a toplevel branch that is selected. - std::stack conditional_block_; + std::unique_ptr block_tree = std::make_unique(); + std::stack conditional_block_; // Results of preprocessing VerilogPreprocessData preprocess_data_;