Skip to content

Commit

Permalink
Format preprocessed token stream in multiple passes
Browse files Browse the repository at this point in the history
Currently, the formatter doesn't handle many scenarios involving preprocessor
`ifdef`s/`endif`s interleaved with `begin`s, module headers, etc.
This patch attempts to solve this problem by performing multiple passes of the
formatting on preprocessed variants of the source. Each of these variants has a
different set of preprocessor branches enabled. Together, they should cover the
entire source (though that doesn't work in all cases yet). After several
formatting passes for different variants of the AST, a correct and properly
formatted file is produced.

This is still work in progress, so not everything works, and the code isn't very
clean. I'd love to get some early feedback on this.

Signed-off-by: Krzysztof Bieganski <[email protected]>
  • Loading branch information
kbieganski committed Jun 7, 2023
1 parent 529b519 commit 598c025
Show file tree
Hide file tree
Showing 20 changed files with 448 additions and 151 deletions.
121 changes: 82 additions & 39 deletions common/text/text_structure.cc
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,11 @@ void TextStructureView::TrimTokensToSubstring(int left_offset,
tokens_view_.begin(), tokens_view_.end(), view_trim_range.begin());
const auto iter_trim_end = std::lower_bound(
iter_trim_begin, tokens_view_.end(), view_trim_range.end());
const auto iter_trim_begin2 =
std::lower_bound(prep_tokens_view_.begin(), prep_tokens_view_.end(),
view_trim_range.begin());
const auto iter_trim_end2 = std::lower_bound(
iter_trim_begin2, prep_tokens_view_.end(), view_trim_range.end());

// Copy subset of tokens to new token sequence.
TokenSequence trimmed_stream(view_trim_range.begin(), view_trim_range.end());
Expand Down Expand Up @@ -278,9 +283,17 @@ void TextStructureView::TrimTokensToSubstring(int left_offset,
const int new_index = old_index - index_difference;
trimmed_view.push_back(trimmed_stream.begin() + new_index);
}
TokenStreamView prep_trimmed_view;
prep_trimmed_view.reserve(std::distance(iter_trim_begin2, iter_trim_end2));
for (auto token_iterator : make_range(iter_trim_begin2, iter_trim_end2)) {
const int old_index = std::distance(tokens_.cbegin(), token_iterator);
const int new_index = old_index - index_difference;
prep_trimmed_view.push_back(trimmed_stream.begin() + new_index);
}

// Swap new-old arrays, which will cause old arrays to be deleted.
tokens_view_.swap(trimmed_view);
prep_tokens_view_.swap(prep_trimmed_view);
tokens_.swap(trimmed_stream);
}

Expand Down Expand Up @@ -431,23 +444,28 @@ absl::Status TextStructureView::InternalConsistencyCheck() const {
return SyntaxTreeConsistencyCheck();
}

// TokenRange can be a container reference or iterator range.
// Appends every token in token_source to the end of *destination.
template <typename TokenRange>
static void CopyTokens(TokenSequence* destination,
                       const TokenRange& token_source) {
  // Walk the range with explicit iterators and append element by element.
  for (auto it = token_source.begin(); it != token_source.end(); ++it) {
    destination->push_back(*it);
  }
}

// TokenRange can be a container reference or iterator range.
// TokenViewRange can be a container reference or iterator range.
template <typename TokenRange, typename TokenViewRange>
static void CopyTokensAndView(TokenSequence* destination,
std::vector<int>* view_indices,
const TokenRange& token_source,
const TokenViewRange& view_source) {
static void CopyView(size_t base, std::vector<int>* view_indices,
const TokenRange& token_source,
const TokenViewRange& view_source) {
// Translate token_view's iterators into array indices, adjusting for the
// number of pre-existing tokens.
for (const auto& token_iter : view_source) {
view_indices->push_back(destination->size() +
view_indices->push_back(base +
std::distance(token_source.begin(), token_iter));
}
// Copy tokens up to this expansion point.
for (const auto& token : token_source) {
destination->push_back(token);
}
}

// Incrementally copies a slice of tokens and expands a single subtree.
Expand All @@ -457,29 +475,34 @@ static void CopyTokensAndView(TokenSequence* destination,
// token_view_indices. Offset is the location of each expansion point.
void TextStructureView::ConsumeDeferredExpansion(
TokenSequence::const_iterator* next_token_iter,
TokenStreamView::const_iterator* next_token_view_iter,
const std::vector<TokenStreamView::const_iterator*>& next_token_view_iters,
DeferredExpansion* expansion, TokenSequence* combined_tokens,
std::vector<int>* token_view_indices, const char* offset) {
const std::vector<TokenStreamView*>& token_views,
const std::vector<std::vector<int>*>& token_view_indices,
const char* offset) {
auto token_iter = *next_token_iter;
auto token_view_iter = *next_token_view_iter;
// Find the position up to each expansion point.
*next_token_iter =
std::lower_bound(token_iter, tokens_.cend(), offset,
[](const TokenInfo& token, const char* target) {
return std::distance(target, token.text().begin()) < 0;
});
CHECK(*next_token_iter != tokens_.cend());
*next_token_view_iter = std::lower_bound(
token_view_iter, tokens_view_.cend(), offset,
[](TokenStreamView::const_reference token_ref, const char* target) {
return std::distance(target, token_ref->text().begin()) < 0;
});
CHECK(*next_token_view_iter != tokens_view_.cend());

// Copy tokens and partial view into output.
CopyTokensAndView(combined_tokens, token_view_indices,
make_range(token_iter, *next_token_iter),
make_range(token_view_iter, *next_token_view_iter));
for (size_t i = 0; i < token_view_indices.size(); i++) {
auto token_view_iter = *next_token_view_iters[i];
// Find the position up to each expansion point.
*next_token_iter = std::lower_bound(
token_iter, tokens_.cend(), offset,
[](const TokenInfo& token, const char* target) {
return std::distance(target, token.text().begin()) < 0;
});
CHECK(*next_token_iter != tokens_.cend());
*next_token_view_iters[i] = std::lower_bound(
token_view_iter, token_views[i]->cend(), offset,
[](TokenStreamView::const_reference token_ref, const char* target) {
return std::distance(target, token_ref->text().begin()) < 0;
});
CHECK(*next_token_view_iters[i] != tokens_view_.cend());

// Copy tokens and partial view into output.
CopyView(combined_tokens->size(), token_view_indices[i],
make_range(token_iter, *next_token_iter),
make_range(token_view_iter, *next_token_view_iters[i]));
}
CopyTokens(combined_tokens, make_range(token_iter, *next_token_iter));

// Adjust locations of tokens in the expanded tree by pointing them
// into the original text (contents_).
Expand All @@ -498,16 +521,21 @@ void TextStructureView::ConsumeDeferredExpansion(
// Don't want to splice it into result.
sub_data.tokens_.pop_back();
}
CopyTokensAndView(combined_tokens, token_view_indices, sub_data.tokens_,
sub_data.tokens_view_);
for (std::vector<int>* indices : token_view_indices) {
CopyView(combined_tokens->size(), indices, sub_data.tokens_,
sub_data.tokens_view_);
}
CopyTokens(combined_tokens, sub_data.tokens_);

// Transfer ownership of transformed syntax tree to this object's tree.
*expansion->expansion_point = std::move(sub_data.MutableSyntaxTree());
subanalysis->MutableData().Clear();

// Advance one past expansion point to skip over expanded token.
++*next_token_iter;
++*next_token_view_iter;
for (auto next_token_view_iter : next_token_view_iters) {
++*next_token_view_iter;
}
}

TextStructure::TextStructure(std::shared_ptr<MemBlock> contents)
Expand All @@ -530,19 +558,29 @@ void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) {
// Gather indices and reconstruct iterators after there are no more
// reallocations due to growing combined_tokens.
std::vector<int> combined_token_view_indices;
std::vector<int> combined_prep_token_view_indices;
auto token_iter = tokens_.cbegin();
auto token_view_iter = tokens_view_.cbegin();
auto prep_token_view_iter = prep_tokens_view_.cbegin();
for (auto& expansion_entry : *expansions) {
const auto offset = Contents().begin() + expansion_entry.first;
ConsumeDeferredExpansion(&token_iter, &token_view_iter,
&expansion_entry.second, &combined_tokens,
&combined_token_view_indices, offset);
ConsumeDeferredExpansion(
&token_iter, {&token_view_iter, &prep_token_view_iter},
&expansion_entry.second, &combined_tokens,
{&tokens_view_, &prep_tokens_view_},
{&combined_token_view_indices, &combined_prep_token_view_indices},
offset);
}

// Copy the remaining tokens beyond the last expansion point.
CopyTokensAndView(&combined_tokens, &combined_token_view_indices,
make_range(token_iter, tokens_.cend()),
make_range(token_view_iter, tokens_view_.cend()));
CopyView(combined_tokens.size(), &combined_token_view_indices,
make_range(token_iter, tokens_.cend()),
make_range(token_view_iter, tokens_view_.cend()));
CopyView(combined_tokens.size(), &combined_prep_token_view_indices,
make_range(token_iter, tokens_.cend()),
make_range(prep_token_view_iter, prep_tokens_view_.cend()));

CopyTokens(&combined_tokens, make_range(token_iter, tokens_.cend()));

// Commit the newly expanded sequence of tokens.
tokens_.swap(combined_tokens);
Expand All @@ -553,6 +591,11 @@ void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) {
for (const auto index : combined_token_view_indices) {
tokens_view_.push_back(tokens_.cbegin() + index);
}
prep_tokens_view_.clear();
prep_tokens_view_.reserve(combined_prep_token_view_indices.size());
for (const auto index : combined_prep_token_view_indices) {
prep_tokens_view_.push_back(tokens_.cbegin() + index);
}

// Recalculate line-by-line token ranges.
// TODO(fangism): Should be possible to update line_token_map_ incrementally
Expand Down
16 changes: 14 additions & 2 deletions common/text/text_structure.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ class TextStructureView {

TokenStreamView& MutableTokenStreamView() { return tokens_view_; }

// Returns the read-only token stream view of the preprocessed source
// variant (backed by prep_tokens_view_).
const TokenStreamView& GetPreprocessedTokenStreamView() const {
return prep_tokens_view_;
}

// Mutable counterpart of GetPreprocessedTokenStreamView(); returns a
// reference to prep_tokens_view_ so it can be modified in place.
TokenStreamView& MutablePreprocessedTokenStreamView() {
return prep_tokens_view_;
}

// Creates a stream of modifiable iterators to the filtered tokens.
// Uses tokens_view_ to create the iterators.
TokenStreamReferenceView MakeTokenStreamReferenceView();
Expand Down Expand Up @@ -205,6 +213,8 @@ class TextStructureView {
// Possibly modified view of the tokens_ token sequence.
TokenStreamView tokens_view_;

TokenStreamView prep_tokens_view_;

// Index of token iterators that mark the beginnings of each line.
// Lazily calculated on request.
mutable std::vector<TokenSequence::const_iterator> lazy_line_token_map_;
Expand All @@ -220,9 +230,11 @@ class TextStructureView {

void ConsumeDeferredExpansion(
TokenSequence::const_iterator* next_token_iter,
TokenStreamView::const_iterator* next_token_view_iter,
const std::vector<TokenStreamView::const_iterator*>& next_token_view_iter,
DeferredExpansion* expansion, TokenSequence* combined_tokens,
std::vector<int>* token_view_indices, const char* offset);
const std::vector<TokenStreamView*>& token_views,
const std::vector<std::vector<int>*>& token_view_indices,
const char* offset);

// Resets all fields. Only needed in tests.
void Clear();
Expand Down
2 changes: 2 additions & 0 deletions common/text/token_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ class TokenInfo {

// True iff this token is the end-of-file sentinel (token_enum_ == TK_EOF).
bool isEOF() const { return token_enum_ == TK_EOF; }

bool visible = true; // FIXME: do this differently, or put in protected

protected: // protected, as ExpectedTokenInfo accesses it.
int token_enum_;

Expand Down
2 changes: 1 addition & 1 deletion verilog/CST/expression_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
namespace verilog {
namespace {

static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

using verible::SyntaxTreeSearchTestCase;
using verible::TextStructureView;
Expand Down
3 changes: 3 additions & 0 deletions verilog/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,13 @@ cc_library(
hdrs = ["flow_tree.h"],
deps = [
"//common/text:token-stream-view",
"//common/util:interval-set",
"//common/util:logging",
"//common/util:status-macros",
"//verilog/parser:verilog-token-enum",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
],
)
Expand Down
2 changes: 1 addition & 1 deletion verilog/analysis/extractors_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
namespace verilog {
namespace analysis {
namespace {
static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

TEST(CollectInterfaceNamesTest, NonModuleTests) {
const std::pair<absl::string_view, std::set<std::string>> kTestCases[] = {
Expand Down
91 changes: 90 additions & 1 deletion verilog/analysis/flow_tree.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2017-2022 The Verible Authors.
// Copyright 2017-2023 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -126,6 +126,10 @@ absl::Status FlowTree::MacroFollows(
return absl::InvalidArgumentError("Error macro name can't be extracted.");
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
if (macro_iterator->token_enum() != PP_Identifier) {
return absl::InvalidArgumentError("Expected identifier for macro name.");
}
Expand All @@ -142,6 +146,10 @@ absl::Status FlowTree::AddMacroOfConditional(
"Error no macro follows the conditional directive.");
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
auto macro_identifier = macro_iterator->text();
if (conditional_macro_id_.find(macro_identifier) ==
conditional_macro_id_.end()) {
Expand All @@ -162,6 +170,10 @@ int FlowTree::GetMacroIDOfConditional(
return -1;
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
auto macro_identifier = macro_iterator->text();
// It is always assumed that the macro already exists in the map.
return conditional_macro_id_[macro_identifier];
Expand All @@ -176,6 +188,83 @@ absl::Status FlowTree::GenerateVariants(const VariantReceiver &receiver) {
return DepthFirstSearch(receiver, source_sequence_.begin());
}

// Greedily computes a set of "define variants" (sets of macros assumed
// defined) intended to jointly cover every token of source_sequence_:
// each outer iteration walks the preprocessor control-flow graph once,
// steering into branches whose tokens are not yet covered, and records
// the `define assumptions that selected those branches.
// Returns the collected variants, or the error from
// GenerateControlFlowTree() if the flow graph could not be built.
absl::StatusOr<FlowTree::DefineVariants> FlowTree::MinCoverDefineVariants() {
auto status = GenerateControlFlowTree();
if (!status.ok()) return status;
verible::IntervalSet<int64_t> covered; // Token indices already covered by
// the variants generated so far.
verible::IntervalSet<int64_t> last_covered; // Coverage snapshot from the
// previous iteration; used to detect lack of progress.
DefineVariants define_variants; // The result – all define variants that
// should cover the entire source
DefineSet visited; // Visited defines are ones that are assumed to be defined
// or undefined (decided in a previous iteration)
const int64_t tok_count = static_cast<int64_t>(source_sequence_.size());
while (!covered.Contains({0, tok_count})) {
DefineSet defines; // Define sets are moved into the define variants list,
// so we make a new one each iteration
visited.clear(); // We keep the visited set to avoid unnecessary
// allocations, but clear it each iteration
TokenSequenceConstIterator tok_it = source_sequence_.begin();
while (tok_it < source_sequence_.end()) {
covered.Add(std::distance(source_sequence_.begin(), tok_it));
if (tok_it->token_enum() == PP_ifdef ||
tok_it->token_enum() == PP_ifndef ||
tok_it->token_enum() == PP_elsif) {
// NOTE(review): assumes the macro identifier sits exactly two tokens
// after the conditional directive; other code in this patch skips an
// optional TK_SPACE first — confirm this fixed offset is always valid.
const auto macro_id_it = tok_it + 2;
auto macro_text = macro_id_it->text();
bool negated = tok_it->token_enum() == PP_ifndef;
// If this macro was already visited (either defined/undefined), we have
// to stick to the same branch. TODO: handle `defines
if (visited.contains(macro_text)) {
bool assume_condition_is_true =
(negated ^ defines.contains(macro_text));
tok_it = edges_[tok_it][assume_condition_is_true ? 0 : 1];
} else {
// First time we see this macro; mark as visited
visited.insert(macro_text);
// Edge 0 is the branch taken when the condition holds; edge 1 is
// the branch taken when it does not.
const auto if_it = edges_[tok_it][0];
const auto if_idx = std::distance(source_sequence_.begin(), if_it);
const auto else_it = edges_[tok_it][1];
const auto else_idx =
std::distance(source_sequence_.begin(), else_it);
if (!covered.Contains({if_idx, else_idx})) {
// If the `ifdef is not covered, we assume the condition is true
if (!negated) defines.insert(macro_text);
tok_it = if_it;
} else {
// Else we assume the condition is false
if (negated) defines.insert(macro_text);
tok_it = else_it;
}
}
} else {
const auto it = edges_.find(tok_it);
if (it == edges_.end() || it->second.empty()) {
// If there's no outgoing edge, just move to the next token.
tok_it++;
} else {
// Else jump
tok_it = edges_[tok_it][0];
}
}
}
define_variants.push_back(std::move(defines));
// To prevent an infinite loop, if nothing new was covered, break.
if (last_covered == covered) {
// TODO: If there are nested `ifdefs that contradict each other early in
// the source, this will prevent us from traversing the rest of the flow
// tree. It would be better to detect this case, assume that the
// contradicting part is covered, and continue the analysis.
VLOG(4) << "Giving up on finding all define variants";
break; // FIXME: Perhaps we should error?
}
last_covered = covered;
}
VLOG(4) << "Done generating define variants. Coverage: " << covered;
return define_variants;
}

// Constructs the control flow tree, which determines the edges from each
// node (token index) to its possible children, and saves them in edges_.
Expand Down
Loading

0 comments on commit 598c025

Please sign in to comment.