diff --git a/base/compiler/ssair/basicblock.jl b/base/compiler/ssair/basicblock.jl new file mode 100644 index 0000000000000..427aae707e664 --- /dev/null +++ b/base/compiler/ssair/basicblock.jl @@ -0,0 +1,32 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" +Like UnitRange{Int}, but can handle the `last` field, being temporarily +< first (this can happen during compacting) +""" +struct StmtRange <: AbstractUnitRange{Int} + start::Int + stop::Int +end + +first(r::StmtRange) = r.start +last(r::StmtRange) = r.stop +iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1) + +StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range)) + +struct BasicBlock + stmts::StmtRange + preds::Vector{Int} + succs::Vector{Int} +end + +function BasicBlock(stmts::StmtRange) + return BasicBlock(stmts, Int[], Int[]) +end + +function BasicBlock(old_bb, stmts) + return BasicBlock(stmts, old_bb.preds, old_bb.succs) +end + +copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs)) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index f9b407f9ddb3e..1ab2876b769da 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -1,63 +1,573 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# This file implements the Semi-NCA (SNCA) dominator tree construction +# described in Georgiadis' PhD thesis [LG05], which itself is a simplification +# of the Simple Lengauer-Tarjan (SLT) algorithm [LT79]. This algorithm matches +# the algorithm choice in LLVM and seems to be a sweet spot in implementation +# simplicity and efficiency. +# +# This file also implements an extension of SNCA that supports updating the +# dominator tree with insertion and deletion of edges in the control flow +# graph, described in [GI16] as Dynamic SNCA. 
DSNCA was chosen over DBS, a +# different algorithm which achieves the best overall performance in [GI16], +# because it is simpler to understand and implement, performs well with edge +# deletions, and is of similar performance overall. +# +# SNCA works by first computing semidominators, then computing immediate +# dominators from them. The semidominator of a node is the node with minimum +# preorder number such that there is a semidominator path from it to the node. +# A semidominator path is a path in which the preorder numbers of all nodes not +# at the endpoints are greater than the preorder number of the last node. +# Intuitively, the semidominator approximates the immediate dominator of a node +# by taking the path (in the CFG) that gets as close to the root as possible +# while avoiding ancestors of the node in the DFS tree. +# +# In computing the semidominators, SNCA performs "path compression" whenever a +# node has a nontrivial semidominator (i.e. a semidominator that is not just +# its parent in the DFS tree). Path compression propagates the "label" of a +# node, which represents a possible semidominator with associated semidominator +# path passing through that node. +# +# For example, path compression will be performed for the following CFG, where +# the edge not in the DFS tree is marked with asterisks. Note that nodes are +# labeled with their preorder numbers, and all edges point downward. +# +# 1 +# |\ +# | \ +# | 4 +# | | +# 2 5 +# | | +# | 6 +# | * +# |* +# 3 +# +# There is a nontrivial semidominator path from 1 to 3, passing through 4, 5, +# and 6. Stepping through the whole algorithm on paper with an example like +# this is very helpful for understanding how it works. +# +# DSNCA runs the whole algorithm from scratch if the DFS tree is invalidated by +# the insertion or deletion, but otherwise recomputes a subset of the +# semidominators (all immediate dominators then need to be recomputed). 
+# +# [LG05] Linear-Time Algorithms for Dominators and Related Problems +# Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: +# ftp://ftp.cs.princeton.edu/reports/2005/737.pdf +# +# [LT79] A fast algorithm for finding dominators in a flowgraph +# Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1 +# http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf +# +# [GI16] An Experimental Study of Dynamic Dominators +# Loukas Georgiadis, Giuseppe F. Italiano, Luigi Laura, Federico +# Santaroni, April 2016 +# https://arxiv.org/abs/1604.02711 + +# We could make these real structs, but probably not worth the extra +# overhead. Still, give them names for documentary purposes. +const BBNumber = Int +const PreNumber = Int +const PostNumber = Int + +struct DFSTree + # These map between BB number and pre- or postorder numbers + to_pre::Vector{PreNumber} + from_pre::Vector{BBNumber} + to_post::Vector{PostNumber} + from_post::Vector{BBNumber} + + # Records parent relationships in the DFS tree + # (preorder number -> preorder number) + # Storing it this way saves a few lookups in the snca_compress! 
algorithm + to_parent_pre::Vector{PreNumber} +end + +function DFSTree(n_blocks::Int) + return DFSTree(zeros(PreNumber, n_blocks), + Vector{BBNumber}(undef, n_blocks), + zeros(PostNumber, n_blocks), + Vector{BBNumber}(undef, n_blocks), + zeros(PreNumber, n_blocks)) +end + +copy(D::DFSTree) = DFSTree(copy(D.to_pre), + copy(D.from_pre), + copy(D.to_post), + copy(D.from_post), + copy(D.to_parent_pre)) + +function copy!(dst::DFSTree, src::DFSTree) + copy!(dst.to_pre, src.to_pre) + copy!(dst.from_pre, src.from_pre) + copy!(dst.to_post, src.to_post) + copy!(dst.from_post, src.from_post) + copy!(dst.to_parent_pre, src.to_parent_pre) + return dst +end + +length(D::DFSTree) = length(D.from_pre) + +function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) + copy!(D, DFSTree(length(blocks))) + to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] + pre_num = 1 + post_num = 1 + while !isempty(to_visit) + # Because we want the postorder number as well as the preorder number, + # we don't pop the current node from the stack until we're moving up + # the tree + (current_node_bb, parent_pre, pushed_children) = to_visit[end] + + if pushed_children + # Going up the DFS tree, so all we need to do is record the + # postorder number, then move on + D.to_post[current_node_bb] = post_num + D.from_post[post_num] = current_node_bb + post_num += 1 + pop!(to_visit) + + elseif D.to_pre[current_node_bb] != 0 + # Node has already been visited, move on + pop!(to_visit) + continue + else + # Going down the DFS tree + + # Record preorder number + D.to_pre[current_node_bb] = pre_num + D.from_pre[pre_num] = current_node_bb + D.to_parent_pre[pre_num] = parent_pre + + # Record that children (will) have been pushed + to_visit[end] = (current_node_bb, parent_pre, true) + + # Push children to the stack + for succ_bb in blocks[current_node_bb].succs + push!(to_visit, (succ_bb, pre_num, false)) + end + + pre_num += 1 + end + end + + # If all blocks are reachable, this is a no-op, otherwise, we shrink these 
+ # arrays. + resize!(D.from_pre, pre_num - 1) + resize!(D.from_post, post_num - 1) # should be same size as pre_num - 1 + resize!(D.to_parent_pre, pre_num - 1) + + return D +end + +DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks) + +""" +Keeps the per-BB state of the Semi-NCA algorithm. In the original formulation, +there are three separate length `n` arrays, `label`, `semi` and `ancestor`. +Instead, for efficiency, we use one array in an array-of-structs style setup. +""" +struct SNCAData + semi::PreNumber + label::PreNumber +end + "Represents a Basic Block, in the DomTree" struct DomTreeNode # How deep we are in the DomTree level::Int # The BB indices in the CFG for all Basic Blocks we immediately dominate - children::Vector{Int} + children::Vector{BBNumber} end -DomTreeNode() = DomTreeNode(1, Vector{Int}()) + +DomTreeNode() = DomTreeNode(1, Vector{BBNumber}()) "Data structure that encodes which basic block dominates which." struct DomTree - # Which basic block immediately dominates each basic block (ordered by BB indices) - # Note: this is the inverse of the nodes, children field - idoms::Vector{Int} + # These can be reused when updating domtree dynamically + dfs_tree::DFSTree + snca_state::Vector{SNCAData} + + # Which basic block immediately dominates each basic block, using BB indices + idoms_bb::Vector{BBNumber} # The nodes in the tree (ordered by BB indices) nodes::Vector{DomTreeNode} end +function DomTree() + return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) +end + +function construct_domtree(blocks::Vector{BasicBlock}) + return update_domtree!(blocks, DomTree(), true, 0) +end + +function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, + recompute_dfs::Bool, max_pre::PreNumber) + if recompute_dfs + DFS!(domtree.dfs_tree, blocks) + end + + if max_pre == 0 + max_pre = length(domtree.dfs_tree) + end + + SNCA!(domtree, blocks, max_pre) + compute_domtree_nodes!(domtree) + return domtree +end + +function 
compute_domtree_nodes!(domtree::DomTree) + # Compute children + copy!(domtree.nodes, + DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)]) + for (idx, idom) in Iterators.enumerate(domtree.idoms_bb) + (idx == 1 || idom == 0) && continue + push!(domtree.nodes[idom].children, idx) + end + # Recursively set level + update_level!(domtree.nodes, 1, 1) + return domtree.nodes +end + +function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int) + worklist = Tuple{BBNumber, Int}[(node, level)] + while !isempty(worklist) + (node, level) = pop!(worklist) + nodes[node] = DomTreeNode(level, nodes[node].children) + foreach(nodes[node].children) do child + push!(worklist, (child, level+1)) + end + end +end + +""" +The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05]. Note that the +pseudocode in [LG05] is not entirely accurate. The best way to understand +what's happening is to read [LT79], then the description of SLT in [LG05] +(warning: inconsistent notation), then the description of Semi-NCA. +""" +function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) + D = domtree.dfs_tree + state = domtree.snca_state + # There may be more blocks than are reachable in the DFS / dominator tree + n_blocks = length(blocks) + n_nodes = length(D) + + # `label` is initialized to the identity mapping (though the paper doesn't + # make that clear). The rationale for this is Lemma 2.4 in [LG05] (i.e. + # Theorem 4 in [LT79]). Note however, that we don't ever look at `semi` + # until it is fully initialized, so we could leave it uninitialized here if + # we wanted to. + resize!(state, n_nodes) + for w in 1:max_pre + # Only reset semidominators for nodes we want to recompute + state[w] = SNCAData(typemax(PreNumber), w) + end + + # If we are only recomputing some of the semidominators, the remaining + # labels should be reset, because they may have become inapplicable to the + # node/semidominator we are currently processing/recomputing. 
They can + # become inapplicable because of path compressions that were triggered by + # nodes that should only be processed after the current one (but were + # processed the last time `SNCA!` was run). + # + # So, for every node that is not being reprocessed, we reset its label to + # its semidominator, which is the value that its label assumes once its + # semidominator is computed. If this was too conservative, i.e. if the + # label would have been updated before we process the current node in a + # situation where all semidominators were recomputed, then path compression + # will produce the correct label. + for w in max_pre+1:n_nodes + semi = state[w].semi + state[w] = SNCAData(semi, semi) + end + + # Calculate semidominators, but only for blocks with preorder number up to + # max_pre + ancestors = copy(D.to_parent_pre) + for w::PreNumber in reverse(2:max_pre) + # LLVM initializes this to the parent, the paper initializes this to + # `w`, but it doesn't really matter (the parent is a predecessor, so at + # worst we'll discover it below). Save a memory reference here. + semi_w = typemax(PreNumber) + last_linked = PreNumber(w + 1) + for v ∈ blocks[D.from_pre[w]].preds + # For the purpose of the domtree, ignore virtual predecessors into + # catch blocks. + v == 0 && continue + + v_pre = D.to_pre[v] + + # Ignore unreachable predecessors + v_pre == 0 && continue + + # N.B.: This conditional is missing from the pseudocode in figure + # 2.8 of [LG05]. It corresponds to the `ancestor[v] != 0` check in + # the `eval` implementation in figure 2.6 + if v_pre >= last_linked + # `v` has already been processed, so perform path compression + + # For performance, if the number of ancestors is small avoid + # the extra allocation of the worklist. 
+ if length(ancestors) <= 32 + snca_compress!(state, ancestors, v_pre, last_linked) + else + snca_compress_worklist!(state, ancestors, v_pre, last_linked) + end + end + + # The (preorder number of the) semidominator of a block is the + # minimum over the labels of its predecessors + semi_w = min(semi_w, state[v_pre].label) + end + state[w] = SNCAData(semi_w, semi_w) + end + + # Compute immediate dominators, which for a node must be the nearest common + # ancestor in the (immediate) dominator tree between its semidominator and + # its parent (see Lemma 2.6 in [LG05]). + idoms_pre = copy(D.to_parent_pre) + for v in 2:n_nodes + idom = idoms_pre[v] + vsemi = state[v].semi + while idom > vsemi + idom = idoms_pre[idom] + end + idoms_pre[v] = idom + end + + # Express idoms in BB indexing + resize!(domtree.idoms_bb, n_blocks) + for i::BBNumber in 1:n_blocks + if i == 1 || D.to_pre[i] == 0 + domtree.idoms_bb[i] = 0 + else + domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]] + end + end +end + """ - Checks if bb1 dominates bb2. - bb1 and bb2 are indexes into the CFG blocks. - bb1 dominates bb2 if the only way to enter bb2 is via bb1. - (Other blocks may be in between, e.g bb1->bbX->bb2). +Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the +modification suggested in the paper to use `last_linked` to determine whether +an ancestor has been processed rather than storing `0` in the ancestor array. 
""" -function dominates(domtree::DomTree, bb1::Int, bb2::Int) +function snca_compress!(state::Vector{SNCAData}, ancestors::Vector{PreNumber}, + v::PreNumber, last_linked::PreNumber) + u = ancestors[v] + @assert u < v + if u >= last_linked + snca_compress!(state, ancestors, u, last_linked) + if state[u].label < state[v].label + state[v] = SNCAData(state[v].semi, state[u].label) + end + ancestors[v] = ancestors[u] + end + nothing +end + +function snca_compress_worklist!( + state::Vector{SNCAData}, ancestors::Vector{PreNumber}, + v::PreNumber, last_linked::PreNumber) + # TODO: There is a smarter way to do this + u = ancestors[v] + worklist = Tuple{PreNumber, PreNumber}[(u,v)] + @assert u < v + while !isempty(worklist) + u, v = last(worklist) + if u >= last_linked + if ancestors[u] >= last_linked + push!(worklist, (ancestors[u], u)) + continue + end + if state[u].label < state[v].label + state[v] = SNCAData(state[v].semi, state[u].label) + end + ancestors[v] = ancestors[u] + end + pop!(worklist) + end +end + +"Given updated blocks, update the given dominator tree with an inserted edge." +function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, + from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + + # Implements Section 3.1 of [GI16] + dt = domtree.dfs_tree + from_pre = dt.to_pre[from] + to_pre = dt.to_pre[to] + from_post = dt.to_post[from] + to_post = dt.to_post[to] + if to_pre == 0 || (from_pre < to_pre && from_post < to_post) + # The DFS tree is invalidated by the edge insertion, so run from + # scratch + update_domtree!(blocks, domtree, true, 0) + else + # DFS tree is still valid, so update only affected nodes + update_domtree!(blocks, domtree, false, to_pre) + end + + return domtree +end + +"Given updated blocks, update the given dominator tree with a deleted edge." 
+function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, + from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + + # Implements Section 3.1 of [GI16] + if is_parent(domtree.dfs_tree, from, to) + # The `from` block is the parent of the `to` block in the DFS tree, so + # deleting the edge invalidates the DFS tree, so start from scratch + update_domtree!(blocks, domtree, true, 0) + elseif on_semidominator_path(domtree, from, to) + # Recompute semidominators for blocks with preorder number up to that + # of `to` block. Semidominators for blocks with preorder number greater + # than that of `to` aren't affected because no semidominator path to + # the block can pass through the `to` block (the preorder number of + # `to` would be lower than those of these blocks, and `to` is not their + # parent in the DFS tree). + to_pre = domtree.dfs_tree.to_pre[to] + update_domtree!(blocks, domtree, false, to_pre) + end + # Otherwise, dominator tree is not affected + + return domtree +end + +"Check if x is the parent of y in the given DFS tree." +function is_parent(dfs_tree::DFSTree, x::BBNumber, y::BBNumber) + x_pre = dfs_tree.to_pre[x] + y_pre = dfs_tree.to_pre[y] + return x_pre == dfs_tree.to_parent_pre[y_pre] +end + +""" +Check if x is on some semidominator path from the semidominator of y to y, +assuming there is an edge from x to y. +""" +function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber) + x_pre = domtree.dfs_tree.to_pre[x] + y_pre = domtree.dfs_tree.to_pre[y] + + semi_y = domtree.snca_state[y_pre].semi + current_block = x_pre + + # Follow the semidominators of `x` up the DFS tree to see if we ever reach + # the semidominator of `y`. If so, `x` is on a semidominator path between + # `y` and its semidominator. 
We can stop if the preorder number of the + # semidominators becomes less than that of the semidominator of `y`, + # because it can only decrease further. + while current_block >= semi_y + if semi_y == current_block + return true + end + current_block = domtree.snca_state[current_block].semi + end + return false +end + +""" +Rename basic block numbers in a dominator tree, removing the block if it is +renamed to -1. +""" +function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber}) + # Rename DFS tree + rename_nodes!(domtree.dfs_tree, rename_bb) + + # `snca_state` is indexed by preorder number, so should be unchanged + + # Rename `idoms_bb` and `nodes` + old_idoms_bb = copy(domtree.idoms_bb) + old_nodes = copy(domtree.nodes) + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + domtree.idoms_bb[new_bb] = (new_bb == 1) ? + 0 : rename_bb[old_idoms_bb[old_bb]] + domtree.nodes[new_bb] = old_nodes[old_bb] + map!(i -> rename_bb[i], + domtree.nodes[new_bb].children, + domtree.nodes[new_bb].children) + end + end + + # length of `to_pre` after renaming DFS tree is new number of basic blocks + resize!(domtree.idoms_bb, length(domtree.dfs_tree.to_pre)) + resize!(domtree.nodes, length(domtree.dfs_tree.to_pre)) + return domtree +end + +""" +Rename basic block numbers in a DFS tree, removing the block if it is renamed +to -1. 
+""" +function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) + n_blocks = length(D.to_pre) + n_reachable_blocks = length(D.from_pre) + + old_to_pre = copy(D.to_pre) + old_from_pre = copy(D.from_pre) + old_to_post = copy(D.to_post) + old_from_post = copy(D.from_post) + max_new_bb = 0 + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + D.to_pre[new_bb] = old_to_pre[old_bb] + D.from_pre[old_to_pre[old_bb]] = new_bb + D.to_post[new_bb] = old_to_post[old_bb] + D.from_post[old_to_post[old_bb]] = new_bb + + # Keep track of highest BB number to resize arrays with + if new_bb > max_new_bb + max_new_bb = new_bb + end + end + end + resize!(D.to_pre, max_new_bb) + resize!(D.to_post, max_new_bb) + # `to_parent_pre` should be unchanged + return D +end + +""" +Checks if bb1 dominates bb2. +bb1 and bb2 are indexes into the CFG blocks. +bb1 dominates bb2 if the only way to enter bb2 is via bb1. +(Other blocks may be in between, e.g bb1->bbX->bb2). +""" +function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) bb1 == bb2 && return true target_level = domtree.nodes[bb1].level source_level = domtree.nodes[bb2].level source_level < target_level && return false for _ in (source_level - 1):-1:target_level - bb2 = domtree.idoms[bb2] + bb2 = domtree.idoms_bb[bb2] end return bb1 == bb2 end -bb_unreachable(domtree::DomTree, bb::Int) = bb != 1 && domtree.nodes[bb].level == 1 - -function update_level!(domtree::Vector{DomTreeNode}, node::Int, level::Int) - worklist = Tuple{Int, Int}[(node, level)] - while !isempty(worklist) - (node, level) = pop!(worklist) - domtree[node] = DomTreeNode(level, domtree[node].children) - foreach(domtree[node].children) do child - push!(worklist, (child, level+1)) - end - end -end +bb_unreachable(domtree::DomTree, bb::BBNumber) = bb != 1 && domtree.dfs_tree.to_pre[bb] == 0 "Iterable data structure that walks though all dominated blocks" struct DominatedBlocks domtree::DomTree - worklist::Vector{Int} + worklist::Vector{BBNumber} end 
"Returns an iterator that walks through all blocks dominated by the basic block at index `root`" -function dominated(domtree::DomTree, root::Int) - doms = DominatedBlocks(domtree, Vector{Int}()) +function dominated(domtree::DomTree, root::BBNumber) + doms = DominatedBlocks(domtree, Vector{BBNumber}()) push!(doms.worklist, root) doms end @@ -71,8 +581,8 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing) return (bb, nothing) end -function naive_idoms(cfg::CFG) - nblocks = length(cfg.blocks) +function naive_idoms(blocks::Vector{BasicBlock}) + nblocks = length(blocks) # The extra +1 helps us detect unreachable blocks below dom_all = BitSet(1:nblocks+1) dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks] @@ -80,10 +590,10 @@ function naive_idoms(cfg::CFG) while changed changed = false for n = 2:nblocks - if isempty(cfg.blocks[n].preds) + if isempty(blocks[n].preds) continue end - firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, cfg.blocks[n].preds)) + firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds)) new_doms = copy(dominators[firstp]) for p in rest intersect!(new_doms, dominators[p]) @@ -115,213 +625,3 @@ function naive_idoms(cfg::CFG) end idoms end - -# Construct Dom Tree -function construct_domtree(cfg::CFG) - idoms = SNCA(cfg) - # Compute children - nblocks = length(cfg.blocks) - domtree = DomTreeNode[DomTreeNode() for _ = 1:nblocks] - for (idx, idom) in Iterators.enumerate(idoms) - (idx == 1 || idom == 0) && continue - push!(domtree[idom].children, idx) - end - # Recursively set level - update_level!(domtree, 1, 1) - DomTree(idoms, domtree) -end - -#================================ [SNCA] ======================================# -# -# This section implements the Semi-NCA (SNCA) dominator tree construction from -# described in Georgiadis' PhD thesis [LG05], which itself is a simplification -# of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. 
This algorithm matches -# the algorithm choice in LLVM and seems to be a sweet spot in implementation -# simplicity and efficiency. -# -# [LG05] Linear-Time Algorithms for Dominators and Related Problems -# Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: -# ftp://ftp.cs.princeton.edu/reports/2005/737.pdf -# -# [LT79] A fast algorithm for finding dominators in a flowgraph -# Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1 -# http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf -# -begin - # We could make these real structs, but probably not worth the extra - # overhead. Still, give them names for documentary purposes. - const BBNumber = UInt - const DFSNumber = UInt - - """ - Keeps the per-BB state of the Semi NCA algorithm. In the original - formulation, there are three separate length `n` arrays, `label`, `semi` and - `ancestor`. Instead, for efficiency, we use one array in a array-of-structs - style setup. - """ - struct Node - semi::DFSNumber - label::DFSNumber - end - - struct DFSTree - # Maps DFS number to BB number - numbering::Vector{BBNumber} - # Maps BB number to DFS number - reverse::Vector{DFSNumber} - # Records parent relationships in the DFS tree (DFS number -> DFS number) - # Storing it this way saves a few lookups in the snca_compress! algorithm - parents::Vector{DFSNumber} - end - length(D::DFSTree) = length(D.numbering) - preorder(D::DFSTree) = OneTo(length(D)) - _drop(xs::AbstractUnitRange, n::Integer) = (first(xs)+n):last(xs) - - function DFSTree(nblocks::Int) - DFSTree( - Vector{BBNumber}(undef, nblocks), - zeros(DFSNumber, nblocks), - Vector{DFSNumber}(undef, nblocks)) - end - - function DFS(cfg::CFG, current_node::BBNumber)::DFSTree - dfs = DFSTree(length(cfg.blocks)) - # TODO: We could reuse the storage in DFSTree for our worklist. 
We're - # guaranteed for the worklist to be smaller than the remaining space in - # DFSTree - worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)] - dfs_num = 1 - parent = 0 - while !isempty(worklist) - (parent, current_node) = pop!(worklist) - dfs.reverse[current_node] != 0 && continue - dfs.reverse[current_node] = dfs_num - dfs.numbering[dfs_num] = current_node - dfs.parents[dfs_num] = parent - for succ in cfg.blocks[current_node].succs - push!(worklist, (dfs_num, succ)) - end - dfs_num += 1 - end - # If all blocks are reachable, this is a no-op, otherwise, - # we shrink these arrays. - resize!(dfs.numbering, dfs_num - 1) - resize!(dfs.parents, dfs_num - 1) - dfs - end - - """ - Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the - modification suggested in the paper to use `last_linked` to determine - whether an ancestor has been processed rather than storing `0` in the - ancestor array. - """ - function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber}, - v::DFSNumber, last_linked::DFSNumber) - u = ancestors[v] - @assert u < v - if u >= last_linked - snca_compress!(state, ancestors, u, last_linked) - if state[u].label < state[v].label - state[v] = Node(state[v].semi, state[u].label) - end - ancestors[v] = ancestors[u] - end - nothing - end - - function snca_compress_worklist!( - state::Vector{Node}, ancestors::Vector{DFSNumber}, - v::DFSNumber, last_linked::DFSNumber) - # TODO: There is a smarter way to do this - u = ancestors[v] - worklist = Tuple{DFSNumber, DFSNumber}[(u,v)] - @assert u < v - while !isempty(worklist) - u, v = last(worklist) - if u >= last_linked - if ancestors[u] >= last_linked - push!(worklist, (ancestors[u], u)) - continue - end - if state[u].label < state[v].label - state[v] = Node(state[v].semi, state[u].label) - end - ancestors[v] = ancestors[u] - end - pop!(worklist) - end - end - - """ - SNCA(cfg::CFG) - - Determines a map from basic blocks to the block which immediately dominate them. 
- Expressed as indexes into `cfg.blocks`. - - The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05]. - Note that the pseudocode in [LG05] is not entirely accurate. - The best way to understand what's happening is to read [LT79], then the - description of SLT in [LG05] (warning: inconsistent notation), then - the description of Semi-NCA. - """ - function SNCA(cfg::CFG) - D = DFS(cfg, BBNumber(1)) - # `label` is initialized to the identity mapping (though - # the paper doesn't make that clear). The rational for this is Lemma - # 2.4 in [LG05] (i.e. Theorem 4 in ). Note however, that we don't - # ever look at `semi` until it is fully initialized, so we could leave - # it uninitialized here if we wanted to. - state = Node[ Node(typemax(DFSNumber), w) for w in preorder(D) ] - # Initialize idoms to parents. Note that while idoms are eventually - # BB indexed, we keep it DFS indexed until a final post-processing - # pass to avoid extra memory references during the O(N^2) phase below. - idoms_dfs = copy(D.parents) - # We abuse the parents array as the ancestors array. - # Semi-NCA does not look at the parents array at all. - # SLT would, but never simultaneously, so we could still - # do this. - ancestors = D.parents - for w::DFSNumber ∈ reverse(_drop(preorder(D), 1)) - # LLVM initializes this to the parent, the paper initializes this to - # `w`, but it doesn't really matter (the parent is a predecessor, - # so at worst we'll discover it below). Save a memory reference here. - semi_w = typemax(DFSNumber) - for v ∈ cfg.blocks[D.numbering[w]].preds - # For the purpose of the domtree, ignore virtual predecessors - # into catch blocks. - v == 0 && continue - vdfs = D.reverse[v] - # Ignore unreachable predecessors - vdfs == 0 && continue - last_linked = DFSNumber(w + 1) - # N.B.: This conditional is missing from the psuedocode - # in figure 2.8 of [LG05]. 
It corresponds to the - # `ancestor[v] != 0` check in the `eval` implementation in - # figure 2.6 - if vdfs >= last_linked - # For performance, if the number of ancestors is small - # avoid the extra allocation of the worklist. - if length(ancestors) <= 32 - snca_compress!(state, ancestors, vdfs, last_linked) - else - snca_compress_worklist!(state, ancestors, vdfs, last_linked) - end - end - semi_w = min(semi_w, state[vdfs].label) - end - state[w] = Node(semi_w, semi_w) - end - for v ∈ _drop(preorder(D), 1) - idom = idoms_dfs[v] - vsemi = state[v].semi - while idom > vsemi - idom = idoms_dfs[idom] - end - idoms_dfs[v] = idom - end - # Reexpress the idom relationship in BB indexing - idoms_bb = Int[ (i == 1 || D.reverse[i] == 0) ? 0 : D.numbering[idoms_dfs[D.reverse[i]]] for i = 1:length(cfg.blocks) ] - idoms_bb - end -end diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index 465102e82e155..83205033342d6 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -10,8 +10,9 @@ else end end -include("compiler/ssair/ir.jl") +include("compiler/ssair/basicblock.jl") include("compiler/ssair/domtree.jl") +include("compiler/ssair/ir.jl") include("compiler/ssair/slot2ssa.jl") include("compiler/ssair/queries.jl") include("compiler/ssair/passes.jl") @@ -111,7 +112,7 @@ end function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code - @timeit "domtree 1" domtree = construct_domtree(ir.cfg) + @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks) defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst) @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` return ir diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index fb987b3bc3aa0..3960ab44649b1 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -3,40 +3,32 @@ @inline 
isexpr(@nospecialize(stmt), head::Symbol) = isa(stmt, Expr) && stmt.head === head Core.PhiNode() = Core.PhiNode(Int32[], Any[]) -""" -Like UnitRange{Int}, but can handle the `last` field, being temporarily -< first (this can happen during compacting) -""" -struct StmtRange <: AbstractUnitRange{Int} - start::Int - stop::Int -end -first(r::StmtRange) = r.start -last(r::StmtRange) = r.stop -iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1) - -StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range)) - -struct BasicBlock - stmts::StmtRange - preds::Vector{Int} - succs::Vector{Int} -end -function BasicBlock(stmts::StmtRange) - return BasicBlock(stmts, Int[], Int[]) -end -function BasicBlock(old_bb, stmts) - return BasicBlock(stmts, old_bb.preds, old_bb.succs) -end -copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs)) +isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) struct CFG blocks::Vector{BasicBlock} index::Vector{Int} # map from instruction => basic-block number # TODO: make this O(1) instead of O(log(n_blocks))? 
end + copy(c::CFG) = CFG(BasicBlock[copy(b) for b in c.blocks], copy(c.index)) +function cfg_insert_edge!(cfg::CFG, from::Int, to::Int) + # Assumes that this edge does not already exist + push!(cfg.blocks[to].preds, from) + push!(cfg.blocks[from].succs, to) + nothing +end + +function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) + preds = cfg.blocks[to].preds + succs = cfg.blocks[from].succs + # Assumes that blocks appear at most once in preds and succs + deleteat!(preds, findfirst(x->x === from, preds)::Int) + deleteat!(succs, findfirst(x->x === to, succs)::Int) + nothing +end + function block_for_inst(index::Vector{Int}, inst::Int) return searchsortedfirst(index, inst, lt=(<=)) end @@ -550,8 +542,9 @@ mutable struct IncrementalCompact if allow_cfg_transforms bb_rename = Vector{Int}(undef, length(blocks)) cur_bb = 1 + domtree = construct_domtree(blocks) for i = 1:length(bb_rename) - if i != 1 && length(blocks[i].preds) == 0 + if bb_unreachable(domtree, i) bb_rename[i] = -1 else bb_rename[i] = cur_bb @@ -1203,8 +1196,8 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= resize!(compact, old_result_idx) end bb = compact.ir.cfg.blocks[active_bb] - if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && length(bb.preds) == 0 - # No predecessors, kill the entire block. + if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && compact.bb_rename_succ[active_bb] == -1 + # Dead block, so kill the entire block. 
compact.idx = last(bb.stmts) # Pop any remaining insertion nodes while compact.new_nodes_idx <= length(compact.perm) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 4af13d81b76d0..8ce0c01cf5f75 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -55,7 +55,7 @@ function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int) # TODO: This can be much faster by looking at current level and only # searching for those blocks in a sorted order while !(curblock in allblocks) - curblock = domtree.idoms[curblock] + curblock = domtree.idoms_bb[curblock] end return curblock end @@ -728,7 +728,7 @@ function getfield_elim_pass!(ir::IRCode) # IR. This needs to be after we iterate through the IR with # `IncrementalCompact` because removing dead blocks can invalidate the # domtree. - @timeit "domtree 2" domtree = construct_domtree(ir.cfg) + @timeit "domtree 2" domtree = construct_domtree(ir.cfg.blocks) # Now go through any mutable structs and see which ones we can eliminate for (idx, (intermediaries, defuse)) in defuses diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index 94ac41f9ac345..1d64a77444d47 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -66,7 +66,7 @@ function verify_ir(ir::IRCode, print::Bool=true) # Verify CFG last_end = 0 # Verify statements - domtree = construct_domtree(ir.cfg) + domtree = construct_domtree(ir.cfg.blocks) for (idx, block) in pairs(ir.cfg.blocks) if first(block.stmts) != last_end + 1 #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 26bdced75983e..855100c8de85f 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -34,7 +34,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = 
Core.Compiler.construct_domtree(ir.cfg) + domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) ir = Core.Compiler.domsort_ssa!(ir, domtree) Core.Compiler.verify_ir(ir) phi = ir.stmts.inst[3] @@ -62,7 +62,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg) + domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) ir = Core.Compiler.domsort_ssa!(ir, domtree) Core.Compiler.verify_ir(ir) end diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index c4ccd753e34a8..1d223c1064b88 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -67,10 +67,10 @@ let cfg = CFG(BasicBlock[ make_bb([2, 3] , [5] ), make_bb([2, 4] , [] ), ], Int[]) - dfs = Compiler.DFS(cfg, Compiler.BBNumber(1)) - @test dfs.numbering[dfs.parents[dfs.reverse[5]]] == 4 - let correct_idoms = Compiler.naive_idoms(cfg) - @test Compiler.SNCA(cfg) == correct_idoms + dfs = Compiler.DFS(cfg.blocks) + @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 + let correct_idoms = Compiler.naive_idoms(cfg.blocks) + @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). 
@@ -81,7 +81,7 @@ let cfg = CFG(BasicBlock[ c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs)) d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs)) cfg′ = CFG(blocks, cfg.index) - @test Compiler.SNCA(cfg′) == correct_idoms + @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms end end end @@ -239,3 +239,79 @@ let ci = code_lowered(()->@isdefined(_not_def_37919_), ())[1] ir = Core.Compiler.inflate_ir(ci) @test Core.Compiler.verify_ir(ir) === nothing end + +# Test dynamic update of domtree with edge insertions and deletions in the +# following CFG: +# +# 1,1 +# | \ +# | \ +# | 3,4 < +# | | \ +# 2,2 4,5 | +# | | / +# | 6,6 / +# | / +# | / +# 5,3 +# +# Nodes indicate BB number, preorder number +# Edges point down, except the arrow that points up +let cfg = CFG(BasicBlock[ + make_bb([], [3, 2]), # the order of the successors is deliberate + make_bb([1], [5]), # and is to determine the preorder numbers + make_bb([1, 6], [4]), + make_bb([3], [6]), + make_bb([2, 6], []), + make_bb([4], [5, 3]), + ], Int[]) + domtree = Compiler.construct_domtree(cfg.blocks) + @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Test removal of edge between a parent and child in the DFS tree, which + # should trigger complete recomputation of domtree (first case in algorithm + # for removing edge from domtree dynamically) + Compiler.cfg_delete_edge!(cfg, 2, 5) + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4] + # Add edge back (testing first case for insertion) + Compiler.cfg_insert_edge!(cfg, 2, 5) + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Test second case in algorithm for removing edges from domtree, in which + # `from` is on a semidominator path from the 
semidominator of `to` to `to` + Compiler.cfg_delete_edge!(cfg, 6, 5) + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4] + # Add edge back (testing second case for insertion) + Compiler.cfg_insert_edge!(cfg, 6, 5) + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Test last case for removing edges, in which edge does not satisfy either + # of the above conditions + Compiler.cfg_delete_edge!(cfg, 6, 3) + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + # Add edge back (testing second case for insertion) + Compiler.cfg_insert_edge!(cfg, 6, 3) + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Try removing all edges from root + Compiler.cfg_delete_edge!(cfg, 1, 2) + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4] + Compiler.cfg_delete_edge!(cfg, 1, 3) + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0] + # Add edges back + Compiler.cfg_insert_edge!(cfg, 1, 2) + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0] + Compiler.cfg_insert_edge!(cfg, 1, 3) + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] +end