/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ #include "base/arena_bit_vector.h" #include "base/arena_containers.h" #include "base/bit_vector-inl.h" #include "nodes.h" namespace art { class InductionVarRange; static const bool kSuperblockClonerLogging = false; static const bool kSuperblockClonerVerify = false; // Represents an edge between two HBasicBlocks. // // Note: objects of this class are small - pass them by value. class HEdge : public ArenaObject { public: HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { DCHECK_NE(to_, kInvalidBlockId); DCHECK_NE(from_, kInvalidBlockId); } HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { DCHECK_NE(to_, kInvalidBlockId); DCHECK_NE(from_, kInvalidBlockId); } HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} uint32_t GetFrom() const { return from_; } uint32_t GetTo() const { return to_; } bool operator==(const HEdge& other) const { return this->from_ == other.from_ && this->to_ == other.to_; } bool operator!=(const HEdge& other) const { return !operator==(other); } void Dump(std::ostream& stream) const; // Returns whether an edge represents a valid edge in CF graph: whether the from_ block // has to_ block as a successor. bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } private: // Predecessor block id. uint32_t from_; // Successor block id. uint32_t to_; }; // Returns whether a HEdge edge corresponds to an existing edge in the graph. inline bool IsEdgeValid(HEdge edge, HGraph* graph) { if (!edge.IsValid()) { return false; } uint32_t from = edge.GetFrom(); uint32_t to = edge.GetTo(); if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { return false; } HBasicBlock* block_from = graph->GetBlocks()[from]; HBasicBlock* block_to = graph->GetBlocks()[to]; if (block_from == nullptr || block_to == nullptr) { return false; } return block_from->HasSuccessor(block_to, 0); } // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted // automatically. The clone transformation is defined by specifying a set of basic blocks to copy // and a set of rules how to treat edges, remap their successors. By using this approach such // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling, Loop Versioning can be // implemented. // // The idea of the transformation is based on "Superblock cloning" technique described in the book // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. // // There are two states of the IR graph: original graph (before the transformation) and // copy graph (after). // // Before the transformation: // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", // where pred, succ - basic blocks): // - internal - pred, succ are members of ‘orig_bb_set’. // - outside - pred, succ are not members of ‘orig_bb_set’. // - incoming - pred is not a member of ‘orig_bb_set’, succ is. // - outgoing - pred is a member of ‘orig_bb_set’, succ is not. // // Transformation: // // 1. Initial cloning: // 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks // form ‘copy_bb_set’. // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge // set. // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge // set. // 2. Successors remapping. // 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). // 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). // 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into // (X, Y_1)). // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). // 4. Fix/resolve data flow. // 5. Do cleanups (DCE, critical edges splitting, etc). // class SuperblockCloner : public ValueObject { public: // TODO: Investigate optimal types for the containers. using HBasicBlockMap = ArenaSafeMap; using HInstructionMap = ArenaSafeMap; using HBasicBlockSet = ArenaBitVector; using HEdgeSet = ArenaHashSet; SuperblockCloner(HGraph* graph, const HBasicBlockSet* orig_bb_set, HBasicBlockMap* bb_map, HInstructionMap* hir_map, InductionVarRange* induction_range); // Sets edge successor remapping info specified by corresponding edge sets. void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, const HEdgeSet* remap_copy_internal, const HEdgeSet* remap_incoming); // Returns whether the specified subgraph is copyable. // TODO: Start from small range of graph patterns then extend it. bool IsSubgraphClonable() const; // Returns whether selected subgraph satisfies the criteria for fast data flow resolution // when iterative DF algorithm is not required and dominators/instructions inputs can be // trivially adjusted. // // TODO: formally describe the criteria. // // Loop peeling, unrolling and versioning satisfy the criteria. bool IsFastCase() const; // Runs the copy algorithm according to the description. void Run(); // Cleans up the graph after transformation: splits critical edges, recalculates control flow // information (back-edges, dominators, loop info, etc), eliminates redundant phis. void CleanUp(); // Returns a clone of a basic block (orig_block). // // - The copy block will have no successors/predecessors; they should be set up manually. // - For each instruction in the orig_block a copy is created and inserted into the copy block; // this correspondence is recorded in the map (old instruction, new instruction). // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the // same, as in the original block, PHIs do not reflect a correct correspondence between the // value and predecessors (as the copy block has no predecessors by now), etc. HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ // and hir_map_. void CloneBasicBlocks(); HInstruction* GetInstrCopy(HInstruction* orig_instr) const { auto copy_input_iter = hir_map_->find(orig_instr); DCHECK(copy_input_iter != hir_map_->end()); return copy_input_iter->second; } HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { HBasicBlock* block = bb_map_->Get(orig_block); DCHECK(block != nullptr); return block; } HInstruction* GetInstrOrig(HInstruction* copy_instr) const { for (auto it : *hir_map_) { if (it.second == copy_instr) { return it.first; } } return nullptr; } bool IsInOrigBBSet(uint32_t block_id) const { return orig_bb_set_.IsBitSet(block_id); } bool IsInOrigBBSet(const HBasicBlock* block) const { return IsInOrigBBSet(block->GetBlockId()); } // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, // dominators) needs to be adjusted. HLoopInformation* GetRegionToBeAdjusted() const { return outer_loop_; } private: // Fills the 'exits' vector with the subgraph exits. void SearchForSubgraphExits(ArenaVector* exits) const; // Finds and records information about the area in the graph for which control flow (back edges, // loops, dominators) needs to be adjusted. void FindAndSetLocalAreaForAdjustments(); // Remaps edges' successors according to the info specified in the edges sets. // // Only edge successors/predecessors and phis' input records (to have a correspondence between // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither // phis' nor instructions' inputs values are resolved. void RemapEdgesSuccessors(); // Adjusts control flow (back edges, loops, dominators) for the local area defined by // FindAndSetLocalAreaForAdjustments. void AdjustControlFlowInfo(); // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in // the SSA form. void ResolveDataFlow(); // // Helpers for live-outs processing and Subgraph-closed SSA. // // - live-outs - values which are defined inside the subgraph and have uses outside. // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph // have no outside uses except for the phi-nodes in the subgraph exits. // // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this // makes the subgraph-closed SSA form construction much easier. // // TODO: Support subgraphs with live-outs and multiple exits. // // For each live-out value 'val' in the region puts a record into the map. // Returns whether all of the instructions in the subgraph are clonable. bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const; // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit. // // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates // the record in the map to and replaces all outside uses with this phi. void ConstructSubgraphClosedSSA(); // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding // () phi after the cloning is done. void FixSubgraphClosedSSAAfterCloning(); // // Helpers for CloneBasicBlock. // // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. void ReplaceInputsWithCopies(HInstruction* copy_instr); // Recursively clones the environment for the copy instruction. If the input of the original // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise // leaves it the same as original. void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); // // Helpers for RemapEdgesSuccessors. // // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and // copy_succ. void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); // Checks whether the edges remapping info corresponds to the subgraph versioning case: // - none of the incoming edges are to be remapped (they are being duplicated). // - none of the internal edges are to be remapped. bool IsRemapInfoForVersioning() const; // Processes incoming edges for subgraph versioning case: for each incoming edge (X, Y) adds // an edge (X, Y_1) where Y_1 = Copy(Y) and add corresponding phi input to copy phi. // // Note: such node X will now have two successors, its unconditional branch instruction // will be invalid and should be adjusted to some conditional branch by the client code. void CopyIncomingEdgesForVersioning(); // // Local versions of control flow calculation/adjustment routines. // void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); void CleanUpControlFlow(); // // Helpers for ResolveDataFlow // // Resolves the inputs of the phi. void ResolvePhi(HPhi* phi); // Update induction range after when fixing SSA. void UpdateInductionRangeInfoOf( HInstruction* user, HInstruction* old_instruction, HInstruction* replacement); // // Debug and logging methods. // void CheckInstructionInputsRemapping(HInstruction* orig_instr); bool CheckRemappingInfoIsValid(); void VerifyGraph(); void DumpInputSets(); HBasicBlock* GetBlockById(uint32_t block_id) const { DCHECK(block_id < graph_->GetBlocks().size()); HBasicBlock* block = graph_->GetBlocks()[block_id]; DCHECK(block != nullptr); return block; } HGraph* const graph_; ArenaAllocator* const arena_; // Set of basic block in the original graph to be copied. HBasicBlockSet orig_bb_set_; // Sets of edges which require successors remapping. const HEdgeSet* remap_orig_internal_; const HEdgeSet* remap_copy_internal_; const HEdgeSet* remap_incoming_; // Correspondence map for blocks: (original block, copy block). HBasicBlockMap* bb_map_; // Correspondence map for instructions: (original HInstruction, copy HInstruction). HInstructionMap* hir_map_; // As a result of cloning, the induction range analysis information can be invalidated // and must be updated. If not null, the cloner updates it for changed instructions. InductionVarRange* induction_range_; // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. HLoopInformation* outer_loop_; HBasicBlockSet outer_loop_bb_set_; HInstructionMap live_outs_; ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); }; // Helper class to perform loop peeling/unrolling/versioning. // // This helper should be used when correspondence map between original and copied // basic blocks/instructions are demanded. class LoopClonerHelper : public ValueObject { public: LoopClonerHelper(HLoopInformation* info, SuperblockCloner::HBasicBlockMap* bb_map, SuperblockCloner::HInstructionMap* hir_map, InductionVarRange* induction_range) : loop_info_(info), cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) { // For now do transformations only for natural loops. DCHECK(!info->IsIrreducible()); } // Returns whether the loop can be peeled/unrolled (static function). static bool IsLoopClonable(HLoopInformation* loop_info); // Returns whether the loop can be peeled/unrolled. bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } // Perform loop peeling. // // Control flow of an example (ignoring critical edges splitting). // // Before After // // |B| |B| // | | // v v // |1| |1| // | | // v v // |2|<-\ |2A| // / \ / / \ // v v/ / v // |4| |3| / |3A| // | / / // v | v // |E| \ |2|<-\ // \ / \ / // v v / // |4| |3| // | // v // |E| HBasicBlock* DoPeeling() { return DoLoopTransformationImpl(TransformationKind::kPeeling); } // Perform loop unrolling. // // Control flow of an example (ignoring critical edges splitting). // // Before After // // |B| |B| // | | // v v // |1| |1| // | | // v v // |2|<-\ |2A|<-\ // / \ / / \ \ // v v/ / v \ // |4| |3| / |3A| | // | / / / // v | v / // |E| \ |2| / // \ / \ / // v v/ // |4| |3| // | // v // |E| HBasicBlock* DoUnrolling() { return DoLoopTransformationImpl(TransformationKind::kUnrolling); } // Perform loop versioning. // // Control flow of an example (ignoring critical edges splitting). // // Before After // // |B| |B| // | | // v v // |1| |1|_________ // | | | // v v v // |2|<-\ |2|<-\ |2A|<-\ // / \ / / \ / / \ / // v v/ | v/ | v/ // | |3| | |3| | |3A| // | | __________| // | || // v vv // |4| |4| // | | // v v // |E| |E| HBasicBlock* DoVersioning() { return DoLoopTransformationImpl(TransformationKind::kVersioning); } HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } protected: enum class TransformationKind { kPeeling, kUnrolling, kVersioning, }; // Applies a specific loop transformation to the loop. HBasicBlock* DoLoopTransformationImpl(TransformationKind transformation); private: HLoopInformation* loop_info_; SuperblockCloner cloner_; DISALLOW_COPY_AND_ASSIGN(LoopClonerHelper); }; // Helper class to perform loop peeling/unrolling/versioning. // // This helper should be used when there is no need to get correspondence information between // original and copied basic blocks/instructions. class LoopClonerSimpleHelper : public ValueObject { public: LoopClonerSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range); bool IsLoopClonable() const { return helper_.IsLoopClonable(); } HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } HBasicBlock* DoVersioning() { return helper_.DoVersioning(); } HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } private: SuperblockCloner::HBasicBlockMap bb_map_; SuperblockCloner::HInstructionMap hir_map_; LoopClonerHelper helper_; DISALLOW_COPY_AND_ASSIGN(LoopClonerSimpleHelper); }; // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. void CollectRemappingInfoForPeelUnroll(bool to_unroll, HLoopInformation* loop_info, SuperblockCloner::HEdgeSet* remap_orig_internal, SuperblockCloner::HEdgeSet* remap_copy_internal, SuperblockCloner::HEdgeSet* remap_incoming); // Returns whether blocks from 'work_set' are reachable from the rest of the graph. // // Returns whether such a set 'outer_entries' of basic blocks exists that: // - each block from 'outer_entries' is not from 'work_set'. // - each block from 'work_set' is reachable from at least one block from 'outer_entries'. // // After the function returns work_set contains only blocks from the original 'work_set' // which are unreachable from the rest of the graph. bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole // graph. HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); } // namespace art namespace std { template <> struct hash { size_t operator()(art::HEdge const& x) const noexcept { // Use Cantor pairing function as the hash function. size_t a = x.GetFrom(); size_t b = x.GetTo(); return (a + b) * (a + b + 1) / 2 + b; } }; ostream& operator<<(ostream& os, const art::HEdge& e); } // namespace std #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_