/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "superblock_cloner.h" #include "common_dominator.h" #include "induction_var_range.h" #include "graph_checker.h" #include namespace art { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; using HEdgeSet = SuperblockCloner::HEdgeSet; void HEdge::Dump(std::ostream& stream) const { stream << "(" << from_ << "->" << to_ << ")"; } // // Static helper methods. // // Returns whether instruction has any uses (regular or environmental) outside the region, // defined by basic block set. static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) { auto& uses = instr->GetUses(); for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) { HInstruction* user = use_node->GetUser(); if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { return true; } } auto& env_uses = instr->GetEnvUses(); for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) { HInstruction* user = use_node->GetUser()->GetHolder(); if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { return true; } } return false; } // Returns whether the phi's inputs are the same HInstruction. static bool ArePhiInputsTheSame(const HPhi* phi) { HInstruction* first_input = phi->InputAt(0); for (size_t i = 1, e = phi->InputCount(); i < e; i++) { if (phi->InputAt(i) != first_input) { return false; } } return true; } // Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method). static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) { if (set1->size() != set2->size()) { return false; } for (auto e : *set1) { if (set2->find(e) == set2->end()) { return false; } } return true; } // Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph. static void OrderLoopsHeadersPredecessors(HGraph* graph) { for (HBasicBlock* block : graph->GetPostOrder()) { if (block->IsLoopHeader()) { graph->OrderLoopHeaderPredecessors(block); } } } // Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while // traversing the function removes basic blocks from the bb_set (instead of traditional DFS // 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start // block. static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) { DCHECK(bb_set->IsBitSet(block->GetBlockId())); bb_set->ClearBit(block->GetBlockId()); for (HBasicBlock* succ : block->GetSuccessors()) { if (bb_set->IsBitSet(succ->GetBlockId())) { TraverseSubgraphForConnectivity(succ, bb_set); } } } // // Helpers for CloneBasicBlock. // void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) { DCHECK(!copy_instr->IsPhi()); for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) { // Copy instruction holds the same input as the original instruction holds. HInstruction* orig_input = copy_instr->InputAt(i); if (!IsInOrigBBSet(orig_input->GetBlock())) { // Defined outside the subgraph. continue; } HInstruction* copy_input = GetInstrCopy(orig_input); // copy_instr will be registered as a user of copy_inputs after returning from this function: // 'copy_block->AddInstruction(copy_instr)'. copy_instr->SetRawInputAt(i, copy_input); } } void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env) { if (orig_env->GetParent() != nullptr) { DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent()); } HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr); for (size_t i = 0; i < orig_env->Size(); i++) { HInstruction* env_input = orig_env->GetInstructionAt(i); if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) { env_input = GetInstrCopy(env_input); DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr); } copy_env->SetRawEnvAt(i, env_input); if (env_input != nullptr) { env_input->AddEnvUseAt(copy_env, i); } } // InsertRawEnvironment assumes that instruction already has an environment that's why we use // SetRawEnvironment in the 'else' case. // As this function calls itself recursively with the same copy_instr - this copy_instr may // have partially copied chain of HEnvironments. if (copy_instr->HasEnvironment()) { copy_instr->InsertRawEnvironment(copy_env); } else { copy_instr->SetRawEnvironment(copy_env); } } // // Helpers for RemapEdgesSuccessors. // void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ) { DCHECK(IsInOrigBBSet(orig_succ)); HBasicBlock* copy_succ = GetBlockCopy(orig_succ); size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block); size_t phi_input_count = 0; // This flag reflects whether the original successor has at least one phi and this phi // has been already processed in the loop. Used for validation purposes in DCHECK to check that // in the end all of the phis in the copy successor have the same number of inputs - the number // of copy successor's predecessors. bool first_phi_met = false; for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { HPhi* orig_phi = it.Current()->AsPhi(); HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); HInstruction* orig_phi_input = orig_phi->InputAt(this_index); // Remove corresponding input for original phi. orig_phi->RemoveInputAt(this_index); // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds // to orig_block, so add the input at the end of the list. copy_phi->AddInput(orig_phi_input); if (!first_phi_met) { phi_input_count = copy_phi->InputCount(); first_phi_met = true; } else { DCHECK_EQ(phi_input_count, copy_phi->InputCount()); } } // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds // to the previously added phi inputs position. orig_block->ReplaceSuccessor(orig_succ, copy_succ); DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count); } void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ) { DCHECK(IsInOrigBBSet(orig_succ)); HBasicBlock* copy_block = GetBlockCopy(orig_block); HBasicBlock* copy_succ = GetBlockCopy(orig_succ); copy_block->AddSuccessor(copy_succ); size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { HPhi* orig_phi = it.Current()->AsPhi(); HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); copy_phi->AddInput(orig_phi_input); } } void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ) { DCHECK(IsInOrigBBSet(orig_succ)); HBasicBlock* copy_block = GetBlockCopy(orig_block); copy_block->AddSuccessor(orig_succ); DCHECK(copy_block->HasSuccessor(orig_succ)); size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { HPhi* orig_phi = it.Current()->AsPhi(); HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); orig_phi->AddInput(orig_phi_input); } } bool SuperblockCloner::IsRemapInfoForVersioning() const { return remap_incoming_->empty() && remap_orig_internal_->empty() && remap_copy_internal_->empty(); } void SuperblockCloner::CopyIncomingEdgesForVersioning() { for (uint32_t orig_block_id : orig_bb_set_.Indexes()) { HBasicBlock* orig_block = GetBlockById(orig_block_id); size_t incoming_edge_count = 0; for (HBasicBlock* orig_pred : orig_block->GetPredecessors()) { uint32_t orig_pred_id = orig_pred->GetBlockId(); if (IsInOrigBBSet(orig_pred_id)) { continue; } HBasicBlock* copy_block = GetBlockCopy(orig_block); // This corresponds to the requirement on the order of predecessors: all the incoming // edges must be seen before the internal ones. This is always true for natural loops. // TODO: remove this requirement. DCHECK_EQ(orig_block->GetPredecessorIndexOf(orig_pred), incoming_edge_count); for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { HPhi* orig_phi = it.Current()->AsPhi(); HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); HInstruction* orig_phi_input = orig_phi->InputAt(incoming_edge_count); // Add the corresponding input of the original phi to the copy one. copy_phi->AddInput(orig_phi_input); } copy_block->AddPredecessor(orig_pred); incoming_edge_count++; } } } // // Local versions of CF calculation/adjustment routines. // // TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect // the performance of the base version by checking the local set. // TODO: this version works when updating the back edges info for natural loop-based local_set. // Check which exactly types of subgraphs can be analysed or rename it to // FindBackEdgesInTheNaturalLoop. void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) { ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks. DCHECK_EQ(visited.GetHighestBitSet(), -1); // Nodes that we're currently visiting, indexed by block id. ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder); // Number of successors visited from a given node, indexed by block id. ArenaVector successors_visited(graph_->GetBlocks().size(), 0u, arena_->Adapter(kArenaAllocGraphBuilder)); // Stack of nodes that we're currently visiting (same as marked in "visiting" above). ArenaVector worklist(arena_->Adapter(kArenaAllocGraphBuilder)); constexpr size_t kDefaultWorklistSize = 8; worklist.reserve(kDefaultWorklistSize); visited.SetBit(entry_block->GetBlockId()); visiting.SetBit(entry_block->GetBlockId()); worklist.push_back(entry_block); while (!worklist.empty()) { HBasicBlock* current = worklist.back(); uint32_t current_id = current->GetBlockId(); if (successors_visited[current_id] == current->GetSuccessors().size()) { visiting.ClearBit(current_id); worklist.pop_back(); } else { HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; uint32_t successor_id = successor->GetBlockId(); if (!local_set->IsBitSet(successor_id)) { continue; } if (visiting.IsBitSet(successor_id)) { DCHECK(ContainsElement(worklist, successor)); successor->AddBackEdgeWhileUpdating(current); } else if (!visited.IsBitSet(successor_id)) { visited.SetBit(successor_id); visiting.SetBit(successor_id); worklist.push_back(successor); } } } } void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { HBasicBlock* block_entry = nullptr; if (outer_loop_ == nullptr) { for (auto block : graph_->GetBlocks()) { if (block != nullptr) { outer_loop_bb_set->SetBit(block->GetBlockId()); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr) { info->ResetBasicBlockData(); } } } block_entry = graph_->GetEntryBlock(); } else { outer_loop_bb_set->Copy(&outer_loop_bb_set_); block_entry = outer_loop_->GetHeader(); // Add newly created copy blocks. for (auto entry : *bb_map_) { outer_loop_bb_set->SetBit(entry.second->GetBlockId()); } // Clear loop_info for the whole outer loop. for (uint32_t idx : outer_loop_bb_set->Indexes()) { HBasicBlock* block = GetBlockById(idx); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr) { info->ResetBasicBlockData(); } } } FindBackEdgesLocal(block_entry, outer_loop_bb_set); for (uint32_t idx : outer_loop_bb_set->Indexes()) { HBasicBlock* block = GetBlockById(idx); HLoopInformation* info = block->GetLoopInformation(); // Reset LoopInformation for regular blocks and old headers which are no longer loop headers. if (info != nullptr && (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) { block->SetLoopInformation(nullptr); } } } // This is a modified version of HGraph::AnalyzeLoops. GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) { // We iterate post order to ensure we visit inner loops before outer loops. // `PopulateRecursive` needs this guarantee to know whether a natural loop // contains an irreducible loop. for (HBasicBlock* block : graph_->GetPostOrder()) { if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { continue; } if (block->IsLoopHeader()) { if (block->IsCatchBlock()) { // TODO: Dealing with exceptional back edges could be tricky because // they only approximate the real control flow. Bail out for now. return kAnalysisFailThrowCatchLoop; } block->GetLoopInformation()->Populate(); } } for (HBasicBlock* block : graph_->GetPostOrder()) { if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { continue; } if (block->IsLoopHeader()) { HLoopInformation* cur_loop = block->GetLoopInformation(); HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation(); if (outer_loop != nullptr) { outer_loop->PopulateInnerLoopUpwards(cur_loop); } } } return kAnalysisSuccess; } void SuperblockCloner::CleanUpControlFlow() { // TODO: full control flow clean up for now, optimize it. graph_->ClearDominanceInformation(); ArenaBitVector outer_loop_bb_set( arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); RecalculateBackEdgesInfo(&outer_loop_bb_set); // TODO: do it locally. graph_->SimplifyCFG(); graph_->ComputeDominanceInformation(); // AnalyzeLoopsLocally requires a correct post-ordering information which was calculated just // before in ComputeDominanceInformation. GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set); DCHECK_EQ(result, kAnalysisSuccess); // TODO: do it locally OrderLoopsHeadersPredecessors(graph_); graph_->ComputeTryBlockInformation(); } // // Helpers for ResolveDataFlow // void SuperblockCloner::ResolvePhi(HPhi* phi) { HBasicBlock* phi_block = phi->GetBlock(); for (size_t i = 0, e = phi->InputCount(); i < e; i++) { HInstruction* input = phi->InputAt(i); HBasicBlock* input_block = input->GetBlock(); // Originally defined outside the region. if (!IsInOrigBBSet(input_block)) { continue; } HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i]; if (!IsInOrigBBSet(corresponding_block)) { phi->ReplaceInput(GetInstrCopy(input), i); } } } // // Main algorithm methods. // void SuperblockCloner::SearchForSubgraphExits(ArenaVector* exits) const { DCHECK(exits->empty()); for (uint32_t block_id : orig_bb_set_.Indexes()) { HBasicBlock* block = GetBlockById(block_id); for (HBasicBlock* succ : block->GetSuccessors()) { if (!IsInOrigBBSet(succ)) { exits->push_back(succ); } } } } void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { DCHECK(outer_loop_ == nullptr); ArenaVector exits(arena_->Adapter(kArenaAllocSuperblockCloner)); SearchForSubgraphExits(&exits); // For a reducible graph we need to update back-edges and dominance information only for // the outermost loop which is affected by the transformation - it can be found by picking // the common most outer loop of loops to which the subgraph exits blocks belong. // Note: it can a loop or the whole graph (outer_loop_ will be nullptr in this case). for (HBasicBlock* exit : exits) { HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation(); if (loop_exit_loop_info == nullptr) { outer_loop_ = nullptr; break; } if (outer_loop_ == nullptr) { // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer // common loop. outer_loop_ = loop_exit_loop_info; } outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); } if (outer_loop_ != nullptr) { // Save the loop population info as it will be changed later. outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks()); } } void SuperblockCloner::RemapEdgesSuccessors() { // By this stage all the blocks have been copied, copy phis - created with no inputs; // no copy edges have been created so far. if (IsRemapInfoForVersioning()) { CopyIncomingEdgesForVersioning(); } // Redirect incoming edges. for (HEdge e : *remap_incoming_) { HBasicBlock* orig_block = GetBlockById(e.GetFrom()); HBasicBlock* orig_succ = GetBlockById(e.GetTo()); RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); } // Redirect internal edges. for (uint32_t orig_block_id : orig_bb_set_.Indexes()) { HBasicBlock* orig_block = GetBlockById(orig_block_id); for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) { uint32_t orig_succ_id = orig_succ->GetBlockId(); // Check for outgoing edge. if (!IsInOrigBBSet(orig_succ)) { HBasicBlock* copy_block = GetBlockCopy(orig_block); copy_block->AddSuccessor(orig_succ); continue; } auto orig_redir = remap_orig_internal_->find(HEdge(orig_block_id, orig_succ_id)); auto copy_redir = remap_copy_internal_->find(HEdge(orig_block_id, orig_succ_id)); // Due to construction all successors of copied block were set to original. if (copy_redir != remap_copy_internal_->end()) { RemapCopyInternalEdge(orig_block, orig_succ); } else { AddCopyInternalEdge(orig_block, orig_succ); } if (orig_redir != remap_orig_internal_->end()) { RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); } } } } void SuperblockCloner::AdjustControlFlowInfo() { ArenaBitVector outer_loop_bb_set( arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); RecalculateBackEdgesInfo(&outer_loop_bb_set); graph_->ClearDominanceInformation(); // TODO: Do it locally. graph_->ComputeDominanceInformation(); } // TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to // the valid values; only phis' inputs must be adjusted. void SuperblockCloner::ResolveDataFlow() { for (auto entry : *bb_map_) { HBasicBlock* orig_block = entry.first; for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { HPhi* orig_phi = it.Current()->AsPhi(); HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); ResolvePhi(orig_phi); ResolvePhi(copy_phi); } if (kIsDebugBuild) { // Inputs of instruction copies must be already mapped to correspondent inputs copies. for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { CheckInstructionInputsRemapping(it.Current()); } } } } // // Helpers for live-outs processing and Subgraph-closed SSA. // bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const { DCHECK(live_outs->empty()); for (uint32_t idx : orig_bb_set_.Indexes()) { HBasicBlock* block = GetBlockById(idx); for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); DCHECK(instr->IsClonable()); if (IsUsedOutsideRegion(instr, orig_bb_set_)) { live_outs->FindOrAdd(instr, instr); } } for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); if (!instr->IsClonable()) { return false; } if (IsUsedOutsideRegion(instr, orig_bb_set_)) { // TODO: Investigate why HNewInstance, HCheckCast has a requirement for the input. if (instr->IsLoadClass()) { return false; } live_outs->FindOrAdd(instr, instr); } } } return true; } void SuperblockCloner::UpdateInductionRangeInfoOf( HInstruction* user, HInstruction* old_instruction, HInstruction* replacement) { if (induction_range_ != nullptr) { induction_range_->Replace(user, old_instruction, replacement); } } void SuperblockCloner::ConstructSubgraphClosedSSA() { if (live_outs_.empty()) { return; } ArenaVector exits(arena_->Adapter(kArenaAllocSuperblockCloner)); SearchForSubgraphExits(&exits); if (exits.empty()) { DCHECK(live_outs_.empty()); return; } DCHECK_EQ(exits.size(), 1u); HBasicBlock* exit_block = exits[0]; // There should be no critical edges. DCHECK_EQ(exit_block->GetPredecessors().size(), 1u); DCHECK(exit_block->GetPhis().IsEmpty()); // For each live-out value insert a phi into the loop exit and replace all the value's uses // external to the loop with this phi. The phi will have the original value as its only input; // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the // original value as the second input thus merging data flow from the original and copy parts of // the subgraph. Also update the record in the live_outs_ map from (value, value) to // (value, new_phi). for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) { HInstruction* value = live_out_it->first; HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType()); if (value->GetType() == DataType::Type::kReference) { phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo()); } exit_block->AddPhi(phi); live_out_it->second = phi; const HUseList& uses = value->GetUses(); for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { HInstruction* user = it->GetUser(); size_t index = it->GetIndex(); // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). ++it; if (!IsInOrigBBSet(user->GetBlock())) { user->ReplaceInput(phi, index); UpdateInductionRangeInfoOf(user, value, phi); } } const HUseList& env_uses = value->GetEnvUses(); for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) { HEnvironment* env = it->GetUser(); size_t index = it->GetIndex(); ++it; if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) { env->ReplaceInput(phi, index); } } phi->AddInput(value); } } void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() { for (auto it : live_outs_) { DCHECK(it.first != it.second); HInstruction* orig_value = it.first; HPhi* phi = it.second->AsPhi(); HInstruction* copy_value = GetInstrCopy(orig_value); // Copy edges are inserted after the original so we can just add new input to the phi. phi->AddInput(copy_value); } } // // Debug and logging methods. // // Debug function to dump graph' BasicBlocks info. void DumpBB(HGraph* graph) { for (HBasicBlock* bb : graph->GetBlocks()) { if (bb == nullptr) { continue; } std::ostringstream oss; oss << bb->GetBlockId(); oss << " <- "; for (HBasicBlock* pred : bb->GetPredecessors()) { oss << pred->GetBlockId() << " "; } oss << " -> "; for (HBasicBlock* succ : bb->GetSuccessors()) { oss << succ->GetBlockId() << " "; } if (bb->GetDominator()) { oss << " dom " << bb->GetDominator()->GetBlockId(); } if (bb->GetLoopInformation()) { oss << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId(); } LOG(INFO) << oss.str(); } } void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { DCHECK(!orig_instr->IsPhi()); HInstruction* copy_instr = GetInstrCopy(orig_instr); for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) { HInstruction* orig_input = orig_instr->InputAt(i); DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock())); // If original input is defined outside the region then it will remain for both original // instruction and the copy after the transformation. if (!IsInOrigBBSet(orig_input->GetBlock())) { continue; } HInstruction* copy_input = GetInstrCopy(orig_input); DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); } // Resolve environment. if (orig_instr->HasEnvironment()) { HEnvironment* orig_env = orig_instr->GetEnvironment(); for (size_t i = 0, e = orig_env->Size(); i < e; ++i) { HInstruction* orig_input = orig_env->GetInstructionAt(i); // If original input is defined outside the region then it will remain for both original // instruction and the copy after the transformation. if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) { continue; } HInstruction* copy_input = GetInstrCopy(orig_input); DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); } } } bool SuperblockCloner::CheckRemappingInfoIsValid() { for (HEdge edge : *remap_orig_internal_) { if (!IsEdgeValid(edge, graph_) || !IsInOrigBBSet(edge.GetFrom()) || !IsInOrigBBSet(edge.GetTo())) { return false; } } for (auto edge : *remap_copy_internal_) { if (!IsEdgeValid(edge, graph_) || !IsInOrigBBSet(edge.GetFrom()) || !IsInOrigBBSet(edge.GetTo())) { return false; } } for (auto edge : *remap_incoming_) { if (!IsEdgeValid(edge, graph_) || IsInOrigBBSet(edge.GetFrom()) || !IsInOrigBBSet(edge.GetTo())) { return false; } } return true; } void SuperblockCloner::VerifyGraph() { for (auto it : *hir_map_) { HInstruction* orig_instr = it.first; HInstruction* copy_instr = it.second; if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) { DCHECK(it.first->GetBlock() != nullptr); } if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) { DCHECK(it.second->GetBlock() != nullptr); } } if (kSuperblockClonerVerify) { GraphChecker checker(graph_); checker.Run(); if (!checker.IsValid()) { for (const std::string& error : checker.GetErrors()) { LOG(ERROR) << error; } LOG(FATAL) << "GraphChecker failed: superblock cloner"; } } } void DumpBBSet(const ArenaBitVector* set) { for (uint32_t idx : set->Indexes()) { LOG(INFO) << idx; } } void SuperblockCloner::DumpInputSets() { LOG(INFO) << "orig_bb_set:"; for (uint32_t idx : orig_bb_set_.Indexes()) { LOG(INFO) << idx; } LOG(INFO) << "remap_orig_internal:"; for (HEdge e : *remap_orig_internal_) { LOG(INFO) << e; } LOG(INFO) << "remap_copy_internal:"; for (auto e : *remap_copy_internal_) { LOG(INFO) << e; } LOG(INFO) << "remap_incoming:"; for (auto e : *remap_incoming_) { LOG(INFO) << e; } } // // Public methods. // SuperblockCloner::SuperblockCloner(HGraph* graph, const HBasicBlockSet* orig_bb_set, HBasicBlockMap* bb_map, HInstructionMap* hir_map, InductionVarRange* induction_range) : graph_(graph), arena_(graph->GetAllocator()), orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), remap_orig_internal_(nullptr), remap_copy_internal_(nullptr), remap_incoming_(nullptr), bb_map_(bb_map), hir_map_(hir_map), induction_range_(induction_range), outer_loop_(nullptr), outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), live_outs_(std::less(), graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) { orig_bb_set_.Copy(orig_bb_set); } void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, const HEdgeSet* remap_copy_internal, const HEdgeSet* remap_incoming) { remap_orig_internal_ = remap_orig_internal; remap_copy_internal_ = remap_copy_internal; remap_incoming_ = remap_incoming; DCHECK(CheckRemappingInfoIsValid()); } bool SuperblockCloner::IsSubgraphClonable() const { // TODO: Support irreducible graphs and graphs with try-catch. if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) { return false; } HInstructionMap live_outs( std::less(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); if (!CollectLiveOutsAndCheckClonable(&live_outs)) { return false; } ArenaVector exits(arena_->Adapter(kArenaAllocSuperblockCloner)); SearchForSubgraphExits(&exits); // The only loops with live-outs which are currently supported are loops with a single exit. if (!live_outs.empty() && exits.size() != 1) { return false; } return true; } // Checks that loop unrolling/peeling/versioning is being conducted. bool SuperblockCloner::IsFastCase() const { // Check that all the basic blocks belong to the same loop. bool flag = false; HLoopInformation* common_loop_info = nullptr; for (uint32_t idx : orig_bb_set_.Indexes()) { HBasicBlock* block = GetBlockById(idx); HLoopInformation* block_loop_info = block->GetLoopInformation(); if (!flag) { common_loop_info = block_loop_info; } else { if (block_loop_info != common_loop_info) { return false; } } } // Check that orig_bb_set_ corresponds to loop peeling/unrolling/versioning. if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) { return false; } if (IsRemapInfoForVersioning()) { return true; } bool peeling_or_unrolling = false; HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); // Check whether remapping info corresponds to loop unrolling. CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true, common_loop_info, &remap_orig_internal, &remap_copy_internal, &remap_incoming); peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && EdgeHashSetsEqual(&remap_incoming, remap_incoming_); remap_orig_internal.clear(); remap_copy_internal.clear(); remap_incoming.clear(); // Check whether remapping info corresponds to loop peeling. CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false, common_loop_info, &remap_orig_internal, &remap_copy_internal, &remap_incoming); peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && EdgeHashSetsEqual(&remap_incoming, remap_incoming_); return peeling_or_unrolling; } void SuperblockCloner::Run() { DCHECK(bb_map_ != nullptr); DCHECK(hir_map_ != nullptr); DCHECK(remap_orig_internal_ != nullptr && remap_copy_internal_ != nullptr && remap_incoming_ != nullptr); DCHECK(IsSubgraphClonable()); DCHECK(IsFastCase()); if (kSuperblockClonerLogging) { DumpInputSets(); } CollectLiveOutsAndCheckClonable(&live_outs_); // Find an area in the graph for which control flow information should be adjusted. FindAndSetLocalAreaForAdjustments(); ConstructSubgraphClosedSSA(); // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be // adjusted. CloneBasicBlocks(); // Connect the blocks together/remap successors and fix phis which are directly affected my the // remapping. RemapEdgesSuccessors(); // Check that the subgraph is connected. if (kIsDebugBuild) { HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner); // Add original and copy blocks of the subgraph to the work set. for (auto iter : *bb_map_) { work_set.SetBit(iter.first->GetBlockId()); // Original block. work_set.SetBit(iter.second->GetBlockId()); // Copy block. } CHECK(IsSubgraphConnected(&work_set, graph_)); } // Recalculate dominance and backedge information which is required by the next stage. AdjustControlFlowInfo(); // Fix data flow of the graph. ResolveDataFlow(); FixSubgraphClosedSSAAfterCloning(); } void SuperblockCloner::CleanUp() { CleanUpControlFlow(); // Remove phis which have all inputs being same. // When a block has a single predecessor it must not have any phis. However after the // transformation it could happen that there is such block with a phi with a single input. // As this is needed to be processed we also simplify phis with multiple same inputs here. for (auto entry : *bb_map_) { HBasicBlock* orig_block = entry.first; for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); if (ArePhiInputsTheSame(phi)) { phi->ReplaceWith(phi->InputAt(0)); orig_block->RemovePhi(phi); } } HBasicBlock* copy_block = GetBlockCopy(orig_block); for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); if (ArePhiInputsTheSame(phi)) { phi->ReplaceWith(phi->InputAt(0)); copy_block->RemovePhi(phi); } } } if (kIsDebugBuild) { VerifyGraph(); } } HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { HGraph* graph = orig_block->GetGraph(); HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc()); graph->AddBlock(copy_block); // Clone all the phis and add them to the map. for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { HInstruction* orig_instr = it.Current(); HInstruction* copy_instr = orig_instr->Clone(arena_); copy_block->AddPhi(copy_instr->AsPhi()); copy_instr->AsPhi()->RemoveAllInputs(); DCHECK(!orig_instr->HasEnvironment()); hir_map_->Put(orig_instr, copy_instr); } // Clone all the instructions and add them to the map. for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* orig_instr = it.Current(); HInstruction* copy_instr = orig_instr->Clone(arena_); ReplaceInputsWithCopies(copy_instr); copy_block->AddInstruction(copy_instr); if (orig_instr->HasEnvironment()) { DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment()); } hir_map_->Put(orig_instr, copy_instr); } return copy_block; } void SuperblockCloner::CloneBasicBlocks() { // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied // instructions might be replaced by copies of the original inputs (depending where those inputs // are defined). So the definitions of the original inputs must be visited before their original // uses. The property of the reducible graphs "if 'A' dom 'B' then rpo_num('A') >= rpo_num('B')" // guarantees that. for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) { if (!IsInOrigBBSet(orig_block)) { continue; } HBasicBlock* copy_block = CloneBasicBlock(orig_block); bb_map_->Put(orig_block, copy_block); if (kSuperblockClonerLogging) { LOG(INFO) << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId(); } } } // // Stand-alone methods. // void CollectRemappingInfoForPeelUnroll(bool to_unroll, HLoopInformation* loop_info, HEdgeSet* remap_orig_internal, HEdgeSet* remap_copy_internal, HEdgeSet* remap_incoming) { DCHECK(loop_info != nullptr); HBasicBlock* loop_header = loop_info->GetHeader(); // Set up remap_orig_internal edges set - set is empty. // Set up remap_copy_internal edges set. for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) { HEdge e = HEdge(back_edge_block, loop_header); if (to_unroll) { remap_orig_internal->insert(e); remap_copy_internal->insert(e); } else { remap_copy_internal->insert(e); } } // Set up remap_incoming edges set. if (!to_unroll) { remap_incoming->insert(HEdge(loop_info->GetPreHeader(), loop_header)); } } bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) { ArenaVector entry_blocks( graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); // Find subgraph entry blocks. for (uint32_t orig_block_id : work_set->Indexes()) { HBasicBlock* block = graph->GetBlocks()[orig_block_id]; for (HBasicBlock* pred : block->GetPredecessors()) { if (!work_set->IsBitSet(pred->GetBlockId())) { entry_blocks.push_back(block); break; } } } for (HBasicBlock* entry_block : entry_blocks) { if (work_set->IsBitSet(entry_block->GetBlockId())) { TraverseSubgraphForConnectivity(entry_block, work_set); } } // Return whether there are unvisited - unreachable - blocks. return work_set->NumSetBits() == 0; } HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { if (loop1 == nullptr || loop2 == nullptr) { return nullptr; } if (loop1->IsIn(*loop2)) { return loop2; } HLoopInformation* current = loop1; while (current != nullptr && !loop2->IsIn(*current)) { current = current->GetPreHeader()->GetLoopInformation(); } return current; } bool LoopClonerHelper::IsLoopClonable(HLoopInformation* loop_info) { LoopClonerHelper helper( loop_info, /* bb_map= */ nullptr, /* hir_map= */ nullptr, /* induction_range= */ nullptr); return helper.IsLoopClonable(); } HBasicBlock* LoopClonerHelper::DoLoopTransformationImpl(TransformationKind transformation) { // For now do transformations only for natural loops. DCHECK(!loop_info_->IsIrreducible()); HBasicBlock* loop_header = loop_info_->GetHeader(); // Check that loop info is up-to-date. DCHECK(loop_info_ == loop_header->GetLoopInformation()); HGraph* graph = loop_header->GetGraph(); if (kSuperblockClonerLogging) { LOG(INFO) << "Method: " << graph->PrettyMethod(); std::ostringstream oss; oss << "Scalar loop "; switch (transformation) { case TransformationKind::kPeeling: oss << "peeling"; break; case TransformationKind::kUnrolling: oss<< "unrolling"; break; case TransformationKind::kVersioning: oss << "versioning"; break; default: LOG(FATAL) << "Unreachable"; UNREACHABLE(); } oss << " was applied to the loop <" << loop_header->GetBlockId() << ">."; LOG(INFO) << oss.str(); } ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool()); HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); // No remapping needed for loop versioning. if (transformation != TransformationKind::kVersioning) { CollectRemappingInfoForPeelUnroll(transformation == TransformationKind::kUnrolling, loop_info_, &remap_orig_internal, &remap_copy_internal, &remap_incoming); } cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); cloner_.Run(); cloner_.CleanUp(); // Check that loop info is preserved. DCHECK(loop_info_ == loop_header->GetLoopInformation()); return loop_header; } LoopClonerSimpleHelper::LoopClonerSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range) : bb_map_(std::less(), info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), hir_map_(std::less(), info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), helper_(info, &bb_map_, &hir_map_, induction_range) {} } // namespace art namespace std { ostream& operator<<(ostream& os, const art::HEdge& e) { e.Dump(os); return os; } } // namespace std