/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

#include "scheduler.h"

namespace art {
namespace arm64 {

static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
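// Note: the constants above are heuristic per-instruction cost estimates consumed
// by the scheduling heuristics (e.g. the critical-path node selector declared in
// scheduler.h); they are not measured cycle counts for any particular arm64 core.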

class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
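  // The estimated latency is written to `last_visited_latency_` (inherited from
  // SchedulingLatencyVisitor), which the scheduler reads back after dispatching
  // the visit for each node.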
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
  M(ArrayGet             , unused)                   \
  M(ArrayLength          , unused)                   \
  M(ArraySet             , unused)                   \
  M(BoundsCheck          , unused)                   \
  M(Div                  , unused)                   \
  M(InstanceFieldGet     , unused)                   \
  M(InstanceOf           , unused)                   \
  M(LoadString           , unused)                   \
  M(Mul                  , unused)                   \
  M(NewArray             , unused)                   \
  M(NewInstance          , unused)                   \
  M(Rem                  , unused)                   \
  M(StaticFieldGet       , unused)                   \
  M(SuspendCheck         , unused)                   \
  M(TypeConversion       , unused)                   \
  M(VecReplicateScalar   , unused)                   \
  M(VecExtractScalar     , unused)                   \
  M(VecReduce            , unused)                   \
  M(VecCnv               , unused)                   \
  M(VecNeg               , unused)                   \
  M(VecAbs               , unused)                   \
  M(VecNot               , unused)                   \
  M(VecAdd               , unused)                   \
  M(VecHalvingAdd        , unused)                   \
  M(VecSub               , unused)                   \
  M(VecMul               , unused)                   \
  M(VecDiv               , unused)                   \
  M(VecMin               , unused)                   \
  M(VecMax               , unused)                   \
  M(VecAnd               , unused)                   \
  M(VecAndNot            , unused)                   \
  M(VecOr                , unused)                   \
  M(VecXor               , unused)                   \
  M(VecShl               , unused)                   \
  M(VecShr               , unused)                   \
  M(VecUShr              , unused)                   \
  M(VecSetScalars        , unused)                   \
  M(VecMultiplyAccumulate, unused)                   \
  M(VecLoad              , unused)                   \
  M(VecStore             , unused)

#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M)   \
  M(BinaryOperation      , unused)                   \
  M(Invoke               , unused)

#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
  void Visit##type(H##type* instruction) override;
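// For example, `DECLARE_VISIT_INSTRUCTION(ArrayGet, unused)` declares:
//   void VisitArrayGet(HArrayGet* instruction) override;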

  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};

class HSchedulerARM64 : public HScheduler {
 public:
  explicit HSchedulerARM64(SchedulingNodeSelector* selector)
      : HScheduler(&arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() override {}

  bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) case \
  HInstruction::InstructionKind::k##type:
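    // For example, `CASE_INSTRUCTION_KIND(MultiplyAccumulate, unused)` expands to
    //   `case HInstruction::InstructionKind::kMultiplyAccumulate:`.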
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
  // loop boundaries. This is a workaround for the lack of a proper notion of SIMD registers in the
  // compiler; around a call we have to save/restore all live SIMD&FP registers (only the lower
  // 64 bits of SIMD&FP registers are callee-saved), so such vector instructions must not be
  // reordered.
  //
  // TODO: remove this when proper support for SIMD registers is introduced in the compiler.
  bool IsSchedulingBarrier(const HInstruction* instr) const override {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};
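
// A minimal usage sketch (the real driver lives in the scheduling pass, see
// scheduler.cc), assuming a graph `graph` built by the optimizing compiler:
//
//   CriticalPathSchedulingNodeSelector selector;
//   arm64::HSchedulerARM64 scheduler(&selector);
//   scheduler.Schedule(graph);  // Reorders instructions within each basic block.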

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_