1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef RSD_CPU_CORE_H 18 #define RSD_CPU_CORE_H 19 20 #include "rsd_cpu.h" 21 #include "rsSignal.h" 22 #include "rsContext.h" 23 #include "rsCppUtils.h" 24 #include "rsElement.h" 25 #include "rsScriptC.h" 26 #include "rsCpuCoreRuntime.h" 27 28 #include <string> 29 30 namespace android { 31 namespace renderscript { 32 33 // Whether the CPU we're running on supports SIMD instructions 34 extern bool gArchUseSIMD; 35 36 // Function types found in RenderScript code 37 typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum); 38 typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other); 39 typedef void (*ReduceInitializerFunc_t)(uint8_t *accum); 40 typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum); 41 typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride); 42 typedef void (*InvokeFunc_t)(void *params); 43 typedef void (*InitOrDtorFunc_t)(void); 44 typedef int (*RootFunc_t)(void); 45 46 struct ReduceDescription { 47 ReduceAccumulatorFunc_t accumFunc; // expanded accumulator function 48 ReduceInitializerFunc_t initFunc; // user initializer function 49 ReduceCombinerFunc_t combFunc; // user combiner function 50 ReduceOutConverterFunc_t outFunc; // user outconverter function 51 size_t accumSize; // accumulator datum size, in bytes 52 }; 53 54 // Internal driver callback used to execute a kernel 55 typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); 56 57 class RsdCpuScriptImpl; 58 class RsdCpuReferenceImpl; 59 60 struct ScriptTLSStruct { 61 android::renderscript::Context * mContext; 62 const android::renderscript::Script * mScript; 63 RsdCpuScriptImpl *mImpl; 64 }; 65 66 // MTLaunchStruct passes information about a multithreaded kernel launch. 67 struct MTLaunchStructCommon { 68 RsdCpuReferenceImpl *rs; 69 RsdCpuScriptImpl *script; 70 71 uint32_t mSliceSize; 72 volatile int mSliceNum; 73 bool isThreadable; 74 75 // Boundary information about the launch 76 RsLaunchDimensions start; 77 RsLaunchDimensions end; 78 // Points to MTLaunchStructForEach::fep::dim or 79 // MTLaunchStructReduce::redp::dim. 80 RsLaunchDimensions *dimPtr; 81 }; 82 83 struct MTLaunchStructForEach : public MTLaunchStructCommon { 84 // Driver info structure 85 RsExpandKernelDriverInfo fep; 86 87 ForEachFunc_t kernel; 88 const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; 89 Allocation *aout[RS_KERNEL_INPUT_LIMIT]; 90 }; 91 92 struct MTLaunchStructReduce : public MTLaunchStructCommon { 93 // Driver info structure 94 RsExpandKernelDriverInfo redp; 95 96 const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; 97 98 ReduceAccumulatorFunc_t accumFunc; 99 ReduceInitializerFunc_t initFunc; 100 ReduceCombinerFunc_t combFunc; 101 ReduceOutConverterFunc_t outFunc; 102 103 size_t accumSize; // accumulator datum size in bytes 104 105 size_t accumStride; // stride between accumulators in accumAlloc (below) 106 107 // These fields are used for managing accumulator data items in a 108 // multithreaded execution. 109 // 110 // Let the number of threads be N. 111 // Let Outc be true iff there is an outconverter. 112 // 113 // accumAlloc is a pointer to a single allocation of (N - !Outc) 114 // accumulators. (If there is no outconverter, then the output 115 // allocation acts as an accumulator.) It is created at kernel 116 // launch time. Within that allocation, the distance between the 117 // start of adjacent accumulators is accumStride bytes -- this 118 // might be the same as accumSize, or it might be larger, if we 119 // are attempting to avoid false sharing. 120 // 121 // accumCount is an atomic counter of how many accumulators have 122 // been grabbed by threads. It is initialized to zero at kernel 123 // launch time. See accumPtr for further description. 124 // 125 // accumPtr is pointer to an array of N pointers to accumulators. 126 // The array is created at kernel launch time, and each element is 127 // initialized to nullptr. When a particular thread goes to work, 128 // that thread obtains its accumulator from its entry in this 129 // array. If the entry is nullptr, that thread needs to obtain an 130 // accumulator, and initialize its entry in the array accordingly. 131 // It does so via atomic access (fetch-and-add) to accumCount. 132 // - If Outc, then the fetched value is used as an index into 133 // accumAlloc. 134 // - If !Outc, then 135 // - If the fetched value is zero, then this thread gets the 136 // output allocation for its accumulator. 137 // - If the fetched value is nonzero, then (fetched value - 1) 138 // is used as an index into accumAlloc. 139 uint8_t *accumAlloc; 140 uint8_t **accumPtr; 141 uint32_t accumCount; 142 143 // Logging control 144 uint32_t logReduce; 145 }; 146 147 class RsdCpuReferenceImpl : public RsdCpuReference { 148 public: 149 ~RsdCpuReferenceImpl() override; 150 explicit RsdCpuReferenceImpl(Context *); 151 152 void lockMutex(); 153 void unlockMutex(); 154 155 bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t); 156 void setPriority(int32_t priority) override; 157 virtual void launchThreads(WorkerCallback_t cbk, void *data); 158 static void * helperThreadProc(void *vrsc); 159 RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc); 160 getContext()161 Context * getContext() {return mRSC;} getThreadCount()162 uint32_t getThreadCount() const { 163 return mWorkers.mCount + 1; 164 } 165 166 // Launch foreach kernel 167 void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout, 168 const RsScriptCall *sc, MTLaunchStructForEach *mtls); 169 170 // Launch a general reduce kernel 171 void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout, 172 MTLaunchStructReduce *mtls); 173 174 CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir, 175 uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override; 176 CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override; 177 void* createScriptGroup(const ScriptGroupBase *sg) override; 178 179 const RsdCpuReference::CpuSymbol *symLookup(const char *); 180 lookupScript(const Script * s)181 RsdCpuReference::CpuScript *lookupScript(const Script *s) { 182 return mScriptLookupFn(mRSC, s); 183 } 184 setSelectRTCallback(RSSelectRTCallback pSelectRTCallback)185 void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) { 186 mSelectRTCallback = pSelectRTCallback; 187 } getSelectRTCallback()188 RSSelectRTCallback getSelectRTCallback() { 189 return mSelectRTCallback; 190 } 191 setBccPluginName(const char * name)192 virtual void setBccPluginName(const char *name) { 193 mBccPluginName.assign(name); 194 } getBccPluginName()195 virtual const char *getBccPluginName() const { 196 return mBccPluginName.c_str(); 197 } getInKernel()198 bool getInKernel() override { return mInKernel; } 199 200 // Set to true if we should embed global variable information in the code. setEmbedGlobalInfo(bool v)201 void setEmbedGlobalInfo(bool v) override { 202 mEmbedGlobalInfo = v; 203 } 204 205 // Returns true if we should embed global variable information in the code. getEmbedGlobalInfo()206 bool getEmbedGlobalInfo() const override { 207 return mEmbedGlobalInfo; 208 } 209 210 // Set to true if we should skip constant (immutable) global variables when 211 // potentially embedding information about globals. setEmbedGlobalInfoSkipConstant(bool v)212 void setEmbedGlobalInfoSkipConstant(bool v) override { 213 mEmbedGlobalInfoSkipConstant = v; 214 } 215 216 // Returns true if we should skip constant (immutable) global variables when 217 // potentially embedding information about globals. getEmbedGlobalInfoSkipConstant()218 bool getEmbedGlobalInfoSkipConstant() const override { 219 return mEmbedGlobalInfoSkipConstant; 220 } 221 222 protected: 223 Context *mRSC; 224 uint32_t version_major; 225 uint32_t version_minor; 226 //bool mHasGraphics; 227 bool mInKernel; // Is a parallel kernel execution underway? 228 229 struct Workers { 230 volatile int mRunningCount; 231 volatile int mLaunchCount; 232 uint32_t mCount; 233 pthread_t *mThreadId; 234 pid_t *mNativeThreadId; 235 Signal mCompleteSignal; 236 Signal *mLaunchSignals; 237 WorkerCallback_t mLaunchCallback; 238 void *mLaunchData; 239 }; 240 Workers mWorkers; 241 bool mExit; 242 sym_lookup_t mSymLookupFn; 243 script_lookup_t mScriptLookupFn; 244 245 ScriptTLSStruct mTlsStruct; 246 247 RSSelectRTCallback mSelectRTCallback; 248 std::string mBccPluginName; 249 250 // Specifies whether we should embed global variable information in the 251 // code via special RS variables that can be examined later by the driver. 252 // Defaults to true. 253 bool mEmbedGlobalInfo; 254 255 // Specifies whether we should skip constant (immutable) global variables 256 // when potentially embedding information about globals. 257 // Defaults to true. 258 bool mEmbedGlobalInfoSkipConstant; 259 260 long mPageSize; 261 262 // Launch a general reduce kernel 263 void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout, 264 MTLaunchStructReduce *mtls); 265 void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout, 266 MTLaunchStructReduce *mtls); 267 }; 268 269 270 } // namespace renderscript 271 } // namespace android 272 273 #endif 274