1 #include "rs_core.rsh"
2 #include "rs_structs.h"
3 
4 #include "rsCpuCoreRuntime.h"
5 
rsFrac(float v)6 extern float __attribute__((overloadable)) rsFrac(float v) {
7     int i = (int)floor(v);
8     return fmin(v - i, 0x1.fffffep-1f);
9 }
10 
11 /* Function declarations from libRS */
12 extern float4 __attribute__((overloadable)) convert_float4(uchar4 c);
13 
14 /* Implementation of Core Runtime */
15 
rsUnpackColor8888(uchar4 c)16 extern float4 rsUnpackColor8888(uchar4 c)
17 {
18     return convert_float4(c) * 0.003921569f;
19 }
20 
21 
/* float overload: forwards v and the bounds l, h to clamp(). */
extern float __attribute__((overloadable)) rsClamp(float v, float l, float h) {
    const float bounded = clamp(v, l, h);
    return bounded;
}
/* char overload: forwards v and the bounds l, h to clamp(). */
extern char __attribute__((overloadable)) rsClamp(char v, char l, char h) {
    const char bounded = clamp(v, l, h);
    return bounded;
}
rsClamp(uchar v,uchar l,uchar h)28 extern uchar __attribute__((overloadable)) rsClamp(uchar v, uchar l, uchar h) {
29     return clamp(v, l, h);
30 }
/* short overload: forwards v and the bounds l, h to clamp(). */
extern short __attribute__((overloadable)) rsClamp(short v, short l, short h) {
    const short bounded = clamp(v, l, h);
    return bounded;
}
rsClamp(ushort v,ushort l,ushort h)34 extern ushort __attribute__((overloadable)) rsClamp(ushort v, ushort l, ushort h) {
35     return clamp(v, l, h);
36 }
/* int overload: forwards v and the bounds l, h to clamp(). */
extern int __attribute__((overloadable)) rsClamp(int v, int l, int h) {
    const int bounded = clamp(v, l, h);
    return bounded;
}
rsClamp(uint v,uint l,uint h)40 extern uint __attribute__((overloadable)) rsClamp(uint v, uint l, uint h) {
41     return clamp(v, l, h);
42 }
43 
/*
 * Compare-and-swap on a signed 32-bit cell: if *ptr equals expectedValue,
 * newValue is stored.  Returns the value __sync_val_compare_and_swap reports,
 * i.e. the contents of *ptr before the operation.
 */
extern int32_t __attribute__((overloadable)) rsAtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) {
    const int32_t witnessed = __sync_val_compare_and_swap(ptr, expectedValue, newValue);
    return witnessed;
}
47 
/*
 * Compare-and-swap on an unsigned 32-bit cell: if *ptr equals expectedValue,
 * newValue is stored.  Returns the contents of *ptr before the operation.
 */
extern uint32_t __attribute__((overloadable)) rsAtomicCas(volatile uint32_t *ptr, uint32_t expectedValue, uint32_t newValue) {
    const uint32_t witnessed = __sync_val_compare_and_swap(ptr, expectedValue, newValue);
    return witnessed;
}
51 
/* Atomically add 1 to *ptr and return the value held before the addition. */
extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile int32_t *ptr) {
    const int32_t before = __sync_fetch_and_add(ptr, 1);
    return before;
}
55 
/*
 * Atomically add 1 to the unsigned cell *ptr.  The previous value is
 * returned converted to int32_t, as the declared return type requires.
 */
extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile uint32_t *ptr) {
    const uint32_t before = __sync_fetch_and_add(ptr, 1);
    return before;
}
59 
/* Atomically subtract 1 from *ptr and return the value held before that. */
extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile int32_t *ptr) {
    const int32_t before = __sync_fetch_and_sub(ptr, 1);
    return before;
}
63 
/*
 * Atomically subtract 1 from the unsigned cell *ptr.  The previous value is
 * returned converted to int32_t, as the declared return type requires.
 */
extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile uint32_t *ptr) {
    const uint32_t before = __sync_fetch_and_sub(ptr, 1);
    return before;
}
67 
/* Atomically add value to *ptr and return the value held before that. */
extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_add(ptr, value);
    return before;
}
71 
/*
 * Atomically add value to the unsigned cell *ptr.  The previous value is
 * returned converted to int32_t, as the declared return type requires.
 */
extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_add(ptr, value);
    return before;
}
75 
/* Atomically subtract value from *ptr and return the value held before. */
extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_sub(ptr, value);
    return before;
}
79 
/*
 * Atomically subtract value from the unsigned cell *ptr.  The previous value
 * is returned converted to int32_t, as the declared return type requires.
 */
extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_sub(ptr, value);
    return before;
}
83 
/* Atomically store (*ptr & value) into *ptr; returns the previous value. */
extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_and(ptr, value);
    return before;
}
87 
/*
 * Atomically store (*ptr & value) into the unsigned cell *ptr.  The previous
 * value is returned converted to int32_t, as the declared return type
 * requires.
 */
extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_and(ptr, value);
    return before;
}
91 
/* Atomically store (*ptr | value) into *ptr; returns the previous value. */
extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_or(ptr, value);
    return before;
}
95 
/*
 * Atomically store (*ptr | value) into the unsigned cell *ptr.  The previous
 * value is returned converted to int32_t, as the declared return type
 * requires.
 */
extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_or(ptr, value);
    return before;
}
99 
/* Atomically store (*ptr ^ value) into *ptr; returns the previous value. */
extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_xor(ptr, value);
    return before;
}
103 
/*
 * Atomically store (*ptr ^ value) into the unsigned cell *ptr.  The previous
 * value is returned converted to int32_t, as the declared return type
 * requires.
 */
extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_xor(ptr, value);
    return before;
}
107 
108 extern uint32_t __attribute__((overloadable)) min(uint32_t, uint32_t);
109 extern int32_t __attribute__((overloadable)) min(int32_t, int32_t);
110 extern uint32_t __attribute__((overloadable)) max(uint32_t, uint32_t);
111 extern int32_t __attribute__((overloadable)) max(int32_t, int32_t);
112 
/*
 * Replace *ptr with min(value, *ptr) using a compare-and-swap retry loop.
 * Returns the value of *ptr that the successful swap was based on.
 */
extern uint32_t __attribute__((overloadable)) rsAtomicMin(volatile uint32_t *ptr, uint32_t value) {
    for (;;) {
        const uint32_t observed = *ptr;
        const uint32_t desired = min(value, observed);
        /* The swap only succeeds if *ptr still holds what was read above;
         * otherwise the cell changed in between and the attempt is redone. */
        if (__sync_val_compare_and_swap(ptr, observed, desired) == observed) {
            return observed;
        }
    }
}
122 
/*
 * Signed variant: replace *ptr with min(value, *ptr) using a
 * compare-and-swap retry loop.  Returns the value of *ptr that the
 * successful swap was based on.
 */
extern int32_t __attribute__((overloadable)) rsAtomicMin(volatile int32_t *ptr, int32_t value) {
    for (;;) {
        const int32_t observed = *ptr;
        const int32_t desired = min(value, observed);
        /* Retry whenever *ptr no longer holds the value read above. */
        if (__sync_val_compare_and_swap(ptr, observed, desired) == observed) {
            return observed;
        }
    }
}
132 
/*
 * Replace *ptr with max(value, *ptr) using a compare-and-swap retry loop.
 * Returns the value of *ptr that the successful swap was based on.
 */
extern uint32_t __attribute__((overloadable)) rsAtomicMax(volatile uint32_t *ptr, uint32_t value) {
    for (;;) {
        const uint32_t observed = *ptr;
        const uint32_t desired = max(value, observed);
        /* The swap only succeeds if *ptr still holds what was read above;
         * otherwise the cell changed in between and the attempt is redone. */
        if (__sync_val_compare_and_swap(ptr, observed, desired) == observed) {
            return observed;
        }
    }
}
142 
/*
 * Signed variant: replace *ptr with max(value, *ptr) using a
 * compare-and-swap retry loop.  Returns the value of *ptr that the
 * successful swap was based on.
 */
extern int32_t __attribute__((overloadable)) rsAtomicMax(volatile int32_t *ptr, int32_t value) {
    for (;;) {
        const int32_t observed = *ptr;
        const int32_t desired = max(value, observed);
        /* Retry whenever *ptr no longer holds the value read above. */
        if (__sync_val_compare_and_swap(ptr, observed, desired) == observed) {
            return observed;
        }
    }
}
152 
153 
154 
155 extern int32_t rand();
156 #define RAND_MAX 0x7fffffff
157 
158 
159 
160 extern float __attribute__((overloadable)) rsRand(float min, float max);/* {
161     float r = (float)rand();
162     r /= RAND_MAX;
163     r = r * (max - min) + min;
164     return r;
165 }
166 */
167 
rsRand(float max)168 extern float __attribute__((overloadable)) rsRand(float max) {
169     return rsRand(0.f, max);
170     //float r = (float)rand();
171     //r *= max;
172     //r /= RAND_MAX;
173     //return r;
174 }
175 
/*
 * Integer overload: converts max to float, calls the float overload and
 * converts the result back to int.
 */
extern int __attribute__((overloadable)) rsRand(int max) {
    const float upperBound = (float)max;
    const float sample = rsRand(upperBound);
    return (int)sample;
}
179 
/*
 * Integer range overload: converts both bounds to float, calls the
 * two-argument float overload and converts the result back to int.
 */
extern int __attribute__((overloadable)) rsRand(int min, int max) {
    const float lowerBound = (float)min;
    const float upperBound = (float)max;
    const float sample = rsRand(lowerBound, upperBound);
    return (int)sample;
}
183 
/*
 * Read current.array[0] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetArray0(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->current.array[0];
}
187 
/*
 * Read current.array[1] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetArray1(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->current.array[1];
}
191 
/*
 * Read current.array[2] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetArray2(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->current.array[2];
}
195 
/*
 * Read current.array[3] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetArray3(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->current.array[3];
}
199 
rsGetFace(rs_kernel_context ctxt)200 extern rs_allocation_cubemap_face __attribute__((overloadable)) rsGetFace(rs_kernel_context ctxt) {
201     return (rs_allocation_cubemap_face)(((struct RsExpandKernelDriverInfo *)ctxt)->current.face);
202 }
203 
/*
 * Read current.lod from the RsExpandKernelDriverInfo that the kernel context
 * handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetLod(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->current.lod;
}
207 
/*
 * Read dim.x from the RsExpandKernelDriverInfo that the kernel context
 * handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimX(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.x;
}
211 
/*
 * Read dim.y from the RsExpandKernelDriverInfo that the kernel context
 * handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimY(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.y;
}
215 
/*
 * Read dim.z from the RsExpandKernelDriverInfo that the kernel context
 * handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimZ(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.z;
}
219 
/*
 * Read dim.array[0] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimArray0(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.array[0];
}
223 
/*
 * Read dim.array[1] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimArray1(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.array[1];
}
227 
/*
 * Read dim.array[2] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimArray2(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.array[2];
}
231 
/*
 * Read dim.array[3] from the RsExpandKernelDriverInfo that the kernel
 * context handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimArray3(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.array[3];
}
235 
/*
 * Report whether dim.face in the RsExpandKernelDriverInfo behind the kernel
 * context handle is non-zero.
 */
extern bool __attribute__((overloadable)) rsGetDimHasFaces(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    const bool hasFaces = info->dim.face != 0;
    return hasFaces;
}
239 
/*
 * Read dim.lod from the RsExpandKernelDriverInfo that the kernel context
 * handle is cast to.
 */
extern uint32_t __attribute__((overloadable)) rsGetDimLod(rs_kernel_context ctxt) {
    const struct RsExpandKernelDriverInfo *info =
            (const struct RsExpandKernelDriverInfo *)ctxt;
    return info->dim.lod;
}
243 
244 #define PRIM_DEBUG(T)                               \
245 extern void __attribute__((overloadable)) rsDebug(const char *, const T *);     \
246 void __attribute__((overloadable)) rsDebug(const char *txt, T val) {            \
247     rsDebug(txt, &val);                                                         \
248 }
249 
250 PRIM_DEBUG(char2)
251 PRIM_DEBUG(char3)
252 PRIM_DEBUG(char4)
253 PRIM_DEBUG(uchar2)
254 PRIM_DEBUG(uchar3)
255 PRIM_DEBUG(uchar4)
256 PRIM_DEBUG(short2)
257 PRIM_DEBUG(short3)
258 PRIM_DEBUG(short4)
259 PRIM_DEBUG(ushort2)
260 PRIM_DEBUG(ushort3)
261 PRIM_DEBUG(ushort4)
262 PRIM_DEBUG(int2)
263 PRIM_DEBUG(int3)
264 PRIM_DEBUG(int4)
265 PRIM_DEBUG(uint2)
266 PRIM_DEBUG(uint3)
267 PRIM_DEBUG(uint4)
268 PRIM_DEBUG(long2)
269 PRIM_DEBUG(long3)
270 PRIM_DEBUG(long4)
271 PRIM_DEBUG(ulong2)
272 PRIM_DEBUG(ulong3)
273 PRIM_DEBUG(ulong4)
274 PRIM_DEBUG(float2)
275 PRIM_DEBUG(float3)
276 PRIM_DEBUG(float4)
277 PRIM_DEBUG(double2)
278 PRIM_DEBUG(double3)
279 PRIM_DEBUG(double4)
280 
281 #undef PRIM_DEBUG
282 
283 // Convert the half values to float before handing off to the driver.  This
284 // eliminates the need in the driver to properly support the half datatype
285 // (either by adding compiler flags for half or link against compiler_rt).
286 // Also, pass the bit-equivalent ushort to be printed.
287 extern void __attribute__((overloadable)) rsDebug(const char *s, float f,
288                                                   ushort us);
rsDebug(const char * s,half h)289 extern void __attribute__((overloadable)) rsDebug(const char *s, half h) {
290     rsDebug(s, (float) h, *(ushort *) &h);
291 }
292 
293 extern void __attribute__((overloadable)) rsDebug(const char *s,
294                                                   const float2 *f,
295                                                   const ushort2 *us);
rsDebug(const char * s,half2 h2)296 extern void __attribute__((overloadable)) rsDebug(const char *s, half2 h2) {
297     float2 f = convert_float2(h2);
298     rsDebug(s, &f, (ushort2 *) &h2);
299 }
300 
301 extern void __attribute__((overloadable)) rsDebug(const char *s,
302                                                   const float3 *f,
303                                                   const ushort3 *us);
rsDebug(const char * s,half3 h3)304 extern void __attribute__((overloadable)) rsDebug(const char *s, half3 h3) {
305     float3 f = convert_float3(h3);
306     rsDebug(s, &f, (ushort3 *) &h3);
307 }
308 
309 extern void __attribute__((overloadable)) rsDebug(const char *s,
310                                                   const float4 *f,
311                                                   const ushort4 *us);
rsDebug(const char * s,half4 h4)312 extern void __attribute__((overloadable)) rsDebug(const char *s, half4 h4) {
313     float4 f = convert_float4(h4);
314     rsDebug(s, &f, (ushort4 *) &h4);
315 }
316