1 #include "rs_core.rsh"
2 #include "rs_structs.h"
3
4 #include "rsCpuCoreRuntime.h"
5
rsFrac(float v)6 extern float __attribute__((overloadable)) rsFrac(float v) {
7 int i = (int)floor(v);
8 return fmin(v - i, 0x1.fffffep-1f);
9 }
10
11 /* Function declarations from libRS */
12 extern float4 __attribute__((overloadable)) convert_float4(uchar4 c);
13
14 /* Implementation of Core Runtime */
15
rsUnpackColor8888(uchar4 c)16 extern float4 rsUnpackColor8888(uchar4 c)
17 {
18 return convert_float4(c) * 0.003921569f;
19 }
20
21
rsClamp(float v,float l,float h)22 extern float __attribute__((overloadable)) rsClamp(float v, float l, float h) {
23 return clamp(v, l, h);
24 }
rsClamp(char v,char l,char h)25 extern char __attribute__((overloadable)) rsClamp(char v, char l, char h) {
26 return clamp(v, l, h);
27 }
rsClamp(uchar v,uchar l,uchar h)28 extern uchar __attribute__((overloadable)) rsClamp(uchar v, uchar l, uchar h) {
29 return clamp(v, l, h);
30 }
rsClamp(short v,short l,short h)31 extern short __attribute__((overloadable)) rsClamp(short v, short l, short h) {
32 return clamp(v, l, h);
33 }
rsClamp(ushort v,ushort l,ushort h)34 extern ushort __attribute__((overloadable)) rsClamp(ushort v, ushort l, ushort h) {
35 return clamp(v, l, h);
36 }
rsClamp(int v,int l,int h)37 extern int __attribute__((overloadable)) rsClamp(int v, int l, int h) {
38 return clamp(v, l, h);
39 }
rsClamp(uint v,uint l,uint h)40 extern uint __attribute__((overloadable)) rsClamp(uint v, uint l, uint h) {
41 return clamp(v, l, h);
42 }
43
rsAtomicCas(volatile int32_t * ptr,int32_t expectedValue,int32_t newValue)44 extern int32_t __attribute__((overloadable)) rsAtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) {
45 return __sync_val_compare_and_swap(ptr, expectedValue, newValue);
46 }
47
rsAtomicCas(volatile uint32_t * ptr,uint32_t expectedValue,uint32_t newValue)48 extern uint32_t __attribute__((overloadable)) rsAtomicCas(volatile uint32_t *ptr, uint32_t expectedValue, uint32_t newValue) {
49 return __sync_val_compare_and_swap(ptr, expectedValue, newValue);
50 }
51
rsAtomicInc(volatile int32_t * ptr)52 extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile int32_t *ptr) {
53 return __sync_fetch_and_add(ptr, 1);
54 }
55
rsAtomicInc(volatile uint32_t * ptr)56 extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile uint32_t *ptr) {
57 return __sync_fetch_and_add(ptr, 1);
58 }
59
rsAtomicDec(volatile int32_t * ptr)60 extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile int32_t *ptr) {
61 return __sync_fetch_and_sub(ptr, 1);
62 }
63
rsAtomicDec(volatile uint32_t * ptr)64 extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile uint32_t *ptr) {
65 return __sync_fetch_and_sub(ptr, 1);
66 }
67
rsAtomicAdd(volatile int32_t * ptr,int32_t value)68 extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile int32_t *ptr, int32_t value) {
69 return __sync_fetch_and_add(ptr, value);
70 }
71
rsAtomicAdd(volatile uint32_t * ptr,uint32_t value)72 extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile uint32_t *ptr, uint32_t value) {
73 return __sync_fetch_and_add(ptr, value);
74 }
75
rsAtomicSub(volatile int32_t * ptr,int32_t value)76 extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile int32_t *ptr, int32_t value) {
77 return __sync_fetch_and_sub(ptr, value);
78 }
79
rsAtomicSub(volatile uint32_t * ptr,uint32_t value)80 extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile uint32_t *ptr, uint32_t value) {
81 return __sync_fetch_and_sub(ptr, value);
82 }
83
rsAtomicAnd(volatile int32_t * ptr,int32_t value)84 extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile int32_t *ptr, int32_t value) {
85 return __sync_fetch_and_and(ptr, value);
86 }
87
rsAtomicAnd(volatile uint32_t * ptr,uint32_t value)88 extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile uint32_t *ptr, uint32_t value) {
89 return __sync_fetch_and_and(ptr, value);
90 }
91
rsAtomicOr(volatile int32_t * ptr,int32_t value)92 extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile int32_t *ptr, int32_t value) {
93 return __sync_fetch_and_or(ptr, value);
94 }
95
rsAtomicOr(volatile uint32_t * ptr,uint32_t value)96 extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile uint32_t *ptr, uint32_t value) {
97 return __sync_fetch_and_or(ptr, value);
98 }
99
rsAtomicXor(volatile int32_t * ptr,int32_t value)100 extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile int32_t *ptr, int32_t value) {
101 return __sync_fetch_and_xor(ptr, value);
102 }
103
rsAtomicXor(volatile uint32_t * ptr,uint32_t value)104 extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile uint32_t *ptr, uint32_t value) {
105 return __sync_fetch_and_xor(ptr, value);
106 }
107
108 extern uint32_t __attribute__((overloadable)) min(uint32_t, uint32_t);
109 extern int32_t __attribute__((overloadable)) min(int32_t, int32_t);
110 extern uint32_t __attribute__((overloadable)) max(uint32_t, uint32_t);
111 extern int32_t __attribute__((overloadable)) max(int32_t, int32_t);
112
rsAtomicMin(volatile uint32_t * ptr,uint32_t value)113 extern uint32_t __attribute__((overloadable)) rsAtomicMin(volatile uint32_t *ptr, uint32_t value) {
114 uint32_t prev, status;
115 do {
116 prev = *ptr;
117 uint32_t n = min(value, prev);
118 status = __sync_val_compare_and_swap(ptr, prev, n);
119 } while (status != prev);
120 return prev;
121 }
122
rsAtomicMin(volatile int32_t * ptr,int32_t value)123 extern int32_t __attribute__((overloadable)) rsAtomicMin(volatile int32_t *ptr, int32_t value) {
124 int32_t prev, status;
125 do {
126 prev = *ptr;
127 int32_t n = min(value, prev);
128 status = __sync_val_compare_and_swap(ptr, prev, n);
129 } while (status != prev);
130 return prev;
131 }
132
rsAtomicMax(volatile uint32_t * ptr,uint32_t value)133 extern uint32_t __attribute__((overloadable)) rsAtomicMax(volatile uint32_t *ptr, uint32_t value) {
134 uint32_t prev, status;
135 do {
136 prev = *ptr;
137 uint32_t n = max(value, prev);
138 status = __sync_val_compare_and_swap(ptr, prev, n);
139 } while (status != prev);
140 return prev;
141 }
142
rsAtomicMax(volatile int32_t * ptr,int32_t value)143 extern int32_t __attribute__((overloadable)) rsAtomicMax(volatile int32_t *ptr, int32_t value) {
144 int32_t prev, status;
145 do {
146 prev = *ptr;
147 int32_t n = max(value, prev);
148 status = __sync_val_compare_and_swap(ptr, prev, n);
149 } while (status != prev);
150 return prev;
151 }
152
153
154
155 extern int32_t rand();
156 #define RAND_MAX 0x7fffffff
157
158
159
160 extern float __attribute__((overloadable)) rsRand(float min, float max);/* {
161 float r = (float)rand();
162 r /= RAND_MAX;
163 r = r * (max - min) + min;
164 return r;
165 }
166 */
167
rsRand(float max)168 extern float __attribute__((overloadable)) rsRand(float max) {
169 return rsRand(0.f, max);
170 //float r = (float)rand();
171 //r *= max;
172 //r /= RAND_MAX;
173 //return r;
174 }
175
rsRand(int max)176 extern int __attribute__((overloadable)) rsRand(int max) {
177 return (int)rsRand((float)max);
178 }
179
rsRand(int min,int max)180 extern int __attribute__((overloadable)) rsRand(int min, int max) {
181 return (int)rsRand((float)min, (float)max);
182 }
183
rsGetArray0(rs_kernel_context ctxt)184 extern uint32_t __attribute__((overloadable)) rsGetArray0(rs_kernel_context ctxt) {
185 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[0];
186 }
187
rsGetArray1(rs_kernel_context ctxt)188 extern uint32_t __attribute__((overloadable)) rsGetArray1(rs_kernel_context ctxt) {
189 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[1];
190 }
191
rsGetArray2(rs_kernel_context ctxt)192 extern uint32_t __attribute__((overloadable)) rsGetArray2(rs_kernel_context ctxt) {
193 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[2];
194 }
195
rsGetArray3(rs_kernel_context ctxt)196 extern uint32_t __attribute__((overloadable)) rsGetArray3(rs_kernel_context ctxt) {
197 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[3];
198 }
199
rsGetFace(rs_kernel_context ctxt)200 extern rs_allocation_cubemap_face __attribute__((overloadable)) rsGetFace(rs_kernel_context ctxt) {
201 return (rs_allocation_cubemap_face)(((struct RsExpandKernelDriverInfo *)ctxt)->current.face);
202 }
203
rsGetLod(rs_kernel_context ctxt)204 extern uint32_t __attribute__((overloadable)) rsGetLod(rs_kernel_context ctxt) {
205 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.lod;
206 }
207
rsGetDimX(rs_kernel_context ctxt)208 extern uint32_t __attribute__((overloadable)) rsGetDimX(rs_kernel_context ctxt) {
209 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.x;
210 }
211
rsGetDimY(rs_kernel_context ctxt)212 extern uint32_t __attribute__((overloadable)) rsGetDimY(rs_kernel_context ctxt) {
213 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.y;
214 }
215
rsGetDimZ(rs_kernel_context ctxt)216 extern uint32_t __attribute__((overloadable)) rsGetDimZ(rs_kernel_context ctxt) {
217 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.z;
218 }
219
rsGetDimArray0(rs_kernel_context ctxt)220 extern uint32_t __attribute__((overloadable)) rsGetDimArray0(rs_kernel_context ctxt) {
221 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[0];
222 }
223
rsGetDimArray1(rs_kernel_context ctxt)224 extern uint32_t __attribute__((overloadable)) rsGetDimArray1(rs_kernel_context ctxt) {
225 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[1];
226 }
227
rsGetDimArray2(rs_kernel_context ctxt)228 extern uint32_t __attribute__((overloadable)) rsGetDimArray2(rs_kernel_context ctxt) {
229 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[2];
230 }
231
rsGetDimArray3(rs_kernel_context ctxt)232 extern uint32_t __attribute__((overloadable)) rsGetDimArray3(rs_kernel_context ctxt) {
233 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[3];
234 }
235
rsGetDimHasFaces(rs_kernel_context ctxt)236 extern bool __attribute__((overloadable)) rsGetDimHasFaces(rs_kernel_context ctxt) {
237 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.face != 0;
238 }
239
rsGetDimLod(rs_kernel_context ctxt)240 extern uint32_t __attribute__((overloadable)) rsGetDimLod(rs_kernel_context ctxt) {
241 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.lod;
242 }
243
244 #define PRIM_DEBUG(T) \
245 extern void __attribute__((overloadable)) rsDebug(const char *, const T *); \
246 void __attribute__((overloadable)) rsDebug(const char *txt, T val) { \
247 rsDebug(txt, &val); \
248 }
249
250 PRIM_DEBUG(char2)
251 PRIM_DEBUG(char3)
252 PRIM_DEBUG(char4)
253 PRIM_DEBUG(uchar2)
254 PRIM_DEBUG(uchar3)
255 PRIM_DEBUG(uchar4)
256 PRIM_DEBUG(short2)
257 PRIM_DEBUG(short3)
258 PRIM_DEBUG(short4)
259 PRIM_DEBUG(ushort2)
260 PRIM_DEBUG(ushort3)
261 PRIM_DEBUG(ushort4)
262 PRIM_DEBUG(int2)
263 PRIM_DEBUG(int3)
264 PRIM_DEBUG(int4)
265 PRIM_DEBUG(uint2)
266 PRIM_DEBUG(uint3)
267 PRIM_DEBUG(uint4)
268 PRIM_DEBUG(long2)
269 PRIM_DEBUG(long3)
270 PRIM_DEBUG(long4)
271 PRIM_DEBUG(ulong2)
272 PRIM_DEBUG(ulong3)
273 PRIM_DEBUG(ulong4)
274 PRIM_DEBUG(float2)
275 PRIM_DEBUG(float3)
276 PRIM_DEBUG(float4)
277 PRIM_DEBUG(double2)
278 PRIM_DEBUG(double3)
279 PRIM_DEBUG(double4)
280
281 #undef PRIM_DEBUG
282
283 // Convert the half values to float before handing off to the driver. This
284 // eliminates the need in the driver to properly support the half datatype
285 // (either by adding compiler flags for half or link against compiler_rt).
286 // Also, pass the bit-equivalent ushort to be printed.
287 extern void __attribute__((overloadable)) rsDebug(const char *s, float f,
288 ushort us);
rsDebug(const char * s,half h)289 extern void __attribute__((overloadable)) rsDebug(const char *s, half h) {
290 rsDebug(s, (float) h, *(ushort *) &h);
291 }
292
293 extern void __attribute__((overloadable)) rsDebug(const char *s,
294 const float2 *f,
295 const ushort2 *us);
rsDebug(const char * s,half2 h2)296 extern void __attribute__((overloadable)) rsDebug(const char *s, half2 h2) {
297 float2 f = convert_float2(h2);
298 rsDebug(s, &f, (ushort2 *) &h2);
299 }
300
301 extern void __attribute__((overloadable)) rsDebug(const char *s,
302 const float3 *f,
303 const ushort3 *us);
rsDebug(const char * s,half3 h3)304 extern void __attribute__((overloadable)) rsDebug(const char *s, half3 h3) {
305 float3 f = convert_float3(h3);
306 rsDebug(s, &f, (ushort3 *) &h3);
307 }
308
309 extern void __attribute__((overloadable)) rsDebug(const char *s,
310 const float4 *f,
311 const ushort4 *us);
rsDebug(const char * s,half4 h4)312 extern void __attribute__((overloadable)) rsDebug(const char *s, half4 h4) {
313 float4 f = convert_float4(h4);
314 rsDebug(s, &f, (ushort4 *) &h4);
315 }
316