1 #include "rs_core.rsh"
2 #include "rs_f16_util.h"
3 
4 extern float2 __attribute__((overloadable)) convert_float2(int2 c);
5 extern float3 __attribute__((overloadable)) convert_float3(int3 c);
6 extern float4 __attribute__((overloadable)) convert_float4(int4 c);
7 
8 extern int2 __attribute__((overloadable)) convert_int2(float2 c);
9 extern int3 __attribute__((overloadable)) convert_int3(float3 c);
10 extern int4 __attribute__((overloadable)) convert_int4(float4 c);
11 
12 
13 extern float __attribute__((overloadable)) fmin(float v, float v2);
14 extern float2 __attribute__((overloadable)) fmin(float2 v, float v2);
15 extern float3 __attribute__((overloadable)) fmin(float3 v, float v2);
16 extern float4 __attribute__((overloadable)) fmin(float4 v, float v2);
17 
18 extern float __attribute__((overloadable)) fmax(float v, float v2);
19 extern float2 __attribute__((overloadable)) fmax(float2 v, float v2);
20 extern float3 __attribute__((overloadable)) fmax(float3 v, float v2);
21 extern float4 __attribute__((overloadable)) fmax(float4 v, float v2);
22 
23 // Float ops, 6.11.2
24 
// FN_FUNC_FN(fnc): defines the float2, float3 and float4 overloads of the
// unary function `fnc`. Each overload calls `fnc` on every component of its
// argument (x, then y, z, w) and returns the per-component results packed
// into a vector of the same width.
#define FN_FUNC_FN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v) { \
    float2 result; \
    result.x = fnc(v.x); \
    result.y = fnc(v.y); \
    return result; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v) { \
    float3 result; \
    result.x = fnc(v.x); \
    result.y = fnc(v.y); \
    result.z = fnc(v.z); \
    return result; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v) { \
    float4 result; \
    result.x = fnc(v.x); \
    result.y = fnc(v.y); \
    result.z = fnc(v.z); \
    result.w = fnc(v.w); \
    return result; \
}
47 
// IN_FUNC_FN(fnc): defines the vector overloads of a function `fnc` that
// takes a float and yields an int. The float2/float3/float4 argument is
// processed one component at a time and the results are returned as an
// int2/int3/int4 respectively.
#define IN_FUNC_FN(fnc) \
extern int2 __attribute__((overloadable)) fnc(float2 v) { \
    int2 result; \
    result.x = fnc(v.x); \
    result.y = fnc(v.y); \
    return result; \
} \
extern int3 __attribute__((overloadable)) fnc(float3 v) { \
    int3 result; \
    result.x = fnc(v.x); \
    result.y = fnc(v.y); \
    result.z = fnc(v.z); \
    return result; \
} \
extern int4 __attribute__((overloadable)) fnc(float4 v) { \
    int4 result; \
    result.x = fnc(v.x); \
    result.y = fnc(v.y); \
    result.z = fnc(v.z); \
    result.w = fnc(v.w); \
    return result; \
}
70 
// FN_FUNC_FN_FN(fnc): defines the float2, float3 and float4 overloads of the
// binary function `fnc`. Component i of the result is fnc applied to
// component i of v1 and component i of v2.
#define FN_FUNC_FN_FN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
    float2 result; \
    result.x = fnc(v1.x, v2.x); \
    result.y = fnc(v1.y, v2.y); \
    return result; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
    float3 result; \
    result.x = fnc(v1.x, v2.x); \
    result.y = fnc(v1.y, v2.y); \
    result.z = fnc(v1.z, v2.z); \
    return result; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
    float4 result; \
    result.x = fnc(v1.x, v2.x); \
    result.y = fnc(v1.y, v2.y); \
    result.z = fnc(v1.z, v2.z); \
    result.w = fnc(v1.w, v2.w); \
    return result; \
}
93 
// FN_FUNC_FN_F(fnc): defines overloads of the binary function `fnc` that take
// a float vector and a single scalar float. The same scalar v2 is paired with
// every component of v1.
#define FN_FUNC_FN_F(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) { \
    float2 result; \
    result.x = fnc(v1.x, v2); \
    result.y = fnc(v1.y, v2); \
    return result; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) { \
    float3 result; \
    result.x = fnc(v1.x, v2); \
    result.y = fnc(v1.y, v2); \
    result.z = fnc(v1.z, v2); \
    return result; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) { \
    float4 result; \
    result.x = fnc(v1.x, v2); \
    result.y = fnc(v1.y, v2); \
    result.z = fnc(v1.z, v2); \
    result.w = fnc(v1.w, v2); \
    return result; \
}
116 
// FN_FUNC_FN_IN(fnc): defines overloads of `fnc` that take a float vector and
// an int vector of the same width. Component i of the result is fnc applied
// to component i of v1 and component i of v2.
#define FN_FUNC_FN_IN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) { \
    float2 result; \
    result.x = fnc(v1.x, v2.x); \
    result.y = fnc(v1.y, v2.y); \
    return result; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) { \
    float3 result; \
    result.x = fnc(v1.x, v2.x); \
    result.y = fnc(v1.y, v2.y); \
    result.z = fnc(v1.z, v2.z); \
    return result; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) { \
    float4 result; \
    result.x = fnc(v1.x, v2.x); \
    result.y = fnc(v1.y, v2.y); \
    result.z = fnc(v1.z, v2.z); \
    result.w = fnc(v1.w, v2.w); \
    return result; \
}
139 
// FN_FUNC_FN_I(fnc): defines overloads of `fnc` that take a float vector and
// a single scalar int. The same int v2 is paired with every component of v1.
#define FN_FUNC_FN_I(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) { \
    float2 result; \
    result.x = fnc(v1.x, v2); \
    result.y = fnc(v1.y, v2); \
    return result; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) { \
    float3 result; \
    result.x = fnc(v1.x, v2); \
    result.y = fnc(v1.y, v2); \
    result.z = fnc(v1.z, v2); \
    return result; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) { \
    float4 result; \
    result.x = fnc(v1.x, v2); \
    result.y = fnc(v1.y, v2); \
    result.z = fnc(v1.z, v2); \
    result.w = fnc(v1.w, v2); \
    return result; \
}
162 
// FN_FUNC_FN_PFN(fnc): defines vector overloads of a function `fnc` that
// returns a float and also writes a second float through a pointer.
// For each component, fnc is called with that component of v1 and the address
// of a scalar temporary; the temporary is then copied into the matching
// component of *v2. The vector of return values is returned.
#define FN_FUNC_FN_PFN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 *v2) { \
    float2 result; \
    float tmp[2]; \
    result.x = fnc(v1.x, &tmp[0]); \
    result.y = fnc(v1.y, &tmp[1]); \
    v2->x = tmp[0]; \
    v2->y = tmp[1]; \
    return result; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 *v2) { \
    float3 result; \
    float tmp[3]; \
    result.x = fnc(v1.x, &tmp[0]); \
    result.y = fnc(v1.y, &tmp[1]); \
    result.z = fnc(v1.z, &tmp[2]); \
    v2->x = tmp[0]; \
    v2->y = tmp[1]; \
    v2->z = tmp[2]; \
    return result; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 *v2) { \
    float4 result; \
    float tmp[4]; \
    result.x = fnc(v1.x, &tmp[0]); \
    result.y = fnc(v1.y, &tmp[1]); \
    result.z = fnc(v1.z, &tmp[2]); \
    result.w = fnc(v1.w, &tmp[3]); \
    v2->x = tmp[0]; \
    v2->y = tmp[1]; \
    v2->z = tmp[2]; \
    v2->w = tmp[3]; \
    return result; \
}
200 
// FN_FUNC_FN_PIN(fnc): defines vector overloads of a function `fnc` that
// returns a float and also writes an int through a pointer.
// For each component, fnc is called with that component of v1 and the address
// of a scalar int temporary; the temporaries are then copied into the
// components of *v2. The vector of float return values is returned.
#define FN_FUNC_FN_PIN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) { \
    float2 result; \
    int tmp[2]; \
    result.x = fnc(v1.x, &tmp[0]); \
    result.y = fnc(v1.y, &tmp[1]); \
    v2->x = tmp[0]; \
    v2->y = tmp[1]; \
    return result; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) { \
    float3 result; \
    int tmp[3]; \
    result.x = fnc(v1.x, &tmp[0]); \
    result.y = fnc(v1.y, &tmp[1]); \
    result.z = fnc(v1.z, &tmp[2]); \
    v2->x = tmp[0]; \
    v2->y = tmp[1]; \
    v2->z = tmp[2]; \
    return result; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) { \
    float4 result; \
    int tmp[4]; \
    result.x = fnc(v1.x, &tmp[0]); \
    result.y = fnc(v1.y, &tmp[1]); \
    result.z = fnc(v1.z, &tmp[2]); \
    result.w = fnc(v1.w, &tmp[3]); \
    v2->x = tmp[0]; \
    v2->y = tmp[1]; \
    v2->z = tmp[2]; \
    v2->w = tmp[3]; \
    return result; \
}
235 
// FN_FUNC_FN_FN_FN(fnc): defines the float2, float3 and float4 overloads of
// the ternary function `fnc`. Component i of the result is fnc applied to
// component i of v1, v2 and v3.
#define FN_FUNC_FN_FN_FN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 v2, float2 v3) { \
    float2 result; \
    result.x = fnc(v1.x, v2.x, v3.x); \
    result.y = fnc(v1.y, v2.y, v3.y); \
    return result; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 v2, float3 v3) { \
    float3 result; \
    result.x = fnc(v1.x, v2.x, v3.x); \
    result.y = fnc(v1.y, v2.y, v3.y); \
    result.z = fnc(v1.z, v2.z, v3.z); \
    return result; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 v2, float4 v3) { \
    float4 result; \
    result.x = fnc(v1.x, v2.x, v3.x); \
    result.y = fnc(v1.y, v2.y, v3.y); \
    result.z = fnc(v1.z, v2.z, v3.z); \
    result.w = fnc(v1.w, v2.w, v3.w); \
    return result; \
}
261 
// FN_FUNC_FN_FN_PIN(fnc): defines vector overloads of a function `fnc` that
// takes two floats, returns a float and also writes an int through a pointer.
// For each component, fnc is called with that component of v1 and v2 plus the
// address of a scalar int temporary; the temporaries are then copied into the
// components of *v3. The vector of float return values is returned.
#define FN_FUNC_FN_FN_PIN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 v2, int2 *v3) { \
    float2 result; \
    int tmp[2]; \
    result.x = fnc(v1.x, v2.x, &tmp[0]); \
    result.y = fnc(v1.y, v2.y, &tmp[1]); \
    v3->x = tmp[0]; \
    v3->y = tmp[1]; \
    return result; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 v2, int3 *v3) { \
    float3 result; \
    int tmp[3]; \
    result.x = fnc(v1.x, v2.x, &tmp[0]); \
    result.y = fnc(v1.y, v2.y, &tmp[1]); \
    result.z = fnc(v1.z, v2.z, &tmp[2]); \
    v3->x = tmp[0]; \
    v3->y = tmp[1]; \
    v3->z = tmp[2]; \
    return result; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 v2, int4 *v3) { \
    float4 result; \
    int tmp[4]; \
    result.x = fnc(v1.x, v2.x, &tmp[0]); \
    result.y = fnc(v1.y, v2.y, &tmp[1]); \
    result.z = fnc(v1.z, v2.z, &tmp[2]); \
    result.w = fnc(v1.w, v2.w, &tmp[3]); \
    v3->x = tmp[0]; \
    v3->y = tmp[1]; \
    v3->z = tmp[2]; \
    v3->w = tmp[3]; \
    return result; \
}
299 
// Bit patterns of the single-precision +infinity and -infinity values
// (all exponent bits set, zero mantissa; sign bit clear / set).
static const unsigned int iposinf = 0x7f800000;
static const unsigned int ineginf = 0xff800000;

// Returns the float whose bit pattern is iposinf (positive infinity).
//
// The bits are reinterpreted through a union instead of dereferencing a
// casted pointer: reading an `unsigned int` object through a `float *`
// violates the strict aliasing rules (and the old cast also dropped the
// `const` qualifier of iposinf).
static float posinf(void) {
    union {
        unsigned int bits;
        float value;
    } pun;
    pun.bits = iposinf;
    return pun.value;
}
307 
// Returns the raw bit pattern of `f` as an unsigned int.
//
// The value is reinterpreted through a union rather than a pointer cast:
// reading a float object through an `unsigned int *` breaks the strict
// aliasing rules even when the pointer is first routed through `char *`.
// A union is used instead of memcpy because the earlier TODO(jeanluc) noted
// that the memcpy approach was blocked by Mac(SDK) build issues.
static unsigned int float_bits(float f) {
    union {
        float value;
        unsigned int bits;
    } pun;
    pun.value = f;
    return pun.bits;
}
317 
isinf(float f)318 static bool isinf(float f) {
319     unsigned int i = float_bits(f);
320     return (i == iposinf) || (i == ineginf);
321 }
322 
// Returns true when `f` is a NaN: all eight exponent bits are set and at
// least one of the 23 mantissa bits is non-zero.
static bool isnan(float f) {
    const unsigned int bits = float_bits(f);
    const unsigned int exponent = bits & 0x7f800000;
    const unsigned int mantissa = bits & 0x007fffff;
    return (exponent == 0x7f800000) && (mantissa != 0);
}
327 
// Returns true when `f` is positive zero, i.e. every bit of its
// representation is clear.
static bool isposzero(float f) {
    const unsigned int bits = float_bits(f);
    return bits == 0x00000000;
}
331 
// Returns true when `f` is negative zero, i.e. only the sign bit of its
// representation is set.
static bool isnegzero(float f) {
    const unsigned int bits = float_bits(f);
    return bits == 0x80000000;
}
335 
// Returns true when `f` is a zero of either sign.
static bool iszero(float f) {
    if (isposzero(f)) {
        return true;
    }
    return isnegzero(f);
}
339 
340 
341 extern float __attribute__((overloadable)) SC_acosf(float);
acos(float v)342 float __attribute__((overloadable)) acos(float v) {
343     return SC_acosf(v);
344 }
345 FN_FUNC_FN(acos)
346 
347 extern float __attribute__((overloadable)) SC_acoshf(float);
acosh(float v)348 float __attribute__((overloadable)) acosh(float v) {
349     return SC_acoshf(v);
350 }
FN_FUNC_FN(acosh)351 FN_FUNC_FN(acosh)
352 
353 
354 extern float __attribute__((overloadable)) acospi(float v) {
355     return acos(v) / M_PI;
356 }
357 FN_FUNC_FN(acospi)
358 
359 extern float __attribute__((overloadable)) SC_asinf(float);
asin(float v)360 float __attribute__((overloadable)) asin(float v) {
361     return SC_asinf(v);
362 }
363 FN_FUNC_FN(asin)
364 
365 extern float __attribute__((overloadable)) SC_asinhf(float);
asinh(float v)366 float __attribute__((overloadable)) asinh(float v) {
367     return SC_asinhf(v);
368 }
FN_FUNC_FN(asinh)369 FN_FUNC_FN(asinh)
370 
371 extern float __attribute__((overloadable)) asinpi(float v) {
372     return asin(v) / M_PI;
373 }
374 FN_FUNC_FN(asinpi)
375 
376 extern float __attribute__((overloadable)) SC_atanf(float);
atan(float v)377 float __attribute__((overloadable)) atan(float v) {
378     return SC_atanf(v);
379 }
380 FN_FUNC_FN(atan)
381 
382 extern float __attribute__((overloadable)) SC_atan2f(float, float);
atan2(float v1,float v2)383 float __attribute__((overloadable)) atan2(float v1, float v2) {
384     return SC_atan2f(v1, v2);
385 }
386 FN_FUNC_FN_FN(atan2)
387 
388 extern float __attribute__((overloadable)) SC_atanhf(float);
atanh(float v)389 float __attribute__((overloadable)) atanh(float v) {
390     return SC_atanhf(v);
391 }
FN_FUNC_FN(atanh)392 FN_FUNC_FN(atanh)
393 
394 extern float __attribute__((overloadable)) atanpi(float v) {
395     return atan(v) / M_PI;
396 }
FN_FUNC_FN(atanpi)397 FN_FUNC_FN(atanpi)
398 
399 
400 extern float __attribute__((overloadable)) atan2pi(float y, float x) {
401     return atan2(y, x) / M_PI;
402 }
403 FN_FUNC_FN_FN(atan2pi)
404 
405 extern float __attribute__((overloadable)) SC_cbrtf(float);
cbrt(float v)406 float __attribute__((overloadable)) cbrt(float v) {
407     return SC_cbrtf(v);
408 }
409 FN_FUNC_FN(cbrt)
410 
411 extern float __attribute__((overloadable)) SC_ceilf(float);
ceil(float v)412 float __attribute__((overloadable)) ceil(float v) {
413     return SC_ceilf(v);
414 }
415 FN_FUNC_FN(ceil)
416 
417 extern float __attribute__((overloadable)) SC_copysignf(float, float);
copysign(float v1,float v2)418 float __attribute__((overloadable)) copysign(float v1, float v2) {
419     return SC_copysignf(v1, v2);
420 }
421 FN_FUNC_FN_FN(copysign)
422 
423 extern float __attribute__((overloadable)) SC_cosf(float);
cos(float v)424 float __attribute__((overloadable)) cos(float v) {
425     return SC_cosf(v);
426 }
427 FN_FUNC_FN(cos)
428 
429 extern float __attribute__((overloadable)) SC_coshf(float);
cosh(float v)430 float __attribute__((overloadable)) cosh(float v) {
431     return SC_coshf(v);
432 }
FN_FUNC_FN(cosh)433 FN_FUNC_FN(cosh)
434 
435 extern float __attribute__((overloadable)) cospi(float v) {
436     return cos(v * M_PI);
437 }
438 FN_FUNC_FN(cospi)
439 
440 extern float __attribute__((overloadable)) SC_erfcf(float);
erfc(float v)441 float __attribute__((overloadable)) erfc(float v) {
442     return SC_erfcf(v);
443 }
444 FN_FUNC_FN(erfc)
445 
446 extern float __attribute__((overloadable)) SC_erff(float);
erf(float v)447 float __attribute__((overloadable)) erf(float v) {
448     return SC_erff(v);
449 }
450 FN_FUNC_FN(erf)
451 
452 extern float __attribute__((overloadable)) SC_expf(float);
exp(float v)453 float __attribute__((overloadable)) exp(float v) {
454     return SC_expf(v);
455 }
456 FN_FUNC_FN(exp)
457 
458 extern float __attribute__((overloadable)) SC_exp2f(float);
exp2(float v)459 float __attribute__((overloadable)) exp2(float v) {
460     return SC_exp2f(v);
461 }
462 FN_FUNC_FN(exp2)
463 
464 extern float __attribute__((overloadable)) pow(float, float);
465 
exp10(float v)466 extern float __attribute__((overloadable)) exp10(float v) {
467     return exp2(v * 3.321928095f);
468 }
469 FN_FUNC_FN(exp10)
470 
471 extern float __attribute__((overloadable)) SC_expm1f(float);
expm1(float v)472 float __attribute__((overloadable)) expm1(float v) {
473     return SC_expm1f(v);
474 }
FN_FUNC_FN(expm1)475 FN_FUNC_FN(expm1)
476 
// Scalar fabs: returns `v` with its sign bit (bit 31) cleared; all other bits
// are left untouched, so the magnitude (and any NaN payload) is preserved.
//
// The bits are accessed through a union instead of `*(int *)&v` /
// `*(float *)&i`: dereferencing a pointer cast to an unrelated type violates
// the strict aliasing rules, and routing the cast through `void *` does not
// change that.
extern float __attribute__((overloadable)) fabs(float v) {
    union {
        float value;
        uint32_t bits;
    } pun;
    pun.value = v;
    pun.bits &= 0x7fffffffu;
    return pun.value;
}
// Instantiates the float2/float3/float4 overloads of fabs; each one applies
// the scalar fabs to every component of its argument.
FN_FUNC_FN(fabs)
482 
483 extern float __attribute__((overloadable)) SC_fdimf(float, float);
fdim(float v1,float v2)484 float __attribute__((overloadable)) fdim(float v1, float v2) {
485     return SC_fdimf(v1, v2);
486 }
487 FN_FUNC_FN_FN(fdim)
488 
489 extern float __attribute__((overloadable)) SC_floorf(float);
floor(float v)490 float __attribute__((overloadable)) floor(float v) {
491     return SC_floorf(v);
492 }
493 FN_FUNC_FN(floor)
494 
495 extern float __attribute__((overloadable)) SC_fmaf(float, float, float);
fma(float v1,float v2,float v3)496 float __attribute__((overloadable)) fma(float v1, float v2, float v3) {
497     return SC_fmaf(v1, v2, v3);
498 }
499 FN_FUNC_FN_FN_FN(fma)
500 
501 extern float __attribute__((overloadable)) SC_fminf(float, float);
502 
503 extern float __attribute__((overloadable)) SC_fmodf(float, float);
fmod(float v1,float v2)504 float __attribute__((overloadable)) fmod(float v1, float v2) {
505     return SC_fmodf(v1, v2);
506 }
FN_FUNC_FN_FN(fmod)507 FN_FUNC_FN_FN(fmod)
508 
509 extern float __attribute__((overloadable)) fract(float v, float *iptr) {
510     int i = (int)floor(v);
511     if (iptr) {
512         iptr[0] = i;
513     }
514     return fmin(v - i, 0x1.fffffep-1f);
515 }
FN_FUNC_FN_PFN(fract)516 FN_FUNC_FN_PFN(fract)
517 
518 extern float __attribute__((const, overloadable)) fract(float v) {
519     float unused;
520     return fract(v, &unused);
521 }
522 FN_FUNC_FN(fract)
523 
524 extern float __attribute__((overloadable)) SC_frexpf(float, int *);
frexp(float v1,int * v2)525 float __attribute__((overloadable)) frexp(float v1, int* v2) {
526     return SC_frexpf(v1, v2);
527 }
528 FN_FUNC_FN_PIN(frexp)
529 
530 extern float __attribute__((overloadable)) SC_hypotf(float, float);
hypot(float v1,float v2)531 float __attribute__((overloadable)) hypot(float v1, float v2) {
532     return SC_hypotf(v1, v2);
533 }
534 FN_FUNC_FN_FN(hypot)
535 
536 extern int __attribute__((overloadable)) SC_ilogbf(float);
ilogb(float v)537 int __attribute__((overloadable)) ilogb(float v) {
538     return SC_ilogbf(v);
539 }
540 IN_FUNC_FN(ilogb)
541 
542 extern float __attribute__((overloadable)) SC_ldexpf(float, int);
ldexp(float v1,int v2)543 float __attribute__((overloadable)) ldexp(float v1, int v2) {
544     return SC_ldexpf(v1, v2);
545 }
546 FN_FUNC_FN_IN(ldexp)
547 FN_FUNC_FN_I(ldexp)
548 
549 extern float __attribute__((overloadable)) SC_lgammaf(float);
lgamma(float v)550 float __attribute__((overloadable)) lgamma(float v) {
551     return SC_lgammaf(v);
552 }
553 FN_FUNC_FN(lgamma)
554 extern float __attribute__((overloadable)) SC_lgammaf_r(float, int*);
lgamma(float v,int * ptr)555 float __attribute__((overloadable)) lgamma(float v, int* ptr) {
556     return SC_lgammaf_r(v, ptr);
557 }
558 FN_FUNC_FN_PIN(lgamma)
559 
560 extern float __attribute__((overloadable)) SC_logf(float);
log(float v)561 float __attribute__((overloadable)) log(float v) {
562     return SC_logf(v);
563 }
564 FN_FUNC_FN(log)
565 
566 extern float __attribute__((overloadable)) SC_log10f(float);
log10(float v)567 float __attribute__((overloadable)) log10(float v) {
568     return SC_log10f(v);
569 }
FN_FUNC_FN(log10)570 FN_FUNC_FN(log10)
571 
572 
573 extern float __attribute__((overloadable)) log2(float v) {
574     return log10(v) * 3.321928095f;
575 }
576 FN_FUNC_FN(log2)
577 
578 extern float __attribute__((overloadable)) SC_log1pf(float);
log1p(float v)579 float __attribute__((overloadable)) log1p(float v) {
580     return SC_log1pf(v);
581 }
582 FN_FUNC_FN(log1p)
583 
584 extern float __attribute__((overloadable)) SC_logbf(float);
logb(float v)585 float __attribute__((overloadable)) logb(float v) {
586     return SC_logbf(v);
587 }
FN_FUNC_FN(logb)588 FN_FUNC_FN(logb)
589 
// mad: multiply-add, a * b + c, for scalars and for float2/float3/float4.
// The vector overloads use the vector operators directly, so the multiply
// and add are applied to matching components.
// NOTE(review): the expression is deliberately left as a single
// `a * b + c`; whether the compiler fuses it into one operation depends on
// build flags not visible here -- confirm before restructuring it.
extern float __attribute__((overloadable)) mad(float a, float b, float c) {
    return a * b + c;
}
extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
    return a * b + c;
}
extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
    return a * b + c;
}
extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
    return a * b + c;
}
602 
// Scalar modf: hands the value and the float output pointer to the externally
// defined SC_modff and returns its result unchanged.
extern float __attribute__((overloadable)) SC_modff(float, float *);
float __attribute__((overloadable)) modf(float v1, float *v2) {
    return SC_modff(v1, v2);
}
// float2/float3/float4 overloads; the float outputs land in the matching
// components of the vector pointed to by the second argument.
// (No trailing semicolon: the macro expands to complete function definitions,
// and a stray ';' at file scope is not valid ISO C.)
FN_FUNC_FN_PFN(modf)
608 
// Builds a NaN whose bit pattern is `v | 0x7fc00000`: the exponent bits and
// the top mantissa bit are forced on, and every bit already set in `v` is
// kept as-is (including the sign bit, if `v` has it set).
//
// The bits are converted through a union rather than by writing to a float
// object through a `uint32_t *`, which violates the strict aliasing rules.
extern float __attribute__((overloadable)) nan(uint v) {
    union {
        uint32_t bits;
        float value;
    } pun;
    pun.bits = v | 0x7fc00000;
    return pun.value;
}
615 
616 extern float __attribute__((overloadable)) SC_nextafterf(float, float);
nextafter(float v1,float v2)617 float __attribute__((overloadable)) nextafter(float v1, float v2) {
618     return SC_nextafterf(v1, v2);
619 }
620 FN_FUNC_FN_FN(nextafter)
621 
622 // This function must be defined here if we're compiling with debug info
623 // (libclcore_g.bc), because we need a C source to get debug information.
624 // Otherwise the implementation can be found in IR.
625 #if defined(RS_G_RUNTIME)
626 extern float __attribute__((overloadable)) SC_powf(float, float);
pow(float v1,float v2)627 float __attribute__((overloadable)) pow(float v1, float v2) {
628     return SC_powf(v1, v2);
629 }
630 #endif // defined(RS_G_RUNTIME)
FN_FUNC_FN_FN(pow)631 FN_FUNC_FN_FN(pow)
632 
633 extern float __attribute__((overloadable)) pown(float v, int p) {
634     /* The mantissa of a float has fewer bits than an int (24 effective vs. 31).
635      * For very large ints, we'll lose whether the exponent is even or odd, making
636      * the selection of a correct sign incorrect.  We correct this.  Use copysign
637      * to handle the negative zero case.
638      */
639     float sign = (p & 0x1) ? copysign(1.f, v) : 1.f;
640     float f = pow(v, (float)p);
641     return copysign(f, sign);
642 }
FN_FUNC_FN_IN(pown)643 FN_FUNC_FN_IN(pown)
644 
645 extern float __attribute__((overloadable)) powr(float v, float p) {
646     return pow(v, p);
647 }
powr(float2 v,float2 p)648 extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
649     return pow(v, p);
650 }
powr(float3 v,float3 p)651 extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
652     return pow(v, p);
653 }
powr(float4 v,float4 p)654 extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
655     return pow(v, p);
656 }
657 
658 extern float __attribute__((overloadable)) SC_remainderf(float, float);
remainder(float v1,float v2)659 float __attribute__((overloadable)) remainder(float v1, float v2) {
660     return SC_remainderf(v1, v2);
661 }
662 FN_FUNC_FN_FN(remainder)
663 
664 extern float __attribute__((overloadable)) SC_remquof(float, float, int *);
remquo(float v1,float v2,int * v3)665 float __attribute__((overloadable)) remquo(float v1, float v2, int *v3) {
666     return SC_remquof(v1, v2, v3);
667 }
668 FN_FUNC_FN_FN_PIN(remquo)
669 
670 extern float __attribute__((overloadable)) SC_rintf(float);
rint(float v)671 float __attribute__((overloadable)) rint(float v) {
672     return SC_rintf(v);
673 }
FN_FUNC_FN(rint)674 FN_FUNC_FN(rint)
675 
/**
 * rootn: computes v raised to the power 1/r, with explicit special cases.
 *
 *  - r == 0                      -> +infinity
 *  - v is zero, r < 0, r odd     -> infinity with the sign of v
 *  - v is zero, r < 0, r even    -> +infinity
 *  - v is zero, r > 0, r odd     -> zero with the sign of v
 *  - v is zero, r > 0, r even    -> +0
 *  - finite negative v, r odd    -> -(pow(-v, 1/r))
 *  - finite negative v, r even   -> NaN
 *  - everything else             -> pow(v, 1/r)
 */
extern float __attribute__((overloadable)) rootn(float v, int r) {
    if (r == 0) {
        return posinf();
    }

    const int r_is_odd = r & 1;

    if (iszero(v)) {
        if (r >= 0) {
            /* r is strictly positive here: the root of zero is zero. */
            return r_is_odd ? copysign(0.f, v) : 0.f;
        }
        /* Negative r means a reciprocal root, i.e. a division by zero. */
        return r_is_odd ? copysign(posinf(), v) : posinf();
    }

    if (!(isinf(v) || isnan(v)) && (v < 0.f)) {
        if (!r_is_odd) {
            /* Even root of a negative number. */
            return nan(0);
        }
        /* Odd root of a negative number: take the root of |v|, then negate. */
        return (-1.f * pow(-1.f * v, 1.f / r));
    }

    return pow(v, 1.f / r);
}
/* Vector overloads are generated by the FN_FUNC_FN_IN helper macro. */
FN_FUNC_FN_IN(rootn);
708 
709 extern float __attribute__((overloadable)) SC_roundf(float);
round(float v)710 float __attribute__((overloadable)) round(float v) {
711     return SC_roundf(v);
712 }
713 FN_FUNC_FN(round)
714 
/* Two-argument random helper; declared here, defined outside this section. */
extern float __attribute__((overloadable)) SC_randf2(float lo, float hi);

/** rsRand(min, max): forwards both bounds unchanged to SC_randf2. */
float __attribute__((overloadable)) rsRand(float min, float max) {
  const float value = SC_randf2(min, max);
  return value;
}
719 
720 
/** rsqrt (scalar): reciprocal square root, computed as 1 / sqrt(v). */
extern float __attribute__((overloadable)) rsqrt(float v) {
    const float root = sqrt(v);
    return 1.f / root;
}
724 
#if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
// These functions must be defined here if we are not using the SSE
// implementation, which includes when we are built as part of the
// debug runtime (libclcore_debug.bc) or compiling with debug info.
#if defined(RS_G_RUNTIME)
// Scalar sqrt is only defined in this file for RS_G_RUNTIME builds; it
// forwards to SC_sqrtf, which is declared here and defined elsewhere.
extern float __attribute__((overloadable)) SC_sqrtf(float);
float __attribute__((overloadable)) sqrt(float v) {
    return SC_sqrtf(v);
}
#endif // defined(RS_G_RUNTIME)

// Component-wise vector overloads of sqrt, built from the scalar overload.
FN_FUNC_FN(sqrt)
#else
// Otherwise only declare the vector overloads so later code in this file can
// call them; their definitions are not in this section.
extern float2 __attribute__((overloadable)) sqrt(float2);
extern float3 __attribute__((overloadable)) sqrt(float3);
extern float4 __attribute__((overloadable)) sqrt(float4);
#endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
742 
// Component-wise vector overloads of rsqrt, built from the scalar overload.
FN_FUNC_FN(rsqrt)
744 
745 extern float __attribute__((overloadable)) SC_sinf(float);
sin(float v)746 float __attribute__((overloadable)) sin(float v) {
747     return SC_sinf(v);
748 }
FN_FUNC_FN(sin)749 FN_FUNC_FN(sin)
750 
751 extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
752     *cosptr = cos(v);
753     return sin(v);
754 }
sincos(float2 v,float2 * cosptr)755 extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
756     *cosptr = cos(v);
757     return sin(v);
758 }
sincos(float3 v,float3 * cosptr)759 extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
760     *cosptr = cos(v);
761     return sin(v);
762 }
sincos(float4 v,float4 * cosptr)763 extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
764     *cosptr = cos(v);
765     return sin(v);
766 }
767 
768 extern float __attribute__((overloadable)) SC_sinhf(float);
sinh(float v)769 float __attribute__((overloadable)) sinh(float v) {
770     return SC_sinhf(v);
771 }
FN_FUNC_FN(sinh)772 FN_FUNC_FN(sinh)
773 
774 extern float __attribute__((overloadable)) sinpi(float v) {
775     return sin(v * M_PI);
776 }
777 FN_FUNC_FN(sinpi)
778 
779 extern float __attribute__((overloadable)) SC_tanf(float);
tan(float v)780 float __attribute__((overloadable)) tan(float v) {
781     return SC_tanf(v);
782 }
783 FN_FUNC_FN(tan)
784 
785 extern float __attribute__((overloadable)) SC_tanhf(float);
tanh(float v)786 float __attribute__((overloadable)) tanh(float v) {
787     return SC_tanhf(v);
788 }
FN_FUNC_FN(tanh)789 FN_FUNC_FN(tanh)
790 
791 extern float __attribute__((overloadable)) tanpi(float v) {
792     return tan(v * M_PI);
793 }
794 FN_FUNC_FN(tanpi)
795 
796 
797 extern float __attribute__((overloadable)) SC_tgammaf(float);
tgamma(float v)798 float __attribute__((overloadable)) tgamma(float v) {
799     return SC_tgammaf(v);
800 }
801 FN_FUNC_FN(tgamma)
802 
803 extern float __attribute__((overloadable)) SC_truncf(float);
trunc(float v)804 float __attribute__((overloadable)) trunc(float v) {
805     return SC_truncf(v);
806 }
FN_FUNC_FN(trunc)807 FN_FUNC_FN(trunc)
808 
809 // Int ops (partial), 6.11.3
810 
/*
 * XN_FUNC_YN(typeout, fnc, typein)
 *
 * Declares the scalar overload `typeout fnc(typein)` (its definition must be
 * supplied separately) and defines the 2-, 3- and 4-component vector
 * overloads, each of which applies the scalar overload to every component
 * of its argument and collects the results.
 */
#define XN_FUNC_YN(typeout, fnc, typein)                                \
extern typeout __attribute__((overloadable)) fnc(typein);               \
extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) {  \
    typeout##2 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    return r;                                                           \
}                                                                       \
extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) {  \
    typeout##3 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    r.z = fnc(v.z);                                                     \
    return r;                                                           \
}                                                                       \
extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) {  \
    typeout##4 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    r.z = fnc(v.z);                                                     \
    r.w = fnc(v.w);                                                     \
    return r;                                                           \
}
834 
835 
/*
 * UIN_FUNC_IN(fnc): instantiates XN_FUNC_YN for functions that take a signed
 * integer type and return the unsigned type of the same width
 * (char -> uchar, short -> ushort, int -> uint).
 */
#define UIN_FUNC_IN(fnc)          \
XN_FUNC_YN(uchar, fnc, char)      \
XN_FUNC_YN(ushort, fnc, short)    \
XN_FUNC_YN(uint, fnc, int)
840 
/*
 * IN_FUNC_IN(fnc): instantiates XN_FUNC_YN for functions whose return type
 * equals their argument type, for each of uchar, char, ushort, short, uint
 * and int.
 */
#define IN_FUNC_IN(fnc)           \
XN_FUNC_YN(uchar, fnc, uchar)     \
XN_FUNC_YN(char, fnc, char)       \
XN_FUNC_YN(ushort, fnc, ushort)   \
XN_FUNC_YN(short, fnc, short)     \
XN_FUNC_YN(uint, fnc, uint)       \
XN_FUNC_YN(int, fnc, int)
848 
849 
/*
 * XN_FUNC_XN_XN_BODY(type, fnc, body)
 *
 * Defines a two-argument function family for `type`: the scalar overload
 * returns the expression `body` (which may refer to the parameters by their
 * names v1 and v2), and the 2-, 3- and 4-component vector overloads apply
 * the scalar overload to each pair of corresponding components.
 */
#define XN_FUNC_XN_XN_BODY(type, fnc, body)         \
extern type __attribute__((overloadable))       \
        fnc(type v1, type v2) {                     \
    return body;                                    \
}                                                   \
extern type##2 __attribute__((overloadable))    \
        fnc(type##2 v1, type##2 v2) {               \
    type##2 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    return r;                                       \
}                                                   \
extern type##3 __attribute__((overloadable))    \
        fnc(type##3 v1, type##3 v2) {               \
    type##3 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    r.z = fnc(v1.z, v2.z);                          \
    return r;                                       \
}                                                   \
extern type##4 __attribute__((overloadable))    \
        fnc(type##4 v1, type##4 v2) {               \
    type##4 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    r.z = fnc(v1.z, v2.z);                          \
    r.w = fnc(v1.w, v2.w);                          \
    return r;                                       \
}
879 
/*
 * IN_FUNC_IN_IN_BODY(fnc, body): instantiates XN_FUNC_XN_XN_BODY with the
 * same `body` for uchar, char, ushort, short, uint, int and (despite the
 * macro's name) float.
 */
#define IN_FUNC_IN_IN_BODY(fnc, body) \
XN_FUNC_XN_XN_BODY(uchar, fnc, body)  \
XN_FUNC_XN_XN_BODY(char, fnc, body)   \
XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
XN_FUNC_XN_XN_BODY(short, fnc, body)  \
XN_FUNC_XN_XN_BODY(uint, fnc, body)   \
XN_FUNC_XN_XN_BODY(int, fnc, body)    \
XN_FUNC_XN_XN_BODY(float, fnc, body)
888 
889 
890 /**
891  * abs
892  */
893 extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
894     if (v < 0)
895         return -v;
896     return v;
897 }
/**
 * abs (16-bit): returns the magnitude of v as an unsigned value.  v is
 * promoted to int before negation, so -v cannot overflow here.
 */
extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
    return (v < 0) ? -v : v;
}
/**
 * abs (8-bit): returns the magnitude of v as an unsigned value.  v is
 * promoted to int before negation, so -v cannot overflow here.
 */
extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
    return (v < 0) ? -v : v;
}
908 
909 /**
910  * clz
911  * __builtin_clz only accepts a 32-bit unsigned int, so every input will be
912  * expanded to 32 bits. For our smaller data types, we need to subtract off
 * these unused top bits (that will always be composed of zeros).
914  */
/**
 * clz (uint32_t): number of leading zero bits in v.
 *
 * __builtin_clz has an undefined result for an argument of 0, so zero is
 * handled explicitly and reports the full width of the type (32).
 */
extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
    if (v == 0) {
        return 32;
    }
    return __builtin_clz(v);
}
/**
 * clz (uint16_t): number of leading zero bits in the 16-bit value v.
 *
 * v is widened to 32 bits for __builtin_clz, so the 16 extra top bits are
 * subtracted from the count.  __builtin_clz has an undefined result for 0,
 * so zero is handled explicitly and reports the width of the type (16).
 */
extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
    if (v == 0) {
        return 16;
    }
    return __builtin_clz(v) - 16;
}
/**
 * clz (uint8_t): number of leading zero bits in the 8-bit value v.
 *
 * v is widened to 32 bits for __builtin_clz, so the 24 extra top bits are
 * subtracted from the count.  __builtin_clz has an undefined result for 0,
 * so zero is handled explicitly and reports the width of the type (8).
 */
extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
    if (v == 0) {
        return 8;
    }
    return __builtin_clz(v) - 24;
}
/**
 * clz (int32_t): number of leading zero bits in the 32-bit pattern of v.
 *
 * The value is reinterpreted as unsigned for __builtin_clz, so any negative
 * input yields 0.  __builtin_clz has an undefined result for 0, so zero is
 * handled explicitly and reports the full width of the type (32).
 */
extern int32_t __attribute__((overloadable)) clz(int32_t v) {
    if (v == 0) {
        return 32;
    }
    return __builtin_clz((uint32_t)v);
}
/**
 * clz (int16_t): number of leading zero bits in the 16-bit pattern of v.
 *
 * The value is masked to its low 16 bits (discarding sign extension) before
 * __builtin_clz, and the 16 unused top bits are subtracted from the count.
 * __builtin_clz has an undefined result for 0, so zero is handled explicitly
 * and reports the width of the type (16).
 */
extern int16_t __attribute__((overloadable)) clz(int16_t v) {
    const uint32_t low_bits = ((uint32_t)v) & 0x0000ffff;
    if (low_bits == 0) {
        return 16;
    }
    return __builtin_clz(low_bits) - 16;
}
/**
 * clz (int8_t): number of leading zero bits in the 8-bit pattern of v.
 *
 * The value is masked to its low 8 bits (discarding sign extension) before
 * __builtin_clz, and the 24 unused top bits are subtracted from the count.
 * __builtin_clz has an undefined result for 0, so zero is handled explicitly
 * and reports the width of the type (8).
 */
extern int8_t __attribute__((overloadable)) clz(int8_t v) {
    const uint32_t low_bits = ((uint32_t)v) & 0x000000ff;
    if (low_bits == 0) {
        return 8;
    }
    return __builtin_clz(low_bits) - 24;
}
933 
934 
// Vector overloads of abs (signed in, unsigned out) and clz (same type in
// and out), generated per component from the scalar overloads above.
UIN_FUNC_IN(abs)
IN_FUNC_IN(clz)
937 
938 
939 // 6.11.4
940 
941 
942 extern float __attribute__((overloadable)) degrees(float radians) {
943     return radians * (180.f / M_PI);
944 }
degrees(float2 radians)945 extern float2 __attribute__((overloadable)) degrees(float2 radians) {
946     return radians * (180.f / M_PI);
947 }
degrees(float3 radians)948 extern float3 __attribute__((overloadable)) degrees(float3 radians) {
949     return radians * (180.f / M_PI);
950 }
degrees(float4 radians)951 extern float4 __attribute__((overloadable)) degrees(float4 radians) {
952     return radians * (180.f / M_PI);
953 }
954 
/**
 * mix (scalar): linear interpolation, start + (stop - start) * amount.
 * amount == 0 yields start; amount == 1 yields start + (stop - start).
 */
extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
    const float span = stop - start;
    return start + span * amount;
}
mix(float2 start,float2 stop,float2 amount)958 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
959     return start + (stop - start) * amount;
960 }
mix(float3 start,float3 stop,float3 amount)961 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
962     return start + (stop - start) * amount;
963 }
mix(float4 start,float4 stop,float4 amount)964 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
965     return start + (stop - start) * amount;
966 }
mix(float2 start,float2 stop,float amount)967 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
968     return start + (stop - start) * amount;
969 }
mix(float3 start,float3 stop,float amount)970 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
971     return start + (stop - start) * amount;
972 }
mix(float4 start,float4 stop,float amount)973 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
974     return start + (stop - start) * amount;
975 }
976 
radians(float degrees)977 extern float __attribute__((overloadable)) radians(float degrees) {
978     return degrees * (M_PI / 180.f);
979 }
radians(float2 degrees)980 extern float2 __attribute__((overloadable)) radians(float2 degrees) {
981     return degrees * (M_PI / 180.f);
982 }
radians(float3 degrees)983 extern float3 __attribute__((overloadable)) radians(float3 degrees) {
984     return degrees * (M_PI / 180.f);
985 }
radians(float4 degrees)986 extern float4 __attribute__((overloadable)) radians(float4 degrees) {
987     return degrees * (M_PI / 180.f);
988 }
989 
/**
 * step (scalar): 0.f when v is strictly below edge, 1.f otherwise (which
 * includes the case where the comparison is false because of a NaN).
 */
extern float __attribute__((overloadable)) step(float edge, float v) {
    if (v < edge) {
        return 0.f;
    }
    return 1.f;
}
step(float2 edge,float2 v)993 extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
994     float2 r;
995     r.x = (v.x < edge.x) ? 0.f : 1.f;
996     r.y = (v.y < edge.y) ? 0.f : 1.f;
997     return r;
998 }
step(float3 edge,float3 v)999 extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
1000     float3 r;
1001     r.x = (v.x < edge.x) ? 0.f : 1.f;
1002     r.y = (v.y < edge.y) ? 0.f : 1.f;
1003     r.z = (v.z < edge.z) ? 0.f : 1.f;
1004     return r;
1005 }
step(float4 edge,float4 v)1006 extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
1007     float4 r;
1008     r.x = (v.x < edge.x) ? 0.f : 1.f;
1009     r.y = (v.y < edge.y) ? 0.f : 1.f;
1010     r.z = (v.z < edge.z) ? 0.f : 1.f;
1011     r.w = (v.w < edge.w) ? 0.f : 1.f;
1012     return r;
1013 }
step(float2 edge,float v)1014 extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
1015     float2 r;
1016     r.x = (v < edge.x) ? 0.f : 1.f;
1017     r.y = (v < edge.y) ? 0.f : 1.f;
1018     return r;
1019 }
step(float3 edge,float v)1020 extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
1021     float3 r;
1022     r.x = (v < edge.x) ? 0.f : 1.f;
1023     r.y = (v < edge.y) ? 0.f : 1.f;
1024     r.z = (v < edge.z) ? 0.f : 1.f;
1025     return r;
1026 }
step(float4 edge,float v)1027 extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
1028     float4 r;
1029     r.x = (v < edge.x) ? 0.f : 1.f;
1030     r.y = (v < edge.y) ? 0.f : 1.f;
1031     r.z = (v < edge.z) ? 0.f : 1.f;
1032     r.w = (v < edge.w) ? 0.f : 1.f;
1033     return r;
1034 }
step(float edge,float2 v)1035 extern float2 __attribute__((overloadable)) step(float edge, float2 v) {
1036     float2 r;
1037     r.x = (v.x < edge) ? 0.f : 1.f;
1038     r.y = (v.y < edge) ? 0.f : 1.f;
1039     return r;
1040 }
step(float edge,float3 v)1041 extern float3 __attribute__((overloadable)) step(float edge, float3 v) {
1042     float3 r;
1043     r.x = (v.x < edge) ? 0.f : 1.f;
1044     r.y = (v.y < edge) ? 0.f : 1.f;
1045     r.z = (v.z < edge) ? 0.f : 1.f;
1046     return r;
1047 }
step(float edge,float4 v)1048 extern float4 __attribute__((overloadable)) step(float edge, float4 v) {
1049     float4 r;
1050     r.x = (v.x < edge) ? 0.f : 1.f;
1051     r.y = (v.y < edge) ? 0.f : 1.f;
1052     r.z = (v.z < edge) ? 0.f : 1.f;
1053     r.w = (v.w < edge) ? 0.f : 1.f;
1054     return r;
1055 }
1056 
sign(float v)1057 extern float __attribute__((overloadable)) sign(float v) {
1058     if (v > 0) return 1.f;
1059     if (v < 0) return -1.f;
1060     return v;
1061 }
FN_FUNC_FN(sign)1062 FN_FUNC_FN(sign)
1063 
1064 
1065 // 6.11.5
1066 extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
1067     float3 r;
1068     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1069     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1070     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1071     return r;
1072 }
1073 
cross(float4 lhs,float4 rhs)1074 extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
1075     float4 r;
1076     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1077     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1078     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1079     r.w = 0.f;
1080     return r;
1081 }
1082 
1083 #if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
1084 // These functions must be defined here if we are not using the SSE
1085 // implementation, which includes when we are built as part of the
1086 // debug runtime (libclcore_debug.bc) or compiling with debug info.
1087 
dot(float lhs,float rhs)1088 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
1089     return lhs * rhs;
1090 }
dot(float2 lhs,float2 rhs)1091 extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
1092     return lhs.x*rhs.x + lhs.y*rhs.y;
1093 }
dot(float3 lhs,float3 rhs)1094 extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
1095     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
1096 }
dot(float4 lhs,float4 rhs)1097 extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
1098     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
1099 }
1100 
length(float v)1101 extern float __attribute__((overloadable)) length(float v) {
1102     return fabs(v);
1103 }
length(float2 v)1104 extern float __attribute__((overloadable)) length(float2 v) {
1105     return sqrt(v.x*v.x + v.y*v.y);
1106 }
length(float3 v)1107 extern float __attribute__((overloadable)) length(float3 v) {
1108     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
1109 }
length(float4 v)1110 extern float __attribute__((overloadable)) length(float4 v) {
1111     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
1112 }
1113 
1114 #else
1115 
// In this configuration length() is not defined in this file; it is only
// declared so that the functions below (e.g. distance, normalize) can call it.
extern float __attribute__((overloadable)) length(float v);
extern float __attribute__((overloadable)) length(float2 v);
extern float __attribute__((overloadable)) length(float3 v);
extern float __attribute__((overloadable)) length(float4 v);
1120 
1121 #endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
1122 
distance(float lhs,float rhs)1123 extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
1124     return length(lhs - rhs);
1125 }
distance(float2 lhs,float2 rhs)1126 extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
1127     return length(lhs - rhs);
1128 }
distance(float3 lhs,float3 rhs)1129 extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
1130     return length(lhs - rhs);
1131 }
distance(float4 lhs,float4 rhs)1132 extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
1133     return length(lhs - rhs);
1134 }
1135 
1136 /* For the normalization functions, vectors of length 0 should simply be
1137  * returned (i.e. all the components of that vector are 0).
1138  */
/**
 * normalize (scalar): 0.0f when v compares equal to zero, -1.0f when v is
 * negative, and 1.0f for every other input.
 */
extern float __attribute__((overloadable)) normalize(float v) {
    return (v == 0.0f) ? 0.0f : ((v < 0.0f) ? -1.0f : 1.0f);
}
normalize(float2 v)1148 extern float2 __attribute__((overloadable)) normalize(float2 v) {
1149     float l = length(v);
1150     return l == 0.0f ? v : v / l;
1151 }
normalize(float3 v)1152 extern float3 __attribute__((overloadable)) normalize(float3 v) {
1153     float l = length(v);
1154     return l == 0.0f ? v : v / l;
1155 }
normalize(float4 v)1156 extern float4 __attribute__((overloadable)) normalize(float4 v) {
1157     float l = length(v);
1158     return l == 0.0f ? v : v / l;
1159 }
1160 
half_sqrt(float v)1161 extern float __attribute__((overloadable)) half_sqrt(float v) {
1162     return sqrt(v);
1163 }
FN_FUNC_FN(half_sqrt)1164 FN_FUNC_FN(half_sqrt)
1165 
/*
 * fast_length: same formula as length, but the square root is taken with
 * half_sqrt.  For a scalar this is simply its absolute value.
 */
extern float __attribute__((overloadable)) fast_length(float v) {
    const float magnitude = fabs(v);
    return magnitude;
}
extern float __attribute__((overloadable)) fast_length(float2 v) {
    const float sum_sq = v.x*v.x + v.y*v.y;
    return half_sqrt(sum_sq);
}
extern float __attribute__((overloadable)) fast_length(float3 v) {
    const float sum_sq = v.x*v.x + v.y*v.y + v.z*v.z;
    return half_sqrt(sum_sq);
}
extern float __attribute__((overloadable)) fast_length(float4 v) {
    const float sum_sq = v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w;
    return half_sqrt(sum_sq);
}
1178 
fast_distance(float lhs,float rhs)1179 extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
1180     return fast_length(lhs - rhs);
1181 }
fast_distance(float2 lhs,float2 rhs)1182 extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
1183     return fast_length(lhs - rhs);
1184 }
fast_distance(float3 lhs,float3 rhs)1185 extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
1186     return fast_length(lhs - rhs);
1187 }
fast_distance(float4 lhs,float4 rhs)1188 extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
1189     return fast_length(lhs - rhs);
1190 }
1191 
// Forward declaration: half_rsqrt is used by the fast_normalize overloads
// below; its definition is not in this section.
extern float __attribute__((overloadable)) half_rsqrt(float);
1193 
1194 /* For the normalization functions, vectors of length 0 should simply be
1195  * returned (i.e. all the components of that vector are 0).
1196  */
/**
 * fast_normalize (scalar): -1.0f for negative v, 0.0f when v compares equal
 * to zero, and 1.0f for every other input.
 */
extern float __attribute__((overloadable)) fast_normalize(float v) {
    if (v < 0.0f) {
        return -1.0f;
    }
    if (v == 0.0f) {
        return 0.0f;
    }
    return 1.0f;
}
1206 // If the length is 0, then rlength should be NaN.
fast_normalize(float2 v)1207 extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
1208     float rlength = half_rsqrt(v.x*v.x + v.y*v.y);
1209     return (rlength == rlength) ? v * rlength : v;
1210 }
fast_normalize(float3 v)1211 extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
1212     float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
1213     return (rlength == rlength) ? v * rlength : v;
1214 }
fast_normalize(float4 v)1215 extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
1216     float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
1217     return (rlength == rlength) ? v * rlength : v;
1218 }
1219 
/** half_recip (scalar): reciprocal of v, computed as 1 / v. */
extern float __attribute__((overloadable)) half_recip(float v) {
    const float one = 1.f;
    return one / v;
}
1223 
1224 /*
1225 extern float __attribute__((overloadable)) approx_atan(float x) {
1226     if (x == 0.f)
1227         return 0.f;
1228     if (x < 0.f)
1229         return -1.f * approx_atan(-1.f * x);
1230     if (x > 1.f)
1231         return M_PI_2 - approx_atan(approx_recip(x));
1232     return x * approx_recip(1.f + 0.28f * x*x);
1233 }
1234 FN_FUNC_FN(approx_atan)
1235 */
1236 
/*
 * Overlays a float and a 32-bit integer so the bit pattern of one can be
 * read as the other.  Used by GET_FLOAT_WORD and SET_FLOAT_WORD.
 */
typedef union
{
  float fv;     /* the value viewed as a float */
  int32_t iv;   /* the same 32 bits viewed as an integer */
} ieee_float_shape_type;
1242 
1243 /* Get a 32 bit int from a float.  */
1244 
/*
 * GET_FLOAT_WORD(i, d): stores the 32-bit pattern of the float expression d
 * into the integer lvalue i, going through ieee_float_shape_type.  Wrapped
 * in do { } while (0) so it behaves as a single statement.
 */
#define GET_FLOAT_WORD(i,d)                 \
do {                                \
  ieee_float_shape_type gf_u;                   \
  gf_u.fv = (d);                     \
  (i) = gf_u.iv;                      \
} while (0)
1251 
1252 /* Set a float from a 32 bit int.  */
1253 
/*
 * SET_FLOAT_WORD(d, i): stores into the float lvalue d the value whose
 * 32-bit pattern is the integer expression i, going through
 * ieee_float_shape_type.  Wrapped in do { } while (0) so it behaves as a
 * single statement.
 */
#define SET_FLOAT_WORD(d,i)                 \
do {                                \
  ieee_float_shape_type sf_u;                   \
  sf_u.iv = (i);                      \
  (d) = sf_u.fv;                     \
} while (0)
1260 
1261 
1262 
1263 // Valid -125 to 125
native_exp2(float v)1264 extern float __attribute__((overloadable)) native_exp2(float v) {
1265     int32_t iv = (int)v;
1266     int32_t x = iv + (iv >> 31); // ~floor(v)
1267     float r = (v - x);
1268 
1269     float fo;
1270     SET_FLOAT_WORD(fo, (x + 127) << 23);
1271 
1272     r *= 0.694f; // ~ log(e) / log(2)
1273     float r2 = r*r;
1274     float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
1275     return fo * adj;
1276 }
1277 
native_exp2(float2 v)1278 extern float2 __attribute__((overloadable)) native_exp2(float2 v) {
1279     int2 iv = convert_int2(v);
1280     int2 x = iv + (iv >> (int2)31);//floor(v);
1281     float2 r = (v - convert_float2(x));
1282 
1283     x += 127;
1284 
1285     float2 fo = (float2)(x << (int2)23);
1286 
1287     r *= 0.694f; // ~ log(e) / log(2)
1288     float2 r2 = r*r;
1289     float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
1290     return fo * adj;
1291 }
1292 
native_exp2(float4 v)1293 extern float4 __attribute__((overloadable)) native_exp2(float4 v) {
1294     int4 iv = convert_int4(v);
1295     int4 x = iv + (iv >> (int4)31);//floor(v);
1296     float4 r = (v - convert_float4(x));
1297 
1298     x += 127;
1299 
1300     float4 fo = (float4)(x << (int4)23);
1301 
1302     r *= 0.694f; // ~ log(e) / log(2)
1303     float4 r2 = r*r;
1304     float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
1305     return fo * adj;
1306 }
1307 
native_exp2(float3 v)1308 extern float3 __attribute__((overloadable)) native_exp2(float3 v) {
1309     float4 t = 1.f;
1310     t.xyz = v;
1311     return native_exp2(t).xyz;
1312 }
1313 
1314 
native_exp(float v)1315 extern float __attribute__((overloadable)) native_exp(float v) {
1316     return native_exp2(v * 1.442695041f);
1317 }
native_exp(float2 v)1318 extern float2 __attribute__((overloadable)) native_exp(float2 v) {
1319     return native_exp2(v * 1.442695041f);
1320 }
native_exp(float3 v)1321 extern float3 __attribute__((overloadable)) native_exp(float3 v) {
1322     return native_exp2(v * 1.442695041f);
1323 }
native_exp(float4 v)1324 extern float4 __attribute__((overloadable)) native_exp(float4 v) {
1325     return native_exp2(v * 1.442695041f);
1326 }
1327 
native_exp10(float v)1328 extern float __attribute__((overloadable)) native_exp10(float v) {
1329     return native_exp2(v * 3.321928095f);
1330 }
native_exp10(float2 v)1331 extern float2 __attribute__((overloadable)) native_exp10(float2 v) {
1332     return native_exp2(v * 3.321928095f);
1333 }
native_exp10(float3 v)1334 extern float3 __attribute__((overloadable)) native_exp10(float3 v) {
1335     return native_exp2(v * 3.321928095f);
1336 }
native_exp10(float4 v)1337 extern float4 __attribute__((overloadable)) native_exp10(float4 v) {
1338     return native_exp2(v * 3.321928095f);
1339 }
1340 
native_log2(float v)1341 extern float __attribute__((overloadable)) native_log2(float v) {
1342     int32_t ibits;
1343     GET_FLOAT_WORD(ibits, v);
1344 
1345     int32_t e = (ibits >> 23) & 0xff;
1346 
1347     ibits &= 0x7fffff;
1348     ibits |= 127 << 23;
1349 
1350     float ir;
1351     SET_FLOAT_WORD(ir, ibits);
1352     ir -= 1.5f;
1353     float ir2 = ir*ir;
1354     float adj2 = (0.405465108f / 0.693147181f) +
1355                  ((0.666666667f / 0.693147181f) * ir) -
1356                  ((0.222222222f / 0.693147181f) * ir2) +
1357                  ((0.098765432f / 0.693147181f) * ir*ir2) -
1358                  ((0.049382716f / 0.693147181f) * ir2*ir2) +
1359                  ((0.026337449f / 0.693147181f) * ir*ir2*ir2) -
1360                  ((0.014631916f / 0.693147181f) * ir2*ir2*ir2);
1361     return (float)(e - 127) + adj2;
1362 }
native_log2(float2 v)1363 extern float2 __attribute__((overloadable)) native_log2(float2 v) {
1364     float2 v2 = {native_log2(v.x), native_log2(v.y)};
1365     return v2;
1366 }
native_log2(float3 v)1367 extern float3 __attribute__((overloadable)) native_log2(float3 v) {
1368     float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)};
1369     return v2;
1370 }
native_log2(float4 v)1371 extern float4 __attribute__((overloadable)) native_log2(float4 v) {
1372     float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)};
1373     return v2;
1374 }
1375 
native_log(float v)1376 extern float __attribute__((overloadable)) native_log(float v) {
1377     return native_log2(v) * (1.f / 1.442695041f);
1378 }
native_log(float2 v)1379 extern float2 __attribute__((overloadable)) native_log(float2 v) {
1380     return native_log2(v) * (1.f / 1.442695041f);
1381 }
native_log(float3 v)1382 extern float3 __attribute__((overloadable)) native_log(float3 v) {
1383     return native_log2(v) * (1.f / 1.442695041f);
1384 }
native_log(float4 v)1385 extern float4 __attribute__((overloadable)) native_log(float4 v) {
1386     return native_log2(v) * (1.f / 1.442695041f);
1387 }
1388 
native_log10(float v)1389 extern float __attribute__((overloadable)) native_log10(float v) {
1390     return native_log2(v) * (1.f / 3.321928095f);
1391 }
native_log10(float2 v)1392 extern float2 __attribute__((overloadable)) native_log10(float2 v) {
1393     return native_log2(v) * (1.f / 3.321928095f);
1394 }
native_log10(float3 v)1395 extern float3 __attribute__((overloadable)) native_log10(float3 v) {
1396     return native_log2(v) * (1.f / 3.321928095f);
1397 }
native_log10(float4 v)1398 extern float4 __attribute__((overloadable)) native_log10(float4 v) {
1399     return native_log2(v) * (1.f / 3.321928095f);
1400 }
1401 
1402 
native_powr(float v,float y)1403 extern float __attribute__((overloadable)) native_powr(float v, float y) {
1404     float v2 = native_log2(v);
1405     v2 = fmax(v2 * y, -125.f);
1406     return native_exp2(v2);
1407 }
native_powr(float2 v,float2 y)1408 extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) {
1409     float2 v2 = native_log2(v);
1410     v2 = fmax(v2 * y, -125.f);
1411     return native_exp2(v2);
1412 }
native_powr(float3 v,float3 y)1413 extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) {
1414     float3 v2 = native_log2(v);
1415     v2 = fmax(v2 * y, -125.f);
1416     return native_exp2(v2);
1417 }
native_powr(float4 v,float4 y)1418 extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) {
1419     float4 v2 = native_log2(v);
1420     v2 = fmax(v2 * y, -125.f);
1421     return native_exp2(v2);
1422 }
1423 
/**
 * min (double): returns v1 when v1 < v2, otherwise v2 (so v2 is returned
 * whenever the comparison is false, including for equal values).
 */
extern double __attribute__((overloadable)) min(double v1, double v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
1427 
min(double2 v1,double2 v2)1428 extern double2 __attribute__((overloadable)) min(double2 v1, double2 v2) {
1429     double2 r;
1430     r.x = v1.x < v2.x ? v1.x : v2.x;
1431     r.y = v1.y < v2.y ? v1.y : v2.y;
1432     return r;
1433 }
1434 
min(double3 v1,double3 v2)1435 extern double3 __attribute__((overloadable)) min(double3 v1, double3 v2) {
1436     double3 r;
1437     r.x = v1.x < v2.x ? v1.x : v2.x;
1438     r.y = v1.y < v2.y ? v1.y : v2.y;
1439     r.z = v1.z < v2.z ? v1.z : v2.z;
1440     return r;
1441 }
1442 
min(double4 v1,double4 v2)1443 extern double4 __attribute__((overloadable)) min(double4 v1, double4 v2) {
1444     double4 r;
1445     r.x = v1.x < v2.x ? v1.x : v2.x;
1446     r.y = v1.y < v2.y ? v1.y : v2.y;
1447     r.z = v1.z < v2.z ? v1.z : v2.z;
1448     r.w = v1.w < v2.w ? v1.w : v2.w;
1449     return r;
1450 }
1451 
/** min (long): returns v1 when v1 < v2, otherwise v2. */
extern long __attribute__((overloadable)) min(long v1, long v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
min(long2 v1,long2 v2)1455 extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
1456     long2 r;
1457     r.x = v1.x < v2.x ? v1.x : v2.x;
1458     r.y = v1.y < v2.y ? v1.y : v2.y;
1459     return r;
1460 }
min(long3 v1,long3 v2)1461 extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
1462     long3 r;
1463     r.x = v1.x < v2.x ? v1.x : v2.x;
1464     r.y = v1.y < v2.y ? v1.y : v2.y;
1465     r.z = v1.z < v2.z ? v1.z : v2.z;
1466     return r;
1467 }
min(long4 v1,long4 v2)1468 extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
1469     long4 r;
1470     r.x = v1.x < v2.x ? v1.x : v2.x;
1471     r.y = v1.y < v2.y ? v1.y : v2.y;
1472     r.z = v1.z < v2.z ? v1.z : v2.z;
1473     r.w = v1.w < v2.w ? v1.w : v2.w;
1474     return r;
1475 }
1476 
min(ulong v1,ulong v2)1477 extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
1478     return v1 < v2 ? v1 : v2;
1479 }
min(ulong2 v1,ulong2 v2)1480 extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
1481     ulong2 r;
1482     r.x = v1.x < v2.x ? v1.x : v2.x;
1483     r.y = v1.y < v2.y ? v1.y : v2.y;
1484     return r;
1485 }
min(ulong3 v1,ulong3 v2)1486 extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
1487     ulong3 r;
1488     r.x = v1.x < v2.x ? v1.x : v2.x;
1489     r.y = v1.y < v2.y ? v1.y : v2.y;
1490     r.z = v1.z < v2.z ? v1.z : v2.z;
1491     return r;
1492 }
min(ulong4 v1,ulong4 v2)1493 extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
1494     ulong4 r;
1495     r.x = v1.x < v2.x ? v1.x : v2.x;
1496     r.y = v1.y < v2.y ? v1.y : v2.y;
1497     r.z = v1.z < v2.z ? v1.z : v2.z;
1498     r.w = v1.w < v2.w ? v1.w : v2.w;
1499     return r;
1500 }
1501 
// Returns v1 when v1 > v2 holds, otherwise v2 (so v2 is returned whenever the
// comparison is false, including when either operand is NaN).
extern double __attribute__((overloadable)) max(double v1, double v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
1505 
max(double2 v1,double2 v2)1506 extern double2 __attribute__((overloadable)) max(double2 v1, double2 v2) {
1507     double2 r;
1508     r.x = v1.x > v2.x ? v1.x : v2.x;
1509     r.y = v1.y > v2.y ? v1.y : v2.y;
1510     return r;
1511 }
1512 
max(double3 v1,double3 v2)1513 extern double3 __attribute__((overloadable)) max(double3 v1, double3 v2) {
1514     double3 r;
1515     r.x = v1.x > v2.x ? v1.x : v2.x;
1516     r.y = v1.y > v2.y ? v1.y : v2.y;
1517     r.z = v1.z > v2.z ? v1.z : v2.z;
1518     return r;
1519 }
1520 
max(double4 v1,double4 v2)1521 extern double4 __attribute__((overloadable)) max(double4 v1, double4 v2) {
1522     double4 r;
1523     r.x = v1.x > v2.x ? v1.x : v2.x;
1524     r.y = v1.y > v2.y ? v1.y : v2.y;
1525     r.z = v1.z > v2.z ? v1.z : v2.z;
1526     r.w = v1.w > v2.w ? v1.w : v2.w;
1527     return r;
1528 }
1529 
// Returns the larger of two long values (v2 when they are equal).
extern long __attribute__((overloadable)) max(long v1, long v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
max(long2 v1,long2 v2)1533 extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
1534     long2 r;
1535     r.x = v1.x > v2.x ? v1.x : v2.x;
1536     r.y = v1.y > v2.y ? v1.y : v2.y;
1537     return r;
1538 }
max(long3 v1,long3 v2)1539 extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
1540     long3 r;
1541     r.x = v1.x > v2.x ? v1.x : v2.x;
1542     r.y = v1.y > v2.y ? v1.y : v2.y;
1543     r.z = v1.z > v2.z ? v1.z : v2.z;
1544     return r;
1545 }
max(long4 v1,long4 v2)1546 extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
1547     long4 r;
1548     r.x = v1.x > v2.x ? v1.x : v2.x;
1549     r.y = v1.y > v2.y ? v1.y : v2.y;
1550     r.z = v1.z > v2.z ? v1.z : v2.z;
1551     r.w = v1.w > v2.w ? v1.w : v2.w;
1552     return r;
1553 }
1554 
max(ulong v1,ulong v2)1555 extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
1556     return v1 > v2 ? v1 : v2;
1557 }
max(ulong2 v1,ulong2 v2)1558 extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
1559     ulong2 r;
1560     r.x = v1.x > v2.x ? v1.x : v2.x;
1561     r.y = v1.y > v2.y ? v1.y : v2.y;
1562     return r;
1563 }
max(ulong3 v1,ulong3 v2)1564 extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
1565     ulong3 r;
1566     r.x = v1.x > v2.x ? v1.x : v2.x;
1567     r.y = v1.y > v2.y ? v1.y : v2.y;
1568     r.z = v1.z > v2.z ? v1.z : v2.z;
1569     return r;
1570 }
max(ulong4 v1,ulong4 v2)1571 extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
1572     ulong4 r;
1573     r.x = v1.x > v2.x ? v1.x : v2.x;
1574     r.y = v1.y > v2.y ? v1.y : v2.y;
1575     r.z = v1.z > v2.z ? v1.z : v2.z;
1576     r.w = v1.w > v2.w ? v1.w : v2.w;
1577     return r;
1578 }
1579 
// Generates native_<fn>(x) for float, float2, float3 and float4.
// Every overload simply forwards its argument to <fn>.
#define THUNK_NATIVE_F(fn) \
    float __attribute__((overloadable)) native_##fn(float x) { \
        return fn(x); \
    } \
    float2 __attribute__((overloadable)) native_##fn(float2 x) { \
        return fn(x); \
    } \
    float3 __attribute__((overloadable)) native_##fn(float3 x) { \
        return fn(x); \
    } \
    float4 __attribute__((overloadable)) native_##fn(float4 x) { \
        return fn(x); \
    }
1585 
// Generates native_<fn>(a, b) for float, float2, float3 and float4, where
// both arguments share the same type.  Every overload forwards to <fn>.
#define THUNK_NATIVE_F_F(fn) \
    float __attribute__((overloadable)) native_##fn(float a, float b) { \
        return fn(a, b); \
    } \
    float2 __attribute__((overloadable)) native_##fn(float2 a, float2 b) { \
        return fn(a, b); \
    } \
    float3 __attribute__((overloadable)) native_##fn(float3 a, float3 b) { \
        return fn(a, b); \
    } \
    float4 __attribute__((overloadable)) native_##fn(float4 a, float4 b) { \
        return fn(a, b); \
    }
1591 
// Generates native_<fn>(a, out) for float, float2, float3 and float4, where
// out is a pointer to the same type as a.  Every overload forwards both
// arguments, unchanged, to <fn>.
#define THUNK_NATIVE_F_FP(fn) \
    float __attribute__((overloadable)) native_##fn(float a, float *out) { \
        return fn(a, out); \
    } \
    float2 __attribute__((overloadable)) native_##fn(float2 a, float2 *out) { \
        return fn(a, out); \
    } \
    float3 __attribute__((overloadable)) native_##fn(float3 a, float3 *out) { \
        return fn(a, out); \
    } \
    float4 __attribute__((overloadable)) native_##fn(float4 a, float4 *out) { \
        return fn(a, out); \
    }
1597 
// Generates native_<fn>(a, k) for float/int, float2/int2, float3/int3 and
// float4/int4 argument pairs.  Every overload forwards to <fn>.
#define THUNK_NATIVE_F_I(fn) \
    float __attribute__((overloadable)) native_##fn(float a, int k) { \
        return fn(a, k); \
    } \
    float2 __attribute__((overloadable)) native_##fn(float2 a, int2 k) { \
        return fn(a, k); \
    } \
    float3 __attribute__((overloadable)) native_##fn(float3 a, int3 k) { \
        return fn(a, k); \
    } \
    float4 __attribute__((overloadable)) native_##fn(float4 a, int4 k) { \
        return fn(a, k); \
    }
1603 
// Instantiate the native_* thunks.  Each line defines the float, float2,
// float3 and float4 overloads of native_<name>, all of which forward to the
// function <name> itself.  The macro suffix selects the signature:
//   _F    one argument
//   _F_F  two arguments of the same type
//   _F_I  float-type argument plus int-type argument
//   _F_FP float-type argument plus pointer to the same type
THUNK_NATIVE_F(acos)
THUNK_NATIVE_F(acosh)
THUNK_NATIVE_F(acospi)
THUNK_NATIVE_F(asin)
THUNK_NATIVE_F(asinh)
THUNK_NATIVE_F(asinpi)
THUNK_NATIVE_F(atan)
THUNK_NATIVE_F_F(atan2)
THUNK_NATIVE_F(atanh)
THUNK_NATIVE_F(atanpi)
THUNK_NATIVE_F_F(atan2pi)
THUNK_NATIVE_F(cbrt)
THUNK_NATIVE_F(cos)
THUNK_NATIVE_F(cosh)
THUNK_NATIVE_F(cospi)
THUNK_NATIVE_F(expm1)
THUNK_NATIVE_F_F(hypot)
THUNK_NATIVE_F(log1p)
THUNK_NATIVE_F_I(rootn)
THUNK_NATIVE_F(rsqrt)
THUNK_NATIVE_F(sqrt)
THUNK_NATIVE_F(sin)
THUNK_NATIVE_F_FP(sincos)
THUNK_NATIVE_F(sinh)
THUNK_NATIVE_F(sinpi)
THUNK_NATIVE_F(tan)
THUNK_NATIVE_F(tanh)
THUNK_NATIVE_F(tanpi)
1632 
// The thunk generators are only needed for the instantiations above;
// undefine them so they are not visible to the rest of the file.
#undef THUNK_NATIVE_F
#undef THUNK_NATIVE_F_F
#undef THUNK_NATIVE_F_I
#undef THUNK_NATIVE_F_FP
1637 
1638 float __attribute__((overloadable)) native_normalize(float v) { return fast_normalize(v);}
native_normalize(float2 v)1639 float2 __attribute__((overloadable)) native_normalize(float2 v) { return fast_normalize(v);}
native_normalize(float3 v)1640 float3 __attribute__((overloadable)) native_normalize(float3 v) { return fast_normalize(v);}
native_normalize(float4 v)1641 float4 __attribute__((overloadable)) native_normalize(float4 v) { return fast_normalize(v);}
1642 
native_distance(float v1,float v2)1643 float __attribute__((overloadable)) native_distance(float v1, float v2) { return fast_distance(v1, v2);}
native_distance(float2 v1,float2 v2)1644 float __attribute__((overloadable)) native_distance(float2 v1, float2 v2) { return fast_distance(v1, v2);}
native_distance(float3 v1,float3 v2)1645 float __attribute__((overloadable)) native_distance(float3 v1, float3 v2) { return fast_distance(v1, v2);}
native_distance(float4 v1,float4 v2)1646 float __attribute__((overloadable)) native_distance(float4 v1, float4 v2) { return fast_distance(v1, v2);}
1647 
native_length(float v)1648 float __attribute__((overloadable)) native_length(float v) { return fast_length(v);}
native_length(float2 v)1649 float __attribute__((overloadable)) native_length(float2 v) { return fast_length(v);}
native_length(float3 v)1650 float __attribute__((overloadable)) native_length(float3 v) { return fast_length(v);}
native_length(float4 v)1651 float __attribute__((overloadable)) native_length(float4 v) { return fast_length(v);}
1652 
// Scalar native_divide: plain float division v1 / v2.
float __attribute__((overloadable)) native_divide(float v1, float v2) {
    const float quotient = v1 / v2;
    return quotient;
}
native_divide(float2 v1,float2 v2)1654 float2 __attribute__((overloadable)) native_divide(float2 v1, float2 v2) { return v1 / v2;}
native_divide(float3 v1,float3 v2)1655 float3 __attribute__((overloadable)) native_divide(float3 v1, float3 v2) { return v1 / v2;}
native_divide(float4 v1,float4 v2)1656 float4 __attribute__((overloadable)) native_divide(float4 v1, float4 v2) { return v1 / v2;}
1657 
// Scalar native_recip: 1 / v computed with ordinary float division.
float __attribute__((overloadable)) native_recip(float v) {
    const float reciprocal = 1.f / v;
    return reciprocal;
}
native_recip(float2 v)1659 float2 __attribute__((overloadable)) native_recip(float2 v) { return ((float2)1.f) / v;}
native_recip(float3 v)1660 float3 __attribute__((overloadable)) native_recip(float3 v) { return ((float3)1.f) / v;}
native_recip(float4 v)1661 float4 __attribute__((overloadable)) native_recip(float4 v) { return ((float4)1.f) / v;}
1662 
1663 
1664 
1665 
1666 
// Undefine the function-generator macros so they are not visible to the rest
// of the file.
#undef FN_FUNC_FN
#undef IN_FUNC_FN
#undef FN_FUNC_FN_FN
#undef FN_FUNC_FN_F
#undef FN_FUNC_FN_IN
#undef FN_FUNC_FN_I
#undef FN_FUNC_FN_PFN
#undef FN_FUNC_FN_PIN
#undef FN_FUNC_FN_FN_FN
#undef FN_FUNC_FN_FN_PIN
#undef XN_FUNC_YN
#undef UIN_FUNC_IN
#undef IN_FUNC_IN
#undef XN_FUNC_XN_XN_BODY
#undef IN_FUNC_IN_IN_BODY
1682 
// 16-bit pattern for half-precision +infinity (per the constant's name).
// nan_half() ORs an extra bit into this pattern to build its return value.
static const unsigned short kHalfPositiveInfinity = 0x7c00;
1684 
/* Generates half-precision overloads of the shape
 *     HN result = fn(HN input)
 * for half, half2, half3 and half4.  Each overload widens its argument to the
 * matching float type, calls fn on the widened value and narrows the result
 * back to the half type.
 */
#define HN_FUNC_HN(fn) \
extern half __attribute__((overloadable)) fn(half h) { \
    float fv = (float) h; \
    return (half) fn(fv); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v) { \
    float2 fv = convert_float2(v); \
    return convert_half2(fn(fv)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v) { \
    float3 fv = convert_float3(v); \
    return convert_half3(fn(fv)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v) { \
    float4 fv = convert_float4(v); \
    return convert_half4(fn(fv)); \
}
1702 
/* Generates half-precision overloads of the shape
 *     HN result = fn(HN input1, HN input2)
 * for half, half2, half3 and half4.  Both arguments are widened to the
 * matching float type, fn is called on the widened values and the result is
 * narrowed back to the half type.
 */
#define HN_FUNC_HN_HN(fn) \
extern half __attribute__((overloadable)) fn(half h1, half h2) { \
    float fa = (float) h1; \
    float fb = (float) h2; \
    return (half) fn(fa, fb); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) { \
    float2 fa = convert_float2(v1); \
    float2 fb = convert_float2(v2); \
    return convert_half2(fn(fa, fb)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) { \
    float3 fa = convert_float3(v1); \
    float3 fb = convert_float3(v2); \
    return convert_half3(fn(fa, fb)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) { \
    float4 fa = convert_float4(v1); \
    float4 fb = convert_float4(v2); \
    return convert_half4(fn(fa, fb)); \
}
1723 
/* Generates half-precision overloads of the shape
 *     HN result = fn(HN input1, half input2)
 * for half2, half3 and half4 (no scalar overload is produced).  The vector is
 * widened to the matching float vector, the scalar to float, fn is called on
 * the widened values and the result is narrowed back to the half vector.
 */
#define HN_FUNC_HN_H(fn) \
extern half2 __attribute__((overloadable)) fn(half2 v1, half v2) { \
    float2 fa = convert_float2(v1); \
    float fb = (float) v2; \
    return convert_half2(fn(fa, fb)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, half v2) { \
    float3 fa = convert_float3(v1); \
    float fb = (float) v2; \
    return convert_half3(fn(fa, fb)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, half v2) { \
    float4 fa = convert_float4(v1); \
    float fb = (float) v2; \
    return convert_half4(fn(fa, fb)); \
}
1738 
/* Generates half-precision overloads of the shape
 *     HN result = fn(HN input1, HN input2, HN input3)
 * for half, half2, half3 and half4.  All three arguments are widened to the
 * matching float type, fn is called on the widened values and the result is
 * narrowed back to the half type.
 */
#define HN_FUNC_HN_HN_HN(fn) \
extern half __attribute__((overloadable)) fn(half h1, half h2, half h3) { \
    float fa = (float) h1; \
    float fb = (float) h2; \
    float fc = (float) h3; \
    return (half) fn(fa, fb, fc); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2, half2 v3) { \
    float2 fa = convert_float2(v1); \
    float2 fb = convert_float2(v2); \
    float2 fc = convert_float2(v3); \
    return convert_half2(fn(fa, fb, fc)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2, half3 v3) { \
    float3 fa = convert_float3(v1); \
    float3 fb = convert_float3(v2); \
    float3 fc = convert_float3(v3); \
    return convert_half3(fn(fa, fb, fc)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2, half4 v3) { \
    float4 fa = convert_float4(v1); \
    float4 fb = convert_float4(v2); \
    float4 fc = convert_float4(v3); \
    return convert_half4(fn(fa, fb, fc)); \
}
1762 
/* Generates half-precision overloads of the shape
 *     HN result = fn(HN input1, IN input2)
 * for half/int, half2/int2, half3/int3 and half4/int4.  The half argument is
 * widened to the matching float type, the integer argument is passed through
 * unchanged, and the result of fn is narrowed back to the half type.
 */
#define HN_FUNC_HN_IN(fn) \
extern half __attribute__((overloadable)) fn(half h1, int v) { \
    float fa = (float) h1; \
    return (half) fn(fa, v); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v1, int2 v2) { \
    float2 fa = convert_float2(v1); \
    return convert_half2(fn(fa, v2)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, int3 v2) { \
    float3 fa = convert_float3(v1); \
    return convert_half3(fn(fa, v2)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, int4 v2) { \
    float4 fa = convert_float4(v1); \
    return convert_half4(fn(fa, v2)); \
}
1781 
/* Generates half-precision overloads of the shape
 *     half result = fn(HN input)
 * for half, half2, half3 and half4.  The argument is widened to the matching
 * float type and fn is called on it; the scalar overload casts the result to
 * half explicitly, the vector overloads rely on the conversion performed by
 * the return statement.
 */
#define H_FUNC_HN(fn) \
extern half __attribute__((overloadable)) fn(half h) { \
    float fv = (float) h; \
    return (half) fn(fv); \
} \
extern half __attribute__((overloadable)) fn(half2 v) { \
    float2 fv = convert_float2(v); \
    return fn(fv); \
} \
extern half __attribute__((overloadable)) fn(half3 v) { \
    float3 fv = convert_float3(v); \
    return fn(fv); \
} \
extern half __attribute__((overloadable)) fn(half4 v) { \
    float4 fv = convert_float4(v); \
    return fn(fv); \
}
1799 
/* Generates half-precision overloads of the shape
 *     half result = fn(HN input1, HN input2)
 * for half, half2, half3 and half4.  Both arguments are widened to the
 * matching float type and fn is called on them; the scalar overload casts the
 * result to half explicitly, the vector overloads rely on the conversion
 * performed by the return statement.
 */
#define H_FUNC_HN_HN(fn) \
extern half __attribute__((overloadable)) fn(half h1, half h2) { \
    float fa = (float) h1; \
    float fb = (float) h2; \
    return (half) fn(fa, fb); \
} \
extern half __attribute__((overloadable)) fn(half2 v1, half2 v2) { \
    float2 fa = convert_float2(v1); \
    float2 fb = convert_float2(v2); \
    return fn(fa, fb); \
} \
extern half __attribute__((overloadable)) fn(half3 v1, half3 v2) { \
    float3 fa = convert_float3(v1); \
    float3 fb = convert_float3(v2); \
    return fn(fa, fb); \
} \
extern half __attribute__((overloadable)) fn(half4 v1, half4 v2) { \
    float4 fa = convert_float4(v1); \
    float4 fb = convert_float4(v2); \
    return fn(fa, fb); \
}
1817 
/* Generates vector overloads of the shape
 *     HN result = fnc(HN input, HN *output)
 * for half2, half3 and half4.  Each lane is handled by the scalar
 * fnc(half, half *) overload; the values it writes are collected in local
 * temporaries and stored into *v2 once all lanes have been processed.
 */
#define SCALARIZE_HN_FUNC_HN_PHN(fnc) \
extern half2 __attribute__((overloadable)) fnc(half2 v1, half2 *v2) { \
    half out_x, out_y; \
    half2 res; \
    res.x = fnc(v1.x, &out_x); \
    res.y = fnc(v1.y, &out_y); \
    v2->x = out_x; \
    v2->y = out_y; \
    return res; \
} \
extern half3 __attribute__((overloadable)) fnc(half3 v1, half3 *v2) { \
    half out_x, out_y, out_z; \
    half3 res; \
    res.x = fnc(v1.x, &out_x); \
    res.y = fnc(v1.y, &out_y); \
    res.z = fnc(v1.z, &out_z); \
    v2->x = out_x; \
    v2->y = out_y; \
    v2->z = out_z; \
    return res; \
} \
extern half4 __attribute__((overloadable)) fnc(half4 v1, half4 *v2) { \
    half out_x, out_y, out_z, out_w; \
    half4 res; \
    res.x = fnc(v1.x, &out_x); \
    res.y = fnc(v1.y, &out_y); \
    res.z = fnc(v1.z, &out_z); \
    res.w = fnc(v1.w, &out_w); \
    v2->x = out_x; \
    v2->y = out_y; \
    v2->z = out_z; \
    v2->w = out_w; \
    return res; \
}
1852 
/* Define f16 functions of the form
 *     HN output = fn(HN input1, HN input2)
 * where HN is a vector half type (half2, half3 or half4).  Each overload
 * applies the scalar fn(half, half) of the same name lane by lane, so that
 * scalar overload has to be declared before the macro is instantiated.
 *
 * The final line intentionally carries no line continuation: the macro body
 * ends at the closing brace of the half4 overload and cannot absorb whatever
 * line happens to follow the definition.
 */
#define SCALARIZE_HN_FUNC_HN_HN(fn)                                       \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) {       \
  half2 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  return ret;                                                             \
}                                                                         \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) {       \
  half3 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  ret.z = fn(v1.z, v2.z);                                                 \
  return ret;                                                             \
}                                                                         \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) {       \
  half4 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  ret.z = fn(v1.z, v2.z);                                                 \
  ret.w = fn(v1.w, v2.w);                                                 \
  return ret;                                                             \
}
1880 
// Half overloads generated from the macros above: each HN_FUNC_HN /
// HN_FUNC_HN_HN line defines the half, half2, half3 and half4 versions of
// the named function by widening to float and narrowing the result.
HN_FUNC_HN(acos);
HN_FUNC_HN(acosh);
HN_FUNC_HN(acospi);
HN_FUNC_HN(asin);
HN_FUNC_HN(asinh);
HN_FUNC_HN(asinpi);
HN_FUNC_HN(atan);
HN_FUNC_HN(atanh);
HN_FUNC_HN(atanpi);
HN_FUNC_HN_HN(atan2);
HN_FUNC_HN_HN(atan2pi);

HN_FUNC_HN(cbrt);
HN_FUNC_HN(ceil);

// Scalar half copysign is only declared here (no body in this part of the
// file); the vector overloads are built from it lane by lane.
extern half __attribute__((overloadable)) copysign(half x, half y);
SCALARIZE_HN_FUNC_HN_HN(copysign);

HN_FUNC_HN(cos);
HN_FUNC_HN(cosh);
HN_FUNC_HN(cospi);
1902 
cross(half3 lhs,half3 rhs)1903 extern half3 __attribute__((overloadable)) cross(half3 lhs, half3 rhs) {
1904     half3 r;
1905     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1906     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1907     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1908     return r;
1909 }
1910 
cross(half4 lhs,half4 rhs)1911 extern half4 __attribute__((overloadable)) cross(half4 lhs, half4 rhs) {
1912     half4 r;
1913     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1914     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1915     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1916     r.w = 0.f;
1917     return r;
1918 }
1919 
// Half overloads generated by widening to float.  The H_FUNC_* lines produce
// overloads that return a scalar half for every vector width.
HN_FUNC_HN(degrees);
H_FUNC_HN_HN(distance);
H_FUNC_HN_HN(dot);

HN_FUNC_HN(erf);
HN_FUNC_HN(erfc);
HN_FUNC_HN(exp);
HN_FUNC_HN(exp10);
HN_FUNC_HN(exp2);
HN_FUNC_HN(expm1);

// fmax and fmin get both the (HN, HN) overloads and the (HN, half)
// overloads that take a scalar second argument.
HN_FUNC_HN(fabs);
HN_FUNC_HN_HN(fdim);
HN_FUNC_HN(floor);
HN_FUNC_HN_HN_HN(fma);
HN_FUNC_HN_HN(fmax);
HN_FUNC_HN_H(fmax);
HN_FUNC_HN_HN(fmin);
HN_FUNC_HN_H(fmin);
HN_FUNC_HN_HN(fmod);
1940 
fract(half v,half * iptr)1941 extern half __attribute__((overloadable)) fract(half v, half *iptr) {
1942     // maxLessThanOne = 0.99951171875, the largest value < 1.0
1943     half maxLessThanOne;
1944     SET_HALF_WORD(maxLessThanOne, 0x3bff);
1945 
1946     int i = (int) floor(v);
1947     if (iptr) {
1948         *iptr = i;
1949     }
1950     // return v - floor(v), if strictly less than one
1951     return fmin(v - i, maxLessThanOne);
1952 }
1953 
// half2/half3/half4 fract(v, iptr), built lane by lane from the scalar
// fract(half, half *) overload.
SCALARIZE_HN_FUNC_HN_PHN(fract);
1955 
fract(half v)1956 extern half __attribute__((const, overloadable)) fract(half v) {
1957     half unused;
1958     return fract(v, &unused);
1959 }
1960 
fract(half2 v)1961 extern half2 __attribute__((const, overloadable)) fract(half2 v) {
1962     half2 unused;
1963     return fract(v, &unused);
1964 }
1965 
fract(half3 v)1966 extern half3 __attribute__((const, overloadable)) fract(half3 v) {
1967     half3 unused;
1968     return fract(v, &unused);
1969 }
1970 
fract(half4 v)1971 extern half4 __attribute__((const, overloadable)) fract(half4 v) {
1972     half4 unused;
1973     return fract(v, &unused);
1974 }
1975 
// Scalar half frexp: declaration only (no body in this part of the file).
// The vector overloads below call it once per lane.
extern half __attribute__((overloadable)) frexp(half x, int *eptr);
1977 
frexp(half2 v1,int2 * eptr)1978 extern half2 __attribute__((overloadable)) frexp(half2 v1, int2 *eptr) {
1979     half2 ret;
1980     int e[2];
1981     ret.x = frexp(v1.x, &e[0]);
1982     ret.y = frexp(v1.y, &e[1]);
1983     eptr->x = e[0];
1984     eptr->y = e[1];
1985     return ret;
1986 }
1987 
frexp(half3 v1,int3 * eptr)1988 extern half3 __attribute__((overloadable)) frexp(half3 v1, int3 *eptr) {
1989     half3 ret;
1990     int e[3];
1991     ret.x = frexp(v1.x, &e[0]);
1992     ret.y = frexp(v1.y, &e[1]);
1993     ret.z = frexp(v1.z, &e[2]);
1994     eptr->x = e[0];
1995     eptr->y = e[1];
1996     eptr->z = e[2];
1997     return ret;
1998 }
1999 
frexp(half4 v1,int4 * eptr)2000 extern half4 __attribute__((overloadable)) frexp(half4 v1, int4 *eptr) {
2001     half4 ret;
2002     int e[4];
2003     ret.x = frexp(v1.x, &e[0]);
2004     ret.y = frexp(v1.y, &e[1]);
2005     ret.z = frexp(v1.z, &e[2]);
2006     ret.w = frexp(v1.w, &e[3]);
2007     eptr->x = e[0];
2008     eptr->y = e[1];
2009     eptr->z = e[2];
2010     eptr->w = e[3];
2011     return ret;
2012 }
2013 
// Half hypot overloads, generated by widening to float.
HN_FUNC_HN_HN(hypot);

// Scalar half ilogb: declaration only (no body in this part of the file).
// The vector overloads below call it once per lane.
extern int __attribute__((overloadable)) ilogb(half x);
2017 
ilogb(half2 v)2018 extern int2 __attribute__((overloadable)) ilogb(half2 v) {
2019     int2 ret;
2020     ret.x = ilogb(v.x);
2021     ret.y = ilogb(v.y);
2022     return ret;
2023 }
ilogb(half3 v)2024 extern int3 __attribute__((overloadable)) ilogb(half3 v) {
2025     int3 ret;
2026     ret.x = ilogb(v.x);
2027     ret.y = ilogb(v.y);
2028     ret.z = ilogb(v.z);
2029     return ret;
2030 }
ilogb(half4 v)2031 extern int4 __attribute__((overloadable)) ilogb(half4 v) {
2032     int4 ret;
2033     ret.x = ilogb(v.x);
2034     ret.y = ilogb(v.y);
2035     ret.z = ilogb(v.z);
2036     ret.w = ilogb(v.w);
2037     return ret;
2038 }
2039 
// Half ldexp overloads taking an int type of matching width (half/int,
// half2/int2, half3/int3, half4/int4), generated by widening to float.
HN_FUNC_HN_IN(ldexp);
ldexp(half2 v,int exponent)2041 extern half2 __attribute__((overloadable)) ldexp(half2 v, int exponent) {
2042     return convert_half2(ldexp(convert_float2(v), exponent));
2043 }
ldexp(half3 v,int exponent)2044 extern half3 __attribute__((overloadable)) ldexp(half3 v, int exponent) {
2045     return convert_half3(ldexp(convert_float3(v), exponent));
2046 }
ldexp(half4 v,int exponent)2047 extern half4 __attribute__((overloadable)) ldexp(half4 v, int exponent) {
2048     return convert_half4(ldexp(convert_float4(v), exponent));
2049 }
2050 
// length returns a scalar half for every vector width; lgamma returns the
// same half type as its argument.  Both are generated by widening to float.
H_FUNC_HN(length);
HN_FUNC_HN(lgamma);
2053 
lgamma(half h,int * signp)2054 extern half __attribute__((overloadable)) lgamma(half h, int *signp) {
2055     return (half) lgamma((float) h, signp);
2056 }
lgamma(half2 v,int2 * signp)2057 extern half2 __attribute__((overloadable)) lgamma(half2 v, int2 *signp) {
2058     return convert_half2(lgamma(convert_float2(v), signp));
2059 }
lgamma(half3 v,int3 * signp)2060 extern half3 __attribute__((overloadable)) lgamma(half3 v, int3 *signp) {
2061     return convert_half3(lgamma(convert_float3(v), signp));
2062 }
lgamma(half4 v,int4 * signp)2063 extern half4 __attribute__((overloadable)) lgamma(half4 v, int4 *signp) {
2064     return convert_half4(lgamma(convert_float4(v), signp));
2065 }
2066 
// Half overloads generated by widening to float.
HN_FUNC_HN(log);
HN_FUNC_HN(log10);
HN_FUNC_HN(log1p);
HN_FUNC_HN(log2);
HN_FUNC_HN(logb);

// max and min get both the (HN, HN) overloads and the (HN, half) overloads
// that take a scalar second argument.
HN_FUNC_HN_HN_HN(mad);
HN_FUNC_HN_HN(max);
HN_FUNC_HN_H(max); // TODO can this be arch-specific similar to _Z3maxDv2_ff?
HN_FUNC_HN_HN(min);
HN_FUNC_HN_H(min); // TODO can this be arch-specific similar to _Z3minDv2_ff?
2078 
mix(half start,half stop,half amount)2079 extern half __attribute__((overloadable)) mix(half start, half stop, half amount) {
2080     return start + (stop - start) * amount;
2081 }
mix(half2 start,half2 stop,half2 amount)2082 extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half2 amount) {
2083     return start + (stop - start) * amount;
2084 }
mix(half3 start,half3 stop,half3 amount)2085 extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half3 amount) {
2086     return start + (stop - start) * amount;
2087 }
mix(half4 start,half4 stop,half4 amount)2088 extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half4 amount) {
2089     return start + (stop - start) * amount;
2090 }
mix(half2 start,half2 stop,half amount)2091 extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half amount) {
2092     return start + (stop - start) * amount;
2093 }
mix(half3 start,half3 stop,half amount)2094 extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half amount) {
2095     return start + (stop - start) * amount;
2096 }
mix(half4 start,half4 stop,half amount)2097 extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half amount) {
2098     return start + (stop - start) * amount;
2099 }
2100 
// Scalar half modf: declaration only (no body in this part of the file).
// The half2/half3/half4 overloads are built from it lane by lane.
extern half __attribute__((overloadable)) modf(half x, half *iptr);
SCALARIZE_HN_FUNC_HN_PHN(modf);
2103 
nan_half()2104 half __attribute__((overloadable)) nan_half() {
2105   unsigned short nan_short = kHalfPositiveInfinity | 0x0200;
2106   half nan;
2107   SET_HALF_WORD(nan, nan_short);
2108   return nan;
2109 }
2110 
// Half normalize overloads, generated by widening to float.
HN_FUNC_HN(normalize);

// Scalar half nextafter is only declared here (no body in this part of the
// file); the vector overloads are built from it lane by lane.
extern half __attribute__((overloadable)) nextafter(half x, half y);
SCALARIZE_HN_FUNC_HN_HN(nextafter);

// More half overloads generated by widening to float; pown takes an int type
// of matching width as its second argument.
HN_FUNC_HN_HN(pow);
HN_FUNC_HN_IN(pown);
HN_FUNC_HN_HN(powr);
HN_FUNC_HN(radians);
HN_FUNC_HN_HN(remainder);
2121 
remquo(half n,half d,int * quo)2122 extern half __attribute__((overloadable)) remquo(half n, half d, int *quo) {
2123     return (float) remquo((float) n, (float) d, quo);
2124 }
remquo(half2 n,half2 d,int2 * quo)2125 extern half2 __attribute__((overloadable)) remquo(half2 n, half2 d, int2 *quo) {
2126     return convert_half2(remquo(convert_float2(d), convert_float2(n), quo));
2127 }
remquo(half3 n,half3 d,int3 * quo)2128 extern half3 __attribute__((overloadable)) remquo(half3 n, half3 d, int3 *quo) {
2129     return convert_half3(remquo(convert_float3(d), convert_float3(n), quo));
2130 }
remquo(half4 n,half4 d,int4 * quo)2131 extern half4 __attribute__((overloadable)) remquo(half4 n, half4 d, int4 *quo) {
2132     return convert_half4(remquo(convert_float4(d), convert_float4(n), quo));
2133 }
2134 
// Half overloads generated by widening to float; rootn takes an int type of
// matching width as its second argument.
HN_FUNC_HN(rint);
HN_FUNC_HN_IN(rootn);
HN_FUNC_HN(round);
HN_FUNC_HN(rsqrt);
2139 
sign(half h)2140 extern half __attribute__((overloadable)) sign(half h) {
2141     if (h > 0) return (half) 1.f;
2142     if (h < 0) return (half) -1.f;
2143     return h;
2144 }
sign(half2 v)2145 extern half2 __attribute__((overloadable)) sign(half2 v) {
2146     half2 ret;
2147     ret.x = sign(v.x);
2148     ret.y = sign(v.y);
2149     return ret;
2150 }
sign(half3 v)2151 extern half3 __attribute__((overloadable)) sign(half3 v) {
2152     half3 ret;
2153     ret.x = sign(v.x);
2154     ret.y = sign(v.y);
2155     ret.z = sign(v.z);
2156     return ret;
2157 }
sign(half4 v)2158 extern half4 __attribute__((overloadable)) sign(half4 v) {
2159     half4 ret;
2160     ret.x = sign(v.x);
2161     ret.y = sign(v.y);
2162     ret.z = sign(v.z);
2163     ret.w = sign(v.w);
2164     return ret;
2165 }
2166 
// Half overloads of sin, generated by HN_FUNC_HN (macro defined outside this
// section). The sincos overloads defined after this line call them.
HN_FUNC_HN(sin);
2168 
sincos(half v,half * cosptr)2169 extern half __attribute__((overloadable)) sincos(half v, half *cosptr) {
2170     *cosptr = cos(v);
2171     return sin(v);
2172 }
2173 // TODO verify if LLVM eliminates the duplicate convert_float2
sincos(half2 v,half2 * cosptr)2174 extern half2 __attribute__((overloadable)) sincos(half2 v, half2 *cosptr) {
2175     *cosptr = cos(v);
2176     return sin(v);
2177 }
sincos(half3 v,half3 * cosptr)2178 extern half3 __attribute__((overloadable)) sincos(half3 v, half3 *cosptr) {
2179     *cosptr = cos(v);
2180     return sin(v);
2181 }
sincos(half4 v,half4 * cosptr)2182 extern half4 __attribute__((overloadable)) sincos(half4 v, half4 *cosptr) {
2183     *cosptr = cos(v);
2184     return sin(v);
2185 }
2186 
// Half overloads of sinh, sinpi and sqrt, generated by HN_FUNC_HN (macro
// defined outside this section).
HN_FUNC_HN(sinh);
HN_FUNC_HN(sinpi);
HN_FUNC_HN(sqrt);
2190 
step(half edge,half v)2191 extern half __attribute__((overloadable)) step(half edge, half v) {
2192     return (v < edge) ? 0.f : 1.f;
2193 }
step(half2 edge,half2 v)2194 extern half2 __attribute__((overloadable)) step(half2 edge, half2 v) {
2195     half2 r;
2196     r.x = (v.x < edge.x) ? 0.f : 1.f;
2197     r.y = (v.y < edge.y) ? 0.f : 1.f;
2198     return r;
2199 }
step(half3 edge,half3 v)2200 extern half3 __attribute__((overloadable)) step(half3 edge, half3 v) {
2201     half3 r;
2202     r.x = (v.x < edge.x) ? 0.f : 1.f;
2203     r.y = (v.y < edge.y) ? 0.f : 1.f;
2204     r.z = (v.z < edge.z) ? 0.f : 1.f;
2205     return r;
2206 }
step(half4 edge,half4 v)2207 extern half4 __attribute__((overloadable)) step(half4 edge, half4 v) {
2208     half4 r;
2209     r.x = (v.x < edge.x) ? 0.f : 1.f;
2210     r.y = (v.y < edge.y) ? 0.f : 1.f;
2211     r.z = (v.z < edge.z) ? 0.f : 1.f;
2212     r.w = (v.w < edge.w) ? 0.f : 1.f;
2213     return r;
2214 }
step(half2 edge,half v)2215 extern half2 __attribute__((overloadable)) step(half2 edge, half v) {
2216     half2 r;
2217     r.x = (v < edge.x) ? 0.f : 1.f;
2218     r.y = (v < edge.y) ? 0.f : 1.f;
2219     return r;
2220 }
step(half3 edge,half v)2221 extern half3 __attribute__((overloadable)) step(half3 edge, half v) {
2222     half3 r;
2223     r.x = (v < edge.x) ? 0.f : 1.f;
2224     r.y = (v < edge.y) ? 0.f : 1.f;
2225     r.z = (v < edge.z) ? 0.f : 1.f;
2226     return r;
2227 }
step(half4 edge,half v)2228 extern half4 __attribute__((overloadable)) step(half4 edge, half v) {
2229     half4 r;
2230     r.x = (v < edge.x) ? 0.f : 1.f;
2231     r.y = (v < edge.y) ? 0.f : 1.f;
2232     r.z = (v < edge.z) ? 0.f : 1.f;
2233     r.w = (v < edge.w) ? 0.f : 1.f;
2234     return r;
2235 }
step(half edge,half2 v)2236 extern half2 __attribute__((overloadable)) step(half edge, half2 v) {
2237     half2 r;
2238     r.x = (v.x < edge) ? 0.f : 1.f;
2239     r.y = (v.y < edge) ? 0.f : 1.f;
2240     return r;
2241 }
step(half edge,half3 v)2242 extern half3 __attribute__((overloadable)) step(half edge, half3 v) {
2243     half3 r;
2244     r.x = (v.x < edge) ? 0.f : 1.f;
2245     r.y = (v.y < edge) ? 0.f : 1.f;
2246     r.z = (v.z < edge) ? 0.f : 1.f;
2247     return r;
2248 }
step(half edge,half4 v)2249 extern half4 __attribute__((overloadable)) step(half edge, half4 v) {
2250     half4 r;
2251     r.x = (v.x < edge) ? 0.f : 1.f;
2252     r.y = (v.y < edge) ? 0.f : 1.f;
2253     r.z = (v.z < edge) ? 0.f : 1.f;
2254     r.w = (v.w < edge) ? 0.f : 1.f;
2255     return r;
2256 }
2257 
// Half overloads of the remaining one-operand functions, generated by
// HN_FUNC_HN (macro defined outside this section).
HN_FUNC_HN(tan);
HN_FUNC_HN(tanh);
HN_FUNC_HN(tanpi);
HN_FUNC_HN(tgamma);
HN_FUNC_HN(trunc); // TODO: rethink: needs half-specific implementation?
2263 
// Half overloads of the native_* function family, generated with the same
// macros as the non-native functions in this file (macros defined outside
// this section). Macro suffixes, judging by their names: _HN one half
// operand, _HN_HN two half operands, _HN_IN a half plus an int operand;
// H_FUNC_* presumably return a scalar half regardless of vector width --
// confirm against the macro definitions.

// Inverse trigonometric and hyperbolic functions.
HN_FUNC_HN(native_acos);
HN_FUNC_HN(native_acosh);
HN_FUNC_HN(native_acospi);
HN_FUNC_HN(native_asin);
HN_FUNC_HN(native_asinh);
HN_FUNC_HN(native_asinpi);
HN_FUNC_HN(native_atan);
HN_FUNC_HN(native_atanh);
HN_FUNC_HN(native_atanpi);
HN_FUNC_HN_HN(native_atan2);
HN_FUNC_HN_HN(native_atan2pi);

// Cube root and cosine family.
HN_FUNC_HN(native_cbrt);
HN_FUNC_HN(native_cos);
HN_FUNC_HN(native_cosh);
HN_FUNC_HN(native_cospi);

// Distance and division.
H_FUNC_HN_HN(native_distance);
HN_FUNC_HN_HN(native_divide);

// Exponentials.
HN_FUNC_HN(native_exp);
HN_FUNC_HN(native_exp10);
HN_FUNC_HN(native_exp2);
HN_FUNC_HN(native_expm1);

// Hypotenuse and vector length.
HN_FUNC_HN_HN(native_hypot);
H_FUNC_HN(native_length);

// Logarithms.
HN_FUNC_HN(native_log);
HN_FUNC_HN(native_log10);
HN_FUNC_HN(native_log1p);
HN_FUNC_HN(native_log2);

HN_FUNC_HN(native_normalize);

HN_FUNC_HN_HN(native_powr); // TODO are parameter limits different for half?

// Reciprocal and roots.
HN_FUNC_HN(native_recip);
HN_FUNC_HN_IN(native_rootn);
HN_FUNC_HN(native_rsqrt);

// native_sin is instantiated here; the native_sincos overloads are written
// out by hand after this line.
HN_FUNC_HN(native_sin);
2306 
native_sincos(half v,half * cosptr)2307 extern half __attribute__((overloadable)) native_sincos(half v, half *cosptr) {
2308     return sincos(v, cosptr);
2309 }
native_sincos(half2 v,half2 * cosptr)2310 extern half2 __attribute__((overloadable)) native_sincos(half2 v, half2 *cosptr) {
2311     return sincos(v, cosptr);
2312 }
native_sincos(half3 v,half3 * cosptr)2313 extern half3 __attribute__((overloadable)) native_sincos(half3 v, half3 *cosptr) {
2314     return sincos(v, cosptr);
2315 }
native_sincos(half4 v,half4 * cosptr)2316 extern half4 __attribute__((overloadable)) native_sincos(half4 v, half4 *cosptr) {
2317     return sincos(v, cosptr);
2318 }
2319 
// Remaining native_* half overloads, generated by HN_FUNC_HN (macro defined
// outside this section).
HN_FUNC_HN(native_sinh);
HN_FUNC_HN(native_sinpi);
HN_FUNC_HN(native_sqrt);

HN_FUNC_HN(native_tan);
HN_FUNC_HN(native_tanh);
HN_FUNC_HN(native_tanpi);
2327 
// Retire the half-overload generator macros: nothing after this point in the
// file instantiates them, so they should not stay defined.
#undef HN_FUNC_HN
#undef HN_FUNC_HN_HN
#undef HN_FUNC_HN_H
#undef HN_FUNC_HN_HN_HN
#undef HN_FUNC_HN_IN
#undef H_FUNC_HN
#undef H_FUNC_HN_HN
#undef SCALARIZE_HN_FUNC_HN_HN
// Used for modf above but previously left defined, unlike its siblings.
#undef SCALARIZE_HN_FUNC_HN_PHN
2336 
2337 // exports unavailable mathlib functions to compat lib
2338 
2339 #ifdef RS_COMPATIBILITY_LIB
2340 
2341 // !!! DANGER !!!
2342 // These functions are potentially missing on older Android versions.
2343 // Work around the issue by supplying our own variants.
2344 // !!! DANGER !!!
2345 
2346 // The logbl() implementation is taken from the latest bionic/, since
2347 // double == long double on Android.
logbl(long double x)2348 extern "C" long double logbl(long double x) { return logb(x); }
2349 
// __aeabi_idiv0 is a missing function in libcompiler_rt.so, so we just
// pick the simplest implementation based on the ARM EABI doc: return the
// argument unchanged.
// The "C" language linkage is spelled out only when this is compiled as C++;
// a C compiler rejects the extern "C" syntax and uses C linkage anyway.
#ifdef __cplusplus
extern "C"
#endif
int __aeabi_idiv0(int v) { return v; }
2353 
2354 #endif // compatibility lib
2355