1 /* libs/pixelflinger/fixed.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #include <stdio.h>
19 
20 #include <private/pixelflinger/ggl_context.h>
21 #include <private/pixelflinger/ggl_fixed.h>
22 
23 
24 // ------------------------------------------------------------------------
25 
gglRecipQNormalized(int32_t x,int * exponent)26 int32_t gglRecipQNormalized(int32_t x, int* exponent)
27 {
28     const int32_t s = x>>31;
29     uint32_t a = s ? -x : x;
30 
31     // the result will overflow, so just set it to the biggest/inf value
32     if (ggl_unlikely(a <= 2LU)) {
33         *exponent = 0;
34         return s ? FIXED_MIN : FIXED_MAX;
35     }
36 
37     // Newton-Raphson iteration:
38     // x = r*(2 - a*r)
39 
40     const int32_t lz = gglClz(a);
41     a <<= lz;  // 0.32
42     uint32_t r = a;
43     // note: if a == 0x80000000, this means x was a power-of-2, in this
44     // case we don't need to compute anything. We get the reciprocal for
45     // (almost) free.
46     if (a != 0x80000000) {
47         r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
48         // 0.32 + 2.30 = 2.62 -> 2.30
49         // 2.30 + 2.30 = 4.60 -> 2.30
50         r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
51         r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
52     }
53 
54     // shift right 1-bit to make room for the sign bit
55     *exponent = 30-lz-1;
56     r >>= 1;
57     return s ? -r : r;
58 }
59 
gglRecipQ(GGLfixed x,int q)60 int32_t gglRecipQ(GGLfixed x, int q)
61 {
62     int shift;
63     x = gglRecipQNormalized(x, &shift);
64     shift += 16-q;
65     if (shift > 0)
66         x += 1L << (shift-1);   // rounding
67     x >>= shift;
68     return x;
69 }
70 
71 // ------------------------------------------------------------------------
72 
73 static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
74     // 1/sqrt(x) with x = 1-N/16, N=[8...1]
75     0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
76 };
77 
gglSqrtRecipx(GGLfixed x)78 GGLfixed gglSqrtRecipx(GGLfixed x)
79 {
80     if (x == 0)         return FIXED_MAX;
81     if (x == FIXED_ONE) return x;
82     const GGLfixed a = x;
83     const int32_t lz = gglClz(x);
84     x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
85     const int32_t exp = lz - 16;
86     if (exp <= 0)   x >>= -exp>>1;
87     else            x <<= (exp>>1) + (exp & 1);
88     if (exp & 1) {
89         x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
90     }
91     // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
92     x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
93     x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
94     return x;
95 }
96 
gglSqrtx(GGLfixed a)97 GGLfixed gglSqrtx(GGLfixed a)
98 {
99     // Compute a full precision square-root (24 bits accuracy)
100     GGLfixed r = 0;
101     GGLfixed bit = 0x800000;
102     int32_t bshift = 15;
103     do {
104         GGLfixed temp = bit + (r<<1);
105         if (bshift >= 8)    temp <<= (bshift-8);
106         else                temp >>= (8-bshift);
107         if (a >= temp) {
108             r += bit;
109             a -= temp;
110         }
111         bshift--;
112     } while (bit>>=1);
113     return r;
114 }
115 
116 // ------------------------------------------------------------------------
117 
118 static const GGLfixed ggl_log_approx_tab[] = {
119     // -ln(x)/ln(2) with x = N/16, N=[8...16]
120     0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
121 };
122 
123 static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
124 	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
125 };
126 
gglPowx(GGLfixed x,GGLfixed y)127 GGLfixed gglPowx(GGLfixed x, GGLfixed y)
128 {
129     // prerequisite: 0 <= x <= 1, and y >=0
130 
131     // pow(x,y) = 2^(y*log2(x))
132     // =  2^(y*log2(x*(2^exp)*(2^-exp))))
133     // =  2^(y*(log2(X)-exp))
134     // =  2^(log2(X)*y - y*exp)
135     // =  2^( - (-log2(X)*y + y*exp) )
136 
137     int32_t exp = gglClz(x) - 16;
138     GGLfixed f = x << exp;
139     x = (f & 0x0FFF)<<4;
140     f = (f >> 12) & 0x7;
141     GGLfixed p = gglMulAddx(
142             ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
143             ggl_log_approx_tab[f]);
144     p = gglMulAddx(p, y, y*exp);
145     exp = gglFixedToIntFloor(p);
146     if (exp < 31) {
147         p = gglFracx(p);
148         x = (p & 0x1FFF)<<3;
149         p >>= 13;
150         p = gglMulAddx(
151                 ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
152                 ggl_alog_approx_tab[p]);
153         p >>= exp;
154     } else {
155         p = 0;
156     }
157     return p;
158         // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
159 }
160 
161 // ------------------------------------------------------------------------
162 
gglDivQ(GGLfixed n,GGLfixed d,int32_t i)163 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
164 {
165     //int32_t r =int32_t((int64_t(n)<<i)/d);
166     const int32_t ds = n^d;
167     if (n<0) n = -n;
168     if (d<0) d = -d;
169     int nd = gglClz(d) - gglClz(n);
170     i += nd + 1;
171     if (nd > 0) d <<= nd;
172     else        n <<= -nd;
173     uint32_t q = 0;
174 
175     int j = i & 7;
176     i >>= 3;
177 
178     // gcc deals with the code below pretty well.
179     // we get 3.75 cycles per bit in the main loop
180     // and 8 cycles per bit in the termination loop
181     if (ggl_likely(i)) {
182         n -= d;
183         do {
184             q <<= 8;
185             if (n>=0)   q |= 128;
186             else        n += d;
187             n = n*2 - d;
188             if (n>=0)   q |= 64;
189             else        n += d;
190             n = n*2 - d;
191             if (n>=0)   q |= 32;
192             else        n += d;
193             n = n*2 - d;
194             if (n>=0)   q |= 16;
195             else        n += d;
196             n = n*2 - d;
197             if (n>=0)   q |= 8;
198             else        n += d;
199             n = n*2 - d;
200             if (n>=0)   q |= 4;
201             else        n += d;
202             n = n*2 - d;
203             if (n>=0)   q |= 2;
204             else        n += d;
205             n = n*2 - d;
206             if (n>=0)   q |= 1;
207             else        n += d;
208 
209             if (--i == 0)
210                 goto finish;
211 
212             n = n*2 - d;
213         } while(true);
214         do {
215             q <<= 1;
216             n = n*2 - d;
217             if (n>=0)   q |= 1;
218             else        n += d;
219         finish: ;
220         } while (j--);
221         return (ds<0) ? -q : q;
222     }
223 
224     n -= d;
225     if (n>=0)   q |= 1;
226     else        n += d;
227     j--;
228     goto finish;
229 }
230 
231 // ------------------------------------------------------------------------
232 
233 // assumes that the int32_t values of a, b, and c are all positive
234 // use when both a and b are larger than c
235 
236 template <typename T>
swap(T & a,T & b)237 static inline void swap(T& a, T& b) {
238     T t(a);
239     a = b;
240     b = t;
241 }
242 
243 static __attribute__((noinline))
slow_muldiv(uint32_t a,uint32_t b,uint32_t c)244 int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
245 {
246 	// first we compute a*b as a 64-bit integer
247     // (GCC generates umull with the code below)
248     uint64_t ab = uint64_t(a)*b;
249     uint32_t hi = ab>>32;
250     uint32_t lo = ab;
251     uint32_t result;
252 
253 	// now perform the division
254 	if (hi >= c) {
255 	overflow:
256 		result = 0x7fffffff;  // basic overflow
257 	} else if (hi == 0) {
258 		result = lo/c;  // note: c can't be 0
259 		if ((result >> 31) != 0)  // result must fit in 31 bits
260 			goto overflow;
261 	} else {
262 		uint32_t r = hi;
263 		int bits = 31;
264 	    result = 0;
265 		do {
266 			r = (r << 1) | (lo >> 31);
267 			lo <<= 1;
268 			result <<= 1;
269 			if (r >= c) {
270 				r -= c;
271 				result |= 1;
272 			}
273 		} while (bits--);
274 	}
275 	return int32_t(result);
276 }
277 
278 // assumes a >= 0 and c >= b >= 0
279 static inline
quick_muldiv(int32_t a,int32_t b,int32_t c)280 int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
281 {
282     int32_t r = 0, q = 0, i;
283     int leading = gglClz(a);
284     i = 32 - leading;
285     a <<= leading;
286     do {
287         r <<= 1;
288         if (a < 0)
289             r += b;
290         a <<= 1;
291         q <<= 1;
292         if (r >= c) {
293             r -= c;
294             q++;
295         }
296         asm(""::); // gcc generates better code this way
297         if (r >= c) {
298             r -= c;
299             q++;
300         }
301     }
302     while (--i);
303     return q;
304 }
305 
306 // this function computes a*b/c with 64-bit intermediate accuracy
307 // overflows (e.g. division by 0) are handled and return INT_MAX
308 
gglMulDivi(int32_t a,int32_t b,int32_t c)309 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
310 {
311 	int32_t result;
312 	int32_t sign = a^b^c;
313 
314 	if (a < 0) a = -a;
315 	if (b < 0) b = -b;
316 	if (c < 0) c = -c;
317 
318     if (a < b) {
319         swap(a, b);
320     }
321 
322 	if (b <= c) result = quick_muldiv(a, b, c);
323 	else        result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
324 
325 	if (sign < 0)
326 		result = -result;
327 
328     return result;
329 }
330