1 /* libs/pixelflinger/codeflinger/load_store.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #define LOG_TAG "pixelflinger-code"
19 
20 #include <assert.h>
21 #include <stdio.h>
22 
23 #include <log/log.h>
24 
25 #include "GGLAssembler.h"
26 
27 namespace android {
28 
29 // ----------------------------------------------------------------------------
30 
store(const pointer_t & addr,const pixel_t & s,uint32_t flags)31 void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
32 {
33     const int bits = addr.size;
34     const int inc = (flags & WRITE_BACK)?1:0;
35     switch (bits) {
36     case 32:
37         if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
38         else        STR(AL, s.reg, addr.reg);
39         break;
40     case 24:
41         // 24 bits formats are a little special and used only for RGB
42         // 0x00BBGGRR is unpacked as R,G,B
43         STRB(AL, s.reg, addr.reg, immed12_pre(0));
44         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45         STRB(AL, s.reg, addr.reg, immed12_pre(1));
46         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
47         STRB(AL, s.reg, addr.reg, immed12_pre(2));
48         if (!(s.flags & CORRUPTIBLE)) {
49             MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
50         }
51         if (inc)
52             ADD(AL, 0, addr.reg, addr.reg, imm(3));
53         break;
54     case 16:
55         if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
56         else        STRH(AL, s.reg, addr.reg);
57         break;
58     case  8:
59         if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
60         else        STRB(AL, s.reg, addr.reg);
61         break;
62     }
63 }
64 
load(const pointer_t & addr,const pixel_t & s,uint32_t flags)65 void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
66 {
67     Scratch scratches(registerFile());
68     int s0;
69 
70     const int bits = addr.size;
71     const int inc = (flags & WRITE_BACK)?1:0;
72     switch (bits) {
73     case 32:
74         if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
75         else        LDR(AL, s.reg, addr.reg);
76         break;
77     case 24:
78         // 24 bits formats are a little special and used only for RGB
79         // R,G,B is packed as 0x00BBGGRR
80         s0 = scratches.obtain();
81         if (s.reg != addr.reg) {
82             LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
83             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
84             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
85             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
86             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
87         } else {
88             int s1 = scratches.obtain();
89             LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
90             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
91             ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
92             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
93             ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
94         }
95         if (inc)
96             ADD(AL, 0, addr.reg, addr.reg, imm(3));
97         break;
98     case 16:
99         if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
100         else        LDRH(AL, s.reg, addr.reg);
101         break;
102     case  8:
103         if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
104         else        LDRB(AL, s.reg, addr.reg);
105         break;
106     }
107 }
108 
extract(integer_t & d,int s,int h,int l,int bits)109 void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
110 {
111     const int maskLen = h-l;
112 
113 #ifdef __mips__
114     assert(maskLen<=11);
115 #else
116     assert(maskLen<=8);
117 #endif
118     assert(h);
119 
120     if (h != bits) {
121         const int mask = ((1<<maskLen)-1) << l;
122         if (isValidImmediate(mask)) {
123             AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
124         } else if (isValidImmediate(~mask)) {
125             BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
126         } else {
127             MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
128             l += 32-h;
129             h = 32;
130         }
131         s = d.reg;
132     }
133 
134     if (l) {
135         MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
136         s = d.reg;
137     }
138 
139     if (s != d.reg) {
140         MOV(AL, 0, d.reg, s);
141     }
142 
143     d.s = maskLen;
144 }
145 
extract(integer_t & d,const pixel_t & s,int component)146 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
147 {
148     extract(d,  s.reg,
149                 s.format.c[component].h,
150                 s.format.c[component].l,
151                 s.size());
152 }
153 
extract(component_t & d,const pixel_t & s,int component)154 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
155 {
156     integer_t r(d.reg, 32, d.flags);
157     extract(r,  s.reg,
158                 s.format.c[component].h,
159                 s.format.c[component].l,
160                 s.size());
161     d = component_t(r);
162 }
163 
164 
expand(integer_t & d,const component_t & s,int dbits)165 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
166 {
167     if (s.l || (s.flags & CLEAR_HI)) {
168         extract(d, s.reg, s.h, s.l, 32);
169         expand(d, d, dbits);
170     } else {
171         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
172     }
173 }
174 
expand(component_t & d,const component_t & s,int dbits)175 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
176 {
177     integer_t r(d.reg, 32, d.flags);
178     expand(r, s, dbits);
179     d = component_t(r);
180 }
181 
expand(integer_t & dst,const integer_t & src,int dbits)182 void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
183 {
184     assert(src.size());
185 
186     int sbits = src.size();
187     int s = src.reg;
188     int d = dst.reg;
189 
190     // be sure to set 'dst' after we read 'src' as they may be identical
191     dst.s = dbits;
192     dst.flags = 0;
193 
194     if (dbits<=sbits) {
195         if (s != d) {
196             MOV(AL, 0, d, s);
197         }
198         return;
199     }
200 
201     if (sbits == 1) {
202         RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
203             // d = (s<<dbits) - s;
204         return;
205     }
206 
207     if (dbits % sbits) {
208         MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
209             // d = s << (dbits-sbits);
210         dbits -= sbits;
211         do {
212             ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
213                 // d |= d >> sbits;
214             dbits -= sbits;
215             sbits *= 2;
216         } while(dbits>0);
217         return;
218     }
219 
220     dbits -= sbits;
221     do {
222         ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
223             // d |= d<<sbits;
224         s = d;
225         dbits -= sbits;
226         if (sbits*2 < dbits) {
227             sbits *= 2;
228         }
229     } while(dbits>0);
230 }
231 
// Emits the instructions that narrow source component 's' (bits [s.l, s.h)
// of s.reg) to the width of component 'component' in d's pixel format and
// merge the result into d.reg at that component's bit position.
//
//   d          destination pixel. The result is written with MOV when the
//              working register is d.reg (which is the case when FIRST is
//              set in d.flags) and OR'ed into d.reg otherwise. FIRST is
//              always cleared in d.flags.
//   component  index into d.format.c[] selecting the destination field.
//   s          source component; taken by value because its register and
//              bit range are updated locally as instructions are emitted.
//   dither     register holding the dither value; only read when dithering
//              is applied (mDithering set and source wider than destination).
downshift(pixel_t & d,int component,component_t s,const reg_t & dither)232 void GGLAssembler::downshift(
233         pixel_t& d, int component, component_t s, const reg_t& dither)
234 {
235     Scratch scratches(registerFile());
236 
237     int sh = s.h;
238     int sl = s.l;
    // Masking is only requested when the matching CLEAR_* flag is set AND
    // there actually are bits above sh (resp. below sl) in a 32-bit register.
239     int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
240     int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
241     int sbits = sh - sl;
242 
    // Bit range [dl, dh) of the destination component inside d.reg.
243     int dh = d.format.c[component].h;
244     int dl = d.format.c[component].l;
245     int dbits = dh - dl;
246     int dithering = 0;
247 
    // A source narrower than the destination is reported as an error, but
    // code generation continues.
248     ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
249 
250     if (sbits>dbits) {
251         // see if we need to dither
252         dithering = mDithering;
253     }
254 
    // Pick the working register: d.reg itself for the first component
    // written to the pixel, otherwise the source register if it may be
    // clobbered, otherwise a scratch register.
255     int ireg = d.reg;
256     if (!(d.flags & FIRST)) {
257         if (s.flags & CORRUPTIBLE)  {
258             ireg = s.reg;
259         } else {
260             ireg = scratches.obtain();
261         }
262     }
263     d.flags &= ~FIRST;
264 
265     if (maskHiBits) {
266         // we need to mask the high bits (and possibly the lowbits too)
267         // and we might be able to use immediate mask.
268         if (!dithering) {
269             // we don't do this if we only have maskLoBits because we can
270             // do it more efficiently below (in the case where dl=0)
            // The mask keeps only the top 'dbits' bits of the source
            // component, i.e. bits [offset, sh), so the low bits are
            // discarded as well and the source becomes exactly dbits wide.
271             const int offset = sh - dbits;
272             if (dbits<=8 && offset >= 0) {
273                 const uint32_t mask = ((1<<dbits)-1) << offset;
274                 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
275                     build_and_immediate(ireg, s.reg, mask, 32);
276                     sl = offset;
277                     s.reg = ireg;
278                     sbits = dbits;
279                     maskLoBits = maskHiBits = 0;
280                 }
281             }
282         } else {
283             // in the dithering case though, we need to preserve the lower bits
            // The mask therefore covers the whole source component [sl, sh).
284             const uint32_t mask = ((1<<sbits)-1) << sl;
285             if (isValidImmediate(mask) || isValidImmediate(~mask)) {
286                 build_and_immediate(ireg, s.reg, mask, 32);
287                 s.reg = ireg;
288                 maskLoBits = maskHiBits = 0;
289             }
290         }
291     }
292 
293     // XXX: we could special case (maskHiBits & !maskLoBits)
294     // like we do for maskLoBits below, but it happens very rarely
295     // that we have maskHiBits only and the conditions necessary to lead
296     // to better code (like doing d |= s << 24)
297 
    // Still set here means no immediate mask could be used above: shift the
    // component left so that it ends at bit 32, which pushes the unwanted
    // high bits out of the register. sl/sh are updated to the new position.
298     if (maskHiBits) {
299         MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
300         sl += 32-sh;
301         sh = 32;
302         s.reg = ireg;
303         maskHiBits = 0;
304     }
305 
306     //	Downsampling should be performed as follows:
307     //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
308     //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
309     //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
310     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
311     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
312     //
313     //	By approximating (1>>dbits) and (1>>sbits) to 0:
314     //
315     //		V>>(sbits-dbits)	-	V>>sbits
316     //
317 	//  A good approximation is V>>(sbits-dbits),
318     //  but better one (needed for dithering) is:
319     //
320     //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
321     //		(V<<dbits	-	V)>>sbits
322     //		(V	-	V>>dbits)>>(sbits-dbits)
323 
324     // Dithering is done here
325     if (dithering) {
326         comment("dithering");
        // Bring the component down to bit 0 first, so that it occupies
        // bits [0, sh) of the working register.
327         if (sl) {
328             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
329             sh -= sl;
330             sl = 0;
331             s.reg = ireg;
332         }
333         // scaling (V-V>>dbits)
334         SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        // Add the dither value, shifted right or left by the difference
        // between GGL_DITHER_BITS and the number of bits being dropped
        // (sbits-dbits).
        // NOTE(review): GGL_DITHER_BITS is presumably the number of
        // significant bits in the dither value -- confirm against its
        // definition.
335         const int shift = (GGL_DITHER_BITS - (sbits-dbits));
336         if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
337         else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
338         else                ADD(AL, 0, ireg, ireg, dither.reg);
339         s.reg = ireg;
340     }
341 
    // Final placement into d.reg. Throughout, MOV is used when the working
    // register is d.reg and ORR (merge with what d.reg already holds)
    // otherwise.
342     if ((maskLoBits|dithering) && (sh > dbits)) {
        // Low bits have to be dropped: shift right so that only the top
        // 'dbits' bits of the component remain, starting at bit 0, then
        // shift left by dl to reach the destination position.
343         int shift = sh-dbits;
344         if (dl) {
345             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
346             if (ireg == d.reg) {
347                 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
348             } else {
349                 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
350             }
351         } else {
            // dl == 0: the right shift alone puts the value in place.
352             if (ireg == d.reg) {
353                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
354             } else {
355                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
356             }
357         }
358     } else {
        // No low bits to drop: a single shift by the distance between the
        // top of the source (sh) and the top of the destination (dh) aligns
        // the component, in whichever direction is required.
359         int shift = sh-dh;
360         if (shift>0) {
361             if (ireg == d.reg) {
362                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
363             } else {
364                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
365             }
366         } else if (shift<0) {
367             if (ireg == d.reg) {
368                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
369             } else {
370                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
371             }
372         } else {
            // Already aligned: copy (skipped if source and destination are
            // the same register) or merge as-is.
373             if (ireg == d.reg) {
374                 if (s.reg != d.reg) {
375                     MOV(AL, 0, d.reg, s.reg);
376                 }
377             } else {
378                 ORR(AL, 0, d.reg, d.reg, s.reg);
379             }
380         }
381     }
382 }
383 
384 }; // namespace android
385