1 /* libs/pixelflinger/codeflinger/blending.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #define LOG_TAG "pixelflinger-code"
19 
20 #include <assert.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 
26 #include <android-base/macros.h>
27 #include <log/log.h>
28 
29 #include "GGLAssembler.h"
30 
31 namespace android {
32 
build_fog(component_t & temp,int component,Scratch & regs)33 void GGLAssembler::build_fog(
34                         component_t& temp,      // incomming fragment / output
35                         int component,
36                         Scratch& regs)
37 {
38    if (mInfo[component].fog) {
39         Scratch scratches(registerFile());
40         comment("fog");
41 
42         integer_t fragment(temp.reg, temp.h, temp.flags);
43         if (!(temp.flags & CORRUPTIBLE)) {
44             temp.reg = regs.obtain();
45             temp.flags |= CORRUPTIBLE;
46         }
47 
48         integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
49         LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
50                 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
51 
52         integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
53         CONTEXT_LOAD(factor.reg, generated_vars.f);
54 
55         // clamp fog factor (TODO: see if there is a way to guarantee
56         // we won't overflow, when setting the iterators)
57         BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
58         CMP(AL, factor.reg, imm( 0x10000 ));
59         MOV(HS, 0, factor.reg, imm( 0x10000 ));
60 
61         build_blendFOneMinusF(temp, factor, fragment, fogColor);
62     }
63 }
64 
// Emits the blending stage for one component.
//
// The source and destination blend factors are taken from
// mBlendSrc/mBlendDst (mBlendSrcA/mBlendDstA for the alpha component).
// The incoming fragment in 'temp' is combined with the same component of
// the framebuffer pixel and the result is left in 'temp', which is flagged
// CORRUPTIBLE on exit. Nothing is emitted when blending is not enabled for
// 'component'.
//
//   temp       incoming fragment on entry, blended result on exit
//   pixel      framebuffer pixel the destination component is extracted from
//   component  index of the component being processed
//   regs       allocator used when 'temp' needs a register of its own
void GGLAssembler::build_blending(
                        component_t& temp,      // incoming fragment / output
                        const pixel_t& pixel,   // framebuffer
                        int component,
                        Scratch& regs)
{
   if (!mInfo[component].blend)
        return;

    // alpha has its own pair of blend factors
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    // for the alpha component, SRC_ALPHA_SATURATE is handled as ONE
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
        fs = GGL_ONE;
    // bit mask telling which of source/destination are needed, either as a
    // blended term (BLEND_*) or to build a factor (FACTOR_*)
    const int blending = blending_codes(fs, fd);
    if (!temp.size()) {
        // here, blending will produce something which doesn't depend on
        // that component (eg: GL_ZERO:GL_*), so the register has not been
        // allocated yet. Will never be used as a source.
        temp = component_t(regs.obtain(), CORRUPTIBLE);
    }

    // we are doing real blending...
    // fb:          extracted dst
    // fragment:    extracted src
    // temp:        component_t(fragment) and result

    // scoped register allocator
    Scratch scratches(registerFile());
    comment("blending");

    // we can optimize these cases a bit...
    // (1) saturation is not needed
    // (2) we can use only one multiply instead of 2
    // (3) we can reduce the register pressure
    //      R = S*f + D*(1-f) = (S-D)*f + D
    //      R = S*(1-f) + D*f = (D-S)*f + S

    // source factor is f, destination factor is 1-f
    const bool same_factor_opt1 =
        (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
        (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
        (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
        (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);

    // source factor is 1-f, destination factor is f
    const bool same_factor_opt2 =
        (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
        (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
        (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
        (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);


    // XXX: we could also optimize these cases:
    // R = S*f + D*f = (S+D)*f
    // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
    // R = S*D + D*S = 2*S*D


    // see if we need to extract 'component' from the destination (fb)
    integer_t fb;
    if (blending & (BLEND_DST|FACTOR_DST)) {
        fb.setTo(scratches.obtain(), 32);
        extract(fb, pixel, component);
        if (mDithering) {
            // XXX: maybe what we should do instead, is simply
            // expand fb -or- fragment to the larger of the two
            if (fb.size() < temp.size()) {
                // for now we expand 'fb' to min(fragment, 8)
                int new_size = temp.size() < 8 ? temp.size() : 8;
                expand(fb, fb, new_size);
            }
        }
    }


    // convert input fragment to integer_t
    // (when temp may be overwritten, its low bits are shifted out in place)
    if (temp.l && (temp.flags & CORRUPTIBLE)) {
        MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
        temp.h -= temp.l;
        temp.l = 0;
    }
    integer_t fragment(temp.reg, temp.size(), temp.flags);

    // if not done yet, convert input fragment to integer_t
    if (temp.l) {
        // here we know temp is not CORRUPTIBLE
        // so the shifted copy goes to a scratch register instead
        fragment.reg = scratches.obtain();
        MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
        fragment.flags |= CORRUPTIBLE;
    }

    if (!(temp.flags & CORRUPTIBLE)) {
        // temp is not corruptible, but since it's the destination it
        // will be modified, so we need to allocate a new register.
        temp.reg = regs.obtain();
        // NOTE(review): CORRUPTIBLE is already clear on this path, so the
        // next statement has no effect on temp.flags -- confirm whether
        // something else was intended.
        temp.flags &= ~CORRUPTIBLE;
        fragment.flags &= ~CORRUPTIBLE;
    }

    if ((blending & BLEND_SRC) && !same_factor_opt1) {
        // source (fragment) is needed for the blending stage
        // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
        fragment.flags &= ~CORRUPTIBLE;
    }


    if (same_factor_opt1) {
        //  R = S*f + D*(1-f) = (S-D)*f + D
        integer_t factor;
        build_blend_factor(factor, fs,
                component, pixel, fragment, fb, scratches);
        // fb is always corruptible from this point
        fb.flags |= CORRUPTIBLE;
        build_blendFOneMinusF(temp, factor, fragment, fb);
    } else if (same_factor_opt2) {
        //  R = S*(1-f) + D*f = (D-S)*f + S
        integer_t factor;
        // fb is always corruptible here
        fb.flags |= CORRUPTIBLE;
        build_blend_factor(factor, fd,
                component, pixel, fragment, fb, scratches);
        build_blendOneMinusFF(temp, factor, fragment, fb);
    } else {
        // general case: each factor is handled on its own
        integer_t src_factor;
        integer_t dst_factor;

        // if destination (fb) is not needed for the blending stage,
        // then it can be marked as CORRUPTIBLE
        if (!(blending & BLEND_DST)) {
            fb.flags |= CORRUPTIBLE;
        }

        // XXX: try to mark some registers as CORRUPTIBLE
        // in most case we could make those corruptible
        // when we're processing the last component
        // but not always, for instance
        //    when fragment is constant and not reloaded
        //    when fb is needed for logic-ops or masking
        //    when a register is aliased (for instance with mAlphaSource)

        // blend away...
        if (fs==GGL_ZERO) {
            if (fd==GGL_ZERO) {         // R = 0
                // already taken care of
            } else if (fd==GGL_ONE) {   // R = D
                // already taken care of
            } else {                    // R = D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor(temp, fb, dst_factor);
            }
        } else if (fs==GGL_ONE) {
            if (fd==GGL_ZERO) {         // R = S
                // NOP, taken care of
            } else if (fd==GGL_ONE) {   // R = S + D
                component_add(temp, fb, fragment); // args order matters
                component_sat(temp);
            } else {                    // R = S + D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor_add(temp, fb, dst_factor, component_t(fragment));
                component_sat(temp);
            }
        } else {
            // compute fs
            build_blend_factor(src_factor, fs,
                    component, pixel, fragment, fb, scratches);
            if (fd==GGL_ZERO) {         // R = S*fs
                mul_factor(temp, fragment, src_factor);
            } else if (fd==GGL_ONE) {   // R = S*fs + D
                mul_factor_add(temp, fragment, src_factor, component_t(fb));
                component_sat(temp);
            } else {                    // R = S*fs + D*fd
                mul_factor(temp, fragment, src_factor);
                // the source factor is no longer needed; hand its register
                // back if it came from the local scratch pool
                if (scratches.isUsed(src_factor.reg))
                    scratches.recycle(src_factor.reg);
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                // temp is both the addend and the destination here
                mul_factor_add(temp, fb, dst_factor, temp);
                // this branch is only reached when both flags are false,
                // so the saturation below is always emitted
                if (!same_factor_opt1 && !same_factor_opt2) {
                    component_sat(temp);
                }
            }
        }
    }

    // now we can be corrupted (it's the dest)
    temp.flags |= CORRUPTIBLE;
}
255 
// Emits the code that computes blend factor 'f' and describes where it
// lives in 'factor'.
//
//   factor     output: register, size and flags of the computed factor
//   f          blend factor selector (one of the GGL_* values handled below)
//   component  index of the component being processed
//   dst_pixel  framebuffer pixel, used to extract destination alpha
//   fragment   source component; may give up its register to the factor
//   fb         destination component; may give up its register to the factor
//   scratches  allocator used when no corruptible register can be reused
//
// The factor is built in three steps: pick a register, compute the base
// value, then turn it into (1 - value) for the ONE_MINUS_* selectors.
// Finally the factor is reduced to at most 8 bits.
void GGLAssembler::build_blend_factor(
        integer_t& factor, int f, int component,
        const pixel_t& dst_pixel,
        integer_t& fragment,
        integer_t& fb,
        Scratch& scratches)
{
    // by default source alpha is read from the fragment itself
    integer_t src_alpha(fragment);

    // src_factor/dst_factor won't be used after blending,
    // so it's fine to mark them as CORRUPTIBLE (if not aliased)
    factor.flags |= CORRUPTIBLE;

    // step 1: choose the register that will hold the factor
    switch(f) {
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
        if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
            // we're processing alpha, so we already have
            // src-alpha in fragment, and we need src-alpha just this time.
        } else {
            // alpha-src will be needed for other components
            if (!mBlendFactorCached || mBlendFactorCached==f) {
                // the factor lives in mAlphaSource's register, which is
                // shared, hence not corruptible
                src_alpha = mAlphaSource;
                factor = mAlphaSource;
                factor.flags &= ~CORRUPTIBLE;
                // we already computed the blend factor before, nothing to do.
                if (mBlendFactorCached)
                    return;
                // this is the first time, make sure to compute the blend
                // factor properly.
                mBlendFactorCached = f;
                break;
            } else {
                // we have a cached alpha blend factor, but we want another one,
                // this should really not happen because by construction,
                // we cannot have BOTH source and destination
                // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
                // the blending stage uses the f/(1-f) optimization)

                // for completeness, we handle this case though. Since there
                // are only 2 choices, this means we want "the other one"
                // (1-factor)
                // the cached value is replaced in place, in mAlphaSource's
                // register
                factor = mAlphaSource;
                factor.flags &= ~CORRUPTIBLE;
                RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
                mBlendFactorCached = f;
                return;
            }
        }
        // only reached from the "alpha component" case above, which still
        // needs a register for its factor
        FALLTHROUGH_INTENDED;
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
    case GGL_SRC_ALPHA_SATURATE:
        // help us find out what register we can use for the blend-factor
        // CORRUPTIBLE registers are chosen first, or a new one is allocated.
        // (the donor loses its CORRUPTIBLE flag so that it is not handed
        // out a second time)
        if (fragment.flags & CORRUPTIBLE) {
            factor.setTo(fragment.reg, 32, CORRUPTIBLE);
            fragment.flags &= ~CORRUPTIBLE;
        } else if (fb.flags & CORRUPTIBLE) {
            factor.setTo(fb.reg, 32, CORRUPTIBLE);
            fb.flags &= ~CORRUPTIBLE;
        } else {
            factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
        }
        break;
    }

    // XXX: doesn't work if size==1

    // step 2: compute the base value x + (x >> (s-1)); for an s-bit x this
    // adds x's top bit, so that the largest value maps to exactly 1<<s
    switch(f) {
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
        factor.s = fb.s;
        ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
        break;
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
        factor.s = fragment.s;
        ADD(AL, 0, factor.reg, fragment.reg,
            reg_imm(fragment.reg, LSR, fragment.s-1));
        break;
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
        factor.s = src_alpha.s;
        ADD(AL, 0, factor.reg, src_alpha.reg,
                reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
        break;
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
        // XXX: should be precomputed
        extract(factor, dst_pixel, GGLFormat::ALPHA);
        ADD(AL, 0, factor.reg, factor.reg,
                reg_imm(factor.reg, LSR, factor.s-1));
        break;
    case GGL_SRC_ALPHA_SATURATE:
        // XXX: should be precomputed
        // XXX: f = min(As, 1-Ad)
        // btw, we're guaranteed that Ad's size is <= 8, because
        // it's extracted from the framebuffer
        // (no instruction is emitted for this factor here)
        break;
    }

    // step 3: ONE_MINUS_* factors become (1<<s) - value
    switch(f) {
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_ONE_MINUS_SRC_ALPHA:
        RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
    }

    // don't need more than 8-bits for the blend factor
    // and this will prevent overflows in the multiplies later
    if (factor.s > 8) {
        MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
        factor.s = 8;
    }
}
377 
blending_codes(int fs,int fd)378 int GGLAssembler::blending_codes(int fs, int fd)
379 {
380     int blending = 0;
381     switch(fs) {
382     case GGL_ONE:
383         blending |= BLEND_SRC;
384         break;
385 
386     case GGL_ONE_MINUS_DST_COLOR:
387     case GGL_DST_COLOR:
388         blending |= FACTOR_DST|BLEND_SRC;
389         break;
390     case GGL_ONE_MINUS_DST_ALPHA:
391     case GGL_DST_ALPHA:
392         // no need to extract 'component' from the destination
393         // for the blend factor, because we need ALPHA only.
394         blending |= BLEND_SRC;
395         break;
396 
397     case GGL_ONE_MINUS_SRC_COLOR:
398     case GGL_SRC_COLOR:
399         blending |= FACTOR_SRC|BLEND_SRC;
400         break;
401     case GGL_ONE_MINUS_SRC_ALPHA:
402     case GGL_SRC_ALPHA:
403     case GGL_SRC_ALPHA_SATURATE:
404         blending |= FACTOR_SRC|BLEND_SRC;
405         break;
406     }
407     switch(fd) {
408     case GGL_ONE:
409         blending |= BLEND_DST;
410         break;
411 
412     case GGL_ONE_MINUS_DST_COLOR:
413     case GGL_DST_COLOR:
414         blending |= FACTOR_DST|BLEND_DST;
415         break;
416     case GGL_ONE_MINUS_DST_ALPHA:
417     case GGL_DST_ALPHA:
418         blending |= FACTOR_DST|BLEND_DST;
419         break;
420 
421     case GGL_ONE_MINUS_SRC_COLOR:
422     case GGL_SRC_COLOR:
423         blending |= FACTOR_SRC|BLEND_DST;
424         break;
425     case GGL_ONE_MINUS_SRC_ALPHA:
426     case GGL_SRC_ALPHA:
427         // no need to extract 'component' from the source
428         // for the blend factor, because we need ALPHA only.
429         blending |= BLEND_DST;
430         break;
431     }
432     return blending;
433 }
434 
435 // ---------------------------------------------------------------------------
436 
build_blendFOneMinusF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)437 void GGLAssembler::build_blendFOneMinusF(
438         component_t& temp,
439         const integer_t& factor,
440         const integer_t& fragment,
441         const integer_t& fb)
442 {
443     //  R = S*f + D*(1-f) = (S-D)*f + D
444     Scratch scratches(registerFile());
445     // compute S-D
446     integer_t diff(fragment.flags & CORRUPTIBLE ?
447             fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
448     const int shift = fragment.size() - fb.size();
449     if (shift>0)        RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
450     else if (shift<0)   RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
451     else                RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
452     mul_factor_add(temp, diff, factor, component_t(fb));
453 }
454 
build_blendOneMinusFF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)455 void GGLAssembler::build_blendOneMinusFF(
456         component_t& temp,
457         const integer_t& factor,
458         const integer_t& fragment,
459         const integer_t& fb)
460 {
461     //  R = S*f + D*(1-f) = (S-D)*f + D
462     Scratch scratches(registerFile());
463     // compute D-S
464     integer_t diff(fb.flags & CORRUPTIBLE ?
465             fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
466     const int shift = fragment.size() - fb.size();
467     if (shift>0)        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
468     else if (shift<0)   SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
469     else                SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
470     mul_factor_add(temp, diff, factor, component_t(fragment));
471 }
472 
473 // ---------------------------------------------------------------------------
474 
mul_factor(component_t & d,const integer_t & v,const integer_t & f)475 void GGLAssembler::mul_factor(  component_t& d,
476                                 const integer_t& v,
477                                 const integer_t& f)
478 {
479     int vs = v.size();
480     int fs = f.size();
481     int ms = vs+fs;
482 
483     // XXX: we could have special cases for 1 bit mul
484 
485     // all this code below to use the best multiply instruction
486     // wrt the parameters size. We take advantage of the fact
487     // that the 16-bits multiplies allow a 16-bit shift
488     // The trick is that we just make sure that we have at least 8-bits
489     // per component (which is enough for a 8 bits display).
490 
491     int xy;
492     int vshift = 0;
493     int fshift = 0;
494     int smulw = 0;
495 
496     if (vs<16) {
497         if (fs<16) {
498             xy = xyBB;
499         } else if (GGL_BETWEEN(fs, 24, 31)) {
500             ms -= 16;
501             xy = xyTB;
502         } else {
503             // eg: 15 * 18  ->  15 * 15
504             fshift = fs - 15;
505             ms -= fshift;
506             xy = xyBB;
507         }
508     } else if (GGL_BETWEEN(vs, 24, 31)) {
509         if (fs<16) {
510             ms -= 16;
511             xy = xyTB;
512         } else if (GGL_BETWEEN(fs, 24, 31)) {
513             ms -= 32;
514             xy = xyTT;
515         } else {
516             // eg: 24 * 18  ->  8 * 18
517             fshift = fs - 15;
518             ms -= 16 + fshift;
519             xy = xyTB;
520         }
521     } else {
522         if (fs<16) {
523             // eg: 18 * 15  ->  15 * 15
524             vshift = vs - 15;
525             ms -= vshift;
526             xy = xyBB;
527         } else if (GGL_BETWEEN(fs, 24, 31)) {
528             // eg: 18 * 24  ->  15 * 8
529             vshift = vs - 15;
530             ms -= 16 + vshift;
531             xy = xyBT;
532         } else {
533             // eg: 18 * 18  ->  (15 * 18)>>16
534             fshift = fs - 15;
535             ms -= 16 + fshift;
536             xy = yB;    //XXX SMULWB
537             smulw = 1;
538         }
539     }
540 
541     ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
542 
543     int vreg = v.reg;
544     int freg = f.reg;
545     if (vshift) {
546         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
547         vreg = d.reg;
548     }
549     if (fshift) {
550         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
551         freg = d.reg;
552     }
553     if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
554     else        SMUL(AL, xy, d.reg, vreg, freg);
555 
556 
557     d.h = ms;
558     if (mDithering) {
559         d.l = 0;
560     } else {
561         d.l = fs;
562         d.flags |= CLEAR_LO;
563     }
564 }
565 
mul_factor_add(component_t & d,const integer_t & v,const integer_t & f,const component_t & a)566 void GGLAssembler::mul_factor_add(  component_t& d,
567                                     const integer_t& v,
568                                     const integer_t& f,
569                                     const component_t& a)
570 {
571     // XXX: we could have special cases for 1 bit mul
572     Scratch scratches(registerFile());
573 
574     int vs = v.size();
575     int fs = f.size();
576     int as = a.h;
577     int ms = vs+fs;
578 
579     ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
580 
581     integer_t add(a.reg, a.h, a.flags);
582 
583     // 'a' is a component_t but it is guaranteed to have
584     // its high bits set to 0. However in the dithering case,
585     // we can't get away with truncating the potentially bad bits
586     // so extraction is needed.
587 
588    if ((mDithering) && (a.size() < ms)) {
589         // we need to expand a
590         if (!(a.flags & CORRUPTIBLE)) {
591             // ... but it's not corruptible, so we need to pick a
592             // temporary register.
593             // Try to uses the destination register first (it's likely
594             // to be usable, unless it aliases an input).
595             if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
596                 add.reg = d.reg;
597             } else {
598                 add.reg = scratches.obtain();
599             }
600         }
601         expand(add, a, ms); // extracts and expands
602         as = ms;
603     }
604 
605     if (ms == as) {
606         if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
607         else                MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
608     } else {
609         int temp = d.reg;
610         if (temp == add.reg) {
611             // the mul will modify add.reg, we need an intermediary reg
612             if (v.flags & CORRUPTIBLE)      temp = v.reg;
613             else if (f.flags & CORRUPTIBLE) temp = f.reg;
614             else                            temp = scratches.obtain();
615         }
616 
617         if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
618         else                MUL(AL, 0, temp, v.reg, f.reg);
619 
620         if (ms>as) {
621             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
622         } else if (ms<as) {
623             // not sure if we should expand the mul instead?
624             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
625         }
626     }
627 
628     d.h = ms;
629     if (mDithering) {
630         d.l = a.l;
631     } else {
632         d.l = fs>a.l ? fs : a.l;
633         d.flags |= CLEAR_LO;
634     }
635 }
636 
component_add(component_t & d,const integer_t & dst,const integer_t & src)637 void GGLAssembler::component_add(component_t& d,
638         const integer_t& dst, const integer_t& src)
639 {
640     // here we're guaranteed that fragment.size() >= fb.size()
641     const int shift = src.size() - dst.size();
642     if (!shift) {
643         ADD(AL, 0, d.reg, src.reg, dst.reg);
644     } else {
645         ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
646     }
647 
648     d.h = src.size();
649     if (mDithering) {
650         d.l = 0;
651     } else {
652         d.l = shift;
653         d.flags |= CLEAR_LO;
654     }
655 }
656 
component_sat(const component_t & v)657 void GGLAssembler::component_sat(const component_t& v)
658 {
659     const int one = ((1<<v.size())-1)<<v.l;
660     CMP(AL, v.reg, imm( 1<<v.h ));
661     if (isValidImmediate(one)) {
662         MOV(HS, 0, v.reg, imm( one ));
663     } else if (isValidImmediate(~one)) {
664         MVN(HS, 0, v.reg, imm( ~one ));
665     } else {
666         MOV(HS, 0, v.reg, imm( 1<<v.h ));
667         SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
668     }
669 }
670 
671 // ----------------------------------------------------------------------------
672 
673 }; // namespace android
674 
675