1 /* libs/pixelflinger/codeflinger/blending.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #define LOG_TAG "pixelflinger-code"
19
20 #include <assert.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25
26 #include <android-base/macros.h>
27 #include <log/log.h>
28
29 #include "GGLAssembler.h"
30
31 namespace android {
32
build_fog(component_t & temp,int component,Scratch & regs)33 void GGLAssembler::build_fog(
34 component_t& temp, // incomming fragment / output
35 int component,
36 Scratch& regs)
37 {
38 if (mInfo[component].fog) {
39 Scratch scratches(registerFile());
40 comment("fog");
41
42 integer_t fragment(temp.reg, temp.h, temp.flags);
43 if (!(temp.flags & CORRUPTIBLE)) {
44 temp.reg = regs.obtain();
45 temp.flags |= CORRUPTIBLE;
46 }
47
48 integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
49 LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
50 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
51
52 integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
53 CONTEXT_LOAD(factor.reg, generated_vars.f);
54
55 // clamp fog factor (TODO: see if there is a way to guarantee
56 // we won't overflow, when setting the iterators)
57 BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
58 CMP(AL, factor.reg, imm( 0x10000 ));
59 MOV(HS, 0, factor.reg, imm( 0x10000 ));
60
61 build_blendFOneMinusF(temp, factor, fragment, fogColor);
62 }
63 }
64
build_blending(component_t & temp,const pixel_t & pixel,int component,Scratch & regs)65 void GGLAssembler::build_blending(
66 component_t& temp, // incomming fragment / output
67 const pixel_t& pixel, // framebuffer
68 int component,
69 Scratch& regs)
70 {
71 if (!mInfo[component].blend)
72 return;
73
74 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
75 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
76 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
77 fs = GGL_ONE;
78 const int blending = blending_codes(fs, fd);
79 if (!temp.size()) {
80 // here, blending will produce something which doesn't depend on
81 // that component (eg: GL_ZERO:GL_*), so the register has not been
82 // allocated yet. Will never be used as a source.
83 temp = component_t(regs.obtain(), CORRUPTIBLE);
84 }
85
86 // we are doing real blending...
87 // fb: extracted dst
88 // fragment: extracted src
89 // temp: component_t(fragment) and result
90
91 // scoped register allocator
92 Scratch scratches(registerFile());
93 comment("blending");
94
95 // we can optimize these cases a bit...
96 // (1) saturation is not needed
97 // (2) we can use only one multiply instead of 2
98 // (3) we can reduce the register pressure
99 // R = S*f + D*(1-f) = (S-D)*f + D
100 // R = S*(1-f) + D*f = (D-S)*f + S
101
102 const bool same_factor_opt1 =
103 (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
104 (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
105 (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
106 (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
107
108 const bool same_factor_opt2 =
109 (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
110 (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
111 (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
112 (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
113
114
115 // XXX: we could also optimize these cases:
116 // R = S*f + D*f = (S+D)*f
117 // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
118 // R = S*D + D*S = 2*S*D
119
120
121 // see if we need to extract 'component' from the destination (fb)
122 integer_t fb;
123 if (blending & (BLEND_DST|FACTOR_DST)) {
124 fb.setTo(scratches.obtain(), 32);
125 extract(fb, pixel, component);
126 if (mDithering) {
127 // XXX: maybe what we should do instead, is simply
128 // expand fb -or- fragment to the larger of the two
129 if (fb.size() < temp.size()) {
130 // for now we expand 'fb' to min(fragment, 8)
131 int new_size = temp.size() < 8 ? temp.size() : 8;
132 expand(fb, fb, new_size);
133 }
134 }
135 }
136
137
138 // convert input fragment to integer_t
139 if (temp.l && (temp.flags & CORRUPTIBLE)) {
140 MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
141 temp.h -= temp.l;
142 temp.l = 0;
143 }
144 integer_t fragment(temp.reg, temp.size(), temp.flags);
145
146 // if not done yet, convert input fragment to integer_t
147 if (temp.l) {
148 // here we know temp is not CORRUPTIBLE
149 fragment.reg = scratches.obtain();
150 MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
151 fragment.flags |= CORRUPTIBLE;
152 }
153
154 if (!(temp.flags & CORRUPTIBLE)) {
155 // temp is not corruptible, but since it's the destination it
156 // will be modified, so we need to allocate a new register.
157 temp.reg = regs.obtain();
158 temp.flags &= ~CORRUPTIBLE;
159 fragment.flags &= ~CORRUPTIBLE;
160 }
161
162 if ((blending & BLEND_SRC) && !same_factor_opt1) {
163 // source (fragment) is needed for the blending stage
164 // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
165 fragment.flags &= ~CORRUPTIBLE;
166 }
167
168
169 if (same_factor_opt1) {
170 // R = S*f + D*(1-f) = (S-D)*f + D
171 integer_t factor;
172 build_blend_factor(factor, fs,
173 component, pixel, fragment, fb, scratches);
174 // fb is always corruptible from this point
175 fb.flags |= CORRUPTIBLE;
176 build_blendFOneMinusF(temp, factor, fragment, fb);
177 } else if (same_factor_opt2) {
178 // R = S*(1-f) + D*f = (D-S)*f + S
179 integer_t factor;
180 // fb is always corrruptible here
181 fb.flags |= CORRUPTIBLE;
182 build_blend_factor(factor, fd,
183 component, pixel, fragment, fb, scratches);
184 build_blendOneMinusFF(temp, factor, fragment, fb);
185 } else {
186 integer_t src_factor;
187 integer_t dst_factor;
188
189 // if destination (fb) is not needed for the blending stage,
190 // then it can be marked as CORRUPTIBLE
191 if (!(blending & BLEND_DST)) {
192 fb.flags |= CORRUPTIBLE;
193 }
194
195 // XXX: try to mark some registers as CORRUPTIBLE
196 // in most case we could make those corruptible
197 // when we're processing the last component
198 // but not always, for instance
199 // when fragment is constant and not reloaded
200 // when fb is needed for logic-ops or masking
201 // when a register is aliased (for instance with mAlphaSource)
202
203 // blend away...
204 if (fs==GGL_ZERO) {
205 if (fd==GGL_ZERO) { // R = 0
206 // already taken care of
207 } else if (fd==GGL_ONE) { // R = D
208 // already taken care of
209 } else { // R = D*fd
210 // compute fd
211 build_blend_factor(dst_factor, fd,
212 component, pixel, fragment, fb, scratches);
213 mul_factor(temp, fb, dst_factor);
214 }
215 } else if (fs==GGL_ONE) {
216 if (fd==GGL_ZERO) { // R = S
217 // NOP, taken care of
218 } else if (fd==GGL_ONE) { // R = S + D
219 component_add(temp, fb, fragment); // args order matters
220 component_sat(temp);
221 } else { // R = S + D*fd
222 // compute fd
223 build_blend_factor(dst_factor, fd,
224 component, pixel, fragment, fb, scratches);
225 mul_factor_add(temp, fb, dst_factor, component_t(fragment));
226 component_sat(temp);
227 }
228 } else {
229 // compute fs
230 build_blend_factor(src_factor, fs,
231 component, pixel, fragment, fb, scratches);
232 if (fd==GGL_ZERO) { // R = S*fs
233 mul_factor(temp, fragment, src_factor);
234 } else if (fd==GGL_ONE) { // R = S*fs + D
235 mul_factor_add(temp, fragment, src_factor, component_t(fb));
236 component_sat(temp);
237 } else { // R = S*fs + D*fd
238 mul_factor(temp, fragment, src_factor);
239 if (scratches.isUsed(src_factor.reg))
240 scratches.recycle(src_factor.reg);
241 // compute fd
242 build_blend_factor(dst_factor, fd,
243 component, pixel, fragment, fb, scratches);
244 mul_factor_add(temp, fb, dst_factor, temp);
245 if (!same_factor_opt1 && !same_factor_opt2) {
246 component_sat(temp);
247 }
248 }
249 }
250 }
251
252 // now we can be corrupted (it's the dest)
253 temp.flags |= CORRUPTIBLE;
254 }
255
build_blend_factor(integer_t & factor,int f,int component,const pixel_t & dst_pixel,integer_t & fragment,integer_t & fb,Scratch & scratches)256 void GGLAssembler::build_blend_factor(
257 integer_t& factor, int f, int component,
258 const pixel_t& dst_pixel,
259 integer_t& fragment,
260 integer_t& fb,
261 Scratch& scratches)
262 {
263 integer_t src_alpha(fragment);
264
265 // src_factor/dst_factor won't be used after blending,
266 // so it's fine to mark them as CORRUPTIBLE (if not aliased)
267 factor.flags |= CORRUPTIBLE;
268
269 switch(f) {
270 case GGL_ONE_MINUS_SRC_ALPHA:
271 case GGL_SRC_ALPHA:
272 if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
273 // we're processing alpha, so we already have
274 // src-alpha in fragment, and we need src-alpha just this time.
275 } else {
276 // alpha-src will be needed for other components
277 if (!mBlendFactorCached || mBlendFactorCached==f) {
278 src_alpha = mAlphaSource;
279 factor = mAlphaSource;
280 factor.flags &= ~CORRUPTIBLE;
281 // we already computed the blend factor before, nothing to do.
282 if (mBlendFactorCached)
283 return;
284 // this is the first time, make sure to compute the blend
285 // factor properly.
286 mBlendFactorCached = f;
287 break;
288 } else {
289 // we have a cached alpha blend factor, but we want another one,
290 // this should really not happen because by construction,
291 // we cannot have BOTH source and destination
292 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
293 // the blending stage uses the f/(1-f) optimization
294
295 // for completeness, we handle this case though. Since there
296 // are only 2 choices, this meens we want "the other one"
297 // (1-factor)
298 factor = mAlphaSource;
299 factor.flags &= ~CORRUPTIBLE;
300 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
301 mBlendFactorCached = f;
302 return;
303 }
304 }
305 FALLTHROUGH_INTENDED;
306 case GGL_ONE_MINUS_DST_COLOR:
307 case GGL_DST_COLOR:
308 case GGL_ONE_MINUS_SRC_COLOR:
309 case GGL_SRC_COLOR:
310 case GGL_ONE_MINUS_DST_ALPHA:
311 case GGL_DST_ALPHA:
312 case GGL_SRC_ALPHA_SATURATE:
313 // help us find out what register we can use for the blend-factor
314 // CORRUPTIBLE registers are chosen first, or a new one is allocated.
315 if (fragment.flags & CORRUPTIBLE) {
316 factor.setTo(fragment.reg, 32, CORRUPTIBLE);
317 fragment.flags &= ~CORRUPTIBLE;
318 } else if (fb.flags & CORRUPTIBLE) {
319 factor.setTo(fb.reg, 32, CORRUPTIBLE);
320 fb.flags &= ~CORRUPTIBLE;
321 } else {
322 factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
323 }
324 break;
325 }
326
327 // XXX: doesn't work if size==1
328
329 switch(f) {
330 case GGL_ONE_MINUS_DST_COLOR:
331 case GGL_DST_COLOR:
332 factor.s = fb.s;
333 ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
334 break;
335 case GGL_ONE_MINUS_SRC_COLOR:
336 case GGL_SRC_COLOR:
337 factor.s = fragment.s;
338 ADD(AL, 0, factor.reg, fragment.reg,
339 reg_imm(fragment.reg, LSR, fragment.s-1));
340 break;
341 case GGL_ONE_MINUS_SRC_ALPHA:
342 case GGL_SRC_ALPHA:
343 factor.s = src_alpha.s;
344 ADD(AL, 0, factor.reg, src_alpha.reg,
345 reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
346 break;
347 case GGL_ONE_MINUS_DST_ALPHA:
348 case GGL_DST_ALPHA:
349 // XXX: should be precomputed
350 extract(factor, dst_pixel, GGLFormat::ALPHA);
351 ADD(AL, 0, factor.reg, factor.reg,
352 reg_imm(factor.reg, LSR, factor.s-1));
353 break;
354 case GGL_SRC_ALPHA_SATURATE:
355 // XXX: should be precomputed
356 // XXX: f = min(As, 1-Ad)
357 // btw, we're guaranteed that Ad's size is <= 8, because
358 // it's extracted from the framebuffer
359 break;
360 }
361
362 switch(f) {
363 case GGL_ONE_MINUS_DST_COLOR:
364 case GGL_ONE_MINUS_SRC_COLOR:
365 case GGL_ONE_MINUS_DST_ALPHA:
366 case GGL_ONE_MINUS_SRC_ALPHA:
367 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
368 }
369
370 // don't need more than 8-bits for the blend factor
371 // and this will prevent overflows in the multiplies later
372 if (factor.s > 8) {
373 MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
374 factor.s = 8;
375 }
376 }
377
blending_codes(int fs,int fd)378 int GGLAssembler::blending_codes(int fs, int fd)
379 {
380 int blending = 0;
381 switch(fs) {
382 case GGL_ONE:
383 blending |= BLEND_SRC;
384 break;
385
386 case GGL_ONE_MINUS_DST_COLOR:
387 case GGL_DST_COLOR:
388 blending |= FACTOR_DST|BLEND_SRC;
389 break;
390 case GGL_ONE_MINUS_DST_ALPHA:
391 case GGL_DST_ALPHA:
392 // no need to extract 'component' from the destination
393 // for the blend factor, because we need ALPHA only.
394 blending |= BLEND_SRC;
395 break;
396
397 case GGL_ONE_MINUS_SRC_COLOR:
398 case GGL_SRC_COLOR:
399 blending |= FACTOR_SRC|BLEND_SRC;
400 break;
401 case GGL_ONE_MINUS_SRC_ALPHA:
402 case GGL_SRC_ALPHA:
403 case GGL_SRC_ALPHA_SATURATE:
404 blending |= FACTOR_SRC|BLEND_SRC;
405 break;
406 }
407 switch(fd) {
408 case GGL_ONE:
409 blending |= BLEND_DST;
410 break;
411
412 case GGL_ONE_MINUS_DST_COLOR:
413 case GGL_DST_COLOR:
414 blending |= FACTOR_DST|BLEND_DST;
415 break;
416 case GGL_ONE_MINUS_DST_ALPHA:
417 case GGL_DST_ALPHA:
418 blending |= FACTOR_DST|BLEND_DST;
419 break;
420
421 case GGL_ONE_MINUS_SRC_COLOR:
422 case GGL_SRC_COLOR:
423 blending |= FACTOR_SRC|BLEND_DST;
424 break;
425 case GGL_ONE_MINUS_SRC_ALPHA:
426 case GGL_SRC_ALPHA:
427 // no need to extract 'component' from the source
428 // for the blend factor, because we need ALPHA only.
429 blending |= BLEND_DST;
430 break;
431 }
432 return blending;
433 }
434
435 // ---------------------------------------------------------------------------
436
build_blendFOneMinusF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)437 void GGLAssembler::build_blendFOneMinusF(
438 component_t& temp,
439 const integer_t& factor,
440 const integer_t& fragment,
441 const integer_t& fb)
442 {
443 // R = S*f + D*(1-f) = (S-D)*f + D
444 Scratch scratches(registerFile());
445 // compute S-D
446 integer_t diff(fragment.flags & CORRUPTIBLE ?
447 fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
448 const int shift = fragment.size() - fb.size();
449 if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
450 else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
451 else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
452 mul_factor_add(temp, diff, factor, component_t(fb));
453 }
454
build_blendOneMinusFF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)455 void GGLAssembler::build_blendOneMinusFF(
456 component_t& temp,
457 const integer_t& factor,
458 const integer_t& fragment,
459 const integer_t& fb)
460 {
461 // R = S*f + D*(1-f) = (S-D)*f + D
462 Scratch scratches(registerFile());
463 // compute D-S
464 integer_t diff(fb.flags & CORRUPTIBLE ?
465 fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
466 const int shift = fragment.size() - fb.size();
467 if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
468 else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
469 else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
470 mul_factor_add(temp, diff, factor, component_t(fragment));
471 }
472
473 // ---------------------------------------------------------------------------
474
mul_factor(component_t & d,const integer_t & v,const integer_t & f)475 void GGLAssembler::mul_factor( component_t& d,
476 const integer_t& v,
477 const integer_t& f)
478 {
479 int vs = v.size();
480 int fs = f.size();
481 int ms = vs+fs;
482
483 // XXX: we could have special cases for 1 bit mul
484
485 // all this code below to use the best multiply instruction
486 // wrt the parameters size. We take advantage of the fact
487 // that the 16-bits multiplies allow a 16-bit shift
488 // The trick is that we just make sure that we have at least 8-bits
489 // per component (which is enough for a 8 bits display).
490
491 int xy;
492 int vshift = 0;
493 int fshift = 0;
494 int smulw = 0;
495
496 if (vs<16) {
497 if (fs<16) {
498 xy = xyBB;
499 } else if (GGL_BETWEEN(fs, 24, 31)) {
500 ms -= 16;
501 xy = xyTB;
502 } else {
503 // eg: 15 * 18 -> 15 * 15
504 fshift = fs - 15;
505 ms -= fshift;
506 xy = xyBB;
507 }
508 } else if (GGL_BETWEEN(vs, 24, 31)) {
509 if (fs<16) {
510 ms -= 16;
511 xy = xyTB;
512 } else if (GGL_BETWEEN(fs, 24, 31)) {
513 ms -= 32;
514 xy = xyTT;
515 } else {
516 // eg: 24 * 18 -> 8 * 18
517 fshift = fs - 15;
518 ms -= 16 + fshift;
519 xy = xyTB;
520 }
521 } else {
522 if (fs<16) {
523 // eg: 18 * 15 -> 15 * 15
524 vshift = vs - 15;
525 ms -= vshift;
526 xy = xyBB;
527 } else if (GGL_BETWEEN(fs, 24, 31)) {
528 // eg: 18 * 24 -> 15 * 8
529 vshift = vs - 15;
530 ms -= 16 + vshift;
531 xy = xyBT;
532 } else {
533 // eg: 18 * 18 -> (15 * 18)>>16
534 fshift = fs - 15;
535 ms -= 16 + fshift;
536 xy = yB; //XXX SMULWB
537 smulw = 1;
538 }
539 }
540
541 ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
542
543 int vreg = v.reg;
544 int freg = f.reg;
545 if (vshift) {
546 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
547 vreg = d.reg;
548 }
549 if (fshift) {
550 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
551 freg = d.reg;
552 }
553 if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
554 else SMUL(AL, xy, d.reg, vreg, freg);
555
556
557 d.h = ms;
558 if (mDithering) {
559 d.l = 0;
560 } else {
561 d.l = fs;
562 d.flags |= CLEAR_LO;
563 }
564 }
565
mul_factor_add(component_t & d,const integer_t & v,const integer_t & f,const component_t & a)566 void GGLAssembler::mul_factor_add( component_t& d,
567 const integer_t& v,
568 const integer_t& f,
569 const component_t& a)
570 {
571 // XXX: we could have special cases for 1 bit mul
572 Scratch scratches(registerFile());
573
574 int vs = v.size();
575 int fs = f.size();
576 int as = a.h;
577 int ms = vs+fs;
578
579 ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
580
581 integer_t add(a.reg, a.h, a.flags);
582
583 // 'a' is a component_t but it is guaranteed to have
584 // its high bits set to 0. However in the dithering case,
585 // we can't get away with truncating the potentially bad bits
586 // so extraction is needed.
587
588 if ((mDithering) && (a.size() < ms)) {
589 // we need to expand a
590 if (!(a.flags & CORRUPTIBLE)) {
591 // ... but it's not corruptible, so we need to pick a
592 // temporary register.
593 // Try to uses the destination register first (it's likely
594 // to be usable, unless it aliases an input).
595 if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
596 add.reg = d.reg;
597 } else {
598 add.reg = scratches.obtain();
599 }
600 }
601 expand(add, a, ms); // extracts and expands
602 as = ms;
603 }
604
605 if (ms == as) {
606 if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
607 else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
608 } else {
609 int temp = d.reg;
610 if (temp == add.reg) {
611 // the mul will modify add.reg, we need an intermediary reg
612 if (v.flags & CORRUPTIBLE) temp = v.reg;
613 else if (f.flags & CORRUPTIBLE) temp = f.reg;
614 else temp = scratches.obtain();
615 }
616
617 if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
618 else MUL(AL, 0, temp, v.reg, f.reg);
619
620 if (ms>as) {
621 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
622 } else if (ms<as) {
623 // not sure if we should expand the mul instead?
624 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
625 }
626 }
627
628 d.h = ms;
629 if (mDithering) {
630 d.l = a.l;
631 } else {
632 d.l = fs>a.l ? fs : a.l;
633 d.flags |= CLEAR_LO;
634 }
635 }
636
component_add(component_t & d,const integer_t & dst,const integer_t & src)637 void GGLAssembler::component_add(component_t& d,
638 const integer_t& dst, const integer_t& src)
639 {
640 // here we're guaranteed that fragment.size() >= fb.size()
641 const int shift = src.size() - dst.size();
642 if (!shift) {
643 ADD(AL, 0, d.reg, src.reg, dst.reg);
644 } else {
645 ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
646 }
647
648 d.h = src.size();
649 if (mDithering) {
650 d.l = 0;
651 } else {
652 d.l = shift;
653 d.flags |= CLEAR_LO;
654 }
655 }
656
component_sat(const component_t & v)657 void GGLAssembler::component_sat(const component_t& v)
658 {
659 const int one = ((1<<v.size())-1)<<v.l;
660 CMP(AL, v.reg, imm( 1<<v.h ));
661 if (isValidImmediate(one)) {
662 MOV(HS, 0, v.reg, imm( one ));
663 } else if (isValidImmediate(~one)) {
664 MVN(HS, 0, v.reg, imm( ~one ));
665 } else {
666 MOV(HS, 0, v.reg, imm( 1<<v.h ));
667 SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
668 }
669 }
670
671 // ----------------------------------------------------------------------------
672
673 }; // namespace android
674
675