1 /* libs/pixelflinger/codeflinger/GGLAssembler.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #define LOG_TAG "GGLAssembler"
19
20 #include <assert.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25
26 #include <log/log.h>
27
28 #include "GGLAssembler.h"
29
30 namespace android {
31
32 // ----------------------------------------------------------------------------
33
GGLAssembler(ARMAssemblerInterface * target)34 GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
35 : ARMAssemblerProxy(target),
36 RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
37 {
38 }
39
~GGLAssembler()40 GGLAssembler::~GGLAssembler()
41 {
42 }
43
prolog()44 void GGLAssembler::prolog()
45 {
46 ARMAssemblerProxy::prolog();
47 }
48
epilog(uint32_t touched)49 void GGLAssembler::epilog(uint32_t touched)
50 {
51 ARMAssemblerProxy::epilog(touched);
52 }
53
reset(int opt_level)54 void GGLAssembler::reset(int opt_level)
55 {
56 ARMAssemblerProxy::reset();
57 RegisterAllocator::reset();
58 mOptLevel = opt_level;
59 }
60
61 // ---------------------------------------------------------------------------
62
scanline(const needs_t & needs,context_t const * c)63 int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
64 {
65 int err = 0;
66 int opt_level = mOptLevel;
67 while (opt_level >= 0) {
68 reset(opt_level);
69 err = scanline_core(needs, c);
70 if (err == 0)
71 break;
72 opt_level--;
73 }
74
75 // XXX: in theory, pcForLabel is not valid before generate()
76 uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
77 uint32_t* fragment_end_pc = pcForLabel("epilog");
78 const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
79
80 // build a name for our pipeline
81 char name[64];
82 sprintf(name,
83 "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
84 needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
85
86 if (err) {
87 ALOGE("Error while generating ""%s""\n", name);
88 disassemble(name);
89 return -1;
90 }
91
92 return generate(name);
93 }
94
// Emits the whole scanline routine for `needs`:
//   1. decodes `needs` into member state (tests, blending, masking, ...),
//   2. emits the prolog and the scanline setup code,
//   3. emits the per-fragment loop between the "fragment_loop" and "epilog"
//      labels,
//   4. emits the epilog and, when an alpha or depth test is active, the
//      out-of-line "discard_*" paths that skip a fragment.
//
// Returns registerFile().status(); the function bails out with that value
// as soon as it is observed to be non-zero.
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    // the test functions are stored in the needs relative to GGL_NEVER
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    // When the color buffer's alpha component has h == 0 (presumably: the
    // format has no alpha bits), every blend factor that depends on the
    // destination alpha is replaced by GGL_ONE.
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    // (combined blending codes of the color and alpha factor pairs; tested
    // further down to decide whether the destination pixel must be loaded)
    const int blending = blending_codes(mBlendSrc, mBlendDst) |
                         blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    // Both branches below are placeholders: they emit nothing.
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    // Fill in the per-component info (mInfo[]) and accumulate the blending,
    // masking and "present in the color buffer" bit sets.
    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        // for the alpha component, SRC_ALPHA_SATURATE is treated as ONE
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked =   !!(masking & mask);
        info.inDest =   !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        // for formats whose `components` is >= GGL_LUMINANCE, green and blue
        // are never written to the destination
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        // only alpha can be "needed" without being stored: either blending
        // uses the source alpha or an alpha test is active
        info.needed =   (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth =   mSmooth && info.iterated;
        info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    // every component present in the color buffer is masked: nothing will be
    // stored, so dithering is pointless
    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
            // (rotate, add 1 in the top bits, rotate back by the
            // complementary amount: a full 32-bit rotation in total)
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
            if (registerFile().status())
                return registerFile().status();
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            // (directTexture is used as a 1-based texture unit index here)
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // fetch the dither value: byte at
                // ctx + (count & (GGL_DITHER_SIZE-1)) + offsetof(ditherMatrix)
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            // alpha is built first: build_incoming_component() emits the
            // alpha test as part of the alpha component
            build_component(pixel, parts, GGLFormat::ALPHA, regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED, regs);
            build_component(pixel, parts, GGLFormat::GREEN, regs);
            build_component(pixel, parts, GGLFormat::BLUE, regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    // the pixel counter lives in the upper 16 bits of count: decrement it
    // and loop while the result is positive or zero
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    // Out-of-line paths taken by the depth/alpha tests when a fragment is
    // rejected: advance the iterators and the color-buffer pointer without
    // storing anything, then resume the loop (or fall into a second epilog).
    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            // cbPtr.size is in bits, hence the >>3 to advance by one pixel
            ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}
347
348 // ---------------------------------------------------------------------------
349
// Emits the per-scanline setup code that runs once before the fragment loop
// and fills in `parts` with the registers holding the loop state:
//   - parts.count : pixel counter (plus dither offset when dithering),
//   - parts.cbPtr : pointer to the first color-buffer pixel (unless every
//                   component is masked),
//   - the initial fog value (stored back into the context),
//   - parts.z and the depth-buffer base (when depth is tested or written),
//   - texture coordinates, iterated colors and the coverage pointer.
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    // ... and minus one, so the register holds count-1
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        // cbPtr = data + Rs scaled by the pixel size
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;  // the result reuses the ydfdy register
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        // f = Rx*dfdx + ydfdy
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;  // loaded straight into the z register
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);      // 1.31 fixed-point
        // z = Rx*dzdx + ydzdy
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase of parts.count
        // zbase = base + (xl + (count>>16) + stride*y)*2
        // (the counter field is added here; build_depth_test() subtracts
        // the current counter again to address the pixel)
        int Rs = dzdx;  // dzdx is no longer needed, reuse its register
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
        // covPtr = coverage + Rx*2 (the pointer is declared with 16-bit size)
        ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}
450
451 // ---------------------------------------------------------------------------
452
build_component(pixel_t & pixel,const fragment_parts_t & parts,int component,Scratch & regs)453 void GGLAssembler::build_component( pixel_t& pixel,
454 const fragment_parts_t& parts,
455 int component,
456 Scratch& regs)
457 {
458 static char const * comments[] = {"alpha", "red", "green", "blue"};
459 comment(comments[component]);
460
461 // local register file
462 Scratch scratches(registerFile());
463 const int dst_component_size = pixel.component_size(component);
464
465 component_t temp(-1);
466 build_incoming_component( temp, dst_component_size,
467 parts, component, scratches, regs);
468
469 if (mInfo[component].inDest) {
470
471 // blending...
472 build_blending( temp, mDstPixel, component, scratches );
473
474 // downshift component and rebuild pixel...
475 downshift(pixel, component, temp, parts.dither);
476 }
477 }
478
// Builds the incoming (source) value of `component` into `temp`.
//
// Two paths exist:
//  - "extract": the component is computed into a register of its own
//    (iterated color, texture environment, optional expansion to `dst_size`
//    bits, and for alpha: coverage, alpha test and capture of the blending
//    alpha source), followed by fog;
//  - otherwise, when the component goes to the destination, `temp` simply
//    aliases the component inside the iterated color or a texel.
//
// `scratches` is the per-component scratch set; `global_regs` is used
// instead when the alpha value must remain available as the blending alpha
// source.
void GGLAssembler::build_incoming_component(
        component_t& temp,
        int dst_size,
        const fragment_parts_t& parts,
        int component,
        Scratch& scratches,
        Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    // NOTE(review): this loop stops at GGL_TEXTURE_UNIT_COUNT-1 whereas the
    // texel-selection loop at the end of this function covers all
    // GGL_TEXTURE_UNIT_COUNT units — confirm the last unit is skipped on
    // purpose.
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                        (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        // the alpha source must outlive this component, so its registers
        // come from the caller's long-lived scratch set
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                // shift the value down so that it starts at bit 0
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    // take over the fragment's register: it becomes the
                    // alpha source and the fragment loses CORRUPTIBLE
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                // account for the bits dropped by the shift above
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            // a later unit overrides an earlier one (and the iterated color)
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
614
isAlphaSourceNeeded() const615 bool GGLAssembler::isAlphaSourceNeeded() const
616 {
617 // XXX: also needed for alpha-test
618 const int bs = mBlendSrc;
619 const int bd = mBlendDst;
620 return bs==GGL_SRC_ALPHA_SATURATE ||
621 bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
622 bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
623 }
624
625 // ---------------------------------------------------------------------------
626
build_smooth_shade(const fragment_parts_t & parts)627 void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
628 {
629 if (mSmooth && !parts.iterated_packed) {
630 // update the iterated color in a pipelined way...
631 comment("update iterated color");
632 Scratch scratches(registerFile());
633
634 const int reload = parts.reload;
635 for (int i=0 ; i<4 ; i++) {
636 if (!mInfo[i].iterated)
637 continue;
638
639 int c = parts.argb[i].reg;
640 int dx = parts.argb_dx[i].reg;
641
642 if (reload & 1) {
643 c = scratches.obtain();
644 CONTEXT_LOAD(c, generated_vars.argb[i].c);
645 }
646 if (reload & 2) {
647 dx = scratches.obtain();
648 CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
649 }
650
651 if (mSmooth) {
652 ADD(AL, 0, c, c, dx);
653 }
654
655 if (reload & 1) {
656 CONTEXT_STORE(c, generated_vars.argb[i].c);
657 scratches.recycle(c);
658 }
659 if (reload & 2) {
660 scratches.recycle(dx);
661 }
662 }
663 }
664 }
665
666 // ---------------------------------------------------------------------------
667
build_coverage_application(component_t & fragment,const fragment_parts_t & parts,Scratch & regs)668 void GGLAssembler::build_coverage_application(component_t& fragment,
669 const fragment_parts_t& parts, Scratch& regs)
670 {
671 // here fragment.l is guarenteed to be 0
672 if (mAA) {
673 // coverages are 1.15 fixed-point numbers
674 comment("coverage application");
675
676 component_t incoming(fragment);
677 modify(fragment, regs);
678
679 Scratch scratches(registerFile());
680 int cf = scratches.obtain();
681 LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
682 if (fragment.h > 31) {
683 fragment.h--;
684 SMULWB(AL, fragment.reg, incoming.reg, cf);
685 } else {
686 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
687 SMULWB(AL, fragment.reg, fragment.reg, cf);
688 }
689 }
690 }
691
692 // ---------------------------------------------------------------------------
693
build_alpha_test(component_t & fragment,const fragment_parts_t &)694 void GGLAssembler::build_alpha_test(component_t& fragment,
695 const fragment_parts_t& /*parts*/)
696 {
697 if (mAlphaTest != GGL_ALWAYS) {
698 comment("Alpha Test");
699 Scratch scratches(registerFile());
700 int ref = scratches.obtain();
701 const int shift = GGL_COLOR_BITS-fragment.size();
702 CONTEXT_LOAD(ref, state.alpha_test.ref);
703 if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
704 else CMP(AL, fragment.reg, ref);
705 int cc = NV;
706 switch (mAlphaTest) {
707 case GGL_NEVER: cc = NV; break;
708 case GGL_LESS: cc = LT; break;
709 case GGL_EQUAL: cc = EQ; break;
710 case GGL_LEQUAL: cc = LS; break;
711 case GGL_GREATER: cc = HI; break;
712 case GGL_NOTEQUAL: cc = NE; break;
713 case GGL_GEQUAL: cc = HS; break;
714 }
715 B(cc^1, "discard_after_textures");
716 }
717 }
718
719 // ---------------------------------------------------------------------------
720
// Emits the depth test and/or the depth write for the current fragment.
//
// `mask` is a combination of Z_TEST and Z_WRITE selecting which part the
// caller wants; other bits are ignored. Z_WRITE is dropped when the needs
// do not have P_MASK_Z set, and Z_TEST is dropped when the depth function
// is GGL_ALWAYS. A failed test branches to "discard_before_textures".
// Nothing is emitted when the depth function is GGL_ALWAYS and P_MASK_Z is
// not set.
void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        // ic: condition (after "CMP depth, z") under which the fragment
        //     passes; cc: its inverse, used to branch to the discard path
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:
            ic = HI;
            break;
        case GGL_EQUAL:
            ic = EQ;
            break;
        case GGL_LEQUAL:
            ic = HS;
            break;
        case GGL_GREATER:
            ic = LT;
            break;
        case GGL_NOTEQUAL:
            ic = NE;
            break;
        case GGL_GEQUAL:
            ic = LS;
            break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // inverse the condition
        cc = ic^1;

        // depth writes are only emitted when P_MASK_Z is set in the needs
        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase);  // stall
        ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase - (count >> 15), i.e.
            // zbase - ((count >> 16) << 1) as long as bit 15 of count is 0

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            // only the upper 16 bits of z are compared / stored
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}
785
build_iterate_z(const fragment_parts_t & parts)786 void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
787 {
788 const needs_t& needs = mBuilderContext.needs;
789 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
790 Scratch scratches(registerFile());
791 int dzdx = scratches.obtain();
792 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall
793 ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
794 }
795 }
796
build_iterate_f(const fragment_parts_t &)797 void GGLAssembler::build_iterate_f(const fragment_parts_t& /*parts*/)
798 {
799 const needs_t& needs = mBuilderContext.needs;
800 if (GGL_READ_NEEDS(P_FOG, needs.p)) {
801 Scratch scratches(registerFile());
802 int dfdx = scratches.obtain();
803 int f = scratches.obtain();
804 CONTEXT_LOAD(f, generated_vars.f);
805 CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall
806 ADD(AL, 0, f, f, dfdx);
807 CONTEXT_STORE(f, generated_vars.f);
808 }
809 }
810
811 // ---------------------------------------------------------------------------
812
build_logic_op(pixel_t & pixel,Scratch & regs)813 void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
814 {
815 const needs_t& needs = mBuilderContext.needs;
816 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
817 if (opcode == GGL_COPY)
818 return;
819
820 comment("logic operation");
821
822 pixel_t s(pixel);
823 if (!(pixel.flags & CORRUPTIBLE)) {
824 pixel.reg = regs.obtain();
825 pixel.flags |= CORRUPTIBLE;
826 }
827
828 pixel_t d(mDstPixel);
829 switch(opcode) {
830 case GGL_CLEAR: MOV(AL, 0, pixel.reg, imm(0)); break;
831 case GGL_AND: AND(AL, 0, pixel.reg, s.reg, d.reg); break;
832 case GGL_AND_REVERSE: BIC(AL, 0, pixel.reg, s.reg, d.reg); break;
833 case GGL_COPY: break;
834 case GGL_AND_INVERTED: BIC(AL, 0, pixel.reg, d.reg, s.reg); break;
835 case GGL_NOOP: MOV(AL, 0, pixel.reg, d.reg); break;
836 case GGL_XOR: EOR(AL, 0, pixel.reg, s.reg, d.reg); break;
837 case GGL_OR: ORR(AL, 0, pixel.reg, s.reg, d.reg); break;
838 case GGL_NOR: ORR(AL, 0, pixel.reg, s.reg, d.reg);
839 MVN(AL, 0, pixel.reg, pixel.reg); break;
840 case GGL_EQUIV: EOR(AL, 0, pixel.reg, s.reg, d.reg);
841 MVN(AL, 0, pixel.reg, pixel.reg); break;
842 case GGL_INVERT: MVN(AL, 0, pixel.reg, d.reg); break;
843 case GGL_OR_REVERSE: // s | ~d == ~(~s & d)
844 BIC(AL, 0, pixel.reg, d.reg, s.reg);
845 MVN(AL, 0, pixel.reg, pixel.reg); break;
846 case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg); break;
847 case GGL_OR_INVERTED: // ~s | d == ~(s & ~d)
848 BIC(AL, 0, pixel.reg, s.reg, d.reg);
849 MVN(AL, 0, pixel.reg, pixel.reg); break;
850 case GGL_NAND: AND(AL, 0, pixel.reg, s.reg, d.reg);
851 MVN(AL, 0, pixel.reg, pixel.reg); break;
852 case GGL_SET: MVN(AL, 0, pixel.reg, imm(0)); break;
853 };
854 }
855
856 // ---------------------------------------------------------------------------
857
// Returns the (even) bit position of the lowest 2-bit group of `val` that
// contains a set bit: 0 for bits 0-1, 2 for bits 2-3, ... 30 for bits 30-31.
// Returns 32 when `val` is 0, instead of looping forever.
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    // The probe is unsigned so that shifting it up to bit 31 is well
    // defined, and the scan is bounded so that it terminates for val == 0.
    while (i < 32 && !(val & (uint32_t(3) << i)))
        i += 2;
    return i;
}
865
// Rotates `val` right, two bits at a time, until its two lowest bits are
// not both zero and its six highest bits are all zero. `rot` receives the
// number of bit positions rotated.
// If no rotation qualifies, `val` ends up unchanged (after a full 32-bit
// turn) and `rot` is 0.
static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    for (uint32_t turns = 0; turns < 16; ++turns) {
        const bool lowBitsSet = (val & 3) != 0;
        const bool highBitsClear = (val & 0xFC000000) == 0;
        if (lowBitsSet && highBitsClear) {
            rot = turns * 2;
            return;
        }
        // rotate right by two bits
        val = (val >> 2) | ((val & 3) << 30);
    }
    // sixteen steps make a full turn: val is back to its initial value and
    // rot stays 0
}
881
build_and_immediate(int d,int s,uint32_t mask,int bits)882 void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
883 {
884 uint32_t rot;
885 uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
886 mask &= size;
887
888 if (mask == size) {
889 if (d != s)
890 MOV( AL, 0, d, s);
891 return;
892 }
893
894 if ((getCodegenArch() == CODEGEN_ARCH_MIPS) ||
895 (getCodegenArch() == CODEGEN_ARCH_MIPS64)) {
896 // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
897 // the below ' while (mask)' code is buggy on mips
898 // since mips returns true on isValidImmediate()
899 // then we get multiple AND instr (positive logic)
900 AND( AL, 0, d, s, imm(mask) );
901 return;
902 }
903 else if (getCodegenArch() == CODEGEN_ARCH_ARM64) {
904 AND( AL, 0, d, s, imm(mask) );
905 return;
906 }
907
908 int negative_logic = !isValidImmediate(mask);
909 if (negative_logic) {
910 mask = ~mask & size;
911 }
912 normalize(mask, rot);
913
914 if (mask) {
915 while (mask) {
916 uint32_t bitpos = find_bottom(mask);
917 int shift = rot + bitpos;
918 uint32_t m = mask & (0xff << bitpos);
919 mask &= ~m;
920 m >>= bitpos;
921 int32_t newMask = (m<<shift) | (m>>(32-shift));
922 if (!negative_logic) {
923 AND( AL, 0, d, s, imm(newMask) );
924 } else {
925 BIC( AL, 0, d, s, imm(newMask) );
926 }
927 s = d;
928 }
929 } else {
930 MOV( AL, 0, d, imm(0));
931 }
932 }
933
build_masking(pixel_t & pixel,Scratch & regs)934 void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
935 {
936 if (!mMasking || mAllMasked) {
937 return;
938 }
939
940 comment("color mask");
941
942 pixel_t fb(mDstPixel);
943 pixel_t s(pixel);
944 if (!(pixel.flags & CORRUPTIBLE)) {
945 pixel.reg = regs.obtain();
946 pixel.flags |= CORRUPTIBLE;
947 }
948
949 int mask = 0;
950 for (int i=0 ; i<4 ; i++) {
951 const int component_mask = 1<<i;
952 const int h = fb.format.c[i].h;
953 const int l = fb.format.c[i].l;
954 if (h && (!(mMasking & component_mask))) {
955 mask |= ((1<<(h-l))-1) << l;
956 }
957 }
958
959 // There is no need to clear the masked components of the source
960 // (unless we applied a logic op), because they're already zeroed
961 // by construction (masked components are not computed)
962
963 if (mLogicOp) {
964 const needs_t& needs = mBuilderContext.needs;
965 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
966 if (opcode != GGL_CLEAR) {
967 // clear masked component of source
968 build_and_immediate(pixel.reg, s.reg, mask, fb.size());
969 s = pixel;
970 }
971 }
972
973 // clear non masked components of destination
974 build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
975
976 // or back the channels that were masked
977 if (s.reg == fb.reg) {
978 // this is in fact a MOV
979 if (s.reg == pixel.reg) {
980 // ugh. this in in fact a nop
981 } else {
982 MOV(AL, 0, pixel.reg, fb.reg);
983 }
984 } else {
985 ORR(AL, 0, pixel.reg, s.reg, fb.reg);
986 }
987 }
988
989 // ---------------------------------------------------------------------------
990
base_offset(const pointer_t & d,const pointer_t & b,const reg_t & o)991 void GGLAssembler::base_offset(
992 const pointer_t& d, const pointer_t& b, const reg_t& o)
993 {
994 switch (b.size) {
995 case 32:
996 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
997 break;
998 case 24:
999 if (d.reg == b.reg) {
1000 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
1001 ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
1002 } else {
1003 ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
1004 ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
1005 }
1006 break;
1007 case 16:
1008 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
1009 break;
1010 case 8:
1011 ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
1012 break;
1013 }
1014 }
1015
1016 // ----------------------------------------------------------------------------
1017 // cheezy register allocator...
1018 // ----------------------------------------------------------------------------
1019
// Modified to support MIPS processors, in a very simple way. We retain the
// (ARM) limit of 16 total registers, but shift the mapping of those registers
// from 0-15 to 2-17. Register 0 on MIPS cannot be used as a GP register, and
// register 1 has a traditional use as a temp.
1024
RegisterAllocator(int arch)1025 RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
1026 {
1027 }
1028
reset()1029 void RegisterAllocator::reset()
1030 {
1031 mRegs.reset();
1032 }
1033
reserveReg(int reg)1034 int RegisterAllocator::reserveReg(int reg)
1035 {
1036 return mRegs.reserve(reg);
1037 }
1038
obtainReg()1039 int RegisterAllocator::obtainReg()
1040 {
1041 return mRegs.obtain();
1042 }
1043
recycleReg(int reg)1044 void RegisterAllocator::recycleReg(int reg)
1045 {
1046 mRegs.recycle(reg);
1047 }
1048
registerFile()1049 RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
1050 {
1051 return mRegs;
1052 }
1053
1054 // ----------------------------------------------------------------------------
1055
RegisterFile(int codegen_arch)1056 RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
1057 : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
1058 {
1059 if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
1060 (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
1061 mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17
1062 }
1063 reserve(ARMAssemblerInterface::SP);
1064 reserve(ARMAssemblerInterface::PC);
1065 }
1066
RegisterFile(const RegisterFile & rhs,int codegen_arch)1067 RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
1068 : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
1069 {
1070 if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
1071 (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
1072 mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17
1073 }
1074 }
1075
~RegisterFile()1076 RegisterAllocator::RegisterFile::~RegisterFile()
1077 {
1078 }
1079
operator ==(const RegisterFile & rhs) const1080 bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
1081 {
1082 return (mRegs == rhs.mRegs);
1083 }
1084
reset()1085 void RegisterAllocator::RegisterFile::reset()
1086 {
1087 mRegs = mTouched = mStatus = 0;
1088 reserve(ARMAssemblerInterface::SP);
1089 reserve(ARMAssemblerInterface::PC);
1090 }
1091
1092 // RegisterFile::reserve() take a register parameter in the
1093 // range 0-15 (Arm compatible), but on a Mips processor, will
1094 // return the actual allocated register in the range 2-17.
reserve(int reg)1095 int RegisterAllocator::RegisterFile::reserve(int reg)
1096 {
1097 reg += mRegisterOffset;
1098 LOG_ALWAYS_FATAL_IF(isUsed(reg),
1099 "reserving register %d, but already in use",
1100 reg);
1101 mRegs |= (1<<reg);
1102 mTouched |= mRegs;
1103 return reg;
1104 }
1105
1106 // This interface uses regMask in range 2-17 on MIPS, no translation.
reserveSeveral(uint32_t regMask)1107 void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
1108 {
1109 mRegs |= regMask;
1110 mTouched |= regMask;
1111 }
1112
isUsed(int reg) const1113 int RegisterAllocator::RegisterFile::isUsed(int reg) const
1114 {
1115 LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
1116 return mRegs & (1<<reg);
1117 }
1118
obtain()1119 int RegisterAllocator::RegisterFile::obtain()
1120 {
1121 const char priorityList[14] = { 0, 1, 2, 3,
1122 12, 14, 4, 5,
1123 6, 7, 8, 9,
1124 10, 11 };
1125 const int nbreg = sizeof(priorityList);
1126 int i, r, reg;
1127 for (i=0 ; i<nbreg ; i++) {
1128 r = priorityList[i];
1129 if (!isUsed(r + mRegisterOffset)) {
1130 break;
1131 }
1132 }
1133 // this is not an error anymore because, we'll try again with
1134 // a lower optimization level.
1135 //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1136 if (i >= nbreg) {
1137 mStatus |= OUT_OF_REGISTERS;
1138 // we return SP so we can more easily debug things
1139 // the code will never be run anyway.
1140 return ARMAssemblerInterface::SP;
1141 }
1142 reg = reserve(r); // Param in Arm range 0-15, returns range 2-17 on Mips.
1143 return reg;
1144 }
1145
hasFreeRegs() const1146 bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1147 {
1148 uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix.
1149 return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
1150 }
1151
countFreeRegs() const1152 int RegisterAllocator::RegisterFile::countFreeRegs() const
1153 {
1154 uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix.
1155 int f = ~regs & 0xFFFF;
1156 // now count number of 1
1157 f = (f & 0x5555) + ((f>>1) & 0x5555);
1158 f = (f & 0x3333) + ((f>>2) & 0x3333);
1159 f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1160 f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1161 return f;
1162 }
1163
recycle(int reg)1164 void RegisterAllocator::RegisterFile::recycle(int reg)
1165 {
1166 // commented out, since common failure of running out of regs
1167 // triggers this assertion. Since the code is not execectued
1168 // in that case, it does not matter. No reason to FATAL err.
1169 // LOG_FATAL_IF(!isUsed(reg),
1170 // "recycling unallocated register %d",
1171 // reg);
1172 mRegs &= ~(1<<reg);
1173 }
1174
recycleSeveral(uint32_t regMask)1175 void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
1176 {
1177 // commented out, since common failure of running out of regs
1178 // triggers this assertion. Since the code is not execectued
1179 // in that case, it does not matter. No reason to FATAL err.
1180 // LOG_FATAL_IF((mRegs & regMask)!=regMask,
1181 // "recycling unallocated registers "
1182 // "(recycle=%08x, allocated=%08x, unallocated=%08x)",
1183 // regMask, mRegs, mRegs®Mask);
1184 mRegs &= ~regMask;
1185 }
1186
touched() const1187 uint32_t RegisterAllocator::RegisterFile::touched() const
1188 {
1189 return mTouched;
1190 }
1191
1192 // ----------------------------------------------------------------------------
1193
1194 }; // namespace android
1195
1196