1 /* libs/opengles/primitives.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <math.h>
21 
22 #include "context.h"
23 #include "primitives.h"
24 #include "light.h"
25 #include "matrix.h"
26 #include "vertex.h"
27 #include "fp.h"
28 #include "TextureObjectManager.h"
29 
// C-linkage helper invoked by the ARM (non-Thumb) build of
// compute_iterators_t::iterators0032(int32_t*, ...) further down in this
// file. Its definition is not part of this file.
extern "C" void iterators0032(const void* that,
        int32_t* it, int32_t c0, int32_t c1, int32_t c2);
32 
33 namespace android {
34 
35 // ----------------------------------------------------------------------------
36 
// Primitive entry points installed into c->prims by
// ogles_validate_primitives() when the vertex array is enabled.
static void primitive_point(ogles_context_t* c, vertex_t* v);
static void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
static void primitive_clip_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Do-nothing variants installed when the vertex array is disabled.
static void primitive_nop_point(ogles_context_t* c, vertex_t* v);
static void primitive_nop_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
static void primitive_nop_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Returns true when the triangle set up in c->lerp must be discarded.
static inline bool cull_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Programs the rasterizer gradients (texture, color, depth, fog).
static void lerp_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Texture-coordinate gradients, used when GGL_ENABLE_W is not set.
static void lerp_texcoords(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Texture-coordinate gradients, used when GGL_ENABLE_W is set.
static void lerp_texcoords_w(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Sets up the gradients if needed and rasterizes one unclipped triangle.
static void triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Clipping path for triangles that have at least one clip flag set.
static void clip_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Clips a line; primitive_line() drops the line when this returns 0.
static unsigned int clip_line(ogles_context_t* c,
        vertex_t* s, vertex_t* p);
67 
68 // ----------------------------------------------------------------------------
69 #if 0
70 #pragma mark -
71 #endif
72 
lightTriangleDarkSmooth(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)73 static void lightTriangleDarkSmooth(ogles_context_t* c,
74         vertex_t* v0, vertex_t* v1, vertex_t* v2)
75 {
76     if (!(v0->flags & vertex_t::LIT)) {
77         v0->flags |= vertex_t::LIT;
78         const GLvoid* cp = c->arrays.color.element(
79                 v0->index & vertex_cache_t::INDEX_MASK);
80         c->arrays.color.fetch(c, v0->color.v, cp);
81     }
82     if (!(v1->flags & vertex_t::LIT)) {
83         v1->flags |= vertex_t::LIT;
84         const GLvoid* cp = c->arrays.color.element(
85                 v1->index & vertex_cache_t::INDEX_MASK);
86         c->arrays.color.fetch(c, v1->color.v, cp);
87     }
88     if(!(v2->flags & vertex_t::LIT)) {
89         v2->flags |= vertex_t::LIT;
90         const GLvoid* cp = c->arrays.color.element(
91                 v2->index & vertex_cache_t::INDEX_MASK);
92         c->arrays.color.fetch(c, v2->color.v, cp);
93     }
94 }
95 
lightTriangleDarkFlat(ogles_context_t * c,vertex_t *,vertex_t *,vertex_t * v2)96 static void lightTriangleDarkFlat(ogles_context_t* c,
97         vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
98 {
99     if (!(v2->flags & vertex_t::LIT)) {
100         v2->flags |= vertex_t::LIT;
101         const GLvoid* cp = c->arrays.color.element(
102                 v2->index & vertex_cache_t::INDEX_MASK);
103         c->arrays.color.fetch(c, v2->color.v, cp);
104     }
105     // configure the rasterizer here, before we clip
106     c->rasterizer.procs.color4xv(c, v2->color.v);
107 }
108 
lightTriangleSmooth(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)109 static void lightTriangleSmooth(ogles_context_t* c,
110         vertex_t* v0, vertex_t* v1, vertex_t* v2)
111 {
112     if (!(v0->flags & vertex_t::LIT))
113         c->lighting.lightVertex(c, v0);
114     if (!(v1->flags & vertex_t::LIT))
115         c->lighting.lightVertex(c, v1);
116     if(!(v2->flags & vertex_t::LIT))
117         c->lighting.lightVertex(c, v2);
118 }
119 
lightTriangleFlat(ogles_context_t * c,vertex_t *,vertex_t *,vertex_t * v2)120 static void lightTriangleFlat(ogles_context_t* c,
121         vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
122 {
123     if (!(v2->flags & vertex_t::LIT))
124         c->lighting.lightVertex(c, v2);
125     // configure the rasterizer here, before we clip
126     c->rasterizer.procs.color4xv(c, v2->color.v);
127 }
128 
129 // The fog versions...
130 
131 static inline
lightVertexDarkSmoothFog(ogles_context_t * c,vertex_t * v)132 void lightVertexDarkSmoothFog(ogles_context_t* c, vertex_t* v)
133 {
134     if (!(v->flags & vertex_t::LIT)) {
135         v->flags |= vertex_t::LIT;
136         v->fog = c->fog.fog(c, v->eye.z);
137         const GLvoid* cp = c->arrays.color.element(
138                 v->index & vertex_cache_t::INDEX_MASK);
139         c->arrays.color.fetch(c, v->color.v, cp);
140     }
141 }
142 static inline
lightVertexDarkFlatFog(ogles_context_t * c,vertex_t * v)143 void lightVertexDarkFlatFog(ogles_context_t* c, vertex_t* v)
144 {
145     if (!(v->flags & vertex_t::LIT)) {
146         v->flags |= vertex_t::LIT;
147         v->fog = c->fog.fog(c, v->eye.z);
148     }
149 }
150 static inline
lightVertexSmoothFog(ogles_context_t * c,vertex_t * v)151 void lightVertexSmoothFog(ogles_context_t* c, vertex_t* v)
152 {
153     if (!(v->flags & vertex_t::LIT)) {
154         v->fog = c->fog.fog(c, v->eye.z);
155         c->lighting.lightVertex(c, v);
156     }
157 }
158 
lightTriangleDarkSmoothFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)159 static void lightTriangleDarkSmoothFog(ogles_context_t* c,
160         vertex_t* v0, vertex_t* v1, vertex_t* v2)
161 {
162     lightVertexDarkSmoothFog(c, v0);
163     lightVertexDarkSmoothFog(c, v1);
164     lightVertexDarkSmoothFog(c, v2);
165 }
166 
lightTriangleDarkFlatFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)167 static void lightTriangleDarkFlatFog(ogles_context_t* c,
168         vertex_t* v0, vertex_t* v1, vertex_t* v2)
169 {
170     lightVertexDarkFlatFog(c, v0);
171     lightVertexDarkFlatFog(c, v1);
172     lightVertexDarkSmoothFog(c, v2);
173     // configure the rasterizer here, before we clip
174     c->rasterizer.procs.color4xv(c, v2->color.v);
175 }
176 
lightTriangleSmoothFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)177 static void lightTriangleSmoothFog(ogles_context_t* c,
178         vertex_t* v0, vertex_t* v1, vertex_t* v2)
179 {
180     lightVertexSmoothFog(c, v0);
181     lightVertexSmoothFog(c, v1);
182     lightVertexSmoothFog(c, v2);
183 }
184 
lightTriangleFlatFog(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)185 static void lightTriangleFlatFog(ogles_context_t* c,
186         vertex_t* v0, vertex_t* v1, vertex_t* v2)
187 {
188     lightVertexDarkFlatFog(c, v0);
189     lightVertexDarkFlatFog(c, v1);
190     lightVertexSmoothFog(c, v2);
191     // configure the rasterizer here, before we clip
192     c->rasterizer.procs.color4xv(c, v2->color.v);
193 }
194 
195 
196 
// Common signature of the triangle lighting/shading routines stored in
// the dispatch table below.
typedef void (*light_primitive_t)(ogles_context_t*,
        vertex_t*, vertex_t*, vertex_t*);

// Dispatch table indexed by a 3-bit selector built in
// ogles_validate_primitives():
// fog 0x4, light 0x2, smooth 0x1
static const light_primitive_t lightPrimitive[8] = {
    lightTriangleDarkFlat,          // no fog | dark  | flat
    lightTriangleDarkSmooth,        // no fog | dark  | smooth
    lightTriangleFlat,              // no fog | light | flat
    lightTriangleSmooth,            // no fog | light | smooth
    lightTriangleDarkFlatFog,       // fog    | dark  | flat
    lightTriangleDarkSmoothFog,     // fog    | dark  | smooth
    lightTriangleFlatFog,           // fog    | light | flat
    lightTriangleSmoothFog          // fog    | light | smooth
};
211 
ogles_validate_primitives(ogles_context_t * c)212 void ogles_validate_primitives(ogles_context_t* c)
213 {
214     const uint32_t enables = c->rasterizer.state.enables;
215 
216     // set up the lighting/shading/smoothing/fogging function
217     int index = enables & GGL_ENABLE_SMOOTH ? 0x1 : 0;
218     index |= c->lighting.enable ? 0x2 : 0;
219     index |= enables & GGL_ENABLE_FOG ? 0x4 : 0;
220     c->lighting.lightTriangle = lightPrimitive[index];
221 
222     // set up the primitive renderers
223     if (ggl_likely(c->arrays.vertex.enable)) {
224         c->prims.renderPoint    = primitive_point;
225         c->prims.renderLine     = primitive_line;
226         c->prims.renderTriangle = primitive_clip_triangle;
227     } else {
228         c->prims.renderPoint    = primitive_nop_point;
229         c->prims.renderLine     = primitive_nop_line;
230         c->prims.renderTriangle = primitive_nop_triangle;
231     }
232 }
233 
234 // ----------------------------------------------------------------------------
235 
initTriangle(vertex_t const * v0,vertex_t const * v1,vertex_t const * v2)236 void compute_iterators_t::initTriangle(
237         vertex_t const* v0, vertex_t const* v1, vertex_t const* v2)
238 {
239     m_dx01 = v1->window.x - v0->window.x;
240     m_dy10 = v0->window.y - v1->window.y;
241     m_dx20 = v0->window.x - v2->window.x;
242     m_dy02 = v2->window.y - v0->window.y;
243     m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
244     (void)m_reserved; // suppress unused warning
245 }
246 
initLine(vertex_t const * v0,vertex_t const * v1)247 void compute_iterators_t::initLine(
248         vertex_t const* v0, vertex_t const* v1)
249 {
250     m_dx01 = m_dy02 = v1->window.x - v0->window.x;
251     m_dy10 = m_dx20 = v0->window.y - v1->window.y;
252     m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
253 }
254 
/*
 * Prepares the interpolation state after initTriangle()/initLine():
 *  - stores the reference vertex position in m_x0 / m_y0,
 *  - replaces the four edge deltas by (delta / area), scaled,
 *  - stores the matching shift in m_area_scale,
 *  - when texturing is enabled, computes m_scale, which iteratorsScale()
 *    subtracts from its available headroom.
 *
 * If |area| is below minArea, d stays 0, so all scaled deltas become 0
 * and every gradient computed from them is flat.
 */
void compute_iterators_t::initLerp(vertex_t const* v0, uint32_t enables)
{
    m_x0 = v0->window.x;
    m_y0 = v0->window.y;
    const GGLcoord area = (m_area + TRI_HALF) >> TRI_FRACTION_BITS;
    const GGLcoord minArea = 2; // cannot be inverted
    // triangles with an area smaller than 1.0 are not smooth-shaded

    int q=0, s=0, d=0;
    if (abs(area) >= minArea) {
        // Here we do some voodoo magic, to compute a suitable scale
        // factor for deltas/area:

        // First compute the 1/area with full 32-bits precision,
        // gglRecipQNormalized returns a number [-0.5, 0.5[ and an exponent.
        d = gglRecipQNormalized(area, &q);

        // Then compute the minimum left-shift to not overflow the muls
        // below.
        s = 32 - gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));

        // We'll keep 16-bits of precision for deltas/area. So we need
        // to shift everything left an extra 15 bits.
        s += 15;

        // make sure all final shifts are not > 32, because gglMulx
        // can't handle it.
        if (s < q) s = q;
        if (s > 32) {
            // NOTE(review): this branch is only taken when s > 32, so
            // (32-s) is negative and the statement below shifts by a
            // negative count, which the language leaves undefined.
            // Presumably (s-32) was intended, but m_area_scale below is
            // computed from the clamped s, so the scaling needs to be
            // re-derived before changing it -- TODO confirm.
            d >>= 32-s;
            s = 32;
        }
    }

    // deltas/area, each scaled by the shift s chosen above
    m_dx01 = gglMulx(m_dx01, d, s);
    m_dy10 = gglMulx(m_dy10, d, s);
    m_dx20 = gglMulx(m_dx20, d, s);
    m_dy02 = gglMulx(m_dy02, d, s);
    // shift used by the iterators*() methods when multiplying by the
    // scaled deltas
    m_area_scale = 32 + q - s;
    m_scale = 0;

    if (enables & GGL_ENABLE_TMUS) {
        // A and B are the leading-zero counts of the scaled deltas and of
        // the reference position
        const int A = gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
        const int B = gglClz(abs(m_x0)|abs(m_y0));
        m_scale = max(0, 32 - (A + 16)) +
                  max(0, 32 - (B + TRI_FRACTION_BITS)) + 1;
    }
}
303 
/*
 * Computes the interpolation triplet for an attribute with values c0, c1
 * and c2 at the three vertices, using block floating-point: the inputs are
 * first shifted by a common amount chosen from their leading-zero counts
 * and m_scale.
 *
 * Writes:
 *   it[0]  value of the attribute extrapolated back from (m_x0, m_y0)
 *   it[1]  d(attribute)/dx
 *   it[2]  d(attribute)/dy
 *
 * Returns the shift that was applied to the inputs: a value >= 0 means the
 * inputs were shifted left by that amount, a negative value means they were
 * shifted right by its magnitude. Callers need it to interpret `it`.
 */
int compute_iterators_t::iteratorsScale(GGLfixed* it,
        int32_t c0, int32_t c1, int32_t c2) const
{
    int32_t dc01 = c1 - c0;
    int32_t dc02 = c2 - c0;
    // headroom (leading zeros) of the base value and of the deltas
    const int A = gglClz(abs(c0));
    const int B = gglClz(abs(dc01)|abs(dc02));
    const int scale = min(A, B - m_scale) - 2;
    if (scale >= 0) {
        c0   <<= scale;
        dc01 <<= scale;
        dc02 <<= scale;
    } else {
        c0   >>= -scale;
        dc01 >>= -scale;
        dc02 >>= -scale;
    }
    const int s = m_area_scale;
    // gradients: combine the attribute deltas with the scaled edge deltas
    int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
    int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
    // subtract the contribution of the reference vertex position
    int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
            gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
    it[0] = c;
    it[1] = dcdx;
    it[2] = dcdy;
    return scale;
}
331 
iterators1616(GGLfixed * it,GGLfixed c0,GGLfixed c1,GGLfixed c2) const332 void compute_iterators_t::iterators1616(GGLfixed* it,
333         GGLfixed c0, GGLfixed c1, GGLfixed c2) const
334 {
335     const GGLfixed dc01 = c1 - c0;
336     const GGLfixed dc02 = c2 - c0;
337     // 16.16 x 16.16 == 32.32 --> 16.16
338     const int s = m_area_scale;
339     int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
340     int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
341     int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
342             gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
343     it[0] = c;
344     it[1] = dcdx;
345     it[2] = dcdy;
346 }
347 
/*
 * 64-bit variant of the iterator setup, used for depth in this file.
 *
 * Writes:
 *   it[0]  (c0<<16) minus the contribution of the reference position
 *   it[1]  d(attribute)/dx
 *   it[2]  d(attribute)/dy
 *
 * The deltas are pre-shifted right by (m_area_scale - 16) before being
 * multiplied by the scaled edge deltas.
 * NOTE(review): (c0<<16) is evaluated on an int32_t; the callers in this
 * file only keep the low 32 bits of the results -- confirm before relying
 * on the upper bits of it[0].
 */
void compute_iterators_t::iterators0032(int64_t* it,
        int32_t c0, int32_t c1, int32_t c2) const
{
    const int s = m_area_scale - 16;
    int32_t dc01 = (c1 - c0)>>s;
    int32_t dc02 = (c2 - c0)>>s;
    // 16.16 x 16.16 == 32.32
    int64_t dcdx = gglMulii(dc01, m_dy02) + gglMulii(dc02, m_dy10);
    int64_t dcdy = gglMulii(dc02, m_dx01) + gglMulii(dc01, m_dx20);
    // the >>4 removes the fractional bits carried by m_x0 / m_y0
    // (presumably TRI_FRACTION_BITS == 4 -- TODO confirm)
    it[ 0] = (c0<<16) - ((dcdx*m_x0 + dcdy*m_y0)>>4);
    it[ 1] = dcdx;
    it[ 2] = dcdy;
}
361 
// 32-bit variant of iterators0032(). On ARM (non-Thumb) builds it forwards
// to the C-linkage ::iterators0032() routine; everywhere else it runs the
// 64-bit version above and keeps the low 32 bits of each result.
#if defined(__arm__) && !defined(__thumb__)
inline void compute_iterators_t::iterators0032(int32_t* it,
        int32_t c0, int32_t c1, int32_t c2) const
{
    ::iterators0032(this, it, c0, c1, c2);
}
#else
void compute_iterators_t::iterators0032(int32_t* it,
        int32_t c0, int32_t c1, int32_t c2) const
{
    int64_t it64[3];
    iterators0032(it64, c0, c1, c2);
    // implicit int64_t -> int32_t narrowing: only the low 32 bits are kept
    it[0] = it64[0];
    it[1] = it64[1];
    it[2] = it64[2];
}
#endif
379 
380 // ----------------------------------------------------------------------------
381 
382 static inline int32_t clampZ(GLfixed z) CONST;
clampZ(GLfixed z)383 int32_t clampZ(GLfixed z) {
384     z = (z & ~(z>>31));
385     if (z >= 0x10000)
386         z = 0xFFFF;
387     return z;
388 }
389 
390 static __attribute__((noinline))
fetch_texcoord_impl(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)391 void fetch_texcoord_impl(ogles_context_t* c,
392         vertex_t* v0, vertex_t* v1, vertex_t* v2)
393 {
394     vertex_t* const vtx[3] = { v0, v1, v2 };
395     array_t const * const texcoordArray = c->arrays.texture;
396 
397     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
398         if (!(c->rasterizer.state.texture[i].enable))
399             continue;
400 
401         for (int j=0 ; j<3 ; j++) {
402             vertex_t* const v = vtx[j];
403             if (v->flags & vertex_t::TT)
404                 continue;
405 
406             // NOTE: here we could compute automatic texgen
407             // such as sphere/cube maps, instead of fetching them
408             // from the textcoord array.
409 
410             vec4_t& coords = v->texture[i];
411             const GLubyte* tp = texcoordArray[i].element(
412                     v->index & vertex_cache_t::INDEX_MASK);
413             texcoordArray[i].fetch(c, coords.v, tp);
414 
415             // transform texture coordinates...
416             coords.Q = 0x10000;
417             const transform_t& tr = c->transforms.texture[i].transform;
418             if (ggl_unlikely(tr.ops)) {
419                 c->arrays.tex_transform[i](&tr, &coords, &coords);
420             }
421 
422             // divide by Q
423             const GGLfixed q = coords.Q;
424             if (ggl_unlikely(q != 0x10000)) {
425                 const int32_t qinv = gglRecip28(q);
426                 coords.S = gglMulx(coords.S, qinv, 28);
427                 coords.T = gglMulx(coords.T, qinv, 28);
428             }
429         }
430     }
431     v0->flags |= vertex_t::TT;
432     v1->flags |= vertex_t::TT;
433     v2->flags |= vertex_t::TT;
434 }
435 
fetch_texcoord(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)436 inline void fetch_texcoord(ogles_context_t* c,
437         vertex_t* v0, vertex_t* v1, vertex_t* v2)
438 {
439     const uint32_t enables = c->rasterizer.state.enables;
440     if (!(enables & GGL_ENABLE_TMUS))
441         return;
442 
443     // Fetch & transform texture coordinates...
444     if (ggl_likely(v0->flags & v1->flags & v2->flags & vertex_t::TT)) {
445         // already done for all three vertices, bail...
446         return;
447     }
448     fetch_texcoord_impl(c, v0, v1, v2);
449 }
450 
451 // ----------------------------------------------------------------------------
452 #if 0
453 #pragma mark -
454 #pragma mark Point
455 #endif
456 
// Point renderer installed when the vertex array is disabled: draws nothing.
void primitive_nop_point(ogles_context_t*, vertex_t*) {
}
459 
primitive_point(ogles_context_t * c,vertex_t * v)460 void primitive_point(ogles_context_t* c, vertex_t* v)
461 {
462     // lighting & clamping...
463     const uint32_t enables = c->rasterizer.state.enables;
464 
465     if (ggl_unlikely(!(v->flags & vertex_t::LIT))) {
466         if (c->lighting.enable) {
467             c->lighting.lightVertex(c, v);
468         } else {
469             v->flags |= vertex_t::LIT;
470             const GLvoid* cp = c->arrays.color.element(
471                     v->index & vertex_cache_t::INDEX_MASK);
472             c->arrays.color.fetch(c, v->color.v, cp);
473         }
474         if (enables & GGL_ENABLE_FOG) {
475             v->fog = c->fog.fog(c, v->eye.z);
476         }
477     }
478 
479     // XXX: we don't need to do that each-time
480     // if color array and lighting not enabled
481     c->rasterizer.procs.color4xv(c, v->color.v);
482 
483     // XXX: look into ES point-sprite extension
484     if (enables & GGL_ENABLE_TMUS) {
485         fetch_texcoord(c, v,v,v);
486         for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
487             if (!c->rasterizer.state.texture[i].enable)
488                 continue;
489             int32_t itt[8];
490             itt[1] = itt[2] = itt[4] = itt[5] = 0;
491             itt[6] = itt[7] = 16; // XXX: check that
492             if (c->rasterizer.state.texture[i].s_wrap == GGL_CLAMP) {
493                 int width = c->textures.tmu[i].texture->surface.width;
494                 itt[0] = v->texture[i].S * width;
495                 itt[6] = 0;
496             }
497             if (c->rasterizer.state.texture[i].t_wrap == GGL_CLAMP) {
498                 int height = c->textures.tmu[i].texture->surface.height;
499                 itt[3] = v->texture[i].T * height;
500                 itt[7] = 0;
501             }
502             c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
503         }
504     }
505 
506     if (enables & GGL_ENABLE_DEPTH_TEST) {
507         int32_t itz[3];
508         itz[0] = clampZ(v->window.z) * 0x00010001;
509         itz[1] = itz[2] = 0;
510         c->rasterizer.procs.zGrad3xv(c, itz);
511     }
512 
513     if (enables & GGL_ENABLE_FOG) {
514         GLfixed itf[3];
515         itf[0] = v->fog;
516         itf[1] = itf[2] = 0;
517         c->rasterizer.procs.fogGrad3xv(c, itf);
518     }
519 
520     // Render our point...
521     c->rasterizer.procs.pointx(c, v->window.v, c->point.size);
522 }
523 
524 // ----------------------------------------------------------------------------
525 #if 0
526 #pragma mark -
527 #pragma mark Line
528 #endif
529 
// Line renderer installed when the vertex array is disabled: draws nothing.
void primitive_nop_line(ogles_context_t*, vertex_t*, vertex_t*) {
}
532 
primitive_line(ogles_context_t * c,vertex_t * v0,vertex_t * v1)533 void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1)
534 {
535     // get texture coordinates
536     fetch_texcoord(c, v0, v1, v1);
537 
538     // light/shade the vertices first (they're copied below)
539     c->lighting.lightTriangle(c, v0, v1, v1);
540 
541     // clip the line if needed
542     if (ggl_unlikely((v0->flags | v1->flags) & vertex_t::CLIP_ALL)) {
543         unsigned int count = clip_line(c, v0, v1);
544         if (ggl_unlikely(count == 0))
545             return;
546     }
547 
548     // compute iterators...
549     const uint32_t enables = c->rasterizer.state.enables;
550     const uint32_t mask =   GGL_ENABLE_TMUS |
551                             GGL_ENABLE_SMOOTH |
552                             GGL_ENABLE_W |
553                             GGL_ENABLE_FOG |
554                             GGL_ENABLE_DEPTH_TEST;
555 
556     if (ggl_unlikely(enables & mask)) {
557         c->lerp.initLine(v0, v1);
558         lerp_triangle(c, v0, v1, v0);
559     }
560 
561     // render our line
562     c->rasterizer.procs.linex(c, v0->window.v, v1->window.v, c->line.width);
563 }
564 
565 // ----------------------------------------------------------------------------
566 #if 0
567 #pragma mark -
568 #pragma mark Triangle
569 #endif
570 
// Triangle renderer installed when the vertex array is disabled: draws
// nothing.
void primitive_nop_triangle(ogles_context_t* /*c*/,
        vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/) {
}
574 
primitive_clip_triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)575 void primitive_clip_triangle(ogles_context_t* c,
576         vertex_t* v0, vertex_t* v1, vertex_t* v2)
577 {
578     uint32_t cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
579     if (ggl_likely(!cc)) {
580         // code below must be as optimized as possible, this is the
581         // common code path.
582 
583         // This triangle is not clipped, test if it's culled
584         // unclipped triangle...
585         c->lerp.initTriangle(v0, v1, v2);
586         if (cull_triangle(c, v0, v1, v2))
587             return; // culled!
588 
589         // Fetch all texture coordinates if needed
590         fetch_texcoord(c, v0, v1, v2);
591 
592         // light (or shade) our triangle!
593         c->lighting.lightTriangle(c, v0, v1, v2);
594 
595         triangle(c, v0, v1, v2);
596         return;
597     }
598 
599     // The assumption here is that we're not going to clip very often,
600     // and even more rarely will we clip a triangle that ends up
601     // being culled out. So it's okay to light the vertices here, even though
602     // in a few cases we won't render the triangle (if culled).
603 
604     // Fetch texture coordinates...
605     fetch_texcoord(c, v0, v1, v2);
606 
607     // light (or shade) our triangle!
608     c->lighting.lightTriangle(c, v0, v1, v2);
609 
610     clip_triangle(c, v0, v1, v2);
611 }
612 
613 // -----------------------------------------------------------------------
614 
triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)615 void triangle(ogles_context_t* c,
616         vertex_t* v0, vertex_t* v1, vertex_t* v2)
617 {
618     // compute iterators...
619     const uint32_t enables = c->rasterizer.state.enables;
620     const uint32_t mask =   GGL_ENABLE_TMUS |
621                             GGL_ENABLE_SMOOTH |
622                             GGL_ENABLE_W |
623                             GGL_ENABLE_FOG |
624                             GGL_ENABLE_DEPTH_TEST;
625 
626     if (ggl_likely(enables & mask))
627         lerp_triangle(c, v0, v1, v2);
628 
629     c->rasterizer.procs.trianglex(c, v0->window.v, v1->window.v, v2->window.v);
630 }
631 
lerp_triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)632 void lerp_triangle(ogles_context_t* c,
633         vertex_t* v0, vertex_t* v1, vertex_t* v2)
634 {
635     const uint32_t enables = c->rasterizer.state.enables;
636     c->lerp.initLerp(v0, enables);
637 
638     // set up texture iterators
639     if (enables & GGL_ENABLE_TMUS) {
640         if (enables & GGL_ENABLE_W) {
641             lerp_texcoords_w(c, v0, v1, v2);
642         } else {
643             lerp_texcoords(c, v0, v1, v2);
644         }
645     }
646 
647     // set up the color iterators
648     const compute_iterators_t& lerp = c->lerp;
649     if (enables & GGL_ENABLE_SMOOTH) {
650         GLfixed itc[12];
651         for (int i=0 ; i<4 ; i++) {
652             const GGLcolor c0 = v0->color.v[i] * 255;
653             const GGLcolor c1 = v1->color.v[i] * 255;
654             const GGLcolor c2 = v2->color.v[i] * 255;
655             lerp.iterators1616(&itc[i*3], c0, c1, c2);
656         }
657         c->rasterizer.procs.colorGrad12xv(c, itc);
658     }
659 
660     if (enables & GGL_ENABLE_DEPTH_TEST) {
661         int32_t itz[3];
662         const int32_t v0z = clampZ(v0->window.z);
663         const int32_t v1z = clampZ(v1->window.z);
664         const int32_t v2z = clampZ(v2->window.z);
665         if (ggl_unlikely(c->polygonOffset.enable)) {
666             const int32_t units = (c->polygonOffset.units << 16);
667             const GLfixed factor = c->polygonOffset.factor;
668             if (factor) {
669                 int64_t itz64[3];
670                 lerp.iterators0032(itz64, v0z, v1z, v2z);
671                 int64_t maxDepthSlope = max(itz64[1], itz64[2]);
672                 itz[0] = uint32_t(itz64[0])
673                         + uint32_t((maxDepthSlope*factor)>>16) + units;
674                 itz[1] = uint32_t(itz64[1]);
675                 itz[2] = uint32_t(itz64[2]);
676             } else {
677                 lerp.iterators0032(itz, v0z, v1z, v2z);
678                 itz[0] += units;
679             }
680         } else {
681             lerp.iterators0032(itz, v0z, v1z, v2z);
682         }
683         c->rasterizer.procs.zGrad3xv(c, itz);
684     }
685 
686     if (ggl_unlikely(enables & GGL_ENABLE_FOG)) {
687         GLfixed itf[3];
688         lerp.iterators1616(itf, v0->fog, v1->fog, v2->fog);
689         c->rasterizer.procs.fogGrad3xv(c, itf);
690     }
691 }
692 
693 
/*
 * Estimates the mipmap level for texture unit `i` on the current primitive
 * from the ratio between the area covered in texels and the area covered on
 * screen. (s0,t0), (s1,t1), (s2,t2) are the texture coordinates at the
 * three vertices. The returned value is passed unclamped to
 * texture->mip() by the callers in this file.
 */
static inline
int compute_lod(ogles_context_t* c, int i,
        int32_t s0, int32_t t0, int32_t s1, int32_t t1, int32_t s2, int32_t t2)
{
    // Compute mipmap level / primitive
    // rho = sqrt( texelArea / area )
    // lod = log2( rho )
    // lod = log2( texelArea / area ) / 2
    // lod = (log2( texelArea ) - log2( area )) / 2
    const compute_iterators_t& lerp = c->lerp;
    const GGLcoord area = abs(lerp.area());
    const int w = c->textures.tmu[i].texture->surface.width;
    const int h = c->textures.tmu[i].texture->surface.height;
    const int shift = 16 + (16 - TRI_FRACTION_BITS);
    // cross product of the two texture-space edges, scaled to texels
    int32_t texelArea = abs( gglMulx(s1-s0, t2-t0, shift) -
            gglMulx(s2-s0, t1-t0, shift) )*w*h;
    // integer log2 of both areas, derived from their leading-zero counts
    int log2TArea = (32-TRI_FRACTION_BITS  -1) - gglClz(texelArea);
    int log2Area  = (32-TRI_FRACTION_BITS*2-1) - gglClz(area);
    // halve the difference (the +1 rounds before the shift)
    int lod = (log2TArea - log2Area + 1) >> 1;
    return lod;
}
715 
lerp_texcoords(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)716 void lerp_texcoords(ogles_context_t* c,
717         vertex_t* v0, vertex_t* v1, vertex_t* v2)
718 {
719     const compute_iterators_t& lerp = c->lerp;
720     int32_t itt[8] __attribute__((aligned(16)));
721     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
722         const texture_t& tmu = c->rasterizer.state.texture[i];
723         if (!tmu.enable)
724             continue;
725 
726         // compute the jacobians using block floating-point
727         int32_t s0 = v0->texture[i].S;
728         int32_t t0 = v0->texture[i].T;
729         int32_t s1 = v1->texture[i].S;
730         int32_t t1 = v1->texture[i].T;
731         int32_t s2 = v2->texture[i].S;
732         int32_t t2 = v2->texture[i].T;
733 
734         const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
735         if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
736             int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
737             c->rasterizer.procs.bindTextureLod(c, i,
738                     &c->textures.tmu[i].texture->mip(lod));
739         }
740 
741         // premultiply (s,t) when clampling
742         if (tmu.s_wrap == GGL_CLAMP) {
743             const int width = tmu.surface.width;
744             s0 *= width;
745             s1 *= width;
746             s2 *= width;
747         }
748         if (tmu.t_wrap == GGL_CLAMP) {
749             const int height = tmu.surface.height;
750             t0 *= height;
751             t1 *= height;
752             t2 *= height;
753         }
754         itt[6] = -lerp.iteratorsScale(itt+0, s0, s1, s2);
755         itt[7] = -lerp.iteratorsScale(itt+3, t0, t1, t2);
756         c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
757     }
758 }
759 
/*
 * Programs the W gradients and the texture-coordinate gradients of every
 * enabled texture unit for the perspective case (GGL_ENABLE_W set).
 *
 * Same per-unit steps as lerp_texcoords(), except that S and T are
 * multiplied by the vertex W before their gradients are computed, and the
 * scale slots (6 and 7) are expressed relative to the scale `sc` obtained
 * for W.
 */
void lerp_texcoords_w(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2)
{
    const compute_iterators_t& lerp = c->lerp;
    int32_t itt[8] __attribute__((aligned(16)));
    int32_t itw[3];

    // compute W's scale to 2.30
    int32_t w0 = v0->window.w;
    int32_t w1 = v1->window.w;
    int32_t w2 = v2->window.w;
    // number of significant bits of the largest W
    int wscale = 32 - gglClz(w0|w1|w2);

    // compute the jacobian using block floating-point
    int sc = lerp.iteratorsScale(itw, w0, w1, w2);
    sc +=  wscale - 16;
    c->rasterizer.procs.wGrad3xv(c, itw);

    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
        const texture_t& tmu = c->rasterizer.state.texture[i];
        if (!tmu.enable)
            continue;

        // compute the jacobians using block floating-point
        int32_t s0 = v0->texture[i].S;
        int32_t t0 = v0->texture[i].T;
        int32_t s1 = v1->texture[i].S;
        int32_t t1 = v1->texture[i].T;
        int32_t s2 = v2->texture[i].S;
        int32_t t2 = v2->texture[i].T;

        // mipmapping filters need a level of detail for this primitive
        const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
        if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
            int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
            c->rasterizer.procs.bindTextureLod(c, i,
                    &c->textures.tmu[i].texture->mip(lod));
        }

        // premultiply (s,t) when clamping
        if (tmu.s_wrap == GGL_CLAMP) {
            const int width = tmu.surface.width;
            s0 *= width;
            s1 *= width;
            s2 *= width;
        }
        if (tmu.t_wrap == GGL_CLAMP) {
            const int height = tmu.surface.height;
            t0 *= height;
            t1 *= height;
            t2 *= height;
        }

        // multiply each coordinate by its vertex's W
        s0 = gglMulx(s0, w0, wscale);
        t0 = gglMulx(t0, w0, wscale);
        s1 = gglMulx(s1, w1, wscale);
        t1 = gglMulx(t1, w1, wscale);
        s2 = gglMulx(s2, w2, wscale);
        t2 = gglMulx(t2, w2, wscale);

        // scale slots are relative to the scale obtained for W
        itt[6] = sc - lerp.iteratorsScale(itt+0, s0, s1, s2);
        itt[7] = sc - lerp.iteratorsScale(itt+3, t0, t1, t2);
        c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
    }
}
824 
825 
826 static inline
cull_triangle(ogles_context_t * c,vertex_t *,vertex_t *,vertex_t *)827 bool cull_triangle(ogles_context_t* c, vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/)
828 {
829     if (ggl_likely(c->cull.enable)) {
830         const GLenum winding = (c->lerp.area() > 0) ? GL_CW : GL_CCW;
831         const GLenum face = (winding == c->cull.frontFace) ? GL_FRONT : GL_BACK;
832         if (face == c->cull.cullFace)
833             return true; // culled!
834     }
835     return false;
836 }
837 
838 static inline
frustumPlaneDist(int plane,const vec4_t & s)839 GLfixed frustumPlaneDist(int plane, const vec4_t& s)
840 {
841     const GLfixed d = s.v[ plane >> 1 ];
842     return  ((plane & 1) ? (s.w - d) : (s.w + d));
843 }
844 
845 static inline
clipDivide(GLfixed a,GLfixed b)846 int32_t clipDivide(GLfixed a, GLfixed b) {
847     // returns a 4.28 fixed-point
848     return gglMulDivi(1LU<<28, a, b);
849 }
850 
clip_triangle(ogles_context_t * c,vertex_t * v0,vertex_t * v1,vertex_t * v2)851 void clip_triangle(ogles_context_t* c,
852         vertex_t* v0, vertex_t* v1, vertex_t* v2)
853 {
854     uint32_t all_cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
855 
856     vertex_t *p0, *p1, *p2;
857     const int MAX_CLIPPING_PLANES = 6 + OGLES_MAX_CLIP_PLANES;
858     const int MAX_VERTICES = 3;
859 
860     // Temporary buffer to hold the new vertices. Each plane can add up to
861     // two new vertices (because the polygon is convex).
862     // We need one extra element, to handle an overflow case when
863     // the polygon degenerates into something non convex.
864     vertex_t buffer[MAX_CLIPPING_PLANES * 2 + 1];   // ~3KB
865     vertex_t* buf = buffer;
866 
867     // original list of vertices (polygon to clip, in fact this
868     // function works with an arbitrary polygon).
869     vertex_t* in[3] = { v0, v1, v2 };
870 
871     // output lists (we need 2, which we use back and forth)
872     // (maximum outpout list's size is MAX_CLIPPING_PLANES + MAX_VERTICES)
873     // 2 more elements for overflow when non convex polygons.
874     vertex_t* out[2][MAX_CLIPPING_PLANES + MAX_VERTICES + 2];
875     unsigned int outi = 0;
876 
877     // current input list
878     vertex_t** ivl = in;
879 
880     // 3 input vertices, 0 in the output list, first plane
881     unsigned int ic = 3;
882 
883     // User clip-planes first, the clipping is always done in eye-coordinate
884     // this is basically the same algorithm than for the view-volume
885     // clipping, except for the computation of the distance (vertex, plane)
886     // and the fact that we need to compute the eye-coordinates of each
887     // new vertex we create.
888 
889     if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
890     {
891         unsigned int plane = 0;
892         uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
893         do {
894             if (cc & 1) {
895                 // pointers to our output list (head and current)
896                 vertex_t** const ovl = &out[outi][0];
897                 vertex_t** output = ovl;
898                 unsigned int oc = 0;
899                 unsigned int sentinel = 0;
900                 // previous vertex, compute distance to the plane
901                 vertex_t* s = ivl[ic-1];
902                 const vec4_t& equation = c->clipPlanes.plane[plane].equation;
903                 GLfixed sd = dot4(equation.v, s->eye.v);
904                 // clip each vertex against this plane...
905                 for (unsigned int i=0 ; i<ic ; i++) {
906                     vertex_t* p = ivl[i];
907                     const GLfixed pd = dot4(equation.v, p->eye.v);
908                     if (sd >= 0) {
909                         if (pd >= 0) {
910                             // both inside
911                             *output++ = p;
912                             oc++;
913                         } else {
914                             // s inside, p outside (exiting)
915                             const GLfixed t = clipDivide(sd, sd-pd);
916                             c->arrays.clipEye(c, buf, t, p, s);
917                             *output++ = buf++;
918                             oc++;
919                             if (++sentinel >= 3)
920                                 return; // non-convex polygon!
921                         }
922                     } else {
923                         if (pd >= 0) {
924                             // s outside (entering)
925                             if (pd) {
926                                 const GLfixed t = clipDivide(pd, pd-sd);
927                                 c->arrays.clipEye(c, buf, t, s, p);
928                                 *output++ = buf++;
929                                 oc++;
930                                 if (++sentinel >= 3)
931                                     return; // non-convex polygon!
932                             }
933                             *output++ = p;
934                             oc++;
935                         } else {
936                            // both outside
937                         }
938                     }
939                     s = p;
940                     sd = pd;
941                 }
942                 // output list become the new input list
943                 if (oc<3)
944                     return; // less than 3 vertices left? we're done!
945                 ivl = ovl;
946                 ic = oc;
947                 outi = 1-outi;
948             }
949             cc >>= 1;
950             plane++;
951         } while (cc);
952     }
953 
954     // frustum clip-planes
955     if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
956     {
957         unsigned int plane = 0;
958         uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
959         do {
960             if (cc & 1) {
961                 // pointers to our output list (head and current)
962                 vertex_t** const ovl = &out[outi][0];
963                 vertex_t** output = ovl;
964                 unsigned int oc = 0;
965                 unsigned int sentinel = 0;
966                 // previous vertex, compute distance to the plane
967                 vertex_t* s = ivl[ic-1];
968                 GLfixed sd = frustumPlaneDist(plane, s->clip);
969                 // clip each vertex against this plane...
970                 for (unsigned int i=0 ; i<ic ; i++) {
971                     vertex_t* p = ivl[i];
972                     const GLfixed pd = frustumPlaneDist(plane, p->clip);
973                     if (sd >= 0) {
974                         if (pd >= 0) {
975                             // both inside
976                             *output++ = p;
977                             oc++;
978                         } else {
979                             // s inside, p outside (exiting)
980                             const GLfixed t = clipDivide(sd, sd-pd);
981                             c->arrays.clipVertex(c, buf, t, p, s);
982                             *output++ = buf++;
983                             oc++;
984                             if (++sentinel >= 3)
985                                 return; // non-convex polygon!
986                         }
987                     } else {
988                         if (pd >= 0) {
989                             // s outside (entering)
990                             if (pd) {
991                                 const GLfixed t = clipDivide(pd, pd-sd);
992                                 c->arrays.clipVertex(c, buf, t, s, p);
993                                 *output++ = buf++;
994                                 oc++;
995                                 if (++sentinel >= 3)
996                                     return; // non-convex polygon!
997                             }
998                             *output++ = p;
999                             oc++;
1000                         } else {
1001                            // both outside
1002                         }
1003                     }
1004                     s = p;
1005                     sd = pd;
1006                 }
1007                 // output list become the new input list
1008                 if (oc<3)
1009                     return; // less than 3 vertices left? we're done!
1010                 ivl = ovl;
1011                 ic = oc;
1012                 outi = 1-outi;
1013             }
1014             cc >>= 1;
1015             plane++;
1016         } while (cc);
1017     }
1018 
1019     // finally we can render our triangles...
1020     p0 = ivl[0];
1021     p1 = ivl[1];
1022     for (unsigned int i=2 ; i<ic ; i++) {
1023         p2 = ivl[i];
1024         c->lerp.initTriangle(p0, p1, p2);
1025         if (cull_triangle(c, p0, p1, p2)) {
1026             p1 = p2;
1027             continue; // culled!
1028         }
1029         triangle(c, p0, p1, p2);
1030         p1 = p2;
1031     }
1032 }
1033 
clip_line(ogles_context_t * c,vertex_t * s,vertex_t * p)1034 unsigned int clip_line(ogles_context_t* c, vertex_t* s, vertex_t* p)
1035 {
1036     const uint32_t all_cc = (s->flags | p->flags) & vertex_t::CLIP_ALL;
1037 
1038     if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
1039     {
1040         unsigned int plane = 0;
1041         uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
1042         do {
1043             if (cc & 1) {
1044                 const vec4_t& equation = c->clipPlanes.plane[plane].equation;
1045                 const GLfixed sd = dot4(equation.v, s->eye.v);
1046                 const GLfixed pd = dot4(equation.v, p->eye.v);
1047                 if (sd >= 0) {
1048                     if (pd >= 0) {
1049                         // both inside
1050                     } else {
1051                         // s inside, p outside (exiting)
1052                         const GLfixed t = clipDivide(sd, sd-pd);
1053                         c->arrays.clipEye(c, p, t, p, s);
1054                     }
1055                 } else {
1056                     if (pd >= 0) {
1057                         // s outside (entering)
1058                         if (pd) {
1059                             const GLfixed t = clipDivide(pd, pd-sd);
1060                             c->arrays.clipEye(c, s, t, s, p);
1061                         }
1062                     } else {
1063                        // both outside
1064                        return 0;
1065                     }
1066                 }
1067             }
1068             cc >>= 1;
1069             plane++;
1070         } while (cc);
1071     }
1072 
1073     // frustum clip-planes
1074     if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
1075     {
1076         unsigned int plane = 0;
1077         uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
1078         do {
1079             if (cc & 1) {
1080                 const GLfixed sd = frustumPlaneDist(plane, s->clip);
1081                 const GLfixed pd = frustumPlaneDist(plane, p->clip);
1082                 if (sd >= 0) {
1083                     if (pd >= 0) {
1084                         // both inside
1085                     } else {
1086                         // s inside, p outside (exiting)
1087                         const GLfixed t = clipDivide(sd, sd-pd);
1088                         c->arrays.clipVertex(c, p, t, p, s);
1089                     }
1090                 } else {
1091                     if (pd >= 0) {
1092                         // s outside (entering)
1093                         if (pd) {
1094                             const GLfixed t = clipDivide(pd, pd-sd);
1095                             c->arrays.clipVertex(c, s, t, s, p);
1096                         }
1097                     } else {
1098                        // both outside
1099                        return 0;
1100                     }
1101                 }
1102             }
1103             cc >>= 1;
1104             plane++;
1105         } while (cc);
1106     }
1107 
1108     return 2;
1109 }
1110 
1111 
1112 }; // namespace android
1113