1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /*
19 ------------------------------------------------------------------------------
20  INPUT AND OUTPUT DEFINITIONS
21 
22  Inputs:
23     [input_variable_name] = [description of the input to module, its type
24                  definition, and length (when applicable)]
25 
26  Local Stores/Buffers/Pointers Needed:
27     [local_store_name] = [description of the local store, its type
28                   definition, and length (when applicable)]
29     [local_buffer_name] = [description of the local buffer, its type
30                    definition, and length (when applicable)]
31     [local_ptr_name] = [description of the local pointer, its type
32                 definition, and length (when applicable)]
33 
34  Global Stores/Buffers/Pointers Needed:
35     [global_store_name] = [description of the global store, its type
36                    definition, and length (when applicable)]
37     [global_buffer_name] = [description of the global buffer, its type
38                 definition, and length (when applicable)]
39     [global_ptr_name] = [description of the global pointer, its type
40                  definition, and length (when applicable)]
41 
42  Outputs:
43     [return_variable_name] = [description of data/pointer returned
44                   by module, its type definition, and length
45                   (when applicable)]
46 
47  Pointers and Buffers Modified:
48     [variable_bfr_ptr] points to the [describe where the
49       variable_bfr_ptr points to, its type definition, and length
50       (when applicable)]
51     [variable_bfr] contents are [describe the new contents of
52       variable_bfr]
53 
54  Local Stores Modified:
55     [local_store_name] = [describe new contents, its type
56                   definition, and length (when applicable)]
57 
58  Global Stores Modified:
59     [global_store_name] = [describe new contents, its type
60                    definition, and length (when applicable)]
61 
62 ------------------------------------------------------------------------------
63  FUNCTION DESCRIPTION
64 
65 ------------------------------------------------------------------------------
66  REQUIREMENTS
67 
68 ------------------------------------------------------------------------------
69  REFERENCES
70 
71 ------------------------------------------------------------------------------
72  PSEUDO-CODE
73 
74 ------------------------------------------------------------------------------
75  RESOURCES USED
   When the code is written for a specific target processor,
     the resources used should be documented below.
78 
79  STACK USAGE: [stack count for this module] + [variable to represent
80           stack usage for each subroutine called]
81 
82      where: [stack usage variable] = stack usage for [subroutine
83          name] (see [filename].ext)
84 
85  DATA MEMORY USED: x words
86 
87  PROGRAM MEMORY USED: x words
88 
89  CLOCK CYCLES: [cycle count equation for this module] + [variable
90            used to represent cycle count for each subroutine
91            called]
92 
93      where: [cycle count variable] = cycle count for [subroutine
94         name] (see [filename].ext)
95 
96 ------------------------------------------------------------------------------
97 */
98 
99 /*----------------------------------------------------------------------------
100 ; INCLUDES
101 ----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "idct.h"
#include "motion_comp.h"
105 
106 #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
107 /*----------------------------------------------------------------------------
108 ; MACROS
109 ; Define module specific macros here
110 ----------------------------------------------------------------------------*/
111 
112 /*----------------------------------------------------------------------------
113 ; DEFINES
114 ; Include all pre-processor statements here. Include conditional
115 ; compile variables also.
116 ----------------------------------------------------------------------------*/
117 
118 /*----------------------------------------------------------------------------
119 ; LOCAL FUNCTION DEFINITIONS
120 ; Function Prototype declaration
121 ----------------------------------------------------------------------------*/
122 /* private prototypes */
123 static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width);
124 static void idctrow_intra(int16 *blk, PIXEL *, int width);
125 static void idctcol(int16 *blk);
126 
127 #ifdef FAST_IDCT
128 // mapping from nz_coefs to functions to be used
129 
130 
// ARM4 does not allow global data unless it is constant, and an array of
// function pointers cannot be treated as an array of constants (the actual
// addresses are only known when the DLL is loaded).
// So instead of arrays of function pointers, the plan was to store arrays of
// row/column counts here and then call the IDCT function corresponding to
// that row/column count.
// NOTE(review): the tables below are nevertheless const arrays of function
// pointers, so this remark looks stale -- confirm before relying on it.
137 
138 
139 static void (*const idctcolVCA[10][4])(int16*) =
140 {
141     {&idctcol1, &idctcol0, &idctcol0, &idctcol0},
142     {&idctcol1, &idctcol1, &idctcol0, &idctcol0},
143     {&idctcol2, &idctcol1, &idctcol0, &idctcol0},
144     {&idctcol3, &idctcol1, &idctcol0, &idctcol0},
145     {&idctcol3, &idctcol2, &idctcol0, &idctcol0},
146     {&idctcol3, &idctcol2, &idctcol1, &idctcol0},
147     {&idctcol3, &idctcol2, &idctcol1, &idctcol1},
148     {&idctcol3, &idctcol2, &idctcol2, &idctcol1},
149     {&idctcol3, &idctcol3, &idctcol2, &idctcol1},
150     {&idctcol4, &idctcol3, &idctcol2, &idctcol1}
151 };
152 
153 
154 static void (*const idctrowVCA[10])(int16*, uint8*, uint8*, int) =
155 {
156     &idctrow1,
157     &idctrow2,
158     &idctrow2,
159     &idctrow2,
160     &idctrow2,
161     &idctrow3,
162     &idctrow4,
163     &idctrow4,
164     &idctrow4,
165     &idctrow4
166 };
167 
168 
169 static void (*const idctcolVCA2[16])(int16*) =
170 {
171     &idctcol0, &idctcol4, &idctcol3, &idctcol4,
172     &idctcol2, &idctcol4, &idctcol3, &idctcol4,
173     &idctcol1, &idctcol4, &idctcol3, &idctcol4,
174     &idctcol2, &idctcol4, &idctcol3, &idctcol4
175 };
176 
177 static void (*const idctrowVCA2[8])(int16*, uint8*, uint8*, int) =
178 {
179     &idctrow1, &idctrow4, &idctrow3, &idctrow4,
180     &idctrow2, &idctrow4, &idctrow3, &idctrow4
181 };
182 
183 static void (*const idctrowVCA_intra[10])(int16*, PIXEL *, int) =
184 {
185     &idctrow1_intra,
186     &idctrow2_intra,
187     &idctrow2_intra,
188     &idctrow2_intra,
189     &idctrow2_intra,
190     &idctrow3_intra,
191     &idctrow4_intra,
192     &idctrow4_intra,
193     &idctrow4_intra,
194     &idctrow4_intra
195 };
196 
197 static void (*const idctrowVCA2_intra[8])(int16*, PIXEL *, int) =
198 {
199     &idctrow1_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra,
200     &idctrow2_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra
201 };
202 #endif
203 
204 /*----------------------------------------------------------------------------
205 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
206 ; Variable declaration - defined here and used outside this module
207 ----------------------------------------------------------------------------*/
208 
209 /*----------------------------------------------------------------------------
210 ; EXTERNAL FUNCTION REFERENCES
211 ; Declare functions defined elsewhere and referenced in this module
212 ----------------------------------------------------------------------------*/
213 
214 /*----------------------------------------------------------------------------
215 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
216 ; Declare variables used in this module but defined elsewhere
217 ----------------------------------------------------------------------------*/
218 
219 /*----------------------------------------------------------------------------
220 ; FUNCTION CODE
221 ----------------------------------------------------------------------------*/
/*
 * MBlockIDCT - run the intra IDCT on the six blocks of the current
 * macroblock and write the result directly into the current VOP.
 *
 *   video - decoder state; supplies the current VOP, the macroblock data,
 *           the macroblock column/row (mbnum_col / mbnum_row) and the
 *           luma line width.
 *
 * Blocks 0..3 go to the luma plane (pitch = width), blocks 4 and 5 to the
 * U and V planes (pitch = width / 2).
 */
void MBlockIDCT(VideoDecData *video)
{
    Vop *vop = video->currVop;
    MacroBlock *mb = video->mblock;
    const int mb_col = video->mbnum_col;
    const int mb_row = video->mbnum_row;
    const int luma_pitch = video->width;
    const int chroma_pitch = luma_pitch >> 1;

    /* Top-left luma sample of this macroblock: 16 lines and 16 columns per MB. */
    const int32 luma_offset = (int32)(mb_row << 4) * luma_pitch + (mb_col << 4);
    /* Same position in a chroma plane: a quarter of the luma offset plus 4 * mb_col. */
    const int32 chroma_offset = (luma_offset >> 2) + (mb_col << 2);

    PIXEL *y_out = vop->yChan + luma_offset;
    PIXEL *u_out = vop->uChan + chroma_offset;
    PIXEL *v_out = vop->vChan + chroma_offset;

    /* Distance from the upper to the lower pair of luma blocks: eight lines. */
    const int eight_lines = luma_pitch << 3;

    BlockIDCT_intra(mb, y_out, 0, luma_pitch);
    BlockIDCT_intra(mb, y_out + 8, 1, luma_pitch);
    BlockIDCT_intra(mb, y_out + eight_lines, 2, luma_pitch);
    BlockIDCT_intra(mb, y_out + eight_lines + 8, 3, luma_pitch);
    BlockIDCT_intra(mb, u_out, 4, chroma_pitch);
    BlockIDCT_intra(mb, v_out, 5, chroma_pitch);
}
248 
249 
/*
 * BlockIDCT_intra - inverse-transform one intra block and store the pixels.
 *
 *   mblock - macroblock holding the coefficients (block[comp]) and, for the
 *            fast path, no_coeff[comp], bitmapcol[comp] and bitmaprow[comp].
 *   c_comp - top-left output pixel of the block.
 *   comp   - block index inside the macroblock.
 *   width  - pitch of the output plane.
 *
 * With INTEGER_IDCT and FAST_IDCT defined, the column and row passes are
 * chosen from the dispatch tables according to how sparse the block is;
 * otherwise the work is delegated to idct_intra() or idctref_intra().
 */
void BlockIDCT_intra(
    MacroBlock *mblock, PIXEL *c_comp, int comp, int width)
{
    int16 *coeffs = mblock->block[comp];
#ifdef INTEGER_IDCT
#ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps */
    const int nz_coefs = mblock->no_coeff[comp];
    uint8 *col_bitmap = mblock->bitmapcol[comp];
    uint8 row_bitmap = mblock->bitmaprow[comp];
    int col;

    if (nz_coefs > 10)
    {
        /* Dense block: pick a column routine per column, from column 7 down to 0. */
        for (col = 7; col >= 0; col--)
        {
            const int col_bits = (int)col_bitmap[col];

            if (col_bits == 0)
            {
                continue;   /* nothing flagged in this column */
            }

            if ((col_bits & 0xf) == 0)         /*  07/18/01 */
            {
                /* Only the upper nibble is set: a specialised routine exists. */
                idctcolVCA2[col_bits >> 4](coeffs + col);
            }
            else
            {
                idctcol(coeffs + col);
            }
        }

        if ((col_bitmap[4] | col_bitmap[5] | col_bitmap[6] | col_bitmap[7]) == 0)
        {
            /* Columns 4..7 are not flagged: dispatch on the upper nibble of bitmaprow. */
            idctrowVCA2_intra[row_bitmap >> 4](coeffs, c_comp, width);
        }
        else
        {
            idctrow_intra(coeffs, c_comp, width);
        }
    }
    else
    {
        /*
         * Sparse block: only columns 0..3 are transformed.
         * NOTE(review): the index is nz_coefs - 1, so this path assumes
         * nz_coefs >= 1 -- confirm that callers guarantee it.
         */
        const int sparse_idx = nz_coefs - 1;

        idctcolVCA[sparse_idx][0](coeffs);
        idctcolVCA[sparse_idx][1](coeffs + 1);
        idctcolVCA[sparse_idx][2](coeffs + 2);
        idctcolVCA[sparse_idx][3](coeffs + 3);

        idctrowVCA_intra[sparse_idx](coeffs, c_comp, width);
    }
#else
    /* NOTE(review): prototype takes int * while coeffs is int16 * -- confirm this branch still builds. */
    void idct_intra(int *block, uint8 *comp, int width);
    idct_intra(coeffs, c_comp, width);
#endif
#else
    /* NOTE(review): prototype takes int * while coeffs is int16 * -- confirm this branch still builds. */
    void idctref_intra(int *block, uint8 *comp, int width);
    idctref_intra(coeffs, c_comp, width);
#endif

    return;
}
321 
/*
 * Copy_Blk_to_Vop - copy an 8x8 block from the prediction buffer to the
 * output when there is no residue to add (08/04/05).
 *
 *   dst   - top-left byte of the destination block; rows are `width` bytes
 *           apart.
 *   pred  - top-left byte of the prediction block; rows are 16 bytes apart.
 *   width - pitch of the destination in bytes.
 *
 * Eight bytes are copied for each of the eight rows. The copy goes through
 * memcpy instead of uint32 pointer casts, so neither buffer has to be
 * 4-byte aligned and no object is accessed through an incompatible pointer
 * type. The source and destination rows are assumed not to overlap.
 */
void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        /* Index from the base pointers so no pointer is formed past the last row. */
        memcpy(dst + row * width, pred + row * 16, 8);
    }

    return;
}
346 
347 /*  08/04/05 compute IDCT and add prediction at the end  */
BlockIDCT(uint8 * dst,uint8 * pred,int16 * coeff_in,int width,int nz_coefs,uint8 * bitmapcol,uint8 bitmaprow)348 void BlockIDCT(
349     uint8 *dst,  /* destination */
350     uint8 *pred, /* prediction block, pitch 16 */
351     int16   *coeff_in,  /* DCT data, size 64 */
352     int width, /* width of dst */
353     int nz_coefs,
354     uint8 *bitmapcol,
355     uint8 bitmaprow
356 )
357 {
358 #ifdef INTEGER_IDCT
359 #ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
360     int i, bmapr;
361     /*----------------------------------------------------------------------------
362     ; Function body here
363     ----------------------------------------------------------------------------*/
364     if (nz_coefs <= 10)
365     {
366         bmapr = (nz_coefs - 1);
367         (*(idctcolVCA[bmapr]))(coeff_in);
368         (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
369         (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
370         (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
371 
372         (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width);
373         return ;
374     }
375     else
376     {
377         i = 8;
378 
379         while (i--)
380         {
381             bmapr = (int)bitmapcol[i];
382             if (bmapr)
383             {
384                 if ((bmapr&0xf) == 0)         /*  07/18/01 */
385                 {
386                     (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
387                 }
388                 else
389                 {
390                     idctcol(coeff_in + i);
391                 }
392             }
393         }
394         if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
395         {
396             (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width);
397         }
398         else
399         {
400             idctrow(coeff_in, pred, dst, width);
401         }
402         return ;
403     }
404 #else // FAST_IDCT
405     void idct(int *block, uint8 *pred, uint8 *dst, int width);
406     idct(coeff_in, pred, dst, width);
407     return;
408 #endif // FAST_IDCT
409 #else // INTEGER_IDCT
410     void idctref(int *block, uint8 *pred, uint8 *dst, int width);
411     idctref(coeff_in, pred, dst, width);
412     return;
413 #endif // INTEGER_IDCT
414 
415 }
416 /*----------------------------------------------------------------------------
417 ;  End Function: block_idct
418 ----------------------------------------------------------------------------*/
419 
420 
421 /****************************************************************************/
422 
423 /*
424 ------------------------------------------------------------------------------
425  FUNCTION NAME: idctrow
426 ------------------------------------------------------------------------------
427  INPUT AND OUTPUT DEFINITIONS FOR idctrow
428 
429  Inputs:
430     [input_variable_name] = [description of the input to module, its type
431                  definition, and length (when applicable)]
432 
433  Local Stores/Buffers/Pointers Needed:
434     [local_store_name] = [description of the local store, its type
435                   definition, and length (when applicable)]
436     [local_buffer_name] = [description of the local buffer, its type
437                    definition, and length (when applicable)]
438     [local_ptr_name] = [description of the local pointer, its type
439                 definition, and length (when applicable)]
440 
441  Global Stores/Buffers/Pointers Needed:
442     [global_store_name] = [description of the global store, its type
443                    definition, and length (when applicable)]
444     [global_buffer_name] = [description of the global buffer, its type
445                 definition, and length (when applicable)]
446     [global_ptr_name] = [description of the global pointer, its type
447                  definition, and length (when applicable)]
448 
449  Outputs:
450     [return_variable_name] = [description of data/pointer returned
451                   by module, its type definition, and length
452                   (when applicable)]
453 
454  Pointers and Buffers Modified:
455     [variable_bfr_ptr] points to the [describe where the
456       variable_bfr_ptr points to, its type definition, and length
457       (when applicable)]
458     [variable_bfr] contents are [describe the new contents of
459       variable_bfr]
460 
461  Local Stores Modified:
462     [local_store_name] = [describe new contents, its type
463                   definition, and length (when applicable)]
464 
465  Global Stores Modified:
466     [global_store_name] = [describe new contents, its type
467                    definition, and length (when applicable)]
468 
469 ------------------------------------------------------------------------------
470  FUNCTION DESCRIPTION FOR idctrow
471 
472 ------------------------------------------------------------------------------
473  REQUIREMENTS FOR idctrow
474 
475 ------------------------------------------------------------------------------
476  REFERENCES FOR idctrow
477 
478 ------------------------------------------------------------------------------
479  PSEUDO-CODE FOR idctrow
480 
481 ------------------------------------------------------------------------------
482  RESOURCES USED FOR idctrow
   When the code is written for a specific target processor,
     the resources used should be documented below.
485 
486  STACK USAGE: [stack count for this module] + [variable to represent
487           stack usage for each subroutine called]
488 
489      where: [stack usage variable] = stack usage for [subroutine
490          name] (see [filename].ext)
491 
492  DATA MEMORY USED: x words
493 
494  PROGRAM MEMORY USED: x words
495 
496  CLOCK CYCLES: [cycle count equation for this module] + [variable
497            used to represent cycle count for each subroutine
498            called]
499 
500      where: [cycle count variable] = cycle count for [subroutine
501         name] (see [filename].ext)
502 
503 ------------------------------------------------------------------------------
504 */
505 
506 /*----------------------------------------------------------------------------
507 ; Function Code FOR idctrow
508 ----------------------------------------------------------------------------*/
/*
 * idctrow - generic row (horizontal) pass of the 8x8 inverse DCT, combined
 * with the prediction.
 *
 *   blk   - 64 coefficients, eight per row; every entry is reset to zero
 *           once it has been read.
 *   pred  - prediction block, rows 16 bytes apart, eight bytes used per row.
 *   dst   - output block, rows `width` bytes apart, eight bytes written per
 *           row.
 *   width - pitch of dst in bytes.
 *
 * For each row:
 *
 *   dst[k] = sum_{l=0..7} c[l] * src[l] * cos((pi / 8) * (k + 1/2) * l)
 *
 *   where: c[0] = 128, c[1..7] = 128 * sqrt(2)
 *
 * The ADD_AND_CLIP1..4 macros come from an included header and read the
 * local variable pred_word; presumably they add byte n of pred_word to
 * their argument and clamp it to 0..255 -- confirm in the header.
 *
 * Compared with the historical pre-offset addressing version, this
 * implementation never forms a pointer in front of blk/pred/dst, moves the
 * packed 32-bit words with memcpy (no alignment or aliasing requirement)
 * and avoids left-shifting negative or sign-bit-reaching signed values.
 * The bytes read and written are the same.
 */
void idctrow(
    int16 *blk, uint8 *pred, uint8 *dst, int width
)
{
    /*----------------------------------------------------------------------------
    ; Define all local variables
    ----------------------------------------------------------------------------*/
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    uint32 pred_word, dst_word;     /* pred_word is consumed by ADD_AND_CLIPn */
    int res, res2;
    int row;

    /*----------------------------------------------------------------------------
    ; Function body here
    ----------------------------------------------------------------------------*/
    for (row = 0; row < 8; row++)
    {
        int16 *coef = blk + row * 8;
        const uint8 *pred_row = pred + row * 16;
        uint8 *dst_row = dst + row * width;

        /* Multiplying by 256 equals << 8 but is well defined for negative values. */
        x1 = (int32)coef[4] * 256;
        x2 = coef[6];
        x3 = coef[2];
        x4 = coef[1];
        x5 = coef[7];
        x6 = coef[5];
        x7 = coef[3];
        x0 = (int32)coef[0] * 256 + 8192;   /* + 8192 for proper rounding in the fourth stage */

        /* Leave the coefficient row cleared. */
        coef[0] = 0;
        coef[1] = 0;
        coef[2] = 0;
        coef[3] = 0;
        coef[4] = 0;
        coef[5] = 0;
        coef[6] = 0;
        coef[7] = 0;

        /* first stage */
        x8 = W7 * (x4 + x5) + 4;
        x4 = (x8 + (W1 - W7) * x4) >> 3;
        x5 = (x8 - (W1 + W7) * x5) >> 3;
        x8 = W3 * (x6 + x7) + 4;
        x6 = (x8 - (W3 - W5) * x6) >> 3;
        x7 = (x8 - (W3 + W5) * x7) >> 3;

        /* second stage */
        x8 = x0 + x1;
        x0 -= x1;
        x1 = W6 * (x3 + x2) + 4;
        x2 = (x1 - (W2 + W6) * x2) >> 3;
        x3 = (x1 + (W2 - W6) * x3) >> 3;
        x1 = x4 + x6;
        x4 -= x6;
        x6 = x5 + x7;
        x5 -= x7;

        /* third stage */
        x7 = x8 + x3;
        x8 -= x3;
        x3 = x0 + x2;
        x0 -= x2;
        x2 = (181 * (x4 + x5) + 128) >> 8;
        x4 = (181 * (x4 - x5) + 128) >> 8;

        /* fourth stage: outputs 0..3 */
        memcpy(&pred_word, pred_row, sizeof(pred_word));    /* read 4 bytes from pred */

        res = (x7 + x1) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x3 + x2) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x0 + x4) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x8 + x6) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);    /* unsigned shift: must not reach the int sign bit */
        memcpy(dst_row, &dst_word, sizeof(dst_word));       /* save 4 bytes to dst */

        /* fourth stage: outputs 4..7 */
        memcpy(&pred_word, pred_row + 4, sizeof(pred_word)); /* read 4 bytes from pred */

        res = (x8 - x6) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x0 - x4) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x3 - x2) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x7 - x1) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        memcpy(dst_row + 4, &dst_word, sizeof(dst_word));   /* save 4 bytes to dst */
    }
    /*----------------------------------------------------------------------------
    ; Return nothing or data or data pointer
    ----------------------------------------------------------------------------*/
    return;
}
619 
620 __attribute__((no_sanitize("signed-integer-overflow")))
idctrow_intra(int16 * blk,PIXEL * comp,int width)621 void idctrow_intra(
622     int16 *blk, PIXEL *comp, int width
623 )
624 {
625     /*----------------------------------------------------------------------------
626     ; Define all local variables
627     ----------------------------------------------------------------------------*/
628     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
629     int i = 8;
630     int offset = width;
631     int32 word;
632 
633     /*----------------------------------------------------------------------------
634     ; Function body here
635     ----------------------------------------------------------------------------*/
636     /* row (horizontal) IDCT
637     *
638     * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
639     * ( k + - ) * l ) l=0                      8          2
640     *
641     * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
642     while (i--)
643     {
644         x1 = (int32)blk[4] << 8;
645         blk[4] = 0;
646         x2 = blk[6];
647         blk[6] = 0;
648         x3 = blk[2];
649         blk[2] = 0;
650         x4 = blk[1];
651         blk[1] = 0;
652         x5 = blk[7];
653         blk[7] = 0;
654         x6 = blk[5];
655         blk[5] = 0;
656         x7 = blk[3];
657         blk[3] = 0;
658 #ifndef FAST_IDCT
659         /* shortcut */  /* covered by idctrow1  01/9/2001 */
660         if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
661         {
662             blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6;
663             return;
664         }
665 #endif
666         x0 = ((int32)blk[0] << 8) + 8192;
667         blk[0] = 0;  /* for proper rounding in the fourth stage */
668 
669         /* first stage */
670         x8 = W7 * (x4 + x5) + 4;
671         x4 = (x8 + (W1 - W7) * x4) >> 3;
672         x5 = (x8 - (W1 + W7) * x5) >> 3;
673         x8 = W3 * (x6 + x7) + 4;
674         x6 = (x8 - (W3 - W5) * x6) >> 3;
675         x7 = (x8 - (W3 + W5) * x7) >> 3;
676 
677         /* second stage */
678         x8 = x0 + x1;
679         x0 -= x1;
680         x1 = W6 * (x3 + x2) + 4;
681         x2 = (x1 - (W2 + W6) * x2) >> 3;
682         x3 = (x1 + (W2 - W6) * x3) >> 3;
683         x1 = x4 + x6;
684         x4 -= x6;
685         x6 = x5 + x7;
686         x5 -= x7;
687 
688         /* third stage */
689         x7 = x8 + x3;
690         x8 -= x3;
691         x3 = x0 + x2;
692         x0 -= x2;
693         x2 = (181 * (x4 + x5) + 128) >> 8;
694         x4 = (181 * (x4 - x5) + 128) >> 8;
695 
696         /* fourth stage */
697         word = ((x7 + x1) >> 14);
698         CLIP_RESULT(word)
699 
700         temp = ((x3 + x2) >> 14);
701         CLIP_RESULT(temp)
702         word = word | (temp << 8);
703 
704         temp = ((x0 + x4) >> 14);
705         CLIP_RESULT(temp)
706         word = word | (temp << 16);
707 
708         temp = ((x8 + x6) >> 14);
709         CLIP_RESULT(temp)
710         word = word | (temp << 24);
711         *((int32*)(comp)) = word;
712 
713         word = ((x8 - x6) >> 14);
714         CLIP_RESULT(word)
715 
716         temp = ((x0 - x4) >> 14);
717         CLIP_RESULT(temp)
718         word = word | (temp << 8);
719 
720         temp = ((x3 - x2) >> 14);
721         CLIP_RESULT(temp)
722         word = word | (temp << 16);
723 
724         temp = ((x7 - x1) >> 14);
725         CLIP_RESULT(temp)
726         word = word | (temp << 24);
727         *((int32*)(comp + 4)) = word;
728         comp += offset;
729 
730         blk += B_SIZE;
731     }
732     /*----------------------------------------------------------------------------
733     ; Return nothing or data or data pointer
734     ----------------------------------------------------------------------------*/
735     return;
736 }
737 
738 /*----------------------------------------------------------------------------
739 ; End Function: idctrow
740 ----------------------------------------------------------------------------*/
741 
742 
743 /****************************************************************************/
744 
745 /*
746 ------------------------------------------------------------------------------
747  FUNCTION NAME: idctcol
748 ------------------------------------------------------------------------------
749  INPUT AND OUTPUT DEFINITIONS FOR idctcol
750 
751  Inputs:
752     [input_variable_name] = [description of the input to module, its type
753                  definition, and length (when applicable)]
754 
755  Local Stores/Buffers/Pointers Needed:
756     [local_store_name] = [description of the local store, its type
757                   definition, and length (when applicable)]
758     [local_buffer_name] = [description of the local buffer, its type
759                    definition, and length (when applicable)]
760     [local_ptr_name] = [description of the local pointer, its type
761                 definition, and length (when applicable)]
762 
763  Global Stores/Buffers/Pointers Needed:
764     [global_store_name] = [description of the global store, its type
765                    definition, and length (when applicable)]
766     [global_buffer_name] = [description of the global buffer, its type
767                 definition, and length (when applicable)]
768     [global_ptr_name] = [description of the global pointer, its type
769                  definition, and length (when applicable)]
770 
771  Outputs:
772     [return_variable_name] = [description of data/pointer returned
773                   by module, its type definition, and length
774                   (when applicable)]
775 
776  Pointers and Buffers Modified:
777     [variable_bfr_ptr] points to the [describe where the
778       variable_bfr_ptr points to, its type definition, and length
779       (when applicable)]
780     [variable_bfr] contents are [describe the new contents of
781       variable_bfr]
782 
783  Local Stores Modified:
784     [local_store_name] = [describe new contents, its type
785                   definition, and length (when applicable)]
786 
787  Global Stores Modified:
788     [global_store_name] = [describe new contents, its type
789                    definition, and length (when applicable)]
790 
791 ------------------------------------------------------------------------------
792  FUNCTION DESCRIPTION FOR idctcol
793 
794 ------------------------------------------------------------------------------
795  REQUIREMENTS FOR idctcol
796 
797 ------------------------------------------------------------------------------
798  REFERENCES FOR idctcol
799 
800 ------------------------------------------------------------------------------
801  PSEUDO-CODE FOR idctcol
802 
803 ------------------------------------------------------------------------------
804  RESOURCES USED FOR idctcol
   When the code is written for a specific target processor,
     the resources used should be documented below.
807 
808  STACK USAGE: [stack count for this module] + [variable to represent
809           stack usage for each subroutine called]
810 
811      where: [stack usage variable] = stack usage for [subroutine
812          name] (see [filename].ext)
813 
814  DATA MEMORY USED: x words
815 
816  PROGRAM MEMORY USED: x words
817 
818  CLOCK CYCLES: [cycle count equation for this module] + [variable
819            used to represent cycle count for each subroutine
820            called]
821 
822      where: [cycle count variable] = cycle count for [subroutine
823         name] (see [filename].ext)
824 
825 ------------------------------------------------------------------------------
826 */
827 
828 /*----------------------------------------------------------------------------
829 ; Function Code FOR idctcol
830 ----------------------------------------------------------------------------*/
/*
 * idctcol - generic column (vertical) pass of the 8x8 inverse DCT,
 * performed in place.
 *
 *   blk - points at the first element of the column; the eight inputs are
 *         read from blk[0], blk[8], ..., blk[56] and the eight results are
 *         stored back to the same positions as int16.
 *
 *   dst[8*k] = sum_{l=0..7} c[l] * src[8*l] * cos((pi / 8) * (k + 1/2) * l)
 *
 *   where: c[0] = 1/1024, c[1..7] = (1/1024) * sqrt(2)
 *
 * The scaling of the inputs uses multiplications (by 2048 and by 8) instead
 * of left shifts, because coefficients can be negative and left-shifting a
 * negative signed value is undefined; the results are identical.
 */
void idctcol(
    int16 *blk
)
{
    /*----------------------------------------------------------------------------
    ; Define all local variables
    ----------------------------------------------------------------------------*/
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;

    /*----------------------------------------------------------------------------
    ; Function body here
    ----------------------------------------------------------------------------*/
    x1 = (int32)blk[32] * 2048;     /* same value as << 11 */
    x2 = blk[48];
    x3 = blk[16];
    x4 = blk[8];
    x5 = blk[56];
    x6 = blk[40];
    x7 = blk[24];
#ifndef FAST_IDCT
    /* shortcut */        /* covered by idctcolumn1  01/9/2001 */
    /* Only blk[0] is non-zero: every output equals blk[0] * 8. */
    if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
    {
        blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56]
                                              = blk[0] * 8;
        return;
    }
#endif

    x0 = (int32)blk[0] * 2048 + 128;    /* + 128 rounds the final >> 8 */

    /* first stage */
    x8 = W7 * (x4 + x5);
    x4 = x8 + (W1 - W7) * x4;
    x5 = x8 - (W1 + W7) * x5;
    x8 = W3 * (x6 + x7);
    x6 = x8 - (W3 - W5) * x6;
    x7 = x8 - (W3 + W5) * x7;

    /* second stage */
    x8 = x0 + x1;
    x0 -= x1;
    x1 = W6 * (x3 + x2);
    x2 = x1 - (W2 + W6) * x2;
    x3 = x1 + (W2 - W6) * x3;
    x1 = x4 + x6;
    x4 -= x6;
    x6 = x5 + x7;
    x5 -= x7;

    /* third stage */
    x7 = x8 + x3;
    x8 -= x3;
    x3 = x0 + x2;
    x0 -= x2;
    x2 = (181 * (x4 + x5) + 128) >> 8;
    x4 = (181 * (x4 - x5) + 128) >> 8;

    /* fourth stage */
    blk[0]  = (x7 + x1) >> 8;
    blk[8]  = (x3 + x2) >> 8;
    blk[16] = (x0 + x4) >> 8;
    blk[24] = (x8 + x6) >> 8;
    blk[32] = (x8 - x6) >> 8;
    blk[40] = (x0 - x4) >> 8;
    blk[48] = (x3 - x2) >> 8;
    blk[56] = (x7 - x1) >> 8;
    /*----------------------------------------------------------------------------
    ; Return nothing or data or data pointer
    ----------------------------------------------------------------------------*/
    return;
}
909 /*----------------------------------------------------------------------------
910 ;  End Function: idctcol
911 ----------------------------------------------------------------------------*/
912 
913