1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /*
19 ------------------------------------------------------------------------------
20 INPUT AND OUTPUT DEFINITIONS
21
22 Inputs:
23 [input_variable_name] = [description of the input to module, its type
24 definition, and length (when applicable)]
25
26 Local Stores/Buffers/Pointers Needed:
27 [local_store_name] = [description of the local store, its type
28 definition, and length (when applicable)]
29 [local_buffer_name] = [description of the local buffer, its type
30 definition, and length (when applicable)]
31 [local_ptr_name] = [description of the local pointer, its type
32 definition, and length (when applicable)]
33
34 Global Stores/Buffers/Pointers Needed:
35 [global_store_name] = [description of the global store, its type
36 definition, and length (when applicable)]
37 [global_buffer_name] = [description of the global buffer, its type
38 definition, and length (when applicable)]
39 [global_ptr_name] = [description of the global pointer, its type
40 definition, and length (when applicable)]
41
42 Outputs:
43 [return_variable_name] = [description of data/pointer returned
44 by module, its type definition, and length
45 (when applicable)]
46
47 Pointers and Buffers Modified:
48 [variable_bfr_ptr] points to the [describe where the
49 variable_bfr_ptr points to, its type definition, and length
50 (when applicable)]
51 [variable_bfr] contents are [describe the new contents of
52 variable_bfr]
53
54 Local Stores Modified:
55 [local_store_name] = [describe new contents, its type
56 definition, and length (when applicable)]
57
58 Global Stores Modified:
59 [global_store_name] = [describe new contents, its type
60 definition, and length (when applicable)]
61
62 ------------------------------------------------------------------------------
63 FUNCTION DESCRIPTION
64
65 ------------------------------------------------------------------------------
66 REQUIREMENTS
67
68 ------------------------------------------------------------------------------
69 REFERENCES
70
71 ------------------------------------------------------------------------------
72 PSEUDO-CODE
73
74 ------------------------------------------------------------------------------
75 RESOURCES USED
 When the code is written for a specific target processor,
 the resources used should be documented below.
78
79 STACK USAGE: [stack count for this module] + [variable to represent
80 stack usage for each subroutine called]
81
82 where: [stack usage variable] = stack usage for [subroutine
83 name] (see [filename].ext)
84
85 DATA MEMORY USED: x words
86
87 PROGRAM MEMORY USED: x words
88
89 CLOCK CYCLES: [cycle count equation for this module] + [variable
90 used to represent cycle count for each subroutine
91 called]
92
93 where: [cycle count variable] = cycle count for [subroutine
94 name] (see [filename].ext)
95
96 ------------------------------------------------------------------------------
97 */
98
99 /*----------------------------------------------------------------------------
100 ; INCLUDES
101 ----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "idct.h"
#include "motion_comp.h"
105
106 #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
107 /*----------------------------------------------------------------------------
108 ; MACROS
109 ; Define module specific macros here
110 ----------------------------------------------------------------------------*/
111
112 /*----------------------------------------------------------------------------
113 ; DEFINES
114 ; Include all pre-processor statements here. Include conditional
115 ; compile variables also.
116 ----------------------------------------------------------------------------*/
117
118 /*----------------------------------------------------------------------------
119 ; LOCAL FUNCTION DEFINITIONS
120 ; Function Prototype declaration
121 ----------------------------------------------------------------------------*/
122 /* private prototypes */
123 static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width);
124 static void idctrow_intra(int16 *blk, PIXEL *, int width);
125 static void idctcol(int16 *blk);
126
#ifdef FAST_IDCT
/*
 * Dispatch tables for the FAST_IDCT path.
 *
 * The tables hold pointers to the specialised column/row routines
 * (idctcol0..4, idctrow1..4 and the _intra row variants), which are declared
 * outside this file.  BlockIDCT() and BlockIDCT_intra() pick an entry either
 * from the number of non-zero coefficients or from the coefficient bitmaps.
 */
typedef void (*idct_col_fn)(int16 *);
typedef void (*idct_row_fn)(int16 *, uint8 *, uint8 *, int);
typedef void (*idct_row_intra_fn)(int16 *, PIXEL *, int);

/* Column routine for columns 0..3, indexed by [nz_coefs - 1][column]. */
static const idct_col_fn idctcolVCA[10][4] =
{
    /* nz_coefs ==  1 */ { idctcol1, idctcol0, idctcol0, idctcol0 },
    /* nz_coefs ==  2 */ { idctcol1, idctcol1, idctcol0, idctcol0 },
    /* nz_coefs ==  3 */ { idctcol2, idctcol1, idctcol0, idctcol0 },
    /* nz_coefs ==  4 */ { idctcol3, idctcol1, idctcol0, idctcol0 },
    /* nz_coefs ==  5 */ { idctcol3, idctcol2, idctcol0, idctcol0 },
    /* nz_coefs ==  6 */ { idctcol3, idctcol2, idctcol1, idctcol0 },
    /* nz_coefs ==  7 */ { idctcol3, idctcol2, idctcol1, idctcol1 },
    /* nz_coefs ==  8 */ { idctcol3, idctcol2, idctcol2, idctcol1 },
    /* nz_coefs ==  9 */ { idctcol3, idctcol3, idctcol2, idctcol1 },
    /* nz_coefs == 10 */ { idctcol4, idctcol3, idctcol2, idctcol1 }
};

/* Row routine (with prediction), indexed by nz_coefs - 1. */
static const idct_row_fn idctrowVCA[10] =
{
    idctrow1,   /* nz_coefs ==  1 */
    idctrow2,   /* nz_coefs ==  2 */
    idctrow2,   /* nz_coefs ==  3 */
    idctrow2,   /* nz_coefs ==  4 */
    idctrow2,   /* nz_coefs ==  5 */
    idctrow3,   /* nz_coefs ==  6 */
    idctrow4,   /* nz_coefs ==  7 */
    idctrow4,   /* nz_coefs ==  8 */
    idctrow4,   /* nz_coefs ==  9 */
    idctrow4    /* nz_coefs == 10 */
};

/*
 * Column routine indexed by the upper nibble of a column bitmap
 * (bitmapcol[i] >> 4); used only when the lower nibble is zero.
 */
static const idct_col_fn idctcolVCA2[16] =
{
    idctcol0, idctcol4, idctcol3, idctcol4,
    idctcol2, idctcol4, idctcol3, idctcol4,
    idctcol1, idctcol4, idctcol3, idctcol4,
    idctcol2, idctcol4, idctcol3, idctcol4
};

/* Row routine (with prediction), indexed by bitmaprow >> 4. */
static const idct_row_fn idctrowVCA2[8] =
{
    idctrow1, idctrow4, idctrow3, idctrow4,
    idctrow2, idctrow4, idctrow3, idctrow4
};

/* Intra row routine, indexed by nz_coefs - 1. */
static const idct_row_intra_fn idctrowVCA_intra[10] =
{
    idctrow1_intra,   /* nz_coefs ==  1 */
    idctrow2_intra,   /* nz_coefs ==  2 */
    idctrow2_intra,   /* nz_coefs ==  3 */
    idctrow2_intra,   /* nz_coefs ==  4 */
    idctrow2_intra,   /* nz_coefs ==  5 */
    idctrow3_intra,   /* nz_coefs ==  6 */
    idctrow4_intra,   /* nz_coefs ==  7 */
    idctrow4_intra,   /* nz_coefs ==  8 */
    idctrow4_intra,   /* nz_coefs ==  9 */
    idctrow4_intra    /* nz_coefs == 10 */
};

/* Intra row routine, indexed by bitmaprow >> 4. */
static const idct_row_intra_fn idctrowVCA2_intra[8] =
{
    idctrow1_intra, idctrow4_intra, idctrow3_intra, idctrow4_intra,
    idctrow2_intra, idctrow4_intra, idctrow3_intra, idctrow4_intra
};
#endif
203
204 /*----------------------------------------------------------------------------
205 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
206 ; Variable declaration - defined here and used outside this module
207 ----------------------------------------------------------------------------*/
208
209 /*----------------------------------------------------------------------------
210 ; EXTERNAL FUNCTION REFERENCES
211 ; Declare functions defined elsewhere and referenced in this module
212 ----------------------------------------------------------------------------*/
213
214 /*----------------------------------------------------------------------------
215 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
216 ; Declare variables used in this module but defined elsewhere
217 ----------------------------------------------------------------------------*/
218
219 /*----------------------------------------------------------------------------
220 ; FUNCTION CODE
221 ----------------------------------------------------------------------------*/
/*
 * Run the intra inverse DCT for all six 8x8 blocks of the current macroblock
 * and write the results into the current VOP.
 *
 * video : decoder state; supplies the current VOP, the macroblock
 *         coefficient store, the macroblock column/row and the frame width.
 *
 * Blocks 0..3 go to the four 8x8 quadrants of the 16x16 luminance area,
 * block 4 to the U plane and block 5 to the V plane.
 */
void MBlockIDCT(VideoDecData *video)
{
    Vop *vop = video->currVop;
    MacroBlock *mb = video->mblock;
    int mb_col = video->mbnum_col;
    int mb_row = video->mbnum_row;
    int luma_pitch = video->width;
    int chroma_pitch = luma_pitch >> 1;

    /* Offset of the macroblock's top-left luminance sample: 16 rows down
     * per macroblock row, 16 samples across per macroblock column. */
    int32 luma_off = (int32)(mb_row << 4) * luma_pitch + (mb_col << 4);

    PIXEL *y_dst = vop->yChan + luma_off;
    /* luma_off >> 2 yields 4 * mb_col horizontally; the extra mb_col << 2
     * brings it to the 8 samples per macroblock used in the chroma planes. */
    PIXEL *u_dst = vop->uChan + (luma_off >> 2) + (mb_col << 2);
    PIXEL *v_dst = vop->vChan + (luma_off >> 2) + (mb_col << 2);

    /* luminance: top-left, top-right, bottom-left, bottom-right */
    BlockIDCT_intra(mb, y_dst, 0, luma_pitch);
    BlockIDCT_intra(mb, y_dst + 8, 1, luma_pitch);
    BlockIDCT_intra(mb, y_dst + (luma_pitch << 3), 2, luma_pitch);
    BlockIDCT_intra(mb, y_dst + (luma_pitch << 3) + 8, 3, luma_pitch);

    /* chrominance */
    BlockIDCT_intra(mb, u_dst, 4, chroma_pitch);
    BlockIDCT_intra(mb, v_dst, 5, chroma_pitch);
}
248
249
/*
 * Inverse-transform one 8x8 intra block of a macroblock and store the result
 * at c_comp.
 *
 * mblock : macroblock holding the coefficients (block[comp]) and, for the
 *          FAST_IDCT path, the per-block coefficient count and bitmaps
 * c_comp : destination of the top-left output sample
 * comp   : block index inside the macroblock
 * width  : pitch of the destination plane
 *
 * Three build variants exist: INTEGER_IDCT + FAST_IDCT (table driven, below),
 * INTEGER_IDCT only (idct_intra) and neither (idctref_intra).
 */
void BlockIDCT_intra(
    MacroBlock *mblock, PIXEL *c_comp, int comp, int width)
{
    int16 *coeff_in = mblock->block[comp];
#ifdef INTEGER_IDCT
#ifdef FAST_IDCT /* VCA IDCT using nzcoefs and bitmaps */
    int nz_coefs = mblock->no_coeff[comp];
    uint8 *bitmapcol = mblock->bitmapcol[comp];
    uint8 bitmaprow = mblock->bitmaprow[comp];
    int col;
    int sel;

    if (nz_coefs > 10)
    {
        /* Column pass, from column 7 down to column 0; columns whose bitmap
         * is zero are skipped. */
        for (col = 7; col >= 0; col--)
        {
            int col_bits = (int)bitmapcol[col];

            if (col_bits == 0)
            {
                continue;
            }

            if (col_bits & 0xf)
            {
                /* lower nibble in use: general column routine */
                idctcol(coeff_in + col);
            }
            else
            {
                /* only the upper nibble is set: specialised routine */
                idctcolVCA2[col_bits >> 4](coeff_in + col);
            }
        }

        /* Row pass. */
        if (bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7])
        {
            idctrow_intra(coeff_in, c_comp, width);
        }
        else
        {
            /* NOTE(review): idctrowVCA2_intra has 8 entries, so
             * bitmaprow >> 4 is presumably always below 8 here - confirm
             * against the code that builds bitmaprow. */
            idctrowVCA2_intra[bitmaprow >> 4](coeff_in, c_comp, width);
        }
    }
    else
    {
        /* Few coefficients: only columns 0..3 are transformed, with the
         * routines chosen from the coefficient count.
         * NOTE(review): assumes nz_coefs >= 1; a smaller value would index
         * the tables out of range - confirm with the caller. */
        sel = nz_coefs - 1;

        idctcolVCA[sel][0](coeff_in);
        idctcolVCA[sel][1](coeff_in + 1);
        idctcolVCA[sel][2](coeff_in + 2);
        idctcolVCA[sel][3](coeff_in + 3);

        idctrowVCA_intra[sel](coeff_in, c_comp, width);
    }
#else
    void idct_intra(int *block, uint8 *comp, int width);
    idct_intra(coeff_in, c_comp, width);
#endif
#else
    void idctref_intra(int *block, uint8 *comp, int width);
    idctref_intra(coeff_in, c_comp, width);
#endif
}
321
322 /* 08/04/05, no residue, just copy from pred to output */
Copy_Blk_to_Vop(uint8 * dst,uint8 * pred,int width)323 void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width)
324 {
325 /* copy 4 bytes at a time */
326 width -= 4;
327 *((uint32*)dst) = *((uint32*)pred);
328 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
329 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
330 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
331 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
332 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
333 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
334 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
335 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
336 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
337 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
338 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
339 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
340 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
341 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
342 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
343
344 return ;
345 }
346
347 /* 08/04/05 compute IDCT and add prediction at the end */
BlockIDCT(uint8 * dst,uint8 * pred,int16 * coeff_in,int width,int nz_coefs,uint8 * bitmapcol,uint8 bitmaprow)348 void BlockIDCT(
349 uint8 *dst, /* destination */
350 uint8 *pred, /* prediction block, pitch 16 */
351 int16 *coeff_in, /* DCT data, size 64 */
352 int width, /* width of dst */
353 int nz_coefs,
354 uint8 *bitmapcol,
355 uint8 bitmaprow
356 )
357 {
358 #ifdef INTEGER_IDCT
359 #ifdef FAST_IDCT /* VCA IDCT using nzcoefs and bitmaps*/
360 int i, bmapr;
361 /*----------------------------------------------------------------------------
362 ; Function body here
363 ----------------------------------------------------------------------------*/
364 if (nz_coefs <= 10)
365 {
366 bmapr = (nz_coefs - 1);
367 (*(idctcolVCA[bmapr]))(coeff_in);
368 (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
369 (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
370 (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
371
372 (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width);
373 return ;
374 }
375 else
376 {
377 i = 8;
378
379 while (i--)
380 {
381 bmapr = (int)bitmapcol[i];
382 if (bmapr)
383 {
384 if ((bmapr&0xf) == 0) /* 07/18/01 */
385 {
386 (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
387 }
388 else
389 {
390 idctcol(coeff_in + i);
391 }
392 }
393 }
394 if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
395 {
396 (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width);
397 }
398 else
399 {
400 idctrow(coeff_in, pred, dst, width);
401 }
402 return ;
403 }
404 #else // FAST_IDCT
405 void idct(int *block, uint8 *pred, uint8 *dst, int width);
406 idct(coeff_in, pred, dst, width);
407 return;
408 #endif // FAST_IDCT
409 #else // INTEGER_IDCT
410 void idctref(int *block, uint8 *pred, uint8 *dst, int width);
411 idctref(coeff_in, pred, dst, width);
412 return;
413 #endif // INTEGER_IDCT
414
415 }
416 /*----------------------------------------------------------------------------
417 ; End Function: block_idct
418 ----------------------------------------------------------------------------*/
419
420
421 /****************************************************************************/
422
423 /*
424 ------------------------------------------------------------------------------
425 FUNCTION NAME: idctrow
426 ------------------------------------------------------------------------------
427 INPUT AND OUTPUT DEFINITIONS FOR idctrow
428
429 Inputs:
430 [input_variable_name] = [description of the input to module, its type
431 definition, and length (when applicable)]
432
433 Local Stores/Buffers/Pointers Needed:
434 [local_store_name] = [description of the local store, its type
435 definition, and length (when applicable)]
436 [local_buffer_name] = [description of the local buffer, its type
437 definition, and length (when applicable)]
438 [local_ptr_name] = [description of the local pointer, its type
439 definition, and length (when applicable)]
440
441 Global Stores/Buffers/Pointers Needed:
442 [global_store_name] = [description of the global store, its type
443 definition, and length (when applicable)]
444 [global_buffer_name] = [description of the global buffer, its type
445 definition, and length (when applicable)]
446 [global_ptr_name] = [description of the global pointer, its type
447 definition, and length (when applicable)]
448
449 Outputs:
450 [return_variable_name] = [description of data/pointer returned
451 by module, its type definition, and length
452 (when applicable)]
453
454 Pointers and Buffers Modified:
455 [variable_bfr_ptr] points to the [describe where the
456 variable_bfr_ptr points to, its type definition, and length
457 (when applicable)]
458 [variable_bfr] contents are [describe the new contents of
459 variable_bfr]
460
461 Local Stores Modified:
462 [local_store_name] = [describe new contents, its type
463 definition, and length (when applicable)]
464
465 Global Stores Modified:
466 [global_store_name] = [describe new contents, its type
467 definition, and length (when applicable)]
468
469 ------------------------------------------------------------------------------
470 FUNCTION DESCRIPTION FOR idctrow
471
472 ------------------------------------------------------------------------------
473 REQUIREMENTS FOR idctrow
474
475 ------------------------------------------------------------------------------
476 REFERENCES FOR idctrow
477
478 ------------------------------------------------------------------------------
479 PSEUDO-CODE FOR idctrow
480
481 ------------------------------------------------------------------------------
482 RESOURCES USED FOR idctrow
 When the code is written for a specific target processor,
 the resources used should be documented below.
485
486 STACK USAGE: [stack count for this module] + [variable to represent
487 stack usage for each subroutine called]
488
489 where: [stack usage variable] = stack usage for [subroutine
490 name] (see [filename].ext)
491
492 DATA MEMORY USED: x words
493
494 PROGRAM MEMORY USED: x words
495
496 CLOCK CYCLES: [cycle count equation for this module] + [variable
497 used to represent cycle count for each subroutine
498 called]
499
500 where: [cycle count variable] = cycle count for [subroutine
501 name] (see [filename].ext)
502
503 ------------------------------------------------------------------------------
504 */
505
506 /*----------------------------------------------------------------------------
507 ; Function Code FOR idctrow
508 ----------------------------------------------------------------------------*/
/*
 * Row (horizontal) IDCT of an 8x8 block, combined with a prediction block.
 *
 *                7
 *     dst[k] =  sum  c[l] * src[l] * cos( (pi / 8) * (k + 1/2) * l )
 *               l=0
 *
 *     where: c[0] = 128, c[1..7] = 128 * sqrt(2)
 *
 * blk   : 64 coefficients, row-major; every coefficient is set to zero once
 *         it has been read
 * pred  : prediction block, rows 16 bytes apart
 * dst   : output, rows `width` bytes apart
 * width : pitch of dst
 *
 * The intermediate sums can exceed the int32 range for extreme coefficient
 * values; the function carries the same sanitizer annotation as
 * idctrow_intra, which performs the same arithmetic.
 */
__attribute__((no_sanitize("signed-integer-overflow")))
void idctrow(
    int16 *blk, uint8 *pred, uint8 *dst, int width
)
{
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    int row;
    /* pred_word is read by the ADD_AND_CLIPn macros (defined outside this
     * file) - presumably macro n adds byte n-1 of pred_word to its argument
     * and clamps the sum to 0..255; confirm in the header. */
    uint32 pred_word, dst_word;
    int res, res2;

    for (row = 0; row < 8; row++)
    {
        /* Address each row from the base pointers, so that no pointer is
         * ever formed outside the buffers. */
        int16 *coef = blk + row * 8;
        const uint8 *pred_row = pred + row * 16;
        uint8 *dst_row = dst + row * width;

        /* Multiply instead of shifting left: the coefficients may be
         * negative and a left shift of a negative value is undefined. */
        x1 = (int32)coef[4] * 256;
        coef[4] = 0;
        x2 = coef[6];
        coef[6] = 0;
        x3 = coef[2];
        coef[2] = 0;
        x4 = coef[1];
        coef[1] = 0;
        x5 = coef[7];
        coef[7] = 0;
        x6 = coef[5];
        coef[5] = 0;
        x7 = coef[3];
        coef[3] = 0;
        x0 = (int32)coef[0] * 256 + 8192; /* for proper rounding in the fourth stage */
        coef[0] = 0;

        /* first stage */
        x8 = W7 * (x4 + x5) + 4;
        x4 = (x8 + (W1 - W7) * x4) >> 3;
        x5 = (x8 - (W1 + W7) * x5) >> 3;
        x8 = W3 * (x6 + x7) + 4;
        x6 = (x8 - (W3 - W5) * x6) >> 3;
        x7 = (x8 - (W3 + W5) * x7) >> 3;

        /* second stage */
        x8 = x0 + x1;
        x0 -= x1;
        x1 = W6 * (x3 + x2) + 4;
        x2 = (x1 - (W2 + W6) * x2) >> 3;
        x3 = (x1 + (W2 - W6) * x3) >> 3;
        x1 = x4 + x6;
        x4 -= x6;
        x6 = x5 + x7;
        x5 -= x7;

        /* third stage */
        x7 = x8 + x3;
        x8 -= x3;
        x3 = x0 + x2;
        x0 -= x2;
        x2 = (181 * (x4 + x5) + 128) >> 8;
        x4 = (181 * (x4 - x5) + 128) >> 8;

        /* fourth stage: samples 0..3 of the row */
        /* memcpy keeps the native byte order of the original word access
         * without requiring aligned or uint32-typed storage. */
        memcpy(&pred_word, pred_row, sizeof(pred_word)); /* read 4 bytes from pred */

        res = (x7 + x1) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x3 + x2) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x0 + x4) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x8 + x6) >> 14;
        ADD_AND_CLIP4(res);
        /* unsigned shift: a signed value of 128 or more shifted by 24 would
         * overflow int */
        dst_word |= ((uint32)res << 24);
        memcpy(dst_row, &dst_word, sizeof(dst_word)); /* save 4 bytes to dst */

        /* fourth stage: samples 4..7 of the row */
        memcpy(&pred_word, pred_row + 4, sizeof(pred_word)); /* read 4 bytes from pred */

        res = (x8 - x6) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x0 - x4) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x3 - x2) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x7 - x1) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        memcpy(dst_row + 4, &dst_word, sizeof(dst_word)); /* save 4 bytes to dst */
    }
}
619
620 __attribute__((no_sanitize("signed-integer-overflow")))
idctrow_intra(int16 * blk,PIXEL * comp,int width)621 void idctrow_intra(
622 int16 *blk, PIXEL *comp, int width
623 )
624 {
625 /*----------------------------------------------------------------------------
626 ; Define all local variables
627 ----------------------------------------------------------------------------*/
628 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
629 int i = 8;
630 int offset = width;
631 int32 word;
632
633 /*----------------------------------------------------------------------------
634 ; Function body here
635 ----------------------------------------------------------------------------*/
636 /* row (horizontal) IDCT
637 *
638 * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- *
639 * ( k + - ) * l ) l=0 8 2
640 *
641 * where: c[0] = 128 c[1..7] = 128*sqrt(2) */
642 while (i--)
643 {
644 x1 = (int32)blk[4] << 8;
645 blk[4] = 0;
646 x2 = blk[6];
647 blk[6] = 0;
648 x3 = blk[2];
649 blk[2] = 0;
650 x4 = blk[1];
651 blk[1] = 0;
652 x5 = blk[7];
653 blk[7] = 0;
654 x6 = blk[5];
655 blk[5] = 0;
656 x7 = blk[3];
657 blk[3] = 0;
658 #ifndef FAST_IDCT
659 /* shortcut */ /* covered by idctrow1 01/9/2001 */
660 if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
661 {
662 blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6;
663 return;
664 }
665 #endif
666 x0 = ((int32)blk[0] << 8) + 8192;
667 blk[0] = 0; /* for proper rounding in the fourth stage */
668
669 /* first stage */
670 x8 = W7 * (x4 + x5) + 4;
671 x4 = (x8 + (W1 - W7) * x4) >> 3;
672 x5 = (x8 - (W1 + W7) * x5) >> 3;
673 x8 = W3 * (x6 + x7) + 4;
674 x6 = (x8 - (W3 - W5) * x6) >> 3;
675 x7 = (x8 - (W3 + W5) * x7) >> 3;
676
677 /* second stage */
678 x8 = x0 + x1;
679 x0 -= x1;
680 x1 = W6 * (x3 + x2) + 4;
681 x2 = (x1 - (W2 + W6) * x2) >> 3;
682 x3 = (x1 + (W2 - W6) * x3) >> 3;
683 x1 = x4 + x6;
684 x4 -= x6;
685 x6 = x5 + x7;
686 x5 -= x7;
687
688 /* third stage */
689 x7 = x8 + x3;
690 x8 -= x3;
691 x3 = x0 + x2;
692 x0 -= x2;
693 x2 = (181 * (x4 + x5) + 128) >> 8;
694 x4 = (181 * (x4 - x5) + 128) >> 8;
695
696 /* fourth stage */
697 word = ((x7 + x1) >> 14);
698 CLIP_RESULT(word)
699
700 temp = ((x3 + x2) >> 14);
701 CLIP_RESULT(temp)
702 word = word | (temp << 8);
703
704 temp = ((x0 + x4) >> 14);
705 CLIP_RESULT(temp)
706 word = word | (temp << 16);
707
708 temp = ((x8 + x6) >> 14);
709 CLIP_RESULT(temp)
710 word = word | (temp << 24);
711 *((int32*)(comp)) = word;
712
713 word = ((x8 - x6) >> 14);
714 CLIP_RESULT(word)
715
716 temp = ((x0 - x4) >> 14);
717 CLIP_RESULT(temp)
718 word = word | (temp << 8);
719
720 temp = ((x3 - x2) >> 14);
721 CLIP_RESULT(temp)
722 word = word | (temp << 16);
723
724 temp = ((x7 - x1) >> 14);
725 CLIP_RESULT(temp)
726 word = word | (temp << 24);
727 *((int32*)(comp + 4)) = word;
728 comp += offset;
729
730 blk += B_SIZE;
731 }
732 /*----------------------------------------------------------------------------
733 ; Return nothing or data or data pointer
734 ----------------------------------------------------------------------------*/
735 return;
736 }
737
738 /*----------------------------------------------------------------------------
739 ; End Function: idctrow
740 ----------------------------------------------------------------------------*/
741
742
743 /****************************************************************************/
744
745 /*
746 ------------------------------------------------------------------------------
747 FUNCTION NAME: idctcol
748 ------------------------------------------------------------------------------
749 INPUT AND OUTPUT DEFINITIONS FOR idctcol
750
751 Inputs:
752 [input_variable_name] = [description of the input to module, its type
753 definition, and length (when applicable)]
754
755 Local Stores/Buffers/Pointers Needed:
756 [local_store_name] = [description of the local store, its type
757 definition, and length (when applicable)]
758 [local_buffer_name] = [description of the local buffer, its type
759 definition, and length (when applicable)]
760 [local_ptr_name] = [description of the local pointer, its type
761 definition, and length (when applicable)]
762
763 Global Stores/Buffers/Pointers Needed:
764 [global_store_name] = [description of the global store, its type
765 definition, and length (when applicable)]
766 [global_buffer_name] = [description of the global buffer, its type
767 definition, and length (when applicable)]
768 [global_ptr_name] = [description of the global pointer, its type
769 definition, and length (when applicable)]
770
771 Outputs:
772 [return_variable_name] = [description of data/pointer returned
773 by module, its type definition, and length
774 (when applicable)]
775
776 Pointers and Buffers Modified:
777 [variable_bfr_ptr] points to the [describe where the
778 variable_bfr_ptr points to, its type definition, and length
779 (when applicable)]
780 [variable_bfr] contents are [describe the new contents of
781 variable_bfr]
782
783 Local Stores Modified:
784 [local_store_name] = [describe new contents, its type
785 definition, and length (when applicable)]
786
787 Global Stores Modified:
788 [global_store_name] = [describe new contents, its type
789 definition, and length (when applicable)]
790
791 ------------------------------------------------------------------------------
792 FUNCTION DESCRIPTION FOR idctcol
793
794 ------------------------------------------------------------------------------
795 REQUIREMENTS FOR idctcol
796
797 ------------------------------------------------------------------------------
798 REFERENCES FOR idctcol
799
800 ------------------------------------------------------------------------------
801 PSEUDO-CODE FOR idctcol
802
803 ------------------------------------------------------------------------------
804 RESOURCES USED FOR idctcol
 When the code is written for a specific target processor,
 the resources used should be documented below.
807
808 STACK USAGE: [stack count for this module] + [variable to represent
809 stack usage for each subroutine called]
810
811 where: [stack usage variable] = stack usage for [subroutine
812 name] (see [filename].ext)
813
814 DATA MEMORY USED: x words
815
816 PROGRAM MEMORY USED: x words
817
818 CLOCK CYCLES: [cycle count equation for this module] + [variable
819 used to represent cycle count for each subroutine
820 called]
821
822 where: [cycle count variable] = cycle count for [subroutine
823 name] (see [filename].ext)
824
825 ------------------------------------------------------------------------------
826 */
827
828 /*----------------------------------------------------------------------------
829 ; Function Code FOR idctcol
830 ----------------------------------------------------------------------------*/
/*
 * Column (vertical) IDCT, performed in place on one column of an 8x8 block.
 *
 *                  7
 *     dst[8*k] =  sum  c[l] * src[8*l] * cos( (pi / 8) * (k + 1/2) * l )
 *                 l=0
 *
 *     where: c[0] = 1/1024, c[1..7] = (1/1024) * sqrt(2)
 *
 * blk : first element of the column; the eight elements of the column are
 *       8 entries apart (blk[0], blk[8], ..., blk[56]) and are overwritten
 *       with the result.
 *
 * The intermediate sums can exceed the int32 range for extreme coefficient
 * values; the function carries the same sanitizer annotation as
 * idctrow_intra, whose butterfly arithmetic has the same structure.
 */
__attribute__((no_sanitize("signed-integer-overflow")))
void idctcol(
    int16 *blk
)
{
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;

    /* Multiply instead of shifting left: the coefficients may be negative
     * and a left shift of a negative value is undefined. */
    x1 = (int32)blk[32] * 2048;
    x2 = blk[48];
    x3 = blk[16];
    x4 = blk[8];
    x5 = blk[56];
    x6 = blk[40];
    x7 = blk[24];
#ifndef FAST_IDCT
    /* shortcut */        /* covered by idctcolumn1  01/9/2001 */
    /* Only blk[0] is non-zero: every output of the column is blk[0] * 8. */
    if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
    {
        blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56]
                 = blk[0] * 8;
        return;
    }
#endif

    /* + 128 rounds the final >> 8 */
    x0 = (int32)blk[0] * 2048 + 128;

    /* first stage */
    x8 = W7 * (x4 + x5);
    x4 = x8 + (W1 - W7) * x4;
    x5 = x8 - (W1 + W7) * x5;
    x8 = W3 * (x6 + x7);
    x6 = x8 - (W3 - W5) * x6;
    x7 = x8 - (W3 + W5) * x7;

    /* second stage */
    x8 = x0 + x1;
    x0 -= x1;
    x1 = W6 * (x3 + x2);
    x2 = x1 - (W2 + W6) * x2;
    x3 = x1 + (W2 - W6) * x3;
    x1 = x4 + x6;
    x4 -= x6;
    x6 = x5 + x7;
    x5 -= x7;

    /* third stage */
    x7 = x8 + x3;
    x8 -= x3;
    x3 = x0 + x2;
    x0 -= x2;
    x2 = (181 * (x4 + x5) + 128) >> 8;
    x4 = (181 * (x4 - x5) + 128) >> 8;

    /* fourth stage: write the column back in place */
    blk[0]  = (x7 + x1) >> 8;
    blk[8]  = (x3 + x2) >> 8;
    blk[16] = (x0 + x4) >> 8;
    blk[24] = (x8 + x6) >> 8;
    blk[32] = (x8 - x6) >> 8;
    blk[40] = (x0 - x4) >> 8;
    blk[48] = (x3 - x2) >> 8;
    blk[56] = (x7 - x1) >> 8;
}
909 /*----------------------------------------------------------------------------
910 ; End Function: idctcol
911 ----------------------------------------------------------------------------*/
912
913