1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 * Copyright (C) 2016 Mopria Alliance, Inc.
4 * Copyright (C) 2013 Hewlett-Packard Development Company, L.P.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #include "wprint_scaler.h"
20 #include <assert.h>
21 #include <stdio.h>
22
23 #define ROUND_4_DOWN(x) ((x) & ~3)
24 #define ROUND_4_UP(x) (ROUND_4_DOWN((x) + 3))
25 #define PSCALER_FRACT_BITS_COUNT 24
26
27 typedef enum {
28 FRACTION_ROUND_UP,
29 FRACTION_TRUNCATE
30 } pscaler_fraction_t;
31
32 static uint32
33 _scaler_fraction_part(uint32 iNum, uint32 iDen, pscaler_fraction_t mode, bool_t *overflow);
34
35 static void _hw_scale_image_plane(scaler_config_t *pscaler_config, scaler_mode_t scaleMode);
36
37 static void _calculate_factors(scaler_config_t *pscaler_config, scaler_mode_t scaleMode);
38
/*
 * Initializes *pscaler_config for scaling a whole image.
 *
 * Stores the source/output dimensions, picks the scale mode from how the two
 * dimensions compare, computes the scale factors, and resets the row window to
 * the full image (start rows 0, end rows equal to the image heights). The buffer
 * widths are taken from the *_buf_width arguments and all buffer pointers are
 * cleared.
 */
void scaler_make_image_scaler_tables(uint16 image_input_width, uint16 image_input_buf_width,
        uint16 image_output_width, uint16 image_output_buf_width, uint16 image_input_height,
        uint16 image_output_height, scaler_config_t *pscaler_config) {
    scaler_config_t *const cfg = pscaler_config;
    const int width_grows = (image_input_width < image_output_width);
    const int width_shrinks = (image_input_width > image_output_width);
    const int height_grows = (image_input_height < image_output_height);
    const int height_shrinks = (image_input_height > image_output_height);

    cfg->iSrcWidth = image_input_width;
    cfg->iSrcHeight = image_input_height;
    cfg->iOutWidth = image_output_width;
    cfg->iOutHeight = image_output_height;

    // Equal dimensions on both axes fall into the scale-down case.
    if (!width_grows && !height_grows) {
        cfg->scaleMode = PSCALER_SCALE_DOWN;
    } else if (!width_shrinks && !height_shrinks) {
        cfg->scaleMode = PSCALER_SCALE_UP;
    } else if (width_shrinks) {
        // width shrinks while height grows: Y axis is scaled up first
        cfg->scaleMode = PSCALER_SCALE_MIXED_YUP;
    } else {
        // width grows while height shrinks: X axis is scaled up first
        cfg->scaleMode = PSCALER_SCALE_MIXED_XUP;
    }

    // The factors depend on the dimensions and the mode stored above.
    _calculate_factors(cfg, cfg->scaleMode);

    // Source window: the whole input image.
    cfg->fSrcStartRow.decimal = 0;
    cfg->fSrcStartRow.fraction = 0;
    cfg->iSrcStartRow = 0;
    cfg->iSrcEndRow = cfg->iSrcHeight;
    cfg->iSrcBufWidth = image_input_buf_width;

    // Output window: the whole output image.
    cfg->iOutStartRow = 0;
    cfg->iOutEndRow = cfg->iOutHeight;
    cfg->iOutBufWidth = image_output_buf_width;

    // No buffers are attached yet.
    cfg->pSrcBuf = NULL;
    cfg->pOutBuf = NULL;
    cfg->pTmpBuf = NULL;
}
78
/*
 * Works out which input rows are needed to produce a range of output rows and
 * records that row window in the scaler configuration.
 *
 * start_output_row_number / end_output_row_number: requested output rows.
 *     start_output_row_number must be below the configured output height
 *     (asserted).
 * tables_ptr: the scaler_config_t to update.
 * start_input_row_number / end_input_row_number: receive the source row window.
 * num_output_rows_generated: receives the number of output rows reported for
 *     the window.
 * num_rows_offset_to_start_output_row: receives 1 when the window was widened
 *     by one row in front of the requested start row, otherwise 0.
 * mixed_axis_temp_buffer_size_needed: receives the intermediate buffer size for
 *     the two mixed modes, 0 for the other modes.
 */
void scaler_calculate_scaling_rows(uint16 start_output_row_number, uint16 end_output_row_number,
        void *tables_ptr, uint16 *start_input_row_number, uint16 *end_input_row_number,
        uint16 *num_output_rows_generated, uint16 *num_rows_offset_to_start_output_row,
        uint32 *mixed_axis_temp_buffer_size_needed) {
    scaler_config_t *cfg = (scaler_config_t *) tables_ptr;
    float64_t src_end;
    bool_t rounded_over;
    uint32 numerator;
    uint32 denominator;
    int y_grows;

    assert (start_output_row_number < cfg->iOutHeight);

    // Start from the requested window...
    cfg->iOutStartRow = start_output_row_number;
    cfg->iOutEndRow = end_output_row_number;
    *num_rows_offset_to_start_output_row = 0;

    // ...but never ask the scaler for a single row: widen the window by one row,
    // after the row at the top of the image, before it everywhere else.
    if (end_output_row_number == start_output_row_number) {
        if (start_output_row_number == 0) {
            cfg->iOutEndRow = end_output_row_number + 1;
        } else {
            cfg->iOutStartRow = start_output_row_number - 1;
            *num_rows_offset_to_start_output_row = 1;
        }
    }

    // last stripe: clamp to the final output row
    if (cfg->iOutEndRow >= cfg->iOutHeight) {
        cfg->iOutEndRow = cfg->iOutHeight - 1;
    }

    y_grows = (cfg->scaleMode == PSCALER_SCALE_UP) ||
            (cfg->scaleMode == PSCALER_SCALE_MIXED_YUP);

    if (y_grows) {
        // scale factors are calculated as dim-1/dim-1; undone further down
        cfg->iSrcHeight--;
        cfg->iOutHeight--;
    }

    denominator = (uint32) cfg->iOutHeight;

    // First source row = out start row * src height / out height, fraction rounded up.
    numerator = (uint32) cfg->iOutStartRow * (uint32) cfg->iSrcHeight;
    cfg->fSrcStartRow.decimal = numerator / denominator;
    cfg->fSrcStartRow.fraction = _scaler_fraction_part(numerator, denominator,
            FRACTION_ROUND_UP, &rounded_over);

    if (rounded_over) {
        cfg->fSrcStartRow.decimal++;
    }

    cfg->iSrcStartRow = cfg->fSrcStartRow.decimal;

    if (y_grows) {
        numerator = (uint32) cfg->iOutEndRow * (uint32) cfg->iSrcHeight;
        src_end.decimal = numerator / denominator;
        src_end.fraction = _scaler_fraction_part(numerator, denominator, FRACTION_TRUNCATE,
                &rounded_over);

        cfg->iSrcEndRow = (uint16) src_end.decimal;

        if (src_end.fraction != 0) {
            // a partial source row is needed, which also yields an extra output row
            cfg->iSrcEndRow++;
            cfg->iOutEndRow++;
        }

        // restore dimensions
        cfg->iSrcHeight++;
        cfg->iOutHeight++;
    } else {
        numerator = (uint32) (cfg->iOutEndRow + 1) * (uint32) cfg->iSrcHeight;
        src_end.decimal = numerator / denominator;
        src_end.fraction = _scaler_fraction_part(numerator, denominator, FRACTION_TRUNCATE,
                &rounded_over);

        cfg->iSrcEndRow = (uint16) src_end.decimal;

        if (src_end.fraction == 0) {
            cfg->iSrcEndRow--;
        }
    }

    // last stripe: never read beyond the source image
    if (cfg->iSrcEndRow >= cfg->iSrcHeight) {
        cfg->iSrcEndRow = cfg->iSrcHeight - 1;
    }

    *start_input_row_number = cfg->iSrcStartRow;
    *end_input_row_number = cfg->iSrcEndRow;
    *num_output_rows_generated = (cfg->iOutEndRow - cfg->iOutStartRow + 1);

    // Size of the intermediate buffer used between the two passes of mixed scaling.
    switch (cfg->scaleMode) {
        case PSCALER_SCALE_MIXED_XUP:
            *mixed_axis_temp_buffer_size_needed =
                    ROUND_4_UP(cfg->iOutWidth + 1) *
                            (*end_input_row_number - *start_input_row_number + 1);
            break;
        case PSCALER_SCALE_MIXED_YUP:
            *mixed_axis_temp_buffer_size_needed =
                    ROUND_4_UP(cfg->iSrcWidth) * (*num_output_rows_generated + 1);
            break;
        default:
            *mixed_axis_temp_buffer_size_needed = 0;
            break;
    }

    // NOTE(review): the reported count is one more than end - start + 1; this matches
    // the "+ 1" used for the MIXED_YUP buffer above - confirm callers expect it.
    (*num_output_rows_generated)++;
}
190
/*
 * Scales the rows currently described by the scaler configuration.
 *
 * input_plane / scaled_output_plane are installed as the source and output
 * buffers. For the plain up/down modes the scaler is run once. For the two mixed
 * modes it is run twice through temp_buffer_for_mixed_axis_scaling: pass 1
 * scales the growing axis into the temp buffer with the other axis forced to
 * 1::1, pass 2 scales the remaining axis from the temp buffer into the output
 * buffer. Every configuration field changed for a pass is put back afterwards,
 * and the temp buffer pointer is cleared on return.
 */
void scaler_scale_image_data(uint8 *input_plane, void *tables_ptr, uint8 *scaled_output_plane,
        uint8 *temp_buffer_for_mixed_axis_scaling) {
    scaler_config_t *cfg = (scaler_config_t *) tables_ptr;
    uint8 *saved_src;

    cfg->pSrcBuf = input_plane;
    cfg->pOutBuf = scaled_output_plane;

    // Single-axis-direction modes need just one run of the scaler.
    if ((PSCALER_SCALE_MIXED_XUP != cfg->scaleMode) &&
            (PSCALER_SCALE_MIXED_YUP != cfg->scaleMode)) {
        _hw_scale_image_plane(cfg, cfg->scaleMode);
        return;
    }

    // ---- pass 1: scale up into the temp buffer ----
    cfg->pTmpBuf = temp_buffer_for_mixed_axis_scaling;
    cfg->pOutBuf = cfg->pTmpBuf;

    if (PSCALER_SCALE_MIXED_YUP == cfg->scaleMode) {
        // Y grows first: keep the width untouched (output widths = input widths).
        const uint16 saved_out_width = cfg->iOutWidth;
        const uint16 saved_out_buf_width = cfg->iOutBufWidth;

        cfg->iOutWidth = cfg->iSrcWidth;
        cfg->iOutBufWidth = cfg->iSrcBufWidth;

        _calculate_factors(cfg, PSCALER_SCALE_UP);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_UP);

        cfg->iOutWidth = saved_out_width;
        cfg->iOutBufWidth = saved_out_buf_width;
    } else {
        // X grows first: keep the rows untouched (output rows = input rows).
        const uint16 saved_out_height = cfg->iOutHeight;
        const uint16 saved_out_start = cfg->iOutStartRow;
        const uint16 saved_out_end = cfg->iOutEndRow;
        float64_t saved_start;

        saved_start.fraction = cfg->fSrcStartRow.fraction;

        cfg->iOutHeight = cfg->iSrcHeight;
        cfg->iOutStartRow = cfg->iSrcStartRow;
        cfg->iOutEndRow = cfg->iSrcEndRow;
        cfg->fSrcStartRow.fraction = 0;

        _calculate_factors(cfg, PSCALER_SCALE_UP);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_UP);

        cfg->iOutHeight = saved_out_height;
        cfg->iOutStartRow = saved_out_start;
        cfg->iOutEndRow = saved_out_end;
        cfg->fSrcStartRow.fraction = saved_start.fraction;
    }

    // ---- pass 2: scale down from the temp buffer into the real output ----
    cfg->pOutBuf = scaled_output_plane;
    saved_src = cfg->pSrcBuf;
    cfg->pSrcBuf = cfg->pTmpBuf;

    if (PSCALER_SCALE_MIXED_YUP == cfg->scaleMode) {
        // The temp buffer already has the output height: rows are 1::1 now.
        const uint16 saved_src_height = cfg->iSrcHeight;
        const uint16 saved_src_start = cfg->iSrcStartRow;
        const uint16 saved_src_end = cfg->iSrcEndRow;
        float64_t saved_start;

        saved_start.decimal = cfg->fSrcStartRow.decimal;
        saved_start.fraction = cfg->fSrcStartRow.fraction;

        cfg->iSrcHeight = cfg->iOutHeight;
        cfg->iSrcStartRow = cfg->iOutStartRow;
        cfg->iSrcEndRow = cfg->iOutEndRow;
        cfg->fSrcStartRow.decimal = cfg->iOutStartRow;
        cfg->fSrcStartRow.fraction = 0;

        _calculate_factors(cfg, PSCALER_SCALE_DOWN);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_DOWN);

        cfg->iSrcHeight = saved_src_height;
        cfg->iSrcStartRow = saved_src_start;
        cfg->iSrcEndRow = saved_src_end;
        cfg->fSrcStartRow.decimal = saved_start.decimal;
        cfg->fSrcStartRow.fraction = saved_start.fraction;
    } else {
        // The temp buffer already has the output width: columns are 1::1 now.
        const uint16 saved_src_width = cfg->iSrcWidth;
        const uint16 saved_src_buf_width = cfg->iSrcBufWidth;

        cfg->iSrcWidth = cfg->iOutWidth;
        cfg->iSrcBufWidth = cfg->iOutBufWidth;

        _calculate_factors(cfg, PSCALER_SCALE_DOWN);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_DOWN);

        cfg->iSrcWidth = saved_src_width;
        cfg->iSrcBufWidth = saved_src_buf_width;
    }

    // Put the caller's input buffer back and let go of the temp buffer.
    cfg->pSrcBuf = saved_src;
    cfg->pTmpBuf = NULL;
}
323
/*
 * Recomputes the four scale factors stored in the configuration from its
 * current source/output dimensions:
 *   fXfactor    = iOutWidth  / iSrcWidth   (fraction truncated)
 *   fXfactorInv = iSrcWidth  / iOutWidth   (fraction rounded up)
 *   fYfactor    = iOutHeight / iSrcHeight  (fraction truncated)
 *   fYfactorInv = iSrcHeight / iOutHeight  (fraction rounded up)
 *
 * For an axis that is scaled up the factors are computed as (dim-1)/(dim-1);
 * the dimensions are decremented in place for the computation and restored
 * before returning.
 *
 * Note: the scaleMode argument is not consulted; the axes treated as "scaled up"
 * are chosen from pscaler_config->scaleMode.
 */
static void _calculate_factors(scaler_config_t *pscaler_config, scaler_mode_t scaleMode) {
    scaler_config_t *const cfg = pscaler_config;
    const scaler_mode_t stored_mode = cfg->scaleMode;
    const int height_minus_one = (stored_mode == PSCALER_SCALE_UP) ||
            (stored_mode == PSCALER_SCALE_MIXED_YUP);
    const int width_minus_one = (stored_mode == PSCALER_SCALE_UP) ||
            (stored_mode == PSCALER_SCALE_MIXED_XUP);
    bool_t rounded_over;
    uint32 src_w, out_w, src_h, out_h;

    (void) scaleMode;

    if (height_minus_one) {
        cfg->iSrcHeight--;
        cfg->iOutHeight--;
    }
    if (width_minus_one) {
        cfg->iSrcWidth--;
        cfg->iOutWidth--;
    }

    src_w = (uint32) cfg->iSrcWidth;
    out_w = (uint32) cfg->iOutWidth;
    src_h = (uint32) cfg->iSrcHeight;
    out_h = (uint32) cfg->iOutHeight;

    // horizontal factor and its inverse
    cfg->fXfactor.decimal = out_w / src_w;
    cfg->fXfactor.fraction = _scaler_fraction_part(out_w, src_w, FRACTION_TRUNCATE,
            &rounded_over);

    cfg->fXfactorInv.decimal = src_w / out_w;
    cfg->fXfactorInv.fraction = _scaler_fraction_part(src_w, out_w, FRACTION_ROUND_UP,
            &rounded_over);
    if (rounded_over) {
        // rounding carried out of the fraction into the integer part
        cfg->fXfactorInv.decimal++;
    }

    // vertical factor and its inverse
    cfg->fYfactor.decimal = out_h / src_h;
    cfg->fYfactor.fraction = _scaler_fraction_part(out_h, src_h, FRACTION_TRUNCATE,
            &rounded_over);

    cfg->fYfactorInv.decimal = src_h / out_h;
    cfg->fYfactorInv.fraction = _scaler_fraction_part(src_h, out_h, FRACTION_ROUND_UP,
            &rounded_over);
    if (rounded_over) {
        cfg->fYfactorInv.decimal++;
    }

    // restore original dimensions
    if (height_minus_one) {
        cfg->iSrcHeight++;
        cfg->iOutHeight++;
    }
    if (width_minus_one) {
        cfg->iSrcWidth++;
        cfg->iOutWidth++;
    }
}
384
/*
 * Returns the fractional part of iNum / iDen as a binary fraction.
 *
 * PSCALER_FRACT_BITS_COUNT (24) fraction bits are produced by long division of
 * the remainder (iNum % iDen) and returned left-aligned in the 32-bit result
 * (generated bits << 8).
 *
 * mode:
 *   FRACTION_TRUNCATE - the generated bits are returned as they are.
 *   any other value   - (FRACTION_ROUND_UP) if a remainder is left after the
 *                       last generated bit, the fraction is increased by one
 *                       unit of the 24-bit value.
 *
 * *overflow is always written: 1 when rounding up would carry out of the 24
 * fraction bits (the function then returns 0 and the caller has to add one to
 * the integer part), 0 otherwise.
 *
 * A zero denominator yields 0 with *overflow cleared instead of dividing by zero.
 */
static uint32 _scaler_fraction_part(uint32 iNum, uint32 iDen, pscaler_fraction_t mode,
        bool_t *overflow) {
    uint32 iFract = 0;  // fraction bits generated so far
    uint32 iRem;        // running remainder of the long division
    int i;              // index of the fraction bit being generated

    *overflow = 0;

    if (iDen == 0) {
        return (0);
    }

    iRem = iNum % iDen;

    if (iRem == 0) {
        // iNum is a whole multiple of iDen: no fractional part
        return (0);
    }

    for (i = PSCALER_FRACT_BITS_COUNT - 1; i >= 0; i--) {
        iRem <<= 1;

        if (iRem >= iDen) {
            iFract |= ((uint32) 1 << i);
            iRem -= iDen;

            if (iRem == 0) {
                // The division came out exactly. The remainder must read as zero
                // here, otherwise an exact fraction would be rounded up below.
                break;
            }
        }
    }

    if ((mode != FRACTION_TRUNCATE) && (iRem != 0)) {
        if (iFract >= 0x00ffffff) {
            // all 24 bits set: rounding up carries into the integer part
            *overflow = 1;
            return (0);
        }
        iFract++;
    }

    return (iFract << 8);
}
427
428 #define _RESTRICT_ __restrict__
429
_scale_row_down_9in(uint8 * _RESTRICT_ in0,uint8 * _RESTRICT_ in1,uint8 * _RESTRICT_ in2,uint8 * _RESTRICT_ in3,uint8 * _RESTRICT_ in4,uint8 * _RESTRICT_ in5,uint8 * _RESTRICT_ in6,uint8 * _RESTRICT_ in7,uint8 * _RESTRICT_ in8,uint8 * _RESTRICT_ out,uint64 position_x,uint64 x_factor_inv,uint32 top_weight,uint32 bot_weight,uint32 weight_reciprocal,int out_width)430 static inline void _scale_row_down_9in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
431 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
432 uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ in7, uint8 *_RESTRICT_ in8, uint8 *_RESTRICT_ out,
433 uint64 position_x, uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight,
434 uint32 weight_reciprocal, int out_width) {
435 int x;
436 uint32 in_col;
437 sint32 total_weight;
438
439 for (x = 0; x < out_width; x++) {
440 uint32 acc_r = 0;
441 uint32 acc_g = 0;
442 uint32 acc_b = 0;
443 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff);
444 total_weight = x_factor_inv >> 24;
445
446 in_col = position_x >> 32;
447
448 while (total_weight > 0) {
449 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight;
450 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8;
451 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8;
452 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8;
453 acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight << 8;
454 acc_r += (uint32) in5[(in_col * 3) + 0] * curr_weight << 8;
455 acc_r += (uint32) in6[(in_col * 3) + 0] * curr_weight << 8;
456 acc_r += (uint32) in7[(in_col * 3) + 0] * curr_weight << 8;
457 acc_r += (uint32) in8[(in_col * 3) + 0] * curr_weight * bot_weight;
458
459 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight;
460 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8;
461 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8;
462 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8;
463 acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight << 8;
464 acc_g += (uint32) in5[(in_col * 3) + 1] * curr_weight << 8;
465 acc_g += (uint32) in6[(in_col * 3) + 1] * curr_weight << 8;
466 acc_g += (uint32) in7[(in_col * 3) + 1] * curr_weight << 8;
467 acc_g += (uint32) in8[(in_col * 3) + 1] * curr_weight * bot_weight;
468
469 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight;
470 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8;
471 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8;
472 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8;
473 acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight << 8;
474 acc_b += (uint32) in5[(in_col * 3) + 2] * curr_weight << 8;
475 acc_b += (uint32) in6[(in_col * 3) + 2] * curr_weight << 8;
476 acc_b += (uint32) in7[(in_col * 3) + 2] * curr_weight << 8;
477 acc_b += (uint32) in8[(in_col * 3) + 2] * curr_weight * bot_weight;
478
479 in_col++;
480
481 total_weight -= curr_weight;
482 curr_weight = total_weight > 256 ? 256 : total_weight;
483 }
484
485 position_x += x_factor_inv;
486
487 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
488 out[(x * 3) + 0] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
489 out[(x * 3) + 0] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
490 }
491 }
492
_scale_row_down_8in(uint8 * _RESTRICT_ in0,uint8 * _RESTRICT_ in1,uint8 * _RESTRICT_ in2,uint8 * _RESTRICT_ in3,uint8 * _RESTRICT_ in4,uint8 * _RESTRICT_ in5,uint8 * _RESTRICT_ in6,uint8 * _RESTRICT_ in7,uint8 * _RESTRICT_ out,uint64 position_x,uint64 x_factor_inv,uint32 top_weight,uint32 bot_weight,uint32 weight_reciprocal,int out_width)493 static inline void _scale_row_down_8in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
494 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
495 uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ in7, uint8 *_RESTRICT_ out, uint64 position_x,
496 uint64 x_factor_inv, uint32 top_weight,
497 uint32 bot_weight, uint32 weight_reciprocal,
498 int out_width) {
499 int x;
500 uint32 in_col;
501 sint32 total_weight;
502
503 for (x = 0; x < out_width; x++) {
504 uint32 acc_r = 0;
505 uint32 acc_g = 0;
506 uint32 acc_b = 0;
507 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff);
508 total_weight = x_factor_inv >> 24;
509
510 in_col = position_x >> 32;
511
512 while (total_weight > 0) {
513 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight;
514 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8;
515 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8;
516 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8;
517 acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight << 8;
518 acc_r += (uint32) in5[(in_col * 3) + 0] * curr_weight << 8;
519 acc_r += (uint32) in6[(in_col * 3) + 0] * curr_weight << 8;
520 acc_r += (uint32) in7[(in_col * 3) + 0] * curr_weight * bot_weight;
521
522 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight;
523 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8;
524 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8;
525 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8;
526 acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight << 8;
527 acc_g += (uint32) in5[(in_col * 3) + 1] * curr_weight << 8;
528 acc_g += (uint32) in6[(in_col * 3) + 1] * curr_weight << 8;
529 acc_g += (uint32) in7[(in_col * 3) + 1] * curr_weight * bot_weight;
530
531 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight;
532 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8;
533 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8;
534 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8;
535 acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight << 8;
536 acc_b += (uint32) in5[(in_col * 3) + 2] * curr_weight << 8;
537 acc_b += (uint32) in6[(in_col * 3) + 2] * curr_weight << 8;
538 acc_b += (uint32) in7[(in_col * 3) + 2] * curr_weight * bot_weight;
539
540 in_col++;
541
542 total_weight -= curr_weight;
543 curr_weight = total_weight > 256 ? 256 : total_weight;
544 }
545
546 position_x += x_factor_inv;
547
548 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
549 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
550 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
551 }
552 }
553
/*
 * Produces one scaled-down output row from 7 vertically adjacent input rows.
 *
 * in0..in6: input rows, 3 interleaved bytes per pixel. in0 is weighted with
 *     top_weight, in6 with bot_weight, the rows in between with a fixed weight
 *     of 256 (<< 8).
 * out: output row, 3 bytes per pixel, out_width pixels are written.
 * position_x: horizontal start position. Bits 32 and up select the input
 *     column; bits 24..31 reduce the weight of that first column.
 * x_factor_inv: added to position_x per output pixel; x_factor_inv >> 24 is the
 *     total horizontal weight per output pixel, at most 256 per input column.
 * weight_reciprocal: multiplied into each accumulated channel, which is then
 *     rounded and shifted right by 32 to give the output byte.
 */
static inline void _scale_row_down_7in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
        uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv,
        uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int out_col;

    for (out_col = 0; out_col < out_width; out_col++) {
        uint32 sum[3] = {0, 0, 0};  // one accumulator per channel
        uint32 span = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        uint32 src_col = position_x >> 32;
        int ch;

        while (remaining > 0) {
            const uint32 base = src_col * 3;

            for (ch = 0; ch < 3; ch++) {
                const uint32 at = base + ch;
                // interior rows share one weight: add them up before weighting
                const uint32 middle = (uint32) in1[at] + in2[at] + in3[at] + in4[at] +
                        in5[at];

                sum[ch] += (uint32) in0[at] * span * top_weight;
                sum[ch] += (middle * span) << 8;
                sum[ch] += (uint32) in6[at] * span * bot_weight;
            }

            src_col++;

            remaining -= span;
            span = remaining > 256 ? 256 : remaining;
        }

        position_x += x_factor_inv;

        for (ch = 0; ch < 3; ch++) {
            out[(out_col * 3) + ch] = ((uint64) sum[ch] * weight_reciprocal + half) >> 32;
        }
    }
}
609
/*
 * Produces one scaled-down output row from 6 vertically adjacent input rows.
 *
 * in0..in5: input rows, 3 interleaved bytes per pixel. in0 is weighted with
 *     top_weight, in5 with bot_weight, the rows in between with a fixed weight
 *     of 256 (<< 8).
 * out: output row, 3 bytes per pixel, out_width pixels are written.
 * position_x: horizontal start position. Bits 32 and up select the input
 *     column; bits 24..31 reduce the weight of that first column.
 * x_factor_inv: added to position_x per output pixel; x_factor_inv >> 24 is the
 *     total horizontal weight per output pixel, at most 256 per input column.
 * weight_reciprocal: multiplied into each accumulated channel, which is then
 *     rounded and shifted right by 32 to give the output byte.
 */
static inline void _scale_row_down_6in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
        uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, uint32 top_weight,
        uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int out_col;

    for (out_col = 0; out_col < out_width; out_col++) {
        uint32 sum[3] = {0, 0, 0};  // one accumulator per channel
        uint32 span = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        uint32 src_col = position_x >> 32;
        int ch;

        while (remaining > 0) {
            const uint32 base = src_col * 3;

            for (ch = 0; ch < 3; ch++) {
                const uint32 at = base + ch;
                // interior rows share one weight: add them up before weighting
                const uint32 middle = (uint32) in1[at] + in2[at] + in3[at] + in4[at];

                sum[ch] += (uint32) in0[at] * span * top_weight;
                sum[ch] += (middle * span) << 8;
                sum[ch] += (uint32) in5[at] * span * bot_weight;
            }

            src_col++;

            remaining -= span;
            span = remaining > 256 ? 256 : remaining;
        }

        position_x += x_factor_inv;

        for (ch = 0; ch < 3; ch++) {
            out[(out_col * 3) + ch] = ((uint64) sum[ch] * weight_reciprocal + half) >> 32;
        }
    }
}
662
/*
 * Produces one scaled-down output row from 5 vertically adjacent input rows.
 *
 * in0..in4: input rows, 3 interleaved bytes per pixel. in0 is weighted with
 *     top_weight, in4 with bot_weight, the rows in between with a fixed weight
 *     of 256 (<< 8).
 * out: output row, 3 bytes per pixel, out_width pixels are written.
 * position_x: horizontal start position. Bits 32 and up select the input
 *     column; bits 24..31 reduce the weight of that first column.
 * x_factor_inv: added to position_x per output pixel; x_factor_inv >> 24 is the
 *     total horizontal weight per output pixel, at most 256 per input column.
 * weight_reciprocal: multiplied into each accumulated channel, which is then
 *     rounded and shifted right by 32 to give the output byte.
 */
static inline void _scale_row_down_5in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ out,
        uint64 position_x, uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight,
        uint32 weight_reciprocal, int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int out_col;

    for (out_col = 0; out_col < out_width; out_col++) {
        uint32 sum[3] = {0, 0, 0};  // one accumulator per channel
        uint32 span = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        uint32 src_col = position_x >> 32;
        int ch;

        while (remaining > 0) {
            const uint32 base = src_col * 3;

            for (ch = 0; ch < 3; ch++) {
                const uint32 at = base + ch;
                // interior rows share one weight: add them up before weighting
                const uint32 middle = (uint32) in1[at] + in2[at] + in3[at];

                sum[ch] += (uint32) in0[at] * span * top_weight;
                sum[ch] += (middle * span) << 8;
                sum[ch] += (uint32) in4[at] * span * bot_weight;
            }

            src_col++;

            remaining -= span;
            span = remaining > 256 ? 256 : remaining;
        }

        position_x += x_factor_inv;

        for (ch = 0; ch < 3; ch++) {
            out[(out_col * 3) + ch] = ((uint64) sum[ch] * weight_reciprocal + half) >> 32;
        }
    }
}
712
/*
 * Produces one scaled-down output row from 4 vertically adjacent input rows.
 *
 * in0..in3: input rows, 3 interleaved bytes per pixel. in0 is weighted with
 *     top_weight, in3 with bot_weight, the two rows in between with a fixed
 *     weight of 256 (<< 8).
 * out: output row, 3 bytes per pixel, out_width pixels are written.
 * position_x: horizontal start position. Bits 32 and up select the input
 *     column; bits 24..31 reduce the weight of that first column.
 * x_factor_inv: added to position_x per output pixel; x_factor_inv >> 24 is the
 *     total horizontal weight per output pixel, at most 256 per input column.
 * weight_reciprocal: multiplied into each accumulated channel, which is then
 *     rounded and shifted right by 32 to give the output byte.
 */
static inline void _scale_row_down_4in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ out, uint64 position_x,
        uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal,
        int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int out_col;

    for (out_col = 0; out_col < out_width; out_col++) {
        uint32 sum[3] = {0, 0, 0};  // one accumulator per channel
        uint32 span = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        uint32 src_col = position_x >> 32;
        int ch;

        while (remaining > 0) {
            const uint32 base = src_col * 3;

            for (ch = 0; ch < 3; ch++) {
                const uint32 at = base + ch;
                // interior rows share one weight: add them up before weighting
                const uint32 middle = (uint32) in1[at] + in2[at];

                sum[ch] += (uint32) in0[at] * span * top_weight;
                sum[ch] += (middle * span) << 8;
                sum[ch] += (uint32) in3[at] * span * bot_weight;
            }

            src_col++;

            remaining -= span;
            span = remaining > 256 ? 256 : remaining;
        }

        position_x += x_factor_inv;

        for (ch = 0; ch < 3; ch++) {
            out[(out_col * 3) + ch] = ((uint64) sum[ch] * weight_reciprocal + half) >> 32;
        }
    }
}
759
/*
 * Down-scales one output row of packed 3-byte pixels from three vertically adjacent input rows.
 *
 * in0 is weighted by top_weight, in2 by bot_weight, and the interior row in1 by a full weight
 * of 256. position_x and x_factor_inv are 32.32 fixed-point values: the whole part of
 * position_x picks the first source column of an output pixel, the top 8 fraction bits decide
 * how much of that column is used, and x_factor_inv >> 24 is the horizontal weight (in 1/256
 * units) gathered for every output pixel. Each weighted sum is turned into an output byte by a
 * rounded multiply with weight_reciprocal followed by a 32-bit right shift.
 *
 * NOTE(review): the sums are held in 32 bits; presumably callers keep the combined
 * horizontal * vertical weight small enough that they cannot wrap - confirm.
 */
static inline void _scale_row_down_3in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv,
        uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 round_half = (uint64) 1 << 31;
    int out_col;

    for (out_col = 0; out_col < out_width; out_col++) {
        uint32 sum[3] = {0, 0, 0};
        // Byte offset of the first contributing source pixel
        uint32 src = (uint32) (position_x >> 32) * 3;
        // The first source column only contributes the part to the right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        int ch;

        for (; remaining > 0; src += 3) {
            for (ch = 0; ch < 3; ch++) {
                // Vertical blend of this column; the interior row carries weight 256 (<< 8)
                uint32 column = (uint32) in0[src + ch] * top_weight
                        + ((uint32) in1[src + ch] << 8)
                        + (uint32) in2[src + ch] * bot_weight;
                sum[ch] += column * col_weight;
            }

            // Following columns take a full 256, or whatever weight is still left over
            remaining -= col_weight;
            col_weight = remaining > 256 ? 256 : remaining;
        }

        for (ch = 0; ch < 3; ch++) {
            out[out_col * 3 + ch] = ((uint64) sum[ch] * weight_reciprocal + round_half) >> 32;
        }

        position_x += x_factor_inv;
    }
}
802
/*
 * Down-scales one output row of packed 3-byte pixels from two vertically adjacent input rows.
 *
 * in0 is weighted by top_weight and in1 by bot_weight. position_x and x_factor_inv are 32.32
 * fixed-point values: the whole part of position_x picks the first source column of an output
 * pixel, the top 8 fraction bits decide how much of that column is used, and
 * x_factor_inv >> 24 is the horizontal weight (in 1/256 units) gathered for every output
 * pixel. Each weighted sum is turned into an output byte by a rounded multiply with
 * weight_reciprocal followed by a 32-bit right shift.
 */
static inline void _scale_row_down_2in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, uint32 top_weight,
        uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 round_half = (uint64) 1 << 31;
    int out_col;

    for (out_col = 0; out_col < out_width; out_col++) {
        uint32 sum[3] = {0, 0, 0};
        // Byte offset of the first contributing source pixel
        uint32 src = (uint32) (position_x >> 32) * 3;
        // The first source column only contributes the part to the right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        int ch;

        for (; remaining > 0; src += 3) {
            for (ch = 0; ch < 3; ch++) {
                // Vertical blend of the two rows for this column
                uint32 column = (uint32) in0[src + ch] * top_weight
                        + (uint32) in1[src + ch] * bot_weight;
                sum[ch] += column * col_weight;
            }

            // Following columns take a full 256, or whatever weight is still left over
            remaining -= col_weight;
            col_weight = remaining > 256 ? 256 : remaining;
        }

        for (ch = 0; ch < 3; ch++) {
            out[out_col * 3 + ch] = ((uint64) sum[ch] * weight_reciprocal + round_half) >> 32;
        }

        position_x += x_factor_inv;
    }
}
842
/*
 * Produces one down-scaled output row of packed 3-byte pixels.
 *
 * The vertical span covered by the output row (y_factor_inv >> 24, in 1/256 units) is split
 * into a partial top row, zero or more full-weight interior rows and a bottom row. Spans of two
 * to nine input rows are handed to the matching unrolled _scale_row_down_Nin helper; taller
 * spans are handled by the generic loop at the end.
 *
 * in points at the first contributing input row and in_row_ofs is the distance in bytes
 * between consecutive input rows. position_x/position_y and x_factor_inv/y_factor_inv are
 * 32.32 fixed-point values. weight_reciprocal converts a weighted sum back to an 8-bit value.
 *
 * NOTE(review): the bottom row is still read when bot_weight is 0 - presumably the input
 * buffer always has one readable row past the span; confirm against the caller.
 */
static inline void _scale_row_down(uint8 *in, uint8 *_RESTRICT_ out, uint32 in_row_ofs,
        uint64 position_x, uint64 position_y, uint64 x_factor_inv, uint64 y_factor_inv,
        uint32 weight_reciprocal, int out_width) {
    uint32 top_weight, bot_weight, in_rows;
    sint32 total_weight = y_factor_inv >> 24;

    // The first row contributes what lies below position_y, capped at the whole span
    top_weight = (uint32) 256 - ((position_y >> 24) & 0xff);
    if ((sint32) top_weight > total_weight) {
        top_weight = total_weight;
    }
    total_weight -= top_weight;

    // The last row takes the fractional remainder, or a full row when the rest is a
    // non-zero multiple of 256
    bot_weight = total_weight & 0xff;
    if (bot_weight == 0 && total_weight > 255) {
        bot_weight = 256;
    }
    total_weight -= bot_weight;

    // Whatever is left must be a whole number of full-weight interior rows
    assert(total_weight >= 0);
    assert((total_weight & 0xff) == 0);

    in_rows = 2 + (total_weight >> 8);

    switch (in_rows) {
        case 2:
            _scale_row_down_2in(in, in + in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        case 3:
            _scale_row_down_3in(in, in + in_row_ofs, in + 2 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        case 4:
            _scale_row_down_4in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        case 5:
            _scale_row_down_5in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        case 6:
            _scale_row_down_6in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        case 7:
            _scale_row_down_7in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        case 8:
            _scale_row_down_8in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs,
                    in + 7 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        case 9:
            _scale_row_down_9in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs,
                    in + 7 * in_row_ofs, in + 8 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            break;
        default: {
            // Generic path for spans that have no unrolled helper
            const uint64 round_half = (uint64) 1 << 31;
            int out_col;

            for (out_col = 0; out_col < out_width; out_col++) {
                uint32 sum[3] = {0, 0, 0};
                // Byte offset, within a row, of the first contributing source pixel
                uint32 src = (uint32) (position_x >> 32) * 3;
                uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
                sint32 remaining = x_factor_inv >> 24;
                int ch;

                for (; remaining > 0; src += 3) {
                    uint32 interior[3] = {0, 0, 0};
                    uint32 row;
                    uint32 idx;

                    // Interior rows all share the full weight of 256, so add them up first
                    for (row = 1; row < in_rows - 1; row++) {
                        idx = row * in_row_ofs + src;
                        interior[0] += in[idx + 0];
                        interior[1] += in[idx + 1];
                        interior[2] += in[idx + 2];
                    }

                    // row now indexes the bottom row of the span
                    idx = row * in_row_ofs + src;
                    for (ch = 0; ch < 3; ch++) {
                        uint32 column = (uint32) in[src + ch] * top_weight
                                + interior[ch] * 256
                                + (uint32) in[idx + ch] * bot_weight;
                        sum[ch] += column * col_weight;
                    }

                    remaining -= col_weight;
                    col_weight = remaining > 256 ? 256 : remaining;
                }

                for (ch = 0; ch < 3; ch++) {
                    out[out_col * 3 + ch] =
                            ((uint64) sum[ch] * weight_reciprocal + round_half) >> 32;
                }

                position_x += x_factor_inv;
            }
            break;
        }
    }
}
955
/*
 * Produces one up-scaled output row of packed 3-byte pixels by interpolating between two
 * adjacent input rows (in0 above, in1 below).
 *
 * weight_y is the vertical blend weight toward in1 and is applied as a 10-bit fraction.
 * position_x is the 32.32 fixed-point source position of the first output pixel and
 * increment_x the step added per output pixel. For every output pixel the source pixel at the
 * whole part of position_x and its right-hand neighbour are read from both rows.
 *
 * NOTE(review): the right-hand neighbour is read even when its weight is 0 - presumably each
 * input row has one readable pixel past the last one sampled; confirm against the caller.
 */
static void _scale_row_up(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, uint8 *_RESTRICT_ out,
        sint32 weight_y, uint64 position_x, uint64 increment_x, int out_width) {
    int out_col;
    int ch;

    for (out_col = 0; out_col < out_width; out_col++, position_x += increment_x) {
        // Position is tracked with 32 bits of precision, but interpolation only uses the
        // top 10 fraction bits.
        uint32 left = (uint32) (position_x >> 32) * 3;
        uint32 right = left + 3;
        sint32 weight_x = (position_x & 0xffffffff) >> 22;
        sint32 blended[3];

        for (ch = 0; ch < 3; ch++) {
            sint32 top_left = in0[left + ch];
            sint32 top_right = in0[right + ch];
            sint32 bot_left = in1[left + ch];
            sint32 bot_right = in1[right + ch];

            // Horizontal pass: 8-bit samples become 18-bit values
            sint32 top_val = (top_left << 10) + weight_x * (top_right - top_left);
            sint32 bot_val = (bot_left << 10) + weight_x * (bot_right - bot_left);

            // Vertical pass, then drop the 20 fraction bits. No range check is needed:
            // the result is a weighted average of 8-bit samples.
            blended[ch] = ((top_val << 10) + weight_y * (bot_val - top_val)) >> 20;
        }

        for (ch = 0; ch < 3; ch++) {
            out[out_col * 3 + ch] = blended[ch];
        }
    }
}
996
/*
 * Scales the configured rows of an image, one output row at a time.
 *
 * pscaler_config provides the source and output buffers, their row strides, the output width,
 * the inclusive output row range (iOutStartRow..iOutEndRow), and the 32.32 fixed-point inverse
 * scale factors and starting source row. scaleMode selects interpolating up-scaling
 * (PSCALER_SCALE_UP, via _scale_row_up) or weighted-average down-scaling (any other value, via
 * _scale_row_down).
 */
static void _hw_scale_image_plane(scaler_config_t *pscaler_config, scaler_mode_t scaleMode) {
    // These values duplicate h/w regs
    uint64 x_factor_inv, y_factor_inv;
    uint32 x_output_width, y_output_width;
    uint32 input_pixel_ptr_offset, output_pixel_ptr_offset;
    uint32 first_xi;
    uint64 first_y_src, first_x_src, weight_reciprocal;

    // These are internal state
    uint32 r;
    uint8 *outp;

    x_output_width = pscaler_config->iOutWidth;
    y_output_width = pscaler_config->iOutEndRow -
            pscaler_config->iOutStartRow + 1;

    input_pixel_ptr_offset = pscaler_config->iSrcBufWidth;
    output_pixel_ptr_offset = pscaler_config->iOutBufWidth;

    // Assemble the 32.32 fixed-point values from their whole and fractional halves
    x_factor_inv = (uint64) pscaler_config->fXfactorInv.decimal << 32;
    x_factor_inv |= pscaler_config->fXfactorInv.fraction;

    y_factor_inv = (uint64) pscaler_config->fYfactorInv.decimal << 32;
    y_factor_inv |= pscaler_config->fYfactorInv.fraction;

    first_y_src = (uint64) pscaler_config->fSrcStartRow.decimal << 32;
    first_y_src |= pscaler_config->fSrcStartRow.fraction;

    // PC REVISIT - The HW has config registers for the first output column
    // (iOutStartColumn) and the first source column (fSrcStartColumn), but they
    // aren't being used by lib_photo_scaler so I don't want to use them, either.
    // Scaling always starts at the left edge, so both are fixed at zero.
    first_xi = 0;
    first_x_src = 0;

    // The reciprocal of the total per-pixel weight is only needed when scaling down.
    // Computing it unconditionally would divide by zero for up-scale factors above 256x,
    // where (factor_inv >> 24) is 0. A zero total weight leaves the reciprocal at 0.
    weight_reciprocal = 0;
    if (scaleMode != PSCALER_SCALE_UP) {
        uint64 weight_total = (x_factor_inv >> 24) * (y_factor_inv >> 24);
        if (weight_total != 0) {
            weight_reciprocal = ((uint64) 1 << 32) / weight_total;
        }
    }

    outp = (pscaler_config->pOutBuf) + (first_xi * 3);

    // PC - Assume pSrcBuf is already aligned to "true" base of input,
    // so ignore whole-number part of first_y_src.
    first_y_src = first_y_src & 0xffffffff;

    for (r = 0; r < y_output_width; r++) {
        // The whole part of the running source position selects the input row
        uint8 *inp = (pscaler_config->pSrcBuf) +
                (first_y_src >> 32) * input_pixel_ptr_offset;

        if (scaleMode == PSCALER_SCALE_UP) {
            // The top 10 fraction bits of the source row position are the vertical weight
            _scale_row_up(inp, inp + input_pixel_ptr_offset, outp,
                    (first_y_src & 0xffffffff) >> 22, first_x_src,
                    x_factor_inv, x_output_width);
        } else {
            _scale_row_down(inp, outp, input_pixel_ptr_offset,
                    first_x_src, first_y_src, x_factor_inv, y_factor_inv,
                    weight_reciprocal, x_output_width);
        }

        first_y_src += y_factor_inv;
        outp += output_pixel_ptr_offset;
    }
}