1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <audio_utils/primitives.h>
18 #include <string.h>
19 #include "private/private.h"
20
/*
 * Pack pairs of accumulator values into 32-bit stereo words.
 *
 * For each of the `pairs` frames, two consecutive values are read from `sums`.
 * Each is shifted right by 12 bits and passed through clamp16(). One word is
 * written to `out` per frame: the second value (r) occupies the upper 16 bits
 * and the first value (l) the lower 16 bits.
 *
 * NOTE(review): despite the name, no dithering is visible in this function.
 */
void ditherAndClamp(int32_t *out, const int32_t *sums, size_t pairs)
{
    for (; pairs > 0; --pairs) {
        const int32_t l = clamp16(*sums++ >> 12);
        const int32_t r = clamp16(*sums++ >> 12);
        /* Pack via uint32_t: left-shifting a negative int is undefined behavior. */
        *out++ = (int32_t)(((uint32_t)r << 16) | ((uint32_t)l & 0xFFFFu));
    }
}
29
/*
 * Convert `count` 32-bit samples to 16-bit samples.
 *
 * Each source value is shifted right by 12 bits and then passed through
 * clamp16(). Samples are processed from first to last.
 */
void memcpy_to_i16_from_q4_27(int16_t *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp16(src[i] >> 12);
    }
}
36
/*
 * Convert `count` unsigned 8-bit samples (centered on 0x80) to signed 16-bit.
 *
 * Each output is (sample - 0x80) scaled by 256. The buffers are walked from
 * the last sample to the first, so the conversion also works when dst and src
 * start at the same address.
 */
void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count)
{
    dst += count;
    src += count;
    for (; count > 0; --count) {
        /* Multiply instead of "<< 8": left-shifting a negative value is undefined. */
        *--dst = (int16_t)((*--src - 0x80) * 256);
    }
}
45
/*
 * Convert `count` signed 16-bit samples to unsigned 8-bit samples.
 *
 * The upper byte of each sample is kept (sample >> 8) and offset by 0x80.
 * Samples are processed from first to last.
 */
void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        const int upper = src[i] >> 8;
        dst[i] = upper + 0x80;
    }
}
52
/*
 * Convert `count` packed 3-byte samples to unsigned 8-bit samples.
 *
 * One byte of each 3-byte group is kept and offset by 0x80: byte 0 when
 * HAVE_BIG_ENDIAN is set, byte 2 otherwise. Samples are processed from first
 * to last.
 */
void memcpy_to_u8_from_p24(uint8_t *dst, const uint8_t *src, size_t count)
{
#if HAVE_BIG_ENDIAN
    const size_t top = 0;
#else
    const size_t top = 2;
#endif
    for (size_t i = 0; i < count; ++i) {
        dst[i] = src[3 * i + top] + 0x80;
    }
}
64
/*
 * Convert `count` signed 32-bit samples to unsigned 8-bit samples.
 *
 * The top byte of each sample is kept (sample >> 24) and offset by 0x80.
 * Samples are processed from first to last.
 */
void memcpy_to_u8_from_i32(uint8_t *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        const int32_t upper = src[i] >> 24;
        dst[i] = upper + 0x80;
    }
}
71
/*
 * Convert `count` 32-bit samples to unsigned 8-bit samples by passing each
 * one through clamp8_from_q8_23(). Samples are processed from first to last.
 */
void memcpy_to_u8_from_q8_23(uint8_t *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp8_from_q8_23(src[i]);
    }
}
78
/*
 * Convert `count` float samples to unsigned 8-bit samples by passing each
 * one through clamp8_from_float(). Samples are processed from first to last.
 */
void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp8_from_float(src[i]);
    }
}
85
/*
 * Convert `count` signed 32-bit samples to signed 16-bit samples by keeping
 * the upper 16 bits (sample >> 16). Samples are processed from first to last.
 */
void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = src[i] >> 16;
    }
}
92
/*
 * Convert `count` float samples to signed 16-bit samples by passing each one
 * through clamp16_from_float(). Samples are processed from first to last.
 */
void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp16_from_float(src[i]);
    }
}
99
/*
 * Convert `count` 32-bit samples to float by passing each one through
 * float_from_q4_27(). Samples are processed from first to last.
 */
void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = float_from_q4_27(src[i]);
    }
}
106
/*
 * Convert `count` signed 16-bit samples to float via float_from_i16().
 *
 * Samples are processed from last to first, so the wider output can share
 * its starting address with the input.
 */
void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count)
{
    for (size_t i = count; i-- > 0; ) {
        dst[i] = float_from_i16(src[i]);
    }
}
115
/*
 * Convert `count` unsigned 8-bit samples to float via float_from_u8().
 *
 * Samples are processed from last to first, so the wider output can share
 * its starting address with the input.
 */
void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count)
{
    for (size_t i = count; i-- > 0; ) {
        dst[i] = float_from_u8(src[i]);
    }
}
124
/*
 * Convert `count` packed 3-byte samples to float via float_from_p24(), which
 * receives a pointer to the first byte of each 3-byte group.
 *
 * Samples are processed from last to first, so the wider output can share
 * its starting address with the input.
 */
void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count)
{
    for (size_t i = count; i-- > 0; ) {
        dst[i] = float_from_p24(src + 3 * i);
    }
}
134
/*
 * Convert `count` packed 3-byte samples to signed 16-bit samples.
 *
 * The result combines the middle byte (low half) with byte 0 when
 * HAVE_BIG_ENDIAN is set, or byte 2 otherwise (high half). The remaining byte
 * is dropped. Samples are processed from first to last.
 */
void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count)
{
#if HAVE_BIG_ENDIAN
    const size_t high = 0;
#else
    const size_t high = 2;
#endif
    for (size_t i = 0; i < count; ++i) {
        const uint8_t *sample = src + 3 * i;
        dst[i] = sample[1] | (sample[high] << 8);
    }
}
146
/*
 * Convert `count` packed 3-byte samples to signed 32-bit samples.
 *
 * The three bytes are placed in bits 8..31 of the result and the low 8 bits
 * are zero. Byte order within the group follows HAVE_BIG_ENDIAN. Samples are
 * processed from last to first, so the wider output can share its starting
 * address with the input.
 */
void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count)
{
    dst += count;
    src += count * 3;
    for (; count > 0; --count) {
        src -= 3;
        /*
         * Assemble in uint32_t: shifting a promoted byte >= 0x80 left by 24
         * would overflow int, which is undefined behavior.
         */
#if HAVE_BIG_ENDIAN
        *--dst = (int32_t)(((uint32_t)src[2] << 8) | ((uint32_t)src[1] << 16)
                | ((uint32_t)src[0] << 24));
#else
        *--dst = (int32_t)(((uint32_t)src[0] << 8) | ((uint32_t)src[1] << 16)
                | ((uint32_t)src[2] << 24));
#endif
    }
}
160
/*
 * Convert `count` signed 16-bit samples to packed 3-byte samples.
 *
 * The two bytes of each sample become the two most significant bytes of the
 * 3-byte group and the least significant byte is zero. Byte order follows
 * HAVE_BIG_ENDIAN. Samples are processed from last to first, so the wider
 * output can share its starting address with the input.
 */
void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count)
{
    for (size_t i = count; i-- > 0; ) {
        /* Read the sample before writing: the buffers may overlap. */
        const int16_t value = src[i];
        uint8_t *packed = dst + 3 * i;
#if HAVE_BIG_ENDIAN
        packed[0] = value >> 8;
        packed[1] = value;
        packed[2] = 0;
#else
        packed[0] = 0;
        packed[1] = value;
        packed[2] = value >> 8;
#endif
    }
}
179
/*
 * Convert `count` float samples to packed 3-byte samples.
 *
 * Each sample is passed through clamp24_from_float() and the low three bytes
 * of the result are stored. Byte order follows HAVE_BIG_ENDIAN. Samples are
 * processed from first to last.
 */
void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        const int32_t value = clamp24_from_float(src[i]);
        uint8_t *packed = dst + 3 * i;
#if HAVE_BIG_ENDIAN
        packed[0] = value >> 16;
        packed[1] = value >> 8;
        packed[2] = value;
#else
        packed[0] = value;
        packed[1] = value >> 8;
        packed[2] = value >> 16;
#endif
    }
}
196
/*
 * Convert `count` 32-bit samples to packed 3-byte samples.
 *
 * Each sample is passed through clamp24_from_q8_23() and the low three bytes
 * of the result are stored. Byte order follows HAVE_BIG_ENDIAN. Samples are
 * processed from first to last.
 */
void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        const int32_t value = clamp24_from_q8_23(src[i]);
        uint8_t *packed = dst + 3 * i;
#if HAVE_BIG_ENDIAN
        packed[0] = value >> 16;
        packed[1] = value >> 8;
        packed[2] = value;
#else
        packed[0] = value;
        packed[1] = value >> 8;
        packed[2] = value >> 16;
#endif
    }
}
213
/*
 * Convert `count` signed 32-bit samples to packed 3-byte samples.
 *
 * The low 8 bits of each sample are dropped (sample >> 8) and the remaining
 * three bytes are stored. Byte order follows HAVE_BIG_ENDIAN. Samples are
 * processed from first to last.
 */
void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        const int32_t value = src[i] >> 8;
        uint8_t *packed = dst + 3 * i;
#if HAVE_BIG_ENDIAN
        packed[0] = value >> 16;
        packed[1] = value >> 8;
        packed[2] = value;
#else
        packed[0] = value;
        packed[1] = value >> 8;
        packed[2] = value >> 16;
#endif
    }
}
230
/*
 * Convert `count` signed 16-bit samples to 32-bit samples scaled by 256.
 *
 * Samples are processed from last to first, so the wider output can share
 * its starting address with the input.
 */
void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count)
{
    dst += count;
    src += count;
    for (; count > 0; --count) {
        /* Multiply instead of "<< 8": left-shifting a negative value is undefined. */
        *--dst = (int32_t)*--src * 256;
    }
}
239
/*
 * Convert `count` float samples to 32-bit samples by passing each one
 * through clamp24_from_float(). Samples are processed from first to last.
 */
void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp24_from_float(src[i]);
    }
}
246
/*
 * Convert `count` packed 3-byte samples to sign-extended 32-bit samples.
 *
 * The most significant byte of each group is treated as signed and the other
 * two bytes fill the low 16 bits. Byte order follows HAVE_BIG_ENDIAN. Samples
 * are processed from last to first, so the wider output can share its
 * starting address with the input.
 */
void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count)
{
    dst += count;
    src += count * 3;
    for (; count > 0; --count) {
        src -= 3;
        /*
         * The signed top byte is scaled by multiplication rather than
         * "<< 16": left-shifting a negative value is undefined behavior.
         */
#if HAVE_BIG_ENDIAN
        *--dst = ((int32_t)(int8_t)src[0] * 65536) | (src[1] << 8) | src[2];
#else
        *--dst = ((int32_t)(int8_t)src[2] * 65536) | (src[1] << 8) | src[0];
#endif
    }
}
260
/*
 * Convert `count` float samples to 32-bit samples by passing each one
 * through clampq4_27_from_float(). Samples are processed from first to last.
 */
void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clampq4_27_from_float(src[i]);
    }
}
267
/*
 * Convert `count` 32-bit samples to signed 16-bit samples.
 *
 * Each source value is shifted right by 8 bits and then passed through
 * clamp16(). Samples are processed from first to last.
 */
void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp16(src[i] >> 8);
    }
}
274
/*
 * Convert `count` 32-bit samples to float by passing each one through
 * float_from_q8_23(). Samples are processed from first to last.
 */
void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = float_from_q8_23(src[i]);
    }
}
281
/*
 * Convert `count` unsigned 8-bit samples (centered on 0x80) to signed 32-bit.
 *
 * Each output is (sample - 0x80) scaled by 2^24. Samples are processed from
 * last to first, so the wider output can share its starting address with the
 * input.
 */
void memcpy_to_i32_from_u8(int32_t *dst, const uint8_t *src, size_t count)
{
    dst += count;
    src += count;
    for (; count > 0; --count) {
        /*
         * Multiply instead of "<< 24": left-shifting a negative value is
         * undefined. The product always fits (-128 * 2^24 == INT32_MIN).
         */
        *--dst = ((int32_t)(*--src) - 0x80) * (1 << 24);
    }
}
290
/*
 * Convert `count` signed 16-bit samples to signed 32-bit samples scaled by
 * 2^16.
 *
 * Samples are processed from last to first, so the wider output can share
 * its starting address with the input.
 */
void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count)
{
    dst += count;
    src += count;
    for (; count > 0; --count) {
        /* Multiply instead of "<< 16": left-shifting a negative value is undefined. */
        *--dst = (int32_t)*--src * 65536;
    }
}
299
/*
 * Convert `count` float samples to signed 32-bit samples by passing each one
 * through clamp32_from_float(). Samples are processed from first to last.
 */
void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp32_from_float(src[i]);
    }
}
306
/*
 * Convert `count` signed 32-bit samples to float by passing each one through
 * float_from_i32(). Samples are processed from first to last.
 */
void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        dst[i] = float_from_i32(src[i]);
    }
}
313
memcpy_to_float_from_float_with_clamping(float * dst,const float * src,size_t count,float absMax)314 void memcpy_to_float_from_float_with_clamping(float *dst, const float *src, size_t count,
315 float absMax) {
316 // Note: using NEON intrinsics (vminq_f32, vld1q_f32...) did NOT accelerate
317 // the function when benchmarked. The compiler already vectorize using FMINNM f32x4 & similar.
318 // Note: clamping induce a ~20% overhead compared to memcpy for count in [64, 512]
319 // See primitives_benchmark
320 for (; count > 0; --count) {
321 const float sample = *src++;
322 *dst++ = fmax(-absMax, fmin(absMax, sample));
323 }
324 }
325
/*
 * Mix `count` interleaved 16-bit stereo frames down to mono.
 *
 * Each output sample is the sum of the two channel samples, computed in
 * 32 bits and shifted right by one. Frames are processed from first to last.
 */
void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count)
{
    for (size_t frame = 0; frame < count; ++frame) {
        const int32_t first = src[2 * frame];
        const int32_t second = src[2 * frame + 1];
        dst[frame] = (int16_t)((first + second) >> 1);
    }
}
333
/*
 * Expand `count` mono 16-bit samples into interleaved stereo frames by
 * writing each sample to both channels.
 *
 * Samples are processed from last to first, so the larger output can share
 * its starting address with the input.
 */
void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count)
{
    for (size_t frame = count; frame-- > 0; ) {
        /* Read before writing: for frame 0 the output overlaps the input. */
        const int32_t sample = src[frame];
        int16_t *pair = dst + 2 * frame;
        pair[0] = sample;
        pair[1] = sample;
    }
}
345
/*
 * Mix `frames` interleaved float stereo frames down to mono.
 *
 * Each output sample is half the sum of the two channel samples. Frames are
 * processed from first to last.
 */
void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t frames)
{
    for (size_t frame = 0; frame < frames; ++frame) {
        const float *pair = src + 2 * frame;
        dst[frame] = (pair[0] + pair[1]) * 0.5;
    }
}
353
/*
 * Expand `frames` mono float samples into interleaved stereo frames by
 * writing each sample to both channels.
 *
 * Samples are processed from last to first, so the larger output can share
 * its starting address with the input.
 */
void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t frames)
{
    for (size_t frame = frames; frame-- > 0; ) {
        /* Read before writing: for frame 0 the output overlaps the input. */
        const float sample = src[frame];
        float *pair = dst + 2 * frame;
        pair[0] = sample;
        pair[1] = sample;
    }
}
365
/*
 * Count how many of the `count` 32-bit samples are non-zero.
 *
 * Returns the number of non-zero samples.
 */
size_t nonZeroMono32(const int32_t *samples, size_t count)
{
    size_t tally = 0;
    for (size_t i = 0; i < count; ++i) {
        if (samples[i] != 0) {
            ++tally;
        }
    }
    return tally;
}
374
/*
 * Count how many of the `count` 16-bit samples are non-zero.
 *
 * Returns the number of non-zero samples.
 */
size_t nonZeroMono16(const int16_t *samples, size_t count)
{
    size_t tally = 0;
    for (size_t i = 0; i < count; ++i) {
        if (samples[i] != 0) {
            ++tally;
        }
    }
    return tally;
}
383
/*
 * Count how many of the `count` interleaved 32-bit stereo frames have at
 * least one non-zero channel sample.
 *
 * Returns the number of such frames.
 */
size_t nonZeroStereo32(const int32_t *frames, size_t count)
{
    size_t tally = 0;
    for (size_t i = 0; i < count; ++i) {
        const int32_t *pair = frames + 2 * i;
        if (pair[0] != 0 || pair[1] != 0) {
            ++tally;
        }
    }
    return tally;
}
393
/*
 * Count how many of the `count` interleaved 16-bit stereo frames have at
 * least one non-zero channel sample.
 *
 * Returns the number of such frames.
 */
size_t nonZeroStereo16(const int16_t *frames, size_t count)
{
    size_t tally = 0;
    for (size_t i = 0; i < count; ++i) {
        const int16_t *pair = frames + 2 * i;
        if (pair[0] != 0 || pair[1] != 0) {
            ++tally;
        }
    }
    return tally;
}
403
/*
 * C macro to do channel mask copying independent of dst/src sample type.
 *
 * For every frame, the set bits of (dmask | smask) are visited from lowest to
 * highest:
 *   - bit in both masks:  the next source sample is copied to the destination;
 *   - bit only in dmask:  `zero` is written to the destination;
 *   - bit only in smask:  the source sample is skipped.
 *
 * The arguments are evaluated many times, and dst, src and count are modified
 * (the pointers end up past the last frame and count reaches 0). Pass plain
 * variables only, never expressions with side effects.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement, including inside an unbraced if/else.
 */
#define copy_frame_by_mask(dst, dmask, src, smask, count, zero) \
do { \
    uint32_t bit, ormask; \
    for (; (count) > 0; --(count)) { \
        ormask = (dmask) | (smask); \
        while (ormask) { \
            bit = ormask & -ormask; /* get lowest bit */ \
            ormask ^= bit; /* remove lowest bit */ \
            if ((dmask) & bit) { \
                *(dst)++ = (smask) & bit ? *(src)++ : (zero); \
            } else { /* source channel only */ \
                ++(src); \
            } \
        } \
    } \
} while (0)
424
memcpy_by_channel_mask(void * dst,uint32_t dst_mask,const void * src,uint32_t src_mask,size_t sample_size,size_t count)425 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
426 const void *src, uint32_t src_mask, size_t sample_size, size_t count)
427 {
428 #if 0
429 /* alternate way of handling memcpy_by_channel_mask by using the idxary */
430 int8_t idxary[32];
431 uint32_t src_channels = __builtin_popcount(src_mask);
432 uint32_t dst_channels =
433 memcpy_by_index_array_initialization(idxary, 32, dst_mask, src_mask);
434
435 memcpy_by_idxary(dst, dst_channels, src, src_channels, idxary, sample_size, count);
436 #else
437 if (dst_mask == src_mask) {
438 memcpy(dst, src, sample_size * __builtin_popcount(dst_mask) * count);
439 return;
440 }
441 switch (sample_size) {
442 case 1: {
443 uint8_t *udst = (uint8_t*)dst;
444 const uint8_t *usrc = (const uint8_t*)src;
445
446 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0);
447 } break;
448 case 2: {
449 uint16_t *udst = (uint16_t*)dst;
450 const uint16_t *usrc = (const uint16_t*)src;
451
452 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0);
453 } break;
454 case 3: { /* could be slow. use a struct to represent 3 bytes of data. */
455 uint8x3_t *udst = (uint8x3_t*)dst;
456 const uint8x3_t *usrc = (const uint8x3_t*)src;
457 static const uint8x3_t zero; /* tricky - we use this to zero out a sample */
458
459 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, zero);
460 } break;
461 case 4: {
462 uint32_t *udst = (uint32_t*)dst;
463 const uint32_t *usrc = (const uint32_t*)src;
464
465 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0);
466 } break;
467 default:
468 abort(); /* illegal value */
469 break;
470 }
471 #endif
472 }
473
/*
 * C macro to do copying by index array, to rearrange samples
 * within a frame. This is independent of src/dst sample type.
 *
 * For every frame, destination channel i receives (src)[idxary[i]], or `zero`
 * when idxary[i] is negative. After each frame src advances by src_channels.
 *
 * The arguments are evaluated many times, and dst, src and count are modified
 * (count reaches 0). Pass plain variables only, never expressions with side
 * effects.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement, including inside an unbraced if/else.
 */
#define copy_frame_by_idx(dst, dst_channels, src, src_channels, idxary, count, zero) \
do { \
    unsigned i; \
    int index; \
    for (; (count) > 0; --(count)) { \
        for (i = 0; i < (dst_channels); ++i) { \
            index = (idxary)[i]; \
            *(dst)++ = index < 0 ? (zero) : (src)[index]; \
        } \
        (src) += (src_channels); \
    } \
} while (0)
491
memcpy_by_index_array(void * dst,uint32_t dst_channels,const void * src,uint32_t src_channels,const int8_t * idxary,size_t sample_size,size_t count)492 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
493 const void *src, uint32_t src_channels,
494 const int8_t *idxary, size_t sample_size, size_t count)
495 {
496 switch (sample_size) {
497 case 1: {
498 uint8_t *udst = (uint8_t*)dst;
499 const uint8_t *usrc = (const uint8_t*)src;
500
501 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0);
502 } break;
503 case 2: {
504 uint16_t *udst = (uint16_t*)dst;
505 const uint16_t *usrc = (const uint16_t*)src;
506
507 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0);
508 } break;
509 case 3: { /* could be slow. use a struct to represent 3 bytes of data. */
510 uint8x3_t *udst = (uint8x3_t*)dst;
511 const uint8x3_t *usrc = (const uint8x3_t*)src;
512 static const uint8x3_t zero;
513
514 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, zero);
515 } break;
516 case 4: {
517 uint32_t *udst = (uint32_t*)dst;
518 const uint32_t *usrc = (const uint32_t*)src;
519
520 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0);
521 } break;
522 default:
523 abort(); /* illegal value */
524 break;
525 }
526 }
527
/*
 * Build an index array that maps destination channels to source channels by
 * matching bits of the two masks.
 *
 * The set bits of (src_mask | dst_mask) are visited from lowest to highest.
 * For each destination bit, the next idxary entry receives the ordinal of the
 * matching source channel, or -1 when the source has no such bit. At most
 * idxcount entries are written.
 *
 * Returns the number of entries written plus the number of destination bits
 * that were not visited because idxary was full.
 */
size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
        uint32_t dst_mask, uint32_t src_mask)
{
    const uint32_t shared = src_mask & dst_mask;
    uint32_t pending = src_mask | dst_mask;
    size_t filled = 0;
    int next_src = 0;

    while (pending != 0 && filled < idxcount) {
        const uint32_t lowest = pending & -pending; /* isolate lowest set bit */
        pending &= ~lowest;                         /* and clear it */

        if (shared & lowest) {              /* channel present on both sides */
            idxary[filled++] = next_src++;
        } else if (src_mask & lowest) {     /* source-only channel: skip it */
            ++next_src;
        } else {                            /* destination-only channel */
            idxary[filled++] = -1;
        }
    }
    return filled + __builtin_popcount(pending & dst_mask);
}
548
/*
 * Build an index array in which src_mask is read bit by bit, from the lowest
 * bit, once per destination channel.
 *
 * Entry k receives the ordinal of the next source channel when bit k of
 * src_mask is set, and -1 otherwise. The number of entries is the number of
 * set bits in dst_mask, limited to idxcount.
 *
 * Returns the number of entries written, or the number of set bits in
 * dst_mask when idxcount is 0 (nothing is written in that case).
 */
size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
        uint32_t dst_mask, uint32_t src_mask) {
    const size_t dst_channels = __builtin_popcount(dst_mask);
    if (idxcount == 0) {
        return dst_channels;
    }
    const size_t limit = dst_channels > idxcount ? idxcount : dst_channels;

    size_t next_src = 0;
    size_t slot;
    for (slot = 0; slot < limit; ++slot, src_mask >>= 1) {
        if ((src_mask & 1) != 0) {
            idxary[slot] = next_src++;
        } else {
            idxary[slot] = -1;
        }
    }
    return slot;
}
570
/*
 * Build an index array in which the bit positions of dst_mask are used as
 * source channel indices.
 *
 * dst_mask is scanned from the lowest bit. For each set bit at position p the
 * next entry receives p when p is below the number of set bits in src_mask,
 * and -1 otherwise. The number of entries is the number of set bits in
 * dst_mask, limited to idxcount.
 *
 * Returns the number of entries written, or the number of set bits in
 * dst_mask when idxcount is 0 (nothing is written in that case).
 */
size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
        uint32_t dst_mask, uint32_t src_mask) {
    const size_t src_channels = __builtin_popcount(src_mask);
    size_t dst_channels = __builtin_popcount(dst_mask);
    if (idxcount == 0) {
        return dst_channels;
    }
    if (idxcount < dst_channels) {
        dst_channels = idxcount;
    }

    size_t filled = 0;
    for (size_t position = 0; filled < dst_channels; ++position, dst_mask >>= 1) {
        if ((dst_mask & 1) == 0) {
            continue;
        }
        idxary[filled++] = position < src_channels ? (signed)position : -1;
    }
    return filled;
}
590
/*
 * Add `count` 16-bit samples from src into dst in place.
 *
 * Each sum is computed in 32 bits and passed through clamp16() before being
 * stored back to dst.
 */
void accumulate_i16(int16_t *dst, const int16_t *src, size_t count) {
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp16((int32_t)dst[i] + src[i]);
    }
}
597
/*
 * Add `count` unsigned 8-bit samples from src into dst in place, saturating
 * to the range [0, 0xff].
 *
 * 8-bit samples are centered around 0x80, so one 0x80 offset is subtracted
 * from the raw sum to keep the result centered.
 */
void accumulate_u8(uint8_t *dst, const uint8_t *src, size_t count) {
    for (; count > 0; --count) {
        int32_t sum = *dst + *src++ - 0x80;
        /*
         * Explicit clamp: the sum lies in [-128, 382]. This avoids depending
         * on an arithmetic right shift of a negative value, which is
         * implementation-defined.
         */
        if (sum < 0) {
            sum = 0;
        } else if (sum > 0xFF) {
            sum = 0xFF;
        }
        *dst++ = (uint8_t)sum;
    }
}
607
/*
 * Add `count` packed 3-byte samples from src into dst in place.
 *
 * Each pair of samples is unpacked to 32 bits with
 * memcpy_to_q8_23_from_p24(), summed, and written back to dst with
 * memcpy_to_p24_from_q8_23().
 */
void accumulate_p24(uint8_t *dst, const uint8_t *src, size_t count) {
    for (size_t i = 0; i < count; ++i, dst += 3, src += 3) {
        int32_t total = 0;
        int32_t addend = 0;

        // Unpack one sample from each buffer.
        memcpy_to_q8_23_from_p24(&total, dst, 1);
        memcpy_to_q8_23_from_p24(&addend, src, 1);

        // Sum and repack over the destination sample.
        total += addend;
        memcpy_to_p24_from_q8_23(dst, &total, 1);
    }
}
625
/*
 * Add `count` 32-bit samples from src into dst in place.
 *
 * Each sum is passed through clamp24_from_q8_23() before being stored back
 * to dst.
 */
void accumulate_q8_23(int32_t *dst, const int32_t *src, size_t count) {
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp24_from_q8_23(dst[i] + src[i]);
    }
}
632
/*
 * Add `count` 32-bit samples from src into dst in place.
 *
 * Each sum is computed in 64 bits and passed through clamp32() before being
 * stored back to dst.
 */
void accumulate_i32(int32_t *dst, const int32_t *src, size_t count) {
    for (size_t i = 0; i < count; ++i) {
        dst[i] = clamp32((int64_t)dst[i] + src[i]);
    }
}
639
/*
 * Add `count` float samples from src into dst in place. No clamping is
 * applied to the sums.
 */
void accumulate_float(float *dst, const float *src, size_t count) {
    for (size_t i = 0; i < count; ++i) {
        dst[i] += src[i];
    }
}
645