1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
18 #define ANDROID_AUDIO_PRIMITIVES_H
19
20 #include <math.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/cdefs.h>
24
25 /** \cond */
26 __BEGIN_DECLS
27 /** \endcond */
28
29 /**
30 * \file primitives.h
31 * The memcpy_* conversion routines are designed to work in-place on same dst as src
32 * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
33 * This allows the loops to go upwards for faster cache access (and may be more flexible
34 * for future optimization later).
35 */
36
37 /**
38 * Deprecated. Use memcpy_to_i16_from_q4_27() instead (double the pairs for the count).
39 * Neither this function nor memcpy_to_i16_from_q4_27() actually dither.
40 *
41 * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
42 * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
43 * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
44 * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
45 * is dithered and the remaining fraction is converted to the output Q.15, with clamping
46 * on the 4 integer guard bits.
47 *
48 * For interleaved stereo, pairs is the number of sample pairs,
49 * and out is an array of interleaved pairs of 16-bit samples per channel.
50 * For mono, pairs is the number of samples / 2, and out is an array of 16-bit samples.
51 * The name "dither" is a misnomer; the current implementation does not actually dither
52 * but uses truncation. This may change.
53 * The out and sums buffers must either be completely separate (non-overlapping), or
54 * they must both start at the same address. Partially overlapping buffers are not supported.
55 */
56 void ditherAndClamp(int32_t *out, const int32_t *sums, size_t pairs);
57
58 /**
59 * Copy samples from signed fixed-point 32-bit Q4.27 to 16-bit Q0.15
60 *
61 * \param dst Destination buffer
62 * \param src Source buffer
63 * \param count Number of samples to copy
64 *
65 * The destination and source buffers must either be completely separate (non-overlapping), or
66 * they must both start at the same address. Partially overlapping buffers are not supported.
67 */
68 void memcpy_to_i16_from_q4_27(int16_t *dst, const int32_t *src, size_t count);
69
70 /**
71 * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
72 *
73 * \param dst Destination buffer
74 * \param src Source buffer
75 * \param count Number of samples to copy
76 *
77 * The destination and source buffers must either be completely separate (non-overlapping), or
78 * they must both start at the same address. Partially overlapping buffers are not supported.
79 */
80 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
81
82 /**
83 * Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
84 *
85 * \param dst Destination buffer
86 * \param src Source buffer
87 * \param count Number of samples to copy
88 *
89 * The destination and source buffers must either be completely separate (non-overlapping), or
90 * they must both start at the same address. Partially overlapping buffers are not supported.
91 * The conversion is done by truncation, without dithering, so it loses resolution.
92 */
93 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
94
95 /**
96 * Copy samples from float to unsigned 8-bit offset by 0x80.
97 *
98 * \param dst Destination buffer
99 * \param src Source buffer
100 * \param count Number of samples to copy
101 *
102 * The destination and source buffers must either be completely separate (non-overlapping), or
103 * they must both start at the same address. Partially overlapping buffers are not supported.
104 * The conversion is done by truncation, without dithering, so it loses resolution.
105 */
106 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
107
108 /**
109 * Copy samples from signed fixed-point packed 24 bit Q0.23 to unsigned 8-bit offset by 0x80.
110 *
111 * \param dst Destination buffer
112 * \param src Source buffer
113 * \param count Number of samples to copy
114 *
115 * The destination and source buffers must either be completely separate (non-overlapping), or
116 * they must both start at the same address. Partially overlapping buffers are not supported.
117 * The conversion is done by truncation, without dithering, so it loses resolution.
118 */
119 void memcpy_to_u8_from_p24(uint8_t *dst, const uint8_t *src, size_t count);
120
121 /**
122 * Copy samples from signed 32-bit fixed-point Q0.31 to unsigned 8-bit offset by 0x80.
123 *
124 * \param dst Destination buffer
125 * \param src Source buffer
126 * \param count Number of samples to copy
127 *
128 * The destination and source buffers must either be completely separate (non-overlapping), or
129 * they must both start at the same address. Partially overlapping buffers are not supported.
130 * The conversion is done by truncation, without dithering, so it loses resolution.
131 */
132 void memcpy_to_u8_from_i32(uint8_t *dst, const int32_t *src, size_t count);
133
134 /**
135 * Copy samples from signed fixed-point 32-bit Q8.23 to unsigned 8-bit offset by 0x80.
136 *
137 * \param dst Destination buffer
138 * \param src Source buffer
139 * \param count Number of samples to copy
140 *
141 * The destination and source buffers must either be completely separate (non-overlapping), or
142 * they must both start at the same address. Partially overlapping buffers are not supported.
143 * The conversion is done by truncation, without dithering, so it loses resolution.
144 */
145 void memcpy_to_u8_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
146
147 /**
148 * Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
149 *
150 * \param dst Destination buffer
151 * \param src Source buffer
152 * \param count Number of samples to copy
153 *
154 * The destination and source buffers must either be completely separate (non-overlapping), or
155 * they must both start at the same address. Partially overlapping buffers are not supported.
156 * The conversion is done by truncation, without dithering, so it loses resolution.
157 */
158 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
159
160 /**
161 * Shrink and copy samples from single-precision floating-point to signed 16-bit.
162 * Each float should be in the range -1.0 to 1.0. Values outside that range are clamped,
163 * refer to clamp16_from_float().
164 *
165 * \param dst Destination buffer
166 * \param src Source buffer
167 * \param count Number of samples to copy
168 *
169 * The destination and source buffers must either be completely separate (non-overlapping), or
170 * they must both start at the same address. Partially overlapping buffers are not supported.
171 * The conversion is done by truncation, without dithering, so it loses resolution.
172 */
173 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
174
175 /**
176 * Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
177 * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
178 * [0xf8000000, 0x07ffffff]. The full float range is [-16.0, 16.0]. Note the closed range
179 * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
180 *
181 * \param dst Destination buffer
182 * \param src Source buffer
183 * \param count Number of samples to copy
184 *
185 * The destination and source buffers must either be completely separate (non-overlapping), or
186 * they must both start at the same address. Partially overlapping buffers are not supported.
187 */
188 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
189
190 /**
191 * Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
192 * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
193 * No rounding is needed as the representation is exact.
194 *
195 * \param dst Destination buffer
196 * \param src Source buffer
197 * \param count Number of samples to copy
198 *
199 * The destination and source buffers must either be completely separate (non-overlapping), or
200 * they must both start at the same address. Partially overlapping buffers are not supported.
201 */
202 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
203
204 /**
205 * Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
206 * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
207 * No rounding is needed as the representation is exact.
208 *
209 * \param dst Destination buffer
210 * \param src Source buffer
211 * \param count Number of samples to copy
212 *
213 * The destination and source buffers must either be completely separate (non-overlapping), or
214 * they must both start at the same address. Partially overlapping buffers are not supported.
215 */
216 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
217
218 /**
219 * Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
220 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
221 * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
222 * No rounding is needed as the representation is exact.
223 *
224 * \param dst Destination buffer
225 * \param src Source buffer
226 * \param count Number of samples to copy
227 *
228 * The destination and source buffers must either be completely separate (non-overlapping), or
229 * they must both start at the same address. Partially overlapping buffers are not supported.
230 */
231 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
232
233 /**
234 * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
236 * The data is truncated without rounding.
237 *
238 * \param dst Destination buffer
239 * \param src Source buffer
240 * \param count Number of samples to copy
241 *
242 * The destination and source buffers must either be completely separate (non-overlapping), or
243 * they must both start at the same address. Partially overlapping buffers are not supported.
244 */
245 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
246
247 /**
248 * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
249 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
250 * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
251 *
252 * \param dst Destination buffer
253 * \param src Source buffer
254 * \param count Number of samples to copy
255 *
256 * The destination and source buffers must either be completely separate (non-overlapping), or
257 * they must both start at the same address. Partially overlapping buffers are not supported.
258 */
259 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
260
261 /**
262 * Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
263 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
264 * The output data range is [0x800000, 0x7fff00] (not full).
265 * Nevertheless there is no DC offset on the output, if the input has no DC offset.
266 *
267 * \param dst Destination buffer
268 * \param src Source buffer
269 * \param count Number of samples to copy
270 *
271 * The destination and source buffers must either be completely separate (non-overlapping), or
272 * they must both start at the same address. Partially overlapping buffers are not supported.
273 */
274 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
275
276 /**
277 * Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
278 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
279 * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
280 * for details.
281 *
282 * \param dst Destination buffer
283 * \param src Source buffer
284 * \param count Number of samples to copy
285 *
286 * The destination and source buffers must either be completely separate (non-overlapping), or
287 * they must both start at the same address. Partially overlapping buffers are not supported.
288 */
289 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
290
291 /**
292 * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
293 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 * The data is clamped to the range [0x800000, 0x7fffff].
295 *
296 * \param dst Destination buffer
297 * \param src Source buffer
298 * \param count Number of samples to copy
299 *
 * The destination and source buffers must either be completely separate (non-overlapping), or
 * they must both start at the same address. Partially overlapping buffers are not supported.
302 */
303 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
304
305 /**
306 * Shrink and copy samples from signed 32-bit fixed-point Q0.31
307 * to signed fixed-point packed 24 bit Q0.23.
308 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
309 *
310 * \param dst Destination buffer
311 * \param src Source buffer
312 * \param count Number of samples to copy
313 *
314 * The destination and source buffers must either be completely separate (non-overlapping), or
315 * they must both start at the same address. Partially overlapping buffers are not supported.
316 * The conversion is done by truncation, without dithering, so it loses resolution.
317 */
318 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
319
320 /**
321 * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
322 * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
323 *
324 * \param dst Destination buffer
325 * \param src Source buffer
326 * \param count Number of samples to copy
327 *
328 * The destination and source buffers must either be completely separate (non-overlapping), or
329 * they must both start at the same address. Partially overlapping buffers are not supported.
330 */
331 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
332
333 /**
334 * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
335 * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
336 * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
337 * See clamp24_from_float() for details.
338 *
339 * \param dst Destination buffer
340 * \param src Source buffer
341 * \param count Number of samples to copy
342 *
343 * The destination and source buffers must either be completely separate (non-overlapping), or
344 * they must both start at the same address. Partially overlapping buffers are not supported.
345 */
346 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
347
348 /**
349 * Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
350 * The output data range is [0xff800000, 0x007fffff].
351 *
352 * \param dst Destination buffer
353 * \param src Source buffer
354 * \param count Number of samples to copy
355 *
356 * The destination and source buffers must either be completely separate (non-overlapping), or
357 * they must both start at the same address. Partially overlapping buffers are not supported.
358 */
359 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
360
361 /**
362 * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
363 * The conversion will use the full available Q4.27 range, including guard bits.
364 * Fractional lsb is rounded to nearest, ties away from zero.
365 * See clampq4_27_from_float() for details.
366 *
367 * \param dst Destination buffer
368 * \param src Source buffer
369 * \param count Number of samples to copy
370 *
371 * The destination and source buffers must either be completely separate (non-overlapping), or
372 * they must both start at the same address. Partially overlapping buffers are not supported.
373 */
374 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
375
376 /**
377 * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
378 * The data is clamped, and truncated without rounding.
379 *
380 * \param dst Destination buffer
381 * \param src Source buffer
382 * \param count Number of samples to copy
383 *
384 * The destination and source buffers must either be completely separate (non-overlapping), or
385 * they must both start at the same address. Partially overlapping buffers are not supported.
386 */
387 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
388
389 /**
390 * Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
391 * The nominal output float range is [-1.0, 1.0) for the fixed-point
392 * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
393 * No rounding is needed as the representation is exact for nominal values.
394 * Rounding for overflow values is to nearest, ties to even.
395 *
396 * \param dst Destination buffer
397 * \param src Source buffer
398 * \param count Number of samples to copy
399 *
400 * The destination and source buffers must either be completely separate (non-overlapping), or
401 * they must both start at the same address. Partially overlapping buffers are not supported.
402 */
403 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
404
405 /**
406 * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 32-bit.
407 *
408 * \param dst Destination buffer
409 * \param src Source buffer
410 * \param count Number of samples to copy
411 *
412 * The destination and source buffers must either be completely separate (non-overlapping), or
413 * they must both start at the same address. Partially overlapping buffers are not supported.
414 */
415 void memcpy_to_i32_from_u8(int32_t *dst, const uint8_t *src, size_t count);
416
417 /**
418 * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
419 * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
420 *
421 * \param dst Destination buffer
422 * \param src Source buffer
423 * \param count Number of samples to copy
424 *
425 * The destination and source buffers must either be completely separate (non-overlapping), or
426 * they must both start at the same address. Partially overlapping buffers are not supported.
427 */
428 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
429
430 /**
431 * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
432 * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
433 * ties away from zero. See clamp32_from_float() for details.
434 *
435 * \param dst Destination buffer
436 * \param src Source buffer
437 * \param count Number of samples to copy
438 *
439 * The destination and source buffers must either be completely separate (non-overlapping), or
440 * they must both start at the same address. Partially overlapping buffers are not supported.
441 */
442 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
443
444 /**
445 * Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
446 * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
447 * Rounding is done according to float_from_i32().
448 *
449 * \param dst Destination buffer
450 * \param src Source buffer
451 * \param count Number of samples to copy
452 *
453 * The destination and source buffers must either be completely separate (non-overlapping), or
454 * they must both start at the same address. Partially overlapping buffers are not supported.
455 */
456 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
457
458 /**
459 * Copy samples from unrestricted float to range restricted float [-absMax, absMax].
460 * Any float sample not in the range [-absMax, absMax] will be clamped in this range.
461 *
462 * \param dst Destination buffer
463 * \param src Source buffer
464 * \param count Number of samples to copy
465 * \param absMax Maximum of the absolute value of the copied samples.
466 *
467 * The destination and source buffers must either be completely separate (non-overlapping), or
468 * they must both start at the same address. Partially overlapping buffers are not supported.
 * Note: NAN is clamped to absMax and not 0 for performance reasons (~2x faster).
470 */
471 void memcpy_to_float_from_float_with_clamping(float *dst, const float *src, size_t count,
472 float absMax);
473
474 /**
475 * Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
476 *
477 * \param dst Destination buffer
478 * \param src Source buffer
479 * \param count Number of stereo frames to downmix
480 *
481 * The destination and source buffers must be completely separate (non-overlapping).
 * The current implementation truncates the mean rather than dithering, but this may change.
483 */
484 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
485
486 /**
487 * Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
488 * duplicating.
489 *
490 * \param dst Destination buffer
491 * \param src Source buffer
492 * \param count Number of mono samples to upmix
493 *
494 * The destination and source buffers must either be completely separate (non-overlapping), or
495 * they must both start at the same address. Partially overlapping buffers are not supported.
496 */
497 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
498
499 /**
500 * Downmix pairs of interleaved stereo input float samples to mono output float samples
501 * by averaging the stereo pair together.
502 *
503 * \param dst Destination buffer
504 * \param src Source buffer
505 * \param count Number of stereo frames to downmix
506 *
507 * The destination and source buffers must be completely separate (non-overlapping),
508 * or they must both start at the same address.
509 */
510 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
511
512 /**
513 * Upmix mono input float samples to pairs of interleaved stereo output float samples by
514 * duplicating.
515 *
516 * \param dst Destination buffer
517 * \param src Source buffer
518 * \param count Number of mono samples to upmix
519 *
520 * The destination and source buffers must either be completely separate (non-overlapping), or
521 * they must both start at the same address. Partially overlapping buffers are not supported.
522 */
523 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
524
525 /**
526 * \return the total number of non-zero 32-bit samples.
527 */
528 size_t nonZeroMono32(const int32_t *samples, size_t count);
529
530 /**
531 * \return the total number of non-zero 16-bit samples.
532 */
533 size_t nonZeroMono16(const int16_t *samples, size_t count);
534
535 /**
536 * \return the total number of non-zero stereo frames, where a frame is considered non-zero
537 * if either of its constituent 32-bit samples is non-zero.
538 */
539 size_t nonZeroStereo32(const int32_t *frames, size_t count);
540
541 /**
542 * \return the total number of non-zero stereo frames, where a frame is considered non-zero
543 * if either of its constituent 16-bit samples is non-zero.
544 */
545 size_t nonZeroStereo16(const int16_t *frames, size_t count);
546
547 /**
548 * Copy frames, selecting source samples based on a source channel mask to fit
549 * the destination channel mask. Unmatched channels in the destination channel mask
550 * are zero filled. Unmatched channels in the source channel mask are dropped.
551 * Channels present in the channel mask are represented by set bits in the
552 * uint32_t value and are matched without further interpretation.
553 *
554 * \param dst Destination buffer
555 * \param dst_mask Bit mask corresponding to destination channels present
556 * \param src Source buffer
557 * \param src_mask Bit mask corresponding to source channels present
558 * \param sample_size Size of each sample in bytes. Must be 1, 2, 3, or 4.
559 * \param count Number of frames to copy
560 *
561 * The destination and source buffers must be completely separate (non-overlapping).
562 * If the sample size is not in range, the function will abort.
563 */
564 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
565 const void *src, uint32_t src_mask, size_t sample_size, size_t count);
566
567 /**
568 * Copy frames, selecting source samples based on an index array (idxary).
569 * The idxary[] consists of dst_channels number of elements.
 * The ith element of idxary[] corresponds to the ith destination channel.
571 * A non-negative value is the channel index in the source frame.
572 * A negative index (-1) represents filling with 0.
573 *
574 * Example: Swapping L and R channels for stereo streams
575 * <PRE>
576 * idxary[0] = 1;
577 * idxary[1] = 0;
578 * </PRE>
579 *
580 * Example: Copying a mono source to the front center 5.1 channel
581 * <PRE>
582 * idxary[0] = -1;
583 * idxary[1] = -1;
584 * idxary[2] = 0;
585 * idxary[3] = -1;
586 * idxary[4] = -1;
587 * idxary[5] = -1;
588 * </PRE>
589 *
590 * This copy allows swizzling of channels or replication of channels.
591 *
592 * \param dst Destination buffer
593 * \param dst_channels Number of destination channels per frame
594 * \param src Source buffer
595 * \param src_channels Number of source channels per frame
596 * \param idxary Array of indices representing channels in the source frame
597 * \param sample_size Size of each sample in bytes. Must be 1, 2, 3, or 4.
598 * \param count Number of frames to copy
599 *
600 * The destination and source buffers must be completely separate (non-overlapping).
601 * If the sample size is not in range, the function will abort.
602 */
603 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
604 const void *src, uint32_t src_channels,
605 const int8_t *idxary, size_t sample_size, size_t count);
606
607 /**
608 * Prepares an index array (idxary) from channel masks, which can be later
609 * used by memcpy_by_index_array().
610 *
611 * \return the number of array elements required.
612 * This may be greater than idxcount, so the return value should be checked
613 * if idxary size is less than 32.
614 *
615 * Note that idxary is a caller allocated array
616 * of at least as many channels as present in the dst_mask.
617 * Channels present in the channel mask are represented by set bits in the
618 * uint32_t value and are matched without further interpretation.
619 *
620 * This function is typically used for converting audio data with different
621 * channel position masks.
622 *
623 * \param idxary Updated array of indices of channels in the src frame for the dst frame
624 * \param idxcount Number of caller allocated elements in idxary
625 * \param dst_mask Bit mask corresponding to destination channels present
626 * \param src_mask Bit mask corresponding to source channels present
627 */
628 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
629 uint32_t dst_mask, uint32_t src_mask);
630
631 /**
632 * Prepares an index array (idxary) from channel masks, which can be later
633 * used by memcpy_by_index_array().
634 *
635 * \return the number of array elements required.
636 *
637 * For a source channel index mask, the source channels will map to the destination
638 * channels as if counting the set bits in dst_mask in order from lsb to msb
639 * (zero bits are ignored). The ith bit of the src_mask corresponds to the
640 * ith SET bit of dst_mask and the ith destination channel. Hence, a zero ith
641 * bit of the src_mask indicates that the ith destination channel plays silence.
642 *
643 * \param idxary Updated array of indices of channels in the src frame for the dst frame
644 * \param idxcount Number of caller allocated elements in idxary
645 * \param dst_mask Bit mask corresponding to destination channels present
646 * \param src_mask Bit mask corresponding to source channels present
647 */
648 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
649 uint32_t dst_mask, uint32_t src_mask);
650
651 /**
652 * Prepares an index array (idxary) from channel mask bits, which can be later
653 * used by memcpy_by_index_array().
654 *
655 * \return the number of array elements required.
656 *
657 * This initialization is for a destination channel index mask from a positional
658 * source mask.
659 *
 * For a destination channel index mask, the input channels will map
661 * to the destination channels, with the ith SET bit in the source bits corresponding
662 * to the ith bit in the destination bits. If there is a zero bit in the middle
663 * of set destination bits (unlikely), the corresponding source channel will
664 * be dropped.
665 *
666 * \param idxary Updated array of indices of channels in the src frame for the dst frame
667 * \param idxcount Number of caller allocated elements in idxary
668 * \param dst_mask Bit mask corresponding to destination channels present
669 * \param src_mask Bit mask corresponding to source channels present
670 */
671 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
672 uint32_t dst_mask, uint32_t src_mask);
673
674 /**
675 * Add and clamp signed 16-bit samples.
676 *
677 * \param dst Destination buffer
678 * \param src Source buffer
679 * \param count Number of samples to add
680 *
681 * The destination and source buffers must either be completely separate (non-overlapping), or
682 * they must both start at the same address. Partially overlapping buffers are not supported.
683 */
684 void accumulate_i16(int16_t *dst, const int16_t *src, size_t count);
685
686 /**
687 * Add and clamp unsigned 8-bit samples.
688 *
689 * \param dst Destination buffer
690 * \param src Source buffer
691 * \param count Number of samples to add
692 *
693 * The destination and source buffers must either be completely separate (non-overlapping), or
694 * they must both start at the same address. Partially overlapping buffers are not supported.
695 */
696 void accumulate_u8(uint8_t *dst, const uint8_t *src, size_t count);
697
/**
 * Add packed 24-bit Q0.23 samples, clamping the result.
 *
 * \param dst    Destination buffer
 * \param src    Source buffer
 * \param count  Number of samples to add
 *
 * dst and src must either not overlap at all or begin at exactly the same
 * address. Partial overlap between the two buffers is not supported.
 */
void accumulate_p24(uint8_t *dst,
                    const uint8_t *src,
                    size_t count);
709
/**
 * Add 32-bit Q8.23 samples, clamping the result.
 *
 * \param dst    Destination buffer
 * \param src    Source buffer
 * \param count  Number of samples to add
 *
 * dst and src must either not overlap at all or begin at exactly the same
 * address. Partial overlap between the two buffers is not supported.
 */
void accumulate_q8_23(int32_t *dst,
                      const int32_t *src,
                      size_t count);
721
/**
 * Add signed 32-bit Q0.31 samples, clamping the result.
 *
 * \param dst    Destination buffer
 * \param src    Source buffer
 * \param count  Number of samples to add
 *
 * dst and src must either not overlap at all or begin at exactly the same
 * address. Partial overlap between the two buffers is not supported.
 */
void accumulate_i32(int32_t *dst,
                    const int32_t *src,
                    size_t count);
733
/**
 * Add float samples. Unlike the integer variants, the result is not clamped.
 *
 * \param dst    Destination buffer
 * \param src    Source buffer
 * \param count  Number of samples to add
 *
 * dst and src must either not overlap at all or begin at exactly the same
 * address. Partial overlap between the two buffers is not supported.
 */
void accumulate_float(float *dst,
                      const float *src,
                      size_t count);
745
/**
 * Hard-limit (clip) a signed 32-bit sample to the signed 16-bit range.
 *
 * \param sample Value to limit
 * \return sample unchanged when it lies in [-32768, 32767]; otherwise the
 *         nearer of the two bounds.
 */
static inline int16_t clamp16(int32_t sample)
{
    if (sample > INT16_MAX) {
        return INT16_MAX;
    }
    if (sample < INT16_MIN) {
        return INT16_MIN;
    }
    return (int16_t)sample;
}
755
/**
 * Hard-limit (clip) a signed 64-bit sample to the signed 32-bit range.
 *
 * \param sample Value to limit
 * \return sample unchanged when it fits in int32_t; otherwise INT32_MAX for
 *         values above the range and INT32_MIN for values below it.
 */
static inline int32_t clamp32(int64_t sample)
{
    if (sample > INT32_MAX) {
        return INT32_MAX;
    }
    if (sample < INT32_MIN) {
        return INT32_MIN;
    }
    return (int32_t)sample;
}
765
766 /**
767 * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
768 * with clamping. Note the open bound at 1.0, values within 1/65536 of 1.0 map
769 * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
770 *
771 * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
772 * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
773 * depending on the sign bit inside NaN (whose representation is not unique).
774 * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
775 *
776 * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
777 * NEW code enabled: Rounding of 0.5 lsb is away from 0.
778 */
clamp16_from_float(float f)779 static inline int16_t clamp16_from_float(float f)
780 {
781 #if 0
782 /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
783 * floating point significand. The normal shift is 3<<22, but the -15 offset
784 * is used to multiply by 32768.
785 */
786 static const float offset = (float)(3 << (22 - 15));
787 /* zero = (0x10f << 22) = 0x43c00000 (not directly used) */
788 static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
789 static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */
790
791 union {
792 float f;
793 int32_t i;
794 } u;
795
796 u.f = f + offset; /* recenter valid range */
797 /* Now the valid range is represented as integers between [limneg, limpos].
798 * Clamp using the fact that float representation (as an integer) is an ordered set.
799 */
800 if (u.i < limneg)
801 u.i = -32768;
802 else if (u.i > limpos)
803 u.i = 32767;
804 return u.i; /* Return lower 16 bits, the part of interest in the significand. */
805 #else
806 static const float scale = 1 << 15;
807 return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
808 #endif
809 }
810
811 /**
812 * Convert a IEEE 754 single precision float [-1.0, 1.0) to uint8_t [0, 0xff]
813 * with clamping. Note the open bound at 1.0, values within 1/128 of 1.0 map
814 * to 255 instead of 256 (early clamping due to the smaller positive integer subrange).
815 *
816 * Values outside the range [-1.0, 1.0) are properly clamped to 0 and 255,
817 * including -Inf and +Inf. NaN will generally be treated either as 0 or 255,
818 * depending on the sign bit inside NaN (whose representation is not unique).
819 * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
820 *
821 * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
822 * NEW code enabled: Rounding of 0.5 lsb is away from 0.
823 */
clamp8_from_float(float f)824 static inline uint8_t clamp8_from_float(float f)
825 {
826 #if 0
827 /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
828 * floating point significand. The normal shift is 3<<22, but the -7 offset
829 * is used to multiply by 128.
830 */
831 static const float offset = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);
832 /* zero = (0x11f << 22) = 0x47c00000 */
833 static const int32_t limneg = (0x11f << 22) /*zero*/;
834 static const int32_t limpos = (0x11f << 22) /*zero*/ + 255; /* 0x47c000ff */
835
836 union {
837 float f;
838 int32_t i;
839 } u;
840
841 u.f = f + offset; /* recenter valid range */
842 /* Now the valid range is represented as integers between [limneg, limpos].
843 * Clamp using the fact that float representation (as an integer) is an ordered set.
844 */
845 if (u.i < limneg)
846 return 0;
847 if (u.i > limpos)
848 return 255;
849 return u.i; /* Return lower 8 bits, the part of interest in the significand. */
850 #else
851 return roundf(fmaxf(fminf(f * 128.f + 128.f, 255.f), 0.f));
852 #endif
853 }
854
/**
 * Convert a signed fixed-point 32-bit Q8.23 value to offset-binary uint8_t
 * in [0, 0xff] with clamping.
 *
 * Inputs below -0x800000 produce 0 and inputs above 0x7fffff produce 0xff.
 * In-range inputs keep only their top 8 of 24 significant bits; the low
 * 16 bits are discarded (no rounding).
 *
 * \param ival Q8.23 sample to convert
 * \return The 8-bit sample
 */
static inline uint8_t clamp8_from_q8_23(int32_t ival)
{
    if (ival < -0x800000) {
        return 0;
    }
    if (ival > 0x7fffff) {
        return 0xff;
    }
    /* Bias into [0, 0xffffff] first, then drop the 16 low-order bits. */
    return (uint8_t)((ival + 0x800000) >> 16);
}
873
874 /**
875 * Convert a single-precision floating point value to a Q0.23 integer value, stored in a
876 * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
877 *
878 * OLD code disabled: Rounds to nearest, ties away from 0.
879 * NEW code enabled: Rounding of 0.5 lsb is away from 0.
880 *
881 * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
882 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
883 * depending on hardware and future implementation of this function.
884 */
clamp24_from_float(float f)885 static inline int32_t clamp24_from_float(float f)
886 {
887 #if 0
888 static const float scale = (float)(1 << 23);
889 static const float limpos = 0x7fffff / scale;
890 static const float limneg = -0x800000 / scale;
891
892 if (f <= limneg) {
893 return -0x800000;
894 } else if (f >= limpos) {
895 return 0x7fffff;
896 }
897 f *= scale;
898 /* integer conversion is through truncation (though int to float is not).
899 * ensure that we round to nearest, ties away from 0.
900 */
901 return f > 0 ? f + 0.5 : f - 0.5;
902 #else
903 static const float scale = 1 << 23;
904 return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
905 #endif
906 }
907
/**
 * Limit a signed fixed-point 32-bit Q8.23 value to the Q0.23 range. The
 * result is still stored in a 32-bit signed integer (technically Q8.23, but
 * clamped to Q0.23).
 *
 * \param ival Q8.23 sample to limit
 * \return ival unchanged when it lies in [-0x800000, 0x7fffff]; otherwise
 *         the nearer of the two bounds.
 */
static inline int32_t clamp24_from_q8_23(int32_t ival)
{
    const int32_t upper = 0x7fffff;
    const int32_t lower = -0x800000;

    return ival > upper ? upper
         : ival < lower ? lower
         : ival;
}
926
/**
 * Convert a single-precision floating point value to a Q4.27 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-16.0, 16.0) are clamped to INT32_MIN
 * (-2147483648) and INT32_MAX (2147483647), including -Inf and +Inf.
 *
 * A NaN input returns 0. NaN fails both range comparisons, so without this
 * check it would reach the float-to-integer conversion, which is undefined
 * behavior for NaN.
 *
 * \param f Value to convert
 * \return The clamped, rounded Q4.27 value
 */
static inline int32_t clampq4_27_from_float(float f)
{
    static const float scale = (float)(1UL << 27);
    static const float limpos = 16.f;
    static const float limneg = -16.f;

    if (isnan(f)) {
        return 0;
    }
    if (f <= limneg) {
        return INT32_MIN;
    }
    if (f >= limpos) {
        return INT32_MAX;
    }

    /* Scale in single precision, then add the rounding offset in double so
     * the half-lsb addition is exact. Integer conversion truncates, so the
     * offset is applied away from 0 to get round-to-nearest, ties away from 0.
     */
    const double scaled = (double)(f * scale);
    return (int32_t)(scaled > 0 ? scaled + 0.5 : scaled - 0.5);
}
952
/**
 * Convert a single-precision floating point value to a Q0.31 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are clamped to INT32_MIN
 * (-2147483648) and INT32_MAX (2147483647), including -Inf and +Inf.
 *
 * A NaN input returns 0. NaN fails both range comparisons, so without this
 * check it would reach the float-to-integer conversion, which is undefined
 * behavior for NaN.
 *
 * \param f Value to convert
 * \return The clamped, rounded Q0.31 value
 */
static inline int32_t clamp32_from_float(float f)
{
    static const float scale = (float)(1UL << 31);
    static const float limpos = 1.f;
    static const float limneg = -1.f;

    if (isnan(f)) {
        return 0;
    }
    if (f <= limneg) {
        return INT32_MIN;
    }
    if (f >= limpos) {
        return INT32_MAX;
    }

    /* Scale in single precision, then add the rounding offset in double so
     * the half-lsb addition is exact. Integer conversion truncates, so the
     * offset is applied away from 0 to get round-to-nearest, ties away from 0.
     */
    const double scaled = (double)(f * scale);
    return (int32_t)(scaled > 0 ? scaled + 0.5 : scaled - 0.5);
}
978
/**
 * Convert a signed fixed-point 32-bit Q4.27 value to single-precision
 * floating-point.
 *
 * The nominal output range is [-1.0, 1.0] for fixed-point inputs in
 * [0xf8000000, 0x07ffffff]; the full output range is [-16.0, 16.0].
 *
 * The upper bounds 1.0 and 16.0 are closed because of rounding: an integer
 * that needs more than the 24-bit float significand is rounded to nearest,
 * ties to even (the IEEE 754 default), when it is converted to float.
 *
 * \param ival Q4.27 value to convert
 * \return ival expressed as a float
 */
static inline float float_from_q4_27(int32_t ival)
{
    /* Value of one Q4.27 lsb (2^-27). Being a power of 2, multiplying by it
     * only adjusts the exponent, so the sole rounding is in the
     * integer-to-float conversion below.
     */
    static const float lsb = 1. / (float)(1UL << 27);

    const float as_float = (float)ival;
    return as_float * lsb;
}
1002
/**
 * Convert an unsigned fixed-point 32-bit U4.28 value to single-precision
 * floating-point.
 *
 * The nominal output range is [0.0, 1.0] for fixed-point inputs in
 * [0x00000000, 0x10000000]; the full output range is [0.0, 16.0].
 *
 * The upper bound 16.0 is closed because of rounding: an integer that needs
 * more than the 24-bit float significand is rounded to nearest, ties to even
 * (the IEEE 754 default), when it is converted to float.
 *
 * \param uval U4.28 value to convert
 * \return uval expressed as a float
 */
static inline float float_from_u4_28(uint32_t uval)
{
    /* Value of one U4.28 lsb (2^-28); multiplying by a power of 2 is exact. */
    static const float lsb = 1. / (float)(1UL << 28);

    const float as_float = (float)uval;
    return as_float * lsb;
}
1019
/**
 * Convert an unsigned fixed-point 16-bit U4.12 value to single-precision
 * floating-point.
 *
 * The nominal output range is [0.0, 1.0] for fixed-point inputs in
 * [0x0000, 0x1000]; the full output range is [0.0, 16.0).
 *
 * \param uval U4.12 value to convert
 * \return uval expressed as a float
 */
static inline float float_from_u4_12(uint16_t uval)
{
    /* Value of one U4.12 lsb (2^-12); multiplying by a power of 2 is exact. */
    static const float lsb = 1. / (float)(1UL << 12);

    const float as_float = (float)uval;
    return as_float * lsb;
}
1031
/**
 * Convert a single-precision floating point value to a U4.28 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [0, 16.0] are clamped to [0, 4294967295],
 * including -Inf and +Inf. A NaN input returns 0: the lower-bound test is
 * written so that NaN takes the same path as non-positive inputs instead of
 * reaching the float-to-integer conversion, which is undefined for NaN.
 *
 * \param f Value to convert
 * \return The clamped, rounded U4.28 value
 */
static inline uint32_t u4_28_from_float(float f)
{
    static const float scale = (float)(1 << 28);
    /* (float)0xffffffff rounds to 2^32, so the upper limit is exactly
     * 2^32 / 2^28 = 16.0. Written as a literal so the initializer is a
     * constant expression in ISO C.
     */
    static const float limpos = 16.f;

    if (!(f > 0.f)) {
        return 0;           /* f <= 0, or f is NaN */
    }
    if (f >= limpos) {
        return UINT32_MAX;
    }
    /* Integer conversion truncates; adding half an lsb (in double, so the
     * addition is exact) gives round-to-nearest, ties away from 0.
     */
    return (uint32_t)((double)(f * scale) + 0.5);
}
1055
/**
 * Convert a single-precision floating point value to a U4.12 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [0, 16.0) are clamped to [0, 65535], including
 * -Inf and +Inf. A NaN input returns 0: the lower-bound test is written so
 * that NaN takes the same path as non-positive inputs instead of reaching
 * the float-to-integer conversion, which is undefined for NaN.
 *
 * \param f Value to convert
 * \return The clamped, rounded U4.12 value
 */
static inline uint16_t u4_12_from_float(float f)
{
    static const float scale = 4096.f;              /* 2^12 */
    /* Largest representable input, 0xffff / 2^12. Written with literals so
     * the initializer is a constant expression in ISO C.
     */
    static const float limpos = 65535.f / 4096.f;

    if (!(f > 0.f)) {
        return 0;           /* f <= 0, or f is NaN */
    }
    if (f >= limpos) {
        return UINT16_MAX;
    }
    /* Integer conversion truncates; adding half an lsb (in double, so the
     * addition is exact) gives round-to-nearest, ties away from 0.
     */
    return (uint16_t)((double)(f * scale) + 0.5);
}
1079
/**
 * Convert a signed fixed-point 16-bit Q0.15 value to single-precision
 * floating-point.
 *
 * The output range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
 * Every 16-bit value fits in the float significand, so the conversion is
 * exact and involves no rounding.
 *
 * \param ival Q0.15 sample to convert
 * \return ival expressed as a float
 */
static inline float float_from_i16(int16_t ival)
{
    /* Reciprocal of the nominal half-range (32768). It is a power of 2, so
     * the multiplication only changes the exponent and is exact.
     */
    static const float lsb = 1. / (float)(1UL << 15);

    const float as_float = (float)ival;
    return as_float * lsb;
}
1099
/**
 * Convert an unsigned (offset-binary) 8-bit U0.8 value to single-precision
 * floating-point.
 *
 * The nominal output range is [-1.0, 1.0) for the fixed-point range
 * [0x00, 0xff]; the input 0x80 maps to 0.0.
 *
 * \param uval 8-bit sample to convert
 * \return uval expressed as a float
 */
static inline float float_from_u8(uint8_t uval)
{
    /* 2^-7: reciprocal of the half-range (128); the scaling is exact. */
    static const float lsb = 1. / (float)(1UL << 7);

    /* Remove the 128 bias so the value becomes signed before scaling. */
    const int centered = (int)uval - 128;
    return (float)centered * lsb;
}
1111
/**
 * Convert a packed 24-bit Q0.23 value, read from three consecutive bytes, to
 * a signed fixed-point 32-bit integer Q0.31 value.
 *
 * The bytes are read least-significant first: packed24[0] is the low byte and
 * packed24[2] carries the sign bit. This matches native storage order on
 * little-endian hosts.
 *
 * The output Q0.31 range is [0x80000000, 0x7fffff00] for the fixed-point
 * range [0x800000, 0x7fffff]. Even though the output range is limited on the
 * positive side, there is no DC offset on the output if the input has no DC
 * offset. Avoid relying on the limited output range, as future
 * implementations may go to full range.
 *
 * \param packed24 Pointer to the three bytes of the packed sample
 * \return The sample placed in the upper 24 bits of an int32_t; the low 8
 *         bits are zero.
 */
static inline int32_t i32_from_p24(const uint8_t *packed24)
{
    /* Assemble in unsigned arithmetic: a uint8_t operand is promoted to int,
     * and shifting a byte >= 0x80 left by 24 would move a bit into the sign
     * position of that int, which is undefined behavior.
     */
    const uint32_t bits = ((uint32_t)packed24[0] << 8)
                        | ((uint32_t)packed24[1] << 16)
                        | ((uint32_t)packed24[2] << 24);

    return (int32_t)bits;
}
1127
/**
 * Convert a 32-bit Q0.31 value to single-precision floating-point.
 *
 * The output range is [-1.0, 1.0] for the fixed-point range
 * [0x80000000, 0x7fffffff].
 *
 * Large fixed-point values do not fit in the 24-bit float significand, so
 * their least significant 8 bits may be rounded away; rounding is to
 * nearest, ties to even.
 *
 * \param ival Q0.31 sample to convert
 * \return ival expressed as a float
 */
static inline float float_from_i32(int32_t ival)
{
    /* 2^-31: a power of 2, so only the int-to-float conversion can round. */
    static const float lsb = 1. / (float)(1UL << 31);

    const float as_float = (float)ival;
    return as_float * lsb;
}
1143
/**
 * Convert a packed 24-bit Q0.23 value, stored in three bytes at packed24, to
 * single-precision floating-point.
 *
 * The output range is [-1.0, 1.0) for the fixed-point range
 * [0x800000, 0x7fffff]. A 24-bit value fits in the float significand, so the
 * conversion is exact and involves no rounding.
 *
 * \param packed24 Pointer to the three bytes of the packed sample
 * \return The sample expressed as a float
 */
static inline float float_from_p24(const uint8_t *packed24)
{
    /* Widen to Q0.31 first, then reuse the Q0.31-to-float conversion. */
    const int32_t q0_31 = i32_from_p24(packed24);

    return float_from_i32(q0_31);
}
1155
/**
 * Convert a 32-bit Q8.23 value to single-precision floating-point.
 *
 * The nominal output range is [-1.0, 1.0) for the fixed-point range
 * [0xff800000, 0x007fffff]; the maximum output range is [-256.0, 256.0).
 *
 * Within the nominal range the conversion is exact. Values outside it may
 * need more than the 24-bit float significand and are then rounded to
 * nearest, ties to even.
 *
 * \param ival Q8.23 sample to convert
 * \return ival expressed as a float
 */
static inline float float_from_q8_23(int32_t ival)
{
    /* 2^-23: a power of 2, so only the int-to-float conversion can round. */
    static const float lsb = 1. / (float)(1UL << 23);

    const float as_float = (float)ival;
    return as_float * lsb;
}
1170
/**
 * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 *
 * The 16x16 product always fits in 32 bits. If adding it to a overflows, the
 * result wraps modulo 2^32; the portable path does the addition in unsigned
 * arithmetic so that it matches the ARM smlabb path instead of invoking
 * signed-overflow undefined behavior.
 *
 * \param in First 16-bit factor
 * \param v  Second 16-bit factor
 * \param a  32-bit accumulator input
 * \return a + in*v (wrapping on overflow)
 */
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm( "smlabb %[out], %[in], %[v], %[a] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    const int32_t product = (int32_t)in * (int32_t)v;

    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
1188
/**
 * Multiply 16-bit terms with 32-bit result: return in*v.
 *
 * \param in First 16-bit factor
 * \param v  Second 16-bit factor
 * \return The full 32-bit product, which always fits for 16-bit inputs
 */
static inline
int32_t mul(int16_t in, int16_t v)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm( "smulbb %[out], %[in], %[v] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v)
         : );
    return out;
#else
    const int32_t lhs = in;
    const int32_t rhs = v;

    return lhs * rhs;
#endif
}
1206
/**
 * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit
 * interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied;
 * otherwise the high 16 bits are. Each half is interpreted as a signed
 * 16-bit value. If adding the product to a overflows, the result wraps
 * modulo 2^32; the portable path does the addition in unsigned arithmetic so
 * that it matches the ARM smlabb/smlatt path instead of invoking
 * signed-overflow undefined behavior.
 *
 * \param left Non-zero to use the low halves, zero to use the high halves
 * \param inRL Packed pair holding the first factor
 * \param vRL  Packed pair holding the second factor
 * \param a    32-bit accumulator input
 * \return a plus the product of the selected halves (wrapping on overflow)
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (left) {
        asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    } else {
        asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    }
    return out;
#else
    /* Select the requested half of each word and reinterpret it as signed. */
    const int16_t in_half = (int16_t)(left ? (inRL & 0xFFFF) : (inRL >> 16));
    const int16_t v_half = (int16_t)(left ? (vRL & 0xFFFF) : (vRL >> 16));
    const int32_t product = (int32_t)in_half * (int32_t)v_half;

    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
1235
/**
 * Similar to mul, but the 16-bit terms are extracted from a 32-bit
 * interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied;
 * otherwise the high 16 bits are. Each half is interpreted as a signed
 * 16-bit value.
 *
 * \param left Non-zero to use the low halves, zero to use the high halves
 * \param inRL Packed pair holding the first factor
 * \param vRL  Packed pair holding the second factor
 * \return The 32-bit product of the selected halves
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (!left) {
        asm( "smultt %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smulbb %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    /* Select the requested half of each word before reinterpreting it. */
    const uint32_t in_half = left ? (inRL & 0xFFFF) : (inRL >> 16);
    const uint32_t v_half = left ? (vRL & 0xFFFF) : (vRL >> 16);

    return (int16_t)in_half * (int16_t)v_half;
#endif
}
1264
1265 /** \cond */
1266 __END_DECLS
1267 /** \endcond */
1268
1269 #endif // ANDROID_AUDIO_PRIMITIVES_H
1270