1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "audio_hw_fir_filter"
18 //#define LOG_NDEBUG 0
19 
20 #include <assert.h>
21 #include <audio_utils/primitives.h>
22 #include <errno.h>
23 #include <inttypes.h>
24 #include <log/log.h>
25 #include <malloc.h>
26 #include <string.h>
27 
28 #include "fir_filter.h"
29 
30 #ifdef __ARM_NEON
31 #include "arm_neon.h"
32 #endif /* #ifdef __ARM_NEON */
33 
fir_init(uint32_t channels,fir_filter_mode_t mode,uint32_t filter_length,uint32_t input_length,int16_t * coeffs)34 fir_filter_t* fir_init(uint32_t channels, fir_filter_mode_t mode, uint32_t filter_length,
35                        uint32_t input_length, int16_t* coeffs) {
36     if ((channels == 0) || (filter_length == 0) || (coeffs == NULL)) {
37         ALOGE("%s: Invalid channel count, filter length or coefficient array.", __func__);
38         return NULL;
39     }
40 
41     fir_filter_t* fir = (fir_filter_t*)calloc(1, sizeof(fir_filter_t));
42     if (fir == NULL) {
43         ALOGE("%s: Unable to allocate memory for fir_filter.", __func__);
44         return NULL;
45     }
46 
47     fir->channels = channels;
48     fir->filter_length = filter_length;
49     /* Default: same filter coeffs for all channels */
50     fir->mode = FIR_SINGLE_FILTER;
51     uint32_t coeff_bytes = fir->filter_length * sizeof(int16_t);
52     if (mode == FIR_PER_CHANNEL_FILTER) {
53         fir->mode = FIR_PER_CHANNEL_FILTER;
54         coeff_bytes = fir->filter_length * fir->channels * sizeof(int16_t);
55     }
56 
57     fir->coeffs = (int16_t*)malloc(coeff_bytes);
58     if (fir->coeffs == NULL) {
59         ALOGE("%s: Unable to allocate memory for FIR coeffs", __func__);
60         goto exit_1;
61     }
62     memcpy(fir->coeffs, coeffs, coeff_bytes);
63 
64     fir->buffer_size = (input_length + fir->filter_length) * fir->channels;
65     fir->state = (int16_t*)malloc(fir->buffer_size * sizeof(int16_t));
66     if (fir->state == NULL) {
67         ALOGE("%s: Unable to allocate memory for FIR state", __func__);
68         goto exit_2;
69     }
70 
71 #ifdef __ARM_NEON
72     ALOGI("%s: Using ARM Neon", __func__);
73 #endif /* #ifdef __ARM_NEON */
74 
75     fir_reset(fir);
76     return fir;
77 
78 exit_2:
79     free(fir->coeffs);
80 exit_1:
81     free(fir);
82     return NULL;
83 }
84 
/*
 * Destroy a filter: frees the state buffer, the coefficient copy and the
 * filter object itself. Passing NULL is a no-op.
 */
void fir_release(fir_filter_t* fir) {
    if (fir != NULL) {
        free(fir->state);
        free(fir->coeffs);
        free(fir);
    }
}
93 
/*
 * Clear the filter history by zeroing all buffer_size samples of the state
 * buffer. Coefficients are left untouched. Passing NULL is a no-op.
 */
void fir_reset(fir_filter_t* fir) {
    if (fir != NULL) {
        const size_t state_bytes = fir->buffer_size * sizeof(int16_t);
        memset(fir->state, 0, state_bytes);
    }
}
100 
/*
 * Filter one block of interleaved 16-bit audio.
 *
 * fir      filter created by fir_init(); must not be NULL.
 * input    samples * channels interleaved input samples.
 * output   receives samples * channels interleaved output samples.
 * samples  number of frames in this block.
 *
 * The input is first copied into the state buffer behind the
 * (filter_length - 1) frames of history kept from earlier calls, so the
 * filter is continuous across calls. Channels are processed two at a time;
 * each output is the 32-bit sum of coefficient * sample products shifted
 * right by 15 bits (coefficients are presumably Q1.15 -- confirm against
 * fir_filter.h) and clamped to 16 bits.
 *
 * A block that does not fit in the state buffer is rejected with an error
 * log and leaves both output and filter state untouched.
 */
void fir_process_interleaved(fir_filter_t* fir, int16_t* input, int16_t* output, uint32_t samples) {
    assert(fir != NULL);

    const size_t channels = fir->channels;
    const size_t taps = fir->filter_length;

    if ((samples == 0) || (channels == 0) || (taps == 0)) {
        return;
    }

    /*
     * The state buffer must hold (taps - 1) frames of history plus the new
     * block, plus one spare frame: the paired-channel loads below read one
     * element past the last channel when the channel count is odd.
     */
    const size_t state_frames = fir->buffer_size / channels;
    if ((state_frames < taps) || (samples > state_frames - taps)) {
        ALOGE("%s: %" PRIu32 " frames exceed the FIR state buffer capacity.", __func__, samples);
        return;
    }

    /* Number of state elements occupied by history from previous calls. */
    const size_t history = (taps - 1) * channels;
    memcpy(&fir->state[history], input, (size_t)samples * channels * sizeof(int16_t));

    const bool per_channel = (channels > 1) && (fir->mode == FIR_PER_CHANNEL_FILTER);

    for (size_t ch = 0; ch < channels; ch += 2) {
        const bool has_pair = (ch + 1 < channels);
        const int16_t* coeff_a = per_channel ? &fir->coeffs[ch * taps] : fir->coeffs;
        /*
         * The last channel of an odd channel count has no partner. Reuse its
         * own coefficients for the unused second lane instead of reading past
         * the end of the coefficient array.
         */
        const int16_t* coeff_b = (per_channel && has_pair) ? coeff_a + taps : coeff_a;

        for (size_t s = 0; s < samples; s++) {
            /* Newest sample of this frame; tap k pairs with the frame k steps older. */
            const size_t newest = history + s * channels + ch;
            int32_t acc_a = 0;
            int32_t acc_b = 0;

#ifdef __ARM_NEON
            int32x4_t acc_vec = vdupq_n_s32(0);
            for (size_t k = 0; k < taps; k++) {
                int16x4_t coeff_vec = vdup_n_s16(coeff_a[k]);
                coeff_vec = vset_lane_s16(coeff_b[k], coeff_vec, 1);
                /*
                 * Loads four consecutive samples although only lanes 0 and 1
                 * are used, i.e. it reads up to three elements past
                 * state[newest - k * channels].
                 */
                int16x4_t input_vec = vld1_s16(&fir->state[newest - k * channels]);
                acc_vec = vmlal_s16(acc_vec, coeff_vec, input_vec);
            }
            acc_a = vgetq_lane_s32(acc_vec, 0);
            acc_b = vgetq_lane_s32(acc_vec, 1);
#else
            for (size_t k = 0; k < taps; k++) {
                const size_t idx = newest - k * channels;
                acc_a += (int32_t)fir->state[idx] * (int32_t)coeff_a[k];
                acc_b += (int32_t)fir->state[idx + 1] * (int32_t)coeff_b[k];
            }
#endif /* #ifdef __ARM_NEON */

            int16_t* out = &output[s * channels + ch];
            out[0] = clamp16(acc_a >> 15);
            if (has_pair) {
                out[1] = clamp16(acc_b >> 15);
            }
        }
    }

    /* Keep the newest (taps - 1) frames as history for the next call. */
    memmove(fir->state, &fir->state[(size_t)samples * channels], history * sizeof(int16_t));
}
155