1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /**
18 * Tools for measuring latency and for detecting glitches.
19 * These classes are pure math and can be used with any audio system.
20 */
21
22 #ifndef AAUDIO_EXAMPLES_LOOPBACK_ANALYSER_H
23 #define AAUDIO_EXAMPLES_LOOPBACK_ANALYSER_H
24
25 #include <algorithm>
26 #include <assert.h>
27 #include <cctype>
28 #include <math.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32
33 #include <audio_utils/sndfile.h>
34
35 // Tag for machine readable results as property = value pairs
36 #define LOOPBACK_RESULT_TAG "RESULT: "
37
38 constexpr int32_t kDefaultSampleRate = 48000;
39 constexpr int32_t kMillisPerSecond = 1000;
40 constexpr int32_t kMinLatencyMillis = 4; // arbitrary and very low
41 constexpr int32_t kMaxLatencyMillis = 400; // arbitrary and generous
42 constexpr double kMaxEchoGain = 10.0; // based on experiments, otherwise too noisy
43 constexpr double kMinimumConfidence = 0.5;
44
printAudioScope(float sample)45 static void printAudioScope(float sample) {
46 const int maxStars = 80; // arbitrary, fits on one line
47 char c = '*';
48 if (sample < -1.0) {
49 sample = -1.0;
50 c = '$';
51 } else if (sample > 1.0) {
52 sample = 1.0;
53 c = '$';
54 }
55 int numSpaces = (int) (((sample + 1.0) * 0.5) * maxStars);
56 for (int i = 0; i < numSpaces; i++) {
57 putchar(' ');
58 }
59 printf("%c\n", c);
60 }
61
62 /*
63
64 FIR filter designed with
65 http://t-filter.appspot.com
66
67 sampling frequency: 48000 Hz
68
69 * 0 Hz - 8000 Hz
70 gain = 1.2
71 desired ripple = 5 dB
72 actual ripple = 5.595266169703693 dB
73
74 * 12000 Hz - 20000 Hz
75 gain = 0
76 desired attenuation = -40 dB
77 actual attenuation = -37.58691566571914 dB
78
79 */
80
81 #define FILTER_TAP_NUM 11
82
83 static const float sFilterTaps8000[FILTER_TAP_NUM] = {
84 -0.05944219353343189f,
85 -0.07303434839503208f,
86 -0.037690487672689066f,
87 0.1870480506596512f,
88 0.3910337357836833f,
89 0.5333672385425637f,
90 0.3910337357836833f,
91 0.1870480506596512f,
92 -0.037690487672689066f,
93 -0.07303434839503208f,
94 -0.05944219353343189f
95 };
96
97 class LowPassFilter {
98 public:
99
100 /*
101 * Filter one input sample.
102 * @return filtered output
103 */
filter(float input)104 float filter(float input) {
105 float output = 0.0f;
106 mX[mCursor] = input;
107 // Index backwards over x.
108 int xIndex = mCursor + FILTER_TAP_NUM;
109 // Write twice so we avoid having to wrap in the middle of the convolution.
110 mX[xIndex] = input;
111 for (int i = 0; i < FILTER_TAP_NUM; i++) {
112 output += sFilterTaps8000[i] * mX[xIndex--];
113 }
114 if (++mCursor >= FILTER_TAP_NUM) {
115 mCursor = 0;
116 }
117 return output;
118 }
119
120 /**
121 * @return true if PASSED
122 */
test()123 bool test() {
124 // Measure the impulse of the filter at different phases so we exercise
125 // all the wraparound cases in the FIR.
126 for (int offset = 0; offset < (FILTER_TAP_NUM * 2); offset++ ) {
127 // printf("LowPassFilter: cursor = %d\n", mCursor);
128 // Offset by one each time.
129 if (filter(0.0f) != 0.0f) {
130 printf("ERROR: filter should return 0.0 before impulse response\n");
131 return false;
132 }
133 for (int i = 0; i < FILTER_TAP_NUM; i++) {
134 float output = filter((i == 0) ? 1.0f : 0.0f); // impulse
135 if (output != sFilterTaps8000[i]) {
136 printf("ERROR: filter should return impulse response\n");
137 return false;
138 }
139 }
140 for (int i = 0; i < FILTER_TAP_NUM; i++) {
141 if (filter(0.0f) != 0.0f) {
142 printf("ERROR: filter should return 0.0 after impulse response\n");
143 return false;
144 }
145 }
146 }
147 return true;
148 }
149
150 private:
151 float mX[FILTER_TAP_NUM * 2]{}; // twice as big as needed to avoid wrapping
152 int32_t mCursor = 0;
153 };
154
155 // A narrow impulse seems to have better immunity against over estimating the
156 // latency due to detecting subharmonics by the auto-correlator.
157 static const float s_Impulse[] = {
158 0.0f, 0.0f, 0.0f, 0.0f, 0.3f, // silence on each side of the impulse
159 0.99f, 0.0f, -0.99f, // bipolar with one zero crossing in middle
160 -0.3f, 0.0f, 0.0f, 0.0f, 0.0f
161 };
162
163 constexpr int32_t kImpulseSizeInFrames = (int32_t)(sizeof(s_Impulse) / sizeof(s_Impulse[0]));
164
165 class PseudoRandom {
166 public:
PseudoRandom()167 PseudoRandom() {}
PseudoRandom(int64_t seed)168 PseudoRandom(int64_t seed)
169 : mSeed(seed)
170 {}
171
172 /**
173 * Returns the next random double from -1.0 to 1.0
174 *
175 * @return value from -1.0 to 1.0
176 */
nextRandomDouble()177 double nextRandomDouble() {
178 return nextRandomInteger() * (0.5 / (((int32_t)1) << 30));
179 }
180
181 /** Calculate random 32 bit number using linear-congruential method. */
nextRandomInteger()182 int32_t nextRandomInteger() {
183 // Use values for 64-bit sequence from MMIX by Donald Knuth.
184 mSeed = (mSeed * (int64_t)6364136223846793005) + (int64_t)1442695040888963407;
185 return (int32_t) (mSeed >> 32); // The higher bits have a longer sequence.
186 }
187
188 private:
189 int64_t mSeed = 99887766;
190 };
191
192
193 typedef struct LatencyReport_s {
194 double latencyInFrames;
195 double confidence;
196 } LatencyReport;
197
calculateCorrelation(const float * a,const float * b,int windowSize)198 static double calculateCorrelation(const float *a,
199 const float *b,
200 int windowSize)
201 {
202 double correlation = 0.0;
203 double sumProducts = 0.0;
204 double sumSquares = 0.0;
205
206 // Correlate a against b.
207 for (int i = 0; i < windowSize; i++) {
208 float s1 = a[i];
209 float s2 = b[i];
210 // Use a normalized cross-correlation.
211 sumProducts += s1 * s2;
212 sumSquares += ((s1 * s1) + (s2 * s2));
213 }
214
215 if (sumSquares >= 0.00000001) {
216 correlation = (float) (2.0 * sumProducts / sumSquares);
217 }
218 return correlation;
219 }
220
measureLatencyFromEchos(const float * data,int32_t numFloats,int32_t sampleRate,LatencyReport * report)221 static int measureLatencyFromEchos(const float *data,
222 int32_t numFloats,
223 int32_t sampleRate,
224 LatencyReport *report) {
225 // Allocate results array
226 const int minReasonableLatencyFrames = sampleRate * kMinLatencyMillis / kMillisPerSecond;
227 const int maxReasonableLatencyFrames = sampleRate * kMaxLatencyMillis / kMillisPerSecond;
228 int32_t maxCorrelationSize = maxReasonableLatencyFrames * 3;
229 int numCorrelations = std::min(numFloats, maxCorrelationSize);
230 float *correlations = new float[numCorrelations]{};
231 float *harmonicSums = new float[numCorrelations]{};
232
233 // Perform sliding auto-correlation.
234 // Skip first frames to avoid huge peak at zero offset.
235 for (int i = minReasonableLatencyFrames; i < numCorrelations; i++) {
236 int32_t remaining = numFloats - i;
237 float correlation = (float) calculateCorrelation(&data[i], data, remaining);
238 correlations[i] = correlation;
239 // printf("correlation[%d] = %f\n", ic, correlation);
240 }
241
242 // Apply a technique similar to Harmonic Product Spectrum Analysis to find echo fundamental.
243 // Add higher harmonics mapped onto lower harmonics. This reinforces the "fundamental" echo.
244 const int numEchoes = 8;
245 for (int partial = 1; partial < numEchoes; partial++) {
246 for (int i = minReasonableLatencyFrames; i < numCorrelations; i++) {
247 harmonicSums[i / partial] += correlations[i] / partial;
248 }
249 }
250
251 // Find highest peak in correlation array.
252 float maxCorrelation = 0.0;
253 int peakIndex = 0;
254 for (int i = 0; i < numCorrelations; i++) {
255 if (harmonicSums[i] > maxCorrelation) {
256 maxCorrelation = harmonicSums[i];
257 peakIndex = i;
258 // printf("maxCorrelation = %f at %d\n", maxCorrelation, peakIndex);
259 }
260 }
261 report->latencyInFrames = peakIndex;
262 /*
263 {
264 int32_t topPeak = peakIndex * 7 / 2;
265 for (int i = 0; i < topPeak; i++) {
266 float sample = harmonicSums[i];
267 printf("%4d: %7.5f ", i, sample);
268 printAudioScope(sample);
269 }
270 }
271 */
272
273 // Calculate confidence.
274 if (maxCorrelation < 0.001) {
275 report->confidence = 0.0;
276 } else {
277 // Compare peak to average value around peak.
278 int32_t numSamples = std::min(numCorrelations, peakIndex * 2);
279 if (numSamples <= 0) {
280 report->confidence = 0.0;
281 } else {
282 double sum = 0.0;
283 for (int i = 0; i < numSamples; i++) {
284 sum += harmonicSums[i];
285 }
286 const double average = sum / numSamples;
287 const double ratio = average / maxCorrelation; // will be < 1.0
288 report->confidence = 1.0 - sqrt(ratio);
289 }
290 }
291
292 delete[] correlations;
293 delete[] harmonicSums;
294 return 0;
295 }
296
297 class AudioRecording
298 {
299 public:
AudioRecording()300 AudioRecording() {
301 }
~AudioRecording()302 ~AudioRecording() {
303 delete[] mData;
304 }
305
allocate(int maxFrames)306 void allocate(int maxFrames) {
307 delete[] mData;
308 mData = new float[maxFrames];
309 mMaxFrames = maxFrames;
310 }
311
312 // Write SHORT data from the first channel.
write(int16_t * inputData,int32_t inputChannelCount,int32_t numFrames)313 int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
314 // stop at end of buffer
315 if ((mFrameCounter + numFrames) > mMaxFrames) {
316 numFrames = mMaxFrames - mFrameCounter;
317 }
318 for (int i = 0; i < numFrames; i++) {
319 mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
320 }
321 return numFrames;
322 }
323
324 // Write FLOAT data from the first channel.
write(float * inputData,int32_t inputChannelCount,int32_t numFrames)325 int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
326 // stop at end of buffer
327 if ((mFrameCounter + numFrames) > mMaxFrames) {
328 numFrames = mMaxFrames - mFrameCounter;
329 }
330 for (int i = 0; i < numFrames; i++) {
331 mData[mFrameCounter++] = inputData[i * inputChannelCount];
332 }
333 return numFrames;
334 }
335
size()336 int32_t size() {
337 return mFrameCounter;
338 }
339
getData()340 float *getData() {
341 return mData;
342 }
343
setSampleRate(int32_t sampleRate)344 void setSampleRate(int32_t sampleRate) {
345 mSampleRate = sampleRate;
346 }
347
getSampleRate()348 int32_t getSampleRate() {
349 return mSampleRate;
350 }
351
352 int save(const char *fileName, bool writeShorts = true) {
353 SNDFILE *sndFile = nullptr;
354 int written = 0;
355 SF_INFO info = {
356 .frames = mFrameCounter,
357 .samplerate = mSampleRate,
358 .channels = 1,
359 .format = SF_FORMAT_WAV | (writeShorts ? SF_FORMAT_PCM_16 : SF_FORMAT_FLOAT)
360 };
361
362 sndFile = sf_open(fileName, SFM_WRITE, &info);
363 if (sndFile == nullptr) {
364 printf("AudioRecording::save(%s) failed to open file\n", fileName);
365 return -errno;
366 }
367
368 written = sf_writef_float(sndFile, mData, mFrameCounter);
369
370 sf_close(sndFile);
371 return written;
372 }
373
load(const char * fileName)374 int load(const char *fileName) {
375 SNDFILE *sndFile = nullptr;
376 SF_INFO info;
377
378 sndFile = sf_open(fileName, SFM_READ, &info);
379 if (sndFile == nullptr) {
380 printf("AudioRecording::load(%s) failed to open file\n", fileName);
381 return -errno;
382 }
383
384 assert(info.channels == 1);
385 assert(info.format == SF_FORMAT_FLOAT);
386
387 setSampleRate(info.samplerate);
388 allocate(info.frames);
389 mFrameCounter = sf_readf_float(sndFile, mData, info.frames);
390
391 sf_close(sndFile);
392 return mFrameCounter;
393 }
394
395 /**
396 * Square the samples so they are all positive and so the peaks are emphasized.
397 */
square()398 void square() {
399 for (int i = 0; i < mFrameCounter; i++) {
400 const float sample = mData[i];
401 mData[i] = sample * sample;
402 }
403 }
404
405 /**
406 * Low pass filter the recording using a simple FIR filter.
407 * Note that the lowpass filter cutoff tracks the sample rate.
408 * That is OK because the impulse width is a fixed number of samples.
409 */
lowPassFilter()410 void lowPassFilter() {
411 for (int i = 0; i < mFrameCounter; i++) {
412 mData[i] = mLowPassFilter.filter(mData[i]);
413 }
414 }
415
416 /**
417 * Remove DC offset using a one-pole one-zero IIR filter.
418 */
dcBlocker()419 void dcBlocker() {
420 const float R = 0.996; // narrow notch at zero Hz
421 float x1 = 0.0;
422 float y1 = 0.0;
423 for (int i = 0; i < mFrameCounter; i++) {
424 const float x = mData[i];
425 const float y = x - x1 + (R * y1);
426 mData[i] = y;
427 y1 = y;
428 x1 = x;
429 }
430 }
431
432 private:
433 float *mData = nullptr;
434 int32_t mFrameCounter = 0;
435 int32_t mMaxFrames = 0;
436 int32_t mSampleRate = kDefaultSampleRate; // common default
437 LowPassFilter mLowPassFilter;
438 };
439
440 // ====================================================================================
441 class LoopbackProcessor {
442 public:
443 virtual ~LoopbackProcessor() = default;
444
445
446 enum process_result {
447 PROCESS_RESULT_OK,
448 PROCESS_RESULT_GLITCH
449 };
450
reset()451 virtual void reset() {}
452
453 virtual process_result process(float *inputData, int inputChannelCount,
454 float *outputData, int outputChannelCount,
455 int numFrames) = 0;
456
457
458 virtual void report() = 0;
459
printStatus()460 virtual void printStatus() {};
461
getResult()462 int32_t getResult() {
463 return mResult;
464 }
465
setResult(int32_t result)466 void setResult(int32_t result) {
467 mResult = result;
468 }
469
isDone()470 virtual bool isDone() {
471 return false;
472 }
473
save(const char * fileName)474 virtual int save(const char *fileName) {
475 (void) fileName;
476 return AAUDIO_ERROR_UNIMPLEMENTED;
477 }
478
load(const char * fileName)479 virtual int load(const char *fileName) {
480 (void) fileName;
481 return AAUDIO_ERROR_UNIMPLEMENTED;
482 }
483
setSampleRate(int32_t sampleRate)484 virtual void setSampleRate(int32_t sampleRate) {
485 mSampleRate = sampleRate;
486 }
487
getSampleRate()488 int32_t getSampleRate() {
489 return mSampleRate;
490 }
491
492 // Measure peak amplitude of buffer.
measurePeakAmplitude(float * inputData,int inputChannelCount,int numFrames)493 static float measurePeakAmplitude(float *inputData, int inputChannelCount, int numFrames) {
494 float peak = 0.0f;
495 for (int i = 0; i < numFrames; i++) {
496 const float pos = fabs(*inputData);
497 if (pos > peak) {
498 peak = pos;
499 }
500 inputData += inputChannelCount;
501 }
502 return peak;
503 }
504
505
506 private:
507 int32_t mSampleRate = kDefaultSampleRate;
508 int32_t mResult = 0;
509 };
510
511 class PeakDetector {
512 public:
process(float input)513 float process(float input) {
514 float output = mPrevious * mDecay;
515 if (input > output) {
516 output = input;
517 }
518 mPrevious = output;
519 return output;
520 }
521
522 private:
523 float mDecay = 0.99f;
524 float mPrevious = 0.0f;
525 };
526
527 // ====================================================================================
528 /**
529 * Measure latency given a loopback stream data.
530 * Uses a state machine to cycle through various stages including:
531 *
532 */
533 class EchoAnalyzer : public LoopbackProcessor {
534 public:
535
EchoAnalyzer()536 EchoAnalyzer() : LoopbackProcessor() {
537 mAudioRecording.allocate(2 * getSampleRate());
538 mAudioRecording.setSampleRate(getSampleRate());
539 }
540
setSampleRate(int32_t sampleRate)541 void setSampleRate(int32_t sampleRate) override {
542 LoopbackProcessor::setSampleRate(sampleRate);
543 mAudioRecording.setSampleRate(sampleRate);
544 }
545
reset()546 void reset() override {
547 mDownCounter = getSampleRate() / 2;
548 mLoopCounter = 0;
549 mMeasuredLoopGain = 0.0f;
550 mEchoGain = 1.0f;
551 mState = STATE_INITIAL_SILENCE;
552 }
553
isDone()554 virtual bool isDone() {
555 return mState == STATE_DONE || mState == STATE_FAILED;
556 }
557
setGain(float gain)558 void setGain(float gain) {
559 mEchoGain = gain;
560 }
561
getGain()562 float getGain() {
563 return mEchoGain;
564 }
565
testLowPassFilter()566 bool testLowPassFilter() {
567 LowPassFilter filter;
568 return filter.test();
569 }
570
report()571 void report() override {
572 printf("EchoAnalyzer ---------------\n");
573 if (getResult() != 0) {
574 printf(LOOPBACK_RESULT_TAG "result = %d\n", getResult());
575 return;
576 }
577
578 // printf("LowPassFilter test %s\n", testLowPassFilter() ? "PASSED" : "FAILED");
579
580 printf(LOOPBACK_RESULT_TAG "measured.gain = %8f\n", mMeasuredLoopGain);
581 printf(LOOPBACK_RESULT_TAG "echo.gain = %8f\n", mEchoGain);
582 printf(LOOPBACK_RESULT_TAG "test.state = %8d\n", mState);
583 printf(LOOPBACK_RESULT_TAG "test.state.name = %8s\n", convertStateToText(mState));
584
585 if (mState == STATE_WAITING_FOR_SILENCE) {
586 printf("WARNING - Stuck waiting for silence. Input may be too noisy!\n");
587 setResult(ERROR_NOISY);
588 } else if (mMeasuredLoopGain >= 0.9999) {
589 printf(" ERROR - clipping, turn down volume slightly\n");
590 setResult(ERROR_CLIPPING);
591 } else if (mState != STATE_DONE && mState != STATE_GATHERING_ECHOS) {
592 printf("WARNING - Bad state. Check volume on device.\n");
593 setResult(ERROR_INVALID_STATE);
594 } else {
595 // Cleanup the signal to improve the auto-correlation.
596 mAudioRecording.dcBlocker();
597 mAudioRecording.square();
598 mAudioRecording.lowPassFilter();
599
600 printf("Please wait several seconds for auto-correlation to complete.\n");
601 measureLatencyFromEchos(mAudioRecording.getData(),
602 mAudioRecording.size(),
603 getSampleRate(),
604 &mLatencyReport);
605
606 double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
607 / getSampleRate();
608 printf(LOOPBACK_RESULT_TAG "latency.frames = %8.2f\n",
609 mLatencyReport.latencyInFrames);
610 printf(LOOPBACK_RESULT_TAG "latency.msec = %8.2f\n",
611 latencyMillis);
612 printf(LOOPBACK_RESULT_TAG "latency.confidence = %8.6f\n",
613 mLatencyReport.confidence);
614 if (mLatencyReport.confidence < kMinimumConfidence) {
615 printf(" ERROR - confidence too low!\n");
616 setResult(ERROR_CONFIDENCE);
617 }
618 }
619 }
620
printStatus()621 void printStatus() override {
622 printf("st = %d, echo gain = %f ", mState, mEchoGain);
623 }
624
sendImpulses(float * outputData,int outputChannelCount,int numFrames)625 void sendImpulses(float *outputData, int outputChannelCount, int numFrames) {
626 while (numFrames-- > 0) {
627 float sample = s_Impulse[mSampleIndex++];
628 if (mSampleIndex >= kImpulseSizeInFrames) {
629 mSampleIndex = 0;
630 }
631
632 *outputData = sample;
633 outputData += outputChannelCount;
634 }
635 }
636
sendOneImpulse(float * outputData,int outputChannelCount)637 void sendOneImpulse(float *outputData, int outputChannelCount) {
638 mSampleIndex = 0;
639 sendImpulses(outputData, outputChannelCount, kImpulseSizeInFrames);
640 }
641
642 // @return number of frames for a typical block of processing
getBlockFrames()643 int32_t getBlockFrames() {
644 return getSampleRate() / 8;
645 }
646
process(float * inputData,int inputChannelCount,float * outputData,int outputChannelCount,int numFrames)647 process_result process(float *inputData, int inputChannelCount,
648 float *outputData, int outputChannelCount,
649 int numFrames) override {
650 int channelsValid = std::min(inputChannelCount, outputChannelCount);
651 float peak = 0.0f;
652 int numWritten;
653 int numSamples;
654
655 echo_state nextState = mState;
656
657 switch (mState) {
658 case STATE_INITIAL_SILENCE:
659 // Output silence at the beginning.
660 numSamples = numFrames * outputChannelCount;
661 for (int i = 0; i < numSamples; i++) {
662 outputData[i] = 0;
663 }
664 mDownCounter -= numFrames;
665 if (mDownCounter <= 0) {
666 nextState = STATE_MEASURING_GAIN;
667 //printf("%5d: switch to STATE_MEASURING_GAIN\n", mLoopCounter);
668 mDownCounter = getBlockFrames() * 2;
669 }
670 break;
671
672 case STATE_MEASURING_GAIN:
673 sendImpulses(outputData, outputChannelCount, numFrames);
674 peak = measurePeakAmplitude(inputData, inputChannelCount, numFrames);
675 // If we get several in a row then go to next state.
676 if (peak > mPulseThreshold) {
677 mDownCounter -= numFrames;
678 if (mDownCounter <= 0) {
679 //printf("%5d: switch to STATE_WAITING_FOR_SILENCE, measured peak = %f\n",
680 // mLoopCounter, peak);
681 mDownCounter = getBlockFrames();
682 mMeasuredLoopGain = peak; // assumes original pulse amplitude is one
683 mSilenceThreshold = peak * 0.1; // scale silence to measured pulse
684 // Calculate gain that will give us a nice decaying echo.
685 mEchoGain = mDesiredEchoGain / mMeasuredLoopGain;
686 if (mEchoGain > kMaxEchoGain) {
687 printf("ERROR - loop gain too low. Increase the volume.\n");
688 nextState = STATE_FAILED;
689 } else {
690 nextState = STATE_WAITING_FOR_SILENCE;
691 }
692 }
693 } else if (numFrames > kImpulseSizeInFrames){ // ignore short callbacks
694 mDownCounter = getBlockFrames();
695 }
696 break;
697
698 case STATE_WAITING_FOR_SILENCE:
699 // Output silence and wait for the echos to die down.
700 numSamples = numFrames * outputChannelCount;
701 for (int i = 0; i < numSamples; i++) {
702 outputData[i] = 0;
703 }
704 peak = measurePeakAmplitude(inputData, inputChannelCount, numFrames);
705 // If we get several in a row then go to next state.
706 if (peak < mSilenceThreshold) {
707 mDownCounter -= numFrames;
708 if (mDownCounter <= 0) {
709 nextState = STATE_SENDING_PULSE;
710 //printf("%5d: switch to STATE_SENDING_PULSE\n", mLoopCounter);
711 mDownCounter = getBlockFrames();
712 }
713 } else {
714 mDownCounter = getBlockFrames();
715 }
716 break;
717
718 case STATE_SENDING_PULSE:
719 mAudioRecording.write(inputData, inputChannelCount, numFrames);
720 sendOneImpulse(outputData, outputChannelCount);
721 nextState = STATE_GATHERING_ECHOS;
722 //printf("%5d: switch to STATE_GATHERING_ECHOS\n", mLoopCounter);
723 break;
724
725 case STATE_GATHERING_ECHOS:
726 numWritten = mAudioRecording.write(inputData, inputChannelCount, numFrames);
727 peak = measurePeakAmplitude(inputData, inputChannelCount, numFrames);
728 if (peak > mMeasuredLoopGain) {
729 mMeasuredLoopGain = peak; // AGC might be raising gain so adjust it on the fly.
730 // Recalculate gain that will give us a nice decaying echo.
731 mEchoGain = mDesiredEchoGain / mMeasuredLoopGain;
732 }
733 // Echo input to output.
734 for (int i = 0; i < numFrames; i++) {
735 int ic;
736 for (ic = 0; ic < channelsValid; ic++) {
737 outputData[ic] = inputData[ic] * mEchoGain;
738 }
739 for (; ic < outputChannelCount; ic++) {
740 outputData[ic] = 0;
741 }
742 inputData += inputChannelCount;
743 outputData += outputChannelCount;
744 }
745 if (numWritten < numFrames) {
746 nextState = STATE_DONE;
747 }
748 break;
749
750 case STATE_DONE:
751 case STATE_FAILED:
752 default:
753 break;
754 }
755
756 mState = nextState;
757 mLoopCounter++;
758 return PROCESS_RESULT_OK;
759 }
760
save(const char * fileName)761 int save(const char *fileName) override {
762 return mAudioRecording.save(fileName);
763 }
764
load(const char * fileName)765 int load(const char *fileName) override {
766 int result = mAudioRecording.load(fileName);
767 setSampleRate(mAudioRecording.getSampleRate());
768 mState = STATE_DONE;
769 return result;
770 }
771
772 private:
773
774 enum error_code {
775 ERROR_OK = 0,
776 ERROR_NOISY = -99,
777 ERROR_CLIPPING,
778 ERROR_CONFIDENCE,
779 ERROR_INVALID_STATE
780 };
781
782 enum echo_state {
783 STATE_INITIAL_SILENCE,
784 STATE_MEASURING_GAIN,
785 STATE_WAITING_FOR_SILENCE,
786 STATE_SENDING_PULSE,
787 STATE_GATHERING_ECHOS,
788 STATE_DONE,
789 STATE_FAILED
790 };
791
convertStateToText(echo_state state)792 const char *convertStateToText(echo_state state) {
793 const char *result = "Unknown";
794 switch(state) {
795 case STATE_INITIAL_SILENCE:
796 result = "INIT";
797 break;
798 case STATE_MEASURING_GAIN:
799 result = "GAIN";
800 break;
801 case STATE_WAITING_FOR_SILENCE:
802 result = "SILENCE";
803 break;
804 case STATE_SENDING_PULSE:
805 result = "PULSE";
806 break;
807 case STATE_GATHERING_ECHOS:
808 result = "ECHOS";
809 break;
810 case STATE_DONE:
811 result = "DONE";
812 break;
813 case STATE_FAILED:
814 result = "FAILED";
815 break;
816 }
817 return result;
818 }
819
820
821 int32_t mDownCounter = 500;
822 int32_t mLoopCounter = 0;
823 int32_t mSampleIndex = 0;
824 float mPulseThreshold = 0.02f;
825 float mSilenceThreshold = 0.002f;
826 float mMeasuredLoopGain = 0.0f;
827 float mDesiredEchoGain = 0.95f;
828 float mEchoGain = 1.0f;
829 echo_state mState = STATE_INITIAL_SILENCE;
830
831 AudioRecording mAudioRecording; // contains only the input after the gain detection burst
832 LatencyReport mLatencyReport;
833 // PeakDetector mPeakDetector;
834 };
835
836
837 // ====================================================================================
838 /**
839 * Output a steady sinewave and analyze the return signal.
840 *
841 * Use a cosine transform to measure the predicted magnitude and relative phase of the
842 * looped back sine wave. Then generate a predicted signal and compare with the actual signal.
843 */
844 class SineAnalyzer : public LoopbackProcessor {
845 public:
846
report()847 void report() override {
848 printf("SineAnalyzer ------------------\n");
849 printf(LOOPBACK_RESULT_TAG "peak.amplitude = %8f\n", mPeakAmplitude);
850 printf(LOOPBACK_RESULT_TAG "sine.magnitude = %8f\n", mMagnitude);
851 printf(LOOPBACK_RESULT_TAG "peak.noise = %8f\n", mPeakNoise);
852 printf(LOOPBACK_RESULT_TAG "rms.noise = %8f\n", mRootMeanSquareNoise);
853 float amplitudeRatio = mMagnitude / mPeakNoise;
854 float signalToNoise = amplitudeRatio * amplitudeRatio;
855 printf(LOOPBACK_RESULT_TAG "signal.to.noise = %8.2f\n", signalToNoise);
856 float signalToNoiseDB = 10.0 * log(signalToNoise);
857 printf(LOOPBACK_RESULT_TAG "signal.to.noise.db = %8.2f\n", signalToNoiseDB);
858 if (signalToNoiseDB < MIN_SNRATIO_DB) {
859 printf("ERROR - signal to noise ratio is too low! < %d dB. Adjust volume.\n", MIN_SNRATIO_DB);
860 setResult(ERROR_NOISY);
861 }
862 printf(LOOPBACK_RESULT_TAG "frames.accumulated = %8d\n", mFramesAccumulated);
863 printf(LOOPBACK_RESULT_TAG "sine.period = %8d\n", mSinePeriod);
864 printf(LOOPBACK_RESULT_TAG "test.state = %8d\n", mState);
865 printf(LOOPBACK_RESULT_TAG "frame.count = %8d\n", mFrameCounter);
866 // Did we ever get a lock?
867 bool gotLock = (mState == STATE_LOCKED) || (mGlitchCount > 0);
868 if (!gotLock) {
869 printf("ERROR - failed to lock on reference sine tone\n");
870 setResult(ERROR_NO_LOCK);
871 } else {
872 // Only print if meaningful.
873 printf(LOOPBACK_RESULT_TAG "glitch.count = %8d\n", mGlitchCount);
874 printf(LOOPBACK_RESULT_TAG "max.glitch = %8f\n", mMaxGlitchDelta);
875 if (mGlitchCount > 0) {
876 printf("ERROR - number of glitches > 0\n");
877 setResult(ERROR_GLITCHES);
878 }
879 }
880 }
881
printStatus()882 void printStatus() override {
883 printf("st = %d, #gl = %3d,", mState, mGlitchCount);
884 }
885
886 double calculateMagnitude(double *phasePtr = NULL) {
887 if (mFramesAccumulated == 0) {
888 return 0.0;
889 }
890 double sinMean = mSinAccumulator / mFramesAccumulated;
891 double cosMean = mCosAccumulator / mFramesAccumulated;
892 double magnitude = 2.0 * sqrt( (sinMean * sinMean) + (cosMean * cosMean ));
893 if( phasePtr != NULL )
894 {
895 double phase = M_PI_2 - atan2( sinMean, cosMean );
896 *phasePtr = phase;
897 }
898 return magnitude;
899 }
900
901 /**
902 * @param inputData contains microphone data with sine signal feedback
903 * @param outputData contains the reference sine wave
904 */
process(float * inputData,int inputChannelCount,float * outputData,int outputChannelCount,int numFrames)905 process_result process(float *inputData, int inputChannelCount,
906 float *outputData, int outputChannelCount,
907 int numFrames) override {
908 process_result result = PROCESS_RESULT_OK;
909 mProcessCount++;
910
911 float peak = measurePeakAmplitude(inputData, inputChannelCount, numFrames);
912 if (peak > mPeakAmplitude) {
913 mPeakAmplitude = peak;
914 }
915
916 for (int i = 0; i < numFrames; i++) {
917 bool sineEnabled = true;
918 float sample = inputData[i * inputChannelCount];
919
920 float sinOut = sinf(mPhase);
921
922 switch (mState) {
923 case STATE_IDLE:
924 sineEnabled = false;
925 mDownCounter--;
926 if (mDownCounter <= 0) {
927 mState = STATE_MEASURE_NOISE;
928 mDownCounter = NOISE_FRAME_COUNT;
929 }
930 break;
931 case STATE_MEASURE_NOISE:
932 sineEnabled = false;
933 mPeakNoise = std::max(abs(sample), mPeakNoise);
934 mNoiseSumSquared += sample * sample;
935 mDownCounter--;
936 if (mDownCounter <= 0) {
937 mState = STATE_WAITING_FOR_SIGNAL;
938 mRootMeanSquareNoise = sqrt(mNoiseSumSquared / NOISE_FRAME_COUNT);
939 mTolerance = std::max(MIN_TOLERANCE, mPeakNoise * 2.0f);
940 mPhase = 0.0; // prevent spike at start
941 }
942 break;
943
944 case STATE_IMMUNE:
945 mDownCounter--;
946 if (mDownCounter <= 0) {
947 mState = STATE_WAITING_FOR_SIGNAL;
948 }
949 break;
950
951 case STATE_WAITING_FOR_SIGNAL:
952 if (peak > mThreshold) {
953 mState = STATE_WAITING_FOR_LOCK;
954 //printf("%5d: switch to STATE_WAITING_FOR_LOCK\n", mFrameCounter);
955 resetAccumulator();
956 }
957 break;
958
959 case STATE_WAITING_FOR_LOCK:
960 mSinAccumulator += sample * sinOut;
961 mCosAccumulator += sample * cosf(mPhase);
962 mFramesAccumulated++;
963 // Must be a multiple of the period or the calculation will not be accurate.
964 if (mFramesAccumulated == mSinePeriod * PERIODS_NEEDED_FOR_LOCK) {
965 mPhaseOffset = 0.0;
966 mMagnitude = calculateMagnitude(&mPhaseOffset);
967 if (mMagnitude > mThreshold) {
968 if (fabs(mPreviousPhaseOffset - mPhaseOffset) < 0.001) {
969 mState = STATE_LOCKED;
970 //printf("%5d: switch to STATE_LOCKED\n", mFrameCounter);
971 }
972 mPreviousPhaseOffset = mPhaseOffset;
973 }
974 resetAccumulator();
975 }
976 break;
977
978 case STATE_LOCKED: {
979 // Predict next sine value
980 float predicted = sinf(mPhase + mPhaseOffset) * mMagnitude;
981 // printf(" predicted = %f, actual = %f\n", predicted, sample);
982
983 float diff = predicted - sample;
984 float absDiff = fabs(diff);
985 mMaxGlitchDelta = std::max(mMaxGlitchDelta, absDiff);
986 if (absDiff > mTolerance) {
987 mGlitchCount++;
988 result = PROCESS_RESULT_GLITCH;
989 //printf("%5d: Got a glitch # %d, predicted = %f, actual = %f\n",
990 // mFrameCounter, mGlitchCount, predicted, sample);
991 mState = STATE_IMMUNE;
992 mDownCounter = mSinePeriod * PERIODS_IMMUNE;
993 }
994
995 // Track incoming signal and slowly adjust magnitude to account
996 // for drift in the DRC or AGC.
997 mSinAccumulator += sample * sinOut;
998 mCosAccumulator += sample * cosf(mPhase);
999 mFramesAccumulated++;
1000 // Must be a multiple of the period or the calculation will not be accurate.
1001 if (mFramesAccumulated == mSinePeriod) {
1002 const double coefficient = 0.1;
1003 double phaseOffset = 0.0;
1004 double magnitude = calculateMagnitude(&phaseOffset);
1005 // One pole averaging filter.
1006 mMagnitude = (mMagnitude * (1.0 - coefficient)) + (magnitude * coefficient);
1007 resetAccumulator();
1008 }
1009 } break;
1010 }
1011
1012 float output = 0.0f;
1013 // Output sine wave so we can measure it.
1014 if (sineEnabled) {
1015 output = (sinOut * mOutputAmplitude)
1016 + (mWhiteNoise.nextRandomDouble() * mNoiseAmplitude);
1017 // printf("%5d: sin(%f) = %f, %f\n", i, mPhase, sinOut, mPhaseIncrement);
1018 // advance and wrap phase
1019 mPhase += mPhaseIncrement;
1020 if (mPhase > M_PI) {
1021 mPhase -= (2.0 * M_PI);
1022 }
1023 }
1024 outputData[i * outputChannelCount] = output;
1025
1026
1027 mFrameCounter++;
1028 }
1029 return result;
1030 }
1031
resetAccumulator()1032 void resetAccumulator() {
1033 mFramesAccumulated = 0;
1034 mSinAccumulator = 0.0;
1035 mCosAccumulator = 0.0;
1036 }
1037
reset()1038 void reset() override {
1039 mGlitchCount = 0;
1040 mState = STATE_IDLE;
1041 mDownCounter = IDLE_FRAME_COUNT;
1042 mPhaseIncrement = 2.0 * M_PI / mSinePeriod;
1043 printf("phaseInc = %f for period %d\n", mPhaseIncrement, mSinePeriod);
1044 resetAccumulator();
1045 mProcessCount = 0;
1046 mPeakNoise = 0.0f;
1047 mNoiseSumSquared = 0.0;
1048 mRootMeanSquareNoise = 0.0;
1049 mPhase = 0.0f;
1050 mMaxGlitchDelta = 0.0;
1051 }
1052
1053 private:
1054
1055 enum error_code {
1056 OK,
1057 ERROR_NO_LOCK = -80,
1058 ERROR_GLITCHES,
1059 ERROR_NOISY
1060 };
1061
1062 enum sine_state_t {
1063 STATE_IDLE,
1064 STATE_MEASURE_NOISE,
1065 STATE_IMMUNE,
1066 STATE_WAITING_FOR_SIGNAL,
1067 STATE_WAITING_FOR_LOCK,
1068 STATE_LOCKED
1069 };
1070
1071 enum constants {
1072 // Arbitrary durations, assuming 48000 Hz
1073 IDLE_FRAME_COUNT = 48 * 100,
1074 NOISE_FRAME_COUNT = 48 * 600,
1075 PERIODS_NEEDED_FOR_LOCK = 8,
1076 PERIODS_IMMUNE = 2,
1077 MIN_SNRATIO_DB = 65
1078 };
1079
1080 static constexpr float MIN_TOLERANCE = 0.01;
1081
1082 int mSinePeriod = 79;
1083 double mPhaseIncrement = 0.0;
1084 double mPhase = 0.0;
1085 double mPhaseOffset = 0.0;
1086 double mPreviousPhaseOffset = 0.0;
1087 double mMagnitude = 0.0;
1088 double mThreshold = 0.005;
1089 double mTolerance = MIN_TOLERANCE;
1090 int32_t mFramesAccumulated = 0;
1091 int32_t mProcessCount = 0;
1092 double mSinAccumulator = 0.0;
1093 double mCosAccumulator = 0.0;
1094 float mMaxGlitchDelta = 0.0f;
1095 int32_t mGlitchCount = 0;
1096 double mPeakAmplitude = 0.0;
1097 int mDownCounter = IDLE_FRAME_COUNT;
1098 int32_t mFrameCounter = 0;
1099 float mOutputAmplitude = 0.75;
1100
1101 // measure background noise
1102 float mPeakNoise = 0.0f;
1103 double mNoiseSumSquared = 0.0;
1104 double mRootMeanSquareNoise = 0.0;
1105
1106 PseudoRandom mWhiteNoise;
1107 float mNoiseAmplitude = 0.00; // Used to experiment with warbling caused by DRC.
1108
1109 sine_state_t mState = STATE_IDLE;
1110 };
1111
1112 #undef LOOPBACK_RESULT_TAG
1113
1114 #endif /* AAUDIO_EXAMPLES_LOOPBACK_ANALYSER_H */
1115