1 /**
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.media.soundtrigger;
18 import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;
19 
20 import android.annotation.IntDef;
21 import android.annotation.NonNull;
22 import android.annotation.Nullable;
23 import android.annotation.RequiresPermission;
24 import android.annotation.SystemApi;
25 import android.compat.annotation.UnsupportedAppUsage;
26 import android.hardware.soundtrigger.IRecognitionStatusCallback;
27 import android.hardware.soundtrigger.SoundTrigger;
28 import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
29 import android.media.AudioFormat;
30 import android.os.Handler;
31 import android.os.Looper;
32 import android.os.Message;
33 import android.os.ParcelUuid;
34 import android.os.RemoteException;
35 import android.util.Slog;
36 
37 import com.android.internal.app.ISoundTriggerService;
38 
39 import java.io.PrintWriter;
40 import java.lang.annotation.Retention;
41 import java.lang.annotation.RetentionPolicy;
42 import java.util.UUID;
43 
/**
 * A class that allows interaction with the actual sound trigger detection on the system.
 * Sound trigger detection refers to detectors that match generic sound patterns that are
 * not voice-based. The voice-based recognition models should utilize the {@link
 * VoiceInteractionService} instead. Access to this class is protected by a permission
 * granted only to system or privileged apps.
 *
 * @hide
 */
53 @SystemApi
54 public final class SoundTriggerDetector {
    // Gates the debug log emitted at the top of startRecognition().
    private static final boolean DBG = false;
    // Log tag used for every Slog call in this class.
    private static final String TAG = "SoundTriggerDetector";

    // Message codes exchanged between RecognitionCallback (sender) and MyHandler (receiver).
    // NOTE(review): MSG_AVAILABILITY_CHANGED is neither posted nor dispatched anywhere in
    // this file, so Callback#onAvailabilityChanged is never invoked from here.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;
    private static final int MSG_DETECTION_PAUSE = 4;
    private static final int MSG_DETECTION_RESUME = 5;

    // Monitor held by dump(); no other code in this file synchronizes on it.
    private final Object mLock = new Object();

    // Remote service interface that start/stopRecognition calls are forwarded to.
    private final ISoundTriggerService mSoundTriggerService;
    // Identifier of the sound model this detector controls.
    private final UUID mSoundModelId;
    // Client callback invoked from mHandler when events arrive.
    private final Callback mCallback;
    // Handler that receives the MSG_* messages and dispatches them to mCallback.
    private final Handler mHandler;
    // Stub passed to the service on start/stopRecognition to receive recognition events.
    private final RecognitionCallback mRecognitionCallback;
71 
    /**
     * Bit-flag annotation for the {@code recognitionFlags} argument of
     * {@link #startRecognition(int)}.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
            })
    public @interface RecognitionFlags {}

    /**
     * Empty flag for {@link #startRecognition(int)}.
     *
     *  @hide
     */
    public static final int RECOGNITION_FLAG_NONE = 0;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio for hotword needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the
     * model triggers.
     * If this flag is specified, it's possible to get multiple
     * triggers after a call to {@link #startRecognition(int)}, if the model
     * triggers multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * trigger happens, until the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;
106 
107     /**
108      * Additional payload for {@link Callback#onDetected}.
109      */
110     public static class EventPayload {
111         private final boolean mTriggerAvailable;
112 
113         // Indicates if {@code captureSession} can be used to continue capturing more audio
114         // from the DSP hardware.
115         private final boolean mCaptureAvailable;
116         // The session to use when attempting to capture more audio from the DSP hardware.
117         private final int mCaptureSession;
118         private final AudioFormat mAudioFormat;
119         // Raw data associated with the event.
120         // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
121         private final byte[] mData;
122 
EventPayload(boolean triggerAvailable, boolean captureAvailable, AudioFormat audioFormat, int captureSession, byte[] data)123         private EventPayload(boolean triggerAvailable, boolean captureAvailable,
124                 AudioFormat audioFormat, int captureSession, byte[] data) {
125             mTriggerAvailable = triggerAvailable;
126             mCaptureAvailable = captureAvailable;
127             mCaptureSession = captureSession;
128             mAudioFormat = audioFormat;
129             mData = data;
130         }
131 
132         /**
133          * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
134          * May be null if there's no audio present.
135          */
136         @Nullable
getCaptureAudioFormat()137         public AudioFormat getCaptureAudioFormat() {
138             return mAudioFormat;
139         }
140 
141         /**
142          * Gets the raw audio that triggered the detector.
143          * This may be null if the trigger audio isn't available.
144          * If non-null, the format of the audio can be obtained by calling
145          * {@link #getCaptureAudioFormat()}.
146          *
147          * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
148          */
149         @Nullable
getTriggerAudio()150         public byte[] getTriggerAudio() {
151             if (mTriggerAvailable) {
152                 return mData;
153             } else {
154                 return null;
155             }
156         }
157 
158         /**
159          * Gets the opaque data passed from the detection engine for the event.
160          * This may be null if it was not populated by the engine, or if the data is known to
161          * contain the trigger audio.
162          *
163          * @see #getTriggerAudio
164          *
165          * @hide
166          */
167         @Nullable
168         @UnsupportedAppUsage
getData()169         public byte[] getData() {
170             if (!mTriggerAvailable) {
171                 return mData;
172             } else {
173                 return null;
174             }
175         }
176 
177         /**
178          * Gets the session ID to start a capture from the DSP.
179          * This may be null if streaming capture isn't possible.
180          * If non-null, the format of the audio that can be captured can be
181          * obtained using {@link #getCaptureAudioFormat()}.
182          *
183          * TODO: Candidate for Public API when the API to start capture with a session ID
184          * is made public.
185          *
186          * TODO: Add this to {@link #getCaptureAudioFormat()}:
187          * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
188          * or {@link #getCaptureSession()}. May be null if no audio can be obtained
189          * for either the trigger or a streaming session."
190          *
191          * TODO: Should this return a known invalid value instead?
192          *
193          * @hide
194          */
195         @Nullable
196         @UnsupportedAppUsage
getCaptureSession()197         public Integer getCaptureSession() {
198             if (mCaptureAvailable) {
199                 return mCaptureSession;
200             } else {
201                 return null;
202             }
203         }
204     }
205 
206     public static abstract class Callback {
207         /**
208          * Called when the availability of the sound model changes.
209          */
onAvailabilityChanged(int status)210         public abstract void onAvailabilityChanged(int status);
211 
212         /**
213          * Called when the sound model has triggered (such as when it matched a
214          * given sound pattern).
215          */
onDetected(@onNull EventPayload eventPayload)216         public abstract void onDetected(@NonNull EventPayload eventPayload);
217 
218         /**
219          *  Called when the detection fails due to an error.
220          */
onError()221         public abstract void onError();
222 
223         /**
224          * Called when the recognition is paused temporarily for some reason.
225          * This is an informational callback, and the clients shouldn't be doing anything here
226          * except showing an indication on their UI if they have to.
227          */
onRecognitionPaused()228         public abstract void onRecognitionPaused();
229 
230         /**
231          * Called when the recognition is resumed after it was temporarily paused.
232          * This is an informational callback, and the clients shouldn't be doing anything here
233          * except showing an indication on their UI if they have to.
234          */
onRecognitionResumed()235         public abstract void onRecognitionResumed();
236     }
237 
238     /**
239      * This class should be constructed by the {@link SoundTriggerManager}.
240      * @hide
241      */
SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId, @NonNull Callback callback, @Nullable Handler handler)242     SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId,
243             @NonNull Callback callback, @Nullable Handler handler) {
244         mSoundTriggerService = soundTriggerService;
245         mSoundModelId = soundModelId;
246         mCallback = callback;
247         if (handler == null) {
248             mHandler = new MyHandler();
249         } else {
250             mHandler = new MyHandler(handler.getLooper());
251         }
252         mRecognitionCallback = new RecognitionCallback();
253     }
254 
255     /**
256      * Starts recognition on the associated sound model. Result is indicated via the
257      * {@link Callback}.
258      * @return Indicates whether the call succeeded or not.
259      */
260     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
startRecognition(@ecognitionFlags int recognitionFlags)261     public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
262         if (DBG) {
263             Slog.d(TAG, "startRecognition()");
264         }
265         boolean captureTriggerAudio =
266                 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
267 
268         boolean allowMultipleTriggers =
269                 (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
270         int status = STATUS_OK;
271         try {
272             status = mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId),
273                     mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
274                         allowMultipleTriggers, null, null));
275         } catch (RemoteException e) {
276             return false;
277         }
278         return status == STATUS_OK;
279     }
280 
281     /**
282      * Stops recognition for the associated model.
283      */
284     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
stopRecognition()285     public boolean stopRecognition() {
286         int status = STATUS_OK;
287         try {
288             status = mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId),
289                     mRecognitionCallback);
290         } catch (RemoteException e) {
291             return false;
292         }
293         return status == STATUS_OK;
294     }
295 
296     /**
297      * @hide
298      */
dump(String prefix, PrintWriter pw)299     public void dump(String prefix, PrintWriter pw) {
300         synchronized (mLock) {
301             // TODO: Dump useful debug information.
302         }
303     }
304 
305     /**
306      * Callback that handles events from the lower sound trigger layer.
307      *
308      * Note that these callbacks will be called synchronously from the SoundTriggerService
309      * layer and thus should do minimal work (such as sending a message on a handler to do
310      * the real work).
311      * @hide
312      */
313     private class RecognitionCallback extends IRecognitionStatusCallback.Stub {
314 
315         /**
316          * @hide
317          */
318         @Override
onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event)319         public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
320             Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
321             Message.obtain(mHandler,
322                     MSG_SOUND_TRIGGER_DETECTED,
323                     new EventPayload(event.triggerInData, event.captureAvailable,
324                             event.captureFormat, event.captureSession, event.data))
325                     .sendToTarget();
326         }
327 
328         @Override
onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event)329         public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
330             Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
331         }
332 
333         /**
334          * @hide
335          */
336         @Override
onError(int status)337         public void onError(int status) {
338             Slog.d(TAG, "onError()" + status);
339             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
340         }
341 
342         /**
343          * @hide
344          */
345         @Override
onRecognitionPaused()346         public void onRecognitionPaused() {
347             Slog.d(TAG, "onRecognitionPaused()");
348             mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
349         }
350 
351         /**
352          * @hide
353          */
354         @Override
onRecognitionResumed()355         public void onRecognitionResumed() {
356             Slog.d(TAG, "onRecognitionResumed()");
357             mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
358         }
359     }
360 
361     private class MyHandler extends Handler {
362 
MyHandler()363         MyHandler() {
364             super();
365         }
366 
MyHandler(Looper looper)367         MyHandler(Looper looper) {
368             super(looper);
369         }
370 
371         @Override
handleMessage(Message msg)372         public void handleMessage(Message msg) {
373             if (mCallback == null) {
374                   Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
375                   return;
376             }
377             switch (msg.what) {
378                 case MSG_SOUND_TRIGGER_DETECTED:
379                     mCallback.onDetected((EventPayload) msg.obj);
380                     break;
381                 case MSG_DETECTION_ERROR:
382                     mCallback.onError();
383                     break;
384                 case MSG_DETECTION_PAUSE:
385                     mCallback.onRecognitionPaused();
386                     break;
387                 case MSG_DETECTION_RESUME:
388                     mCallback.onRecognitionResumed();
389                     break;
390                 default:
391                     super.handleMessage(msg);
392 
393             }
394         }
395     }
396 }
397