1 /** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.media.soundtrigger; 18 import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK; 19 20 import android.annotation.IntDef; 21 import android.annotation.NonNull; 22 import android.annotation.Nullable; 23 import android.annotation.RequiresPermission; 24 import android.annotation.SystemApi; 25 import android.compat.annotation.UnsupportedAppUsage; 26 import android.hardware.soundtrigger.IRecognitionStatusCallback; 27 import android.hardware.soundtrigger.SoundTrigger; 28 import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 29 import android.media.AudioFormat; 30 import android.os.Handler; 31 import android.os.Looper; 32 import android.os.Message; 33 import android.os.ParcelUuid; 34 import android.os.RemoteException; 35 import android.util.Slog; 36 37 import com.android.internal.app.ISoundTriggerService; 38 39 import java.io.PrintWriter; 40 import java.lang.annotation.Retention; 41 import java.lang.annotation.RetentionPolicy; 42 import java.util.UUID; 43 44 /** 45 * A class that allows interaction with the actual sound trigger detection on the system. 46 * Sound trigger detection refers to a detectors that match generic sound patterns that are 47 * not voice-based. The voice-based recognition models should utilize the {@link 48 * VoiceInteractionService} instead. Access to this class is protected by a permission 49 * granted only to system or privileged apps. 50 * 51 * @hide 52 */ 53 @SystemApi 54 public final class SoundTriggerDetector { 55 private static final boolean DBG = false; 56 private static final String TAG = "SoundTriggerDetector"; 57 58 private static final int MSG_AVAILABILITY_CHANGED = 1; 59 private static final int MSG_SOUND_TRIGGER_DETECTED = 2; 60 private static final int MSG_DETECTION_ERROR = 3; 61 private static final int MSG_DETECTION_PAUSE = 4; 62 private static final int MSG_DETECTION_RESUME = 5; 63 64 private final Object mLock = new Object(); 65 66 private final ISoundTriggerService mSoundTriggerService; 67 private final UUID mSoundModelId; 68 private final Callback mCallback; 69 private final Handler mHandler; 70 private final RecognitionCallback mRecognitionCallback; 71 72 /** @hide */ 73 @Retention(RetentionPolicy.SOURCE) 74 @IntDef(flag = true, 75 value = { 76 RECOGNITION_FLAG_NONE, 77 RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO, 78 RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS 79 }) 80 public @interface RecognitionFlags {} 81 82 /** 83 * Empty flag for {@link #startRecognition(int)}. 84 * 85 * @hide 86 */ 87 public static final int RECOGNITION_FLAG_NONE = 0; 88 89 /** 90 * Recognition flag for {@link #startRecognition(int)} that indicates 91 * whether the trigger audio for hotword needs to be captured. 92 */ 93 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 94 95 /** 96 * Recognition flag for {@link #startRecognition(int)} that indicates 97 * whether the recognition should keep going on even after the 98 * model triggers. 99 * If this flag is specified, it's possible to get multiple 100 * triggers after a call to {@link #startRecognition(int)}, if the model 101 * triggers multiple times. 102 * When this isn't specified, the default behavior is to stop recognition once the 103 * trigger happenss, till the caller starts recognition again. 104 */ 105 public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2; 106 107 /** 108 * Additional payload for {@link Callback#onDetected}. 109 */ 110 public static class EventPayload { 111 private final boolean mTriggerAvailable; 112 113 // Indicates if {@code captureSession} can be used to continue capturing more audio 114 // from the DSP hardware. 115 private final boolean mCaptureAvailable; 116 // The session to use when attempting to capture more audio from the DSP hardware. 117 private final int mCaptureSession; 118 private final AudioFormat mAudioFormat; 119 // Raw data associated with the event. 120 // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true. 121 private final byte[] mData; 122 EventPayload(boolean triggerAvailable, boolean captureAvailable, AudioFormat audioFormat, int captureSession, byte[] data)123 private EventPayload(boolean triggerAvailable, boolean captureAvailable, 124 AudioFormat audioFormat, int captureSession, byte[] data) { 125 mTriggerAvailable = triggerAvailable; 126 mCaptureAvailable = captureAvailable; 127 mCaptureSession = captureSession; 128 mAudioFormat = audioFormat; 129 mData = data; 130 } 131 132 /** 133 * Gets the format of the audio obtained using {@link #getTriggerAudio()}. 134 * May be null if there's no audio present. 135 */ 136 @Nullable getCaptureAudioFormat()137 public AudioFormat getCaptureAudioFormat() { 138 return mAudioFormat; 139 } 140 141 /** 142 * Gets the raw audio that triggered the detector. 143 * This may be null if the trigger audio isn't available. 144 * If non-null, the format of the audio can be obtained by calling 145 * {@link #getCaptureAudioFormat()}. 146 * 147 * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO 148 */ 149 @Nullable getTriggerAudio()150 public byte[] getTriggerAudio() { 151 if (mTriggerAvailable) { 152 return mData; 153 } else { 154 return null; 155 } 156 } 157 158 /** 159 * Gets the opaque data passed from the detection engine for the event. 160 * This may be null if it was not populated by the engine, or if the data is known to 161 * contain the trigger audio. 162 * 163 * @see #getTriggerAudio 164 * 165 * @hide 166 */ 167 @Nullable 168 @UnsupportedAppUsage getData()169 public byte[] getData() { 170 if (!mTriggerAvailable) { 171 return mData; 172 } else { 173 return null; 174 } 175 } 176 177 /** 178 * Gets the session ID to start a capture from the DSP. 179 * This may be null if streaming capture isn't possible. 180 * If non-null, the format of the audio that can be captured can be 181 * obtained using {@link #getCaptureAudioFormat()}. 182 * 183 * TODO: Candidate for Public API when the API to start capture with a session ID 184 * is made public. 185 * 186 * TODO: Add this to {@link #getCaptureAudioFormat()}: 187 * "Gets the format of the audio obtained using {@link #getTriggerAudio()} 188 * or {@link #getCaptureSession()}. May be null if no audio can be obtained 189 * for either the trigger or a streaming session." 190 * 191 * TODO: Should this return a known invalid value instead? 192 * 193 * @hide 194 */ 195 @Nullable 196 @UnsupportedAppUsage getCaptureSession()197 public Integer getCaptureSession() { 198 if (mCaptureAvailable) { 199 return mCaptureSession; 200 } else { 201 return null; 202 } 203 } 204 } 205 206 public static abstract class Callback { 207 /** 208 * Called when the availability of the sound model changes. 209 */ onAvailabilityChanged(int status)210 public abstract void onAvailabilityChanged(int status); 211 212 /** 213 * Called when the sound model has triggered (such as when it matched a 214 * given sound pattern). 215 */ onDetected(@onNull EventPayload eventPayload)216 public abstract void onDetected(@NonNull EventPayload eventPayload); 217 218 /** 219 * Called when the detection fails due to an error. 220 */ onError()221 public abstract void onError(); 222 223 /** 224 * Called when the recognition is paused temporarily for some reason. 225 * This is an informational callback, and the clients shouldn't be doing anything here 226 * except showing an indication on their UI if they have to. 227 */ onRecognitionPaused()228 public abstract void onRecognitionPaused(); 229 230 /** 231 * Called when the recognition is resumed after it was temporarily paused. 232 * This is an informational callback, and the clients shouldn't be doing anything here 233 * except showing an indication on their UI if they have to. 234 */ onRecognitionResumed()235 public abstract void onRecognitionResumed(); 236 } 237 238 /** 239 * This class should be constructed by the {@link SoundTriggerManager}. 240 * @hide 241 */ SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId, @NonNull Callback callback, @Nullable Handler handler)242 SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId, 243 @NonNull Callback callback, @Nullable Handler handler) { 244 mSoundTriggerService = soundTriggerService; 245 mSoundModelId = soundModelId; 246 mCallback = callback; 247 if (handler == null) { 248 mHandler = new MyHandler(); 249 } else { 250 mHandler = new MyHandler(handler.getLooper()); 251 } 252 mRecognitionCallback = new RecognitionCallback(); 253 } 254 255 /** 256 * Starts recognition on the associated sound model. Result is indicated via the 257 * {@link Callback}. 258 * @return Indicates whether the call succeeded or not. 259 */ 260 @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER) startRecognition(@ecognitionFlags int recognitionFlags)261 public boolean startRecognition(@RecognitionFlags int recognitionFlags) { 262 if (DBG) { 263 Slog.d(TAG, "startRecognition()"); 264 } 265 boolean captureTriggerAudio = 266 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 267 268 boolean allowMultipleTriggers = 269 (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0; 270 int status = STATUS_OK; 271 try { 272 status = mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId), 273 mRecognitionCallback, new RecognitionConfig(captureTriggerAudio, 274 allowMultipleTriggers, null, null)); 275 } catch (RemoteException e) { 276 return false; 277 } 278 return status == STATUS_OK; 279 } 280 281 /** 282 * Stops recognition for the associated model. 283 */ 284 @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER) stopRecognition()285 public boolean stopRecognition() { 286 int status = STATUS_OK; 287 try { 288 status = mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId), 289 mRecognitionCallback); 290 } catch (RemoteException e) { 291 return false; 292 } 293 return status == STATUS_OK; 294 } 295 296 /** 297 * @hide 298 */ dump(String prefix, PrintWriter pw)299 public void dump(String prefix, PrintWriter pw) { 300 synchronized (mLock) { 301 // TODO: Dump useful debug information. 302 } 303 } 304 305 /** 306 * Callback that handles events from the lower sound trigger layer. 307 * 308 * Note that these callbacks will be called synchronously from the SoundTriggerService 309 * layer and thus should do minimal work (such as sending a message on a handler to do 310 * the real work). 311 * @hide 312 */ 313 private class RecognitionCallback extends IRecognitionStatusCallback.Stub { 314 315 /** 316 * @hide 317 */ 318 @Override onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event)319 public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) { 320 Slog.d(TAG, "onGenericSoundTriggerDetected()" + event); 321 Message.obtain(mHandler, 322 MSG_SOUND_TRIGGER_DETECTED, 323 new EventPayload(event.triggerInData, event.captureAvailable, 324 event.captureFormat, event.captureSession, event.data)) 325 .sendToTarget(); 326 } 327 328 @Override onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event)329 public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) { 330 Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event); 331 } 332 333 /** 334 * @hide 335 */ 336 @Override onError(int status)337 public void onError(int status) { 338 Slog.d(TAG, "onError()" + status); 339 mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); 340 } 341 342 /** 343 * @hide 344 */ 345 @Override onRecognitionPaused()346 public void onRecognitionPaused() { 347 Slog.d(TAG, "onRecognitionPaused()"); 348 mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE); 349 } 350 351 /** 352 * @hide 353 */ 354 @Override onRecognitionResumed()355 public void onRecognitionResumed() { 356 Slog.d(TAG, "onRecognitionResumed()"); 357 mHandler.sendEmptyMessage(MSG_DETECTION_RESUME); 358 } 359 } 360 361 private class MyHandler extends Handler { 362 MyHandler()363 MyHandler() { 364 super(); 365 } 366 MyHandler(Looper looper)367 MyHandler(Looper looper) { 368 super(looper); 369 } 370 371 @Override handleMessage(Message msg)372 public void handleMessage(Message msg) { 373 if (mCallback == null) { 374 Slog.w(TAG, "Received message: " + msg.what + " for NULL callback."); 375 return; 376 } 377 switch (msg.what) { 378 case MSG_SOUND_TRIGGER_DETECTED: 379 mCallback.onDetected((EventPayload) msg.obj); 380 break; 381 case MSG_DETECTION_ERROR: 382 mCallback.onError(); 383 break; 384 case MSG_DETECTION_PAUSE: 385 mCallback.onRecognitionPaused(); 386 break; 387 case MSG_DETECTION_RESUME: 388 mCallback.onRecognitionResumed(); 389 break; 390 default: 391 super.handleMessage(msg); 392 393 } 394 } 395 } 396 } 397