/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;

import android.annotation.IntDef;
import android.annotation.IntRange;
import android.media.AudioFormat;

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;

/**
 * A callback to return speech data synthesized by a text to speech engine.
 *
 * <p>The engine can provide streaming audio by calling {@link #start}, then
 * {@link #audioAvailable} until all audio has been provided, then finally {@link #done}.
 *
 * <p>{@link #error} can be called at any stage in the synthesis process to indicate that an
 * error has occurred, but if the call is made after a call to {@link #done}, it might be
 * discarded.
 *
 * <p>{@link #done} must be called at the end of synthesis, regardless of errors.
 *
 * <p>All methods may only be called on the synthesis thread.
 */
public interface SynthesisCallback {

    /**
     * Audio encodings accepted as the {@code audioFormat} argument of {@link #start}.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef({
        AudioFormat.ENCODING_PCM_8BIT,
        AudioFormat.ENCODING_PCM_16BIT,
        AudioFormat.ENCODING_PCM_FLOAT
    })
    @interface SupportedAudioFormat {}

    /**
     * @return the maximum number of bytes that the TTS engine can pass in a single call of {@link
     *     #audioAvailable}. Calls to {@link #audioAvailable} with data lengths larger than this
     *     value will not succeed.
     */
    int getMaxBufferSize();

    /**
     * The service should call this when it starts to synthesize audio for this request.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param sampleRateInHz Sample rate in Hz of the generated audio.
     * @param audioFormat Audio format of the generated audio. Must be one of {@link
     *     AudioFormat#ENCODING_PCM_8BIT} or {@link AudioFormat#ENCODING_PCM_16BIT}. Can also be
     *     {@link AudioFormat#ENCODING_PCM_FLOAT} when targeting Android N and above.
     * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
     */
    int start(
            int sampleRateInHz,
            @SupportedAudioFormat int audioFormat,
            @IntRange(from = 1, to = 2) int channelCount);

    /**
     * The service should call this method when synthesized audio is ready for consumption.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param buffer The generated audio data. This method will not hold on to {@code buffer}, so
     *     the caller is free to modify it after this method returns.
     * @param offset The offset into {@code buffer} where the audio data starts.
     * @param length The number of bytes of audio data in {@code buffer}. This must be less than or
     *     equal to the return value of {@link #getMaxBufferSize}.
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
     */
    int audioAvailable(byte[] buffer, int offset, int length);

    /**
     * The service should call this method when all the synthesized audio for a request has been
     * passed to {@link #audioAvailable}.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>This method has to be called if {@link #start} and/or {@link #error} was called.
     *
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
     */
    int done();

    /**
     * The service should call this method if the speech synthesis fails and no specific error
     * code is being reported. Use {@link #error(int)} to pass an error code to the client.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     */
    void error();

    /**
     * The service should call this method if the speech synthesis fails, passing the error code
     * that describes the failure.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param errorCode Error code to pass to the client. One of the ERROR_ values from {@link
     *     android.speech.tts.TextToSpeech}
     */
    void error(@TextToSpeech.Error int errorCode);

    /**
     * Check if {@link #start} was called or not.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>Useful for checking if a fallback from network request is possible.
     *
     * @return whether {@link #start} was called.
     */
    boolean hasStarted();

    /**
     * Check if {@link #done} was called or not.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>Useful for checking if a fallback from network request is possible.
     *
     * @return whether {@link #done} was called.
     */
    boolean hasFinished();

    /**
     * The service may call this method to provide timing information about the spoken text.
     *
     * <p>Calling this method means that at the given audio frame, the given range of the input is
     * about to be spoken. If this method is called the client will receive a callback on the
     * listener ({@link UtteranceProgressListener#onRangeStart}) at the moment that frame has been
     * reached by the playback head.
     *
     * <p>This information can be used by the client, for example, to highlight ranges of the text
     * while it is spoken.
     *
     * <p>The markerInFrames is a frame index into the audio for this synthesis request, i.e. into
     * the concatenation of the audio bytes sent to {@link #audioAvailable} for this synthesis
     * request. The definition of a frame depends on the format given by {@link #start}. See
     * {@link AudioFormat} for more information.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>The default implementation declared here has an empty body.
     *
     * @param markerInFrames The position in frames in the audio where this range is spoken.
     * @param start The start index of the range in the input text.
     * @param end The end index (exclusive) of the range in the input text.
     */
    default void rangeStart(int markerInFrames, int start, int end) {}
}