1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 package android.speech.tts;
17 
18 import android.annotation.IntDef;
19 import android.annotation.IntRange;
20 import android.media.AudioFormat;
21 
22 import java.lang.annotation.Retention;
23 import java.lang.annotation.RetentionPolicy;
24 
25 /**
26  * A callback to return speech data synthesized by a text to speech engine.
27  *
28  * The engine can provide streaming audio by calling
29  * {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
30  * {@link #done}.
31  *
32  * {@link #error} can be called at any stage in the synthesis process to
33  * indicate that an error has occurred, but if the call is made after a call
34  * to {@link #done}, it might be discarded.
35  *
36  * {@link #done} must be called at the end of synthesis, regardless of errors.
37  *
38  * All methods can be only called on the synthesis thread.
39  */
40 public interface SynthesisCallback {
41 
42     /** @hide */
43     @Retention(RetentionPolicy.SOURCE)
44     @IntDef({
45         AudioFormat.ENCODING_PCM_8BIT,
46         AudioFormat.ENCODING_PCM_16BIT,
47         AudioFormat.ENCODING_PCM_FLOAT
48     })
49     @interface SupportedAudioFormat {};
50 
51     /**
52      * @return the maximum number of bytes that the TTS engine can pass in a single call of {@link
53      *     #audioAvailable}. Calls to {@link #audioAvailable} with data lengths larger than this
54      *     value will not succeed.
55      */
getMaxBufferSize()56     int getMaxBufferSize();
57 
58   /**
59    * The service should call this when it starts to synthesize audio for this request.
60    *
61    * <p>This method should only be called on the synthesis thread, while in {@link
62    * TextToSpeechService#onSynthesizeText}.
63    *
64    * @param sampleRateInHz Sample rate in HZ of the generated audio.
65    * @param audioFormat Audio format of the generated audio. Must be one of {@link
66    *     AudioFormat#ENCODING_PCM_8BIT} or {@link AudioFormat#ENCODING_PCM_16BIT}. Can also be
67    *     {@link AudioFormat#ENCODING_PCM_FLOAT} when targetting Android N and above.
68    * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
69    * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
70    *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
71    */
start( int sampleRateInHz, @SupportedAudioFormat int audioFormat, @IntRange(from = 1, to = 2) int channelCount)72   int start(
73       int sampleRateInHz,
74       @SupportedAudioFormat int audioFormat,
75       @IntRange(from = 1, to = 2) int channelCount);
76 
77   /**
78    * The service should call this method when synthesized audio is ready for consumption.
79    *
80    * <p>This method should only be called on the synthesis thread, while in {@link
81    * TextToSpeechService#onSynthesizeText}.
82    *
83    * @param buffer The generated audio data. This method will not hold on to {@code buffer}, so the
84    *     caller is free to modify it after this method returns.
85    * @param offset The offset into {@code buffer} where the audio data starts.
86    * @param length The number of bytes of audio data in {@code buffer}. This must be less than or
87    *     equal to the return value of {@link #getMaxBufferSize}.
88    * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
89    *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
90    */
audioAvailable(byte[] buffer, int offset, int length)91   int audioAvailable(byte[] buffer, int offset, int length);
92 
93   /**
94    * The service should call this method when all the synthesized audio for a request has been
95    * passed to {@link #audioAvailable}.
96    *
97    * <p>This method should only be called on the synthesis thread, while in {@link
98    * TextToSpeechService#onSynthesizeText}.
99    *
100    * <p>This method has to be called if {@link #start} and/or {@link #error} was called.
101    *
102    * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
103    *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
104    */
done()105   int done();
106 
107     /**
108      * The service should call this method if the speech synthesis fails.
109      *
110      * <p>This method should only be called on the synthesis thread, while in {@link
111      * TextToSpeechService#onSynthesizeText}.
112      */
error()113     void error();
114 
115   /**
116    * The service should call this method if the speech synthesis fails.
117    *
118    * <p>This method should only be called on the synthesis thread, while in {@link
119    * TextToSpeechService#onSynthesizeText}.
120    *
121    * @param errorCode Error code to pass to the client. One of the ERROR_ values from {@link
122    *     android.speech.tts.TextToSpeech}
123    */
error(@extToSpeech.Error int errorCode)124   void error(@TextToSpeech.Error int errorCode);
125 
126     /**
127      * Check if {@link #start} was called or not.
128      *
129      * <p>This method should only be called on the synthesis thread, while in {@link
130      * TextToSpeechService#onSynthesizeText}.
131      *
132      * <p>Useful for checking if a fallback from network request is possible.
133      */
hasStarted()134     boolean hasStarted();
135 
136     /**
137      * Check if {@link #done} was called or not.
138      *
139      * <p>This method should only be called on the synthesis thread, while in {@link
140      * TextToSpeechService#onSynthesizeText}.
141      *
142      * <p>Useful for checking if a fallback from network request is possible.
143      */
hasFinished()144     boolean hasFinished();
145 
146     /**
147      * The service may call this method to provide timing information about the spoken text.
148      *
149      * <p>Calling this method means that at the given audio frame, the given range of the input is
150      * about to be spoken. If this method is called the client will receive a callback on the
151      * listener ({@link UtteranceProgressListener#onRangeStart}) at the moment that frame has been
152      * reached by the playback head.
153      *
154      * <p>This information can be used by the client, for example, to highlight ranges of the text
155      * while it is spoken.
156      *
157      * <p>The markerInFrames is a frame index into the audio for this synthesis request, i.e. into
158      * the concatenation of the audio bytes sent to audioAvailable for this synthesis request. The
159      * definition of a frame depends on the format given by {@link #start}. See {@link AudioFormat}
160      * for more information.
161      *
162      * <p>This method should only be called on the synthesis thread, while in {@link
163      * TextToSpeechService#onSynthesizeText}.
164      *
165      * @param markerInFrames The position in frames in the audio where this range is spoken.
166      * @param start The start index of the range in the input text.
167      * @param end The end index (exclusive) of the range in the input text.
168      */
rangeStart(int markerInFrames, int start, int end)169     default void rangeStart(int markerInFrames, int start, int end) {}
170 }
171