1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "ESQueue"
19 #include <media/stagefright/foundation/ADebug.h>
20
21 #include "ESQueue.h"
22
23 #include <media/stagefright/foundation/hexdump.h>
24 #include <media/stagefright/foundation/ABitReader.h>
25 #include <media/stagefright/foundation/ABuffer.h>
26 #include <media/stagefright/foundation/AMessage.h>
27 #include <media/stagefright/foundation/ByteUtils.h>
28 #include <media/stagefright/foundation/avc_utils.h>
29 #include <media/stagefright/MediaErrors.h>
30 #include <media/stagefright/MediaDefs.h>
31 #include <media/stagefright/MetaData.h>
32 #include <media/stagefright/MetaDataUtils.h>
33 #include <media/cas/DescramblerAPI.h>
34 #include <media/hardware/CryptoAPI.h>
35
36 #include <inttypes.h>
37 #include <netinet/in.h>
38
39 #ifdef ENABLE_CRYPTO
40 #include "HlsSampleDecryptor.h"
41 #endif
42
43 namespace android {
44
ElementaryStreamQueue(Mode mode,uint32_t flags)45 ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
46 : mMode(mode),
47 mFlags(flags),
48 mEOSReached(false),
49 mCASystemId(0),
50 mAUIndex(0) {
51
52 ALOGV("ElementaryStreamQueue(%p) mode %x flags %x isScrambled %d isSampleEncrypted %d",
53 this, mode, flags, isScrambled(), isSampleEncrypted());
54
55 // Create the decryptor anyway since we don't know the use-case unless key is provided
56 // Won't decrypt if key info not available (e.g., scanner/extractor just parsing ts files)
57 mSampleDecryptor = isSampleEncrypted() ?
58 #ifdef ENABLE_CRYPTO
59 new HlsSampleDecryptor
60 #else
61 new SampleDecryptor
62 #endif
63 : NULL;
64 }
65
getFormat()66 sp<MetaData> ElementaryStreamQueue::getFormat() {
67 return mFormat;
68 }
69
clear(bool clearFormat)70 void ElementaryStreamQueue::clear(bool clearFormat) {
71 if (mBuffer != NULL) {
72 mBuffer->setRange(0, 0);
73 }
74
75 mRangeInfos.clear();
76
77 if (mScrambledBuffer != NULL) {
78 mScrambledBuffer->setRange(0, 0);
79 }
80 mScrambledRangeInfos.clear();
81
82 if (clearFormat) {
83 mFormat.clear();
84 }
85
86 mEOSReached = false;
87 }
88
isScrambled() const89 bool ElementaryStreamQueue::isScrambled() const {
90 return (mFlags & kFlag_ScrambledData) != 0;
91 }
92
setCasInfo(int32_t systemId,const std::vector<uint8_t> & sessionId)93 void ElementaryStreamQueue::setCasInfo(
94 int32_t systemId, const std::vector<uint8_t> &sessionId) {
95 mCASystemId = systemId;
96 mCasSessionId = sessionId;
97 }
98
readVariableBits(ABitReader & bits,int32_t nbits)99 static int32_t readVariableBits(ABitReader &bits, int32_t nbits) {
100 int32_t value = 0;
101 int32_t more_bits = 1;
102
103 while (more_bits) {
104 value += bits.getBits(nbits);
105 more_bits = bits.getBits(1);
106 if (!more_bits)
107 break;
108 value++;
109 value <<= nbits;
110 }
111 return value;
112 }
113
114 // Parse AC3 header assuming the current ptr is start position of syncframe,
115 // update metadata only applicable, and return the payload size
parseAC3SyncFrame(const uint8_t * ptr,size_t size,sp<MetaData> * metaData)116 static unsigned parseAC3SyncFrame(
117 const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
118 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
119 static const unsigned samplingRateTable[] = {48000, 44100, 32000};
120
121 static const unsigned frameSizeTable[19][3] = {
122 { 64, 69, 96 },
123 { 80, 87, 120 },
124 { 96, 104, 144 },
125 { 112, 121, 168 },
126 { 128, 139, 192 },
127 { 160, 174, 240 },
128 { 192, 208, 288 },
129 { 224, 243, 336 },
130 { 256, 278, 384 },
131 { 320, 348, 480 },
132 { 384, 417, 576 },
133 { 448, 487, 672 },
134 { 512, 557, 768 },
135 { 640, 696, 960 },
136 { 768, 835, 1152 },
137 { 896, 975, 1344 },
138 { 1024, 1114, 1536 },
139 { 1152, 1253, 1728 },
140 { 1280, 1393, 1920 },
141 };
142
143 ABitReader bits(ptr, size);
144 if (bits.numBitsLeft() < 16) {
145 return 0;
146 }
147 if (bits.getBits(16) != 0x0B77) {
148 return 0;
149 }
150
151 if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
152 ALOGV("Not enough bits left for further parsing");
153 return 0;
154 }
155 bits.skipBits(16); // crc1
156
157 unsigned fscod = bits.getBits(2);
158 if (fscod == 3) {
159 ALOGW("Incorrect fscod in AC3 header");
160 return 0;
161 }
162
163 unsigned frmsizecod = bits.getBits(6);
164 if (frmsizecod > 37) {
165 ALOGW("Incorrect frmsizecod in AC3 header");
166 return 0;
167 }
168
169 unsigned bsid = bits.getBits(5);
170 if (bsid > 8) {
171 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
172 return 0;
173 }
174
175 bits.skipBits(3); // bsmod
176 unsigned acmod = bits.getBits(3);
177
178 if ((acmod & 1) > 0 && acmod != 1) {
179 if (bits.numBitsLeft() < 2) {
180 return 0;
181 }
182 bits.skipBits(2); //cmixlev
183 }
184 if ((acmod & 4) > 0) {
185 if (bits.numBitsLeft() < 2) {
186 return 0;
187 }
188 bits.skipBits(2); //surmixlev
189 }
190 if (acmod == 2) {
191 if (bits.numBitsLeft() < 2) {
192 return 0;
193 }
194 bits.skipBits(2); //dsurmod
195 }
196
197 if (bits.numBitsLeft() < 1) {
198 return 0;
199 }
200 unsigned lfeon = bits.getBits(1);
201
202 unsigned samplingRate = samplingRateTable[fscod];
203 unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
204 if (fscod == 1) {
205 payloadSize += frmsizecod & 1;
206 }
207 payloadSize <<= 1; // convert from 16-bit words to bytes
208
209 unsigned channelCount = channelCountTable[acmod] + lfeon;
210
211 if (metaData != NULL) {
212 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
213 (*metaData)->setInt32(kKeyChannelCount, channelCount);
214 (*metaData)->setInt32(kKeySampleRate, samplingRate);
215 }
216
217 return payloadSize;
218 }
219
220 // Parse EAC3 header assuming the current ptr is start position of syncframe,
221 // update metadata only applicable, and return the payload size
222 // ATSC A/52:2012 E2.3.1
parseEAC3SyncFrame(const uint8_t * ptr,size_t size,sp<MetaData> * metaData)223 static unsigned parseEAC3SyncFrame(
224 const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
225 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
226 static const unsigned samplingRateTable[] = {48000, 44100, 32000};
227 static const unsigned samplingRateTable2[] = {24000, 22050, 16000};
228
229 ABitReader bits(ptr, size);
230 if (bits.numBitsLeft() < 16) {
231 ALOGE("Not enough bits left for further parsing");
232 return 0;
233 }
234 if (bits.getBits(16) != 0x0B77) {
235 ALOGE("No valid sync word in EAC3 header");
236 return 0;
237 }
238
239 // we parse up to bsid so there needs to be at least that many bits
240 if (bits.numBitsLeft() < 2 + 3 + 11 + 2 + 2 + 3 + 1 + 5) {
241 ALOGE("Not enough bits left for further parsing");
242 return 0;
243 }
244
245 unsigned strmtyp = bits.getBits(2);
246 if (strmtyp == 3) {
247 ALOGE("Incorrect strmtyp in EAC3 header");
248 return 0;
249 }
250
251 unsigned substreamid = bits.getBits(3);
252 // only the first independent stream is supported
253 if ((strmtyp == 0 || strmtyp == 2) && substreamid != 0)
254 return 0;
255
256 unsigned frmsiz = bits.getBits(11);
257 unsigned fscod = bits.getBits(2);
258
259 unsigned samplingRate = 0;
260 if (fscod == 0x3) {
261 unsigned fscod2 = bits.getBits(2);
262 if (fscod2 == 3) {
263 ALOGW("Incorrect fscod2 in EAC3 header");
264 return 0;
265 }
266 samplingRate = samplingRateTable2[fscod2];
267 } else {
268 samplingRate = samplingRateTable[fscod];
269 bits.skipBits(2); // numblkscod
270 }
271
272 unsigned acmod = bits.getBits(3);
273 unsigned lfeon = bits.getBits(1);
274 unsigned bsid = bits.getBits(5);
275 if (bsid < 11 || bsid > 16) {
276 ALOGW("Incorrect bsid in EAC3 header. Could be AC-3 or some unknown EAC3 format");
277 return 0;
278 }
279
280 // we currently only support the first independant stream
281 if (metaData != NULL && (strmtyp == 0 || strmtyp == 2)) {
282 unsigned channelCount = channelCountTable[acmod] + lfeon;
283 ALOGV("EAC3 channelCount = %d", channelCount);
284 ALOGV("EAC3 samplingRate = %d", samplingRate);
285 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_EAC3);
286 (*metaData)->setInt32(kKeyChannelCount, channelCount);
287 (*metaData)->setInt32(kKeySampleRate, samplingRate);
288 (*metaData)->setInt32(kKeyIsSyncFrame, 1);
289 }
290
291 unsigned payloadSize = frmsiz + 1;
292 payloadSize <<= 1; // convert from 16-bit words to bytes
293
294 return payloadSize;
295 }
296
297 // Parse AC4 header assuming the current ptr is start position of syncframe
298 // and update frameSize and metadata.
parseAC4SyncFrame(const uint8_t * ptr,size_t size,unsigned & frameSize,sp<MetaData> * metaData)299 static status_t parseAC4SyncFrame(
300 const uint8_t *ptr, size_t size, unsigned &frameSize, sp<MetaData> *metaData) {
301 // ETSI TS 103 190-2 V1.1.1 (2015-09), Annex C
302 // The sync_word can be either 0xAC40 or 0xAC41.
303 static const int kSyncWordAC40 = 0xAC40;
304 static const int kSyncWordAC41 = 0xAC41;
305
306 size_t headerSize = 0;
307 ABitReader bits(ptr, size);
308 int32_t syncWord = bits.getBits(16);
309 if ((syncWord != kSyncWordAC40) && (syncWord != kSyncWordAC41)) {
310 ALOGE("Invalid syncword in AC4 header");
311 return ERROR_MALFORMED;
312 }
313 headerSize += 2;
314
315 frameSize = bits.getBits(16);
316 headerSize += 2;
317 if (frameSize == 0xFFFF) {
318 frameSize = bits.getBits(24);
319 headerSize += 3;
320 }
321
322 if (frameSize == 0) {
323 ALOGE("Invalid frame size in AC4 header");
324 return ERROR_MALFORMED;
325 }
326 frameSize += headerSize;
327 // If the sync_word is 0xAC41, a crc_word is also transmitted.
328 if (syncWord == kSyncWordAC41) {
329 frameSize += 2; // crc_word
330 }
331 ALOGV("AC4 frameSize = %u", frameSize);
332
333 // ETSI TS 103 190-2 V1.1.1 6.2.1.1
334 uint32_t bitstreamVersion = bits.getBits(2);
335 if (bitstreamVersion == 3) {
336 bitstreamVersion += readVariableBits(bits, 2);
337 }
338
339 bits.skipBits(10); // Sequence Counter
340
341 uint32_t bWaitFrames = bits.getBits(1);
342 if (bWaitFrames) {
343 uint32_t waitFrames = bits.getBits(3);
344 if (waitFrames > 0) {
345 bits.skipBits(2); // br_code;
346 }
347 }
348
349 // ETSI TS 103 190 V1.1.1 Table 82
350 bool fsIndex = bits.getBits(1);
351 uint32_t samplingRate = fsIndex ? 48000 : 44100;
352
353 if (metaData != NULL) {
354 ALOGV("dequeueAccessUnitAC4 Setting mFormat");
355 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC4);
356 (*metaData)->setInt32(kKeyIsSyncFrame, 1);
357 // [FIXME] AC4 channel count is defined per presentation. Provide a default channel count
358 // as stereo for the entire stream.
359 (*metaData)->setInt32(kKeyChannelCount, 2);
360 (*metaData)->setInt32(kKeySampleRate, samplingRate);
361 }
362 return OK;
363 }
364
IsSeeminglyValidAC4Header(const uint8_t * ptr,size_t size,unsigned & frameSize)365 static status_t IsSeeminglyValidAC4Header(const uint8_t *ptr, size_t size, unsigned &frameSize) {
366 return parseAC4SyncFrame(ptr, size, frameSize, NULL);
367 }
368
// Checks whether |ptr| plausibly starts an ADTS frame that fits entirely in
// the |size| bytes available.
// On success returns true and stores the frame length taken from the header
// in |*frameLength|; otherwise returns false and leaves |*frameLength| alone.
static bool IsSeeminglyValidADTSHeader(
        const uint8_t *ptr, size_t size, size_t *frameLength) {
    // Fewer than 7 bytes is not enough data to verify a header.
    static const size_t kMinHeaderBytes = 7;
    if (size < kMinHeaderBytes) {
        return false;
    }

    // 12-bit syncword: all of byte 0 plus the top nibble of byte 1.
    const unsigned syncword = ((unsigned)ptr[0] << 4) | (ptr[1] >> 4);
    if (syncword != 0xfff) {
        return false;
    }

    // The two layer bits must be zero.
    if ((ptr[1] & 0x06) != 0) {
        return false;
    }

    // MPEG-2 (ID == 1) profile 3 is reserved.
    const bool idBitSet = (ptr[1] & 0x08) != 0;
    const unsigned profileObjectType = ptr[2] >> 6;
    if (idBitSet && profileObjectType == 3) {
        return false;
    }

    // 13-bit frame length spread across bytes 3..5; the three pieces occupy
    // disjoint bit ranges.
    const size_t declaredLength =
            ((size_t)(ptr[3] & 0x03) << 11) | ((size_t)ptr[4] << 3) | (size_t)(ptr[5] >> 5);
    if (declaredLength > size) {
        return false;
    }

    *frameLength = declaredLength;
    return true;
}
403
// Checks whether the first three bytes at |ptr| look like an MPEG audio frame
// header: 11-bit syncword present and none of the version, layer, bitrate or
// sampling-rate fields set to its reserved value.
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    // Fewer than 3 bytes is not enough data to verify a header.
    if (size < 3) {
        return false;
    }

    const uint8_t first = ptr[0];
    const uint8_t second = ptr[1];
    const uint8_t third = ptr[2];

    // Syncword: byte 0 all ones plus the top three bits of byte 1.
    const bool hasSyncword = (first == 0xff) && ((second & 0xe0) == 0xe0);
    if (!hasSyncword) {
        return false;
    }

    const bool reservedVersion = (second & 0x18) == 0x08;       // ID == 1
    const bool reservedLayer = (second & 0x06) == 0x00;         // layer == 0
    const bool reservedBitrate = (third & 0xf0) == 0xf0;        // index == 15
    const bool reservedSamplingRate = (third & 0x0c) == 0x0c;   // index == 3

    return !(reservedVersion || reservedLayer || reservedBitrate || reservedSamplingRate);
}
440
appendData(const void * data,size_t size,int64_t timeUs,int32_t payloadOffset,uint32_t pesScramblingControl)441 status_t ElementaryStreamQueue::appendData(
442 const void *data, size_t size, int64_t timeUs,
443 int32_t payloadOffset, uint32_t pesScramblingControl) {
444
445 if (mEOSReached) {
446 ALOGE("appending data after EOS");
447 return ERROR_MALFORMED;
448 }
449
450 if (!isScrambled() && (mBuffer == NULL || mBuffer->size() == 0)) {
451 switch (mMode) {
452 case H264:
453 case MPEG_VIDEO:
454 {
455 #if 0
456 if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
457 return ERROR_MALFORMED;
458 }
459 #else
460 uint8_t *ptr = (uint8_t *)data;
461
462 ssize_t startOffset = -1;
463 for (size_t i = 0; i + 2 < size; ++i) {
464 if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
465 startOffset = i;
466 break;
467 }
468 }
469
470 if (startOffset < 0) {
471 return ERROR_MALFORMED;
472 }
473
474 if (mFormat == NULL && startOffset > 0) {
475 ALOGI("found something resembling an H.264/MPEG syncword "
476 "at offset %zd",
477 startOffset);
478 }
479
480 data = &ptr[startOffset];
481 size -= startOffset;
482 #endif
483 break;
484 }
485
486 case MPEG4_VIDEO:
487 {
488 #if 0
489 if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
490 return ERROR_MALFORMED;
491 }
492 #else
493 uint8_t *ptr = (uint8_t *)data;
494
495 ssize_t startOffset = -1;
496 for (size_t i = 0; i + 2 < size; ++i) {
497 if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
498 startOffset = i;
499 break;
500 }
501 }
502
503 if (startOffset < 0) {
504 return ERROR_MALFORMED;
505 }
506
507 if (startOffset > 0) {
508 ALOGI("found something resembling an H.264/MPEG syncword "
509 "at offset %zd",
510 startOffset);
511 }
512
513 data = &ptr[startOffset];
514 size -= startOffset;
515 #endif
516 break;
517 }
518
519 case AAC:
520 {
521 uint8_t *ptr = (uint8_t *)data;
522
523 #if 0
524 if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
525 return ERROR_MALFORMED;
526 }
527 #else
528 ssize_t startOffset = -1;
529 size_t frameLength;
530 for (size_t i = 0; i < size; ++i) {
531 if (IsSeeminglyValidADTSHeader(
532 &ptr[i], size - i, &frameLength)) {
533 startOffset = i;
534 break;
535 }
536 }
537
538 if (startOffset < 0) {
539 return ERROR_MALFORMED;
540 }
541
542 if (startOffset > 0) {
543 ALOGI("found something resembling an AAC syncword at "
544 "offset %zd",
545 startOffset);
546 }
547
548 if (frameLength != size - startOffset) {
549 ALOGV("First ADTS AAC frame length is %zd bytes, "
550 "while the buffer size is %zd bytes.",
551 frameLength, size - startOffset);
552 }
553
554 data = &ptr[startOffset];
555 size -= startOffset;
556 #endif
557 break;
558 }
559
560 case AC3:
561 case EAC3:
562 {
563 uint8_t *ptr = (uint8_t *)data;
564
565 ssize_t startOffset = -1;
566 for (size_t i = 0; i < size; ++i) {
567 unsigned payloadSize = 0;
568 if (mMode == AC3) {
569 payloadSize = parseAC3SyncFrame(&ptr[i], size - i, NULL);
570 } else if (mMode == EAC3) {
571 payloadSize = parseEAC3SyncFrame(&ptr[i], size - i, NULL);
572 }
573 if (payloadSize > 0) {
574 startOffset = i;
575 break;
576 }
577 }
578
579 if (startOffset < 0) {
580 return ERROR_MALFORMED;
581 }
582
583 if (startOffset > 0) {
584 ALOGI("found something resembling an (E)AC3 syncword at "
585 "offset %zd",
586 startOffset);
587 }
588
589 data = &ptr[startOffset];
590 size -= startOffset;
591 break;
592 }
593
594 case AC4:
595 {
596 uint8_t *ptr = (uint8_t *)data;
597 unsigned frameSize = 0;
598 ssize_t startOffset = -1;
599
600 // A valid AC4 stream should have minimum of 7 bytes in its buffer.
601 // (Sync header 4 bytes + AC4 toc 3 bytes)
602 if (size < 7) {
603 return ERROR_MALFORMED;
604 }
605 for (size_t i = 0; i < size; ++i) {
606 if (IsSeeminglyValidAC4Header(&ptr[i], size - i, frameSize) == OK) {
607 startOffset = i;
608 break;
609 }
610 }
611
612 if (startOffset < 0) {
613 return ERROR_MALFORMED;
614 }
615
616 if (startOffset > 0) {
617 ALOGI("found something resembling an AC4 syncword at "
618 "offset %zd",
619 startOffset);
620 }
621 if (frameSize != size - startOffset) {
622 ALOGV("AC4 frame size is %u bytes, while the buffer size is %zd bytes.",
623 frameSize, size - startOffset);
624 }
625
626 data = &ptr[startOffset];
627 size -= startOffset;
628 break;
629 }
630
631 case MPEG_AUDIO:
632 {
633 uint8_t *ptr = (uint8_t *)data;
634
635 ssize_t startOffset = -1;
636 for (size_t i = 0; i < size; ++i) {
637 if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
638 startOffset = i;
639 break;
640 }
641 }
642
643 if (startOffset < 0) {
644 return ERROR_MALFORMED;
645 }
646
647 if (startOffset > 0) {
648 ALOGI("found something resembling an MPEG audio "
649 "syncword at offset %zd",
650 startOffset);
651 }
652
653 data = &ptr[startOffset];
654 size -= startOffset;
655 break;
656 }
657
658 case PCM_AUDIO:
659 case METADATA:
660 {
661 break;
662 }
663
664 default:
665 ALOGE("Unknown mode: %d", mMode);
666 return ERROR_MALFORMED;
667 }
668 }
669
670 size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
671 if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
672 neededSize = (neededSize + 65535) & ~65535;
673
674 ALOGV("resizing buffer to size %zu", neededSize);
675
676 sp<ABuffer> buffer = new ABuffer(neededSize);
677 if (mBuffer != NULL) {
678 memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
679 buffer->setRange(0, mBuffer->size());
680 } else {
681 buffer->setRange(0, 0);
682 }
683
684 mBuffer = buffer;
685 }
686
687 memcpy(mBuffer->data() + mBuffer->size(), data, size);
688 mBuffer->setRange(0, mBuffer->size() + size);
689
690 RangeInfo info;
691 info.mLength = size;
692 info.mTimestampUs = timeUs;
693 info.mPesOffset = payloadOffset;
694 info.mPesScramblingControl = pesScramblingControl;
695 mRangeInfos.push_back(info);
696
697 #if 0
698 if (mMode == AAC) {
699 ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
700 hexdump(data, size);
701 }
702 #endif
703
704 return OK;
705 }
706
appendScrambledData(const void * data,size_t size,size_t leadingClearBytes,int32_t keyId,bool isSync,sp<ABuffer> clearSizes,sp<ABuffer> encSizes)707 void ElementaryStreamQueue::appendScrambledData(
708 const void *data, size_t size,
709 size_t leadingClearBytes,
710 int32_t keyId, bool isSync,
711 sp<ABuffer> clearSizes, sp<ABuffer> encSizes) {
712 if (!isScrambled()) {
713 return;
714 }
715
716 size_t neededSize = (mScrambledBuffer == NULL ? 0 : mScrambledBuffer->size()) + size;
717 if (mScrambledBuffer == NULL || neededSize > mScrambledBuffer->capacity()) {
718 neededSize = (neededSize + 65535) & ~65535;
719
720 ALOGI("resizing scrambled buffer to size %zu", neededSize);
721
722 sp<ABuffer> buffer = new ABuffer(neededSize);
723 if (mScrambledBuffer != NULL) {
724 memcpy(buffer->data(), mScrambledBuffer->data(), mScrambledBuffer->size());
725 buffer->setRange(0, mScrambledBuffer->size());
726 } else {
727 buffer->setRange(0, 0);
728 }
729
730 mScrambledBuffer = buffer;
731 }
732 memcpy(mScrambledBuffer->data() + mScrambledBuffer->size(), data, size);
733 mScrambledBuffer->setRange(0, mScrambledBuffer->size() + size);
734
735 ScrambledRangeInfo scrambledInfo;
736 scrambledInfo.mLength = size;
737 scrambledInfo.mLeadingClearBytes = leadingClearBytes;
738 scrambledInfo.mKeyId = keyId;
739 scrambledInfo.mIsSync = isSync;
740 scrambledInfo.mClearSizes = clearSizes;
741 scrambledInfo.mEncSizes = encSizes;
742
743 ALOGV("[stream %d] appending scrambled range: size=%zu", mMode, size);
744
745 mScrambledRangeInfos.push_back(scrambledInfo);
746 }
747
dequeueScrambledAccessUnit()748 sp<ABuffer> ElementaryStreamQueue::dequeueScrambledAccessUnit() {
749 size_t nextScan = mBuffer->size();
750 int32_t pesOffset = 0, pesScramblingControl = 0;
751 int64_t timeUs = fetchTimestamp(nextScan, &pesOffset, &pesScramblingControl);
752 if (timeUs < 0ll) {
753 ALOGE("Negative timeUs");
754 return NULL;
755 }
756
757 // return scrambled unit
758 int32_t keyId = pesScramblingControl, isSync = 0, scrambledLength = 0;
759 sp<ABuffer> clearSizes, encSizes;
760 size_t leadingClearBytes;
761 while (mScrambledRangeInfos.size() > mRangeInfos.size()) {
762 auto it = mScrambledRangeInfos.begin();
763 ALOGV("[stream %d] fetching scrambled range: size=%zu", mMode, it->mLength);
764
765 if (scrambledLength > 0) {
766 // This shouldn't happen since we always dequeue the entire PES.
767 ALOGW("Discarding srambled length %d", scrambledLength);
768 }
769 scrambledLength = it->mLength;
770
771 // TODO: handle key id change, use first non-zero keyId for now
772 if (keyId == 0) {
773 keyId = it->mKeyId;
774 }
775 clearSizes = it->mClearSizes;
776 encSizes = it->mEncSizes;
777 isSync = it->mIsSync;
778 leadingClearBytes = it->mLeadingClearBytes;
779 mScrambledRangeInfos.erase(it);
780 }
781 if (scrambledLength == 0) {
782 ALOGE("[stream %d] empty scrambled unit!", mMode);
783 return NULL;
784 }
785
786 // Retrieve the leading clear bytes info, and use it to set the clear
787 // range on mBuffer. Note that the leading clear bytes includes the
788 // PES header portion, while mBuffer doesn't.
789 if ((int32_t)leadingClearBytes > pesOffset) {
790 mBuffer->setRange(0, leadingClearBytes - pesOffset);
791 } else {
792 mBuffer->setRange(0, 0);
793 }
794
795 // Try to parse formats, and if unavailable set up a dummy format.
796 // Only support the following modes for scrambled content for now.
797 // (will be expanded later).
798 if (mFormat == NULL) {
799 mFormat = new MetaData;
800 switch (mMode) {
801 case H264:
802 {
803 if (!MakeAVCCodecSpecificData(
804 *mFormat, mBuffer->data(), mBuffer->size())) {
805 ALOGI("Creating dummy AVC format for scrambled content");
806
807 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
808 mFormat->setInt32(kKeyWidth, 1280);
809 mFormat->setInt32(kKeyHeight, 720);
810 }
811 break;
812 }
813 case AAC:
814 {
815 if (!MakeAACCodecSpecificData(
816 *mFormat, mBuffer->data(), mBuffer->size())) {
817 ALOGI("Creating dummy AAC format for scrambled content");
818
819 MakeAACCodecSpecificData(*mFormat,
820 1 /*profile*/, 7 /*sampling_freq_index*/, 1 /*channel_config*/);
821 mFormat->setInt32(kKeyIsADTS, true);
822 }
823
824 break;
825 }
826 case MPEG_VIDEO:
827 {
828 ALOGI("Creating dummy MPEG format for scrambled content");
829
830 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
831 mFormat->setInt32(kKeyWidth, 1280);
832 mFormat->setInt32(kKeyHeight, 720);
833 break;
834 }
835 default:
836 {
837 ALOGE("Unknown mode for scrambled content");
838 return NULL;
839 }
840 }
841
842 // for MediaExtractor.CasInfo
843 mFormat->setInt32(kKeyCASystemID, mCASystemId);
844 mFormat->setData(kKeyCASessionID,
845 0, mCasSessionId.data(), mCasSessionId.size());
846 }
847
848 mBuffer->setRange(0, 0);
849
850 // copy into scrambled access unit
851 sp<ABuffer> scrambledAccessUnit = ABuffer::CreateAsCopy(
852 mScrambledBuffer->data(), scrambledLength);
853
854 scrambledAccessUnit->meta()->setInt64("timeUs", timeUs);
855 if (isSync) {
856 scrambledAccessUnit->meta()->setInt32("isSync", 1);
857 }
858
859 // fill in CryptoInfo fields for AnotherPacketSource::read()
860 // MediaCas doesn't use cryptoMode, but set to non-zero value here.
861 scrambledAccessUnit->meta()->setInt32(
862 "cryptoMode", CryptoPlugin::kMode_AES_CTR);
863 scrambledAccessUnit->meta()->setInt32("cryptoKey", keyId);
864 scrambledAccessUnit->meta()->setBuffer("clearBytes", clearSizes);
865 scrambledAccessUnit->meta()->setBuffer("encBytes", encSizes);
866 scrambledAccessUnit->meta()->setInt32("pesOffset", pesOffset);
867
868 memmove(mScrambledBuffer->data(),
869 mScrambledBuffer->data() + scrambledLength,
870 mScrambledBuffer->size() - scrambledLength);
871
872 mScrambledBuffer->setRange(0, mScrambledBuffer->size() - scrambledLength);
873
874 ALOGV("[stream %d] dequeued scrambled AU: timeUs=%lld, size=%zu",
875 mMode, (long long)timeUs, scrambledAccessUnit->size());
876
877 return scrambledAccessUnit;
878 }
879
dequeueAccessUnit()880 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
881 if (isScrambled()) {
882 return dequeueScrambledAccessUnit();
883 }
884
885 if ((mFlags & kFlag_AlignedData) && mMode == H264) {
886 if (mRangeInfos.empty()) {
887 return NULL;
888 }
889
890 RangeInfo info = *mRangeInfos.begin();
891 mRangeInfos.erase(mRangeInfos.begin());
892
893 sp<ABuffer> accessUnit = new ABuffer(info.mLength);
894 memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
895 accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
896
897 memmove(mBuffer->data(),
898 mBuffer->data() + info.mLength,
899 mBuffer->size() - info.mLength);
900
901 mBuffer->setRange(0, mBuffer->size() - info.mLength);
902
903 if (mFormat == NULL) {
904 mFormat = new MetaData;
905 if (!MakeAVCCodecSpecificData(*mFormat, accessUnit->data(), accessUnit->size())) {
906 mFormat.clear();
907 }
908 }
909
910 return accessUnit;
911 }
912
913 switch (mMode) {
914 case H264:
915 return dequeueAccessUnitH264();
916 case AAC:
917 return dequeueAccessUnitAAC();
918 case AC3:
919 case EAC3:
920 return dequeueAccessUnitEAC3();
921 case AC4:
922 return dequeueAccessUnitAC4();
923 case MPEG_VIDEO:
924 return dequeueAccessUnitMPEGVideo();
925 case MPEG4_VIDEO:
926 return dequeueAccessUnitMPEG4Video();
927 case PCM_AUDIO:
928 return dequeueAccessUnitPCMAudio();
929 case METADATA:
930 return dequeueAccessUnitMetadata();
931 default:
932 if (mMode != MPEG_AUDIO) {
933 ALOGE("Unknown mode");
934 return NULL;
935 }
936 return dequeueAccessUnitMPEGAudio();
937 }
938 }
939
dequeueAccessUnitEAC3()940 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitEAC3() {
941 unsigned syncStartPos = 0; // in bytes
942 unsigned payloadSize = 0;
943 sp<MetaData> format = new MetaData;
944
945 ALOGV("dequeueAccessUnitEAC3[%d]: mBuffer %p(%zu)", mAUIndex,
946 mBuffer->data(), mBuffer->size());
947
948 while (true) {
949 if (syncStartPos + 2 >= mBuffer->size()) {
950 return NULL;
951 }
952
953 uint8_t *ptr = mBuffer->data() + syncStartPos;
954 size_t size = mBuffer->size() - syncStartPos;
955 if (mMode == AC3) {
956 payloadSize = parseAC3SyncFrame(ptr, size, &format);
957 } else if (mMode == EAC3) {
958 payloadSize = parseEAC3SyncFrame(ptr, size, &format);
959 }
960 if (payloadSize > 0) {
961 break;
962 }
963
964 ALOGV("dequeueAccessUnitEAC3[%d]: syncStartPos %u payloadSize %u",
965 mAUIndex, syncStartPos, payloadSize);
966
967 ++syncStartPos;
968 }
969
970 if (mBuffer->size() < syncStartPos + payloadSize) {
971 ALOGV("Not enough buffer size for E/AC3");
972 return NULL;
973 }
974
975 if (mFormat == NULL) {
976 mFormat = format;
977 }
978
979 int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
980 if (timeUs < 0ll) {
981 ALOGE("negative timeUs");
982 return NULL;
983 }
984
985 // Not decrypting if key info not available (e.g., scanner/extractor parsing ts files)
986 if (mSampleDecryptor != NULL) {
987 if (mMode == AC3) {
988 mSampleDecryptor->processAC3(mBuffer->data() + syncStartPos, payloadSize);
989 } else if (mMode == EAC3) {
990 ALOGE("EAC3 AU is encrypted and decryption is not supported");
991 return NULL;
992 }
993 }
994 mAUIndex++;
995
996 sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
997 memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
998
999 accessUnit->meta()->setInt64("timeUs", timeUs);
1000 accessUnit->meta()->setInt32("isSync", 1);
1001
1002 memmove(
1003 mBuffer->data(),
1004 mBuffer->data() + syncStartPos + payloadSize,
1005 mBuffer->size() - syncStartPos - payloadSize);
1006
1007 mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
1008
1009 return accessUnit;
1010 }
1011
dequeueAccessUnitAC4()1012 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC4() {
1013 unsigned syncStartPos = 0;
1014 unsigned payloadSize = 0;
1015 sp<MetaData> format = new MetaData;
1016 ALOGV("dequeueAccessUnit_AC4[%d]: mBuffer %p(%zu)", mAUIndex, mBuffer->data(), mBuffer->size());
1017
1018 // A valid AC4 stream should have minimum of 7 bytes in its buffer.
1019 // (Sync header 4 bytes + AC4 toc 3 bytes)
1020 if (mBuffer->size() < 7) {
1021 return NULL;
1022 }
1023
1024 while (true) {
1025 if (syncStartPos + 2 >= mBuffer->size()) {
1026 return NULL;
1027 }
1028
1029 status_t status = parseAC4SyncFrame(
1030 mBuffer->data() + syncStartPos,
1031 mBuffer->size() - syncStartPos,
1032 payloadSize,
1033 &format);
1034 if (status == OK) {
1035 break;
1036 }
1037
1038 ALOGV("dequeueAccessUnit_AC4[%d]: syncStartPos %u payloadSize %u",
1039 mAUIndex, syncStartPos, payloadSize);
1040
1041 ++syncStartPos;
1042 }
1043
1044 if (mBuffer->size() < syncStartPos + payloadSize) {
1045 ALOGV("Not enough buffer size for AC4");
1046 return NULL;
1047 }
1048
1049 if (mFormat == NULL) {
1050 mFormat = format;
1051 }
1052
1053 int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
1054 if (timeUs < 0ll) {
1055 ALOGE("negative timeUs");
1056 return NULL;
1057 }
1058 mAUIndex++;
1059
1060 sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
1061 memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
1062
1063 accessUnit->meta()->setInt64("timeUs", timeUs);
1064 accessUnit->meta()->setInt32("isSync", 1);
1065
1066 memmove(
1067 mBuffer->data(),
1068 mBuffer->data() + syncStartPos + payloadSize,
1069 mBuffer->size() - syncStartPos - payloadSize);
1070
1071 mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
1072 return accessUnit;
1073 }
1074
dequeueAccessUnitPCMAudio()1075 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
1076 if (mBuffer->size() < 4) {
1077 return NULL;
1078 }
1079
1080 ABitReader bits(mBuffer->data(), 4);
1081 if (bits.getBits(8) != 0xa0) {
1082 ALOGE("Unexpected bit values");
1083 return NULL;
1084 }
1085 unsigned numAUs = bits.getBits(8);
1086 bits.skipBits(8);
1087 bits.skipBits(2); // quantization_word_length
1088 unsigned audio_sampling_frequency = bits.getBits(3);
1089 unsigned num_channels = bits.getBits(3);
1090
1091 if (audio_sampling_frequency != 2) {
1092 ALOGE("Wrong sampling freq");
1093 return NULL;
1094 }
1095 if (num_channels != 1u) {
1096 ALOGE("Wrong channel #");
1097 return NULL;
1098 }
1099
1100 if (mFormat == NULL) {
1101 mFormat = new MetaData;
1102 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
1103 mFormat->setInt32(kKeyChannelCount, 2);
1104 mFormat->setInt32(kKeySampleRate, 48000);
1105 mFormat->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
1106 }
1107
1108 static const size_t kFramesPerAU = 80;
1109 size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
1110
1111 size_t payloadSize = numAUs * frameSize * kFramesPerAU;
1112
1113 if (mBuffer->size() < 4 + payloadSize) {
1114 return NULL;
1115 }
1116
1117 sp<ABuffer> accessUnit = new ABuffer(payloadSize);
1118 memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
1119
1120 int64_t timeUs = fetchTimestamp(payloadSize + 4);
1121 if (timeUs < 0LL) {
1122 ALOGE("Negative timeUs");
1123 return NULL;
1124 }
1125 accessUnit->meta()->setInt64("timeUs", timeUs);
1126 accessUnit->meta()->setInt32("isSync", 1);
1127
1128 int16_t *ptr = (int16_t *)accessUnit->data();
1129 for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
1130 ptr[i] = ntohs(ptr[i]);
1131 }
1132
1133 memmove(
1134 mBuffer->data(),
1135 mBuffer->data() + 4 + payloadSize,
1136 mBuffer->size() - 4 - payloadSize);
1137
1138 mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
1139
1140 return accessUnit;
1141 }
1142
dequeueAccessUnitAAC()1143 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
1144 if (mBuffer->size() == 0) {
1145 return NULL;
1146 }
1147
1148 if (mRangeInfos.empty()) {
1149 return NULL;
1150 }
1151
1152 const RangeInfo &info = *mRangeInfos.begin();
1153 if (mBuffer->size() < info.mLength) {
1154 return NULL;
1155 }
1156
1157 if (info.mTimestampUs < 0LL) {
1158 ALOGE("Negative info.mTimestampUs");
1159 return NULL;
1160 }
1161
1162 ALOGV("dequeueAccessUnit_AAC[%d]: mBuffer %zu info.mLength %zu",
1163 mAUIndex, mBuffer->size(), info.mLength);
1164
1165 struct ADTSPosition {
1166 size_t offset;
1167 size_t headerSize;
1168 size_t length;
1169 };
1170
1171 Vector<ADTSPosition> frames;
1172
1173 // The idea here is consume all AAC frames starting at offsets before
1174 // info.mLength so we can assign a meaningful timestamp without
1175 // having to interpolate.
1176 // The final AAC frame may well extend into the next RangeInfo but
1177 // that's ok.
1178 size_t offset = 0;
1179 while (offset < info.mLength) {
1180 if (offset + 7 > mBuffer->size()) {
1181 return NULL;
1182 }
1183
1184 ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
1185
1186 // adts_fixed_header
1187
1188 if (bits.getBits(12) != 0xfffu) {
1189 ALOGE("Wrong atds_fixed_header");
1190 return NULL;
1191 }
1192 bits.skipBits(3); // ID, layer
1193 bool protection_absent = bits.getBits(1) != 0;
1194
1195 if (mFormat == NULL) {
1196 mFormat = new MetaData;
1197 if (!MakeAACCodecSpecificData(
1198 *mFormat, mBuffer->data() + offset, mBuffer->size() - offset)) {
1199 return NULL;
1200 }
1201
1202 int32_t sampleRate;
1203 int32_t numChannels;
1204 if (!mFormat->findInt32(kKeySampleRate, &sampleRate)) {
1205 ALOGE("SampleRate not found");
1206 return NULL;
1207 }
1208 if (!mFormat->findInt32(kKeyChannelCount, &numChannels)) {
1209 ALOGE("ChannelCount not found");
1210 return NULL;
1211 }
1212
1213 ALOGI("found AAC codec config (%d Hz, %d channels)",
1214 sampleRate, numChannels);
1215 }
1216
1217 // profile_ObjectType, sampling_frequency_index, private_bits,
1218 // channel_configuration, original_copy, home
1219 bits.skipBits(12);
1220
1221 // adts_variable_header
1222
1223 // copyright_identification_bit, copyright_identification_start
1224 bits.skipBits(2);
1225
1226 unsigned aac_frame_length = bits.getBits(13);
1227 if (aac_frame_length == 0){
1228 ALOGE("b/62673179, Invalid AAC frame length!");
1229 android_errorWriteLog(0x534e4554, "62673179");
1230 return NULL;
1231 }
1232
1233 bits.skipBits(11); // adts_buffer_fullness
1234
1235 unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
1236
1237 if (number_of_raw_data_blocks_in_frame != 0) {
1238 // To be implemented.
1239 ALOGE("Should not reach here.");
1240 return NULL;
1241 }
1242
1243 if (offset + aac_frame_length > mBuffer->size()) {
1244 return NULL;
1245 }
1246
1247 size_t headerSize = protection_absent ? 7 : 9;
1248
1249 // tracking the frame positions first then decrypt only if an accessUnit to be generated
1250 if (mSampleDecryptor != NULL) {
1251 ADTSPosition frame = {
1252 .offset = offset,
1253 .headerSize = headerSize,
1254 .length = aac_frame_length
1255 };
1256
1257 frames.push(frame);
1258 }
1259
1260 offset += aac_frame_length;
1261 }
1262
1263 // Decrypting only if the loop didn't exit early and an accessUnit is about to be generated
1264 // Not decrypting if key info not available (e.g., scanner/extractor parsing ts files)
1265 if (mSampleDecryptor != NULL) {
1266 for (size_t frameId = 0; frameId < frames.size(); frameId++) {
1267 const ADTSPosition &frame = frames.itemAt(frameId);
1268
1269 mSampleDecryptor->processAAC(frame.headerSize,
1270 mBuffer->data() + frame.offset, frame.length);
1271 // ALOGV("dequeueAccessUnitAAC[%zu]: while offset %zu headerSize %zu frame_len %zu",
1272 // frameId, frame.offset, frame.headerSize, frame.length);
1273 }
1274 }
1275 mAUIndex++;
1276
1277 int64_t timeUs = fetchTimestamp(offset);
1278
1279 sp<ABuffer> accessUnit = new ABuffer(offset);
1280 memcpy(accessUnit->data(), mBuffer->data(), offset);
1281
1282 memmove(mBuffer->data(), mBuffer->data() + offset,
1283 mBuffer->size() - offset);
1284 mBuffer->setRange(0, mBuffer->size() - offset);
1285
1286 accessUnit->meta()->setInt64("timeUs", timeUs);
1287 accessUnit->meta()->setInt32("isSync", 1);
1288
1289 return accessUnit;
1290 }
1291
fetchTimestamp(size_t size,int32_t * pesOffset,int32_t * pesScramblingControl)1292 int64_t ElementaryStreamQueue::fetchTimestamp(
1293 size_t size, int32_t *pesOffset, int32_t *pesScramblingControl) {
1294 int64_t timeUs = -1;
1295 bool first = true;
1296
1297 while (size > 0) {
1298 if (mRangeInfos.empty()) {
1299 return timeUs;
1300 }
1301
1302 RangeInfo *info = &*mRangeInfos.begin();
1303
1304 if (first) {
1305 timeUs = info->mTimestampUs;
1306 if (pesOffset != NULL) {
1307 *pesOffset = info->mPesOffset;
1308 }
1309 if (pesScramblingControl != NULL) {
1310 *pesScramblingControl = info->mPesScramblingControl;
1311 }
1312 first = false;
1313 }
1314
1315 if (info->mLength > size) {
1316 info->mLength -= size;
1317 size = 0;
1318 } else {
1319 size -= info->mLength;
1320
1321 mRangeInfos.erase(mRangeInfos.begin());
1322 info = NULL;
1323 }
1324
1325 }
1326
1327 if (timeUs == 0LL) {
1328 ALOGV("Returning 0 timestamp");
1329 }
1330
1331 return timeUs;
1332 }
1333
dequeueAccessUnitH264()1334 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
1335 const uint8_t *data = mBuffer->data();
1336
1337 size_t size = mBuffer->size();
1338 Vector<NALPosition> nals;
1339
1340 size_t totalSize = 0;
1341 size_t seiCount = 0;
1342
1343 status_t err;
1344 const uint8_t *nalStart;
1345 size_t nalSize;
1346 bool foundSlice = false;
1347 bool foundIDR = false;
1348
1349 ALOGV("dequeueAccessUnit_H264[%d] %p/%zu", mAUIndex, data, size);
1350
1351 while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
1352 if (nalSize == 0) continue;
1353
1354 unsigned nalType = nalStart[0] & 0x1f;
1355 bool flush = false;
1356
1357 if (nalType == 1 || nalType == 5) {
1358 if (nalType == 5) {
1359 foundIDR = true;
1360 }
1361 if (foundSlice) {
1362 //TODO: Shouldn't this have been called with nalSize-1?
1363 ABitReader br(nalStart + 1, nalSize);
1364 unsigned first_mb_in_slice = parseUE(&br);
1365
1366 if (first_mb_in_slice == 0) {
1367 // This slice starts a new frame.
1368
1369 flush = true;
1370 }
1371 }
1372
1373 foundSlice = true;
1374 } else if ((nalType == 9 || nalType == 7) && foundSlice) {
1375 // Access unit delimiter and SPS will be associated with the
1376 // next frame.
1377
1378 flush = true;
1379 } else if (nalType == 6 && nalSize > 0) {
1380 // found non-zero sized SEI
1381 ++seiCount;
1382 }
1383
1384 if (flush) {
1385 // The access unit will contain all nal units up to, but excluding
1386 // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
1387
1388 size_t auSize = 4 * nals.size() + totalSize;
1389 sp<ABuffer> accessUnit = new ABuffer(auSize);
1390 sp<ABuffer> sei;
1391
1392 if (seiCount > 0) {
1393 sei = new ABuffer(seiCount * sizeof(NALPosition));
1394 accessUnit->meta()->setBuffer("sei", sei);
1395 }
1396
1397 #if !LOG_NDEBUG
1398 AString out;
1399 #endif
1400
1401 size_t dstOffset = 0;
1402 size_t seiIndex = 0;
1403 size_t shrunkBytes = 0;
1404 for (size_t i = 0; i < nals.size(); ++i) {
1405 const NALPosition &pos = nals.itemAt(i);
1406
1407 unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
1408
1409 if (nalType == 6 && pos.nalSize > 0) {
1410 if (seiIndex >= sei->size() / sizeof(NALPosition)) {
1411 ALOGE("Wrong seiIndex");
1412 return NULL;
1413 }
1414 NALPosition &seiPos = ((NALPosition *)sei->data())[seiIndex++];
1415 seiPos.nalOffset = dstOffset + 4;
1416 seiPos.nalSize = pos.nalSize;
1417 }
1418
1419 #if !LOG_NDEBUG
1420 char tmp[128];
1421 sprintf(tmp, "0x%02x", nalType);
1422 if (i > 0) {
1423 out.append(", ");
1424 }
1425 out.append(tmp);
1426 #endif
1427
1428 memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
1429
1430 if (mSampleDecryptor != NULL && (nalType == 1 || nalType == 5)) {
1431 uint8_t *nalData = mBuffer->data() + pos.nalOffset;
1432 size_t newSize = mSampleDecryptor->processNal(nalData, pos.nalSize);
1433 // Note: the data can shrink due to unescaping
1434 memcpy(accessUnit->data() + dstOffset + 4,
1435 nalData,
1436 newSize);
1437 dstOffset += newSize + 4;
1438
1439 size_t thisShrunkBytes = pos.nalSize - newSize;
1440 //ALOGV("dequeueAccessUnitH264[%d]: nalType: %d -> %zu (%zu)",
1441 // nalType, (int)pos.nalSize, newSize, thisShrunkBytes);
1442
1443 shrunkBytes += thisShrunkBytes;
1444 }
1445 else {
1446 memcpy(accessUnit->data() + dstOffset + 4,
1447 mBuffer->data() + pos.nalOffset,
1448 pos.nalSize);
1449
1450 dstOffset += pos.nalSize + 4;
1451 //ALOGV("dequeueAccessUnitH264 [%d] %d @%d",
1452 // nalType, (int)pos.nalSize, (int)pos.nalOffset);
1453 }
1454 }
1455
1456 #if !LOG_NDEBUG
1457 ALOGV("accessUnit contains nal types %s", out.c_str());
1458 #endif
1459
1460 const NALPosition &pos = nals.itemAt(nals.size() - 1);
1461 size_t nextScan = pos.nalOffset + pos.nalSize;
1462
1463 memmove(mBuffer->data(),
1464 mBuffer->data() + nextScan,
1465 mBuffer->size() - nextScan);
1466
1467 mBuffer->setRange(0, mBuffer->size() - nextScan);
1468
1469 int64_t timeUs = fetchTimestamp(nextScan);
1470 if (timeUs < 0LL) {
1471 ALOGE("Negative timeUs");
1472 return NULL;
1473 }
1474
1475 accessUnit->meta()->setInt64("timeUs", timeUs);
1476 if (foundIDR) {
1477 accessUnit->meta()->setInt32("isSync", 1);
1478 }
1479
1480 if (mFormat == NULL) {
1481 mFormat = new MetaData;
1482 if (!MakeAVCCodecSpecificData(*mFormat,
1483 accessUnit->data(),
1484 accessUnit->size())) {
1485 mFormat.clear();
1486 }
1487 }
1488
1489 if (mSampleDecryptor != NULL && shrunkBytes > 0) {
1490 size_t adjustedSize = accessUnit->size() - shrunkBytes;
1491 ALOGV("dequeueAccessUnitH264[%d]: AU size adjusted %zu -> %zu",
1492 mAUIndex, accessUnit->size(), adjustedSize);
1493 accessUnit->setRange(0, adjustedSize);
1494 }
1495
1496 ALOGV("dequeueAccessUnitH264[%d]: AU %p(%zu) dstOffset:%zu, nals:%zu, totalSize:%zu ",
1497 mAUIndex, accessUnit->data(), accessUnit->size(),
1498 dstOffset, nals.size(), totalSize);
1499 mAUIndex++;
1500
1501 return accessUnit;
1502 }
1503
1504 NALPosition pos;
1505 pos.nalOffset = nalStart - mBuffer->data();
1506 pos.nalSize = nalSize;
1507
1508 nals.push(pos);
1509
1510 totalSize += nalSize;
1511 }
1512 if (err != (status_t)-EAGAIN) {
1513 ALOGE("Unexpeted err");
1514 return NULL;
1515 }
1516
1517 return NULL;
1518 }
1519
dequeueAccessUnitMPEGAudio()1520 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
1521 const uint8_t *data = mBuffer->data();
1522 size_t size = mBuffer->size();
1523
1524 if (size < 4) {
1525 return NULL;
1526 }
1527
1528 uint32_t header = U32_AT(data);
1529
1530 size_t frameSize;
1531 int samplingRate, numChannels, bitrate, numSamples;
1532 if (!GetMPEGAudioFrameSize(
1533 header, &frameSize, &samplingRate, &numChannels,
1534 &bitrate, &numSamples)) {
1535 ALOGE("Failed to get audio frame size");
1536 mBuffer->setRange(0, 0);
1537 return NULL;
1538 }
1539
1540 if (size < frameSize) {
1541 return NULL;
1542 }
1543
1544 unsigned layer = 4 - ((header >> 17) & 3);
1545
1546 sp<ABuffer> accessUnit = new ABuffer(frameSize);
1547 memcpy(accessUnit->data(), data, frameSize);
1548
1549 memmove(mBuffer->data(),
1550 mBuffer->data() + frameSize,
1551 mBuffer->size() - frameSize);
1552
1553 mBuffer->setRange(0, mBuffer->size() - frameSize);
1554
1555 int64_t timeUs = fetchTimestamp(frameSize);
1556 if (timeUs < 0LL) {
1557 ALOGE("Negative timeUs");
1558 return NULL;
1559 }
1560
1561 if (mFormat != NULL) {
1562 const char *mime;
1563 if (mFormat->findCString(kKeyMIMEType, &mime)) {
1564 if ((layer == 1) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I)) {
1565 ALOGE("Audio layer is not MPEG_LAYER_I");
1566 return NULL;
1567 } else if ((layer == 2) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II)) {
1568 ALOGE("Audio layer is not MPEG_LAYER_II");
1569 return NULL;
1570 } else if ((layer == 3) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG)) {
1571 ALOGE("Audio layer is not AUDIO_MPEG");
1572 return NULL;
1573 }
1574 }
1575 }
1576
1577 accessUnit->meta()->setInt64("timeUs", timeUs);
1578 accessUnit->meta()->setInt32("isSync", 1);
1579
1580 if (mFormat == NULL) {
1581 mFormat = new MetaData;
1582
1583 switch (layer) {
1584 case 1:
1585 mFormat->setCString(
1586 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
1587 break;
1588 case 2:
1589 mFormat->setCString(
1590 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
1591 break;
1592 case 3:
1593 mFormat->setCString(
1594 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
1595 break;
1596 default:
1597 return NULL;
1598 }
1599
1600 mFormat->setInt32(kKeySampleRate, samplingRate);
1601 mFormat->setInt32(kKeyChannelCount, numChannels);
1602 }
1603
1604 return accessUnit;
1605 }
1606
EncodeSize14(uint8_t ** _ptr,size_t size)1607 static void EncodeSize14(uint8_t **_ptr, size_t size) {
1608 if (size > 0x3fff) {
1609 ALOGE("Wrong size");
1610 return;
1611 }
1612
1613 uint8_t *ptr = *_ptr;
1614
1615 *ptr++ = 0x80 | (size >> 7);
1616 *ptr++ = size & 0x7f;
1617
1618 *_ptr = ptr;
1619 }
1620
// Wraps the codec specific data `csd` into an ESDS blob and returns it.
//
// Layout (25 bytes of framing followed by the csd bytes):
//   0x03, 14-bit size (22 + csd), 2-byte ES_ID, 1 flags byte,
//   0x04, 14-bit size (16 + csd), 0x40, 12 zero bytes,
//   0x05, 14-bit size (csd), csd payload.
static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
    const size_t csdSize = csd->size();

    sp<ABuffer> esds = new ABuffer(csdSize + 25);
    uint8_t *out = esds->data();

    // Outer descriptor.
    *out++ = 0x03;
    EncodeSize14(&out, 22 + csdSize);

    // ES_ID (two bytes), then streamDependenceFlag, URL_Flag, OCRstreamFlag.
    memset(out, 0x00, 3);
    out += 3;

    // Nested descriptor.
    *out++ = 0x04;
    EncodeSize14(&out, 16 + csdSize);

    *out++ = 0x40;  // Audio ISO/IEC 14496-3

    // Twelve bytes that are left zeroed.
    memset(out, 0x00, 12);
    out += 12;

    // Innermost descriptor carrying the codec specific data itself.
    *out++ = 0x05;
    EncodeSize14(&out, csdSize);

    memcpy(out, csd->data(), csdSize);

    return esds;
}
1649
dequeueAccessUnitMPEGVideo()1650 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
1651 const uint8_t *data = mBuffer->data();
1652 size_t size = mBuffer->size();
1653
1654 Vector<size_t> userDataPositions;
1655
1656 bool sawPictureStart = false;
1657 int pprevStartCode = -1;
1658 int prevStartCode = -1;
1659 int currentStartCode = -1;
1660 bool gopFound = false;
1661 bool isClosedGop = false;
1662 bool brokenLink = false;
1663
1664 size_t offset = 0;
1665 while (offset + 3 < size) {
1666 if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
1667 ++offset;
1668 continue;
1669 }
1670
1671 pprevStartCode = prevStartCode;
1672 prevStartCode = currentStartCode;
1673 currentStartCode = data[offset + 3];
1674
1675 if (currentStartCode == 0xb3 && mFormat == NULL) {
1676 memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
1677 size -= offset;
1678 (void)fetchTimestamp(offset);
1679 offset = 0;
1680 mBuffer->setRange(0, size);
1681 }
1682
1683 if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
1684 || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
1685 // seqHeader without/with extension
1686
1687 if (mFormat == NULL) {
1688 if (size < 7u) {
1689 ALOGE("Size too small");
1690 return NULL;
1691 }
1692
1693 unsigned width =
1694 (data[4] << 4) | data[5] >> 4;
1695
1696 unsigned height =
1697 ((data[5] & 0x0f) << 8) | data[6];
1698
1699 mFormat = new MetaData;
1700 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1701 mFormat->setInt32(kKeyWidth, width);
1702 mFormat->setInt32(kKeyHeight, height);
1703
1704 ALOGI("found MPEG2 video codec config (%d x %d)", width, height);
1705
1706 sp<ABuffer> csd = new ABuffer(offset);
1707 memcpy(csd->data(), data, offset);
1708
1709 memmove(mBuffer->data(),
1710 mBuffer->data() + offset,
1711 mBuffer->size() - offset);
1712
1713 mBuffer->setRange(0, mBuffer->size() - offset);
1714 size -= offset;
1715 (void)fetchTimestamp(offset);
1716 offset = 0;
1717
1718 // hexdump(csd->data(), csd->size());
1719
1720 sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1721 mFormat->setData(
1722 kKeyESDS, kTypeESDS, esds->data(), esds->size());
1723
1724 return NULL;
1725 }
1726 }
1727
1728 if (mFormat != NULL && currentStartCode == 0xb8) {
1729 // GOP layer
1730 if (offset + 7 >= size) {
1731 ALOGE("Size too small");
1732 return NULL;
1733 }
1734 gopFound = true;
1735 isClosedGop = (data[offset + 7] & 0x40) != 0;
1736 brokenLink = (data[offset + 7] & 0x20) != 0;
1737 }
1738
1739 if (mFormat != NULL && currentStartCode == 0xb2) {
1740 userDataPositions.add(offset);
1741 }
1742
1743 if (mFormat != NULL && currentStartCode == 0x00) {
1744 // Picture start
1745
1746 if (!sawPictureStart) {
1747 sawPictureStart = true;
1748 } else {
1749 sp<ABuffer> accessUnit = new ABuffer(offset);
1750 memcpy(accessUnit->data(), data, offset);
1751
1752 memmove(mBuffer->data(),
1753 mBuffer->data() + offset,
1754 mBuffer->size() - offset);
1755
1756 mBuffer->setRange(0, mBuffer->size() - offset);
1757
1758 int64_t timeUs = fetchTimestamp(offset);
1759 if (timeUs < 0LL) {
1760 ALOGE("Negative timeUs");
1761 return NULL;
1762 }
1763
1764 offset = 0;
1765
1766 accessUnit->meta()->setInt64("timeUs", timeUs);
1767 if (gopFound && (!brokenLink || isClosedGop)) {
1768 accessUnit->meta()->setInt32("isSync", 1);
1769 }
1770
1771 ALOGV("returning MPEG video access unit at time %" PRId64 " us",
1772 timeUs);
1773
1774 // hexdump(accessUnit->data(), accessUnit->size());
1775
1776 if (userDataPositions.size() > 0) {
1777 sp<ABuffer> mpegUserData =
1778 new ABuffer(userDataPositions.size() * sizeof(size_t));
1779 if (mpegUserData != NULL && mpegUserData->data() != NULL) {
1780 for (size_t i = 0; i < userDataPositions.size(); ++i) {
1781 memcpy(
1782 mpegUserData->data() + i * sizeof(size_t),
1783 &userDataPositions[i], sizeof(size_t));
1784 }
1785 accessUnit->meta()->setBuffer("mpeg-user-data", mpegUserData);
1786 }
1787 }
1788
1789 return accessUnit;
1790 }
1791 }
1792
1793 ++offset;
1794 }
1795
1796 return NULL;
1797 }
1798
// Returns the size of the chunk that begins at `data`, i.e. the distance from
// `data` to the next 00 00 01 prefix, or -EAGAIN if `data` does not begin
// with a prefix, is too short to scan, or contains no following prefix yet.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const char kStartCode[] = "\x00\x00\x01";

    // per ISO/IEC 14496-2 6.2.1, a chunk has a 3-byte prefix + 1-byte start code
    // we need at least <prefix><start><next prefix> to successfully scan
    const bool longEnough = size >= 3 + 1 + 3;
    if (!longEnough || memcmp(kStartCode, data, 3) != 0) {
        return -EAGAIN;
    }

    // Look for the next prefix, starting right after the start code byte.
    for (size_t pos = 4; pos + 2 < size; ++pos) {
        if (memcmp(&data[pos], kStartCode, 3) == 0) {
            return pos;
        }
    }

    return -EAGAIN;
}
1824
dequeueAccessUnitMPEG4Video()1825 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
1826 uint8_t *data = mBuffer->data();
1827 size_t size = mBuffer->size();
1828
1829 enum {
1830 SKIP_TO_VISUAL_OBJECT_SEQ_START,
1831 EXPECT_VISUAL_OBJECT_START,
1832 EXPECT_VO_START,
1833 EXPECT_VOL_START,
1834 WAIT_FOR_VOP_START,
1835 SKIP_TO_VOP_START,
1836
1837 } state;
1838
1839 if (mFormat == NULL) {
1840 state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
1841 } else {
1842 state = SKIP_TO_VOP_START;
1843 }
1844
1845 int32_t width = -1, height = -1;
1846
1847 size_t offset = 0;
1848 ssize_t chunkSize;
1849 while ((chunkSize = getNextChunkSize(
1850 &data[offset], size - offset)) > 0) {
1851 bool discard = false;
1852
1853 unsigned chunkType = data[offset + 3];
1854
1855 switch (state) {
1856 case SKIP_TO_VISUAL_OBJECT_SEQ_START:
1857 {
1858 if (chunkType == 0xb0) {
1859 // Discard anything before this marker.
1860
1861 state = EXPECT_VISUAL_OBJECT_START;
1862 } else {
1863 discard = true;
1864 offset += chunkSize;
1865 ALOGW("b/74114680, advance to next chunk");
1866 android_errorWriteLog(0x534e4554, "74114680");
1867 }
1868 break;
1869 }
1870
1871 case EXPECT_VISUAL_OBJECT_START:
1872 {
1873 if (chunkType != 0xb5) {
1874 ALOGE("Unexpected chunkType");
1875 return NULL;
1876 }
1877 state = EXPECT_VO_START;
1878 break;
1879 }
1880
1881 case EXPECT_VO_START:
1882 {
1883 if (chunkType > 0x1f) {
1884 ALOGE("Unexpected chunkType");
1885 return NULL;
1886 }
1887 state = EXPECT_VOL_START;
1888 break;
1889 }
1890
1891 case EXPECT_VOL_START:
1892 {
1893 if ((chunkType & 0xf0) != 0x20) {
1894 ALOGE("Wrong chunkType");
1895 return NULL;
1896 }
1897
1898 if (!ExtractDimensionsFromVOLHeader(
1899 &data[offset], chunkSize,
1900 &width, &height)) {
1901 ALOGE("Failed to get dimension");
1902 return NULL;
1903 }
1904
1905 state = WAIT_FOR_VOP_START;
1906 break;
1907 }
1908
1909 case WAIT_FOR_VOP_START:
1910 {
1911 if (chunkType == 0xb3 || chunkType == 0xb6) {
1912 // group of VOP or VOP start.
1913
1914 mFormat = new MetaData;
1915 mFormat->setCString(
1916 kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
1917
1918 mFormat->setInt32(kKeyWidth, width);
1919 mFormat->setInt32(kKeyHeight, height);
1920
1921 ALOGI("found MPEG4 video codec config (%d x %d)",
1922 width, height);
1923
1924 sp<ABuffer> csd = new ABuffer(offset);
1925 memcpy(csd->data(), data, offset);
1926
1927 // hexdump(csd->data(), csd->size());
1928
1929 sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1930 mFormat->setData(
1931 kKeyESDS, kTypeESDS,
1932 esds->data(), esds->size());
1933
1934 discard = true;
1935 state = SKIP_TO_VOP_START;
1936 }
1937
1938 break;
1939 }
1940
1941 case SKIP_TO_VOP_START:
1942 {
1943 if (chunkType == 0xb6) {
1944 int vopCodingType = (data[offset + 4] & 0xc0) >> 6;
1945
1946 offset += chunkSize;
1947
1948 sp<ABuffer> accessUnit = new ABuffer(offset);
1949 memcpy(accessUnit->data(), data, offset);
1950
1951 memmove(data, &data[offset], size - offset);
1952 size -= offset;
1953 mBuffer->setRange(0, size);
1954
1955 int64_t timeUs = fetchTimestamp(offset);
1956 if (timeUs < 0LL) {
1957 ALOGE("Negative timeus");
1958 return NULL;
1959 }
1960
1961 offset = 0;
1962
1963 accessUnit->meta()->setInt64("timeUs", timeUs);
1964 if (vopCodingType == 0) { // intra-coded VOP
1965 accessUnit->meta()->setInt32("isSync", 1);
1966 }
1967
1968 ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
1969 timeUs);
1970
1971 // hexdump(accessUnit->data(), accessUnit->size());
1972
1973 return accessUnit;
1974 } else if (chunkType != 0xb3) {
1975 offset += chunkSize;
1976 discard = true;
1977 }
1978
1979 break;
1980 }
1981
1982 default:
1983 ALOGE("Unknown state: %d", state);
1984 return NULL;
1985 }
1986
1987 if (discard) {
1988 (void)fetchTimestamp(offset);
1989 memmove(data, &data[offset], size - offset);
1990 size -= offset;
1991 offset = 0;
1992 mBuffer->setRange(0, size);
1993 } else {
1994 offset += chunkSize;
1995 }
1996 }
1997
1998 return NULL;
1999 }
2000
signalEOS()2001 void ElementaryStreamQueue::signalEOS() {
2002 if (!mEOSReached) {
2003 if (mMode == MPEG_VIDEO) {
2004 const char *theEnd = "\x00\x00\x01\x00";
2005 appendData(theEnd, 4, 0);
2006 }
2007 mEOSReached = true;
2008 } else {
2009 ALOGW("EOS already signaled");
2010 }
2011 }
2012
dequeueAccessUnitMetadata()2013 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMetadata() {
2014 size_t size = mBuffer->size();
2015 if (!size) {
2016 return NULL;
2017 }
2018
2019 sp<ABuffer> accessUnit = new ABuffer(size);
2020 int64_t timeUs = fetchTimestamp(size);
2021 accessUnit->meta()->setInt64("timeUs", timeUs);
2022
2023 memcpy(accessUnit->data(), mBuffer->data(), size);
2024 mBuffer->setRange(0, 0);
2025
2026 if (mFormat == NULL) {
2027 mFormat = new MetaData;
2028 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_DATA_TIMED_ID3);
2029 }
2030
2031 return accessUnit;
2032 }
2033
signalNewSampleAesKey(const sp<AMessage> & keyItem)2034 void ElementaryStreamQueue::signalNewSampleAesKey(const sp<AMessage> &keyItem) {
2035 if (mSampleDecryptor == NULL) {
2036 ALOGE("signalNewSampleAesKey: Stream %x is not encrypted; keyItem: %p",
2037 mMode, keyItem.get());
2038 return;
2039 }
2040
2041 mSampleDecryptor->signalNewSampleAesKey(keyItem);
2042 }
2043
2044
2045 } // namespace android
2046