1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <memory>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include <log/log.h>
28 #include <utils/Log.h>
29
30 #include "AC4Parser.h"
31 #include "MPEG4Extractor.h"
32 #include "SampleTable.h"
33 #include "ItemTable.h"
34
35 #include <ESDS.h>
36 #include <ID3.h>
37 #include <media/DataSourceBase.h>
38 #include <media/ExtractorUtils.h>
39 #include <media/stagefright/foundation/ABitReader.h>
40 #include <media/stagefright/foundation/ABuffer.h>
41 #include <media/stagefright/foundation/ADebug.h>
42 #include <media/stagefright/foundation/AMessage.h>
43 #include <media/stagefright/foundation/AudioPresentationInfo.h>
44 #include <media/stagefright/foundation/AUtils.h>
45 #include <media/stagefright/foundation/ByteUtils.h>
46 #include <media/stagefright/foundation/ColorUtils.h>
47 #include <media/stagefright/foundation/avc_utils.h>
48 #include <media/stagefright/foundation/hexdump.h>
49 #include <media/stagefright/foundation/OpusHeader.h>
50 #include <media/stagefright/MediaBufferGroup.h>
51 #include <media/stagefright/MediaDefs.h>
52 #include <media/stagefright/MetaDataBase.h>
53 #include <utils/String8.h>
54
55 #include <byteswap.h>
56
57 #ifndef UINT32_MAX
58 #define UINT32_MAX (4294967295U)
59 #endif
60
61 #define ALAC_SPECIFIC_INFO_SIZE (36)
62
63 namespace android {
64
// Size limits applied while parsing.
enum {
    // Upper bound, in bytes, on the track header chunk handed back to callers.
    kMaxTrackHeaderSize = 32,

    // Largest atom we are willing to handle. The spec allows some atoms to be
    // bigger, but anything above this limit is not accepted.
    kMaxAtomSize = 64 << 20,  // 64 MiB
};
73
74 class MPEG4Source : public MediaTrackHelper {
75 static const size_t kMaxPcmFrameSize = 8192;
76 public:
77 // Caller retains ownership of both "dataSource" and "sampleTable".
78 MPEG4Source(AMediaFormat *format,
79 DataSourceHelper *dataSource,
80 int32_t timeScale,
81 const sp<SampleTable> &sampleTable,
82 Vector<SidxEntry> &sidx,
83 const Trex *trex,
84 off64_t firstMoofOffset,
85 const sp<ItemTable> &itemTable,
86 uint64_t elstShiftStartTicks);
87 virtual status_t init();
88
89 virtual media_status_t start();
90 virtual media_status_t stop();
91
92 virtual media_status_t getFormat(AMediaFormat *);
93
94 virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
supportsNonBlockingRead()95 bool supportsNonBlockingRead() override { return true; }
96 virtual media_status_t fragmentedRead(
97 MediaBufferHelper **buffer, const ReadOptions *options = NULL);
98
99 virtual ~MPEG4Source();
100
101 private:
102 Mutex mLock;
103
104 AMediaFormat *mFormat;
105 DataSourceHelper *mDataSource;
106 int32_t mTimescale;
107 sp<SampleTable> mSampleTable;
108 uint32_t mCurrentSampleIndex;
109 uint32_t mCurrentFragmentIndex;
110 Vector<SidxEntry> &mSegments;
111 const Trex *mTrex;
112 off64_t mFirstMoofOffset;
113 off64_t mCurrentMoofOffset;
114 off64_t mNextMoofOffset;
115 uint32_t mCurrentTime; // in media timescale ticks
116 int32_t mLastParsedTrackId;
117 int32_t mTrackId;
118
119 int32_t mCryptoMode; // passed in from extractor
120 int32_t mDefaultIVSize; // passed in from extractor
121 uint8_t mCryptoKey[16]; // passed in from extractor
122 int32_t mDefaultEncryptedByteBlock;
123 int32_t mDefaultSkipByteBlock;
124 uint32_t mCurrentAuxInfoType;
125 uint32_t mCurrentAuxInfoTypeParameter;
126 int32_t mCurrentDefaultSampleInfoSize;
127 uint32_t mCurrentSampleInfoCount;
128 uint32_t mCurrentSampleInfoAllocSize;
129 uint8_t* mCurrentSampleInfoSizes;
130 uint32_t mCurrentSampleInfoOffsetCount;
131 uint32_t mCurrentSampleInfoOffsetsAllocSize;
132 uint64_t* mCurrentSampleInfoOffsets;
133
134 bool mIsAVC;
135 bool mIsHEVC;
136 bool mIsAC4;
137 bool mIsPcm;
138 size_t mNALLengthSize;
139
140 bool mStarted;
141
142 MediaBufferHelper *mBuffer;
143
144 uint8_t *mSrcBuffer;
145
146 bool mIsHeif;
147 bool mIsAudio;
148 sp<ItemTable> mItemTable;
149
150 // Start offset from composition time to presentation time.
151 // Support shift only for video tracks through mElstShiftStartTicks for now.
152 uint64_t mElstShiftStartTicks;
153
154 size_t parseNALSize(const uint8_t *data) const;
155 status_t parseChunk(off64_t *offset);
156 status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
157 status_t parseTrackFragmentRun(off64_t offset, off64_t size);
158 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
159 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
160 status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
161 status_t parseSampleEncryption(off64_t offset);
162 // returns -1 for invalid layer ID
163 int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
164
165 struct TrackFragmentHeaderInfo {
166 enum Flags {
167 kBaseDataOffsetPresent = 0x01,
168 kSampleDescriptionIndexPresent = 0x02,
169 kDefaultSampleDurationPresent = 0x08,
170 kDefaultSampleSizePresent = 0x10,
171 kDefaultSampleFlagsPresent = 0x20,
172 kDurationIsEmpty = 0x10000,
173 };
174
175 uint32_t mTrackID;
176 uint32_t mFlags;
177 uint64_t mBaseDataOffset;
178 uint32_t mSampleDescriptionIndex;
179 uint32_t mDefaultSampleDuration;
180 uint32_t mDefaultSampleSize;
181 uint32_t mDefaultSampleFlags;
182
183 uint64_t mDataOffset;
184 };
185 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
186
187 struct Sample {
188 off64_t offset;
189 size_t size;
190 uint32_t duration;
191 int32_t compositionOffset;
192 uint8_t iv[16];
193 Vector<size_t> clearsizes;
194 Vector<size_t> encryptedsizes;
195 };
196 Vector<Sample> mCurrentSamples;
197
198 MPEG4Source(const MPEG4Source &);
199 MPEG4Source &operator=(const MPEG4Source &);
200 };
201
202 // This custom data source wraps an existing one and satisfies requests
203 // falling entirely within a cached range from the cache while forwarding
204 // all remaining requests to the wrapped datasource.
205 // This is used to cache the full sampletable metadata for a single track,
206 // possibly wrapping multiple times to cover all tracks, i.e.
207 // Each CachedRangedDataSource caches the sampletable metadata for a single track.
208
209 class CachedRangedDataSource : public DataSourceHelper {
210 public:
211 explicit CachedRangedDataSource(DataSourceHelper *source);
212 virtual ~CachedRangedDataSource();
213
214 ssize_t readAt(off64_t offset, void *data, size_t size) override;
215 status_t getSize(off64_t *size) override;
216 uint32_t flags() override;
217
218 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
219
220
221 private:
222 Mutex mLock;
223
224 DataSourceHelper *mSource;
225 bool mOwnsDataSource;
226 off64_t mCachedOffset;
227 size_t mCachedSize;
228 uint8_t *mCache;
229
230 void clearCache();
231
232 CachedRangedDataSource(const CachedRangedDataSource &);
233 CachedRangedDataSource &operator=(const CachedRangedDataSource &);
234 };
235
CachedRangedDataSource(DataSourceHelper * source)236 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
237 : DataSourceHelper(source),
238 mSource(source),
239 mOwnsDataSource(false),
240 mCachedOffset(0),
241 mCachedSize(0),
242 mCache(NULL) {
243 }
244
~CachedRangedDataSource()245 CachedRangedDataSource::~CachedRangedDataSource() {
246 clearCache();
247 if (mOwnsDataSource) {
248 delete mSource;
249 }
250 }
251
clearCache()252 void CachedRangedDataSource::clearCache() {
253 if (mCache) {
254 free(mCache);
255 mCache = NULL;
256 }
257
258 mCachedOffset = 0;
259 mCachedSize = 0;
260 }
261
readAt(off64_t offset,void * data,size_t size)262 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
263 Mutex::Autolock autoLock(mLock);
264
265 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
266 memcpy(data, &mCache[offset - mCachedOffset], size);
267 return size;
268 }
269
270 return mSource->readAt(offset, data, size);
271 }
272
getSize(off64_t * size)273 status_t CachedRangedDataSource::getSize(off64_t *size) {
274 return mSource->getSize(size);
275 }
276
flags()277 uint32_t CachedRangedDataSource::flags() {
278 return mSource->flags();
279 }
280
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)281 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
282 size_t size,
283 bool assumeSourceOwnershipOnSuccess) {
284 Mutex::Autolock autoLock(mLock);
285
286 clearCache();
287
288 mCache = (uint8_t *)malloc(size);
289
290 if (mCache == NULL) {
291 return -ENOMEM;
292 }
293
294 mCachedOffset = offset;
295 mCachedSize = size;
296
297 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
298
299 if (err < (ssize_t)size) {
300 clearCache();
301
302 return ERROR_IO;
303 }
304 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
305 return OK;
306 }
307
308 ////////////////////////////////////////////////////////////////////////////////
309
// Debug switch: when true, parseChunk() prints a hex dump of each chunk header.
static constexpr bool kUseHexDump = false;
311
FourCC2MIME(uint32_t fourcc)312 static const char *FourCC2MIME(uint32_t fourcc) {
313 switch (fourcc) {
314 case FOURCC("mp4a"):
315 return MEDIA_MIMETYPE_AUDIO_AAC;
316
317 case FOURCC("samr"):
318 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
319
320 case FOURCC("sawb"):
321 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
322
323 case FOURCC("ec-3"):
324 return MEDIA_MIMETYPE_AUDIO_EAC3;
325
326 case FOURCC("mp4v"):
327 return MEDIA_MIMETYPE_VIDEO_MPEG4;
328
329 case FOURCC("s263"):
330 case FOURCC("h263"):
331 case FOURCC("H263"):
332 return MEDIA_MIMETYPE_VIDEO_H263;
333
334 case FOURCC("avc1"):
335 return MEDIA_MIMETYPE_VIDEO_AVC;
336
337 case FOURCC("hvc1"):
338 case FOURCC("hev1"):
339 return MEDIA_MIMETYPE_VIDEO_HEVC;
340 case FOURCC("ac-4"):
341 return MEDIA_MIMETYPE_AUDIO_AC4;
342 case FOURCC("Opus"):
343 return MEDIA_MIMETYPE_AUDIO_OPUS;
344
345 case FOURCC("twos"):
346 case FOURCC("sowt"):
347 return MEDIA_MIMETYPE_AUDIO_RAW;
348 case FOURCC("alac"):
349 return MEDIA_MIMETYPE_AUDIO_ALAC;
350 case FOURCC("fLaC"):
351 return MEDIA_MIMETYPE_AUDIO_FLAC;
352 case FOURCC("av01"):
353 return MEDIA_MIMETYPE_VIDEO_AV1;
354 case FOURCC("vp09"):
355 return MEDIA_MIMETYPE_VIDEO_VP9;
356 case FOURCC(".mp3"):
357 case 0x6D730055: // "ms U" mp3 audio
358 return MEDIA_MIMETYPE_AUDIO_MPEG;
359 default:
360 ALOGW("Unknown fourcc: %c%c%c%c",
361 (fourcc >> 24) & 0xff,
362 (fourcc >> 16) & 0xff,
363 (fourcc >> 8) & 0xff,
364 fourcc & 0xff
365 );
366 return "application/octet-stream";
367 }
368 }
369
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)370 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
371 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
372 // AMR NB audio is always mono, 8kHz
373 *channels = 1;
374 *rate = 8000;
375 return true;
376 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
377 // AMR WB audio is always mono, 16kHz
378 *channels = 1;
379 *rate = 16000;
380 return true;
381 }
382 return false;
383 }
384
MPEG4Extractor(DataSourceHelper * source,const char * mime)385 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
386 : mMoofOffset(0),
387 mMoofFound(false),
388 mMdatFound(false),
389 mDataSource(source),
390 mInitCheck(NO_INIT),
391 mHeaderTimescale(0),
392 mIsQT(false),
393 mIsHeif(false),
394 mHasMoovBox(false),
395 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
396 mFirstTrack(NULL),
397 mLastTrack(NULL) {
398 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
399 mFileMetaData = AMediaFormat_new();
400 }
401
~MPEG4Extractor()402 MPEG4Extractor::~MPEG4Extractor() {
403 Track *track = mFirstTrack;
404 while (track) {
405 Track *next = track->next;
406
407 delete track;
408 track = next;
409 }
410 mFirstTrack = mLastTrack = NULL;
411
412 for (size_t i = 0; i < mPssh.size(); i++) {
413 delete [] mPssh[i].data;
414 }
415 mPssh.clear();
416
417 delete mDataSource;
418 AMediaFormat_delete(mFileMetaData);
419 }
420
flags() const421 uint32_t MPEG4Extractor::flags() const {
422 return CAN_PAUSE |
423 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
424 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
425 }
426
getMetaData(AMediaFormat * meta)427 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
428 status_t err;
429 if ((err = readMetaData()) != OK) {
430 return AMEDIA_ERROR_UNKNOWN;
431 }
432 AMediaFormat_copy(meta, mFileMetaData);
433 return AMEDIA_OK;
434 }
435
countTracks()436 size_t MPEG4Extractor::countTracks() {
437 status_t err;
438 if ((err = readMetaData()) != OK) {
439 ALOGV("MPEG4Extractor::countTracks: no tracks");
440 return 0;
441 }
442
443 size_t n = 0;
444 Track *track = mFirstTrack;
445 while (track) {
446 ++n;
447 track = track->next;
448 }
449
450 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
451 return n;
452 }
453
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)454 media_status_t MPEG4Extractor::getTrackMetaData(
455 AMediaFormat *meta,
456 size_t index, uint32_t flags) {
457 status_t err;
458 if ((err = readMetaData()) != OK) {
459 return AMEDIA_ERROR_UNKNOWN;
460 }
461
462 Track *track = mFirstTrack;
463 while (index > 0) {
464 if (track == NULL) {
465 return AMEDIA_ERROR_UNKNOWN;
466 }
467
468 track = track->next;
469 --index;
470 }
471
472 if (track == NULL) {
473 return AMEDIA_ERROR_UNKNOWN;
474 }
475
476 [=] {
477 int64_t duration;
478 int32_t samplerate;
479 // Only for audio track.
480 if (track->has_elst && mHeaderTimescale != 0 &&
481 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
482 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
483
484 // Elst has to be processed only the first time this function is called.
485 track->has_elst = false;
486
487 if (track->elst_segment_duration > INT64_MAX) {
488 return;
489 }
490 int64_t segment_duration = track->elst_segment_duration;
491 int64_t media_time = track->elst_media_time;
492 int64_t halfscale = track->timescale / 2;
493
494 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
495 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
496 segment_duration, media_time,
497 halfscale, mHeaderTimescale, track->timescale);
498
499 if ((uint32_t)samplerate != track->timescale){
500 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
501 samplerate);
502 }
503 // Both delay and paddingsamples have to be set inorder for either to be
504 // effective in the lower layers.
505 int64_t delay = 0;
506 if (media_time > 0) { // Gapless playback
507 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
508 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
509 __builtin_add_overflow(delay, halfscale, &delay) ||
510 (delay /= track->timescale, false) ||
511 delay > INT32_MAX ||
512 delay < INT32_MIN) {
513 ALOGW("ignoring edit list with bogus values");
514 return;
515 }
516 }
517 ALOGV("delay = %" PRId64, delay);
518 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
519
520 int64_t paddingsamples = 0;
521 if (segment_duration > 0) {
522 int64_t scaled_duration;
523 // scaled_duration = duration * mHeaderTimescale;
524 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
525 return;
526 }
527 ALOGV("scaled_duration = %" PRId64, scaled_duration);
528
529 int64_t segment_end;
530 int64_t padding;
531 int64_t segment_duration_e6;
532 int64_t media_time_scaled_e6;
533 int64_t media_time_scaled;
534 // padding = scaled_duration - ((segment_duration * 1000000) +
535 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
536 // segment_duration is based on timescale in movie header box(mdhd)
537 // media_time is based on timescale track header/media timescale
538 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
539 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
540 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
541 return;
542 }
543 media_time_scaled_e6 /= track->timescale;
544 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
545 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
546 return;
547 }
548 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
549 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
550 // might be slightly shorter than the segment duration, which would make the
551 // padding negative. Clamp to zero.
552 if (padding > 0) {
553 int64_t halfscale_mht = mHeaderTimescale / 2;
554 int64_t halfscale_e6;
555 int64_t timescale_e6;
556 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
557 // / (mHeaderTimescale * 1000000);
558 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
559 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
560 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
561 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
562 (paddingsamples /= timescale_e6, false) ||
563 paddingsamples > INT32_MAX) {
564 return;
565 }
566 }
567 }
568 ALOGV("paddingsamples = %" PRId64, paddingsamples);
569 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
570 }
571 }();
572
573 if ((flags & kIncludeExtensiveMetaData)
574 && !track->includes_expensive_metadata) {
575 track->includes_expensive_metadata = true;
576
577 const char *mime;
578 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
579 if (!strncasecmp("video/", mime, 6)) {
580 // MPEG2 tracks do not provide CSD, so read the stream header
581 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
582 off64_t offset;
583 size_t size;
584 if (track->sampleTable->getMetaDataForSample(
585 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
586 if (size > kMaxTrackHeaderSize) {
587 size = kMaxTrackHeaderSize;
588 }
589 uint8_t header[kMaxTrackHeaderSize];
590 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
591 AMediaFormat_setBuffer(track->meta,
592 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
593 }
594 }
595 }
596
597 if (mMoofOffset > 0) {
598 int64_t duration;
599 if (AMediaFormat_getInt64(track->meta,
600 AMEDIAFORMAT_KEY_DURATION, &duration)) {
601 // nothing fancy, just pick a frame near 1/4th of the duration
602 AMediaFormat_setInt64(track->meta,
603 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
604 }
605 } else {
606 uint32_t sampleIndex;
607 uint64_t sampleTime;
608 if (track->timescale != 0 &&
609 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
610 && track->sampleTable->getMetaDataForSample(
611 sampleIndex, NULL /* offset */, NULL /* size */,
612 &sampleTime) == OK) {
613 AMediaFormat_setInt64(track->meta,
614 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
615 ((int64_t)sampleTime * 1000000) / track->timescale);
616 }
617 }
618 }
619 }
620
621 AMediaFormat_copy(meta, track->meta);
622 return AMEDIA_OK;
623 }
624
readMetaData()625 status_t MPEG4Extractor::readMetaData() {
626 if (mInitCheck != NO_INIT) {
627 return mInitCheck;
628 }
629
630 off64_t offset = 0;
631 status_t err;
632 bool sawMoovOrSidx = false;
633
634 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
635 (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
636 (mItemTable != NULL) && mItemTable->isValid()))) {
637 off64_t orig_offset = offset;
638 err = parseChunk(&offset, 0);
639
640 if (err != OK && err != UNKNOWN_ERROR) {
641 break;
642 } else if (offset <= orig_offset) {
643 // only continue parsing if the offset was advanced,
644 // otherwise we might end up in an infinite loop
645 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
646 err = ERROR_MALFORMED;
647 break;
648 } else if (err == UNKNOWN_ERROR) {
649 sawMoovOrSidx = true;
650 }
651 }
652
653 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
654 off64_t exifOffset;
655 size_t exifSize;
656 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
657 AMediaFormat_setInt64(mFileMetaData,
658 AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
659 AMediaFormat_setInt64(mFileMetaData,
660 AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
661 }
662 for (uint32_t imageIndex = 0;
663 imageIndex < mItemTable->countImages(); imageIndex++) {
664 AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
665 if (meta == NULL) {
666 ALOGE("heif image %u has no meta!", imageIndex);
667 continue;
668 }
669 // Some heif files advertise image sequence brands (eg. 'hevc') in
670 // ftyp box, but don't have any valid tracks in them. Instead of
671 // reporting the entire file as malformed, we override the error
672 // to allow still images to be extracted.
673 if (err != OK) {
674 ALOGW("Extracting still images only");
675 err = OK;
676 }
677 mInitCheck = OK;
678
679 ALOGV("adding HEIF image track %u", imageIndex);
680 Track *track = new Track;
681 if (mLastTrack != NULL) {
682 mLastTrack->next = track;
683 } else {
684 mFirstTrack = track;
685 }
686 mLastTrack = track;
687
688 track->meta = meta;
689 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
690 track->timescale = 1000000;
691 }
692 }
693
694 if (mInitCheck == OK) {
695 if (findTrackByMimePrefix("video/") != NULL) {
696 AMediaFormat_setString(mFileMetaData,
697 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
698 } else if (findTrackByMimePrefix("audio/") != NULL) {
699 AMediaFormat_setString(mFileMetaData,
700 AMEDIAFORMAT_KEY_MIME, "audio/mp4");
701 } else if (findTrackByMimePrefix(
702 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
703 AMediaFormat_setString(mFileMetaData,
704 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
705 } else {
706 AMediaFormat_setString(mFileMetaData,
707 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
708 }
709 } else {
710 mInitCheck = err;
711 }
712
713 CHECK_NE(err, (status_t)NO_INIT);
714
715 // copy pssh data into file metadata
716 uint64_t psshsize = 0;
717 for (size_t i = 0; i < mPssh.size(); i++) {
718 psshsize += 20 + mPssh[i].datalen;
719 }
720 if (psshsize > 0 && psshsize <= UINT32_MAX) {
721 char *buf = (char*)malloc(psshsize);
722 if (!buf) {
723 ALOGE("b/28471206");
724 return NO_MEMORY;
725 }
726 char *ptr = buf;
727 for (size_t i = 0; i < mPssh.size(); i++) {
728 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
729 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
730 ptr += (20 + mPssh[i].datalen);
731 }
732 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
733 free(buf);
734 }
735
736 return mInitCheck;
737 }
738
739 struct PathAdder {
PathAdderandroid::PathAdder740 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
741 : mPath(path) {
742 mPath->push(chunkType);
743 }
744
~PathAdderandroid::PathAdder745 ~PathAdder() {
746 mPath->pop();
747 }
748
749 private:
750 Vector<uint32_t> *mPath;
751
752 PathAdder(const PathAdder &);
753 PathAdder &operator=(const PathAdder &);
754 };
755
underMetaDataPath(const Vector<uint32_t> & path)756 static bool underMetaDataPath(const Vector<uint32_t> &path) {
757 return path.size() >= 5
758 && path[0] == FOURCC("moov")
759 && path[1] == FOURCC("udta")
760 && path[2] == FOURCC("meta")
761 && path[3] == FOURCC("ilst");
762 }
763
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)764 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
765 return path.size() >= 2
766 && path[0] == FOURCC("moov")
767 && path[1] == FOURCC("meta")
768 && (depth == 2
769 || (depth == 3
770 && (path[2] == FOURCC("hdlr")
771 || path[2] == FOURCC("ilst")
772 || path[2] == FOURCC("keys"))));
773 }
774
775 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)776 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
777 // delta between mpeg4 time and unix epoch time
778 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
779 if (time_1904 < INT64_MIN + delta) {
780 return false;
781 }
782 time_t time_1970 = time_1904 - delta;
783
784 char tmp[32];
785 struct tm* tm = gmtime(&time_1970);
786 if (tm != NULL &&
787 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
788 s->setTo(tmp);
789 return true;
790 }
791 return false;
792 }
793
parseChunk(off64_t * offset,int depth)794 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
795 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
796
797 if (*offset < 0) {
798 ALOGE("b/23540914");
799 return ERROR_MALFORMED;
800 }
801 if (depth > 100) {
802 ALOGE("b/27456299");
803 return ERROR_MALFORMED;
804 }
805 uint32_t hdr[2];
806 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
807 return ERROR_IO;
808 }
809 uint64_t chunk_size = ntohl(hdr[0]);
810 int32_t chunk_type = ntohl(hdr[1]);
811 off64_t data_offset = *offset + 8;
812
813 if (chunk_size == 1) {
814 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
815 return ERROR_IO;
816 }
817 chunk_size = ntoh64(chunk_size);
818 data_offset += 8;
819
820 if (chunk_size < 16) {
821 // The smallest valid chunk is 16 bytes long in this case.
822 return ERROR_MALFORMED;
823 }
824 } else if (chunk_size == 0) {
825 if (depth == 0) {
826 // atom extends to end of file
827 off64_t sourceSize;
828 if (mDataSource->getSize(&sourceSize) == OK) {
829 chunk_size = (sourceSize - *offset);
830 } else {
831 // XXX could we just pick a "sufficiently large" value here?
832 ALOGE("atom size is 0, and data source has no size");
833 return ERROR_MALFORMED;
834 }
835 } else {
836 // not allowed for non-toplevel atoms, skip it
837 *offset += 4;
838 return OK;
839 }
840 } else if (chunk_size < 8) {
841 // The smallest valid chunk is 8 bytes long.
842 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
843 return ERROR_MALFORMED;
844 }
845
846 char chunk[5];
847 MakeFourCCString(chunk_type, chunk);
848 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
849
850 if (kUseHexDump) {
851 static const char kWhitespace[] = " ";
852 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
853 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
854
855 char buffer[256];
856 size_t n = chunk_size;
857 if (n > sizeof(buffer)) {
858 n = sizeof(buffer);
859 }
860 if (mDataSource->readAt(*offset, buffer, n)
861 < (ssize_t)n) {
862 return ERROR_IO;
863 }
864
865 hexdump(buffer, n);
866 }
867
868 PathAdder autoAdder(&mPath, chunk_type);
869
870 // (data_offset - *offset) is either 8 or 16
871 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
872 if (chunk_data_size < 0) {
873 ALOGE("b/23540914");
874 return ERROR_MALFORMED;
875 }
876 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
877 char errMsg[100];
878 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
879 ALOGE("%s (b/28615448)", errMsg);
880 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
881 return ERROR_MALFORMED;
882 }
883
884 if (chunk_type != FOURCC("cprt")
885 && chunk_type != FOURCC("covr")
886 && mPath.size() == 5 && underMetaDataPath(mPath)) {
887 off64_t stop_offset = *offset + chunk_size;
888 *offset = data_offset;
889 while (*offset < stop_offset) {
890 status_t err = parseChunk(offset, depth + 1);
891 if (err != OK) {
892 return err;
893 }
894 }
895
896 if (*offset != stop_offset) {
897 return ERROR_MALFORMED;
898 }
899
900 return OK;
901 }
902
903 switch(chunk_type) {
904 case FOURCC("moov"):
905 case FOURCC("trak"):
906 case FOURCC("mdia"):
907 case FOURCC("minf"):
908 case FOURCC("dinf"):
909 case FOURCC("stbl"):
910 case FOURCC("mvex"):
911 case FOURCC("moof"):
912 case FOURCC("traf"):
913 case FOURCC("mfra"):
914 case FOURCC("udta"):
915 case FOURCC("ilst"):
916 case FOURCC("sinf"):
917 case FOURCC("schi"):
918 case FOURCC("edts"):
919 case FOURCC("wave"):
920 {
921 if (chunk_type == FOURCC("moov") && depth != 0) {
922 ALOGE("moov: depth %d", depth);
923 return ERROR_MALFORMED;
924 }
925
926 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
927 ALOGE("duplicate moov");
928 return ERROR_MALFORMED;
929 }
930
931 if (chunk_type == FOURCC("moof") && !mMoofFound) {
932 // store the offset of the first segment
933 mMoofFound = true;
934 mMoofOffset = *offset;
935 }
936
937 if (chunk_type == FOURCC("stbl")) {
938 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
939
940 if (mDataSource->flags()
941 & (DataSourceBase::kWantsPrefetching
942 | DataSourceBase::kIsCachingDataSource)) {
943 CachedRangedDataSource *cachedSource =
944 new CachedRangedDataSource(mDataSource);
945
946 if (cachedSource->setCachedRange(
947 *offset, chunk_size,
948 true /* assume ownership on success */) == OK) {
949 mDataSource = cachedSource;
950 } else {
951 delete cachedSource;
952 }
953 }
954
955 if (mLastTrack == NULL) {
956 return ERROR_MALFORMED;
957 }
958
959 mLastTrack->sampleTable = new SampleTable(mDataSource);
960 }
961
962 bool isTrack = false;
963 if (chunk_type == FOURCC("trak")) {
964 if (depth != 1) {
965 ALOGE("trak: depth %d", depth);
966 return ERROR_MALFORMED;
967 }
968 isTrack = true;
969
970 ALOGV("adding new track");
971 Track *track = new Track;
972 if (mLastTrack) {
973 mLastTrack->next = track;
974 } else {
975 mFirstTrack = track;
976 }
977 mLastTrack = track;
978
979 track->meta = AMediaFormat_new();
980 AMediaFormat_setString(track->meta,
981 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
982 }
983
984 off64_t stop_offset = *offset + chunk_size;
985 *offset = data_offset;
986 while (*offset < stop_offset) {
987
988 // pass udata terminate
989 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
990 // handle the case that udta terminates with terminate code x00000000
991 // note that 0 terminator is optional and we just handle this case.
992 uint32_t terminate_code = 1;
993 mDataSource->readAt(*offset, &terminate_code, 4);
994 if (0 == terminate_code) {
995 *offset += 4;
996 ALOGD("Terminal code for udta");
997 continue;
998 } else {
999 ALOGW("invalid udta Terminal code");
1000 }
1001 }
1002
1003 status_t err = parseChunk(offset, depth + 1);
1004 if (err != OK) {
1005 if (isTrack) {
1006 mLastTrack->skipTrack = true;
1007 break;
1008 }
1009 return err;
1010 }
1011 }
1012
1013 if (*offset != stop_offset) {
1014 return ERROR_MALFORMED;
1015 }
1016
1017 if (isTrack) {
1018 int32_t trackId;
1019 // There must be exactly one track header per track.
1020
1021 if (!AMediaFormat_getInt32(mLastTrack->meta,
1022 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1023 mLastTrack->skipTrack = true;
1024 }
1025
1026 status_t err = verifyTrack(mLastTrack);
1027 if (err != OK) {
1028 mLastTrack->skipTrack = true;
1029 }
1030
1031
1032 if (mLastTrack->skipTrack) {
1033 ALOGV("skipping this track...");
1034 Track *cur = mFirstTrack;
1035
1036 if (cur == mLastTrack) {
1037 delete cur;
1038 mFirstTrack = mLastTrack = NULL;
1039 } else {
1040 while (cur && cur->next != mLastTrack) {
1041 cur = cur->next;
1042 }
1043 if (cur) {
1044 cur->next = NULL;
1045 }
1046 delete mLastTrack;
1047 mLastTrack = cur;
1048 }
1049
1050 return OK;
1051 }
1052
1053 // place things we built elsewhere into their final locations
1054
1055 // put aggregated tx3g data into the metadata
1056 if (mLastTrack->mTx3gFilled > 0) {
1057 ALOGV("Putting %zu bytes of tx3g data into meta data",
1058 mLastTrack->mTx3gFilled);
1059 AMediaFormat_setBuffer(mLastTrack->meta,
1060 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1061 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1062 // drop it now to reduce our footprint
1063 free(mLastTrack->mTx3gBuffer);
1064 mLastTrack->mTx3gBuffer = NULL;
1065 mLastTrack->mTx3gFilled = 0;
1066 mLastTrack->mTx3gSize = 0;
1067 }
1068
1069 } else if (chunk_type == FOURCC("moov")) {
1070 mInitCheck = OK;
1071
1072 return UNKNOWN_ERROR; // Return a generic error.
1073 }
1074 break;
1075 }
1076
1077 case FOURCC("schm"):
1078 {
1079
1080 *offset += chunk_size;
1081 if (!mLastTrack) {
1082 return ERROR_MALFORMED;
1083 }
1084
1085 uint32_t scheme_type;
1086 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1087 return ERROR_IO;
1088 }
1089 scheme_type = ntohl(scheme_type);
1090 int32_t mode = kCryptoModeUnencrypted;
1091 switch(scheme_type) {
1092 case FOURCC("cbc1"):
1093 {
1094 mode = kCryptoModeAesCbc;
1095 break;
1096 }
1097 case FOURCC("cbcs"):
1098 {
1099 mode = kCryptoModeAesCbc;
1100 mLastTrack->subsample_encryption = true;
1101 break;
1102 }
1103 case FOURCC("cenc"):
1104 {
1105 mode = kCryptoModeAesCtr;
1106 break;
1107 }
1108 case FOURCC("cens"):
1109 {
1110 mode = kCryptoModeAesCtr;
1111 mLastTrack->subsample_encryption = true;
1112 break;
1113 }
1114 }
1115 if (mode != kCryptoModeUnencrypted) {
1116 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1117 }
1118 break;
1119 }
1120
1121
1122 case FOURCC("elst"):
1123 {
1124 *offset += chunk_size;
1125
1126 if (!mLastTrack) {
1127 return ERROR_MALFORMED;
1128 }
1129
1130 // See 14496-12 8.6.6
1131 uint8_t version;
1132 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1133 return ERROR_IO;
1134 }
1135
1136 uint32_t entry_count;
1137 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1138 return ERROR_IO;
1139 }
1140
1141 if (entry_count != 1) {
1142 // we only support a single entry at the moment, for gapless playback
1143 // or start offset
1144 ALOGW("ignoring edit list with %d entries", entry_count);
1145 } else {
1146 off64_t entriesoffset = data_offset + 8;
1147 uint64_t segment_duration;
1148 int64_t media_time;
1149
1150 if (version == 1) {
1151 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1152 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1153 return ERROR_IO;
1154 }
1155 } else if (version == 0) {
1156 uint32_t sd;
1157 int32_t mt;
1158 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1159 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1160 return ERROR_IO;
1161 }
1162 segment_duration = sd;
1163 media_time = mt;
1164 } else {
1165 return ERROR_IO;
1166 }
1167
1168 // save these for later, because the elst atom might precede
1169 // the atoms that actually give us the duration and sample rate
1170 // needed to calculate the padding and delay values
1171 mLastTrack->has_elst = true;
1172 mLastTrack->elst_media_time = media_time;
1173 mLastTrack->elst_segment_duration = segment_duration;
1174 }
1175 break;
1176 }
1177
1178 case FOURCC("frma"):
1179 {
1180 *offset += chunk_size;
1181
1182 uint32_t original_fourcc;
1183 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1184 return ERROR_IO;
1185 }
1186 original_fourcc = ntohl(original_fourcc);
1187 ALOGV("read original format: %d", original_fourcc);
1188
1189 if (mLastTrack == NULL) {
1190 return ERROR_MALFORMED;
1191 }
1192
1193 AMediaFormat_setString(mLastTrack->meta,
1194 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1195 uint32_t num_channels = 0;
1196 uint32_t sample_rate = 0;
1197 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1198 AMediaFormat_setInt32(mLastTrack->meta,
1199 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1200 AMediaFormat_setInt32(mLastTrack->meta,
1201 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1202 }
1203
1204 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1205 off64_t tmpOffset = *offset;
1206 status_t err = parseALACSampleEntry(&tmpOffset);
1207 if (err != OK) {
1208 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1209 return err;
1210 }
1211 *offset = tmpOffset + 8;
1212 }
1213
1214 break;
1215 }
1216
1217 case FOURCC("tenc"):
1218 {
1219 *offset += chunk_size;
1220
1221 if (chunk_size < 32) {
1222 return ERROR_MALFORMED;
1223 }
1224
1225 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1226 // default IV size, 16 bytes default KeyID
1227 // (ISO 23001-7)
1228
1229 uint8_t version;
1230 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1231 < (ssize_t)sizeof(version)) {
1232 return ERROR_IO;
1233 }
1234
1235 uint8_t buf[4];
1236 memset(buf, 0, 4);
1237 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1238 return ERROR_IO;
1239 }
1240
1241 if (mLastTrack == NULL) {
1242 return ERROR_MALFORMED;
1243 }
1244
1245 uint8_t defaultEncryptedByteBlock = 0;
1246 uint8_t defaultSkipByteBlock = 0;
1247 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1248 if (version == 1) {
1249 uint32_t pattern = buf[2];
1250 defaultEncryptedByteBlock = pattern >> 4;
1251 defaultSkipByteBlock = pattern & 0xf;
1252 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1253 // use (1,0) to mean "encrypt everything"
1254 defaultEncryptedByteBlock = 1;
1255 }
1256 } else if (mLastTrack->subsample_encryption) {
1257 ALOGW("subsample_encryption should be version 1");
1258 } else if (defaultAlgorithmId > 1) {
1259 // only 0 (clear) and 1 (AES-128) are valid
1260 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1261 defaultAlgorithmId = 1;
1262 }
1263
1264 memset(buf, 0, 4);
1265 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1266 return ERROR_IO;
1267 }
1268 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1269
1270 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1271 // unencrypted data (algorithm id 0) must have an IV size of 0
1272 return ERROR_MALFORMED;
1273 } else if (defaultIVSize != 0 &&
1274 defaultIVSize != 8 &&
1275 defaultIVSize != 16) {
1276 return ERROR_MALFORMED;
1277 }
1278
1279 uint8_t defaultKeyId[16];
1280
1281 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1282 return ERROR_IO;
1283 }
1284
1285 sp<ABuffer> defaultConstantIv;
1286 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1287
1288 uint8_t ivlength;
1289 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1290 < (ssize_t)sizeof(ivlength)) {
1291 return ERROR_IO;
1292 }
1293
1294 if (ivlength != 8 && ivlength != 16) {
1295 ALOGW("unsupported IV length: %u", ivlength);
1296 return ERROR_MALFORMED;
1297 }
1298
1299 defaultConstantIv = new ABuffer(ivlength);
1300 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1301 < (ssize_t)ivlength) {
1302 return ERROR_IO;
1303 }
1304
1305 defaultConstantIv->setRange(0, ivlength);
1306 }
1307
1308 int32_t tmpAlgorithmId;
1309 if (!AMediaFormat_getInt32(mLastTrack->meta,
1310 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1311 AMediaFormat_setInt32(mLastTrack->meta,
1312 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1313 }
1314
1315 AMediaFormat_setInt32(mLastTrack->meta,
1316 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1317 AMediaFormat_setBuffer(mLastTrack->meta,
1318 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1319 AMediaFormat_setInt32(mLastTrack->meta,
1320 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1321 AMediaFormat_setInt32(mLastTrack->meta,
1322 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1323 if (defaultConstantIv != NULL) {
1324 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1325 defaultConstantIv->data(), defaultConstantIv->size());
1326 }
1327 break;
1328 }
1329
1330 case FOURCC("tkhd"):
1331 {
1332 *offset += chunk_size;
1333
1334 status_t err;
1335 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1336 return err;
1337 }
1338
1339 break;
1340 }
1341
1342 case FOURCC("tref"):
1343 {
1344 off64_t stop_offset = *offset + chunk_size;
1345 *offset = data_offset;
1346 while (*offset < stop_offset) {
1347 status_t err = parseChunk(offset, depth + 1);
1348 if (err != OK) {
1349 return err;
1350 }
1351 }
1352 if (*offset != stop_offset) {
1353 return ERROR_MALFORMED;
1354 }
1355 break;
1356 }
1357
1358 case FOURCC("thmb"):
1359 {
1360 *offset += chunk_size;
1361
1362 if (mLastTrack != NULL) {
1363 // Skip thumbnail track for now since we don't have an
1364 // API to retrieve it yet.
1365 // The thumbnail track can't be accessed by negative index or time,
1366 // because each timed sample has its own corresponding thumbnail
1367 // in the thumbnail track. We'll need a dedicated API to retrieve
1368 // thumbnail at time instead.
1369 mLastTrack->skipTrack = true;
1370 }
1371
1372 break;
1373 }
1374
1375 case FOURCC("pssh"):
1376 {
1377 *offset += chunk_size;
1378
1379 PsshInfo pssh;
1380
1381 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1382 return ERROR_IO;
1383 }
1384
1385 uint32_t psshdatalen = 0;
1386 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1387 return ERROR_IO;
1388 }
1389 pssh.datalen = ntohl(psshdatalen);
1390 ALOGV("pssh data size: %d", pssh.datalen);
1391 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1392 // pssh data length exceeds size of containing box
1393 return ERROR_MALFORMED;
1394 }
1395
1396 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1397 if (pssh.data == NULL) {
1398 return ERROR_MALFORMED;
1399 }
1400 ALOGV("allocated pssh @ %p", pssh.data);
1401 ssize_t requested = (ssize_t) pssh.datalen;
1402 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1403 delete[] pssh.data;
1404 return ERROR_IO;
1405 }
1406 mPssh.push_back(pssh);
1407
1408 break;
1409 }
1410
1411 case FOURCC("mdhd"):
1412 {
1413 *offset += chunk_size;
1414
1415 if (chunk_data_size < 4 || mLastTrack == NULL) {
1416 return ERROR_MALFORMED;
1417 }
1418
1419 uint8_t version;
1420 if (mDataSource->readAt(
1421 data_offset, &version, sizeof(version))
1422 < (ssize_t)sizeof(version)) {
1423 return ERROR_IO;
1424 }
1425
1426 off64_t timescale_offset;
1427
1428 if (version == 1) {
1429 timescale_offset = data_offset + 4 + 16;
1430 } else if (version == 0) {
1431 timescale_offset = data_offset + 4 + 8;
1432 } else {
1433 return ERROR_IO;
1434 }
1435
1436 uint32_t timescale;
1437 if (mDataSource->readAt(
1438 timescale_offset, ×cale, sizeof(timescale))
1439 < (ssize_t)sizeof(timescale)) {
1440 return ERROR_IO;
1441 }
1442
1443 if (!timescale) {
1444 ALOGE("timescale should not be ZERO.");
1445 return ERROR_MALFORMED;
1446 }
1447
1448 mLastTrack->timescale = ntohl(timescale);
1449
1450 // 14496-12 says all ones means indeterminate, but some files seem to use
1451 // 0 instead. We treat both the same.
1452 int64_t duration = 0;
1453 if (version == 1) {
1454 if (mDataSource->readAt(
1455 timescale_offset + 4, &duration, sizeof(duration))
1456 < (ssize_t)sizeof(duration)) {
1457 return ERROR_IO;
1458 }
1459 if (duration != -1) {
1460 duration = ntoh64(duration);
1461 }
1462 } else {
1463 uint32_t duration32;
1464 if (mDataSource->readAt(
1465 timescale_offset + 4, &duration32, sizeof(duration32))
1466 < (ssize_t)sizeof(duration32)) {
1467 return ERROR_IO;
1468 }
1469 if (duration32 != 0xffffffff) {
1470 duration = ntohl(duration32);
1471 }
1472 }
1473 if (duration != 0 && mLastTrack->timescale != 0) {
1474 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1475 if (durationUs < 0 || durationUs > INT64_MAX) {
1476 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1477 (long long) duration, (long long) mLastTrack->timescale);
1478 return ERROR_MALFORMED;
1479 }
1480 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1481 }
1482
1483 uint8_t lang[2];
1484 off64_t lang_offset;
1485 if (version == 1) {
1486 lang_offset = timescale_offset + 4 + 8;
1487 } else if (version == 0) {
1488 lang_offset = timescale_offset + 4 + 4;
1489 } else {
1490 return ERROR_IO;
1491 }
1492
1493 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1494 < (ssize_t)sizeof(lang)) {
1495 return ERROR_IO;
1496 }
1497
1498 // To get the ISO-639-2/T three character language code
1499 // 1 bit pad followed by 3 5-bits characters. Each character
1500 // is packed as the difference between its ASCII value and 0x60.
1501 char lang_code[4];
1502 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1503 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1504 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1505 lang_code[3] = '\0';
1506
1507 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1508
1509 break;
1510 }
1511
1512 case FOURCC("stsd"):
1513 {
1514 uint8_t buffer[8];
1515 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1516 return ERROR_MALFORMED;
1517 }
1518
1519 if (mDataSource->readAt(
1520 data_offset, buffer, 8) < 8) {
1521 return ERROR_IO;
1522 }
1523
1524 if (U32_AT(buffer) != 0) {
1525 // Should be version 0, flags 0.
1526 return ERROR_MALFORMED;
1527 }
1528
1529 uint32_t entry_count = U32_AT(&buffer[4]);
1530
1531 if (entry_count > 1) {
1532 // For 3GPP timed text, there could be multiple tx3g boxes containing
1533 // multiple text display formats. These formats will be used to
1534 // display the timed text.
1535 // For encrypted files, there may also be more than one entry.
1536 const char *mime;
1537
1538 if (mLastTrack == NULL)
1539 return ERROR_MALFORMED;
1540
1541 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1542 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1543 strcasecmp(mime, "application/octet-stream")) {
1544 // For now we only support a single type of media per track.
1545 mLastTrack->skipTrack = true;
1546 *offset += chunk_size;
1547 break;
1548 }
1549 }
1550 off64_t stop_offset = *offset + chunk_size;
1551 *offset = data_offset + 8;
1552 for (uint32_t i = 0; i < entry_count; ++i) {
1553 status_t err = parseChunk(offset, depth + 1);
1554 if (err != OK) {
1555 return err;
1556 }
1557 }
1558
1559 if (*offset != stop_offset) {
1560 return ERROR_MALFORMED;
1561 }
1562 break;
1563 }
1564 case FOURCC("mett"):
1565 {
1566 *offset += chunk_size;
1567
1568 // the absolute minimum size of a compliant mett box is 11 bytes:
1569 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1570 // The resulting mime_format would be invalid at that size though.
1571 if (mLastTrack == NULL || chunk_data_size < 11) {
1572 return ERROR_MALFORMED;
1573 }
1574
1575 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1576 if (buffer.get() == NULL) {
1577 return NO_MEMORY;
1578 }
1579
1580 if (mDataSource->readAt(
1581 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1582 return ERROR_IO;
1583 }
1584
1585 // ISO-14496-12:
1586 // int8 reserved[6]; // should be all zeroes
1587 // int16_t data_reference_index;
1588 // char content_encoding[]; // null terminated, optional (= just the null byte)
1589 // char mime_format[]; // null terminated, mandatory
1590 // optional other boxes
1591 //
1592 // API < 29:
1593 // char mime_format[]; // null terminated
1594 //
1595 // API >= 29
1596 // char mime_format[]; // null terminated
1597 // char mime_format[]; // null terminated
1598
1599 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1600 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1601 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1602 // make it somewhat compatible with the standard. The workaround is to write the
1603 // null-terminated mime_format string twice. This allows compliant parsers to
1604 // read the missing reserved, data_reference_index, and content_encoding fields
1605 // from the first mime_type string. The actual mime_format field would then be
1606 // read correctly from the second string. The non-compliant Android frameworks
1607 // from API 28 and earlier would still be able to read the mime_format correctly
1608 // as it would only read the first null-terminated mime_format string. To enable
1609 // reading metadata tracks generated from both the non-compliant and compliant
1610 // formats, a check needs to be done to see which format is used.
1611 const char *str = (const char*) buffer.get();
1612 size_t string_length = strnlen(str, chunk_data_size);
1613
1614 if (string_length == chunk_data_size - 1) {
1615 // This is likely a pre API 29 file, since it's a single null terminated
1616 // string filling the entire box.
1617 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1618 } else {
1619 // This might be a fully compliant metadata track, a "double mime" compatibility
1620 // track, or anything else, including a single non-terminated string, so we need
1621 // to determine the length of each string we want to parse out of the box.
1622 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1623 if (encoding_length + 8 >= chunk_data_size - 2) {
1624 // the encoding extends to the end of the box, so there's no mime_format
1625 return ERROR_MALFORMED;
1626 }
1627 String8 contentEncoding(str + 8, encoding_length);
1628 String8 mimeFormat(str + 8 + encoding_length + 1,
1629 chunk_data_size - 8 - encoding_length - 1);
1630 AMediaFormat_setString(mLastTrack->meta,
1631 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1632 }
1633 break;
1634 }
1635
1636 case FOURCC("mp4a"):
1637 case FOURCC("enca"):
1638 case FOURCC("samr"):
1639 case FOURCC("sawb"):
1640 case FOURCC("Opus"):
1641 case FOURCC("twos"):
1642 case FOURCC("sowt"):
1643 case FOURCC("alac"):
1644 case FOURCC("fLaC"):
1645 case FOURCC(".mp3"):
1646 case 0x6D730055: // "ms U" mp3 audio
1647 {
1648 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1649
1650 if (chunk_type == FOURCC("alac")) {
1651 off64_t offsetTmp = *offset;
1652 status_t err = parseALACSampleEntry(&offsetTmp);
1653 if (err != OK) {
1654 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1655 return err;
1656 }
1657 }
1658
1659 // Ignore all atoms embedded in QT wave atom
1660 ALOGV("Ignore all atoms embedded in QT wave atom");
1661 *offset += chunk_size;
1662 break;
1663 }
1664
1665 uint8_t buffer[8 + 20];
1666 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1667 // Basic AudioSampleEntry size.
1668 return ERROR_MALFORMED;
1669 }
1670
1671 if (mDataSource->readAt(
1672 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1673 return ERROR_IO;
1674 }
1675
1676 // we can get data_ref_index value from U16_AT(&buffer[6])
1677 uint16_t version = U16_AT(&buffer[8]);
1678 uint32_t num_channels = U16_AT(&buffer[16]);
1679
1680 uint16_t sample_size = U16_AT(&buffer[18]);
1681 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1682
1683 if (mLastTrack == NULL)
1684 return ERROR_MALFORMED;
1685
1686 off64_t stop_offset = *offset + chunk_size;
1687 *offset = data_offset + sizeof(buffer);
1688
1689 if (mIsQT) {
1690 if (version == 1) {
1691 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1692 return ERROR_IO;
1693 }
1694
1695 #if 0
1696 U32_AT(buffer); // samples per packet
1697 U32_AT(&buffer[4]); // bytes per packet
1698 U32_AT(&buffer[8]); // bytes per frame
1699 U32_AT(&buffer[12]); // bytes per sample
1700 #endif
1701 *offset += 16;
1702 } else if (version == 2) {
1703 uint8_t v2buffer[36];
1704 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1705 return ERROR_IO;
1706 }
1707
1708 #if 0
1709 U32_AT(v2buffer); // size of struct only
1710 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1711 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1712 U32_AT(&v2buffer[16]); // always 0x7f000000
1713 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1714 U32_AT(&v2buffer[24]); // format specifc flags
1715 U32_AT(&v2buffer[28]); // const bytes per audio packet
1716 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1717 #endif
1718 *offset += 36;
1719 }
1720 }
1721
1722 if (chunk_type != FOURCC("enca")) {
1723 // if the chunk type is enca, we'll get the type from the frma box later
1724 AMediaFormat_setString(mLastTrack->meta,
1725 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1726 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1727
1728 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1729 AMediaFormat_setInt32(mLastTrack->meta,
1730 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1731 if (chunk_type == FOURCC("twos")) {
1732 AMediaFormat_setInt32(mLastTrack->meta,
1733 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1734 }
1735 }
1736 }
1737 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1738 chunk, num_channels, sample_size, sample_rate);
1739 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1740 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1741
1742 if (chunk_type == FOURCC("Opus")) {
1743 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1744 data_offset += sizeof(buffer);
1745 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1746
1747 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1748 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1749 return ERROR_MALFORMED;
1750 }
1751 // Read Opus Header
1752 if (mDataSource->readAt(
1753 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1754 return ERROR_IO;
1755 }
1756
1757 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1758 // http://wiki.xiph.org/OggOpus#ID_Header
1759 strncpy((char *)opusInfo, "OpusHead", 8);
1760
1761 // Version shall be 0 as per mp4 Opus Specific Box
1762 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1763 if (opusInfo[8]) {
1764 return ERROR_MALFORMED;
1765 }
1766 // Force version to 1 as per OpusHead definition
1767 // (http://wiki.xiph.org/OggOpus#ID_Header)
1768 opusInfo[8] = 1;
1769
1770 // Read Opus Specific Box values
1771 size_t opusOffset = 10;
1772 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1773 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1774 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1775
1776 // Convert Opus Specific Box values. ParseOpusHeader expects
1777 // the values in LE, however MP4 stores these values as BE
1778 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1779 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1780 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1781 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1782
1783 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1784 static const int32_t kOpusSampleRate = 48000;
1785 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1786
1787 AMediaFormat_setBuffer(mLastTrack->meta,
1788 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1789 AMediaFormat_setBuffer(mLastTrack->meta,
1790 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1791 AMediaFormat_setBuffer(mLastTrack->meta,
1792 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1793
1794 data_offset += opusInfoSize;
1795 *offset = data_offset;
1796 CHECK_EQ(*offset, stop_offset);
1797 }
1798
1799 if (!mIsQT && chunk_type == FOURCC("alac")) {
1800 data_offset += sizeof(buffer);
1801
1802 status_t err = parseALACSampleEntry(&data_offset);
1803 if (err != OK) {
1804 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1805 return err;
1806 }
1807 *offset = data_offset;
1808 CHECK_EQ(*offset, stop_offset);
1809 }
1810
1811 if (chunk_type == FOURCC("fLaC")) {
1812
1813 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1814 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1815 uint8_t flacInfo[4 + 4 + 34];
1816 // skipping dFla, version
1817 data_offset += sizeof(buffer) + 12;
1818 size_t flacOffset = 4;
1819 // Add flaC header mime type to CSD
1820 strncpy((char *)flacInfo, "fLaC", 4);
1821 if (mDataSource->readAt(
1822 data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1823 (ssize_t)sizeof(flacInfo) - flacOffset) {
1824 return ERROR_IO;
1825 }
1826 data_offset += sizeof(flacInfo) - flacOffset;
1827
1828 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1829 sizeof(flacInfo));
1830 *offset = data_offset;
1831 CHECK_EQ(*offset, stop_offset);
1832 }
1833
1834 while (*offset < stop_offset) {
1835 status_t err = parseChunk(offset, depth + 1);
1836 if (err != OK) {
1837 return err;
1838 }
1839 }
1840
1841 if (*offset != stop_offset) {
1842 return ERROR_MALFORMED;
1843 }
1844 break;
1845 }
1846
1847 case FOURCC("mp4v"):
1848 case FOURCC("encv"):
1849 case FOURCC("s263"):
1850 case FOURCC("H263"):
1851 case FOURCC("h263"):
1852 case FOURCC("avc1"):
1853 case FOURCC("hvc1"):
1854 case FOURCC("hev1"):
1855 case FOURCC("av01"):
1856 case FOURCC("vp09"):
1857 {
1858 uint8_t buffer[78];
1859 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1860 // Basic VideoSampleEntry size.
1861 return ERROR_MALFORMED;
1862 }
1863
1864 if (mDataSource->readAt(
1865 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1866 return ERROR_IO;
1867 }
1868
1869 // we can get data_ref_index value from U16_AT(&buffer[6])
1870 uint16_t width = U16_AT(&buffer[6 + 18]);
1871 uint16_t height = U16_AT(&buffer[6 + 20]);
1872
1873 // The video sample is not standard-compliant if it has invalid dimension.
1874 // Use some default width and height value, and
1875 // let the decoder figure out the actual width and height (and thus
1876 // be prepared for INFO_FORMAT_CHANGED event).
1877 if (width == 0) width = 352;
1878 if (height == 0) height = 288;
1879
1880 // printf("*** coding='%s' width=%d height=%d\n",
1881 // chunk, width, height);
1882
1883 if (mLastTrack == NULL)
1884 return ERROR_MALFORMED;
1885
1886 if (chunk_type != FOURCC("encv")) {
1887 // if the chunk type is encv, we'll get the type from the frma box later
1888 AMediaFormat_setString(mLastTrack->meta,
1889 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1890 }
1891 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
1892 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
1893
1894 off64_t stop_offset = *offset + chunk_size;
1895 *offset = data_offset + sizeof(buffer);
1896 while (*offset < stop_offset) {
1897 status_t err = parseChunk(offset, depth + 1);
1898 if (err != OK) {
1899 return err;
1900 }
1901 }
1902
1903 if (*offset != stop_offset) {
1904 return ERROR_MALFORMED;
1905 }
1906 break;
1907 }
1908
1909 case FOURCC("stco"):
1910 case FOURCC("co64"):
1911 {
1912 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1913 return ERROR_MALFORMED;
1914 }
1915
1916 status_t err =
1917 mLastTrack->sampleTable->setChunkOffsetParams(
1918 chunk_type, data_offset, chunk_data_size);
1919
1920 *offset += chunk_size;
1921
1922 if (err != OK) {
1923 return err;
1924 }
1925
1926 break;
1927 }
1928
1929 case FOURCC("stsc"):
1930 {
1931 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1932 return ERROR_MALFORMED;
1933
1934 status_t err =
1935 mLastTrack->sampleTable->setSampleToChunkParams(
1936 data_offset, chunk_data_size);
1937
1938 *offset += chunk_size;
1939
1940 if (err != OK) {
1941 return err;
1942 }
1943
1944 break;
1945 }
1946
1947 case FOURCC("stsz"):
1948 case FOURCC("stz2"):
1949 {
1950 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1951 return ERROR_MALFORMED;
1952 }
1953
1954 status_t err =
1955 mLastTrack->sampleTable->setSampleSizeParams(
1956 chunk_type, data_offset, chunk_data_size);
1957
1958 *offset += chunk_size;
1959
1960 if (err != OK) {
1961 return err;
1962 }
1963
1964 adjustRawDefaultFrameSize();
1965
1966 size_t max_size;
1967 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1968
1969 if (err != OK) {
1970 return err;
1971 }
1972
1973 if (max_size != 0) {
1974 // Assume that a given buffer only contains at most 10 chunks,
1975 // each chunk originally prefixed with a 2 byte length will
1976 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1977 // and thus will grow by 2 bytes per chunk.
1978 if (max_size > SIZE_MAX - 10 * 2) {
1979 ALOGE("max sample size too big: %zu", max_size);
1980 return ERROR_MALFORMED;
1981 }
1982 AMediaFormat_setInt32(mLastTrack->meta,
1983 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
1984 } else {
1985 // No size was specified. Pick a conservatively large size.
1986 uint32_t width, height;
1987 if (!AMediaFormat_getInt32(mLastTrack->meta,
1988 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
1989 !AMediaFormat_getInt32(mLastTrack->meta,
1990 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
1991 ALOGE("No width or height, assuming worst case 1080p");
1992 width = 1920;
1993 height = 1080;
1994 } else {
1995 // A resolution was specified, check that it's not too big. The values below
1996 // were chosen so that the calculations below don't cause overflows, they're
1997 // not indicating that resolutions up to 32kx32k are actually supported.
1998 if (width > 32768 || height > 32768) {
1999 ALOGE("can't support %u x %u video", width, height);
2000 return ERROR_MALFORMED;
2001 }
2002 }
2003
2004 const char *mime;
2005 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2006 if (!strncmp(mime, "audio/", 6)) {
2007 // for audio, use 128KB
2008 max_size = 1024 * 128;
2009 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2010 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2011 // AVC & HEVC requires compression ratio of at least 2, and uses
2012 // macroblocks
2013 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2014 } else {
2015 // For all other formats there is no minimum compression
2016 // ratio. Use compression ratio of 1.
2017 max_size = width * height * 3 / 2;
2018 }
2019 // HACK: allow 10% overhead
2020 // TODO: read sample size from traf atom for fragmented MPEG4.
2021 max_size += max_size / 10;
2022 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2023 }
2024
2025 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2026 // mimetype) previously obtained, so don't cache them.
2027 const char *mime;
2028 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2029 // Calculate average frame rate.
2030 if (!strncasecmp("video/", mime, 6)) {
2031 size_t nSamples = mLastTrack->sampleTable->countSamples();
2032 if (nSamples == 0) {
2033 int32_t trackId;
2034 if (AMediaFormat_getInt32(mLastTrack->meta,
2035 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2036 for (size_t i = 0; i < mTrex.size(); i++) {
2037 Trex *t = &mTrex.editItemAt(i);
2038 if (t->track_ID == (uint32_t) trackId) {
2039 if (t->default_sample_duration > 0) {
2040 int32_t frameRate =
2041 mLastTrack->timescale / t->default_sample_duration;
2042 AMediaFormat_setInt32(mLastTrack->meta,
2043 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2044 }
2045 break;
2046 }
2047 }
2048 }
2049 } else {
2050 int64_t durationUs;
2051 if (AMediaFormat_getInt64(mLastTrack->meta,
2052 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2053 if (durationUs > 0) {
2054 int32_t frameRate = (nSamples * 1000000LL +
2055 (durationUs >> 1)) / durationUs;
2056 AMediaFormat_setInt32(mLastTrack->meta,
2057 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2058 }
2059 }
2060 ALOGV("setting frame count %zu", nSamples);
2061 AMediaFormat_setInt32(mLastTrack->meta,
2062 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2063 }
2064 }
2065
2066 break;
2067 }
2068
2069 case FOURCC("stts"):
2070 {
2071 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2072 return ERROR_MALFORMED;
2073
2074 *offset += chunk_size;
2075
2076 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2077 char chunk[5];
2078 MakeFourCCString(mPath[depth - 1], chunk);
2079 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2080 break;
2081 }
2082
2083 status_t err =
2084 mLastTrack->sampleTable->setTimeToSampleParams(
2085 data_offset, chunk_data_size);
2086
2087 if (err != OK) {
2088 return err;
2089 }
2090
2091 break;
2092 }
2093
2094 case FOURCC("ctts"):
2095 {
2096 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2097 return ERROR_MALFORMED;
2098
2099 *offset += chunk_size;
2100
2101 status_t err =
2102 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2103 data_offset, chunk_data_size);
2104
2105 if (err != OK) {
2106 return err;
2107 }
2108
2109 break;
2110 }
2111
2112 case FOURCC("stss"):
2113 {
2114 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2115 return ERROR_MALFORMED;
2116
2117 *offset += chunk_size;
2118
2119 status_t err =
2120 mLastTrack->sampleTable->setSyncSampleParams(
2121 data_offset, chunk_data_size);
2122
2123 if (err != OK) {
2124 return err;
2125 }
2126
2127 break;
2128 }
2129
2130 // \xA9xyz
2131 case FOURCC("\251xyz"):
2132 {
2133 *offset += chunk_size;
2134
2135 // Best case the total data length inside "\xA9xyz" box would
2136 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2137 // where "\x00\x05" is the text string length with value = 5,
2138 // "\0x15\xc7" is the language code = en, and "+0+0/" is a
2139 // location (string) value with longitude = 0 and latitude = 0.
2140 // Since some devices encountered in the wild omit the trailing
2141 // slash, we'll allow that.
2142 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2143 return ERROR_MALFORMED;
2144 }
2145
2146 uint16_t len;
2147 if (!mDataSource->getUInt16(data_offset, &len)) {
2148 return ERROR_IO;
2149 }
2150
2151 // allow "+0+0" without trailing slash
2152 if (len < 4 || len > chunk_data_size - 4) {
2153 return ERROR_MALFORMED;
2154 }
2155 // The location string following the language code is formatted
2156 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2157 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2158 // and to add a terminating 0.
2159 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2160 if (!buffer) {
2161 return NO_MEMORY;
2162 }
2163
2164 if (mDataSource->readAt(
2165 data_offset + 4, &buffer[0], len) < len) {
2166 return ERROR_IO;
2167 }
2168
2169 len = strlen(&buffer[0]);
2170 if (len < 4) {
2171 return ERROR_MALFORMED;
2172 }
2173 // Add a trailing slash if there wasn't one.
2174 if (buffer[len - 1] != '/') {
2175 buffer[len] = '/';
2176 }
2177 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2178 break;
2179 }
2180
2181 case FOURCC("esds"):
2182 {
2183 *offset += chunk_size;
2184
2185 if (chunk_data_size < 4) {
2186 return ERROR_MALFORMED;
2187 }
2188
2189 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2190 uint8_t *buffer = tmp.get();
2191 if (buffer == NULL) {
2192 return -ENOMEM;
2193 }
2194
2195 if (mDataSource->readAt(
2196 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2197 return ERROR_IO;
2198 }
2199
2200 if (U32_AT(buffer) != 0) {
2201 // Should be version 0, flags 0.
2202 return ERROR_MALFORMED;
2203 }
2204
2205 if (mLastTrack == NULL)
2206 return ERROR_MALFORMED;
2207
2208 AMediaFormat_setBuffer(mLastTrack->meta,
2209 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2210
2211 if (mPath.size() >= 2
2212 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2213 // Information from the ESDS must be relied on for proper
2214 // setup of sample rate and channel count for MPEG4 Audio.
2215 // The generic header appears to only contain generic
2216 // information...
2217
2218 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2219 &buffer[4], chunk_data_size - 4);
2220
2221 if (err != OK) {
2222 return err;
2223 }
2224 }
2225 if (mPath.size() >= 2
2226 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2227 // Check if the video is MPEG2
2228 ESDS esds(&buffer[4], chunk_data_size - 4);
2229
2230 uint8_t objectTypeIndication;
2231 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2232 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2233 AMediaFormat_setString(mLastTrack->meta,
2234 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2235 }
2236 }
2237 }
2238 break;
2239 }
2240
2241 case FOURCC("btrt"):
2242 {
2243 *offset += chunk_size;
2244 if (mLastTrack == NULL) {
2245 return ERROR_MALFORMED;
2246 }
2247
2248 uint8_t buffer[12];
2249 if (chunk_data_size != sizeof(buffer)) {
2250 return ERROR_MALFORMED;
2251 }
2252
2253 if (mDataSource->readAt(
2254 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2255 return ERROR_IO;
2256 }
2257
2258 uint32_t maxBitrate = U32_AT(&buffer[4]);
2259 uint32_t avgBitrate = U32_AT(&buffer[8]);
2260 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2261 AMediaFormat_setInt32(mLastTrack->meta,
2262 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2263 }
2264 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2265 AMediaFormat_setInt32(mLastTrack->meta,
2266 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2267 }
2268 break;
2269 }
2270
2271 case FOURCC("avcC"):
2272 {
2273 *offset += chunk_size;
2274
2275 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2276
2277 if (buffer.get() == NULL) {
2278 ALOGE("b/28471206");
2279 return NO_MEMORY;
2280 }
2281
2282 if (mDataSource->readAt(
2283 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2284 return ERROR_IO;
2285 }
2286
2287 if (mLastTrack == NULL)
2288 return ERROR_MALFORMED;
2289
2290 AMediaFormat_setBuffer(mLastTrack->meta,
2291 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2292
2293 break;
2294 }
2295 case FOURCC("hvcC"):
2296 {
2297 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2298
2299 if (buffer.get() == NULL) {
2300 ALOGE("b/28471206");
2301 return NO_MEMORY;
2302 }
2303
2304 if (mDataSource->readAt(
2305 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2306 return ERROR_IO;
2307 }
2308
2309 if (mLastTrack == NULL)
2310 return ERROR_MALFORMED;
2311
2312 AMediaFormat_setBuffer(mLastTrack->meta,
2313 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2314
2315 *offset += chunk_size;
2316 break;
2317 }
2318
2319 case FOURCC("vpcC"):
2320 case FOURCC("av1C"):
2321 {
2322 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2323
2324 if (buffer.get() == NULL) {
2325 ALOGE("b/28471206");
2326 return NO_MEMORY;
2327 }
2328
2329 if (mDataSource->readAt(
2330 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2331 return ERROR_IO;
2332 }
2333
2334 if (mLastTrack == NULL)
2335 return ERROR_MALFORMED;
2336
2337 AMediaFormat_setBuffer(mLastTrack->meta,
2338 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2339
2340 *offset += chunk_size;
2341 break;
2342 }
2343 case FOURCC("d263"):
2344 {
2345 *offset += chunk_size;
2346 /*
2347 * d263 contains a fixed 7 bytes part:
2348 * vendor - 4 bytes
2349 * version - 1 byte
2350 * level - 1 byte
2351 * profile - 1 byte
2352 * optionally, "d263" box itself may contain a 16-byte
2353 * bit rate box (bitr)
2354 * average bit rate - 4 bytes
2355 * max bit rate - 4 bytes
2356 */
2357 char buffer[23];
2358 if (chunk_data_size != 7 &&
2359 chunk_data_size != 23) {
2360 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2361 return ERROR_MALFORMED;
2362 }
2363
2364 if (mDataSource->readAt(
2365 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2366 return ERROR_IO;
2367 }
2368
2369 if (mLastTrack == NULL)
2370 return ERROR_MALFORMED;
2371
2372 AMediaFormat_setBuffer(mLastTrack->meta,
2373 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2374
2375 break;
2376 }
2377
2378 case FOURCC("meta"):
2379 {
2380 off64_t stop_offset = *offset + chunk_size;
2381 *offset = data_offset;
2382 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2383 if (!isParsingMetaKeys) {
2384 uint8_t buffer[4];
2385 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2386 *offset = stop_offset;
2387 return ERROR_MALFORMED;
2388 }
2389
2390 if (mDataSource->readAt(
2391 data_offset, buffer, 4) < 4) {
2392 *offset = stop_offset;
2393 return ERROR_IO;
2394 }
2395
2396 if (U32_AT(buffer) != 0) {
2397 // Should be version 0, flags 0.
2398
2399 // If it's not, let's assume this is one of those
2400 // apparently malformed chunks that don't have flags
2401 // and completely different semantics than what's
2402 // in the MPEG4 specs and skip it.
2403 *offset = stop_offset;
2404 return OK;
2405 }
2406 *offset += sizeof(buffer);
2407 }
2408
2409 while (*offset < stop_offset) {
2410 status_t err = parseChunk(offset, depth + 1);
2411 if (err != OK) {
2412 return err;
2413 }
2414 }
2415
2416 if (*offset != stop_offset) {
2417 return ERROR_MALFORMED;
2418 }
2419 break;
2420 }
2421
2422 case FOURCC("iloc"):
2423 case FOURCC("iinf"):
2424 case FOURCC("iprp"):
2425 case FOURCC("pitm"):
2426 case FOURCC("idat"):
2427 case FOURCC("iref"):
2428 case FOURCC("ipro"):
2429 {
2430 if (mIsHeif) {
2431 if (mItemTable == NULL) {
2432 mItemTable = new ItemTable(mDataSource);
2433 }
2434 status_t err = mItemTable->parse(
2435 chunk_type, data_offset, chunk_data_size);
2436 if (err != OK) {
2437 return err;
2438 }
2439 }
2440 *offset += chunk_size;
2441 break;
2442 }
2443
2444 case FOURCC("mean"):
2445 case FOURCC("name"):
2446 case FOURCC("data"):
2447 {
2448 *offset += chunk_size;
2449
2450 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2451 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2452
2453 if (err != OK) {
2454 return err;
2455 }
2456 }
2457
2458 break;
2459 }
2460
2461 case FOURCC("mvhd"):
2462 {
2463 *offset += chunk_size;
2464
2465 if (depth != 1) {
2466 ALOGE("mvhd: depth %d", depth);
2467 return ERROR_MALFORMED;
2468 }
2469 if (chunk_data_size < 32) {
2470 return ERROR_MALFORMED;
2471 }
2472
2473 uint8_t header[32];
2474 if (mDataSource->readAt(
2475 data_offset, header, sizeof(header))
2476 < (ssize_t)sizeof(header)) {
2477 return ERROR_IO;
2478 }
2479
2480 uint64_t creationTime;
2481 uint64_t duration = 0;
2482 if (header[0] == 1) {
2483 creationTime = U64_AT(&header[4]);
2484 mHeaderTimescale = U32_AT(&header[20]);
2485 duration = U64_AT(&header[24]);
2486 if (duration == 0xffffffffffffffff) {
2487 duration = 0;
2488 }
2489 } else if (header[0] != 0) {
2490 return ERROR_MALFORMED;
2491 } else {
2492 creationTime = U32_AT(&header[4]);
2493 mHeaderTimescale = U32_AT(&header[12]);
2494 uint32_t d32 = U32_AT(&header[16]);
2495 if (d32 == 0xffffffff) {
2496 d32 = 0;
2497 }
2498 duration = d32;
2499 }
2500 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2501 AMediaFormat_setInt64(mFileMetaData,
2502 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2503 }
2504
2505 String8 s;
2506 if (convertTimeToDate(creationTime, &s)) {
2507 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2508 }
2509
2510 break;
2511 }
2512
2513 case FOURCC("mehd"):
2514 {
2515 *offset += chunk_size;
2516
2517 if (chunk_data_size < 8) {
2518 return ERROR_MALFORMED;
2519 }
2520
2521 uint8_t flags[4];
2522 if (mDataSource->readAt(
2523 data_offset, flags, sizeof(flags))
2524 < (ssize_t)sizeof(flags)) {
2525 return ERROR_IO;
2526 }
2527
2528 uint64_t duration = 0;
2529 if (flags[0] == 1) {
2530 // 64 bit
2531 if (chunk_data_size < 12) {
2532 return ERROR_MALFORMED;
2533 }
2534 mDataSource->getUInt64(data_offset + 4, &duration);
2535 if (duration == 0xffffffffffffffff) {
2536 duration = 0;
2537 }
2538 } else if (flags[0] == 0) {
2539 // 32 bit
2540 uint32_t d32;
2541 mDataSource->getUInt32(data_offset + 4, &d32);
2542 if (d32 == 0xffffffff) {
2543 d32 = 0;
2544 }
2545 duration = d32;
2546 } else {
2547 return ERROR_MALFORMED;
2548 }
2549
2550 if (duration != 0 && mHeaderTimescale != 0) {
2551 AMediaFormat_setInt64(mFileMetaData,
2552 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2553 }
2554
2555 break;
2556 }
2557
2558 case FOURCC("mdat"):
2559 {
2560 mMdatFound = true;
2561
2562 *offset += chunk_size;
2563 break;
2564 }
2565
2566 case FOURCC("hdlr"):
2567 {
2568 *offset += chunk_size;
2569
2570 if (underQTMetaPath(mPath, 3)) {
2571 break;
2572 }
2573
2574 uint32_t buffer;
2575 if (mDataSource->readAt(
2576 data_offset + 8, &buffer, 4) < 4) {
2577 return ERROR_IO;
2578 }
2579
2580 uint32_t type = ntohl(buffer);
2581 // For the 3GPP file format, the handler-type within the 'hdlr' box
2582 // shall be 'text'. We also want to support 'sbtl' handler type
2583 // for a practical reason as various MPEG4 containers use it.
2584 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2585 if (mLastTrack != NULL) {
2586 AMediaFormat_setString(mLastTrack->meta,
2587 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2588 }
2589 }
2590
2591 break;
2592 }
2593
2594 case FOURCC("keys"):
2595 {
2596 *offset += chunk_size;
2597
2598 if (underQTMetaPath(mPath, 3)) {
2599 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2600 if (err != OK) {
2601 return err;
2602 }
2603 }
2604 break;
2605 }
2606
2607 case FOURCC("trex"):
2608 {
2609 *offset += chunk_size;
2610
2611 if (chunk_data_size < 24) {
2612 return ERROR_IO;
2613 }
2614 Trex trex;
2615 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2616 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2617 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2618 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2619 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2620 return ERROR_IO;
2621 }
2622 mTrex.add(trex);
2623 break;
2624 }
2625
2626 case FOURCC("tx3g"):
2627 {
2628 if (mLastTrack == NULL)
2629 return ERROR_MALFORMED;
2630
2631 // complain about ridiculous chunks
2632 if (chunk_size > kMaxAtomSize) {
2633 return ERROR_MALFORMED;
2634 }
2635
2636 // complain about empty atoms
2637 if (chunk_data_size <= 0) {
2638 ALOGE("b/124330204");
2639 android_errorWriteLog(0x534e4554, "124330204");
2640 return ERROR_MALFORMED;
2641 }
2642
2643 // should fill buffer based on "data_offset" and "chunk_data_size"
2644 // instead of *offset and chunk_size;
2645 // but we've been feeding the extra data to consumers for multiple releases and
2646 // if those apps are compensating for it, we'd break them with such a change
2647 //
2648
2649 if (mLastTrack->mTx3gBuffer == NULL) {
2650 mLastTrack->mTx3gSize = 0;
2651 mLastTrack->mTx3gFilled = 0;
2652 }
2653 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2654 size_t growth = kTx3gGrowth;
2655 if (growth < chunk_size) {
2656 growth = chunk_size;
2657 }
2658 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2659 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2660 ALOGE("b/124330204 - too much space");
2661 android_errorWriteLog(0x534e4554, "124330204");
2662 return ERROR_MALFORMED;
2663 }
2664 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2665 mLastTrack->mTx3gSize + growth);
2666 if (updated == NULL) {
2667 return ERROR_MALFORMED;
2668 }
2669 mLastTrack->mTx3gBuffer = updated;
2670 mLastTrack->mTx3gSize += growth;
2671 }
2672
2673 if ((size_t)(mDataSource->readAt(*offset,
2674 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2675 chunk_size))
2676 < chunk_size) {
2677
2678 // advance read pointer so we don't end up reading this again
2679 *offset += chunk_size;
2680 return ERROR_IO;
2681 }
2682
2683 mLastTrack->mTx3gFilled += chunk_size;
2684 *offset += chunk_size;
2685 break;
2686 }
2687
2688 case FOURCC("covr"):
2689 {
2690 *offset += chunk_size;
2691
2692 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2693 chunk_data_size, data_offset);
2694
2695 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2696 return ERROR_MALFORMED;
2697 }
2698 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2699 if (buffer.get() == NULL) {
2700 ALOGE("b/28471206");
2701 return NO_MEMORY;
2702 }
2703 if (mDataSource->readAt(
2704 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2705 return ERROR_IO;
2706 }
2707 const int kSkipBytesOfDataBox = 16;
2708 if (chunk_data_size <= kSkipBytesOfDataBox) {
2709 return ERROR_MALFORMED;
2710 }
2711
2712 AMediaFormat_setBuffer(mFileMetaData,
2713 AMEDIAFORMAT_KEY_ALBUMART,
2714 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2715
2716 break;
2717 }
2718
2719 case FOURCC("colr"):
2720 {
2721 *offset += chunk_size;
2722 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2723 // ignore otherwise
2724 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
2725 status_t err = parseColorInfo(data_offset, chunk_data_size);
2726 if (err != OK) {
2727 return err;
2728 }
2729 }
2730
2731 break;
2732 }
2733
2734 case FOURCC("titl"):
2735 case FOURCC("perf"):
2736 case FOURCC("auth"):
2737 case FOURCC("gnre"):
2738 case FOURCC("albm"):
2739 case FOURCC("yrrc"):
2740 {
2741 *offset += chunk_size;
2742
2743 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2744
2745 if (err != OK) {
2746 return err;
2747 }
2748
2749 break;
2750 }
2751
2752 case FOURCC("ID32"):
2753 {
2754 *offset += chunk_size;
2755
2756 if (chunk_data_size < 6) {
2757 return ERROR_MALFORMED;
2758 }
2759
2760 parseID3v2MetaData(data_offset + 6);
2761
2762 break;
2763 }
2764
2765 case FOURCC("----"):
2766 {
2767 mLastCommentMean.clear();
2768 mLastCommentName.clear();
2769 mLastCommentData.clear();
2770 *offset += chunk_size;
2771 break;
2772 }
2773
2774 case FOURCC("sidx"):
2775 {
2776 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2777 if (err != OK) {
2778 return err;
2779 }
2780 *offset += chunk_size;
2781 return UNKNOWN_ERROR; // stop parsing after sidx
2782 }
2783
2784 case FOURCC("ac-3"):
2785 {
2786 *offset += chunk_size;
2787 // bypass ac-3 if parse fail
2788 if (parseAC3SpecificBox(data_offset) != OK) {
2789 if (mLastTrack != NULL) {
2790 ALOGW("Fail to parse ac-3");
2791 mLastTrack->skipTrack = true;
2792 }
2793 }
2794 return OK;
2795 }
2796
2797 case FOURCC("ec-3"):
2798 {
2799 *offset += chunk_size;
2800 // bypass ec-3 if parse fail
2801 if (parseEAC3SpecificBox(data_offset) != OK) {
2802 if (mLastTrack != NULL) {
2803 ALOGW("Fail to parse ec-3");
2804 mLastTrack->skipTrack = true;
2805 }
2806 }
2807 return OK;
2808 }
2809
2810 case FOURCC("ac-4"):
2811 {
2812 *offset += chunk_size;
2813 // bypass ac-4 if parse fail
2814 if (parseAC4SpecificBox(data_offset) != OK) {
2815 if (mLastTrack != NULL) {
2816 ALOGW("Fail to parse ac-4");
2817 mLastTrack->skipTrack = true;
2818 }
2819 }
2820 return OK;
2821 }
2822
2823 case FOURCC("ftyp"):
2824 {
2825 if (chunk_data_size < 8 || depth != 0) {
2826 return ERROR_MALFORMED;
2827 }
2828
2829 off64_t stop_offset = *offset + chunk_size;
2830 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2831 std::set<uint32_t> brandSet;
2832 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2833 if (i == 1) {
2834 // Skip this index, it refers to the minorVersion,
2835 // not a brand.
2836 continue;
2837 }
2838
2839 uint32_t brand;
2840 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2841 return ERROR_MALFORMED;
2842 }
2843
2844 brand = ntohl(brand);
2845 brandSet.insert(brand);
2846 }
2847
2848 if (brandSet.count(FOURCC("qt ")) > 0) {
2849 mIsQT = true;
2850 } else {
2851 if (brandSet.count(FOURCC("mif1")) > 0
2852 && brandSet.count(FOURCC("heic")) > 0) {
2853 ALOGV("identified HEIF image");
2854
2855 mIsHeif = true;
2856 brandSet.erase(FOURCC("mif1"));
2857 brandSet.erase(FOURCC("heic"));
2858 }
2859
2860 if (!brandSet.empty()) {
2861 // This means that the file should have moov box.
2862 // It could be any iso files (mp4, heifs, etc.)
2863 mHasMoovBox = true;
2864 if (mIsHeif) {
2865 ALOGV("identified HEIF image with other tracks");
2866 }
2867 }
2868 }
2869
2870 *offset = stop_offset;
2871
2872 break;
2873 }
2874
2875 default:
2876 {
2877 // check if we're parsing 'ilst' for meta keys
2878 // if so, treat type as a number (key-id).
2879 if (underQTMetaPath(mPath, 3)) {
2880 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2881 if (err != OK) {
2882 return err;
2883 }
2884 }
2885
2886 *offset += chunk_size;
2887 break;
2888 }
2889 }
2890
2891 return OK;
2892 }
2893
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)2894 status_t MPEG4Extractor::parseChannelCountSampleRate(
2895 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
2896 // skip 16 bytes:
2897 // + 6-byte reserved,
2898 // + 2-byte data reference index,
2899 // + 8-byte reserved
2900 *offset += 16;
2901 if (!mDataSource->getUInt16(*offset, channelCount)) {
2902 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
2903 return ERROR_MALFORMED;
2904 }
2905 // skip 8 bytes:
2906 // + 2-byte channelCount,
2907 // + 2-byte sample size,
2908 // + 4-byte reserved
2909 *offset += 8;
2910 if (!mDataSource->getUInt16(*offset, sampleRate)) {
2911 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
2912 return ERROR_MALFORMED;
2913 }
2914 // skip 4 bytes:
2915 // + 2-byte sampleRate,
2916 // + 2-byte reserved
2917 *offset += 4;
2918 return OK;
2919 }
2920
parseAC4SpecificBox(off64_t offset)2921 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
2922 if (mLastTrack == NULL) {
2923 return ERROR_MALFORMED;
2924 }
2925
2926 uint16_t sampleRate, channelCount;
2927 status_t status;
2928 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
2929 return status;
2930 }
2931 uint32_t size;
2932 // + 4-byte size
2933 // + 4-byte type
2934 // + 3-byte payload
2935 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
2936 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
2937 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
2938 return ERROR_MALFORMED;
2939 }
2940
2941 // + 4-byte size
2942 offset += 4;
2943 uint32_t type;
2944 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
2945 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
2946 return ERROR_MALFORMED;
2947 }
2948
2949 // + 4-byte type
2950 offset += 4;
2951 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
2952 uint8_t chunk[kAC4SpecificBoxPayloadSize];
2953 ssize_t dsiSize = size - 8; // size of box - size and type fields
2954 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
2955 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
2956 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
2957 return ERROR_MALFORMED;
2958 }
2959 // + size-byte payload
2960 offset += dsiSize;
2961 ABitReader br(chunk, dsiSize);
2962 AC4DSIParser parser(br);
2963 if (!parser.parse()){
2964 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
2965 return ERROR_MALFORMED;
2966 }
2967
2968 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
2969 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
2970 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
2971
2972 AudioPresentationCollection presentations;
2973 // translate the AC4 presentation information to audio presentations for this track
2974 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
2975 if (!ac4Presentations.empty()) {
2976 for (const auto& ac4Presentation : ac4Presentations) {
2977 auto& presentation = ac4Presentation.second;
2978 if (!presentation.mEnabled) {
2979 continue;
2980 }
2981 AudioPresentationV1 ap;
2982 ap.mPresentationId = presentation.mGroupIndex;
2983 ap.mProgramId = presentation.mProgramID;
2984 ap.mLanguage = presentation.mLanguage;
2985 if (presentation.mPreVirtualized) {
2986 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
2987 } else {
2988 switch (presentation.mChannelMode) {
2989 case AC4Parser::AC4Presentation::kChannelMode_Mono:
2990 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
2991 ap.mMasteringIndication = MASTERED_FOR_STEREO;
2992 break;
2993 case AC4Parser::AC4Presentation::kChannelMode_3_0:
2994 case AC4Parser::AC4Presentation::kChannelMode_5_0:
2995 case AC4Parser::AC4Presentation::kChannelMode_5_1:
2996 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
2997 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
2998 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
2999 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3000 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3001 break;
3002 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3003 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3004 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3005 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3006 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3007 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3008 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3009 ap.mMasteringIndication = MASTERED_FOR_3D;
3010 break;
3011 default:
3012 ALOGE("Invalid channel mode in AC4 presentation");
3013 return ERROR_MALFORMED;
3014 }
3015 }
3016
3017 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3018 AC4Parser::AC4Presentation::kVisuallyImpaired);
3019 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3020 AC4Parser::AC4Presentation::kVoiceOver);
3021 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3022 if (!ap.mLanguage.empty()) {
3023 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3024 }
3025 presentations.push_back(std::move(ap));
3026 }
3027 }
3028
3029 if (presentations.empty()) {
3030 // Clear audio presentation info in metadata.
3031 AMediaFormat_setBuffer(
3032 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3033 } else {
3034 std::ostringstream outStream(std::ios::out);
3035 serializeAudioPresentations(presentations, &outStream);
3036 AMediaFormat_setBuffer(
3037 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3038 outStream.str().data(), outStream.str().size());
3039 }
3040 return OK;
3041 }
3042
parseEAC3SpecificBox(off64_t offset)3043 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3044 if (mLastTrack == NULL) {
3045 return ERROR_MALFORMED;
3046 }
3047
3048 uint16_t sampleRate, channels;
3049 status_t status;
3050 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3051 return status;
3052 }
3053 uint32_t size;
3054 // + 4-byte size
3055 // + 4-byte type
3056 // + 3-byte payload
3057 const uint32_t kEAC3SpecificBoxMinSize = 11;
3058 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3059 // calculated from the required bits read below as well as the maximum number of independent
3060 // and dependant sub streams you can have
3061 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3062 if (!mDataSource->getUInt32(offset, &size) ||
3063 size < kEAC3SpecificBoxMinSize ||
3064 size > kEAC3SpecificBoxMaxSize) {
3065 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3066 return ERROR_MALFORMED;
3067 }
3068
3069 offset += 4;
3070 uint32_t type;
3071 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3072 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3073 return ERROR_MALFORMED;
3074 }
3075
3076 offset += 4;
3077 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3078 if (chunk == NULL) {
3079 return ERROR_MALFORMED;
3080 }
3081
3082 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3083 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3084 delete[] chunk;
3085 return ERROR_MALFORMED;
3086 }
3087
3088 ABitReader br(chunk, size);
3089 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3090 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3091
3092 if (br.numBitsLeft() < 16) {
3093 delete[] chunk;
3094 return ERROR_MALFORMED;
3095 }
3096 unsigned data_rate = br.getBits(13);
3097 ALOGV("EAC3 data rate = %d", data_rate);
3098
3099 unsigned num_ind_sub = br.getBits(3) + 1;
3100 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3101 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3102 delete[] chunk;
3103 return ERROR_MALFORMED;
3104 }
3105
3106 unsigned channelCount = 0;
3107 for (unsigned i = 0; i < num_ind_sub; i++) {
3108 unsigned fscod = br.getBits(2);
3109 if (fscod == 3) {
3110 ALOGE("Incorrect fscod (3) in EAC3 header");
3111 delete[] chunk;
3112 return ERROR_MALFORMED;
3113 }
3114 unsigned boxSampleRate = sampleRateTable[fscod];
3115 if (boxSampleRate != sampleRate) {
3116 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3117 boxSampleRate, sampleRate);
3118 delete[] chunk;
3119 return ERROR_MALFORMED;
3120 }
3121
3122 unsigned bsid = br.getBits(5);
3123 if (bsid == 9 || bsid == 10) {
3124 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3125 } else if (bsid > 16) {
3126 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3127 delete[] chunk;
3128 return ERROR_MALFORMED;
3129 }
3130
3131 // skip
3132 br.skipBits(2);
3133 unsigned bsmod = br.getBits(3);
3134 unsigned acmod = br.getBits(3);
3135 unsigned lfeon = br.getBits(1);
3136 // we currently only support the first stream
3137 if (i == 0)
3138 channelCount = channelCountTable[acmod] + lfeon;
3139 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3140
3141 br.skipBits(3);
3142 unsigned num_dep_sub = br.getBits(4);
3143 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3144 if (num_dep_sub != 0) {
3145 if (br.numBitsLeft() < 9) {
3146 delete[] chunk;
3147 return ERROR_MALFORMED;
3148 }
3149 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3150 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3151 unsigned chan_loc = br.getBits(9);
3152 unsigned mask = 1;
3153 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3154 if ((chan_loc & mask) != 0) {
3155 // we currently only support the first stream
3156 if (i == 0) {
3157 channelCount++;
3158 // these are 2 channels in the mask
3159 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3160 channelCount++;
3161 }
3162 }
3163 ALOGV(" %s", chan_loc_tbl[j]);
3164 }
3165 }
3166 } else {
3167 if (br.numBitsLeft() == 0) {
3168 delete[] chunk;
3169 return ERROR_MALFORMED;
3170 }
3171 br.skipBits(1);
3172 }
3173 }
3174
3175 if (br.numBitsLeft() != 0) {
3176 if (br.numBitsLeft() < 8) {
3177 delete[] chunk;
3178 return ERROR_MALFORMED;
3179 }
3180 unsigned mask = br.getBits(8);
3181 for (unsigned i = 0; i < 8; i++) {
3182 if (((0x1 << i) && mask) == 0)
3183 continue;
3184
3185 if (br.numBitsLeft() < 8) {
3186 delete[] chunk;
3187 return ERROR_MALFORMED;
3188 }
3189 switch (i) {
3190 case 0: {
3191 unsigned complexity = br.getBits(8);
3192 ALOGV("Found a JOC stream with complexity = %d", complexity);
3193 }break;
3194 default: {
3195 br.skipBits(8);
3196 }break;
3197 }
3198 }
3199 }
3200 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3201 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3202 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3203
3204 delete[] chunk;
3205 return OK;
3206 }
3207
parseAC3SpecificBox(off64_t offset)3208 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3209 if (mLastTrack == NULL) {
3210 return ERROR_MALFORMED;
3211 }
3212
3213 uint16_t sampleRate, channels;
3214 status_t status;
3215 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3216 return status;
3217 }
3218 uint32_t size;
3219 // + 4-byte size
3220 // + 4-byte type
3221 // + 3-byte payload
3222 const uint32_t kAC3SpecificBoxSize = 11;
3223 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3224 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3225 return ERROR_MALFORMED;
3226 }
3227
3228 offset += 4;
3229 uint32_t type;
3230 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3231 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3232 return ERROR_MALFORMED;
3233 }
3234
3235 offset += 4;
3236 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3237 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3238 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3239 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3240 return ERROR_MALFORMED;
3241 }
3242
3243 ABitReader br(chunk, sizeof(chunk));
3244 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3245 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3246
3247 unsigned fscod = br.getBits(2);
3248 if (fscod == 3) {
3249 ALOGE("Incorrect fscod (3) in AC3 header");
3250 return ERROR_MALFORMED;
3251 }
3252 unsigned boxSampleRate = sampleRateTable[fscod];
3253 if (boxSampleRate != sampleRate) {
3254 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3255 boxSampleRate, sampleRate);
3256 return ERROR_MALFORMED;
3257 }
3258
3259 unsigned bsid = br.getBits(5);
3260 if (bsid > 8) {
3261 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3262 return ERROR_MALFORMED;
3263 }
3264
3265 // skip
3266 br.skipBits(3); // bsmod
3267
3268 unsigned acmod = br.getBits(3);
3269 unsigned lfeon = br.getBits(1);
3270 unsigned channelCount = channelCountTable[acmod] + lfeon;
3271
3272 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3273 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3274 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3275 return OK;
3276 }
3277
parseALACSampleEntry(off64_t * offset)3278 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3279 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3280 // Store ALAC magic cookie (decoder needs it).
3281 uint8_t alacInfo[12];
3282 off64_t data_offset = *offset;
3283
3284 if (mDataSource->readAt(
3285 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3286 return ERROR_IO;
3287 }
3288 uint32_t size = U32_AT(&alacInfo[0]);
3289 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3290 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3291 (U32_AT(&alacInfo[8]) != 0)) {
3292 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3293 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3294 return ERROR_MALFORMED;
3295 }
3296 data_offset += sizeof(alacInfo);
3297 uint8_t cookie[size - sizeof(alacInfo)];
3298 if (mDataSource->readAt(
3299 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3300 return ERROR_IO;
3301 }
3302
3303 uint8_t bitsPerSample = cookie[5];
3304 AMediaFormat_setInt32(mLastTrack->meta,
3305 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3306 AMediaFormat_setInt32(mLastTrack->meta,
3307 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3308 AMediaFormat_setInt32(mLastTrack->meta,
3309 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3310 AMediaFormat_setBuffer(mLastTrack->meta,
3311 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3312 data_offset += sizeof(cookie);
3313 *offset = data_offset;
3314 return OK;
3315 }
3316
parseSegmentIndex(off64_t offset,size_t size)3317 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3318 ALOGV("MPEG4Extractor::parseSegmentIndex");
3319
3320 if (size < 12) {
3321 return -EINVAL;
3322 }
3323
3324 uint32_t flags;
3325 if (!mDataSource->getUInt32(offset, &flags)) {
3326 return ERROR_MALFORMED;
3327 }
3328
3329 uint32_t version = flags >> 24;
3330 flags &= 0xffffff;
3331
3332 ALOGV("sidx version %d", version);
3333
3334 uint32_t referenceId;
3335 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3336 return ERROR_MALFORMED;
3337 }
3338
3339 uint32_t timeScale;
3340 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3341 return ERROR_MALFORMED;
3342 }
3343 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3344 if (timeScale == 0)
3345 return ERROR_MALFORMED;
3346
3347 uint64_t earliestPresentationTime;
3348 uint64_t firstOffset;
3349
3350 offset += 12;
3351 size -= 12;
3352
3353 if (version == 0) {
3354 if (size < 8) {
3355 return -EINVAL;
3356 }
3357 uint32_t tmp;
3358 if (!mDataSource->getUInt32(offset, &tmp)) {
3359 return ERROR_MALFORMED;
3360 }
3361 earliestPresentationTime = tmp;
3362 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3363 return ERROR_MALFORMED;
3364 }
3365 firstOffset = tmp;
3366 offset += 8;
3367 size -= 8;
3368 } else {
3369 if (size < 16) {
3370 return -EINVAL;
3371 }
3372 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3373 return ERROR_MALFORMED;
3374 }
3375 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3376 return ERROR_MALFORMED;
3377 }
3378 offset += 16;
3379 size -= 16;
3380 }
3381 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3382
3383 if (size < 4) {
3384 return -EINVAL;
3385 }
3386
3387 uint16_t referenceCount;
3388 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3389 return ERROR_MALFORMED;
3390 }
3391 offset += 4;
3392 size -= 4;
3393 ALOGV("refcount: %d", referenceCount);
3394
3395 if (size < referenceCount * 12) {
3396 return -EINVAL;
3397 }
3398
3399 uint64_t total_duration = 0;
3400 for (unsigned int i = 0; i < referenceCount; i++) {
3401 uint32_t d1, d2, d3;
3402
3403 if (!mDataSource->getUInt32(offset, &d1) || // size
3404 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3405 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3406 return ERROR_MALFORMED;
3407 }
3408
3409 if (d1 & 0x80000000) {
3410 ALOGW("sub-sidx boxes not supported yet");
3411 }
3412 bool sap = d3 & 0x80000000;
3413 uint32_t saptype = (d3 >> 28) & 7;
3414 if (!sap || (saptype != 1 && saptype != 2)) {
3415 // type 1 and 2 are sync samples
3416 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3417 }
3418 total_duration += d2;
3419 offset += 12;
3420 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3421 SidxEntry se;
3422 se.mSize = d1 & 0x7fffffff;
3423 se.mDurationUs = 1000000LL * d2 / timeScale;
3424 mSidxEntries.add(se);
3425 }
3426
3427 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3428
3429 if (mLastTrack == NULL)
3430 return ERROR_MALFORMED;
3431
3432 int64_t metaDuration;
3433 if (!AMediaFormat_getInt64(mLastTrack->meta,
3434 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3435 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3436 }
3437 return OK;
3438 }
3439
parseQTMetaKey(off64_t offset,size_t size)3440 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3441 if (size < 8) {
3442 return ERROR_MALFORMED;
3443 }
3444
3445 uint32_t count;
3446 if (!mDataSource->getUInt32(offset + 4, &count)) {
3447 return ERROR_MALFORMED;
3448 }
3449
3450 if (mMetaKeyMap.size() > 0) {
3451 ALOGW("'keys' atom seen again, discarding existing entries");
3452 mMetaKeyMap.clear();
3453 }
3454
3455 off64_t keyOffset = offset + 8;
3456 off64_t stopOffset = offset + size;
3457 for (size_t i = 1; i <= count; i++) {
3458 if (keyOffset + 8 > stopOffset) {
3459 return ERROR_MALFORMED;
3460 }
3461
3462 uint32_t keySize;
3463 if (!mDataSource->getUInt32(keyOffset, &keySize)
3464 || keySize < 8
3465 || keyOffset + keySize > stopOffset) {
3466 return ERROR_MALFORMED;
3467 }
3468
3469 uint32_t type;
3470 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3471 || type != FOURCC("mdta")) {
3472 return ERROR_MALFORMED;
3473 }
3474
3475 keySize -= 8;
3476 keyOffset += 8;
3477
3478 auto keyData = heapbuffer<uint8_t>(keySize);
3479 if (keyData.get() == NULL) {
3480 return ERROR_MALFORMED;
3481 }
3482 if (mDataSource->readAt(
3483 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3484 return ERROR_MALFORMED;
3485 }
3486
3487 AString key((const char *)keyData.get(), keySize);
3488 mMetaKeyMap.add(i, key);
3489
3490 keyOffset += keySize;
3491 }
3492 return OK;
3493 }
3494
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3495 status_t MPEG4Extractor::parseQTMetaVal(
3496 int32_t keyId, off64_t offset, size_t size) {
3497 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3498 if (index < 0) {
3499 // corresponding key is not present, ignore
3500 return ERROR_MALFORMED;
3501 }
3502
3503 if (size <= 16) {
3504 return ERROR_MALFORMED;
3505 }
3506 uint32_t dataSize;
3507 if (!mDataSource->getUInt32(offset, &dataSize)
3508 || dataSize > size || dataSize <= 16) {
3509 return ERROR_MALFORMED;
3510 }
3511 uint32_t atomFourCC;
3512 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3513 || atomFourCC != FOURCC("data")) {
3514 return ERROR_MALFORMED;
3515 }
3516 uint32_t dataType;
3517 if (!mDataSource->getUInt32(offset + 8, &dataType)
3518 || ((dataType & 0xff000000) != 0)) {
3519 // not well-known type
3520 return ERROR_MALFORMED;
3521 }
3522
3523 dataSize -= 16;
3524 offset += 16;
3525
3526 if (dataType == 23 && dataSize >= 4) {
3527 // BE Float32
3528 uint32_t val;
3529 if (!mDataSource->getUInt32(offset, &val)) {
3530 return ERROR_MALFORMED;
3531 }
3532 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3533 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3534 }
3535 } else if (dataType == 67 && dataSize >= 4) {
3536 // BE signed int32
3537 uint32_t val;
3538 if (!mDataSource->getUInt32(offset, &val)) {
3539 return ERROR_MALFORMED;
3540 }
3541 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3542 AMediaFormat_setInt32(mFileMetaData,
3543 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3544 }
3545 } else {
3546 // add more keys if needed
3547 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3548 }
3549
3550 return OK;
3551 }
3552
parseTrackHeader(off64_t data_offset,off64_t data_size)3553 status_t MPEG4Extractor::parseTrackHeader(
3554 off64_t data_offset, off64_t data_size) {
3555 if (data_size < 4) {
3556 return ERROR_MALFORMED;
3557 }
3558
3559 uint8_t version;
3560 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3561 return ERROR_IO;
3562 }
3563
3564 size_t dynSize = (version == 1) ? 36 : 24;
3565
3566 uint8_t buffer[36 + 60];
3567
3568 if (data_size != (off64_t)dynSize + 60) {
3569 return ERROR_MALFORMED;
3570 }
3571
3572 if (mDataSource->readAt(
3573 data_offset, buffer, data_size) < (ssize_t)data_size) {
3574 return ERROR_IO;
3575 }
3576
3577 int32_t id;
3578
3579 if (version == 1) {
3580 // we can get ctime value from U64_AT(&buffer[4])
3581 // we can get mtime value from U64_AT(&buffer[12])
3582 id = U32_AT(&buffer[20]);
3583 // we can get duration value from U64_AT(&buffer[28])
3584 } else if (version == 0) {
3585 // we can get ctime value from U32_AT(&buffer[4])
3586 // we can get mtime value from U32_AT(&buffer[8])
3587 id = U32_AT(&buffer[12]);
3588 // we can get duration value from U32_AT(&buffer[20])
3589 } else {
3590 return ERROR_UNSUPPORTED;
3591 }
3592
3593 if (mLastTrack == NULL)
3594 return ERROR_MALFORMED;
3595
3596 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3597
3598 size_t matrixOffset = dynSize + 16;
3599 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3600 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3601 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3602 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3603
3604 #if 0
3605 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3606 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3607
3608 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3609 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3610 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3611 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3612 #endif
3613
3614 uint32_t rotationDegrees;
3615
3616 static const int32_t kFixedOne = 0x10000;
3617 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3618 // Identity, no rotation
3619 rotationDegrees = 0;
3620 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3621 rotationDegrees = 90;
3622 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3623 rotationDegrees = 270;
3624 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3625 rotationDegrees = 180;
3626 } else {
3627 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3628 rotationDegrees = 0;
3629 }
3630
3631 if (rotationDegrees != 0) {
3632 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3633 }
3634
3635 // Handle presentation display size, which could be different
3636 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3637 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3638 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3639 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3640 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3641
3642 return OK;
3643 }
3644
parseITunesMetaData(off64_t offset,size_t size)3645 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3646 if (size == 0) {
3647 return OK;
3648 }
3649
3650 if (size < 4 || size == SIZE_MAX) {
3651 return ERROR_MALFORMED;
3652 }
3653
3654 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3655 if (buffer == NULL) {
3656 return ERROR_MALFORMED;
3657 }
3658 if (mDataSource->readAt(
3659 offset, buffer, size) != (ssize_t)size) {
3660 delete[] buffer;
3661 buffer = NULL;
3662
3663 return ERROR_IO;
3664 }
3665
3666 uint32_t flags = U32_AT(buffer);
3667
3668 const char *metadataKey = nullptr;
3669 char chunk[5];
3670 MakeFourCCString(mPath[4], chunk);
3671 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3672 switch ((int32_t)mPath[4]) {
3673 case FOURCC("\251alb"):
3674 {
3675 metadataKey = "album";
3676 break;
3677 }
3678 case FOURCC("\251ART"):
3679 {
3680 metadataKey = "artist";
3681 break;
3682 }
3683 case FOURCC("aART"):
3684 {
3685 metadataKey = "albumartist";
3686 break;
3687 }
3688 case FOURCC("\251day"):
3689 {
3690 metadataKey = "year";
3691 break;
3692 }
3693 case FOURCC("\251nam"):
3694 {
3695 metadataKey = "title";
3696 break;
3697 }
3698 case FOURCC("\251wrt"):
3699 {
3700 metadataKey = "writer";
3701 break;
3702 }
3703 case FOURCC("covr"):
3704 {
3705 metadataKey = "albumart";
3706 break;
3707 }
3708 case FOURCC("gnre"):
3709 case FOURCC("\251gen"):
3710 {
3711 metadataKey = "genre";
3712 break;
3713 }
3714 case FOURCC("cpil"):
3715 {
3716 if (size == 9 && flags == 21) {
3717 char tmp[16];
3718 sprintf(tmp, "%d",
3719 (int)buffer[size - 1]);
3720
3721 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
3722 }
3723 break;
3724 }
3725 case FOURCC("trkn"):
3726 {
3727 if (size == 16 && flags == 0) {
3728 char tmp[16];
3729 uint16_t* pTrack = (uint16_t*)&buffer[10];
3730 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3731 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3732
3733 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3734 }
3735 break;
3736 }
3737 case FOURCC("disk"):
3738 {
3739 if ((size == 14 || size == 16) && flags == 0) {
3740 char tmp[16];
3741 uint16_t* pDisc = (uint16_t*)&buffer[10];
3742 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3743 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3744
3745 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
3746 }
3747 break;
3748 }
3749 case FOURCC("----"):
3750 {
3751 buffer[size] = '\0';
3752 switch (mPath[5]) {
3753 case FOURCC("mean"):
3754 mLastCommentMean.setTo((const char *)buffer + 4);
3755 break;
3756 case FOURCC("name"):
3757 mLastCommentName.setTo((const char *)buffer + 4);
3758 break;
3759 case FOURCC("data"):
3760 if (size < 8) {
3761 delete[] buffer;
3762 buffer = NULL;
3763 ALOGE("b/24346430");
3764 return ERROR_MALFORMED;
3765 }
3766 mLastCommentData.setTo((const char *)buffer + 8);
3767 break;
3768 }
3769
3770 // Once we have a set of mean/name/data info, go ahead and process
3771 // it to see if its something we are interested in. Whether or not
3772 // were are interested in the specific tag, make sure to clear out
3773 // the set so we can be ready to process another tuple should one
3774 // show up later in the file.
3775 if ((mLastCommentMean.length() != 0) &&
3776 (mLastCommentName.length() != 0) &&
3777 (mLastCommentData.length() != 0)) {
3778
3779 if (mLastCommentMean == "com.apple.iTunes"
3780 && mLastCommentName == "iTunSMPB") {
3781 int32_t delay, padding;
3782 if (sscanf(mLastCommentData,
3783 " %*x %x %x %*x", &delay, &padding) == 2) {
3784 if (mLastTrack == NULL) {
3785 delete[] buffer;
3786 return ERROR_MALFORMED;
3787 }
3788
3789 AMediaFormat_setInt32(mLastTrack->meta,
3790 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
3791 AMediaFormat_setInt32(mLastTrack->meta,
3792 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
3793 }
3794 }
3795
3796 mLastCommentMean.clear();
3797 mLastCommentName.clear();
3798 mLastCommentData.clear();
3799 }
3800 break;
3801 }
3802
3803 default:
3804 break;
3805 }
3806
3807 void *tmpData;
3808 size_t tmpDataSize;
3809 const char *s;
3810 if (size >= 8 && metadataKey &&
3811 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
3812 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
3813 if (!strcmp(metadataKey, "albumart")) {
3814 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
3815 buffer + 8, size - 8);
3816 } else if (!strcmp(metadataKey, "genre")) {
3817 if (flags == 0) {
3818 // uint8_t genre code, iTunes genre codes are
3819 // the standard id3 codes, except they start
3820 // at 1 instead of 0 (e.g. Pop is 14, not 13)
3821 // We use standard id3 numbering, so subtract 1.
3822 int genrecode = (int)buffer[size - 1];
3823 genrecode--;
3824 if (genrecode < 0) {
3825 genrecode = 255; // reserved for 'unknown genre'
3826 }
3827 char genre[10];
3828 sprintf(genre, "%d", genrecode);
3829
3830 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
3831 } else if (flags == 1) {
3832 // custom genre string
3833 buffer[size] = '\0';
3834
3835 AMediaFormat_setString(mFileMetaData,
3836 metadataKey, (const char *)buffer + 8);
3837 }
3838 } else {
3839 buffer[size] = '\0';
3840
3841 AMediaFormat_setString(mFileMetaData,
3842 metadataKey, (const char *)buffer + 8);
3843 }
3844 }
3845
3846 delete[] buffer;
3847 buffer = NULL;
3848
3849 return OK;
3850 }
3851
parseColorInfo(off64_t offset,size_t size)3852 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3853 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3854 return ERROR_MALFORMED;
3855 }
3856
3857 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3858 if (buffer == NULL) {
3859 return ERROR_MALFORMED;
3860 }
3861 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3862 delete[] buffer;
3863 buffer = NULL;
3864
3865 return ERROR_IO;
3866 }
3867
3868 int32_t type = U32_AT(&buffer[0]);
3869 if ((type == FOURCC("nclx") && size >= 11)
3870 || (type == FOURCC("nclc") && size >= 10)) {
3871 // only store the first color specification
3872 int32_t existingColor;
3873 if (!AMediaFormat_getInt32(mLastTrack->meta,
3874 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
3875 int32_t primaries = U16_AT(&buffer[4]);
3876 int32_t isotransfer = U16_AT(&buffer[6]);
3877 int32_t coeffs = U16_AT(&buffer[8]);
3878 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
3879
3880 int32_t range = 0;
3881 int32_t standard = 0;
3882 int32_t transfer = 0;
3883 ColorUtils::convertIsoColorAspectsToPlatformAspects(
3884 primaries, isotransfer, coeffs, fullRange,
3885 &range, &standard, &transfer);
3886
3887 if (range != 0) {
3888 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
3889 }
3890 if (standard != 0) {
3891 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
3892 }
3893 if (transfer != 0) {
3894 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
3895 }
3896 }
3897 }
3898
3899 delete[] buffer;
3900 buffer = NULL;
3901
3902 return OK;
3903 }
3904
parse3GPPMetaData(off64_t offset,size_t size,int depth)3905 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3906 if (size < 4 || size == SIZE_MAX) {
3907 return ERROR_MALFORMED;
3908 }
3909
3910 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3911 if (buffer == NULL) {
3912 return ERROR_MALFORMED;
3913 }
3914 if (mDataSource->readAt(
3915 offset, buffer, size) != (ssize_t)size) {
3916 delete[] buffer;
3917 buffer = NULL;
3918
3919 return ERROR_IO;
3920 }
3921
3922 const char *metadataKey = nullptr;
3923 switch (mPath[depth]) {
3924 case FOURCC("titl"):
3925 {
3926 metadataKey = "title";
3927 break;
3928 }
3929 case FOURCC("perf"):
3930 {
3931 metadataKey = "artist";
3932 break;
3933 }
3934 case FOURCC("auth"):
3935 {
3936 metadataKey = "writer";
3937 break;
3938 }
3939 case FOURCC("gnre"):
3940 {
3941 metadataKey = "genre";
3942 break;
3943 }
3944 case FOURCC("albm"):
3945 {
3946 if (buffer[size - 1] != '\0') {
3947 char tmp[4];
3948 sprintf(tmp, "%u", buffer[size - 1]);
3949
3950 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3951 }
3952
3953 metadataKey = "album";
3954 break;
3955 }
3956 case FOURCC("yrrc"):
3957 {
3958 if (size < 6) {
3959 delete[] buffer;
3960 buffer = NULL;
3961 ALOGE("b/62133227");
3962 android_errorWriteLog(0x534e4554, "62133227");
3963 return ERROR_MALFORMED;
3964 }
3965 char tmp[5];
3966 uint16_t year = U16_AT(&buffer[4]);
3967
3968 if (year < 10000) {
3969 sprintf(tmp, "%u", year);
3970
3971 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
3972 }
3973 break;
3974 }
3975
3976 default:
3977 break;
3978 }
3979
3980 if (metadataKey) {
3981 bool isUTF8 = true; // Common case
3982 char16_t *framedata = NULL;
3983 int len16 = 0; // Number of UTF-16 characters
3984
3985 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3986 if (size < 6) {
3987 delete[] buffer;
3988 buffer = NULL;
3989 return ERROR_MALFORMED;
3990 }
3991
3992 if (size - 6 >= 4) {
3993 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3994 framedata = (char16_t *)(buffer + 6);
3995 if (0xfffe == *framedata) {
3996 // endianness marker (BOM) doesn't match host endianness
3997 for (int i = 0; i < len16; i++) {
3998 framedata[i] = bswap_16(framedata[i]);
3999 }
4000 // BOM is now swapped to 0xfeff, we will execute next block too
4001 }
4002
4003 if (0xfeff == *framedata) {
4004 // Remove the BOM
4005 framedata++;
4006 len16--;
4007 isUTF8 = false;
4008 }
4009 // else normal non-zero-length UTF-8 string
4010 // we can't handle UTF-16 without BOM as there is no other
4011 // indication of encoding.
4012 }
4013
4014 if (isUTF8) {
4015 buffer[size] = 0;
4016 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4017 } else {
4018 // Convert from UTF-16 string to UTF-8 string.
4019 String8 tmpUTF8str(framedata, len16);
4020 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
4021 }
4022 }
4023
4024 delete[] buffer;
4025 buffer = NULL;
4026
4027 return OK;
4028 }
4029
parseID3v2MetaData(off64_t offset)4030 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
4031 ID3 id3(mDataSource, true /* ignorev1 */, offset);
4032
4033 if (id3.isValid()) {
4034 struct Map {
4035 const char *key;
4036 const char *tag1;
4037 const char *tag2;
4038 };
4039 static const Map kMap[] = {
4040 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4041 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4042 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4043 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4044 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4045 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4046 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4047 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4048 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4049 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4050 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4051 };
4052 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4053
4054 for (size_t i = 0; i < kNumMapEntries; ++i) {
4055 const char *ss;
4056 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4057 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4058 if (it->done()) {
4059 delete it;
4060 it = new ID3::Iterator(id3, kMap[i].tag2);
4061 }
4062
4063 if (it->done()) {
4064 delete it;
4065 continue;
4066 }
4067
4068 String8 s;
4069 it->getString(&s);
4070 delete it;
4071
4072 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4073 }
4074 }
4075
4076 size_t dataSize;
4077 String8 mime;
4078 const void *data = id3.getAlbumArt(&dataSize, &mime);
4079
4080 if (data) {
4081 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4082 }
4083 }
4084 }
4085
getTrack(size_t index)4086 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4087 status_t err;
4088 if ((err = readMetaData()) != OK) {
4089 return NULL;
4090 }
4091
4092 Track *track = mFirstTrack;
4093 while (index > 0) {
4094 if (track == NULL) {
4095 return NULL;
4096 }
4097
4098 track = track->next;
4099 --index;
4100 }
4101
4102 if (track == NULL) {
4103 return NULL;
4104 }
4105
4106
4107 Trex *trex = NULL;
4108 int32_t trackId;
4109 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4110 for (size_t i = 0; i < mTrex.size(); i++) {
4111 Trex *t = &mTrex.editItemAt(i);
4112 if (t->track_ID == (uint32_t) trackId) {
4113 trex = t;
4114 break;
4115 }
4116 }
4117 } else {
4118 ALOGE("b/21657957");
4119 return NULL;
4120 }
4121
4122 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4123
4124 const char *mime;
4125 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4126 return NULL;
4127 }
4128
4129 sp<ItemTable> itemTable;
4130 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4131 void *data;
4132 size_t size;
4133 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4134 return NULL;
4135 }
4136
4137 const uint8_t *ptr = (const uint8_t *)data;
4138
4139 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4140 return NULL;
4141 }
4142 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4143 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4144 void *data;
4145 size_t size;
4146 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4147 return NULL;
4148 }
4149
4150 const uint8_t *ptr = (const uint8_t *)data;
4151
4152 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4153 return NULL;
4154 }
4155 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4156 itemTable = mItemTable;
4157 }
4158 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4159 void *data;
4160 size_t size;
4161 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4162 return NULL;
4163 }
4164
4165 const uint8_t *ptr = (const uint8_t *)data;
4166
4167 if (size < 5 || ptr[0] != 0x81) { // configurationVersion == 1
4168 return NULL;
4169 }
4170 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4171 void *data;
4172 size_t size;
4173 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4174 return NULL;
4175 }
4176
4177 const uint8_t *ptr = (const uint8_t *)data;
4178
4179 if (size < 5 || ptr[0] != 0x01) { // configurationVersion == 1
4180 return NULL;
4181 }
4182 }
4183
4184 if (track->has_elst and !strncasecmp("video/", mime, 6) and track->elst_media_time > 0) {
4185 track->elstShiftStartTicks = track->elst_media_time;
4186 ALOGV("video track->elstShiftStartTicks :%" PRIu64, track->elstShiftStartTicks);
4187 }
4188
4189 MPEG4Source *source = new MPEG4Source(
4190 track->meta, mDataSource, track->timescale, track->sampleTable,
4191 mSidxEntries, trex, mMoofOffset, itemTable,
4192 track->elstShiftStartTicks);
4193 if (source->init() != OK) {
4194 delete source;
4195 return NULL;
4196 }
4197 return source;
4198 }
4199
4200 // static
verifyTrack(Track * track)4201 status_t MPEG4Extractor::verifyTrack(Track *track) {
4202 const char *mime;
4203 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4204
4205 void *data;
4206 size_t size;
4207 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4208 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4209 return ERROR_MALFORMED;
4210 }
4211 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4212 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4213 return ERROR_MALFORMED;
4214 }
4215 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4216 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4217 return ERROR_MALFORMED;
4218 }
4219 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4220 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4221 return ERROR_MALFORMED;
4222 }
4223 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4224 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4225 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4226 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4227 return ERROR_MALFORMED;
4228 }
4229 }
4230
4231 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4232 // Make sure we have all the metadata we need.
4233 ALOGE("stbl atom missing/invalid.");
4234 return ERROR_MALFORMED;
4235 }
4236
4237 if (track->timescale == 0) {
4238 ALOGE("timescale invalid.");
4239 return ERROR_MALFORMED;
4240 }
4241
4242 return OK;
4243 }
4244
// Audio object type (AOT) identifiers. Only the uncommented enumerators are
// defined; the commented-out ones are kept as a reference for the rest of the
// numbering.
// NOTE(review): the numbering looks like the MPEG-4 Audio object type table -
// confirm against the spec before adding entries.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   // Main profile
    AOT_AAC_LC              = 2,   // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  // (reserved)
    // AOT_RSVD_11          = 11,  // (reserved)
    // AOT_TTSI             = 12,  // TTSI Object
    // AOT_MAIN_SYNTH       = 13,  // Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  // Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  // General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC           = 17,  // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  // (reserved)
    // AOT_ER_AAC_LTP       = 19,  // Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL         = 20,  // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  // Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC             = 22,  // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD           = 23,  // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  // Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  // Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  // Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  // Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  // might become SSC
    AOT_PS                  = 29,  // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  // MPEG Surround

    AOT_ESCAPE              = 31,  // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  // MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  // MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  // MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  // might become DST
    // AOT_RSVD_36          = 36,  // might become ALS
    // AOT_AAC_SLS          = 37,  // AAC + SLS
    // AOT_SLS              = 38,  // SLS
    // AOT_ER_AAC_ELD       = 39,  // AAC Enhanced Low Delay

    // AOT_USAC             = 42,  // USAC
    // AOT_SAOC             = 43,  // SAOC
    // AOT_LD_MPEGS         = 44,  // Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  // Interim AOT for Rsvd50
};
4296
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4297 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4298 const void *esds_data, size_t esds_size) {
4299 ESDS esds(esds_data, esds_size);
4300
4301 uint8_t objectTypeIndication;
4302 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4303 return ERROR_MALFORMED;
4304 }
4305
4306 if (objectTypeIndication == 0xe1) {
4307 // This isn't MPEG4 audio at all, it's QCELP 14k...
4308 if (mLastTrack == NULL)
4309 return ERROR_MALFORMED;
4310
4311 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4312 return OK;
4313 }
4314
4315 if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4316 // mp3 audio
4317 AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4318 return OK;
4319 }
4320
4321 if (mLastTrack != NULL) {
4322 uint32_t maxBitrate = 0;
4323 uint32_t avgBitrate = 0;
4324 esds.getBitRate(&maxBitrate, &avgBitrate);
4325 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4326 AMediaFormat_setInt32(mLastTrack->meta,
4327 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4328 }
4329 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4330 AMediaFormat_setInt32(mLastTrack->meta,
4331 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4332 }
4333 }
4334
4335 const uint8_t *csd;
4336 size_t csd_size;
4337 if (esds.getCodecSpecificInfo(
4338 (const void **)&csd, &csd_size) != OK) {
4339 return ERROR_MALFORMED;
4340 }
4341
4342 if (kUseHexDump) {
4343 printf("ESD of size %zu\n", csd_size);
4344 hexdump(csd, csd_size);
4345 }
4346
4347 if (csd_size == 0) {
4348 // There's no further information, i.e. no codec specific data
4349 // Let's assume that the information provided in the mpeg4 headers
4350 // is accurate and hope for the best.
4351
4352 return OK;
4353 }
4354
4355 if (csd_size < 2) {
4356 return ERROR_MALFORMED;
4357 }
4358
4359 if (objectTypeIndication == 0xdd) {
4360 // vorbis audio
4361 if (csd[0] != 0x02) {
4362 return ERROR_MALFORMED;
4363 }
4364
4365 // codecInfo starts with two lengths, len1 and len2, that are
4366 // "Xiph-style-lacing encoded"..
4367
4368 size_t offset = 1;
4369 size_t len1 = 0;
4370 while (offset < csd_size && csd[offset] == 0xff) {
4371 if (__builtin_add_overflow(len1, 0xff, &len1)) {
4372 return ERROR_MALFORMED;
4373 }
4374 ++offset;
4375 }
4376 if (offset >= csd_size) {
4377 return ERROR_MALFORMED;
4378 }
4379 if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4380 return ERROR_MALFORMED;
4381 }
4382 ++offset;
4383 if (len1 == 0) {
4384 return ERROR_MALFORMED;
4385 }
4386
4387 size_t len2 = 0;
4388 while (offset < csd_size && csd[offset] == 0xff) {
4389 if (__builtin_add_overflow(len2, 0xff, &len2)) {
4390 return ERROR_MALFORMED;
4391 }
4392 ++offset;
4393 }
4394 if (offset >= csd_size) {
4395 return ERROR_MALFORMED;
4396 }
4397 if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4398 return ERROR_MALFORMED;
4399 }
4400 ++offset;
4401 if (len2 == 0) {
4402 return ERROR_MALFORMED;
4403 }
4404 if (offset >= csd_size || csd[offset] != 0x01) {
4405 return ERROR_MALFORMED;
4406 }
4407 // formerly kKeyVorbisInfo
4408 AMediaFormat_setBuffer(mLastTrack->meta,
4409 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4410
4411 if (__builtin_add_overflow(offset, len1, &offset) ||
4412 offset >= csd_size || csd[offset] != 0x03) {
4413 return ERROR_MALFORMED;
4414 }
4415
4416 if (__builtin_add_overflow(offset, len2, &offset) ||
4417 offset >= csd_size || csd[offset] != 0x05) {
4418 return ERROR_MALFORMED;
4419 }
4420
4421 // formerly kKeyVorbisBooks
4422 AMediaFormat_setBuffer(mLastTrack->meta,
4423 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4424 AMediaFormat_setString(mLastTrack->meta,
4425 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4426
4427 return OK;
4428 }
4429
4430 static uint32_t kSamplingRate[] = {
4431 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4432 16000, 12000, 11025, 8000, 7350
4433 };
4434
4435 ABitReader br(csd, csd_size);
4436 uint32_t objectType = br.getBits(5);
4437
4438 if (objectType == 31) { // AAC-ELD => additional 6 bits
4439 objectType = 32 + br.getBits(6);
4440 }
4441
4442 if (mLastTrack == NULL)
4443 return ERROR_MALFORMED;
4444
4445 //keep AOT type
4446 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4447
4448 uint32_t freqIndex = br.getBits(4);
4449
4450 int32_t sampleRate = 0;
4451 int32_t numChannels = 0;
4452 if (freqIndex == 15) {
4453 if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4454 sampleRate = br.getBits(24);
4455 numChannels = br.getBits(4);
4456 } else {
4457 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4458 numChannels = br.getBits(4);
4459
4460 if (freqIndex == 13 || freqIndex == 14) {
4461 return ERROR_MALFORMED;
4462 }
4463
4464 sampleRate = kSamplingRate[freqIndex];
4465 }
4466
4467 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4468 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4469 uint32_t extFreqIndex = br.getBits(4);
4470 if (extFreqIndex == 15) {
4471 if (csd_size < 8) {
4472 return ERROR_MALFORMED;
4473 }
4474 if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4475 br.skipBits(24); // extSampleRate
4476 } else {
4477 if (extFreqIndex == 13 || extFreqIndex == 14) {
4478 return ERROR_MALFORMED;
4479 }
4480 //extSampleRate = kSamplingRate[extFreqIndex];
4481 }
4482 //TODO: save the extension sampling rate value in meta data =>
4483 // AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4484 }
4485
4486 switch (numChannels) {
4487 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4488 case 0:
4489 case 1:// FC
4490 case 2:// FL FR
4491 case 3:// FC, FL FR
4492 case 4:// FC, FL FR, RC
4493 case 5:// FC, FL FR, SL SR
4494 case 6:// FC, FL FR, SL SR, LFE
4495 //numChannels already contains the right value
4496 break;
4497 case 11:// FC, FL FR, SL SR, RC, LFE
4498 numChannels = 7;
4499 break;
4500 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4501 case 12:// FC, FL FR, SL SR, RL RR, LFE
4502 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
4503 numChannels = 8;
4504 break;
4505 default:
4506 return ERROR_UNSUPPORTED;
4507 }
4508
4509 {
4510 if (objectType == AOT_SBR || objectType == AOT_PS) {
4511 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4512 objectType = br.getBits(5);
4513
4514 if (objectType == AOT_ESCAPE) {
4515 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4516 objectType = 32 + br.getBits(6);
4517 }
4518 }
4519 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4520 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4521 objectType == AOT_ER_BSAC) {
4522 if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4523 br.skipBits(1); // frameLengthFlag
4524
4525 const int32_t dependsOnCoreCoder = br.getBits(1);
4526
4527 if (dependsOnCoreCoder ) {
4528 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4529 br.skipBits(14); // coreCoderDelay
4530 }
4531
4532 int32_t extensionFlag = -1;
4533 if (br.numBitsLeft() > 0) {
4534 extensionFlag = br.getBits(1);
4535 } else {
4536 switch (objectType) {
4537 // 14496-3 4.5.1.1 extensionFlag
4538 case AOT_AAC_LC:
4539 extensionFlag = 0;
4540 break;
4541 case AOT_ER_AAC_LC:
4542 case AOT_ER_AAC_SCAL:
4543 case AOT_ER_BSAC:
4544 case AOT_ER_AAC_LD:
4545 extensionFlag = 1;
4546 break;
4547 default:
4548 return ERROR_MALFORMED;
4549 break;
4550 }
4551 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4552 extensionFlag, objectType);
4553 }
4554
4555 if (numChannels == 0) {
4556 int32_t channelsEffectiveNum = 0;
4557 int32_t channelsNum = 0;
4558 if (br.numBitsLeft() < 32) {
4559 return ERROR_MALFORMED;
4560 }
4561 br.skipBits(4); // ElementInstanceTag
4562 br.skipBits(2); // Profile
4563 br.skipBits(4); // SamplingFrequencyIndex
4564 const int32_t NumFrontChannelElements = br.getBits(4);
4565 const int32_t NumSideChannelElements = br.getBits(4);
4566 const int32_t NumBackChannelElements = br.getBits(4);
4567 const int32_t NumLfeChannelElements = br.getBits(2);
4568 br.skipBits(3); // NumAssocDataElements
4569 br.skipBits(4); // NumValidCcElements
4570
4571 const int32_t MonoMixdownPresent = br.getBits(1);
4572
4573 if (MonoMixdownPresent != 0) {
4574 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4575 br.skipBits(4); // MonoMixdownElementNumber
4576 }
4577
4578 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4579 const int32_t StereoMixdownPresent = br.getBits(1);
4580 if (StereoMixdownPresent != 0) {
4581 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4582 br.skipBits(4); // StereoMixdownElementNumber
4583 }
4584
4585 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4586 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4587 if (MatrixMixdownIndexPresent != 0) {
4588 if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4589 br.skipBits(2); // MatrixMixdownIndex
4590 br.skipBits(1); // PseudoSurroundEnable
4591 }
4592
4593 int i;
4594 for (i=0; i < NumFrontChannelElements; i++) {
4595 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4596 const int32_t FrontElementIsCpe = br.getBits(1);
4597 br.skipBits(4); // FrontElementTagSelect
4598 channelsNum += FrontElementIsCpe ? 2 : 1;
4599 }
4600
4601 for (i=0; i < NumSideChannelElements; i++) {
4602 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4603 const int32_t SideElementIsCpe = br.getBits(1);
4604 br.skipBits(4); // SideElementTagSelect
4605 channelsNum += SideElementIsCpe ? 2 : 1;
4606 }
4607
4608 for (i=0; i < NumBackChannelElements; i++) {
4609 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4610 const int32_t BackElementIsCpe = br.getBits(1);
4611 br.skipBits(4); // BackElementTagSelect
4612 channelsNum += BackElementIsCpe ? 2 : 1;
4613 }
4614 channelsEffectiveNum = channelsNum;
4615
4616 for (i=0; i < NumLfeChannelElements; i++) {
4617 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4618 br.skipBits(4); // LfeElementTagSelect
4619 channelsNum += 1;
4620 }
4621 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
4622 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
4623 numChannels = channelsNum;
4624 }
4625 }
4626 }
4627
4628 if (numChannels == 0) {
4629 return ERROR_UNSUPPORTED;
4630 }
4631
4632 if (mLastTrack == NULL)
4633 return ERROR_MALFORMED;
4634
4635 int32_t prevSampleRate;
4636 CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
4637
4638 if (prevSampleRate != sampleRate) {
4639 ALOGV("mpeg4 audio sample rate different from previous setting. "
4640 "was: %d, now: %d", prevSampleRate, sampleRate);
4641 }
4642
4643 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
4644
4645 int32_t prevChannelCount;
4646 CHECK(AMediaFormat_getInt32(mLastTrack->meta,
4647 AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
4648
4649 if (prevChannelCount != numChannels) {
4650 ALOGV("mpeg4 audio channel count different from previous setting. "
4651 "was: %d, now: %d", prevChannelCount, numChannels);
4652 }
4653
4654 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
4655
4656 return OK;
4657 }
4658
adjustRawDefaultFrameSize()4659 void MPEG4Extractor::adjustRawDefaultFrameSize() {
4660 int32_t chanCount = 0;
4661 int32_t bitWidth = 0;
4662 const char *mimeStr = NULL;
4663
4664 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
4665 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
4666 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
4667 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
4668 // samplesize in stsz may not right , so updade default samplesize
4669 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
4670 }
4671 }
4672
4673 ////////////////////////////////////////////////////////////////////////////////
4674
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks)4675 MPEG4Source::MPEG4Source(
4676 AMediaFormat *format,
4677 DataSourceHelper *dataSource,
4678 int32_t timeScale,
4679 const sp<SampleTable> &sampleTable,
4680 Vector<SidxEntry> &sidx,
4681 const Trex *trex,
4682 off64_t firstMoofOffset,
4683 const sp<ItemTable> &itemTable,
4684 uint64_t elstShiftStartTicks)
4685 : mFormat(format),
4686 mDataSource(dataSource),
4687 mTimescale(timeScale),
4688 mSampleTable(sampleTable),
4689 mCurrentSampleIndex(0),
4690 mCurrentFragmentIndex(0),
4691 mSegments(sidx),
4692 mTrex(trex),
4693 mFirstMoofOffset(firstMoofOffset),
4694 mCurrentMoofOffset(firstMoofOffset),
4695 mNextMoofOffset(-1),
4696 mCurrentTime(0),
4697 mDefaultEncryptedByteBlock(0),
4698 mDefaultSkipByteBlock(0),
4699 mCurrentSampleInfoAllocSize(0),
4700 mCurrentSampleInfoSizes(NULL),
4701 mCurrentSampleInfoOffsetsAllocSize(0),
4702 mCurrentSampleInfoOffsets(NULL),
4703 mIsAVC(false),
4704 mIsHEVC(false),
4705 mIsAC4(false),
4706 mIsPcm(false),
4707 mNALLengthSize(0),
4708 mStarted(false),
4709 mBuffer(NULL),
4710 mSrcBuffer(NULL),
4711 mIsHeif(itemTable != NULL),
4712 mItemTable(itemTable),
4713 mElstShiftStartTicks(elstShiftStartTicks) {
4714
4715 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
4716
4717 AMediaFormat_getInt32(mFormat,
4718 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
4719 mDefaultIVSize = 0;
4720 AMediaFormat_getInt32(mFormat,
4721 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
4722 void *key;
4723 size_t keysize;
4724 if (AMediaFormat_getBuffer(mFormat,
4725 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
4726 CHECK(keysize <= 16);
4727 memset(mCryptoKey, 0, 16);
4728 memcpy(mCryptoKey, key, keysize);
4729 }
4730
4731 AMediaFormat_getInt32(mFormat,
4732 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
4733 AMediaFormat_getInt32(mFormat,
4734 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
4735
4736 const char *mime;
4737 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
4738 CHECK(success);
4739
4740 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
4741 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
4742 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
4743 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
4744
4745 if (mIsAVC) {
4746 void *data;
4747 size_t size;
4748 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
4749
4750 const uint8_t *ptr = (const uint8_t *)data;
4751
4752 CHECK(size >= 7);
4753 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4754
4755 // The number of bytes used to encode the length of a NAL unit.
4756 mNALLengthSize = 1 + (ptr[4] & 3);
4757 } else if (mIsHEVC) {
4758 void *data;
4759 size_t size;
4760 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
4761
4762 const uint8_t *ptr = (const uint8_t *)data;
4763
4764 CHECK(size >= 22);
4765 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4766
4767 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
4768 }
4769
4770 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
4771 mIsAudio = !strncasecmp(mime, "audio/", 6);
4772
4773 if (mIsPcm) {
4774 int32_t numChannels = 0;
4775 int32_t bitsPerSample = 0;
4776 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
4777 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
4778
4779 int32_t bytesPerSample = bitsPerSample >> 3;
4780 int32_t pcmSampleSize = bytesPerSample * numChannels;
4781
4782 size_t maxSampleSize;
4783 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
4784 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
4785 || bitsPerSample != 16) {
4786 // Not supported
4787 mIsPcm = false;
4788 } else {
4789 AMediaFormat_setInt32(mFormat,
4790 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
4791 }
4792 }
4793
4794 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
4795
4796 }
4797
init()4798 status_t MPEG4Source::init() {
4799 status_t err = OK;
4800 const char *mime;
4801 CHECK(AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime));
4802 if (mFirstMoofOffset != 0) {
4803 off64_t offset = mFirstMoofOffset;
4804 err = parseChunk(&offset);
4805 if(err == OK && !strncasecmp("video/", mime, 6)
4806 && !mCurrentSamples.isEmpty()) {
4807 // Start offset should be less or equal to composition time of first sample.
4808 // ISO : sample_composition_time_offset, version 0 (unsigned) for major brands.
4809 mElstShiftStartTicks = std::min(mElstShiftStartTicks,
4810 (uint64_t)(*mCurrentSamples.begin()).compositionOffset);
4811 }
4812 return err;
4813 }
4814
4815 if (!strncasecmp("video/", mime, 6)) {
4816 uint64_t firstSampleCTS = 0;
4817 err = mSampleTable->getMetaDataForSample(0, NULL, NULL, &firstSampleCTS);
4818 // Start offset should be less or equal to composition time of first sample.
4819 // Composition time stamp of first sample cannot be negative.
4820 mElstShiftStartTicks = std::min(mElstShiftStartTicks, firstSampleCTS);
4821 }
4822
4823 return err;
4824 }
4825
~MPEG4Source()4826 MPEG4Source::~MPEG4Source() {
4827 if (mStarted) {
4828 stop();
4829 }
4830 free(mCurrentSampleInfoSizes);
4831 free(mCurrentSampleInfoOffsets);
4832 }
4833
start()4834 media_status_t MPEG4Source::start() {
4835 Mutex::Autolock autoLock(mLock);
4836
4837 CHECK(!mStarted);
4838
4839 int32_t tmp;
4840 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
4841 size_t max_size = tmp;
4842
4843 // A somewhat arbitrary limit that should be sufficient for 8k video frames
4844 // If you see the message below for a valid input stream: increase the limit
4845 const size_t kMaxBufferSize = 64 * 1024 * 1024;
4846 if (max_size > kMaxBufferSize) {
4847 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
4848 return AMEDIA_ERROR_MALFORMED;
4849 }
4850 if (max_size == 0) {
4851 ALOGE("zero max input size");
4852 return AMEDIA_ERROR_MALFORMED;
4853 }
4854
4855 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
4856 const size_t kInitialBuffers = 2;
4857 const size_t kMaxBuffers = 8;
4858 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
4859 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
4860 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
4861 if (mSrcBuffer == NULL) {
4862 // file probably specified a bad max size
4863 return AMEDIA_ERROR_MALFORMED;
4864 }
4865
4866 mStarted = true;
4867
4868 return AMEDIA_OK;
4869 }
4870
stop()4871 media_status_t MPEG4Source::stop() {
4872 Mutex::Autolock autoLock(mLock);
4873
4874 CHECK(mStarted);
4875
4876 if (mBuffer != NULL) {
4877 mBuffer->release();
4878 mBuffer = NULL;
4879 }
4880
4881 delete[] mSrcBuffer;
4882 mSrcBuffer = NULL;
4883
4884 mStarted = false;
4885 mCurrentSampleIndex = 0;
4886
4887 return AMEDIA_OK;
4888 }
4889
parseChunk(off64_t * offset)4890 status_t MPEG4Source::parseChunk(off64_t *offset) {
4891 uint32_t hdr[2];
4892 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4893 return ERROR_IO;
4894 }
4895 uint64_t chunk_size = ntohl(hdr[0]);
4896 uint32_t chunk_type = ntohl(hdr[1]);
4897 off64_t data_offset = *offset + 8;
4898
4899 if (chunk_size == 1) {
4900 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4901 return ERROR_IO;
4902 }
4903 chunk_size = ntoh64(chunk_size);
4904 data_offset += 8;
4905
4906 if (chunk_size < 16) {
4907 // The smallest valid chunk is 16 bytes long in this case.
4908 return ERROR_MALFORMED;
4909 }
4910 } else if (chunk_size < 8) {
4911 // The smallest valid chunk is 8 bytes long.
4912 return ERROR_MALFORMED;
4913 }
4914
4915 char chunk[5];
4916 MakeFourCCString(chunk_type, chunk);
4917 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4918
4919 off64_t chunk_data_size = *offset + chunk_size - data_offset;
4920
4921 switch(chunk_type) {
4922
4923 case FOURCC("traf"):
4924 case FOURCC("moof"): {
4925 off64_t stop_offset = *offset + chunk_size;
4926 *offset = data_offset;
4927 while (*offset < stop_offset) {
4928 status_t err = parseChunk(offset);
4929 if (err != OK) {
4930 return err;
4931 }
4932 }
4933 if (chunk_type == FOURCC("moof")) {
4934 // *offset points to the box following this moof. Find the next moof from there.
4935
4936 while (true) {
4937 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4938 // no more box to the end of file.
4939 break;
4940 }
4941 chunk_size = ntohl(hdr[0]);
4942 chunk_type = ntohl(hdr[1]);
4943 if (chunk_size == 1) {
4944 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4945 // which is defined in 4.2 Object Structure.
4946 // When chunk_size==1, 8 bytes follows as "largesize".
4947 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4948 return ERROR_IO;
4949 }
4950 chunk_size = ntoh64(chunk_size);
4951 if (chunk_size < 16) {
4952 // The smallest valid chunk is 16 bytes long in this case.
4953 return ERROR_MALFORMED;
4954 }
4955 } else if (chunk_size == 0) {
4956 // next box extends to end of file.
4957 } else if (chunk_size < 8) {
4958 // The smallest valid chunk is 8 bytes long in this case.
4959 return ERROR_MALFORMED;
4960 }
4961
4962 if (chunk_type == FOURCC("moof")) {
4963 mNextMoofOffset = *offset;
4964 break;
4965 } else if (chunk_size == 0) {
4966 break;
4967 }
4968 *offset += chunk_size;
4969 }
4970 }
4971 break;
4972 }
4973
4974 case FOURCC("tfhd"): {
4975 status_t err;
4976 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4977 return err;
4978 }
4979 *offset += chunk_size;
4980 break;
4981 }
4982
4983 case FOURCC("trun"): {
4984 status_t err;
4985 if (mLastParsedTrackId == mTrackId) {
4986 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4987 return err;
4988 }
4989 }
4990
4991 *offset += chunk_size;
4992 break;
4993 }
4994
4995 case FOURCC("saiz"): {
4996 status_t err;
4997 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4998 return err;
4999 }
5000 *offset += chunk_size;
5001 break;
5002 }
5003 case FOURCC("saio"): {
5004 status_t err;
5005 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5006 != OK) {
5007 return err;
5008 }
5009 *offset += chunk_size;
5010 break;
5011 }
5012
5013 case FOURCC("senc"): {
5014 status_t err;
5015 if ((err = parseSampleEncryption(data_offset)) != OK) {
5016 return err;
5017 }
5018 *offset += chunk_size;
5019 break;
5020 }
5021
5022 case FOURCC("mdat"): {
5023 // parse DRM info if present
5024 ALOGV("MPEG4Source::parseChunk mdat");
5025 // if saiz/saoi was previously observed, do something with the sampleinfos
5026 *offset += chunk_size;
5027 break;
5028 }
5029
5030 default: {
5031 *offset += chunk_size;
5032 break;
5033 }
5034 }
5035 return OK;
5036 }
5037
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5038 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5039 off64_t offset, off64_t size) {
5040 ALOGV("parseSampleAuxiliaryInformationSizes");
5041 if (size < 9) {
5042 return -EINVAL;
5043 }
5044 // 14496-12 8.7.12
5045 uint8_t version;
5046 if (mDataSource->readAt(
5047 offset, &version, sizeof(version))
5048 < (ssize_t)sizeof(version)) {
5049 return ERROR_IO;
5050 }
5051
5052 if (version != 0) {
5053 return ERROR_UNSUPPORTED;
5054 }
5055 offset++;
5056 size--;
5057
5058 uint32_t flags;
5059 if (!mDataSource->getUInt24(offset, &flags)) {
5060 return ERROR_IO;
5061 }
5062 offset += 3;
5063 size -= 3;
5064
5065 if (flags & 1) {
5066 if (size < 13) {
5067 return -EINVAL;
5068 }
5069 uint32_t tmp;
5070 if (!mDataSource->getUInt32(offset, &tmp)) {
5071 return ERROR_MALFORMED;
5072 }
5073 mCurrentAuxInfoType = tmp;
5074 offset += 4;
5075 size -= 4;
5076 if (!mDataSource->getUInt32(offset, &tmp)) {
5077 return ERROR_MALFORMED;
5078 }
5079 mCurrentAuxInfoTypeParameter = tmp;
5080 offset += 4;
5081 size -= 4;
5082 }
5083
5084 uint8_t defsize;
5085 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5086 return ERROR_MALFORMED;
5087 }
5088 mCurrentDefaultSampleInfoSize = defsize;
5089 offset++;
5090 size--;
5091
5092 uint32_t smplcnt;
5093 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5094 return ERROR_MALFORMED;
5095 }
5096 mCurrentSampleInfoCount = smplcnt;
5097 offset += 4;
5098 size -= 4;
5099 if (mCurrentDefaultSampleInfoSize != 0) {
5100 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5101 return OK;
5102 }
5103 if(smplcnt > size) {
5104 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5105 android_errorWriteLog(0x534e4554, "124525515");
5106 return -EINVAL;
5107 }
5108 if (smplcnt > mCurrentSampleInfoAllocSize) {
5109 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5110 if (newPtr == NULL) {
5111 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5112 return NO_MEMORY;
5113 }
5114 mCurrentSampleInfoSizes = newPtr;
5115 mCurrentSampleInfoAllocSize = smplcnt;
5116 }
5117
5118 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5119 return OK;
5120 }
5121
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5122 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5123 off64_t offset, off64_t size) {
5124 ALOGV("parseSampleAuxiliaryInformationOffsets");
5125 if (size < 8) {
5126 return -EINVAL;
5127 }
5128 // 14496-12 8.7.13
5129 uint8_t version;
5130 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5131 return ERROR_IO;
5132 }
5133 offset++;
5134 size--;
5135
5136 uint32_t flags;
5137 if (!mDataSource->getUInt24(offset, &flags)) {
5138 return ERROR_IO;
5139 }
5140 offset += 3;
5141 size -= 3;
5142
5143 uint32_t entrycount;
5144 if (!mDataSource->getUInt32(offset, &entrycount)) {
5145 return ERROR_IO;
5146 }
5147 offset += 4;
5148 size -= 4;
5149 if (entrycount == 0) {
5150 return OK;
5151 }
5152 if (entrycount > UINT32_MAX / 8) {
5153 return ERROR_MALFORMED;
5154 }
5155
5156 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5157 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5158 if (newPtr == NULL) {
5159 ALOGE("failed to realloc %u -> %u",
5160 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5161 return NO_MEMORY;
5162 }
5163 mCurrentSampleInfoOffsets = newPtr;
5164 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5165 }
5166 mCurrentSampleInfoOffsetCount = entrycount;
5167
5168 if (mCurrentSampleInfoOffsets == NULL) {
5169 return OK;
5170 }
5171
5172 for (size_t i = 0; i < entrycount; i++) {
5173 if (version == 0) {
5174 if (size < 4) {
5175 ALOGW("b/124526959");
5176 android_errorWriteLog(0x534e4554, "124526959");
5177 return -EINVAL;
5178 }
5179 uint32_t tmp;
5180 if (!mDataSource->getUInt32(offset, &tmp)) {
5181 return ERROR_IO;
5182 }
5183 mCurrentSampleInfoOffsets[i] = tmp;
5184 offset += 4;
5185 size -= 4;
5186 } else {
5187 if (size < 8) {
5188 ALOGW("b/124526959");
5189 android_errorWriteLog(0x534e4554, "124526959");
5190 return -EINVAL;
5191 }
5192 uint64_t tmp;
5193 if (!mDataSource->getUInt64(offset, &tmp)) {
5194 return ERROR_IO;
5195 }
5196 mCurrentSampleInfoOffsets[i] = tmp;
5197 offset += 8;
5198 size -= 8;
5199 }
5200 }
5201
5202 // parse clear/encrypted data
5203
5204 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5205
5206 drmoffset += mCurrentMoofOffset;
5207
5208 return parseClearEncryptedSizes(drmoffset, false, 0);
5209 }
5210
parseClearEncryptedSizes(off64_t offset,bool isSubsampleEncryption,uint32_t flags)5211 status_t MPEG4Source::parseClearEncryptedSizes(
5212 off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
5213
5214 int32_t ivlength;
5215 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5216 return ERROR_MALFORMED;
5217 }
5218
5219 // only 0, 8 and 16 byte initialization vectors are supported
5220 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5221 ALOGW("unsupported IV length: %d", ivlength);
5222 return ERROR_MALFORMED;
5223 }
5224
5225 uint32_t sampleCount = mCurrentSampleInfoCount;
5226 if (isSubsampleEncryption) {
5227 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5228 return ERROR_IO;
5229 }
5230 offset += 4;
5231 }
5232
5233 // read CencSampleAuxiliaryDataFormats
5234 for (size_t i = 0; i < sampleCount; i++) {
5235 if (i >= mCurrentSamples.size()) {
5236 ALOGW("too few samples");
5237 break;
5238 }
5239 Sample *smpl = &mCurrentSamples.editItemAt(i);
5240 if (!smpl->clearsizes.isEmpty()) {
5241 continue;
5242 }
5243
5244 memset(smpl->iv, 0, 16);
5245 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5246 return ERROR_IO;
5247 }
5248
5249 offset += ivlength;
5250
5251 bool readSubsamples;
5252 if (isSubsampleEncryption) {
5253 readSubsamples = flags & 2;
5254 } else {
5255 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5256 if (smplinfosize == 0) {
5257 smplinfosize = mCurrentSampleInfoSizes[i];
5258 }
5259 readSubsamples = smplinfosize > ivlength;
5260 }
5261
5262 if (readSubsamples) {
5263 uint16_t numsubsamples;
5264 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5265 return ERROR_IO;
5266 }
5267 offset += 2;
5268 for (size_t j = 0; j < numsubsamples; j++) {
5269 uint16_t numclear;
5270 uint32_t numencrypted;
5271 if (!mDataSource->getUInt16(offset, &numclear)) {
5272 return ERROR_IO;
5273 }
5274 offset += 2;
5275 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5276 return ERROR_IO;
5277 }
5278 offset += 4;
5279 smpl->clearsizes.add(numclear);
5280 smpl->encryptedsizes.add(numencrypted);
5281 }
5282 } else {
5283 smpl->clearsizes.add(0);
5284 smpl->encryptedsizes.add(smpl->size);
5285 }
5286 }
5287
5288 return OK;
5289 }
5290
parseSampleEncryption(off64_t offset)5291 status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
5292 uint32_t flags;
5293 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5294 return ERROR_MALFORMED;
5295 }
5296 return parseClearEncryptedSizes(offset + 4, true, flags);
5297 }
5298
parseTrackFragmentHeader(off64_t offset,off64_t size)5299 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5300
5301 if (size < 8) {
5302 return -EINVAL;
5303 }
5304
5305 uint32_t flags;
5306 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5307 return ERROR_MALFORMED;
5308 }
5309
5310 if (flags & 0xff000000) {
5311 return -EINVAL;
5312 }
5313
5314 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5315 return ERROR_MALFORMED;
5316 }
5317
5318 if (mLastParsedTrackId != mTrackId) {
5319 // this is not the right track, skip it
5320 return OK;
5321 }
5322
5323 mTrackFragmentHeaderInfo.mFlags = flags;
5324 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5325 offset += 8;
5326 size -= 8;
5327
5328 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5329
5330 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5331 if (size < 8) {
5332 return -EINVAL;
5333 }
5334
5335 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5336 return ERROR_MALFORMED;
5337 }
5338 offset += 8;
5339 size -= 8;
5340 }
5341
5342 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5343 if (size < 4) {
5344 return -EINVAL;
5345 }
5346
5347 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5348 return ERROR_MALFORMED;
5349 }
5350 offset += 4;
5351 size -= 4;
5352 }
5353
5354 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5355 if (size < 4) {
5356 return -EINVAL;
5357 }
5358
5359 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5360 return ERROR_MALFORMED;
5361 }
5362 offset += 4;
5363 size -= 4;
5364 }
5365
5366 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5367 if (size < 4) {
5368 return -EINVAL;
5369 }
5370
5371 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5372 return ERROR_MALFORMED;
5373 }
5374 offset += 4;
5375 size -= 4;
5376 }
5377
5378 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5379 if (size < 4) {
5380 return -EINVAL;
5381 }
5382
5383 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5384 return ERROR_MALFORMED;
5385 }
5386 offset += 4;
5387 size -= 4;
5388 }
5389
5390 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5391 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5392 }
5393
5394 mTrackFragmentHeaderInfo.mDataOffset = 0;
5395 return OK;
5396 }
5397
parseTrackFragmentRun(off64_t offset,off64_t size)5398 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5399
5400 ALOGV("MPEG4Source::parseTrackFragmentRun");
5401 if (size < 8) {
5402 return -EINVAL;
5403 }
5404
5405 enum {
5406 kDataOffsetPresent = 0x01,
5407 kFirstSampleFlagsPresent = 0x04,
5408 kSampleDurationPresent = 0x100,
5409 kSampleSizePresent = 0x200,
5410 kSampleFlagsPresent = 0x400,
5411 kSampleCompositionTimeOffsetPresent = 0x800,
5412 };
5413
5414 uint32_t flags;
5415 if (!mDataSource->getUInt32(offset, &flags)) {
5416 return ERROR_MALFORMED;
5417 }
5418 // |version| only affects SampleCompositionTimeOffset field.
5419 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5420 // Otherwise, SampleCompositionTimeOffset is int32_t.
5421 // Sample.compositionOffset is defined as int32_t.
5422 uint8_t version = flags >> 24;
5423 flags &= 0xffffff;
5424 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5425
5426 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5427 // These two shall not be used together.
5428 return -EINVAL;
5429 }
5430
5431 uint32_t sampleCount;
5432 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5433 return ERROR_MALFORMED;
5434 }
5435 offset += 8;
5436 size -= 8;
5437
5438 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5439
5440 uint32_t firstSampleFlags = 0;
5441
5442 if (flags & kDataOffsetPresent) {
5443 if (size < 4) {
5444 return -EINVAL;
5445 }
5446
5447 int32_t dataOffsetDelta;
5448 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
5449 return ERROR_MALFORMED;
5450 }
5451
5452 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
5453
5454 offset += 4;
5455 size -= 4;
5456 }
5457
5458 if (flags & kFirstSampleFlagsPresent) {
5459 if (size < 4) {
5460 return -EINVAL;
5461 }
5462
5463 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5464 return ERROR_MALFORMED;
5465 }
5466 offset += 4;
5467 size -= 4;
5468 }
5469
5470 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5471 sampleCtsOffset = 0;
5472
5473 size_t bytesPerSample = 0;
5474 if (flags & kSampleDurationPresent) {
5475 bytesPerSample += 4;
5476 } else if (mTrackFragmentHeaderInfo.mFlags
5477 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5478 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5479 } else if (mTrex) {
5480 sampleDuration = mTrex->default_sample_duration;
5481 }
5482
5483 if (flags & kSampleSizePresent) {
5484 bytesPerSample += 4;
5485 } else if (mTrackFragmentHeaderInfo.mFlags
5486 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5487 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5488 } else {
5489 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5490 }
5491
5492 if (flags & kSampleFlagsPresent) {
5493 bytesPerSample += 4;
5494 } else if (mTrackFragmentHeaderInfo.mFlags
5495 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5496 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5497 } else {
5498 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5499 }
5500
5501 if (flags & kSampleCompositionTimeOffsetPresent) {
5502 bytesPerSample += 4;
5503 } else {
5504 sampleCtsOffset = 0;
5505 }
5506
5507 if (bytesPerSample != 0) {
5508 if (size < (off64_t)sampleCount * bytesPerSample) {
5509 return -EINVAL;
5510 }
5511 } else {
5512 if (sampleDuration == 0) {
5513 ALOGW("b/123389881 sampleDuration == 0");
5514 android_errorWriteLog(0x534e4554, "124389881 zero");
5515 return -EINVAL;
5516 }
5517
5518 // apply some quick (vs strict legality) checks
5519 //
5520 static constexpr uint32_t kMaxTrunSampleCount = 10000;
5521 if (sampleCount > kMaxTrunSampleCount) {
5522 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
5523 sampleCount, kMaxTrunSampleCount);
5524 android_errorWriteLog(0x534e4554, "124389881 count");
5525 return -EINVAL;
5526 }
5527 }
5528
5529 Sample tmp;
5530 for (uint32_t i = 0; i < sampleCount; ++i) {
5531 if (flags & kSampleDurationPresent) {
5532 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
5533 return ERROR_MALFORMED;
5534 }
5535 offset += 4;
5536 }
5537
5538 if (flags & kSampleSizePresent) {
5539 if (!mDataSource->getUInt32(offset, &sampleSize)) {
5540 return ERROR_MALFORMED;
5541 }
5542 offset += 4;
5543 }
5544
5545 if (flags & kSampleFlagsPresent) {
5546 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
5547 return ERROR_MALFORMED;
5548 }
5549 offset += 4;
5550 }
5551
5552 if (flags & kSampleCompositionTimeOffsetPresent) {
5553 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
5554 return ERROR_MALFORMED;
5555 }
5556 offset += 4;
5557 }
5558
5559 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
5560 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
5561 dataOffset, sampleSize, sampleDuration,
5562 (flags & kFirstSampleFlagsPresent) && i == 0
5563 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
5564 tmp.offset = dataOffset;
5565 tmp.size = sampleSize;
5566 tmp.duration = sampleDuration;
5567 tmp.compositionOffset = sampleCtsOffset;
5568 memset(tmp.iv, 0, sizeof(tmp.iv));
5569 if (mCurrentSamples.add(tmp) < 0) {
5570 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
5571 android_errorWriteLog(0x534e4554, "124389881 allocation");
5572 mCurrentSamples.clear();
5573 return NO_MEMORY;
5574 }
5575
5576 dataOffset += sampleSize;
5577 }
5578
5579 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
5580
5581 return OK;
5582 }
5583
getFormat(AMediaFormat * meta)5584 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
5585 Mutex::Autolock autoLock(mLock);
5586 AMediaFormat_copy(meta, mFormat);
5587 return AMEDIA_OK;
5588 }
5589
parseNALSize(const uint8_t * data) const5590 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
5591 switch (mNALLengthSize) {
5592 case 1:
5593 return *data;
5594 case 2:
5595 return U16_AT(data);
5596 case 3:
5597 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
5598 case 4:
5599 return U32_AT(data);
5600 }
5601
5602 // This cannot happen, mNALLengthSize springs to life by adding 1 to
5603 // a 2-bit integer.
5604 CHECK(!"Should not be here.");
5605
5606 return 0;
5607 }
5608
parseHEVCLayerId(const uint8_t * data,size_t size)5609 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
5610 if (data == nullptr || size < mNALLengthSize + 2) {
5611 return -1;
5612 }
5613
5614 // HEVC NAL-header (16-bit)
5615 // 1 6 6 3
5616 // |-|uuuuuu|------|iii|
5617 // ^ ^
5618 // NAL_type layer_id + 1
5619 //
5620 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
5621 enum {
5622 TSA_N = 2,
5623 TSA_R = 3,
5624 STSA_N = 4,
5625 STSA_R = 5,
5626 };
5627
5628 data += mNALLengthSize;
5629 uint16_t nalHeader = data[0] << 8 | data[1];
5630
5631 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
5632 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
5633 int32_t layerIdPlusOne = nalHeader & 0x7u;
5634 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
5635 return layerIdPlusOne - 1;
5636 }
5637 return 0;
5638 }
5639
read(MediaBufferHelper ** out,const ReadOptions * options)5640 media_status_t MPEG4Source::read(
5641 MediaBufferHelper **out, const ReadOptions *options) {
5642 Mutex::Autolock autoLock(mLock);
5643
5644 CHECK(mStarted);
5645
5646 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
5647 *out = nullptr;
5648 return AMEDIA_ERROR_WOULD_BLOCK;
5649 }
5650
5651 if (mFirstMoofOffset > 0) {
5652 return fragmentedRead(out, options);
5653 }
5654
5655 *out = NULL;
5656
5657 int64_t targetSampleTimeUs = -1;
5658
5659 int64_t seekTimeUs;
5660 ReadOptions::SeekMode mode;
5661 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5662
5663 if (mIsHeif) {
5664 CHECK(mSampleTable == NULL);
5665 CHECK(mItemTable != NULL);
5666 int32_t imageIndex;
5667 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
5668 return AMEDIA_ERROR_MALFORMED;
5669 }
5670
5671 status_t err;
5672 if (seekTimeUs >= 0) {
5673 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
5674 } else {
5675 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
5676 }
5677 if (err != OK) {
5678 return AMEDIA_ERROR_UNKNOWN;
5679 }
5680 } else {
5681 uint32_t findFlags = 0;
5682 switch (mode) {
5683 case ReadOptions::SEEK_PREVIOUS_SYNC:
5684 findFlags = SampleTable::kFlagBefore;
5685 break;
5686 case ReadOptions::SEEK_NEXT_SYNC:
5687 findFlags = SampleTable::kFlagAfter;
5688 break;
5689 case ReadOptions::SEEK_CLOSEST_SYNC:
5690 case ReadOptions::SEEK_CLOSEST:
5691 findFlags = SampleTable::kFlagClosest;
5692 break;
5693 case ReadOptions::SEEK_FRAME_INDEX:
5694 findFlags = SampleTable::kFlagFrameIndex;
5695 break;
5696 default:
5697 CHECK(!"Should not be here.");
5698 break;
5699 }
5700 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
5701 seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
5702 }
5703
5704 uint32_t sampleIndex;
5705 status_t err = mSampleTable->findSampleAtTime(
5706 seekTimeUs, 1000000, mTimescale,
5707 &sampleIndex, findFlags);
5708
5709 if (mode == ReadOptions::SEEK_CLOSEST
5710 || mode == ReadOptions::SEEK_FRAME_INDEX) {
5711 // We found the closest sample already, now we want the sync
5712 // sample preceding it (or the sample itself of course), even
5713 // if the subsequent sync sample is closer.
5714 findFlags = SampleTable::kFlagBefore;
5715 }
5716
5717 uint32_t syncSampleIndex = sampleIndex;
5718 // assume every audio sample is a sync sample. This works around
5719 // seek issues with files that were incorrectly written with an
5720 // empty or single-sample stss block for the audio track
5721 if (err == OK && !mIsAudio) {
5722 err = mSampleTable->findSyncSampleNear(
5723 sampleIndex, &syncSampleIndex, findFlags);
5724 }
5725
5726 uint64_t sampleTime;
5727 if (err == OK) {
5728 err = mSampleTable->getMetaDataForSample(
5729 sampleIndex, NULL, NULL, &sampleTime);
5730 }
5731
5732 if (err != OK) {
5733 if (err == ERROR_OUT_OF_RANGE) {
5734 // An attempt to seek past the end of the stream would
5735 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
5736 // this all the way to the MediaPlayer would cause abnormal
5737 // termination. Legacy behaviour appears to be to behave as if
5738 // we had seeked to the end of stream, ending normally.
5739 return AMEDIA_ERROR_END_OF_STREAM;
5740 }
5741 ALOGV("end of stream");
5742 return AMEDIA_ERROR_UNKNOWN;
5743 }
5744
5745 if (mode == ReadOptions::SEEK_CLOSEST
5746 || mode == ReadOptions::SEEK_FRAME_INDEX) {
5747 sampleTime -= mElstShiftStartTicks;
5748 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
5749 }
5750
5751 #if 0
5752 uint32_t syncSampleTime;
5753 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
5754 syncSampleIndex, NULL, NULL, &syncSampleTime));
5755
5756 ALOGI("seek to time %lld us => sample at time %lld us, "
5757 "sync sample at time %lld us",
5758 seekTimeUs,
5759 sampleTime * 1000000ll / mTimescale,
5760 syncSampleTime * 1000000ll / mTimescale);
5761 #endif
5762
5763 mCurrentSampleIndex = syncSampleIndex;
5764 }
5765
5766 if (mBuffer != NULL) {
5767 mBuffer->release();
5768 mBuffer = NULL;
5769 }
5770
5771 // fall through
5772 }
5773
5774 off64_t offset = 0;
5775 size_t size = 0;
5776 uint64_t cts, stts;
5777 bool isSyncSample;
5778 bool newBuffer = false;
5779 if (mBuffer == NULL) {
5780 newBuffer = true;
5781
5782 status_t err;
5783 if (!mIsHeif) {
5784 err = mSampleTable->getMetaDataForSample(
5785 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
5786 if(err == OK) {
5787 /* Composition Time Stamp cannot be negative. Some files have video Sample
5788 * Time(STTS)delta with zero value(b/117402420). Hence subtract only
5789 * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
5790 */
5791 cts -= std::min(cts, mElstShiftStartTicks);
5792 }
5793
5794 } else {
5795 err = mItemTable->getImageOffsetAndSize(
5796 options && options->getSeekTo(&seekTimeUs, &mode) ?
5797 &mCurrentSampleIndex : NULL, &offset, &size);
5798
5799 cts = stts = 0;
5800 isSyncSample = 0;
5801 ALOGV("image offset %lld, size %zu", (long long)offset, size);
5802 }
5803
5804 if (err != OK) {
5805 if (err == ERROR_END_OF_STREAM) {
5806 return AMEDIA_ERROR_END_OF_STREAM;
5807 }
5808 return AMEDIA_ERROR_UNKNOWN;
5809 }
5810
5811 err = mBufferGroup->acquire_buffer(&mBuffer);
5812
5813 if (err != OK) {
5814 CHECK(mBuffer == NULL);
5815 return AMEDIA_ERROR_UNKNOWN;
5816 }
5817 if (size > mBuffer->size()) {
5818 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5819 mBuffer->release();
5820 mBuffer = NULL;
5821 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
5822 }
5823 }
5824
5825 if (!mIsAVC && !mIsHEVC && !mIsAC4) {
5826 if (newBuffer) {
5827 if (mIsPcm) {
5828 // The twos' PCM block reader assumes that all samples has the same size.
5829
5830 uint32_t samplesToRead = mSampleTable->getLastSampleIndexInChunk()
5831 - mCurrentSampleIndex + 1;
5832 if (samplesToRead > kMaxPcmFrameSize) {
5833 samplesToRead = kMaxPcmFrameSize;
5834 }
5835
5836 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
5837 samplesToRead, size, mCurrentSampleIndex,
5838 mSampleTable->getLastSampleIndexInChunk());
5839
5840 size_t totalSize = samplesToRead * size;
5841 uint8_t* buf = (uint8_t *)mBuffer->data();
5842 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
5843 if (bytesRead < (ssize_t)totalSize) {
5844 mBuffer->release();
5845 mBuffer = NULL;
5846
5847 return AMEDIA_ERROR_IO;
5848 }
5849
5850 AMediaFormat *meta = mBuffer->meta_data();
5851 AMediaFormat_clear(meta);
5852 AMediaFormat_setInt64(
5853 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5854 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5855
5856 int32_t byteOrder = 0;
5857 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
5858 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
5859
5860 if (isGetBigEndian && byteOrder == 1) {
5861 // Big-endian -> little-endian
5862 uint16_t *dstData = (uint16_t *)buf;
5863 uint16_t *srcData = (uint16_t *)buf;
5864
5865 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
5866 dstData[j] = ntohs(srcData[j]);
5867 }
5868 }
5869
5870 mCurrentSampleIndex += samplesToRead;
5871 mBuffer->set_range(0, totalSize);
5872 } else {
5873 ssize_t num_bytes_read =
5874 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5875
5876 if (num_bytes_read < (ssize_t)size) {
5877 mBuffer->release();
5878 mBuffer = NULL;
5879
5880 return AMEDIA_ERROR_IO;
5881 }
5882
5883 CHECK(mBuffer != NULL);
5884 mBuffer->set_range(0, size);
5885 AMediaFormat *meta = mBuffer->meta_data();
5886 AMediaFormat_clear(meta);
5887 AMediaFormat_setInt64(
5888 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5889 AMediaFormat_setInt64(
5890 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5891
5892 if (targetSampleTimeUs >= 0) {
5893 AMediaFormat_setInt64(
5894 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5895 }
5896
5897 if (isSyncSample) {
5898 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5899 }
5900
5901 ++mCurrentSampleIndex;
5902 }
5903 }
5904
5905 *out = mBuffer;
5906 mBuffer = NULL;
5907
5908 return AMEDIA_OK;
5909
5910 } else if (mIsAC4) {
5911 CHECK(mBuffer != NULL);
5912 // Make sure there is enough space to write the sync header and the raw frame
5913 if (mBuffer->range_length() < (7 + size)) {
5914 mBuffer->release();
5915 mBuffer = NULL;
5916
5917 return AMEDIA_ERROR_IO;
5918 }
5919
5920 uint8_t *dstData = (uint8_t *)mBuffer->data();
5921 size_t dstOffset = 0;
5922 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
5923 // AC40 sync word, meaning no CRC at the end of the frame
5924 dstData[dstOffset++] = 0xAC;
5925 dstData[dstOffset++] = 0x40;
5926 dstData[dstOffset++] = 0xFF;
5927 dstData[dstOffset++] = 0xFF;
5928 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
5929 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
5930 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
5931
5932 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
5933 if (numBytesRead != (ssize_t)size) {
5934 mBuffer->release();
5935 mBuffer = NULL;
5936
5937 return AMEDIA_ERROR_IO;
5938 }
5939
5940 mBuffer->set_range(0, dstOffset + size);
5941 AMediaFormat *meta = mBuffer->meta_data();
5942 AMediaFormat_clear(meta);
5943 AMediaFormat_setInt64(
5944 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5945 AMediaFormat_setInt64(
5946 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5947
5948 if (targetSampleTimeUs >= 0) {
5949 AMediaFormat_setInt64(
5950 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5951 }
5952
5953 if (isSyncSample) {
5954 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5955 }
5956
5957 ++mCurrentSampleIndex;
5958
5959 *out = mBuffer;
5960 mBuffer = NULL;
5961
5962 return AMEDIA_OK;
5963 } else {
5964 // Whole NAL units are returned but each fragment is prefixed by
5965 // the start code (0x00 00 00 01).
5966 ssize_t num_bytes_read = 0;
5967 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
5968
5969 if (num_bytes_read < (ssize_t)size) {
5970 mBuffer->release();
5971 mBuffer = NULL;
5972
5973 return AMEDIA_ERROR_IO;
5974 }
5975
5976 uint8_t *dstData = (uint8_t *)mBuffer->data();
5977 size_t srcOffset = 0;
5978 size_t dstOffset = 0;
5979
5980 while (srcOffset < size) {
5981 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5982 size_t nalLength = 0;
5983 if (!isMalFormed) {
5984 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5985 srcOffset += mNALLengthSize;
5986 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
5987 }
5988
5989 if (isMalFormed) {
5990 //if nallength abnormal,ignore it.
5991 ALOGW("abnormal nallength, ignore this NAL");
5992 srcOffset = size;
5993 break;
5994 }
5995
5996 if (nalLength == 0) {
5997 continue;
5998 }
5999
6000 if (dstOffset > SIZE_MAX - 4 ||
6001 dstOffset + 4 > SIZE_MAX - nalLength ||
6002 dstOffset + 4 + nalLength > mBuffer->size()) {
6003 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6004 android_errorWriteLog(0x534e4554, "27208621");
6005 mBuffer->release();
6006 mBuffer = NULL;
6007 return AMEDIA_ERROR_MALFORMED;
6008 }
6009
6010 dstData[dstOffset++] = 0;
6011 dstData[dstOffset++] = 0;
6012 dstData[dstOffset++] = 0;
6013 dstData[dstOffset++] = 1;
6014 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6015 srcOffset += nalLength;
6016 dstOffset += nalLength;
6017 }
6018 CHECK_EQ(srcOffset, size);
6019 CHECK(mBuffer != NULL);
6020 mBuffer->set_range(0, dstOffset);
6021
6022 AMediaFormat *meta = mBuffer->meta_data();
6023 AMediaFormat_clear(meta);
6024 AMediaFormat_setInt64(
6025 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6026 AMediaFormat_setInt64(
6027 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6028
6029 if (targetSampleTimeUs >= 0) {
6030 AMediaFormat_setInt64(
6031 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6032 }
6033
6034 if (mIsAVC) {
6035 uint32_t layerId = FindAVCLayerId(
6036 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6037 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6038 } else if (mIsHEVC) {
6039 int32_t layerId = parseHEVCLayerId(
6040 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6041 if (layerId >= 0) {
6042 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6043 }
6044 }
6045
6046 if (isSyncSample) {
6047 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6048 }
6049
6050 ++mCurrentSampleIndex;
6051
6052 *out = mBuffer;
6053 mBuffer = NULL;
6054
6055 return AMEDIA_OK;
6056 }
6057 }
6058
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6059 media_status_t MPEG4Source::fragmentedRead(
6060 MediaBufferHelper **out, const ReadOptions *options) {
6061
6062 ALOGV("MPEG4Source::fragmentedRead");
6063
6064 CHECK(mStarted);
6065
6066 *out = NULL;
6067
6068 int64_t targetSampleTimeUs = -1;
6069
6070 int64_t seekTimeUs;
6071 ReadOptions::SeekMode mode;
6072 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6073
6074 seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6075 ALOGV("shifted seekTimeUs :%" PRId64 ", mElstShiftStartTicks:%" PRIu64, seekTimeUs,
6076 mElstShiftStartTicks);
6077
6078 int numSidxEntries = mSegments.size();
6079 if (numSidxEntries != 0) {
6080 int64_t totalTime = 0;
6081 off64_t totalOffset = mFirstMoofOffset;
6082 for (int i = 0; i < numSidxEntries; i++) {
6083 const SidxEntry *se = &mSegments[i];
6084 if (totalTime + se->mDurationUs > seekTimeUs) {
6085 // The requested time is somewhere in this segment
6086 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6087 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6088 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6089 // requested next sync, or closest sync and it was closer to the end of
6090 // this segment
6091 totalTime += se->mDurationUs;
6092 totalOffset += se->mSize;
6093 }
6094 break;
6095 }
6096 totalTime += se->mDurationUs;
6097 totalOffset += se->mSize;
6098 }
6099 mCurrentMoofOffset = totalOffset;
6100 mNextMoofOffset = -1;
6101 mCurrentSamples.clear();
6102 mCurrentSampleIndex = 0;
6103 status_t err = parseChunk(&totalOffset);
6104 if (err != OK) {
6105 return AMEDIA_ERROR_UNKNOWN;
6106 }
6107 mCurrentTime = totalTime * mTimescale / 1000000ll;
6108 } else {
6109 // without sidx boxes, we can only seek to 0
6110 mCurrentMoofOffset = mFirstMoofOffset;
6111 mNextMoofOffset = -1;
6112 mCurrentSamples.clear();
6113 mCurrentSampleIndex = 0;
6114 off64_t tmp = mCurrentMoofOffset;
6115 status_t err = parseChunk(&tmp);
6116 if (err != OK) {
6117 return AMEDIA_ERROR_UNKNOWN;
6118 }
6119 mCurrentTime = 0;
6120 }
6121
6122 if (mBuffer != NULL) {
6123 mBuffer->release();
6124 mBuffer = NULL;
6125 }
6126
6127 // fall through
6128 }
6129
6130 off64_t offset = 0;
6131 size_t size = 0;
6132 uint64_t cts = 0;
6133 bool isSyncSample = false;
6134 bool newBuffer = false;
6135 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6136 newBuffer = true;
6137
6138 if (mBuffer != NULL) {
6139 mBuffer->release();
6140 mBuffer = NULL;
6141 }
6142 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6143 // move to next fragment if there is one
6144 if (mNextMoofOffset <= mCurrentMoofOffset) {
6145 return AMEDIA_ERROR_END_OF_STREAM;
6146 }
6147 off64_t nextMoof = mNextMoofOffset;
6148 mCurrentMoofOffset = nextMoof;
6149 mCurrentSamples.clear();
6150 mCurrentSampleIndex = 0;
6151 status_t err = parseChunk(&nextMoof);
6152 if (err != OK) {
6153 return AMEDIA_ERROR_UNKNOWN;
6154 }
6155 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6156 return AMEDIA_ERROR_END_OF_STREAM;
6157 }
6158 }
6159
6160 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6161 offset = smpl->offset;
6162 size = smpl->size;
6163 cts = mCurrentTime + smpl->compositionOffset;
6164 /* Composition Time Stamp cannot be negative. Some files have video Sample
6165 * Time(STTS)delta with zero value(b/117402420). Hence subtract only
6166 * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
6167 */
6168 cts -= std::min(cts, mElstShiftStartTicks);
6169
6170 mCurrentTime += smpl->duration;
6171 isSyncSample = (mCurrentSampleIndex == 0);
6172
6173 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6174
6175 if (err != OK) {
6176 CHECK(mBuffer == NULL);
6177 ALOGV("acquire_buffer returned %d", err);
6178 return AMEDIA_ERROR_UNKNOWN;
6179 }
6180 if (size > mBuffer->size()) {
6181 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6182 mBuffer->release();
6183 mBuffer = NULL;
6184 return AMEDIA_ERROR_UNKNOWN;
6185 }
6186 }
6187
6188 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6189 AMediaFormat *bufmeta = mBuffer->meta_data();
6190 AMediaFormat_clear(bufmeta);
6191 if (smpl->encryptedsizes.size()) {
6192 // store clear/encrypted lengths in metadata
6193 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6194 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
6195 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6196 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
6197 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6198 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6199 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6200 AMediaFormat_setInt32(bufmeta,
6201 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6202 AMediaFormat_setInt32(bufmeta,
6203 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6204
6205 void *iv = NULL;
6206 size_t ivlength = 0;
6207 if (!AMediaFormat_getBuffer(mFormat,
6208 "crypto-iv", &iv, &ivlength)) {
6209 iv = (void *) smpl->iv;
6210 ivlength = 16; // use 16 or the actual size?
6211 }
6212 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6213 }
6214
6215 if (!mIsAVC && !mIsHEVC) {
6216 if (newBuffer) {
6217 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6218 mBuffer->release();
6219 mBuffer = NULL;
6220
6221 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6222 return AMEDIA_ERROR_MALFORMED;
6223 }
6224
6225 ssize_t num_bytes_read =
6226 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6227
6228 if (num_bytes_read < (ssize_t)size) {
6229 mBuffer->release();
6230 mBuffer = NULL;
6231
6232 ALOGE("i/o error");
6233 return AMEDIA_ERROR_IO;
6234 }
6235
6236 CHECK(mBuffer != NULL);
6237 mBuffer->set_range(0, size);
6238 AMediaFormat_setInt64(bufmeta,
6239 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6240 AMediaFormat_setInt64(bufmeta,
6241 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6242
6243 if (targetSampleTimeUs >= 0) {
6244 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6245 }
6246
6247 if (mIsAVC) {
6248 uint32_t layerId = FindAVCLayerId(
6249 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6250 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6251 } else if (mIsHEVC) {
6252 int32_t layerId = parseHEVCLayerId(
6253 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6254 if (layerId >= 0) {
6255 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6256 }
6257 }
6258
6259 if (isSyncSample) {
6260 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6261 }
6262
6263 ++mCurrentSampleIndex;
6264 }
6265
6266 *out = mBuffer;
6267 mBuffer = NULL;
6268
6269 return AMEDIA_OK;
6270
6271 } else {
6272 ALOGV("whole NAL");
6273 // Whole NAL units are returned but each fragment is prefixed by
6274 // the start code (0x00 00 00 01).
6275 ssize_t num_bytes_read = 0;
6276 void *data = NULL;
6277 bool isMalFormed = false;
6278 int32_t max_size;
6279 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6280 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6281 isMalFormed = true;
6282 } else {
6283 data = mSrcBuffer;
6284 }
6285
6286 if (isMalFormed || data == NULL) {
6287 ALOGE("isMalFormed size %zu", size);
6288 if (mBuffer != NULL) {
6289 mBuffer->release();
6290 mBuffer = NULL;
6291 }
6292 return AMEDIA_ERROR_MALFORMED;
6293 }
6294 num_bytes_read = mDataSource->readAt(offset, data, size);
6295
6296 if (num_bytes_read < (ssize_t)size) {
6297 mBuffer->release();
6298 mBuffer = NULL;
6299
6300 ALOGE("i/o error");
6301 return AMEDIA_ERROR_IO;
6302 }
6303
6304 uint8_t *dstData = (uint8_t *)mBuffer->data();
6305 size_t srcOffset = 0;
6306 size_t dstOffset = 0;
6307
6308 while (srcOffset < size) {
6309 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6310 size_t nalLength = 0;
6311 if (!isMalFormed) {
6312 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6313 srcOffset += mNALLengthSize;
6314 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6315 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6316 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6317 }
6318
6319 if (isMalFormed) {
6320 ALOGE("Video is malformed; nalLength %zu", nalLength);
6321 mBuffer->release();
6322 mBuffer = NULL;
6323 return AMEDIA_ERROR_MALFORMED;
6324 }
6325
6326 if (nalLength == 0) {
6327 continue;
6328 }
6329
6330 if (dstOffset > SIZE_MAX - 4 ||
6331 dstOffset + 4 > SIZE_MAX - nalLength ||
6332 dstOffset + 4 + nalLength > mBuffer->size()) {
6333 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6334 android_errorWriteLog(0x534e4554, "26365349");
6335 mBuffer->release();
6336 mBuffer = NULL;
6337 return AMEDIA_ERROR_MALFORMED;
6338 }
6339
6340 dstData[dstOffset++] = 0;
6341 dstData[dstOffset++] = 0;
6342 dstData[dstOffset++] = 0;
6343 dstData[dstOffset++] = 1;
6344 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6345 srcOffset += nalLength;
6346 dstOffset += nalLength;
6347 }
6348 CHECK_EQ(srcOffset, size);
6349 CHECK(mBuffer != NULL);
6350 mBuffer->set_range(0, dstOffset);
6351
6352 AMediaFormat *bufmeta = mBuffer->meta_data();
6353 AMediaFormat_setInt64(bufmeta,
6354 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6355 AMediaFormat_setInt64(bufmeta,
6356 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6357
6358 if (targetSampleTimeUs >= 0) {
6359 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6360 }
6361
6362 if (isSyncSample) {
6363 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6364 }
6365
6366 ++mCurrentSampleIndex;
6367
6368 *out = mBuffer;
6369 mBuffer = NULL;
6370
6371 return AMEDIA_OK;
6372 }
6373
6374 return AMEDIA_OK;
6375 }
6376
findTrackByMimePrefix(const char * mimePrefix)6377 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6378 const char *mimePrefix) {
6379 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6380 const char *mime;
6381 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6382 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6383 return track;
6384 }
6385 }
6386
6387 return NULL;
6388 }
6389
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6390 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6391 uint8_t header[8];
6392
6393 ssize_t n = source->readAt(4, header, sizeof(header));
6394 if (n < (ssize_t)sizeof(header)) {
6395 return false;
6396 }
6397
6398 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6399 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6400 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6401 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6402 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6403 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6404 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6405 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
6406 *confidence = 0.4;
6407
6408 return true;
6409 }
6410
6411 return false;
6412 }
6413
isCompatibleBrand(uint32_t fourcc)6414 static bool isCompatibleBrand(uint32_t fourcc) {
6415 static const uint32_t kCompatibleBrands[] = {
6416 FOURCC("isom"),
6417 FOURCC("iso2"),
6418 FOURCC("avc1"),
6419 FOURCC("hvc1"),
6420 FOURCC("hev1"),
6421 FOURCC("av01"),
6422 FOURCC("vp09"),
6423 FOURCC("3gp4"),
6424 FOURCC("mp41"),
6425 FOURCC("mp42"),
6426 FOURCC("dash"),
6427 FOURCC("nvr1"),
6428
6429 // Won't promise that the following file types can be played.
6430 // Just give these file types a chance.
6431 FOURCC("qt "), // Apple's QuickTime
6432 FOURCC("MSNV"), // Sony's PSP
6433 FOURCC("wmf "),
6434
6435 FOURCC("3g2a"), // 3GPP2
6436 FOURCC("3g2b"),
6437 FOURCC("mif1"), // HEIF image
6438 FOURCC("heic"), // HEIF image
6439 FOURCC("msf1"), // HEIF image sequence
6440 FOURCC("hevc"), // HEIF image sequence
6441 };
6442
6443 for (size_t i = 0;
6444 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
6445 ++i) {
6446 if (kCompatibleBrands[i] == fourcc) {
6447 return true;
6448 }
6449 }
6450
6451 return false;
6452 }
6453
6454 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
6455 // compatible brand is present.
6456 // Also try to identify where this file's metadata ends
6457 // (end of the 'moov' atom) and report it to the caller as part of
6458 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)6459 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
6460 // We scan up to 128 bytes to identify this file as an MP4.
6461 static const off64_t kMaxScanOffset = 128ll;
6462
6463 off64_t offset = 0ll;
6464 bool foundGoodFileType = false;
6465 off64_t moovAtomEndOffset = -1ll;
6466 bool done = false;
6467
6468 while (!done && offset < kMaxScanOffset) {
6469 uint32_t hdr[2];
6470 if (source->readAt(offset, hdr, 8) < 8) {
6471 return false;
6472 }
6473
6474 uint64_t chunkSize = ntohl(hdr[0]);
6475 uint32_t chunkType = ntohl(hdr[1]);
6476 off64_t chunkDataOffset = offset + 8;
6477
6478 if (chunkSize == 1) {
6479 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
6480 return false;
6481 }
6482
6483 chunkSize = ntoh64(chunkSize);
6484 chunkDataOffset += 8;
6485
6486 if (chunkSize < 16) {
6487 // The smallest valid chunk is 16 bytes long in this case.
6488 return false;
6489 }
6490
6491 } else if (chunkSize < 8) {
6492 // The smallest valid chunk is 8 bytes long.
6493 return false;
6494 }
6495
6496 // (data_offset - offset) is either 8 or 16
6497 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
6498 if (chunkDataSize < 0) {
6499 ALOGE("b/23540914");
6500 return false;
6501 }
6502
6503 char chunkstring[5];
6504 MakeFourCCString(chunkType, chunkstring);
6505 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
6506 chunkstring, chunkSize, (long long)offset);
6507 switch (chunkType) {
6508 case FOURCC("ftyp"):
6509 {
6510 if (chunkDataSize < 8) {
6511 return false;
6512 }
6513
6514 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
6515 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
6516 if (i == 1) {
6517 // Skip this index, it refers to the minorVersion,
6518 // not a brand.
6519 continue;
6520 }
6521
6522 uint32_t brand;
6523 if (source->readAt(
6524 chunkDataOffset + 4 * i, &brand, 4) < 4) {
6525 return false;
6526 }
6527
6528 brand = ntohl(brand);
6529
6530 if (isCompatibleBrand(brand)) {
6531 foundGoodFileType = true;
6532 break;
6533 }
6534 }
6535
6536 if (!foundGoodFileType) {
6537 return false;
6538 }
6539
6540 break;
6541 }
6542
6543 case FOURCC("moov"):
6544 {
6545 moovAtomEndOffset = offset + chunkSize;
6546
6547 done = true;
6548 break;
6549 }
6550
6551 default:
6552 break;
6553 }
6554
6555 offset += chunkSize;
6556 }
6557
6558 if (!foundGoodFileType) {
6559 return false;
6560 }
6561
6562 *confidence = 0.4f;
6563
6564 return true;
6565 }
6566
CreateExtractor(CDataSource * source,void *)6567 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
6568 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
6569 }
6570
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)6571 static CreatorFunc Sniff(
6572 CDataSource *source, float *confidence, void **,
6573 FreeMetaFunc *) {
6574 DataSourceHelper helper(source);
6575 if (BetterSniffMPEG4(&helper, confidence)) {
6576 return CreateExtractor;
6577 }
6578
6579 if (LegacySniffMPEG4(&helper, confidence)) {
6580 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
6581 return CreateExtractor;
6582 }
6583
6584 return NULL;
6585 }
6586
// NULL-terminated list of file extensions (without the leading dot) that is
// handed to the extractor definition together with Sniff; see the .v3
// initializer in GETEXTRACTORDEF().
static const char *extensions[] = {
    "3g2",
    "3ga",
    "3gp",
    "3gpp",
    "3gpp2",
    "m4a",
    "m4r",
    "m4v",
    "mov",
    "mp4",
    "qt",
    NULL
};
6601
extern "C" {
// This is the only symbol that needs to be exported.
// Returns the ExtractorDef describing this plugin: the definition version,
// the plugin UUID, extractor version 2, the name "MP4 Extractor", and the v3
// entry pairing the Sniff function with the supported file extensions.
__attribute__ ((visibility ("default")))
ExtractorDef GETEXTRACTORDEF() {
    return {
        EXTRACTORDEF_VERSION,
        UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
        2, // version
        "MP4 Extractor",
        { .v3 = {Sniff, extensions} },
    };
}

} // extern "C"
6616
6617 } // namespace android
6618