1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19 
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <memory>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include <log/log.h>
28 #include <utils/Log.h>
29 
30 #include "AC4Parser.h"
31 #include "MPEG4Extractor.h"
32 #include "SampleTable.h"
33 #include "ItemTable.h"
34 
35 #include <ESDS.h>
36 #include <ID3.h>
37 #include <media/DataSourceBase.h>
38 #include <media/ExtractorUtils.h>
39 #include <media/stagefright/foundation/ABitReader.h>
40 #include <media/stagefright/foundation/ABuffer.h>
41 #include <media/stagefright/foundation/ADebug.h>
42 #include <media/stagefright/foundation/AMessage.h>
43 #include <media/stagefright/foundation/AudioPresentationInfo.h>
44 #include <media/stagefright/foundation/AUtils.h>
45 #include <media/stagefright/foundation/ByteUtils.h>
46 #include <media/stagefright/foundation/ColorUtils.h>
47 #include <media/stagefright/foundation/avc_utils.h>
48 #include <media/stagefright/foundation/hexdump.h>
49 #include <media/stagefright/foundation/OpusHeader.h>
50 #include <media/stagefright/MediaBufferGroup.h>
51 #include <media/stagefright/MediaDefs.h>
52 #include <media/stagefright/MetaDataBase.h>
53 #include <utils/String8.h>
54 
55 #include <byteswap.h>
56 
// Fallback for build environments whose headers do not define UINT32_MAX.
#ifndef UINT32_MAX
#define UINT32_MAX       (4294967295U)
#endif

// NOTE(review): presumably the byte size of the ALAC codec-specific info
// payload; it is not referenced in this part of the file -- confirm at the
// use site.
#define ALAC_SPECIFIC_INFO_SIZE (36)
62 
63 namespace android {
64 
// File-wide size limits applied while parsing.
enum {
    // max track header chunk to return
    // (getTrackMetaData() clamps the MPEG2 stream header it reads to this
    // many bytes)
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    // (parseChunk() does not apply this limit to 'mdat' atoms)
    kMaxAtomSize = 64 * 1024 * 1024,
};
73 
// Media track implementation derived from MediaTrackHelper. Only the
// declaration is visible in this part of the file; the behavior of each
// member function is defined elsewhere.
class MPEG4Source : public MediaTrackHelper {
// NOTE(review): presumably an upper bound, in bytes, for a PCM frame --
// confirm where it is used.
static const size_t  kMaxPcmFrameSize = 8192;
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // "sidx" is taken by non-const reference and stored as a reference
    // member (mSegments), so the vector must outlive this object.
    MPEG4Source(AMediaFormat *format,
                DataSourceHelper *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable,
                uint64_t elstShiftStartTicks);
    virtual status_t init();

    virtual media_status_t start();
    virtual media_status_t stop();

    virtual media_status_t getFormat(AMediaFormat *);

    virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
    // Always reports that non-blocking reads are supported.
    bool supportsNonBlockingRead() override { return true; }
    virtual media_status_t fragmentedRead(
            MediaBufferHelper **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    AMediaFormat *mFormat;
    DataSourceHelper *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference to a vector owned outside this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime; // in media timescale ticks
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    // NOTE(review): the mCurrentSampleInfo* members below look like state for
    // the sample auxiliary information parsers declared further down
    // (sizes / offsets tables with their allocated capacities) -- confirm
    // against the definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    bool mIsAC4;
    bool mIsPcm;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferHelper *mBuffer;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    bool mIsAudio;
    sp<ItemTable> mItemTable;

    // Start offset from composition time to presentation time.
    // Support shift only for video tracks through mElstShiftStartTicks for now.
    uint64_t mElstShiftStartTicks;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
    status_t parseSampleEncryption(off64_t offset);
    // returns -1 for invalid layer ID
    int32_t parseHEVCLayerId(const uint8_t *data, size_t size);

    // Values gathered by parseTrackFragmentHeader().
    // NOTE(review): the Flags constants presumably mirror the flag bits of
    // the track fragment header box -- confirm against the parser.
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy operations are declared private; no definition is visible here,
    // presumably to make the class non-copyable.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
201 
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache, while forwarding
// all remaining requests to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// To cover all tracks, sources may be wrapped multiple times, i.e. each
// CachedRangedDataSource caches the sample table metadata for one track.

class CachedRangedDataSource : public DataSourceHelper {
public:
    // Wraps "source"; ownership is not taken at construction time.
    explicit CachedRangedDataSource(DataSourceHelper *source);
    // Frees the cache and deletes the wrapped source only if ownership was
    // assumed through setCachedRange().
    virtual ~CachedRangedDataSource();

    // Serves the read from the cache when the requested range lies inside
    // the cached range; otherwise forwards it to the wrapped source.
    ssize_t readAt(off64_t offset, void *data, size_t size) override;
    // Forwarded to the wrapped source.
    status_t getSize(off64_t *size) override;
    // Forwarded to the wrapped source.
    uint32_t flags() override;

    // Replaces the cached range with "size" bytes read at "offset" from the
    // wrapped source. Returns OK on success, -ENOMEM if the buffer cannot be
    // allocated, or ERROR_IO if fewer than "size" bytes could be read (the
    // cache is left empty in that case). On success, mOwnsDataSource is set
    // to "assumeSourceOwnershipOnSuccess".
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    DataSourceHelper *mSource;
    // When true, the destructor deletes mSource.
    bool mOwnsDataSource;
    // Offset in the wrapped source of the first cached byte.
    off64_t mCachedOffset;
    // Number of cached bytes; 0 when nothing is cached.
    size_t mCachedSize;
    // malloc()ed buffer holding the cached bytes, or NULL.
    uint8_t *mCache;

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Copy operations are declared private; no definition is visible here,
    // presumably to make the class non-copyable.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
235 
CachedRangedDataSource(DataSourceHelper * source)236 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
237     : DataSourceHelper(source),
238       mSource(source),
239       mOwnsDataSource(false),
240       mCachedOffset(0),
241       mCachedSize(0),
242       mCache(NULL) {
243 }
244 
~CachedRangedDataSource()245 CachedRangedDataSource::~CachedRangedDataSource() {
246     clearCache();
247     if (mOwnsDataSource) {
248         delete mSource;
249     }
250 }
251 
clearCache()252 void CachedRangedDataSource::clearCache() {
253     if (mCache) {
254         free(mCache);
255         mCache = NULL;
256     }
257 
258     mCachedOffset = 0;
259     mCachedSize = 0;
260 }
261 
readAt(off64_t offset,void * data,size_t size)262 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
263     Mutex::Autolock autoLock(mLock);
264 
265     if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
266         memcpy(data, &mCache[offset - mCachedOffset], size);
267         return size;
268     }
269 
270     return mSource->readAt(offset, data, size);
271 }
272 
getSize(off64_t * size)273 status_t CachedRangedDataSource::getSize(off64_t *size) {
274     return mSource->getSize(size);
275 }
276 
flags()277 uint32_t CachedRangedDataSource::flags() {
278     return mSource->flags();
279 }
280 
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)281 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
282         size_t size,
283         bool assumeSourceOwnershipOnSuccess) {
284     Mutex::Autolock autoLock(mLock);
285 
286     clearCache();
287 
288     mCache = (uint8_t *)malloc(size);
289 
290     if (mCache == NULL) {
291         return -ENOMEM;
292     }
293 
294     mCachedOffset = offset;
295     mCachedSize = size;
296 
297     ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
298 
299     if (err < (ssize_t)size) {
300         clearCache();
301 
302         return ERROR_IO;
303     }
304     mOwnsDataSource = assumeSourceOwnershipOnSuccess;
305     return OK;
306 }
307 
308 ////////////////////////////////////////////////////////////////////////////////
309 
// Debug switch: when true, parseChunk() prints each chunk's type and size and
// hexdumps up to the first 256 bytes of the chunk.
static const bool kUseHexDump = false;
311 
FourCC2MIME(uint32_t fourcc)312 static const char *FourCC2MIME(uint32_t fourcc) {
313     switch (fourcc) {
314         case FOURCC("mp4a"):
315             return MEDIA_MIMETYPE_AUDIO_AAC;
316 
317         case FOURCC("samr"):
318             return MEDIA_MIMETYPE_AUDIO_AMR_NB;
319 
320         case FOURCC("sawb"):
321             return MEDIA_MIMETYPE_AUDIO_AMR_WB;
322 
323         case FOURCC("ec-3"):
324             return MEDIA_MIMETYPE_AUDIO_EAC3;
325 
326         case FOURCC("mp4v"):
327             return MEDIA_MIMETYPE_VIDEO_MPEG4;
328 
329         case FOURCC("s263"):
330         case FOURCC("h263"):
331         case FOURCC("H263"):
332             return MEDIA_MIMETYPE_VIDEO_H263;
333 
334         case FOURCC("avc1"):
335             return MEDIA_MIMETYPE_VIDEO_AVC;
336 
337         case FOURCC("hvc1"):
338         case FOURCC("hev1"):
339             return MEDIA_MIMETYPE_VIDEO_HEVC;
340         case FOURCC("ac-4"):
341             return MEDIA_MIMETYPE_AUDIO_AC4;
342         case FOURCC("Opus"):
343             return MEDIA_MIMETYPE_AUDIO_OPUS;
344 
345         case FOURCC("twos"):
346         case FOURCC("sowt"):
347             return MEDIA_MIMETYPE_AUDIO_RAW;
348         case FOURCC("alac"):
349             return MEDIA_MIMETYPE_AUDIO_ALAC;
350         case FOURCC("fLaC"):
351             return MEDIA_MIMETYPE_AUDIO_FLAC;
352         case FOURCC("av01"):
353             return MEDIA_MIMETYPE_VIDEO_AV1;
354         case FOURCC("vp09"):
355             return MEDIA_MIMETYPE_VIDEO_VP9;
356         case FOURCC(".mp3"):
357         case 0x6D730055: // "ms U" mp3 audio
358             return MEDIA_MIMETYPE_AUDIO_MPEG;
359         default:
360             ALOGW("Unknown fourcc: %c%c%c%c",
361                    (fourcc >> 24) & 0xff,
362                    (fourcc >> 16) & 0xff,
363                    (fourcc >> 8) & 0xff,
364                    fourcc & 0xff
365                    );
366             return "application/octet-stream";
367     }
368 }
369 
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)370 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
371     if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
372         // AMR NB audio is always mono, 8kHz
373         *channels = 1;
374         *rate = 8000;
375         return true;
376     } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
377         // AMR WB audio is always mono, 16kHz
378         *channels = 1;
379         *rate = 16000;
380         return true;
381     }
382     return false;
383 }
384 
MPEG4Extractor(DataSourceHelper * source,const char * mime)385 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
386     : mMoofOffset(0),
387       mMoofFound(false),
388       mMdatFound(false),
389       mDataSource(source),
390       mInitCheck(NO_INIT),
391       mHeaderTimescale(0),
392       mIsQT(false),
393       mIsHeif(false),
394       mHasMoovBox(false),
395       mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
396       mFirstTrack(NULL),
397       mLastTrack(NULL) {
398     ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
399     mFileMetaData = AMediaFormat_new();
400 }
401 
~MPEG4Extractor()402 MPEG4Extractor::~MPEG4Extractor() {
403     Track *track = mFirstTrack;
404     while (track) {
405         Track *next = track->next;
406 
407         delete track;
408         track = next;
409     }
410     mFirstTrack = mLastTrack = NULL;
411 
412     for (size_t i = 0; i < mPssh.size(); i++) {
413         delete [] mPssh[i].data;
414     }
415     mPssh.clear();
416 
417     delete mDataSource;
418     AMediaFormat_delete(mFileMetaData);
419 }
420 
flags() const421 uint32_t MPEG4Extractor::flags() const {
422     return CAN_PAUSE |
423             ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
424                     (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
425 }
426 
getMetaData(AMediaFormat * meta)427 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
428     status_t err;
429     if ((err = readMetaData()) != OK) {
430         return AMEDIA_ERROR_UNKNOWN;
431     }
432     AMediaFormat_copy(meta, mFileMetaData);
433     return AMEDIA_OK;
434 }
435 
countTracks()436 size_t MPEG4Extractor::countTracks() {
437     status_t err;
438     if ((err = readMetaData()) != OK) {
439         ALOGV("MPEG4Extractor::countTracks: no tracks");
440         return 0;
441     }
442 
443     size_t n = 0;
444     Track *track = mFirstTrack;
445     while (track) {
446         ++n;
447         track = track->next;
448     }
449 
450     ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
451     return n;
452 }
453 
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)454 media_status_t MPEG4Extractor::getTrackMetaData(
455         AMediaFormat *meta,
456         size_t index, uint32_t flags) {
457     status_t err;
458     if ((err = readMetaData()) != OK) {
459         return AMEDIA_ERROR_UNKNOWN;
460     }
461 
462     Track *track = mFirstTrack;
463     while (index > 0) {
464         if (track == NULL) {
465             return AMEDIA_ERROR_UNKNOWN;
466         }
467 
468         track = track->next;
469         --index;
470     }
471 
472     if (track == NULL) {
473         return AMEDIA_ERROR_UNKNOWN;
474     }
475 
476     [=] {
477         int64_t duration;
478         int32_t samplerate;
479         // Only for audio track.
480         if (track->has_elst && mHeaderTimescale != 0 &&
481                 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
482                 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
483 
484             // Elst has to be processed only the first time this function is called.
485             track->has_elst = false;
486 
487             if (track->elst_segment_duration > INT64_MAX) {
488                 return;
489             }
490             int64_t segment_duration = track->elst_segment_duration;
491             int64_t media_time = track->elst_media_time;
492             int64_t halfscale = track->timescale / 2;
493 
494             ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
495                   ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
496                   segment_duration, media_time,
497                   halfscale, mHeaderTimescale, track->timescale);
498 
499             if ((uint32_t)samplerate != track->timescale){
500                 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
501                     samplerate);
502             }
503             // Both delay and paddingsamples have to be set inorder for either to be
504             // effective in the lower layers.
505             int64_t delay = 0;
506             if (media_time > 0) { // Gapless playback
507                 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
508                 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
509                         __builtin_add_overflow(delay, halfscale, &delay) ||
510                         (delay /= track->timescale, false) ||
511                         delay > INT32_MAX ||
512                         delay < INT32_MIN) {
513                     ALOGW("ignoring edit list with bogus values");
514                     return;
515                 }
516             }
517             ALOGV("delay = %" PRId64, delay);
518             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
519 
520             int64_t paddingsamples = 0;
521             if (segment_duration > 0) {
522                 int64_t scaled_duration;
523                 // scaled_duration = duration * mHeaderTimescale;
524                 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
525                     return;
526                 }
527                 ALOGV("scaled_duration = %" PRId64, scaled_duration);
528 
529                 int64_t segment_end;
530                 int64_t padding;
531                 int64_t segment_duration_e6;
532                 int64_t media_time_scaled_e6;
533                 int64_t media_time_scaled;
534                 // padding = scaled_duration - ((segment_duration * 1000000) +
535                 //                  ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
536                 // segment_duration is based on timescale in movie header box(mdhd)
537                 // media_time is based on timescale track header/media timescale
538                 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
539                     __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
540                     __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
541                     return;
542                 }
543                 media_time_scaled_e6 /= track->timescale;
544                 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
545                     || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
546                     return;
547                 }
548                 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
549                 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
550                 // might be slightly shorter than the segment duration, which would make the
551                 // padding negative. Clamp to zero.
552                 if (padding > 0) {
553                     int64_t halfscale_mht = mHeaderTimescale / 2;
554                     int64_t halfscale_e6;
555                     int64_t timescale_e6;
556                     // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
557                     //                / (mHeaderTimescale * 1000000);
558                     if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
559                             __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
560                             __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
561                             __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
562                             (paddingsamples /= timescale_e6, false) ||
563                             paddingsamples > INT32_MAX) {
564                         return;
565                     }
566                 }
567             }
568             ALOGV("paddingsamples = %" PRId64, paddingsamples);
569             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
570         }
571     }();
572 
573     if ((flags & kIncludeExtensiveMetaData)
574             && !track->includes_expensive_metadata) {
575         track->includes_expensive_metadata = true;
576 
577         const char *mime;
578         CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
579         if (!strncasecmp("video/", mime, 6)) {
580             // MPEG2 tracks do not provide CSD, so read the stream header
581             if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
582                 off64_t offset;
583                 size_t size;
584                 if (track->sampleTable->getMetaDataForSample(
585                             0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
586                     if (size > kMaxTrackHeaderSize) {
587                         size = kMaxTrackHeaderSize;
588                     }
589                     uint8_t header[kMaxTrackHeaderSize];
590                     if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
591                         AMediaFormat_setBuffer(track->meta,
592                                 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
593                     }
594                 }
595             }
596 
597             if (mMoofOffset > 0) {
598                 int64_t duration;
599                 if (AMediaFormat_getInt64(track->meta,
600                         AMEDIAFORMAT_KEY_DURATION, &duration)) {
601                     // nothing fancy, just pick a frame near 1/4th of the duration
602                     AMediaFormat_setInt64(track->meta,
603                             AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
604                 }
605             } else {
606                 uint32_t sampleIndex;
607                 uint64_t sampleTime;
608                 if (track->timescale != 0 &&
609                         track->sampleTable->findThumbnailSample(&sampleIndex) == OK
610                         && track->sampleTable->getMetaDataForSample(
611                             sampleIndex, NULL /* offset */, NULL /* size */,
612                             &sampleTime) == OK) {
613                         AMediaFormat_setInt64(track->meta,
614                                 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
615                                 ((int64_t)sampleTime * 1000000) / track->timescale);
616                 }
617             }
618         }
619     }
620 
621     AMediaFormat_copy(meta, track->meta);
622     return AMEDIA_OK;
623 }
624 
// Parses the file's top-level chunks (once) and fills in the file-level
// metadata. The outcome is remembered in mInitCheck, so later calls return
// the stored status without parsing again.
//
// Returns mInitCheck, or NO_MEMORY if the buffer for the pssh metadata
// cannot be allocated.
status_t MPEG4Extractor::readMetaData() {
    if (mInitCheck != NO_INIT) {
        return mInitCheck;
    }

    off64_t offset = 0;
    status_t err;
    bool sawMoovOrSidx = false;

    // Keep parsing top-level chunks until either
    //  - a moov box exists, sawMoovOrSidx is set and an mdat or moof was
    //    found, or
    //  - the file is HEIF with a valid item table and HEIF is preferred or
    //    there is no moov box.
    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
                     (mItemTable != NULL) && mItemTable->isValid()))) {
        off64_t orig_offset = offset;
        err = parseChunk(&offset, 0);

        if (err != OK && err != UNKNOWN_ERROR) {
            break;
        } else if (offset <= orig_offset) {
            // only continue parsing if the offset was advanced,
            // otherwise we might end up in an infinite loop
            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
            err = ERROR_MALFORMED;
            break;
        } else if (err == UNKNOWN_ERROR) {
            // NOTE(review): parseChunk() presumably uses UNKNOWN_ERROR to
            // signal that a moov or sidx box was just parsed (inferred from
            // the flag name) -- confirm in parseChunk().
            sawMoovOrSidx = true;
        }
    }

    // For HEIF files, expose every image in the item table as its own track.
    if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
        off64_t exifOffset;
        size_t exifSize;
        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
        }
        for (uint32_t imageIndex = 0;
                imageIndex < mItemTable->countImages(); imageIndex++) {
            AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
            if (meta == NULL) {
                ALOGE("heif image %u has no meta!", imageIndex);
                continue;
            }
            // Some heif files advertise image sequence brands (eg. 'hevc') in
            // ftyp box, but don't have any valid tracks in them. Instead of
            // reporting the entire file as malformed, we override the error
            // to allow still images to be extracted.
            if (err != OK) {
                ALOGW("Extracting still images only");
                err = OK;
            }
            mInitCheck = OK;

            ALOGV("adding HEIF image track %u", imageIndex);
            // Append the new track to the end of the singly linked list.
            Track *track = new Track;
            if (mLastTrack != NULL) {
                mLastTrack->next = track;
            } else {
                mFirstTrack = track;
            }
            mLastTrack = track;

            track->meta = meta;
            // The image index doubles as the track id.
            AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
            track->timescale = 1000000;
        }
    }

    // Pick the container MIME type from the kinds of tracks found: video
    // wins over audio, which wins over HEIC images.
    if (mInitCheck == OK) {
        if (findTrackByMimePrefix("video/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
        } else if (findTrackByMimePrefix("audio/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "audio/mp4");
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
        } else {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
        }
    } else {
        // Nothing marked initialization as successful: record the parse
        // result as the stored status.
        mInitCheck = err;
    }

    CHECK_NE(err, (status_t)NO_INIT);

    // copy pssh data into file metadata
    // Each entry contributes a fixed 20 bytes (uuid + length) followed by
    // its payload of datalen bytes.
    uint64_t psshsize = 0;
    for (size_t i = 0; i < mPssh.size(); i++) {
        psshsize += 20 + mPssh[i].datalen;
    }
    // A total above UINT32_MAX is not exported at all.
    if (psshsize > 0 && psshsize <= UINT32_MAX) {
        char *buf = (char*)malloc(psshsize);
        if (!buf) {
            ALOGE("b/28471206");
            return NO_MEMORY;
        }
        char *ptr = buf;
        for (size_t i = 0; i < mPssh.size(); i++) {
            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
            ptr += (20 + mPssh[i].datalen);
        }
        // The temporary buffer is freed right after this call.
        AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
        free(buf);
    }

    return mInitCheck;
}
738 
739 struct PathAdder {
PathAdderandroid::PathAdder740     PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
741         : mPath(path) {
742         mPath->push(chunkType);
743     }
744 
~PathAdderandroid::PathAdder745     ~PathAdder() {
746         mPath->pop();
747     }
748 
749 private:
750     Vector<uint32_t> *mPath;
751 
752     PathAdder(const PathAdder &);
753     PathAdder &operator=(const PathAdder &);
754 };
755 
underMetaDataPath(const Vector<uint32_t> & path)756 static bool underMetaDataPath(const Vector<uint32_t> &path) {
757     return path.size() >= 5
758         && path[0] == FOURCC("moov")
759         && path[1] == FOURCC("udta")
760         && path[2] == FOURCC("meta")
761         && path[3] == FOURCC("ilst");
762 }
763 
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)764 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
765     return path.size() >= 2
766             && path[0] == FOURCC("moov")
767             && path[1] == FOURCC("meta")
768             && (depth == 2
769             || (depth == 3
770                     && (path[2] == FOURCC("hdlr")
771                     ||  path[2] == FOURCC("ilst")
772                     ||  path[2] == FOURCC("keys"))));
773 }
774 
775 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)776 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
777     // delta between mpeg4 time and unix epoch time
778     static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
779     if (time_1904 < INT64_MIN + delta) {
780         return false;
781     }
782     time_t time_1970 = time_1904 - delta;
783 
784     char tmp[32];
785     struct tm* tm = gmtime(&time_1970);
786     if (tm != NULL &&
787             strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
788         s->setTo(tmp);
789         return true;
790     }
791     return false;
792 }
793 
parseChunk(off64_t * offset,int depth)794 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
795     ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
796 
797     if (*offset < 0) {
798         ALOGE("b/23540914");
799         return ERROR_MALFORMED;
800     }
801     if (depth > 100) {
802         ALOGE("b/27456299");
803         return ERROR_MALFORMED;
804     }
805     uint32_t hdr[2];
806     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
807         return ERROR_IO;
808     }
809     uint64_t chunk_size = ntohl(hdr[0]);
810     int32_t chunk_type = ntohl(hdr[1]);
811     off64_t data_offset = *offset + 8;
812 
813     if (chunk_size == 1) {
814         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
815             return ERROR_IO;
816         }
817         chunk_size = ntoh64(chunk_size);
818         data_offset += 8;
819 
820         if (chunk_size < 16) {
821             // The smallest valid chunk is 16 bytes long in this case.
822             return ERROR_MALFORMED;
823         }
824     } else if (chunk_size == 0) {
825         if (depth == 0) {
826             // atom extends to end of file
827             off64_t sourceSize;
828             if (mDataSource->getSize(&sourceSize) == OK) {
829                 chunk_size = (sourceSize - *offset);
830             } else {
831                 // XXX could we just pick a "sufficiently large" value here?
832                 ALOGE("atom size is 0, and data source has no size");
833                 return ERROR_MALFORMED;
834             }
835         } else {
836             // not allowed for non-toplevel atoms, skip it
837             *offset += 4;
838             return OK;
839         }
840     } else if (chunk_size < 8) {
841         // The smallest valid chunk is 8 bytes long.
842         ALOGE("invalid chunk size: %" PRIu64, chunk_size);
843         return ERROR_MALFORMED;
844     }
845 
846     char chunk[5];
847     MakeFourCCString(chunk_type, chunk);
848     ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
849 
850     if (kUseHexDump) {
851         static const char kWhitespace[] = "                                        ";
852         const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
853         printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
854 
855         char buffer[256];
856         size_t n = chunk_size;
857         if (n > sizeof(buffer)) {
858             n = sizeof(buffer);
859         }
860         if (mDataSource->readAt(*offset, buffer, n)
861                 < (ssize_t)n) {
862             return ERROR_IO;
863         }
864 
865         hexdump(buffer, n);
866     }
867 
868     PathAdder autoAdder(&mPath, chunk_type);
869 
870     // (data_offset - *offset) is either 8 or 16
871     off64_t chunk_data_size = chunk_size - (data_offset - *offset);
872     if (chunk_data_size < 0) {
873         ALOGE("b/23540914");
874         return ERROR_MALFORMED;
875     }
876     if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
877         char errMsg[100];
878         sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
879         ALOGE("%s (b/28615448)", errMsg);
880         android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
881         return ERROR_MALFORMED;
882     }
883 
884     if (chunk_type != FOURCC("cprt")
885             && chunk_type != FOURCC("covr")
886             && mPath.size() == 5 && underMetaDataPath(mPath)) {
887         off64_t stop_offset = *offset + chunk_size;
888         *offset = data_offset;
889         while (*offset < stop_offset) {
890             status_t err = parseChunk(offset, depth + 1);
891             if (err != OK) {
892                 return err;
893             }
894         }
895 
896         if (*offset != stop_offset) {
897             return ERROR_MALFORMED;
898         }
899 
900         return OK;
901     }
902 
903     switch(chunk_type) {
904         case FOURCC("moov"):
905         case FOURCC("trak"):
906         case FOURCC("mdia"):
907         case FOURCC("minf"):
908         case FOURCC("dinf"):
909         case FOURCC("stbl"):
910         case FOURCC("mvex"):
911         case FOURCC("moof"):
912         case FOURCC("traf"):
913         case FOURCC("mfra"):
914         case FOURCC("udta"):
915         case FOURCC("ilst"):
916         case FOURCC("sinf"):
917         case FOURCC("schi"):
918         case FOURCC("edts"):
919         case FOURCC("wave"):
920         {
921             if (chunk_type == FOURCC("moov") && depth != 0) {
922                 ALOGE("moov: depth %d", depth);
923                 return ERROR_MALFORMED;
924             }
925 
926             if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
927                 ALOGE("duplicate moov");
928                 return ERROR_MALFORMED;
929             }
930 
931             if (chunk_type == FOURCC("moof") && !mMoofFound) {
932                 // store the offset of the first segment
933                 mMoofFound = true;
934                 mMoofOffset = *offset;
935             }
936 
937             if (chunk_type == FOURCC("stbl")) {
938                 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
939 
940                 if (mDataSource->flags()
941                         & (DataSourceBase::kWantsPrefetching
942                             | DataSourceBase::kIsCachingDataSource)) {
943                     CachedRangedDataSource *cachedSource =
944                         new CachedRangedDataSource(mDataSource);
945 
946                     if (cachedSource->setCachedRange(
947                             *offset, chunk_size,
948                             true /* assume ownership on success */) == OK) {
949                         mDataSource = cachedSource;
950                     } else {
951                         delete cachedSource;
952                     }
953                 }
954 
955                 if (mLastTrack == NULL) {
956                     return ERROR_MALFORMED;
957                 }
958 
959                 mLastTrack->sampleTable = new SampleTable(mDataSource);
960             }
961 
962             bool isTrack = false;
963             if (chunk_type == FOURCC("trak")) {
964                 if (depth != 1) {
965                     ALOGE("trak: depth %d", depth);
966                     return ERROR_MALFORMED;
967                 }
968                 isTrack = true;
969 
970                 ALOGV("adding new track");
971                 Track *track = new Track;
972                 if (mLastTrack) {
973                     mLastTrack->next = track;
974                 } else {
975                     mFirstTrack = track;
976                 }
977                 mLastTrack = track;
978 
979                 track->meta = AMediaFormat_new();
980                 AMediaFormat_setString(track->meta,
981                         AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
982             }
983 
984             off64_t stop_offset = *offset + chunk_size;
985             *offset = data_offset;
986             while (*offset < stop_offset) {
987 
988                 // pass udata terminate
989                 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
990                     // handle the case that udta terminates with terminate code x00000000
991                     // note that 0 terminator is optional and we just handle this case.
992                     uint32_t terminate_code = 1;
993                     mDataSource->readAt(*offset, &terminate_code, 4);
994                     if (0 == terminate_code) {
995                         *offset += 4;
996                         ALOGD("Terminal code for udta");
997                         continue;
998                     } else {
999                         ALOGW("invalid udta Terminal code");
1000                     }
1001                 }
1002 
1003                 status_t err = parseChunk(offset, depth + 1);
1004                 if (err != OK) {
1005                     if (isTrack) {
1006                         mLastTrack->skipTrack = true;
1007                         break;
1008                     }
1009                     return err;
1010                 }
1011             }
1012 
1013             if (*offset != stop_offset) {
1014                 return ERROR_MALFORMED;
1015             }
1016 
1017             if (isTrack) {
1018                 int32_t trackId;
1019                 // There must be exactly one track header per track.
1020 
1021                 if (!AMediaFormat_getInt32(mLastTrack->meta,
1022                         AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1023                     mLastTrack->skipTrack = true;
1024                 }
1025 
1026                 status_t err = verifyTrack(mLastTrack);
1027                 if (err != OK) {
1028                     mLastTrack->skipTrack = true;
1029                 }
1030 
1031 
1032                 if (mLastTrack->skipTrack) {
1033                     ALOGV("skipping this track...");
1034                     Track *cur = mFirstTrack;
1035 
1036                     if (cur == mLastTrack) {
1037                         delete cur;
1038                         mFirstTrack = mLastTrack = NULL;
1039                     } else {
1040                         while (cur && cur->next != mLastTrack) {
1041                             cur = cur->next;
1042                         }
1043                         if (cur) {
1044                             cur->next = NULL;
1045                         }
1046                         delete mLastTrack;
1047                         mLastTrack = cur;
1048                     }
1049 
1050                     return OK;
1051                 }
1052 
1053                 // place things we built elsewhere into their final locations
1054 
1055                 // put aggregated tx3g data into the metadata
1056                 if (mLastTrack->mTx3gFilled > 0) {
1057                     ALOGV("Putting %zu bytes of tx3g data into meta data",
1058                           mLastTrack->mTx3gFilled);
1059                     AMediaFormat_setBuffer(mLastTrack->meta,
1060                         AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1061                         mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1062                     // drop it now to reduce our footprint
1063                     free(mLastTrack->mTx3gBuffer);
1064                     mLastTrack->mTx3gBuffer = NULL;
1065                     mLastTrack->mTx3gFilled = 0;
1066                     mLastTrack->mTx3gSize = 0;
1067                 }
1068 
1069             } else if (chunk_type == FOURCC("moov")) {
1070                 mInitCheck = OK;
1071 
1072                 return UNKNOWN_ERROR;  // Return a generic error.
1073             }
1074             break;
1075         }
1076 
1077         case FOURCC("schm"):
1078         {
1079 
1080             *offset += chunk_size;
1081             if (!mLastTrack) {
1082                 return ERROR_MALFORMED;
1083             }
1084 
1085             uint32_t scheme_type;
1086             if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1087                 return ERROR_IO;
1088             }
1089             scheme_type = ntohl(scheme_type);
1090             int32_t mode = kCryptoModeUnencrypted;
1091             switch(scheme_type) {
1092                 case FOURCC("cbc1"):
1093                 {
1094                     mode = kCryptoModeAesCbc;
1095                     break;
1096                 }
1097                 case FOURCC("cbcs"):
1098                 {
1099                     mode = kCryptoModeAesCbc;
1100                     mLastTrack->subsample_encryption = true;
1101                     break;
1102                 }
1103                 case FOURCC("cenc"):
1104                 {
1105                     mode = kCryptoModeAesCtr;
1106                     break;
1107                 }
1108                 case FOURCC("cens"):
1109                 {
1110                     mode = kCryptoModeAesCtr;
1111                     mLastTrack->subsample_encryption = true;
1112                     break;
1113                 }
1114             }
1115             if (mode != kCryptoModeUnencrypted) {
1116                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1117             }
1118             break;
1119         }
1120 
1121 
1122         case FOURCC("elst"):
1123         {
1124             *offset += chunk_size;
1125 
1126             if (!mLastTrack) {
1127                 return ERROR_MALFORMED;
1128             }
1129 
1130             // See 14496-12 8.6.6
1131             uint8_t version;
1132             if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1133                 return ERROR_IO;
1134             }
1135 
1136             uint32_t entry_count;
1137             if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1138                 return ERROR_IO;
1139             }
1140 
1141             if (entry_count != 1) {
1142                 // we only support a single entry at the moment, for gapless playback
1143                 // or start offset
1144                 ALOGW("ignoring edit list with %d entries", entry_count);
1145             } else {
1146                 off64_t entriesoffset = data_offset + 8;
1147                 uint64_t segment_duration;
1148                 int64_t media_time;
1149 
1150                 if (version == 1) {
1151                     if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1152                             !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1153                         return ERROR_IO;
1154                     }
1155                 } else if (version == 0) {
1156                     uint32_t sd;
1157                     int32_t mt;
1158                     if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1159                             !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1160                         return ERROR_IO;
1161                     }
1162                     segment_duration = sd;
1163                     media_time = mt;
1164                 } else {
1165                     return ERROR_IO;
1166                 }
1167 
1168                 // save these for later, because the elst atom might precede
1169                 // the atoms that actually gives us the duration and sample rate
1170                 // needed to calculate the padding and delay values
1171                 mLastTrack->has_elst = true;
1172                 mLastTrack->elst_media_time = media_time;
1173                 mLastTrack->elst_segment_duration = segment_duration;
1174             }
1175             break;
1176         }
1177 
1178         case FOURCC("frma"):
1179         {
1180             *offset += chunk_size;
1181 
1182             uint32_t original_fourcc;
1183             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1184                 return ERROR_IO;
1185             }
1186             original_fourcc = ntohl(original_fourcc);
1187             ALOGV("read original format: %d", original_fourcc);
1188 
1189             if (mLastTrack == NULL) {
1190                 return ERROR_MALFORMED;
1191             }
1192 
1193             AMediaFormat_setString(mLastTrack->meta,
1194                     AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1195             uint32_t num_channels = 0;
1196             uint32_t sample_rate = 0;
1197             if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1198                 AMediaFormat_setInt32(mLastTrack->meta,
1199                         AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1200                 AMediaFormat_setInt32(mLastTrack->meta,
1201                         AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1202             }
1203 
1204             if (!mIsQT && original_fourcc == FOURCC("alac")) {
1205                 off64_t tmpOffset = *offset;
1206                 status_t err = parseALACSampleEntry(&tmpOffset);
1207                 if (err != OK) {
1208                     ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1209                     return err;
1210                 }
1211                 *offset = tmpOffset + 8;
1212             }
1213 
1214             break;
1215         }
1216 
1217         case FOURCC("tenc"):
1218         {
1219             *offset += chunk_size;
1220 
1221             if (chunk_size < 32) {
1222                 return ERROR_MALFORMED;
1223             }
1224 
1225             // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1226             // default IV size, 16 bytes default KeyID
1227             // (ISO 23001-7)
1228 
1229             uint8_t version;
1230             if (mDataSource->readAt(data_offset, &version, sizeof(version))
1231                     < (ssize_t)sizeof(version)) {
1232                 return ERROR_IO;
1233             }
1234 
1235             uint8_t buf[4];
1236             memset(buf, 0, 4);
1237             if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1238                 return ERROR_IO;
1239             }
1240 
1241             if (mLastTrack == NULL) {
1242                 return ERROR_MALFORMED;
1243             }
1244 
1245             uint8_t defaultEncryptedByteBlock = 0;
1246             uint8_t defaultSkipByteBlock = 0;
1247             uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1248             if (version == 1) {
1249                 uint32_t pattern = buf[2];
1250                 defaultEncryptedByteBlock = pattern >> 4;
1251                 defaultSkipByteBlock = pattern & 0xf;
1252                 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1253                     // use (1,0) to mean "encrypt everything"
1254                     defaultEncryptedByteBlock = 1;
1255                 }
1256             } else if (mLastTrack->subsample_encryption) {
1257                 ALOGW("subsample_encryption should be version 1");
1258             } else if (defaultAlgorithmId > 1) {
1259                 // only 0 (clear) and 1 (AES-128) are valid
1260                 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1261                 defaultAlgorithmId = 1;
1262             }
1263 
1264             memset(buf, 0, 4);
1265             if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1266                 return ERROR_IO;
1267             }
1268             uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1269 
1270             if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1271                 // only unencrypted data must have 0 IV size
1272                 return ERROR_MALFORMED;
1273             } else if (defaultIVSize != 0 &&
1274                     defaultIVSize != 8 &&
1275                     defaultIVSize != 16) {
1276                 return ERROR_MALFORMED;
1277             }
1278 
1279             uint8_t defaultKeyId[16];
1280 
1281             if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1282                 return ERROR_IO;
1283             }
1284 
1285             sp<ABuffer> defaultConstantIv;
1286             if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1287 
1288                 uint8_t ivlength;
1289                 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1290                         < (ssize_t)sizeof(ivlength)) {
1291                     return ERROR_IO;
1292                 }
1293 
1294                 if (ivlength != 8 && ivlength != 16) {
1295                     ALOGW("unsupported IV length: %u", ivlength);
1296                     return ERROR_MALFORMED;
1297                 }
1298 
1299                 defaultConstantIv = new ABuffer(ivlength);
1300                 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1301                         < (ssize_t)ivlength) {
1302                     return ERROR_IO;
1303                 }
1304 
1305                 defaultConstantIv->setRange(0, ivlength);
1306             }
1307 
1308             int32_t tmpAlgorithmId;
1309             if (!AMediaFormat_getInt32(mLastTrack->meta,
1310                     AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1311                 AMediaFormat_setInt32(mLastTrack->meta,
1312                         AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1313             }
1314 
1315             AMediaFormat_setInt32(mLastTrack->meta,
1316                     AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1317             AMediaFormat_setBuffer(mLastTrack->meta,
1318                     AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1319             AMediaFormat_setInt32(mLastTrack->meta,
1320                     AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1321             AMediaFormat_setInt32(mLastTrack->meta,
1322                     AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1323             if (defaultConstantIv != NULL) {
1324                 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1325                         defaultConstantIv->data(), defaultConstantIv->size());
1326             }
1327             break;
1328         }
1329 
1330         case FOURCC("tkhd"):
1331         {
1332             *offset += chunk_size;
1333 
1334             status_t err;
1335             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1336                 return err;
1337             }
1338 
1339             break;
1340         }
1341 
1342         case FOURCC("tref"):
1343         {
1344             off64_t stop_offset = *offset + chunk_size;
1345             *offset = data_offset;
1346             while (*offset < stop_offset) {
1347                 status_t err = parseChunk(offset, depth + 1);
1348                 if (err != OK) {
1349                     return err;
1350                 }
1351             }
1352             if (*offset != stop_offset) {
1353                 return ERROR_MALFORMED;
1354             }
1355             break;
1356         }
1357 
1358         case FOURCC("thmb"):
1359         {
1360             *offset += chunk_size;
1361 
1362             if (mLastTrack != NULL) {
1363                 // Skip thumbnail track for now since we don't have an
1364                 // API to retrieve it yet.
1365                 // The thumbnail track can't be accessed by negative index or time,
1366                 // because each timed sample has its own corresponding thumbnail
1367                 // in the thumbnail track. We'll need a dedicated API to retrieve
1368                 // thumbnail at time instead.
1369                 mLastTrack->skipTrack = true;
1370             }
1371 
1372             break;
1373         }
1374 
1375         case FOURCC("pssh"):
1376         {
1377             *offset += chunk_size;
1378 
1379             PsshInfo pssh;
1380 
1381             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1382                 return ERROR_IO;
1383             }
1384 
1385             uint32_t psshdatalen = 0;
1386             if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1387                 return ERROR_IO;
1388             }
1389             pssh.datalen = ntohl(psshdatalen);
1390             ALOGV("pssh data size: %d", pssh.datalen);
1391             if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1392                 // pssh data length exceeds size of containing box
1393                 return ERROR_MALFORMED;
1394             }
1395 
1396             pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1397             if (pssh.data == NULL) {
1398                 return ERROR_MALFORMED;
1399             }
1400             ALOGV("allocated pssh @ %p", pssh.data);
1401             ssize_t requested = (ssize_t) pssh.datalen;
1402             if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1403                 delete[] pssh.data;
1404                 return ERROR_IO;
1405             }
1406             mPssh.push_back(pssh);
1407 
1408             break;
1409         }
1410 
1411         case FOURCC("mdhd"):
1412         {
1413             *offset += chunk_size;
1414 
1415             if (chunk_data_size < 4 || mLastTrack == NULL) {
1416                 return ERROR_MALFORMED;
1417             }
1418 
1419             uint8_t version;
1420             if (mDataSource->readAt(
1421                         data_offset, &version, sizeof(version))
1422                     < (ssize_t)sizeof(version)) {
1423                 return ERROR_IO;
1424             }
1425 
1426             off64_t timescale_offset;
1427 
1428             if (version == 1) {
1429                 timescale_offset = data_offset + 4 + 16;
1430             } else if (version == 0) {
1431                 timescale_offset = data_offset + 4 + 8;
1432             } else {
1433                 return ERROR_IO;
1434             }
1435 
1436             uint32_t timescale;
1437             if (mDataSource->readAt(
1438                         timescale_offset, &timescale, sizeof(timescale))
1439                     < (ssize_t)sizeof(timescale)) {
1440                 return ERROR_IO;
1441             }
1442 
1443             if (!timescale) {
1444                 ALOGE("timescale should not be ZERO.");
1445                 return ERROR_MALFORMED;
1446             }
1447 
1448             mLastTrack->timescale = ntohl(timescale);
1449 
1450             // 14496-12 says all ones means indeterminate, but some files seem to use
1451             // 0 instead. We treat both the same.
1452             int64_t duration = 0;
1453             if (version == 1) {
1454                 if (mDataSource->readAt(
1455                             timescale_offset + 4, &duration, sizeof(duration))
1456                         < (ssize_t)sizeof(duration)) {
1457                     return ERROR_IO;
1458                 }
1459                 if (duration != -1) {
1460                     duration = ntoh64(duration);
1461                 }
1462             } else {
1463                 uint32_t duration32;
1464                 if (mDataSource->readAt(
1465                             timescale_offset + 4, &duration32, sizeof(duration32))
1466                         < (ssize_t)sizeof(duration32)) {
1467                     return ERROR_IO;
1468                 }
1469                 if (duration32 != 0xffffffff) {
1470                     duration = ntohl(duration32);
1471                 }
1472             }
1473             if (duration != 0 && mLastTrack->timescale != 0) {
1474                 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1475                 if (durationUs < 0 || durationUs > INT64_MAX) {
1476                     ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1477                           (long long) duration, (long long) mLastTrack->timescale);
1478                     return ERROR_MALFORMED;
1479                 }
1480                 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1481             }
1482 
1483             uint8_t lang[2];
1484             off64_t lang_offset;
1485             if (version == 1) {
1486                 lang_offset = timescale_offset + 4 + 8;
1487             } else if (version == 0) {
1488                 lang_offset = timescale_offset + 4 + 4;
1489             } else {
1490                 return ERROR_IO;
1491             }
1492 
1493             if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1494                     < (ssize_t)sizeof(lang)) {
1495                 return ERROR_IO;
1496             }
1497 
1498             // To get the ISO-639-2/T three character language code
1499             // 1 bit pad followed by 3 5-bits characters. Each character
1500             // is packed as the difference between its ASCII value and 0x60.
1501             char lang_code[4];
1502             lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1503             lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1504             lang_code[2] = (lang[1] & 0x1f) + 0x60;
1505             lang_code[3] = '\0';
1506 
1507             AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1508 
1509             break;
1510         }
1511 
1512         case FOURCC("stsd"):
1513         {
1514             uint8_t buffer[8];
1515             if (chunk_data_size < (off64_t)sizeof(buffer)) {
1516                 return ERROR_MALFORMED;
1517             }
1518 
1519             if (mDataSource->readAt(
1520                         data_offset, buffer, 8) < 8) {
1521                 return ERROR_IO;
1522             }
1523 
1524             if (U32_AT(buffer) != 0) {
1525                 // Should be version 0, flags 0.
1526                 return ERROR_MALFORMED;
1527             }
1528 
1529             uint32_t entry_count = U32_AT(&buffer[4]);
1530 
1531             if (entry_count > 1) {
1532                 // For 3GPP timed text, there could be multiple tx3g boxes contain
1533                 // multiple text display formats. These formats will be used to
1534                 // display the timed text.
1535                 // For encrypted files, there may also be more than one entry.
1536                 const char *mime;
1537 
1538                 if (mLastTrack == NULL)
1539                     return ERROR_MALFORMED;
1540 
1541                 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1542                 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1543                         strcasecmp(mime, "application/octet-stream")) {
1544                     // For now we only support a single type of media per track.
1545                     mLastTrack->skipTrack = true;
1546                     *offset += chunk_size;
1547                     break;
1548                 }
1549             }
1550             off64_t stop_offset = *offset + chunk_size;
1551             *offset = data_offset + 8;
1552             for (uint32_t i = 0; i < entry_count; ++i) {
1553                 status_t err = parseChunk(offset, depth + 1);
1554                 if (err != OK) {
1555                     return err;
1556                 }
1557             }
1558 
1559             if (*offset != stop_offset) {
1560                 return ERROR_MALFORMED;
1561             }
1562             break;
1563         }
1564         case FOURCC("mett"):
1565         {
1566             *offset += chunk_size;
1567 
1568             // the absolute minimum size of a compliant mett box is 11 bytes:
1569             // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1570             // The resulting mime_format would be invalid at that size though.
1571             if (mLastTrack == NULL || chunk_data_size < 11) {
1572                 return ERROR_MALFORMED;
1573             }
1574 
1575             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1576             if (buffer.get() == NULL) {
1577                 return NO_MEMORY;
1578             }
1579 
1580             if (mDataSource->readAt(
1581                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1582                 return ERROR_IO;
1583             }
1584 
1585             // ISO-14496-12:
1586             // int8 reserved[6];               // should be all zeroes
1587             // int16_t data_reference_index;
1588             // char content_encoding[];        // null terminated, optional (= just the null byte)
1589             // char mime_format[];             // null terminated, mandatory
1590             // optional other boxes
1591             //
1592             // API < 29:
1593             // char mime_format[];             // null terminated
1594             //
1595             // API >= 29
1596             // char mime_format[];             // null terminated
1597             // char mime_format[];             // null terminated
1598 
1599             // Prior to API 29, the metadata track was not compliant with ISO/IEC
1600             // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1601             // metatrack. As of API 29 and onwards, a change was made to metadata track to
1602             // make it somewhat compatible with the standard. The workaround is to write the
1603             // null-terminated mime_format string twice. This allows compliant parsers to
1604             // read the missing reserved, data_reference_index, and content_encoding fields
1605             // from the first mime_type string. The actual mime_format field would then be
1606             // read correctly from the second string. The non-compliant Android frameworks
1607             // from API 28 and earlier would still be able to read the mime_format correctly
1608             // as it would only read the first null-terminated mime_format string. To enable
1609             // reading metadata tracks generated from both the non-compliant and compliant
1610             // formats, a check needs to be done to see which format is used.
1611             const char *str = (const char*) buffer.get();
1612             size_t string_length = strnlen(str, chunk_data_size);
1613 
1614             if (string_length == chunk_data_size - 1) {
1615                 // This is likely a pre API 29 file, since it's a single null terminated
1616                 // string filling the entire box.
1617                 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1618             } else {
1619                 // This might be a fully compliant metadata track, a "double mime" compatibility
1620                 // track, or anything else, including a single non-terminated string, so we need
1621                 // to determine the length of each string we want to parse out of the box.
1622                 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1623                 if (encoding_length + 8 >= chunk_data_size - 2) {
1624                     // the encoding extends to the end of the box, so there's no mime_format
1625                     return ERROR_MALFORMED;
1626                 }
1627                 String8 contentEncoding(str + 8, encoding_length);
1628                 String8 mimeFormat(str + 8 + encoding_length + 1,
1629                         chunk_data_size - 8 - encoding_length - 1);
1630                 AMediaFormat_setString(mLastTrack->meta,
1631                         AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1632             }
1633             break;
1634         }
1635 
1636         case FOURCC("mp4a"):
1637         case FOURCC("enca"):
1638         case FOURCC("samr"):
1639         case FOURCC("sawb"):
1640         case FOURCC("Opus"):
1641         case FOURCC("twos"):
1642         case FOURCC("sowt"):
1643         case FOURCC("alac"):
1644         case FOURCC("fLaC"):
1645         case FOURCC(".mp3"):
1646         case 0x6D730055: // "ms U" mp3 audio
1647         {
1648             if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1649 
1650                 if (chunk_type == FOURCC("alac")) {
1651                     off64_t offsetTmp = *offset;
1652                     status_t err = parseALACSampleEntry(&offsetTmp);
1653                     if (err != OK) {
1654                         ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1655                         return err;
1656                     }
1657                 }
1658 
1659                 // Ignore all atoms embedded in QT wave atom
1660                 ALOGV("Ignore all atoms embedded in QT wave atom");
1661                 *offset += chunk_size;
1662                 break;
1663             }
1664 
1665             uint8_t buffer[8 + 20];
1666             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1667                 // Basic AudioSampleEntry size.
1668                 return ERROR_MALFORMED;
1669             }
1670 
1671             if (mDataSource->readAt(
1672                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1673                 return ERROR_IO;
1674             }
1675 
1676             // we can get data_ref_index value from U16_AT(&buffer[6])
1677             uint16_t version = U16_AT(&buffer[8]);
1678             uint32_t num_channels = U16_AT(&buffer[16]);
1679 
1680             uint16_t sample_size = U16_AT(&buffer[18]);
1681             uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1682 
1683             if (mLastTrack == NULL)
1684                 return ERROR_MALFORMED;
1685 
1686             off64_t stop_offset = *offset + chunk_size;
1687             *offset = data_offset + sizeof(buffer);
1688 
1689             if (mIsQT) {
1690                 if (version == 1) {
1691                     if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1692                         return ERROR_IO;
1693                     }
1694 
1695 #if 0
1696                     U32_AT(buffer);  // samples per packet
1697                     U32_AT(&buffer[4]);  // bytes per packet
1698                     U32_AT(&buffer[8]);  // bytes per frame
1699                     U32_AT(&buffer[12]);  // bytes per sample
1700 #endif
1701                     *offset += 16;
1702                 } else if (version == 2) {
1703                     uint8_t v2buffer[36];
1704                     if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1705                         return ERROR_IO;
1706                     }
1707 
1708 #if 0
1709                     U32_AT(v2buffer);  // size of struct only
1710                     sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1711                     num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1712                     U32_AT(&v2buffer[16]);  // always 0x7f000000
1713                     sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1714                     U32_AT(&v2buffer[24]);  // format specifc flags
1715                     U32_AT(&v2buffer[28]);  // const bytes per audio packet
1716                     U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1717 #endif
1718                     *offset += 36;
1719                 }
1720             }
1721 
1722             if (chunk_type != FOURCC("enca")) {
1723                 // if the chunk type is enca, we'll get the type from the frma box later
1724                 AMediaFormat_setString(mLastTrack->meta,
1725                         AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1726                 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1727 
1728                 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1729                     AMediaFormat_setInt32(mLastTrack->meta,
1730                             AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1731                     if (chunk_type == FOURCC("twos")) {
1732                         AMediaFormat_setInt32(mLastTrack->meta,
1733                                 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1734                     }
1735                 }
1736             }
1737             ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1738                    chunk, num_channels, sample_size, sample_rate);
1739             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1740             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1741 
1742             if (chunk_type == FOURCC("Opus")) {
1743                 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1744                 data_offset += sizeof(buffer);
1745                 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1746 
1747                 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1748                     opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1749                     return ERROR_MALFORMED;
1750                 }
1751                 // Read Opus Header
1752                 if (mDataSource->readAt(
1753                         data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1754                     return ERROR_IO;
1755                 }
1756 
1757                 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1758                 // http://wiki.xiph.org/OggOpus#ID_Header
1759                 strncpy((char *)opusInfo, "OpusHead", 8);
1760 
1761                 // Version shall be 0 as per mp4 Opus Specific Box
1762                 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1763                 if (opusInfo[8]) {
1764                     return ERROR_MALFORMED;
1765                 }
1766                 // Force version to 1 as per OpusHead definition
1767                 // (http://wiki.xiph.org/OggOpus#ID_Header)
1768                 opusInfo[8] = 1;
1769 
1770                 // Read Opus Specific Box values
1771                 size_t opusOffset = 10;
1772                 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1773                 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1774                 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1775 
1776                 // Convert Opus Specific Box values. ParseOpusHeader expects
1777                 // the values in LE, however MP4 stores these values as BE
1778                 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1779                 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1780                 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1781                 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1782 
1783                 static const int64_t kSeekPreRollNs = 80000000;  // Fixed 80 msec
1784                 static const int32_t kOpusSampleRate = 48000;
1785                 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1786 
1787                 AMediaFormat_setBuffer(mLastTrack->meta,
1788                             AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1789                 AMediaFormat_setBuffer(mLastTrack->meta,
1790                         AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1791                 AMediaFormat_setBuffer(mLastTrack->meta,
1792                         AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1793 
1794                 data_offset += opusInfoSize;
1795                 *offset = data_offset;
1796                 CHECK_EQ(*offset, stop_offset);
1797             }
1798 
1799             if (!mIsQT && chunk_type == FOURCC("alac")) {
1800                 data_offset += sizeof(buffer);
1801 
1802                 status_t err = parseALACSampleEntry(&data_offset);
1803                 if (err != OK) {
1804                     ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1805                     return err;
1806                 }
1807                 *offset = data_offset;
1808                 CHECK_EQ(*offset, stop_offset);
1809             }
1810 
1811             if (chunk_type == FOURCC("fLaC")) {
1812 
1813                 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1814                 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1815                 uint8_t flacInfo[4 + 4 + 34];
1816                 // skipping dFla, version
1817                 data_offset += sizeof(buffer) + 12;
1818                 size_t flacOffset = 4;
1819                 // Add flaC header mime type to CSD
1820                 strncpy((char *)flacInfo, "fLaC", 4);
1821                 if (mDataSource->readAt(
1822                         data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1823                         (ssize_t)sizeof(flacInfo) - flacOffset) {
1824                     return ERROR_IO;
1825                 }
1826                 data_offset += sizeof(flacInfo) - flacOffset;
1827 
1828                 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1829                                        sizeof(flacInfo));
1830                 *offset = data_offset;
1831                 CHECK_EQ(*offset, stop_offset);
1832             }
1833 
1834             while (*offset < stop_offset) {
1835                 status_t err = parseChunk(offset, depth + 1);
1836                 if (err != OK) {
1837                     return err;
1838                 }
1839             }
1840 
1841             if (*offset != stop_offset) {
1842                 return ERROR_MALFORMED;
1843             }
1844             break;
1845         }
1846 
1847         case FOURCC("mp4v"):
1848         case FOURCC("encv"):
1849         case FOURCC("s263"):
1850         case FOURCC("H263"):
1851         case FOURCC("h263"):
1852         case FOURCC("avc1"):
1853         case FOURCC("hvc1"):
1854         case FOURCC("hev1"):
1855         case FOURCC("av01"):
1856         case FOURCC("vp09"):
1857         {
1858             uint8_t buffer[78];
1859             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1860                 // Basic VideoSampleEntry size.
1861                 return ERROR_MALFORMED;
1862             }
1863 
1864             if (mDataSource->readAt(
1865                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1866                 return ERROR_IO;
1867             }
1868 
1869             // we can get data_ref_index value from U16_AT(&buffer[6])
1870             uint16_t width = U16_AT(&buffer[6 + 18]);
1871             uint16_t height = U16_AT(&buffer[6 + 20]);
1872 
1873             // The video sample is not standard-compliant if it has invalid dimension.
1874             // Use some default width and height value, and
1875             // let the decoder figure out the actual width and height (and thus
1876             // be prepared for INFO_FOMRAT_CHANGED event).
1877             if (width == 0)  width  = 352;
1878             if (height == 0) height = 288;
1879 
1880             // printf("*** coding='%s' width=%d height=%d\n",
1881             //        chunk, width, height);
1882 
1883             if (mLastTrack == NULL)
1884                 return ERROR_MALFORMED;
1885 
1886             if (chunk_type != FOURCC("encv")) {
1887                 // if the chunk type is encv, we'll get the type from the frma box later
1888                 AMediaFormat_setString(mLastTrack->meta,
1889                         AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1890             }
1891             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
1892             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
1893 
1894             off64_t stop_offset = *offset + chunk_size;
1895             *offset = data_offset + sizeof(buffer);
1896             while (*offset < stop_offset) {
1897                 status_t err = parseChunk(offset, depth + 1);
1898                 if (err != OK) {
1899                     return err;
1900                 }
1901             }
1902 
1903             if (*offset != stop_offset) {
1904                 return ERROR_MALFORMED;
1905             }
1906             break;
1907         }
1908 
1909         case FOURCC("stco"):
1910         case FOURCC("co64"):
1911         {
1912             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1913                 return ERROR_MALFORMED;
1914             }
1915 
1916             status_t err =
1917                 mLastTrack->sampleTable->setChunkOffsetParams(
1918                         chunk_type, data_offset, chunk_data_size);
1919 
1920             *offset += chunk_size;
1921 
1922             if (err != OK) {
1923                 return err;
1924             }
1925 
1926             break;
1927         }
1928 
1929         case FOURCC("stsc"):
1930         {
1931             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1932                 return ERROR_MALFORMED;
1933 
1934             status_t err =
1935                 mLastTrack->sampleTable->setSampleToChunkParams(
1936                         data_offset, chunk_data_size);
1937 
1938             *offset += chunk_size;
1939 
1940             if (err != OK) {
1941                 return err;
1942             }
1943 
1944             break;
1945         }
1946 
        // Sample size box ("stsz" / "stz2").
        // Registers the box with the current track's sample table, then derives
        // three pieces of track metadata from it: the maximum input buffer size,
        // and (for video tracks) the average frame rate and the frame count.
        case FOURCC("stsz"):
        case FOURCC("stz2"):
        {
            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
                return ERROR_MALFORMED;
            }

            status_t err =
                mLastTrack->sampleTable->setSampleSizeParams(
                        chunk_type, data_offset, chunk_data_size);

            // The box is stepped over even when the sample table rejected it.
            *offset += chunk_size;

            if (err != OK) {
                return err;
            }

            // NOTE(review): presumably fixes up the default sample size for raw
            // audio tracks; the helper is defined outside this view - confirm.
            adjustRawDefaultFrameSize();

            size_t max_size;
            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);

            if (err != OK) {
                return err;
            }

            if (max_size != 0) {
                // Assume that a given buffer only contains at most 10 chunks,
                // each chunk originally prefixed with a 2 byte length will
                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
                // and thus will grow by 2 bytes per chunk.
                // Guard the addition below against wrapping around.
                if (max_size > SIZE_MAX - 10 * 2) {
                    ALOGE("max sample size too big: %zu", max_size);
                    return ERROR_MALFORMED;
                }
                AMediaFormat_setInt32(mLastTrack->meta,
                        AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
            } else {
                // No size was specified. Pick a conservatively large size.
                uint32_t width, height;
                if (!AMediaFormat_getInt32(mLastTrack->meta,
                        AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
                    !AMediaFormat_getInt32(mLastTrack->meta,
                            AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
                    ALOGE("No width or height, assuming worst case 1080p");
                    width = 1920;
                    height = 1080;
                } else {
                    // A resolution was specified, check that it's not too big. The values below
                    // were chosen so that the calculations below don't cause overflows, they're
                    // not indicating that resolutions up to 32kx32k are actually supported.
                    if (width > 32768 || height > 32768) {
                        ALOGE("can't support %u x %u video", width, height);
                        return ERROR_MALFORMED;
                    }
                }

                // Aborts (CHECK) if the track has no mime type at this point.
                const char *mime;
                CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
                if (!strncmp(mime, "audio/", 6)) {
                    // for audio, use 128KB
                    max_size = 1024 * 128;
                } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
                    // AVC & HEVC requires compression ratio of at least 2, and uses
                    // macroblocks
                    // Dimensions are rounded up to whole 16x16 blocks, 192 bytes each.
                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
                } else {
                    // For all other formats there is no minimum compression
                    // ratio. Use compression ratio of 1.
                    max_size = width * height * 3 / 2;
                }
                // HACK: allow 10% overhead
                // TODO: read sample size from traf atom for fragmented MPEG4.
                max_size += max_size / 10;
                AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
            }

            // NOTE: setting another piece of metadata invalidates any pointers (such as the
            // mimetype) previously obtained, so don't cache them.
            // That is why the mime type is fetched again here instead of reusing
            // the pointer obtained in the branch above.
            const char *mime;
            CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
            // Calculate average frame rate.
            if (!strncasecmp("video/", mime, 6)) {
                size_t nSamples = mLastTrack->sampleTable->countSamples();
                if (nSamples == 0) {
                    // The sample table is empty: fall back to the default sample
                    // duration of the matching entry in mTrex, if there is one.
                    int32_t trackId;
                    if (AMediaFormat_getInt32(mLastTrack->meta,
                            AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
                        for (size_t i = 0; i < mTrex.size(); i++) {
                            Trex *t = &mTrex.editItemAt(i);
                            if (t->track_ID == (uint32_t) trackId) {
                                // A zero duration is skipped to avoid dividing by zero.
                                if (t->default_sample_duration > 0) {
                                    int32_t frameRate =
                                            mLastTrack->timescale / t->default_sample_duration;
                                    AMediaFormat_setInt32(mLastTrack->meta,
                                            AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
                                }
                                break;
                            }
                        }
                    }
                } else {
                    int64_t durationUs;
                    if (AMediaFormat_getInt64(mLastTrack->meta,
                            AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
                        if (durationUs > 0) {
                            // Samples per second, rounded to nearest by adding
                            // half of the divisor before dividing.
                            int32_t frameRate = (nSamples * 1000000LL +
                                        (durationUs >> 1)) / durationUs;
                            AMediaFormat_setInt32(mLastTrack->meta,
                                    AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
                        }
                    }
                    ALOGV("setting frame count %zu", nSamples);
                    AMediaFormat_setInt32(mLastTrack->meta,
                            AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
                }
            }

            break;
        }
2068 
2069         case FOURCC("stts"):
2070         {
2071             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2072                 return ERROR_MALFORMED;
2073 
2074             *offset += chunk_size;
2075 
2076             if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2077                 char chunk[5];
2078                 MakeFourCCString(mPath[depth - 1], chunk);
2079                 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2080                 break;
2081             }
2082 
2083             status_t err =
2084                 mLastTrack->sampleTable->setTimeToSampleParams(
2085                         data_offset, chunk_data_size);
2086 
2087             if (err != OK) {
2088                 return err;
2089             }
2090 
2091             break;
2092         }
2093 
2094         case FOURCC("ctts"):
2095         {
2096             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2097                 return ERROR_MALFORMED;
2098 
2099             *offset += chunk_size;
2100 
2101             status_t err =
2102                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2103                         data_offset, chunk_data_size);
2104 
2105             if (err != OK) {
2106                 return err;
2107             }
2108 
2109             break;
2110         }
2111 
2112         case FOURCC("stss"):
2113         {
2114             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2115                 return ERROR_MALFORMED;
2116 
2117             *offset += chunk_size;
2118 
2119             status_t err =
2120                 mLastTrack->sampleTable->setSyncSampleParams(
2121                         data_offset, chunk_data_size);
2122 
2123             if (err != OK) {
2124                 return err;
2125             }
2126 
2127             break;
2128         }
2129 
2130         // \xA9xyz
2131         case FOURCC("\251xyz"):
2132         {
2133             *offset += chunk_size;
2134 
2135             // Best case the total data length inside "\xA9xyz" box would
2136             // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2137             // where "\x00\x05" is the text string length with value = 5,
2138             // "\0x15\xc7" is the language code = en, and "+0+0/" is a
2139             // location (string) value with longitude = 0 and latitude = 0.
2140             // Since some devices encountered in the wild omit the trailing
2141             // slash, we'll allow that.
2142             if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2143                 return ERROR_MALFORMED;
2144             }
2145 
2146             uint16_t len;
2147             if (!mDataSource->getUInt16(data_offset, &len)) {
2148                 return ERROR_IO;
2149             }
2150 
2151             // allow "+0+0" without trailing slash
2152             if (len < 4 || len > chunk_data_size - 4) {
2153                 return ERROR_MALFORMED;
2154             }
2155             // The location string following the language code is formatted
2156             // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2157             // Allocate 2 extra bytes, in case we need to add a trailing slash,
2158             // and to add a terminating 0.
2159             std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2160             if (!buffer) {
2161                 return NO_MEMORY;
2162             }
2163 
2164             if (mDataSource->readAt(
2165                         data_offset + 4, &buffer[0], len) < len) {
2166                 return ERROR_IO;
2167             }
2168 
2169             len = strlen(&buffer[0]);
2170             if (len < 4) {
2171                 return ERROR_MALFORMED;
2172             }
2173             // Add a trailing slash if there wasn't one.
2174             if (buffer[len - 1] != '/') {
2175                 buffer[len] = '/';
2176             }
2177             AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2178             break;
2179         }
2180 
2181         case FOURCC("esds"):
2182         {
2183             *offset += chunk_size;
2184 
2185             if (chunk_data_size < 4) {
2186                 return ERROR_MALFORMED;
2187             }
2188 
2189             auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2190             uint8_t *buffer = tmp.get();
2191             if (buffer == NULL) {
2192                 return -ENOMEM;
2193             }
2194 
2195             if (mDataSource->readAt(
2196                         data_offset, buffer, chunk_data_size) < chunk_data_size) {
2197                 return ERROR_IO;
2198             }
2199 
2200             if (U32_AT(buffer) != 0) {
2201                 // Should be version 0, flags 0.
2202                 return ERROR_MALFORMED;
2203             }
2204 
2205             if (mLastTrack == NULL)
2206                 return ERROR_MALFORMED;
2207 
2208             AMediaFormat_setBuffer(mLastTrack->meta,
2209                     AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2210 
2211             if (mPath.size() >= 2
2212                     && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2213                 // Information from the ESDS must be relied on for proper
2214                 // setup of sample rate and channel count for MPEG4 Audio.
2215                 // The generic header appears to only contain generic
2216                 // information...
2217 
2218                 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2219                         &buffer[4], chunk_data_size - 4);
2220 
2221                 if (err != OK) {
2222                     return err;
2223                 }
2224             }
2225             if (mPath.size() >= 2
2226                     && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2227                 // Check if the video is MPEG2
2228                 ESDS esds(&buffer[4], chunk_data_size - 4);
2229 
2230                 uint8_t objectTypeIndication;
2231                 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2232                     if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2233                         AMediaFormat_setString(mLastTrack->meta,
2234                                 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2235                     }
2236                 }
2237             }
2238             break;
2239         }
2240 
2241         case FOURCC("btrt"):
2242         {
2243             *offset += chunk_size;
2244             if (mLastTrack == NULL) {
2245                 return ERROR_MALFORMED;
2246             }
2247 
2248             uint8_t buffer[12];
2249             if (chunk_data_size != sizeof(buffer)) {
2250                 return ERROR_MALFORMED;
2251             }
2252 
2253             if (mDataSource->readAt(
2254                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
2255                 return ERROR_IO;
2256             }
2257 
2258             uint32_t maxBitrate = U32_AT(&buffer[4]);
2259             uint32_t avgBitrate = U32_AT(&buffer[8]);
2260             if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2261                 AMediaFormat_setInt32(mLastTrack->meta,
2262                         AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2263             }
2264             if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2265                 AMediaFormat_setInt32(mLastTrack->meta,
2266                         AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2267             }
2268             break;
2269         }
2270 
2271         case FOURCC("avcC"):
2272         {
2273             *offset += chunk_size;
2274 
2275             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2276 
2277             if (buffer.get() == NULL) {
2278                 ALOGE("b/28471206");
2279                 return NO_MEMORY;
2280             }
2281 
2282             if (mDataSource->readAt(
2283                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2284                 return ERROR_IO;
2285             }
2286 
2287             if (mLastTrack == NULL)
2288                 return ERROR_MALFORMED;
2289 
2290             AMediaFormat_setBuffer(mLastTrack->meta,
2291                     AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2292 
2293             break;
2294         }
2295         case FOURCC("hvcC"):
2296         {
2297             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2298 
2299             if (buffer.get() == NULL) {
2300                 ALOGE("b/28471206");
2301                 return NO_MEMORY;
2302             }
2303 
2304             if (mDataSource->readAt(
2305                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2306                 return ERROR_IO;
2307             }
2308 
2309             if (mLastTrack == NULL)
2310                 return ERROR_MALFORMED;
2311 
2312             AMediaFormat_setBuffer(mLastTrack->meta,
2313                     AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2314 
2315             *offset += chunk_size;
2316             break;
2317         }
2318 
2319         case FOURCC("vpcC"):
2320         case FOURCC("av1C"):
2321         {
2322             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2323 
2324             if (buffer.get() == NULL) {
2325                 ALOGE("b/28471206");
2326                 return NO_MEMORY;
2327             }
2328 
2329             if (mDataSource->readAt(
2330                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2331                 return ERROR_IO;
2332             }
2333 
2334             if (mLastTrack == NULL)
2335                 return ERROR_MALFORMED;
2336 
2337             AMediaFormat_setBuffer(mLastTrack->meta,
2338                    AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2339 
2340             *offset += chunk_size;
2341             break;
2342         }
2343         case FOURCC("d263"):
2344         {
2345             *offset += chunk_size;
2346             /*
2347              * d263 contains a fixed 7 bytes part:
2348              *   vendor - 4 bytes
2349              *   version - 1 byte
2350              *   level - 1 byte
2351              *   profile - 1 byte
2352              * optionally, "d263" box itself may contain a 16-byte
2353              * bit rate box (bitr)
2354              *   average bit rate - 4 bytes
2355              *   max bit rate - 4 bytes
2356              */
2357             char buffer[23];
2358             if (chunk_data_size != 7 &&
2359                 chunk_data_size != 23) {
2360                 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2361                 return ERROR_MALFORMED;
2362             }
2363 
2364             if (mDataSource->readAt(
2365                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
2366                 return ERROR_IO;
2367             }
2368 
2369             if (mLastTrack == NULL)
2370                 return ERROR_MALFORMED;
2371 
2372             AMediaFormat_setBuffer(mLastTrack->meta,
2373                     AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2374 
2375             break;
2376         }
2377 
2378         case FOURCC("meta"):
2379         {
2380             off64_t stop_offset = *offset + chunk_size;
2381             *offset = data_offset;
2382             bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2383             if (!isParsingMetaKeys) {
2384                 uint8_t buffer[4];
2385                 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2386                     *offset = stop_offset;
2387                     return ERROR_MALFORMED;
2388                 }
2389 
2390                 if (mDataSource->readAt(
2391                             data_offset, buffer, 4) < 4) {
2392                     *offset = stop_offset;
2393                     return ERROR_IO;
2394                 }
2395 
2396                 if (U32_AT(buffer) != 0) {
2397                     // Should be version 0, flags 0.
2398 
2399                     // If it's not, let's assume this is one of those
2400                     // apparently malformed chunks that don't have flags
2401                     // and completely different semantics than what's
2402                     // in the MPEG4 specs and skip it.
2403                     *offset = stop_offset;
2404                     return OK;
2405                 }
2406                 *offset +=  sizeof(buffer);
2407             }
2408 
2409             while (*offset < stop_offset) {
2410                 status_t err = parseChunk(offset, depth + 1);
2411                 if (err != OK) {
2412                     return err;
2413                 }
2414             }
2415 
2416             if (*offset != stop_offset) {
2417                 return ERROR_MALFORMED;
2418             }
2419             break;
2420         }
2421 
2422         case FOURCC("iloc"):
2423         case FOURCC("iinf"):
2424         case FOURCC("iprp"):
2425         case FOURCC("pitm"):
2426         case FOURCC("idat"):
2427         case FOURCC("iref"):
2428         case FOURCC("ipro"):
2429         {
2430             if (mIsHeif) {
2431                 if (mItemTable == NULL) {
2432                     mItemTable = new ItemTable(mDataSource);
2433                 }
2434                 status_t err = mItemTable->parse(
2435                         chunk_type, data_offset, chunk_data_size);
2436                 if (err != OK) {
2437                     return err;
2438                 }
2439             }
2440             *offset += chunk_size;
2441             break;
2442         }
2443 
2444         case FOURCC("mean"):
2445         case FOURCC("name"):
2446         case FOURCC("data"):
2447         {
2448             *offset += chunk_size;
2449 
2450             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2451                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2452 
2453                 if (err != OK) {
2454                     return err;
2455                 }
2456             }
2457 
2458             break;
2459         }
2460 
2461         case FOURCC("mvhd"):
2462         {
2463             *offset += chunk_size;
2464 
2465             if (depth != 1) {
2466                 ALOGE("mvhd: depth %d", depth);
2467                 return ERROR_MALFORMED;
2468             }
2469             if (chunk_data_size < 32) {
2470                 return ERROR_MALFORMED;
2471             }
2472 
2473             uint8_t header[32];
2474             if (mDataSource->readAt(
2475                         data_offset, header, sizeof(header))
2476                     < (ssize_t)sizeof(header)) {
2477                 return ERROR_IO;
2478             }
2479 
2480             uint64_t creationTime;
2481             uint64_t duration = 0;
2482             if (header[0] == 1) {
2483                 creationTime = U64_AT(&header[4]);
2484                 mHeaderTimescale = U32_AT(&header[20]);
2485                 duration = U64_AT(&header[24]);
2486                 if (duration == 0xffffffffffffffff) {
2487                     duration = 0;
2488                 }
2489             } else if (header[0] != 0) {
2490                 return ERROR_MALFORMED;
2491             } else {
2492                 creationTime = U32_AT(&header[4]);
2493                 mHeaderTimescale = U32_AT(&header[12]);
2494                 uint32_t d32 = U32_AT(&header[16]);
2495                 if (d32 == 0xffffffff) {
2496                     d32 = 0;
2497                 }
2498                 duration = d32;
2499             }
2500             if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2501                 AMediaFormat_setInt64(mFileMetaData,
2502                         AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2503             }
2504 
2505             String8 s;
2506             if (convertTimeToDate(creationTime, &s)) {
2507                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2508             }
2509 
2510             break;
2511         }
2512 
2513         case FOURCC("mehd"):
2514         {
2515             *offset += chunk_size;
2516 
2517             if (chunk_data_size < 8) {
2518                 return ERROR_MALFORMED;
2519             }
2520 
2521             uint8_t flags[4];
2522             if (mDataSource->readAt(
2523                         data_offset, flags, sizeof(flags))
2524                     < (ssize_t)sizeof(flags)) {
2525                 return ERROR_IO;
2526             }
2527 
2528             uint64_t duration = 0;
2529             if (flags[0] == 1) {
2530                 // 64 bit
2531                 if (chunk_data_size < 12) {
2532                     return ERROR_MALFORMED;
2533                 }
2534                 mDataSource->getUInt64(data_offset + 4, &duration);
2535                 if (duration == 0xffffffffffffffff) {
2536                     duration = 0;
2537                 }
2538             } else if (flags[0] == 0) {
2539                 // 32 bit
2540                 uint32_t d32;
2541                 mDataSource->getUInt32(data_offset + 4, &d32);
2542                 if (d32 == 0xffffffff) {
2543                     d32 = 0;
2544                 }
2545                 duration = d32;
2546             } else {
2547                 return ERROR_MALFORMED;
2548             }
2549 
2550             if (duration != 0 && mHeaderTimescale != 0) {
2551                 AMediaFormat_setInt64(mFileMetaData,
2552                         AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2553             }
2554 
2555             break;
2556         }
2557 
2558         case FOURCC("mdat"):
2559         {
2560             mMdatFound = true;
2561 
2562             *offset += chunk_size;
2563             break;
2564         }
2565 
2566         case FOURCC("hdlr"):
2567         {
2568             *offset += chunk_size;
2569 
2570             if (underQTMetaPath(mPath, 3)) {
2571                 break;
2572             }
2573 
2574             uint32_t buffer;
2575             if (mDataSource->readAt(
2576                         data_offset + 8, &buffer, 4) < 4) {
2577                 return ERROR_IO;
2578             }
2579 
2580             uint32_t type = ntohl(buffer);
2581             // For the 3GPP file format, the handler-type within the 'hdlr' box
2582             // shall be 'text'. We also want to support 'sbtl' handler type
2583             // for a practical reason as various MPEG4 containers use it.
2584             if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2585                 if (mLastTrack != NULL) {
2586                     AMediaFormat_setString(mLastTrack->meta,
2587                             AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2588                 }
2589             }
2590 
2591             break;
2592         }
2593 
2594         case FOURCC("keys"):
2595         {
2596             *offset += chunk_size;
2597 
2598             if (underQTMetaPath(mPath, 3)) {
2599                 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2600                 if (err != OK) {
2601                     return err;
2602                 }
2603             }
2604             break;
2605         }
2606 
2607         case FOURCC("trex"):
2608         {
2609             *offset += chunk_size;
2610 
2611             if (chunk_data_size < 24) {
2612                 return ERROR_IO;
2613             }
2614             Trex trex;
2615             if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2616                 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2617                 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2618                 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2619                 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2620                 return ERROR_IO;
2621             }
2622             mTrex.add(trex);
2623             break;
2624         }
2625 
2626         case FOURCC("tx3g"):
2627         {
2628             if (mLastTrack == NULL)
2629                 return ERROR_MALFORMED;
2630 
2631             // complain about ridiculous chunks
2632             if (chunk_size > kMaxAtomSize) {
2633                 return ERROR_MALFORMED;
2634             }
2635 
2636             // complain about empty atoms
2637             if (chunk_data_size <= 0) {
2638                 ALOGE("b/124330204");
2639                 android_errorWriteLog(0x534e4554, "124330204");
2640                 return ERROR_MALFORMED;
2641             }
2642 
2643             // should fill buffer based on "data_offset" and "chunk_data_size"
2644             // instead of *offset and chunk_size;
2645             // but we've been feeding the extra data to consumers for multiple releases and
2646             // if those apps are compensating for it, we'd break them with such a change
2647             //
2648 
2649             if (mLastTrack->mTx3gBuffer == NULL) {
2650                 mLastTrack->mTx3gSize = 0;
2651                 mLastTrack->mTx3gFilled = 0;
2652             }
2653             if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2654                 size_t growth = kTx3gGrowth;
2655                 if (growth < chunk_size) {
2656                     growth = chunk_size;
2657                 }
2658                 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2659                 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2660                     ALOGE("b/124330204 - too much space");
2661                     android_errorWriteLog(0x534e4554, "124330204");
2662                     return ERROR_MALFORMED;
2663                 }
2664                 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2665                                                 mLastTrack->mTx3gSize + growth);
2666                 if (updated == NULL) {
2667                     return ERROR_MALFORMED;
2668                 }
2669                 mLastTrack->mTx3gBuffer = updated;
2670                 mLastTrack->mTx3gSize += growth;
2671             }
2672 
2673             if ((size_t)(mDataSource->readAt(*offset,
2674                                              mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2675                                              chunk_size))
2676                     < chunk_size) {
2677 
2678                 // advance read pointer so we don't end up reading this again
2679                 *offset += chunk_size;
2680                 return ERROR_IO;
2681             }
2682 
2683             mLastTrack->mTx3gFilled += chunk_size;
2684             *offset += chunk_size;
2685             break;
2686         }
2687 
2688         case FOURCC("covr"):
2689         {
2690             *offset += chunk_size;
2691 
2692             ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2693                   chunk_data_size, data_offset);
2694 
2695             if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2696                 return ERROR_MALFORMED;
2697             }
2698             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2699             if (buffer.get() == NULL) {
2700                 ALOGE("b/28471206");
2701                 return NO_MEMORY;
2702             }
2703             if (mDataSource->readAt(
2704                 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2705                 return ERROR_IO;
2706             }
2707             const int kSkipBytesOfDataBox = 16;
2708             if (chunk_data_size <= kSkipBytesOfDataBox) {
2709                 return ERROR_MALFORMED;
2710             }
2711 
2712             AMediaFormat_setBuffer(mFileMetaData,
2713                 AMEDIAFORMAT_KEY_ALBUMART,
2714                 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2715 
2716             break;
2717         }
2718 
2719         case FOURCC("colr"):
2720         {
2721             *offset += chunk_size;
2722             // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2723             // ignore otherwise
2724             if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
2725                 status_t err = parseColorInfo(data_offset, chunk_data_size);
2726                 if (err != OK) {
2727                     return err;
2728                 }
2729             }
2730 
2731             break;
2732         }
2733 
2734         case FOURCC("titl"):
2735         case FOURCC("perf"):
2736         case FOURCC("auth"):
2737         case FOURCC("gnre"):
2738         case FOURCC("albm"):
2739         case FOURCC("yrrc"):
2740         {
2741             *offset += chunk_size;
2742 
2743             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2744 
2745             if (err != OK) {
2746                 return err;
2747             }
2748 
2749             break;
2750         }
2751 
2752         case FOURCC("ID32"):
2753         {
2754             *offset += chunk_size;
2755 
2756             if (chunk_data_size < 6) {
2757                 return ERROR_MALFORMED;
2758             }
2759 
2760             parseID3v2MetaData(data_offset + 6);
2761 
2762             break;
2763         }
2764 
2765         case FOURCC("----"):
2766         {
2767             mLastCommentMean.clear();
2768             mLastCommentName.clear();
2769             mLastCommentData.clear();
2770             *offset += chunk_size;
2771             break;
2772         }
2773 
2774         case FOURCC("sidx"):
2775         {
2776             status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2777             if (err != OK) {
2778                 return err;
2779             }
2780             *offset += chunk_size;
2781             return UNKNOWN_ERROR; // stop parsing after sidx
2782         }
2783 
2784         case FOURCC("ac-3"):
2785         {
2786             *offset += chunk_size;
2787             // bypass ac-3 if parse fail
2788             if (parseAC3SpecificBox(data_offset) != OK) {
2789                 if (mLastTrack != NULL) {
2790                     ALOGW("Fail to parse ac-3");
2791                     mLastTrack->skipTrack = true;
2792                 }
2793             }
2794             return OK;
2795         }
2796 
2797         case FOURCC("ec-3"):
2798         {
2799             *offset += chunk_size;
2800             // bypass ec-3 if parse fail
2801             if (parseEAC3SpecificBox(data_offset) != OK) {
2802                 if (mLastTrack != NULL) {
2803                     ALOGW("Fail to parse ec-3");
2804                     mLastTrack->skipTrack = true;
2805                 }
2806             }
2807             return OK;
2808         }
2809 
2810         case FOURCC("ac-4"):
2811         {
2812             *offset += chunk_size;
2813             // bypass ac-4 if parse fail
2814             if (parseAC4SpecificBox(data_offset) != OK) {
2815                 if (mLastTrack != NULL) {
2816                     ALOGW("Fail to parse ac-4");
2817                     mLastTrack->skipTrack = true;
2818                 }
2819             }
2820             return OK;
2821         }
2822 
2823         case FOURCC("ftyp"):
2824         {
2825             if (chunk_data_size < 8 || depth != 0) {
2826                 return ERROR_MALFORMED;
2827             }
2828 
2829             off64_t stop_offset = *offset + chunk_size;
2830             uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2831             std::set<uint32_t> brandSet;
2832             for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2833                 if (i == 1) {
2834                     // Skip this index, it refers to the minorVersion,
2835                     // not a brand.
2836                     continue;
2837                 }
2838 
2839                 uint32_t brand;
2840                 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2841                     return ERROR_MALFORMED;
2842                 }
2843 
2844                 brand = ntohl(brand);
2845                 brandSet.insert(brand);
2846             }
2847 
2848             if (brandSet.count(FOURCC("qt  ")) > 0) {
2849                 mIsQT = true;
2850             } else {
2851                 if (brandSet.count(FOURCC("mif1")) > 0
2852                  && brandSet.count(FOURCC("heic")) > 0) {
2853                     ALOGV("identified HEIF image");
2854 
2855                     mIsHeif = true;
2856                     brandSet.erase(FOURCC("mif1"));
2857                     brandSet.erase(FOURCC("heic"));
2858                 }
2859 
2860                 if (!brandSet.empty()) {
2861                     // This means that the file should have moov box.
2862                     // It could be any iso files (mp4, heifs, etc.)
2863                     mHasMoovBox = true;
2864                     if (mIsHeif) {
2865                         ALOGV("identified HEIF image with other tracks");
2866                     }
2867                 }
2868             }
2869 
2870             *offset = stop_offset;
2871 
2872             break;
2873         }
2874 
2875         default:
2876         {
2877             // check if we're parsing 'ilst' for meta keys
2878             // if so, treat type as a number (key-id).
2879             if (underQTMetaPath(mPath, 3)) {
2880                 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2881                 if (err != OK) {
2882                     return err;
2883                 }
2884             }
2885 
2886             *offset += chunk_size;
2887             break;
2888         }
2889     }
2890 
2891     return OK;
2892 }
2893 
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)2894 status_t MPEG4Extractor::parseChannelCountSampleRate(
2895         off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
2896     // skip 16 bytes:
2897     //  + 6-byte reserved,
2898     //  + 2-byte data reference index,
2899     //  + 8-byte reserved
2900     *offset += 16;
2901     if (!mDataSource->getUInt16(*offset, channelCount)) {
2902         ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
2903         return ERROR_MALFORMED;
2904     }
2905     // skip 8 bytes:
2906     //  + 2-byte channelCount,
2907     //  + 2-byte sample size,
2908     //  + 4-byte reserved
2909     *offset += 8;
2910     if (!mDataSource->getUInt16(*offset, sampleRate)) {
2911         ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
2912         return ERROR_MALFORMED;
2913     }
2914     // skip 4 bytes:
2915     //  + 2-byte sampleRate,
2916     //  + 2-byte reserved
2917     *offset += 4;
2918     return OK;
2919 }
2920 
parseAC4SpecificBox(off64_t offset)2921 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
2922     if (mLastTrack == NULL) {
2923         return ERROR_MALFORMED;
2924     }
2925 
2926     uint16_t sampleRate, channelCount;
2927     status_t status;
2928     if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
2929         return status;
2930     }
2931     uint32_t size;
2932     // + 4-byte size
2933     // + 4-byte type
2934     // + 3-byte payload
2935     const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
2936     if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
2937         ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
2938         return ERROR_MALFORMED;
2939     }
2940 
2941     // + 4-byte size
2942     offset += 4;
2943     uint32_t type;
2944     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
2945         ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
2946         return ERROR_MALFORMED;
2947     }
2948 
2949     // + 4-byte type
2950     offset += 4;
2951     const uint32_t kAC4SpecificBoxPayloadSize = 1176;
2952     uint8_t chunk[kAC4SpecificBoxPayloadSize];
2953     ssize_t dsiSize = size - 8; // size of box - size and type fields
2954     if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
2955         mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
2956         ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
2957         return ERROR_MALFORMED;
2958     }
2959     // + size-byte payload
2960     offset += dsiSize;
2961     ABitReader br(chunk, dsiSize);
2962     AC4DSIParser parser(br);
2963     if (!parser.parse()){
2964         ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
2965         return ERROR_MALFORMED;
2966     }
2967 
2968     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
2969     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
2970     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
2971 
2972     AudioPresentationCollection presentations;
2973     // translate the AC4 presentation information to audio presentations for this track
2974     AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
2975     if (!ac4Presentations.empty()) {
2976         for (const auto& ac4Presentation : ac4Presentations) {
2977             auto& presentation = ac4Presentation.second;
2978             if (!presentation.mEnabled) {
2979                 continue;
2980             }
2981             AudioPresentationV1 ap;
2982             ap.mPresentationId = presentation.mGroupIndex;
2983             ap.mProgramId = presentation.mProgramID;
2984             ap.mLanguage = presentation.mLanguage;
2985             if (presentation.mPreVirtualized) {
2986                 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
2987             } else {
2988                 switch (presentation.mChannelMode) {
2989                     case AC4Parser::AC4Presentation::kChannelMode_Mono:
2990                     case AC4Parser::AC4Presentation::kChannelMode_Stereo:
2991                         ap.mMasteringIndication = MASTERED_FOR_STEREO;
2992                         break;
2993                     case AC4Parser::AC4Presentation::kChannelMode_3_0:
2994                     case AC4Parser::AC4Presentation::kChannelMode_5_0:
2995                     case AC4Parser::AC4Presentation::kChannelMode_5_1:
2996                     case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
2997                     case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
2998                     case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
2999                     case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3000                         ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3001                         break;
3002                     case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3003                     case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3004                     case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3005                     case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3006                     case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3007                     case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3008                     case AC4Parser::AC4Presentation::kChannelMode_22_2:
3009                         ap.mMasteringIndication = MASTERED_FOR_3D;
3010                         break;
3011                     default:
3012                         ALOGE("Invalid channel mode in AC4 presentation");
3013                         return ERROR_MALFORMED;
3014                 }
3015             }
3016 
3017             ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3018                     AC4Parser::AC4Presentation::kVisuallyImpaired);
3019             ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3020                     AC4Parser::AC4Presentation::kVoiceOver);
3021             ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3022             if (!ap.mLanguage.empty()) {
3023                 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3024             }
3025             presentations.push_back(std::move(ap));
3026         }
3027     }
3028 
3029     if (presentations.empty()) {
3030         // Clear audio presentation info in metadata.
3031         AMediaFormat_setBuffer(
3032                 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3033     } else {
3034         std::ostringstream outStream(std::ios::out);
3035         serializeAudioPresentations(presentations, &outStream);
3036         AMediaFormat_setBuffer(
3037                 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3038                 outStream.str().data(), outStream.str().size());
3039     }
3040     return OK;
3041 }
3042 
parseEAC3SpecificBox(off64_t offset)3043 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3044     if (mLastTrack == NULL) {
3045         return ERROR_MALFORMED;
3046     }
3047 
3048     uint16_t sampleRate, channels;
3049     status_t status;
3050     if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3051         return status;
3052     }
3053     uint32_t size;
3054     // + 4-byte size
3055     // + 4-byte type
3056     // + 3-byte payload
3057     const uint32_t kEAC3SpecificBoxMinSize = 11;
3058     // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3059     // calculated from the required bits read below as well as the maximum number of independent
3060     // and dependant sub streams you can have
3061     const uint32_t kEAC3SpecificBoxMaxSize = 152;
3062     if (!mDataSource->getUInt32(offset, &size) ||
3063         size < kEAC3SpecificBoxMinSize ||
3064         size > kEAC3SpecificBoxMaxSize) {
3065         ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3066         return ERROR_MALFORMED;
3067     }
3068 
3069     offset += 4;
3070     uint32_t type;
3071     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3072         ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3073         return ERROR_MALFORMED;
3074     }
3075 
3076     offset += 4;
3077     uint8_t* chunk = new (std::nothrow) uint8_t[size];
3078     if (chunk == NULL) {
3079         return ERROR_MALFORMED;
3080     }
3081 
3082     if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3083         ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3084         delete[] chunk;
3085         return ERROR_MALFORMED;
3086     }
3087 
3088     ABitReader br(chunk, size);
3089     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3090     static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3091 
3092     if (br.numBitsLeft() < 16) {
3093         delete[] chunk;
3094         return ERROR_MALFORMED;
3095     }
3096     unsigned data_rate = br.getBits(13);
3097     ALOGV("EAC3 data rate = %d", data_rate);
3098 
3099     unsigned num_ind_sub = br.getBits(3) + 1;
3100     ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3101     if (br.numBitsLeft() < (num_ind_sub * 23)) {
3102         delete[] chunk;
3103         return ERROR_MALFORMED;
3104     }
3105 
3106     unsigned channelCount = 0;
3107     for (unsigned i = 0; i < num_ind_sub; i++) {
3108         unsigned fscod = br.getBits(2);
3109         if (fscod == 3) {
3110             ALOGE("Incorrect fscod (3) in EAC3 header");
3111             delete[] chunk;
3112             return ERROR_MALFORMED;
3113         }
3114         unsigned boxSampleRate = sampleRateTable[fscod];
3115         if (boxSampleRate != sampleRate) {
3116             ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3117                 boxSampleRate, sampleRate);
3118             delete[] chunk;
3119             return ERROR_MALFORMED;
3120         }
3121 
3122         unsigned bsid = br.getBits(5);
3123         if (bsid == 9 || bsid == 10) {
3124             ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3125         } else if (bsid > 16) {
3126             ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3127             delete[] chunk;
3128             return ERROR_MALFORMED;
3129         }
3130 
3131         // skip
3132         br.skipBits(2);
3133         unsigned bsmod = br.getBits(3);
3134         unsigned acmod = br.getBits(3);
3135         unsigned lfeon = br.getBits(1);
3136         // we currently only support the first stream
3137         if (i == 0)
3138             channelCount = channelCountTable[acmod] + lfeon;
3139         ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3140 
3141         br.skipBits(3);
3142         unsigned num_dep_sub = br.getBits(4);
3143         ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3144         if (num_dep_sub != 0) {
3145             if (br.numBitsLeft() < 9) {
3146                 delete[] chunk;
3147                 return ERROR_MALFORMED;
3148             }
3149             static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3150                 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3151             unsigned chan_loc = br.getBits(9);
3152             unsigned mask = 1;
3153             for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3154                 if ((chan_loc & mask) != 0) {
3155                     // we currently only support the first stream
3156                     if (i == 0) {
3157                         channelCount++;
3158                         // these are 2 channels in the mask
3159                         if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3160                             channelCount++;
3161                         }
3162                     }
3163                     ALOGV(" %s", chan_loc_tbl[j]);
3164                 }
3165             }
3166         } else {
3167             if (br.numBitsLeft() == 0) {
3168                 delete[] chunk;
3169                 return ERROR_MALFORMED;
3170             }
3171             br.skipBits(1);
3172         }
3173     }
3174 
3175     if (br.numBitsLeft() != 0) {
3176         if (br.numBitsLeft() < 8) {
3177             delete[] chunk;
3178             return ERROR_MALFORMED;
3179         }
3180         unsigned mask = br.getBits(8);
3181         for (unsigned i = 0; i < 8; i++) {
3182             if (((0x1 << i) && mask) == 0)
3183                 continue;
3184 
3185             if (br.numBitsLeft() < 8) {
3186                 delete[] chunk;
3187                 return ERROR_MALFORMED;
3188             }
3189             switch (i) {
3190                 case 0: {
3191                     unsigned complexity = br.getBits(8);
3192                     ALOGV("Found a JOC stream with complexity = %d", complexity);
3193                 }break;
3194                 default: {
3195                     br.skipBits(8);
3196                 }break;
3197             }
3198         }
3199     }
3200     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3201     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3202     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3203 
3204     delete[] chunk;
3205     return OK;
3206 }
3207 
parseAC3SpecificBox(off64_t offset)3208 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3209     if (mLastTrack == NULL) {
3210         return ERROR_MALFORMED;
3211     }
3212 
3213     uint16_t sampleRate, channels;
3214     status_t status;
3215     if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3216         return status;
3217     }
3218     uint32_t size;
3219     // + 4-byte size
3220     // + 4-byte type
3221     // + 3-byte payload
3222     const uint32_t kAC3SpecificBoxSize = 11;
3223     if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3224         ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3225         return ERROR_MALFORMED;
3226     }
3227 
3228     offset += 4;
3229     uint32_t type;
3230     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3231         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3232         return ERROR_MALFORMED;
3233     }
3234 
3235     offset += 4;
3236     const uint32_t kAC3SpecificBoxPayloadSize = 3;
3237     uint8_t chunk[kAC3SpecificBoxPayloadSize];
3238     if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3239         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3240         return ERROR_MALFORMED;
3241     }
3242 
3243     ABitReader br(chunk, sizeof(chunk));
3244     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3245     static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3246 
3247     unsigned fscod = br.getBits(2);
3248     if (fscod == 3) {
3249         ALOGE("Incorrect fscod (3) in AC3 header");
3250         return ERROR_MALFORMED;
3251     }
3252     unsigned boxSampleRate = sampleRateTable[fscod];
3253     if (boxSampleRate != sampleRate) {
3254         ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3255             boxSampleRate, sampleRate);
3256         return ERROR_MALFORMED;
3257     }
3258 
3259     unsigned bsid = br.getBits(5);
3260     if (bsid > 8) {
3261         ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3262         return ERROR_MALFORMED;
3263     }
3264 
3265     // skip
3266     br.skipBits(3); // bsmod
3267 
3268     unsigned acmod = br.getBits(3);
3269     unsigned lfeon = br.getBits(1);
3270     unsigned channelCount = channelCountTable[acmod] + lfeon;
3271 
3272     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3273     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3274     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3275     return OK;
3276 }
3277 
parseALACSampleEntry(off64_t * offset)3278 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3279     // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3280     // Store ALAC magic cookie (decoder needs it).
3281     uint8_t alacInfo[12];
3282     off64_t data_offset = *offset;
3283 
3284     if (mDataSource->readAt(
3285             data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3286         return ERROR_IO;
3287     }
3288     uint32_t size = U32_AT(&alacInfo[0]);
3289     if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3290             (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3291             (U32_AT(&alacInfo[8]) != 0)) {
3292         ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3293             size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3294         return ERROR_MALFORMED;
3295     }
3296     data_offset += sizeof(alacInfo);
3297     uint8_t cookie[size - sizeof(alacInfo)];
3298     if (mDataSource->readAt(
3299             data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3300         return ERROR_IO;
3301     }
3302 
3303     uint8_t bitsPerSample = cookie[5];
3304     AMediaFormat_setInt32(mLastTrack->meta,
3305             AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3306     AMediaFormat_setInt32(mLastTrack->meta,
3307             AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3308     AMediaFormat_setInt32(mLastTrack->meta,
3309             AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3310     AMediaFormat_setBuffer(mLastTrack->meta,
3311             AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3312     data_offset += sizeof(cookie);
3313     *offset = data_offset;
3314     return OK;
3315 }
3316 
parseSegmentIndex(off64_t offset,size_t size)3317 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3318   ALOGV("MPEG4Extractor::parseSegmentIndex");
3319 
3320     if (size < 12) {
3321       return -EINVAL;
3322     }
3323 
3324     uint32_t flags;
3325     if (!mDataSource->getUInt32(offset, &flags)) {
3326         return ERROR_MALFORMED;
3327     }
3328 
3329     uint32_t version = flags >> 24;
3330     flags &= 0xffffff;
3331 
3332     ALOGV("sidx version %d", version);
3333 
3334     uint32_t referenceId;
3335     if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3336         return ERROR_MALFORMED;
3337     }
3338 
3339     uint32_t timeScale;
3340     if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3341         return ERROR_MALFORMED;
3342     }
3343     ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3344     if (timeScale == 0)
3345         return ERROR_MALFORMED;
3346 
3347     uint64_t earliestPresentationTime;
3348     uint64_t firstOffset;
3349 
3350     offset += 12;
3351     size -= 12;
3352 
3353     if (version == 0) {
3354         if (size < 8) {
3355             return -EINVAL;
3356         }
3357         uint32_t tmp;
3358         if (!mDataSource->getUInt32(offset, &tmp)) {
3359             return ERROR_MALFORMED;
3360         }
3361         earliestPresentationTime = tmp;
3362         if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3363             return ERROR_MALFORMED;
3364         }
3365         firstOffset = tmp;
3366         offset += 8;
3367         size -= 8;
3368     } else {
3369         if (size < 16) {
3370             return -EINVAL;
3371         }
3372         if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3373             return ERROR_MALFORMED;
3374         }
3375         if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3376             return ERROR_MALFORMED;
3377         }
3378         offset += 16;
3379         size -= 16;
3380     }
3381     ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3382 
3383     if (size < 4) {
3384         return -EINVAL;
3385     }
3386 
3387     uint16_t referenceCount;
3388     if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3389         return ERROR_MALFORMED;
3390     }
3391     offset += 4;
3392     size -= 4;
3393     ALOGV("refcount: %d", referenceCount);
3394 
3395     if (size < referenceCount * 12) {
3396         return -EINVAL;
3397     }
3398 
3399     uint64_t total_duration = 0;
3400     for (unsigned int i = 0; i < referenceCount; i++) {
3401         uint32_t d1, d2, d3;
3402 
3403         if (!mDataSource->getUInt32(offset, &d1) ||     // size
3404             !mDataSource->getUInt32(offset + 4, &d2) || // duration
3405             !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3406             return ERROR_MALFORMED;
3407         }
3408 
3409         if (d1 & 0x80000000) {
3410             ALOGW("sub-sidx boxes not supported yet");
3411         }
3412         bool sap = d3 & 0x80000000;
3413         uint32_t saptype = (d3 >> 28) & 7;
3414         if (!sap || (saptype != 1 && saptype != 2)) {
3415             // type 1 and 2 are sync samples
3416             ALOGW("not a stream access point, or unsupported type: %08x", d3);
3417         }
3418         total_duration += d2;
3419         offset += 12;
3420         ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3421         SidxEntry se;
3422         se.mSize = d1 & 0x7fffffff;
3423         se.mDurationUs = 1000000LL * d2 / timeScale;
3424         mSidxEntries.add(se);
3425     }
3426 
3427     uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3428 
3429     if (mLastTrack == NULL)
3430         return ERROR_MALFORMED;
3431 
3432     int64_t metaDuration;
3433     if (!AMediaFormat_getInt64(mLastTrack->meta,
3434                 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3435         AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3436     }
3437     return OK;
3438 }
3439 
parseQTMetaKey(off64_t offset,size_t size)3440 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3441     if (size < 8) {
3442         return ERROR_MALFORMED;
3443     }
3444 
3445     uint32_t count;
3446     if (!mDataSource->getUInt32(offset + 4, &count)) {
3447         return ERROR_MALFORMED;
3448     }
3449 
3450     if (mMetaKeyMap.size() > 0) {
3451         ALOGW("'keys' atom seen again, discarding existing entries");
3452         mMetaKeyMap.clear();
3453     }
3454 
3455     off64_t keyOffset = offset + 8;
3456     off64_t stopOffset = offset + size;
3457     for (size_t i = 1; i <= count; i++) {
3458         if (keyOffset + 8 > stopOffset) {
3459             return ERROR_MALFORMED;
3460         }
3461 
3462         uint32_t keySize;
3463         if (!mDataSource->getUInt32(keyOffset, &keySize)
3464                 || keySize < 8
3465                 || keyOffset + keySize > stopOffset) {
3466             return ERROR_MALFORMED;
3467         }
3468 
3469         uint32_t type;
3470         if (!mDataSource->getUInt32(keyOffset + 4, &type)
3471                 || type != FOURCC("mdta")) {
3472             return ERROR_MALFORMED;
3473         }
3474 
3475         keySize -= 8;
3476         keyOffset += 8;
3477 
3478         auto keyData = heapbuffer<uint8_t>(keySize);
3479         if (keyData.get() == NULL) {
3480             return ERROR_MALFORMED;
3481         }
3482         if (mDataSource->readAt(
3483                 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3484             return ERROR_MALFORMED;
3485         }
3486 
3487         AString key((const char *)keyData.get(), keySize);
3488         mMetaKeyMap.add(i, key);
3489 
3490         keyOffset += keySize;
3491     }
3492     return OK;
3493 }
3494 
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3495 status_t MPEG4Extractor::parseQTMetaVal(
3496         int32_t keyId, off64_t offset, size_t size) {
3497     ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3498     if (index < 0) {
3499         // corresponding key is not present, ignore
3500         return ERROR_MALFORMED;
3501     }
3502 
3503     if (size <= 16) {
3504         return ERROR_MALFORMED;
3505     }
3506     uint32_t dataSize;
3507     if (!mDataSource->getUInt32(offset, &dataSize)
3508             || dataSize > size || dataSize <= 16) {
3509         return ERROR_MALFORMED;
3510     }
3511     uint32_t atomFourCC;
3512     if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3513             || atomFourCC != FOURCC("data")) {
3514         return ERROR_MALFORMED;
3515     }
3516     uint32_t dataType;
3517     if (!mDataSource->getUInt32(offset + 8, &dataType)
3518             || ((dataType & 0xff000000) != 0)) {
3519         // not well-known type
3520         return ERROR_MALFORMED;
3521     }
3522 
3523     dataSize -= 16;
3524     offset += 16;
3525 
3526     if (dataType == 23 && dataSize >= 4) {
3527         // BE Float32
3528         uint32_t val;
3529         if (!mDataSource->getUInt32(offset, &val)) {
3530             return ERROR_MALFORMED;
3531         }
3532         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3533             AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3534         }
3535     } else if (dataType == 67 && dataSize >= 4) {
3536         // BE signed int32
3537         uint32_t val;
3538         if (!mDataSource->getUInt32(offset, &val)) {
3539             return ERROR_MALFORMED;
3540         }
3541         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3542             AMediaFormat_setInt32(mFileMetaData,
3543                     AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3544         }
3545     } else {
3546         // add more keys if needed
3547         ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3548     }
3549 
3550     return OK;
3551 }
3552 
parseTrackHeader(off64_t data_offset,off64_t data_size)3553 status_t MPEG4Extractor::parseTrackHeader(
3554         off64_t data_offset, off64_t data_size) {
3555     if (data_size < 4) {
3556         return ERROR_MALFORMED;
3557     }
3558 
3559     uint8_t version;
3560     if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3561         return ERROR_IO;
3562     }
3563 
3564     size_t dynSize = (version == 1) ? 36 : 24;
3565 
3566     uint8_t buffer[36 + 60];
3567 
3568     if (data_size != (off64_t)dynSize + 60) {
3569         return ERROR_MALFORMED;
3570     }
3571 
3572     if (mDataSource->readAt(
3573                 data_offset, buffer, data_size) < (ssize_t)data_size) {
3574         return ERROR_IO;
3575     }
3576 
3577     int32_t id;
3578 
3579     if (version == 1) {
3580         // we can get ctime value from U64_AT(&buffer[4])
3581         // we can get mtime value from U64_AT(&buffer[12])
3582         id = U32_AT(&buffer[20]);
3583         // we can get duration value from U64_AT(&buffer[28])
3584     } else if (version == 0) {
3585         // we can get ctime value from U32_AT(&buffer[4])
3586         // we can get mtime value from U32_AT(&buffer[8])
3587         id = U32_AT(&buffer[12]);
3588         // we can get duration value from U32_AT(&buffer[20])
3589     } else {
3590         return ERROR_UNSUPPORTED;
3591     }
3592 
3593     if (mLastTrack == NULL)
3594         return ERROR_MALFORMED;
3595 
3596     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3597 
3598     size_t matrixOffset = dynSize + 16;
3599     int32_t a00 = U32_AT(&buffer[matrixOffset]);
3600     int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3601     int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3602     int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3603 
3604 #if 0
3605     int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3606     int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3607 
3608     ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3609          a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3610     ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3611          a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3612 #endif
3613 
3614     uint32_t rotationDegrees;
3615 
3616     static const int32_t kFixedOne = 0x10000;
3617     if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3618         // Identity, no rotation
3619         rotationDegrees = 0;
3620     } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3621         rotationDegrees = 90;
3622     } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3623         rotationDegrees = 270;
3624     } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3625         rotationDegrees = 180;
3626     } else {
3627         ALOGW("We only support 0,90,180,270 degree rotation matrices");
3628         rotationDegrees = 0;
3629     }
3630 
3631     if (rotationDegrees != 0) {
3632         AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3633     }
3634 
3635     // Handle presentation display size, which could be different
3636     // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3637     uint32_t width = U32_AT(&buffer[dynSize + 52]);
3638     uint32_t height = U32_AT(&buffer[dynSize + 56]);
3639     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3640     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3641 
3642     return OK;
3643 }
3644 
parseITunesMetaData(off64_t offset,size_t size)3645 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3646     if (size == 0) {
3647         return OK;
3648     }
3649 
3650     if (size < 4 || size == SIZE_MAX) {
3651         return ERROR_MALFORMED;
3652     }
3653 
3654     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3655     if (buffer == NULL) {
3656         return ERROR_MALFORMED;
3657     }
3658     if (mDataSource->readAt(
3659                 offset, buffer, size) != (ssize_t)size) {
3660         delete[] buffer;
3661         buffer = NULL;
3662 
3663         return ERROR_IO;
3664     }
3665 
3666     uint32_t flags = U32_AT(buffer);
3667 
3668     const char *metadataKey = nullptr;
3669     char chunk[5];
3670     MakeFourCCString(mPath[4], chunk);
3671     ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3672     switch ((int32_t)mPath[4]) {
3673         case FOURCC("\251alb"):
3674         {
3675             metadataKey = "album";
3676             break;
3677         }
3678         case FOURCC("\251ART"):
3679         {
3680             metadataKey = "artist";
3681             break;
3682         }
3683         case FOURCC("aART"):
3684         {
3685             metadataKey = "albumartist";
3686             break;
3687         }
3688         case FOURCC("\251day"):
3689         {
3690             metadataKey = "year";
3691             break;
3692         }
3693         case FOURCC("\251nam"):
3694         {
3695             metadataKey = "title";
3696             break;
3697         }
3698         case FOURCC("\251wrt"):
3699         {
3700             metadataKey = "writer";
3701             break;
3702         }
3703         case FOURCC("covr"):
3704         {
3705             metadataKey = "albumart";
3706             break;
3707         }
3708         case FOURCC("gnre"):
3709         case FOURCC("\251gen"):
3710         {
3711             metadataKey = "genre";
3712             break;
3713         }
3714         case FOURCC("cpil"):
3715         {
3716             if (size == 9 && flags == 21) {
3717                 char tmp[16];
3718                 sprintf(tmp, "%d",
3719                         (int)buffer[size - 1]);
3720 
3721                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
3722             }
3723             break;
3724         }
3725         case FOURCC("trkn"):
3726         {
3727             if (size == 16 && flags == 0) {
3728                 char tmp[16];
3729                 uint16_t* pTrack = (uint16_t*)&buffer[10];
3730                 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3731                 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3732 
3733                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3734             }
3735             break;
3736         }
3737         case FOURCC("disk"):
3738         {
3739             if ((size == 14 || size == 16) && flags == 0) {
3740                 char tmp[16];
3741                 uint16_t* pDisc = (uint16_t*)&buffer[10];
3742                 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3743                 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3744 
3745                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
3746             }
3747             break;
3748         }
3749         case FOURCC("----"):
3750         {
3751             buffer[size] = '\0';
3752             switch (mPath[5]) {
3753                 case FOURCC("mean"):
3754                     mLastCommentMean.setTo((const char *)buffer + 4);
3755                     break;
3756                 case FOURCC("name"):
3757                     mLastCommentName.setTo((const char *)buffer + 4);
3758                     break;
3759                 case FOURCC("data"):
3760                     if (size < 8) {
3761                         delete[] buffer;
3762                         buffer = NULL;
3763                         ALOGE("b/24346430");
3764                         return ERROR_MALFORMED;
3765                     }
3766                     mLastCommentData.setTo((const char *)buffer + 8);
3767                     break;
3768             }
3769 
3770             // Once we have a set of mean/name/data info, go ahead and process
3771             // it to see if its something we are interested in.  Whether or not
3772             // were are interested in the specific tag, make sure to clear out
3773             // the set so we can be ready to process another tuple should one
3774             // show up later in the file.
3775             if ((mLastCommentMean.length() != 0) &&
3776                 (mLastCommentName.length() != 0) &&
3777                 (mLastCommentData.length() != 0)) {
3778 
3779                 if (mLastCommentMean == "com.apple.iTunes"
3780                         && mLastCommentName == "iTunSMPB") {
3781                     int32_t delay, padding;
3782                     if (sscanf(mLastCommentData,
3783                                " %*x %x %x %*x", &delay, &padding) == 2) {
3784                         if (mLastTrack == NULL) {
3785                             delete[] buffer;
3786                             return ERROR_MALFORMED;
3787                         }
3788 
3789                         AMediaFormat_setInt32(mLastTrack->meta,
3790                                 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
3791                         AMediaFormat_setInt32(mLastTrack->meta,
3792                                 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
3793                     }
3794                 }
3795 
3796                 mLastCommentMean.clear();
3797                 mLastCommentName.clear();
3798                 mLastCommentData.clear();
3799             }
3800             break;
3801         }
3802 
3803         default:
3804             break;
3805     }
3806 
3807     void *tmpData;
3808     size_t tmpDataSize;
3809     const char *s;
3810     if (size >= 8 && metadataKey &&
3811             !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
3812             !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
3813         if (!strcmp(metadataKey, "albumart")) {
3814             AMediaFormat_setBuffer(mFileMetaData, metadataKey,
3815                     buffer + 8, size - 8);
3816         } else if (!strcmp(metadataKey, "genre")) {
3817             if (flags == 0) {
3818                 // uint8_t genre code, iTunes genre codes are
3819                 // the standard id3 codes, except they start
3820                 // at 1 instead of 0 (e.g. Pop is 14, not 13)
3821                 // We use standard id3 numbering, so subtract 1.
3822                 int genrecode = (int)buffer[size - 1];
3823                 genrecode--;
3824                 if (genrecode < 0) {
3825                     genrecode = 255; // reserved for 'unknown genre'
3826                 }
3827                 char genre[10];
3828                 sprintf(genre, "%d", genrecode);
3829 
3830                 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
3831             } else if (flags == 1) {
3832                 // custom genre string
3833                 buffer[size] = '\0';
3834 
3835                 AMediaFormat_setString(mFileMetaData,
3836                         metadataKey, (const char *)buffer + 8);
3837             }
3838         } else {
3839             buffer[size] = '\0';
3840 
3841             AMediaFormat_setString(mFileMetaData,
3842                     metadataKey, (const char *)buffer + 8);
3843         }
3844     }
3845 
3846     delete[] buffer;
3847     buffer = NULL;
3848 
3849     return OK;
3850 }
3851 
parseColorInfo(off64_t offset,size_t size)3852 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3853     if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3854         return ERROR_MALFORMED;
3855     }
3856 
3857     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3858     if (buffer == NULL) {
3859         return ERROR_MALFORMED;
3860     }
3861     if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3862         delete[] buffer;
3863         buffer = NULL;
3864 
3865         return ERROR_IO;
3866     }
3867 
3868     int32_t type = U32_AT(&buffer[0]);
3869     if ((type == FOURCC("nclx") && size >= 11)
3870             || (type == FOURCC("nclc") && size >= 10)) {
3871         // only store the first color specification
3872         int32_t existingColor;
3873         if (!AMediaFormat_getInt32(mLastTrack->meta,
3874                 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
3875             int32_t primaries = U16_AT(&buffer[4]);
3876             int32_t isotransfer = U16_AT(&buffer[6]);
3877             int32_t coeffs = U16_AT(&buffer[8]);
3878             bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
3879 
3880             int32_t range = 0;
3881             int32_t standard = 0;
3882             int32_t transfer = 0;
3883             ColorUtils::convertIsoColorAspectsToPlatformAspects(
3884                     primaries, isotransfer, coeffs, fullRange,
3885                     &range, &standard, &transfer);
3886 
3887             if (range != 0) {
3888                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
3889             }
3890             if (standard != 0) {
3891                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
3892             }
3893             if (transfer != 0) {
3894                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
3895             }
3896         }
3897     }
3898 
3899     delete[] buffer;
3900     buffer = NULL;
3901 
3902     return OK;
3903 }
3904 
parse3GPPMetaData(off64_t offset,size_t size,int depth)3905 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3906     if (size < 4 || size == SIZE_MAX) {
3907         return ERROR_MALFORMED;
3908     }
3909 
3910     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3911     if (buffer == NULL) {
3912         return ERROR_MALFORMED;
3913     }
3914     if (mDataSource->readAt(
3915                 offset, buffer, size) != (ssize_t)size) {
3916         delete[] buffer;
3917         buffer = NULL;
3918 
3919         return ERROR_IO;
3920     }
3921 
3922     const char *metadataKey = nullptr;
3923     switch (mPath[depth]) {
3924         case FOURCC("titl"):
3925         {
3926             metadataKey = "title";
3927             break;
3928         }
3929         case FOURCC("perf"):
3930         {
3931             metadataKey = "artist";
3932             break;
3933         }
3934         case FOURCC("auth"):
3935         {
3936             metadataKey = "writer";
3937             break;
3938         }
3939         case FOURCC("gnre"):
3940         {
3941             metadataKey = "genre";
3942             break;
3943         }
3944         case FOURCC("albm"):
3945         {
3946             if (buffer[size - 1] != '\0') {
3947               char tmp[4];
3948               sprintf(tmp, "%u", buffer[size - 1]);
3949 
3950               AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3951             }
3952 
3953             metadataKey = "album";
3954             break;
3955         }
3956         case FOURCC("yrrc"):
3957         {
3958             if (size < 6) {
3959                 delete[] buffer;
3960                 buffer = NULL;
3961                 ALOGE("b/62133227");
3962                 android_errorWriteLog(0x534e4554, "62133227");
3963                 return ERROR_MALFORMED;
3964             }
3965             char tmp[5];
3966             uint16_t year = U16_AT(&buffer[4]);
3967 
3968             if (year < 10000) {
3969                 sprintf(tmp, "%u", year);
3970 
3971                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
3972             }
3973             break;
3974         }
3975 
3976         default:
3977             break;
3978     }
3979 
3980     if (metadataKey) {
3981         bool isUTF8 = true; // Common case
3982         char16_t *framedata = NULL;
3983         int len16 = 0; // Number of UTF-16 characters
3984 
3985         // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3986         if (size < 6) {
3987             delete[] buffer;
3988             buffer = NULL;
3989             return ERROR_MALFORMED;
3990         }
3991 
3992         if (size - 6 >= 4) {
3993             len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3994             framedata = (char16_t *)(buffer + 6);
3995             if (0xfffe == *framedata) {
3996                 // endianness marker (BOM) doesn't match host endianness
3997                 for (int i = 0; i < len16; i++) {
3998                     framedata[i] = bswap_16(framedata[i]);
3999                 }
4000                 // BOM is now swapped to 0xfeff, we will execute next block too
4001             }
4002 
4003             if (0xfeff == *framedata) {
4004                 // Remove the BOM
4005                 framedata++;
4006                 len16--;
4007                 isUTF8 = false;
4008             }
4009             // else normal non-zero-length UTF-8 string
4010             // we can't handle UTF-16 without BOM as there is no other
4011             // indication of encoding.
4012         }
4013 
4014         if (isUTF8) {
4015             buffer[size] = 0;
4016             AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4017         } else {
4018             // Convert from UTF-16 string to UTF-8 string.
4019             String8 tmpUTF8str(framedata, len16);
4020             AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
4021         }
4022     }
4023 
4024     delete[] buffer;
4025     buffer = NULL;
4026 
4027     return OK;
4028 }
4029 
parseID3v2MetaData(off64_t offset)4030 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
4031     ID3 id3(mDataSource, true /* ignorev1 */, offset);
4032 
4033     if (id3.isValid()) {
4034         struct Map {
4035             const char *key;
4036             const char *tag1;
4037             const char *tag2;
4038         };
4039         static const Map kMap[] = {
4040             { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4041             { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4042             { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4043             { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4044             { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4045             { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4046             { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4047             { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4048             { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4049             { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4050             { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4051         };
4052         static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4053 
4054         for (size_t i = 0; i < kNumMapEntries; ++i) {
4055             const char *ss;
4056             if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4057                 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4058                 if (it->done()) {
4059                     delete it;
4060                     it = new ID3::Iterator(id3, kMap[i].tag2);
4061                 }
4062 
4063                 if (it->done()) {
4064                     delete it;
4065                     continue;
4066                 }
4067 
4068                 String8 s;
4069                 it->getString(&s);
4070                 delete it;
4071 
4072                 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4073             }
4074         }
4075 
4076         size_t dataSize;
4077         String8 mime;
4078         const void *data = id3.getAlbumArt(&dataSize, &mime);
4079 
4080         if (data) {
4081             AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4082         }
4083     }
4084 }
4085 
getTrack(size_t index)4086 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4087     status_t err;
4088     if ((err = readMetaData()) != OK) {
4089         return NULL;
4090     }
4091 
4092     Track *track = mFirstTrack;
4093     while (index > 0) {
4094         if (track == NULL) {
4095             return NULL;
4096         }
4097 
4098         track = track->next;
4099         --index;
4100     }
4101 
4102     if (track == NULL) {
4103         return NULL;
4104     }
4105 
4106 
4107     Trex *trex = NULL;
4108     int32_t trackId;
4109     if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4110         for (size_t i = 0; i < mTrex.size(); i++) {
4111             Trex *t = &mTrex.editItemAt(i);
4112             if (t->track_ID == (uint32_t) trackId) {
4113                 trex = t;
4114                 break;
4115             }
4116         }
4117     } else {
4118         ALOGE("b/21657957");
4119         return NULL;
4120     }
4121 
4122     ALOGV("getTrack called, pssh: %zu", mPssh.size());
4123 
4124     const char *mime;
4125     if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4126         return NULL;
4127     }
4128 
4129     sp<ItemTable> itemTable;
4130     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4131         void *data;
4132         size_t size;
4133         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4134             return NULL;
4135         }
4136 
4137         const uint8_t *ptr = (const uint8_t *)data;
4138 
4139         if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
4140             return NULL;
4141         }
4142     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4143             || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4144         void *data;
4145         size_t size;
4146         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4147             return NULL;
4148         }
4149 
4150         const uint8_t *ptr = (const uint8_t *)data;
4151 
4152         if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
4153             return NULL;
4154         }
4155         if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4156             itemTable = mItemTable;
4157         }
4158     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4159         void *data;
4160         size_t size;
4161         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4162             return NULL;
4163         }
4164 
4165         const uint8_t *ptr = (const uint8_t *)data;
4166 
4167         if (size < 5 || ptr[0] != 0x81) {  // configurationVersion == 1
4168             return NULL;
4169         }
4170     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4171         void *data;
4172         size_t size;
4173         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4174             return NULL;
4175         }
4176 
4177         const uint8_t *ptr = (const uint8_t *)data;
4178 
4179         if (size < 5 || ptr[0] != 0x01) {  // configurationVersion == 1
4180             return NULL;
4181         }
4182     }
4183 
4184     if (track->has_elst and !strncasecmp("video/", mime, 6) and track->elst_media_time > 0) {
4185         track->elstShiftStartTicks = track->elst_media_time;
4186         ALOGV("video track->elstShiftStartTicks :%" PRIu64, track->elstShiftStartTicks);
4187     }
4188 
4189     MPEG4Source *source =  new MPEG4Source(
4190             track->meta, mDataSource, track->timescale, track->sampleTable,
4191             mSidxEntries, trex, mMoofOffset, itemTable,
4192             track->elstShiftStartTicks);
4193     if (source->init() != OK) {
4194         delete source;
4195         return NULL;
4196     }
4197     return source;
4198 }
4199 
4200 // static
verifyTrack(Track * track)4201 status_t MPEG4Extractor::verifyTrack(Track *track) {
4202     const char *mime;
4203     CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4204 
4205     void *data;
4206     size_t size;
4207     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4208         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4209             return ERROR_MALFORMED;
4210         }
4211     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4212         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4213             return ERROR_MALFORMED;
4214         }
4215     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4216         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4217             return ERROR_MALFORMED;
4218         }
4219     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4220         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4221             return ERROR_MALFORMED;
4222         }
4223     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4224             || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4225             || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4226         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4227             return ERROR_MALFORMED;
4228         }
4229     }
4230 
4231     if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4232         // Make sure we have all the metadata we need.
4233         ALOGE("stbl atom missing/invalid.");
4234         return ERROR_MALFORMED;
4235     }
4236 
4237     if (track->timescale == 0) {
4238         ALOGE("timescale invalid.");
4239         return ERROR_MALFORMED;
4240     }
4241 
4242     return OK;
4243 }
4244 
// MPEG-4 audio object types (AOT) that this file refers to by name.
// Entries that are commented out are kept only as a reference for their
// numeric values.
enum AUDIO_OBJECT_TYPE {
    //AOT_NONE             = -1,
    //AOT_NULL_OBJECT      = 0,
    //AOT_AAC_MAIN         = 1,  // Main profile
    AOT_AAC_LC           = 2,    // Low Complexity object
    //AOT_AAC_SSR          = 3,
    //AOT_AAC_LTP          = 4,
    AOT_SBR              = 5,
    //AOT_AAC_SCAL         = 6,
    //AOT_TWIN_VQ          = 7,
    //AOT_CELP             = 8,
    //AOT_HVXC             = 9,
    //AOT_RSVD_10          = 10, // (reserved)
    //AOT_RSVD_11          = 11, // (reserved)
    //AOT_TTSI             = 12, // TTSI Object
    //AOT_MAIN_SYNTH       = 13, // Main Synthetic object
    //AOT_WAV_TAB_SYNTH    = 14, // Wavetable Synthesis object
    //AOT_GEN_MIDI         = 15, // General MIDI object
    //AOT_ALG_SYNTH_AUD_FX = 16, // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC        = 17,   // Error Resilient(ER) AAC Low Complexity
    //AOT_RSVD_18          = 18, // (reserved)
    //AOT_ER_AAC_LTP       = 19, // Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL      = 20,   // Error Resilient(ER) AAC Scalable object
    //AOT_ER_TWIN_VQ       = 21, // Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC          = 22,   // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD        = 23,   // Error Resilient(ER) AAC LowDelay object
    //AOT_ER_CELP          = 24, // Error Resilient(ER) CELP object
    //AOT_ER_HVXC          = 25, // Error Resilient(ER) HVXC object
    //AOT_ER_HILN          = 26, // Error Resilient(ER) HILN object
    //AOT_ER_PARA          = 27, // Error Resilient(ER) Parametric object
    //AOT_RSVD_28          = 28, // might become SSC
    AOT_PS               = 29,   // PS, Parametric Stereo (includes SBR)
    //AOT_MPEGS            = 30, // MPEG Surround

    AOT_ESCAPE           = 31,   // Signal AOT uses more than 5 bits

    //AOT_MP3ONMP4_L1      = 32, // MPEG-Layer1 in mp4
    //AOT_MP3ONMP4_L2      = 33, // MPEG-Layer2 in mp4
    //AOT_MP3ONMP4_L3      = 34, // MPEG-Layer3 in mp4
    //AOT_RSVD_35          = 35, // might become DST
    //AOT_RSVD_36          = 36, // might become ALS
    //AOT_AAC_SLS          = 37, // AAC + SLS
    //AOT_SLS              = 38, // SLS
    //AOT_ER_AAC_ELD       = 39, // AAC Enhanced Low Delay

    //AOT_USAC             = 42, // USAC
    //AOT_SAOC             = 43, // SAOC
    //AOT_LD_MPEGS         = 44, // Low Delay MPEG Surround

    //AOT_RSVD50           = 50, // Interim AOT for Rsvd50
};
4296 
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4297 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4298         const void *esds_data, size_t esds_size) {
4299     ESDS esds(esds_data, esds_size);
4300 
4301     uint8_t objectTypeIndication;
4302     if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4303         return ERROR_MALFORMED;
4304     }
4305 
4306     if (objectTypeIndication == 0xe1) {
4307         // This isn't MPEG4 audio at all, it's QCELP 14k...
4308         if (mLastTrack == NULL)
4309             return ERROR_MALFORMED;
4310 
4311         AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4312         return OK;
4313     }
4314 
4315     if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4316         // mp3 audio
4317         AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4318         return OK;
4319     }
4320 
4321     if (mLastTrack != NULL) {
4322         uint32_t maxBitrate = 0;
4323         uint32_t avgBitrate = 0;
4324         esds.getBitRate(&maxBitrate, &avgBitrate);
4325         if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4326             AMediaFormat_setInt32(mLastTrack->meta,
4327                     AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4328         }
4329         if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4330             AMediaFormat_setInt32(mLastTrack->meta,
4331                     AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4332         }
4333     }
4334 
4335     const uint8_t *csd;
4336     size_t csd_size;
4337     if (esds.getCodecSpecificInfo(
4338                 (const void **)&csd, &csd_size) != OK) {
4339         return ERROR_MALFORMED;
4340     }
4341 
4342     if (kUseHexDump) {
4343         printf("ESD of size %zu\n", csd_size);
4344         hexdump(csd, csd_size);
4345     }
4346 
4347     if (csd_size == 0) {
4348         // There's no further information, i.e. no codec specific data
4349         // Let's assume that the information provided in the mpeg4 headers
4350         // is accurate and hope for the best.
4351 
4352         return OK;
4353     }
4354 
4355     if (csd_size < 2) {
4356         return ERROR_MALFORMED;
4357     }
4358 
4359     if (objectTypeIndication == 0xdd) {
4360         // vorbis audio
4361         if (csd[0] != 0x02) {
4362             return ERROR_MALFORMED;
4363         }
4364 
4365         // codecInfo starts with two lengths, len1 and len2, that are
4366         // "Xiph-style-lacing encoded"..
4367 
4368         size_t offset = 1;
4369         size_t len1 = 0;
4370         while (offset < csd_size && csd[offset] == 0xff) {
4371             if (__builtin_add_overflow(len1, 0xff, &len1)) {
4372                 return ERROR_MALFORMED;
4373             }
4374             ++offset;
4375         }
4376         if (offset >= csd_size) {
4377             return ERROR_MALFORMED;
4378         }
4379         if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4380             return ERROR_MALFORMED;
4381         }
4382         ++offset;
4383         if (len1 == 0) {
4384             return ERROR_MALFORMED;
4385         }
4386 
4387         size_t len2 = 0;
4388         while (offset < csd_size && csd[offset] == 0xff) {
4389             if (__builtin_add_overflow(len2, 0xff, &len2)) {
4390                 return ERROR_MALFORMED;
4391             }
4392             ++offset;
4393         }
4394         if (offset >= csd_size) {
4395             return ERROR_MALFORMED;
4396         }
4397         if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4398             return ERROR_MALFORMED;
4399         }
4400         ++offset;
4401         if (len2 == 0) {
4402             return ERROR_MALFORMED;
4403         }
4404         if (offset >= csd_size || csd[offset] != 0x01) {
4405             return ERROR_MALFORMED;
4406         }
4407         // formerly kKeyVorbisInfo
4408         AMediaFormat_setBuffer(mLastTrack->meta,
4409                 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4410 
4411         if (__builtin_add_overflow(offset, len1, &offset) ||
4412                 offset >= csd_size || csd[offset] != 0x03) {
4413             return ERROR_MALFORMED;
4414         }
4415 
4416         if (__builtin_add_overflow(offset, len2, &offset) ||
4417                 offset >= csd_size || csd[offset] != 0x05) {
4418             return ERROR_MALFORMED;
4419         }
4420 
4421         // formerly kKeyVorbisBooks
4422         AMediaFormat_setBuffer(mLastTrack->meta,
4423                 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4424         AMediaFormat_setString(mLastTrack->meta,
4425                 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4426 
4427         return OK;
4428     }
4429 
4430     static uint32_t kSamplingRate[] = {
4431         96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4432         16000, 12000, 11025, 8000, 7350
4433     };
4434 
4435     ABitReader br(csd, csd_size);
4436     uint32_t objectType = br.getBits(5);
4437 
4438     if (objectType == 31) {  // AAC-ELD => additional 6 bits
4439         objectType = 32 + br.getBits(6);
4440     }
4441 
4442     if (mLastTrack == NULL)
4443         return ERROR_MALFORMED;
4444 
4445     //keep AOT type
4446     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4447 
4448     uint32_t freqIndex = br.getBits(4);
4449 
4450     int32_t sampleRate = 0;
4451     int32_t numChannels = 0;
4452     if (freqIndex == 15) {
4453         if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4454         sampleRate = br.getBits(24);
4455         numChannels = br.getBits(4);
4456     } else {
4457         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4458         numChannels = br.getBits(4);
4459 
4460         if (freqIndex == 13 || freqIndex == 14) {
4461             return ERROR_MALFORMED;
4462         }
4463 
4464         sampleRate = kSamplingRate[freqIndex];
4465     }
4466 
4467     if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4468         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4469         uint32_t extFreqIndex = br.getBits(4);
4470         if (extFreqIndex == 15) {
4471             if (csd_size < 8) {
4472                 return ERROR_MALFORMED;
4473             }
4474             if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4475             br.skipBits(24); // extSampleRate
4476         } else {
4477             if (extFreqIndex == 13 || extFreqIndex == 14) {
4478                 return ERROR_MALFORMED;
4479             }
4480             //extSampleRate = kSamplingRate[extFreqIndex];
4481         }
4482         //TODO: save the extension sampling rate value in meta data =>
4483         //      AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4484     }
4485 
4486     switch (numChannels) {
4487         // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4488         case 0:
4489         case 1:// FC
4490         case 2:// FL FR
4491         case 3:// FC, FL FR
4492         case 4:// FC, FL FR, RC
4493         case 5:// FC, FL FR, SL SR
4494         case 6:// FC, FL FR, SL SR, LFE
4495             //numChannels already contains the right value
4496             break;
4497         case 11:// FC, FL FR, SL SR, RC, LFE
4498             numChannels = 7;
4499             break;
4500         case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4501         case 12:// FC, FL  FR,  SL SR, RL RR, LFE
4502         case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
4503             numChannels = 8;
4504             break;
4505         default:
4506             return ERROR_UNSUPPORTED;
4507     }
4508 
4509     {
4510         if (objectType == AOT_SBR || objectType == AOT_PS) {
4511             if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4512             objectType = br.getBits(5);
4513 
4514             if (objectType == AOT_ESCAPE) {
4515                 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4516                 objectType = 32 + br.getBits(6);
4517             }
4518         }
4519         if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4520                 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4521                 objectType == AOT_ER_BSAC) {
4522             if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4523             br.skipBits(1); // frameLengthFlag
4524 
4525             const int32_t dependsOnCoreCoder = br.getBits(1);
4526 
4527             if (dependsOnCoreCoder ) {
4528                 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4529                 br.skipBits(14); // coreCoderDelay
4530             }
4531 
4532             int32_t extensionFlag = -1;
4533             if (br.numBitsLeft() > 0) {
4534                 extensionFlag = br.getBits(1);
4535             } else {
4536                 switch (objectType) {
4537                 // 14496-3 4.5.1.1 extensionFlag
4538                 case AOT_AAC_LC:
4539                     extensionFlag = 0;
4540                     break;
4541                 case AOT_ER_AAC_LC:
4542                 case AOT_ER_AAC_SCAL:
4543                 case AOT_ER_BSAC:
4544                 case AOT_ER_AAC_LD:
4545                     extensionFlag = 1;
4546                     break;
4547                 default:
4548                     return ERROR_MALFORMED;
4549                     break;
4550                 }
4551                 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4552                         extensionFlag, objectType);
4553             }
4554 
4555             if (numChannels == 0) {
4556                 int32_t channelsEffectiveNum = 0;
4557                 int32_t channelsNum = 0;
4558                 if (br.numBitsLeft() < 32) {
4559                     return ERROR_MALFORMED;
4560                 }
4561                 br.skipBits(4); // ElementInstanceTag
4562                 br.skipBits(2); // Profile
4563                 br.skipBits(4); // SamplingFrequencyIndex
4564                 const int32_t NumFrontChannelElements = br.getBits(4);
4565                 const int32_t NumSideChannelElements = br.getBits(4);
4566                 const int32_t NumBackChannelElements = br.getBits(4);
4567                 const int32_t NumLfeChannelElements = br.getBits(2);
4568                 br.skipBits(3); // NumAssocDataElements
4569                 br.skipBits(4); // NumValidCcElements
4570 
4571                 const int32_t MonoMixdownPresent = br.getBits(1);
4572 
4573                 if (MonoMixdownPresent != 0) {
4574                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4575                     br.skipBits(4); // MonoMixdownElementNumber
4576                 }
4577 
4578                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4579                 const int32_t StereoMixdownPresent = br.getBits(1);
4580                 if (StereoMixdownPresent != 0) {
4581                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4582                     br.skipBits(4); // StereoMixdownElementNumber
4583                 }
4584 
4585                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4586                 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4587                 if (MatrixMixdownIndexPresent != 0) {
4588                     if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4589                     br.skipBits(2); // MatrixMixdownIndex
4590                     br.skipBits(1); // PseudoSurroundEnable
4591                 }
4592 
4593                 int i;
4594                 for (i=0; i < NumFrontChannelElements; i++) {
4595                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4596                     const int32_t FrontElementIsCpe = br.getBits(1);
4597                     br.skipBits(4); // FrontElementTagSelect
4598                     channelsNum += FrontElementIsCpe ? 2 : 1;
4599                 }
4600 
4601                 for (i=0; i < NumSideChannelElements; i++) {
4602                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4603                     const int32_t SideElementIsCpe = br.getBits(1);
4604                     br.skipBits(4); // SideElementTagSelect
4605                     channelsNum += SideElementIsCpe ? 2 : 1;
4606                 }
4607 
4608                 for (i=0; i < NumBackChannelElements; i++) {
4609                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4610                     const int32_t BackElementIsCpe = br.getBits(1);
4611                     br.skipBits(4); // BackElementTagSelect
4612                     channelsNum += BackElementIsCpe ? 2 : 1;
4613                 }
4614                 channelsEffectiveNum = channelsNum;
4615 
4616                 for (i=0; i < NumLfeChannelElements; i++) {
4617                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4618                     br.skipBits(4); // LfeElementTagSelect
4619                     channelsNum += 1;
4620                 }
4621                 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
4622                 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
4623                 numChannels = channelsNum;
4624             }
4625         }
4626     }
4627 
4628     if (numChannels == 0) {
4629         return ERROR_UNSUPPORTED;
4630     }
4631 
4632     if (mLastTrack == NULL)
4633         return ERROR_MALFORMED;
4634 
4635     int32_t prevSampleRate;
4636     CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
4637 
4638     if (prevSampleRate != sampleRate) {
4639         ALOGV("mpeg4 audio sample rate different from previous setting. "
4640              "was: %d, now: %d", prevSampleRate, sampleRate);
4641     }
4642 
4643     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
4644 
4645     int32_t prevChannelCount;
4646     CHECK(AMediaFormat_getInt32(mLastTrack->meta,
4647             AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
4648 
4649     if (prevChannelCount != numChannels) {
4650         ALOGV("mpeg4 audio channel count different from previous setting. "
4651              "was: %d, now: %d", prevChannelCount, numChannels);
4652     }
4653 
4654     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
4655 
4656     return OK;
4657 }
4658 
adjustRawDefaultFrameSize()4659 void MPEG4Extractor::adjustRawDefaultFrameSize() {
4660     int32_t chanCount = 0;
4661     int32_t bitWidth = 0;
4662     const char *mimeStr = NULL;
4663 
4664     if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
4665         !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
4666         AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
4667         AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
4668         // samplesize in stsz may not right , so updade default samplesize
4669         mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
4670     }
4671 }
4672 
4673 ////////////////////////////////////////////////////////////////////////////////
4674 
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks)4675 MPEG4Source::MPEG4Source(
4676         AMediaFormat *format,
4677         DataSourceHelper *dataSource,
4678         int32_t timeScale,
4679         const sp<SampleTable> &sampleTable,
4680         Vector<SidxEntry> &sidx,
4681         const Trex *trex,
4682         off64_t firstMoofOffset,
4683         const sp<ItemTable> &itemTable,
4684         uint64_t elstShiftStartTicks)
4685     : mFormat(format),
4686       mDataSource(dataSource),
4687       mTimescale(timeScale),
4688       mSampleTable(sampleTable),
4689       mCurrentSampleIndex(0),
4690       mCurrentFragmentIndex(0),
4691       mSegments(sidx),
4692       mTrex(trex),
4693       mFirstMoofOffset(firstMoofOffset),
4694       mCurrentMoofOffset(firstMoofOffset),
4695       mNextMoofOffset(-1),
4696       mCurrentTime(0),
4697       mDefaultEncryptedByteBlock(0),
4698       mDefaultSkipByteBlock(0),
4699       mCurrentSampleInfoAllocSize(0),
4700       mCurrentSampleInfoSizes(NULL),
4701       mCurrentSampleInfoOffsetsAllocSize(0),
4702       mCurrentSampleInfoOffsets(NULL),
4703       mIsAVC(false),
4704       mIsHEVC(false),
4705       mIsAC4(false),
4706       mIsPcm(false),
4707       mNALLengthSize(0),
4708       mStarted(false),
4709       mBuffer(NULL),
4710       mSrcBuffer(NULL),
4711       mIsHeif(itemTable != NULL),
4712       mItemTable(itemTable),
4713       mElstShiftStartTicks(elstShiftStartTicks) {
4714 
4715     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
4716 
4717     AMediaFormat_getInt32(mFormat,
4718             AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
4719     mDefaultIVSize = 0;
4720     AMediaFormat_getInt32(mFormat,
4721             AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
4722     void *key;
4723     size_t keysize;
4724     if (AMediaFormat_getBuffer(mFormat,
4725             AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
4726         CHECK(keysize <= 16);
4727         memset(mCryptoKey, 0, 16);
4728         memcpy(mCryptoKey, key, keysize);
4729     }
4730 
4731     AMediaFormat_getInt32(mFormat,
4732             AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
4733     AMediaFormat_getInt32(mFormat,
4734             AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
4735 
4736     const char *mime;
4737     bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
4738     CHECK(success);
4739 
4740     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
4741     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
4742               !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
4743     mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
4744 
4745     if (mIsAVC) {
4746         void *data;
4747         size_t size;
4748         CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
4749 
4750         const uint8_t *ptr = (const uint8_t *)data;
4751 
4752         CHECK(size >= 7);
4753         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
4754 
4755         // The number of bytes used to encode the length of a NAL unit.
4756         mNALLengthSize = 1 + (ptr[4] & 3);
4757     } else if (mIsHEVC) {
4758         void *data;
4759         size_t size;
4760         CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
4761 
4762         const uint8_t *ptr = (const uint8_t *)data;
4763 
4764         CHECK(size >= 22);
4765         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
4766 
4767         mNALLengthSize = 1 + (ptr[14 + 7] & 3);
4768     }
4769 
4770     mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
4771     mIsAudio = !strncasecmp(mime, "audio/", 6);
4772 
4773     if (mIsPcm) {
4774         int32_t numChannels = 0;
4775         int32_t bitsPerSample = 0;
4776         CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
4777         CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
4778 
4779         int32_t bytesPerSample = bitsPerSample >> 3;
4780         int32_t pcmSampleSize = bytesPerSample * numChannels;
4781 
4782         size_t maxSampleSize;
4783         status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
4784         if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
4785                || bitsPerSample != 16) {
4786             // Not supported
4787             mIsPcm = false;
4788         } else {
4789             AMediaFormat_setInt32(mFormat,
4790                     AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
4791         }
4792     }
4793 
4794     CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
4795 
4796 }
4797 
init()4798 status_t MPEG4Source::init() {
4799     status_t err = OK;
4800     const char *mime;
4801     CHECK(AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime));
4802     if (mFirstMoofOffset != 0) {
4803         off64_t offset = mFirstMoofOffset;
4804         err = parseChunk(&offset);
4805         if(err == OK && !strncasecmp("video/", mime, 6)
4806             && !mCurrentSamples.isEmpty()) {
4807             // Start offset should be less or equal to composition time of first sample.
4808             // ISO : sample_composition_time_offset, version 0 (unsigned) for major brands.
4809             mElstShiftStartTicks = std::min(mElstShiftStartTicks,
4810                                             (uint64_t)(*mCurrentSamples.begin()).compositionOffset);
4811         }
4812         return err;
4813     }
4814 
4815     if (!strncasecmp("video/", mime, 6)) {
4816         uint64_t firstSampleCTS = 0;
4817         err = mSampleTable->getMetaDataForSample(0, NULL, NULL, &firstSampleCTS);
4818         // Start offset should be less or equal to composition time of first sample.
4819         // Composition time stamp of first sample cannot be negative.
4820         mElstShiftStartTicks = std::min(mElstShiftStartTicks, firstSampleCTS);
4821     }
4822 
4823     return err;
4824 }
4825 
~MPEG4Source()4826 MPEG4Source::~MPEG4Source() {
4827     if (mStarted) {
4828         stop();
4829     }
4830     free(mCurrentSampleInfoSizes);
4831     free(mCurrentSampleInfoOffsets);
4832 }
4833 
start()4834 media_status_t MPEG4Source::start() {
4835     Mutex::Autolock autoLock(mLock);
4836 
4837     CHECK(!mStarted);
4838 
4839     int32_t tmp;
4840     CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
4841     size_t max_size = tmp;
4842 
4843     // A somewhat arbitrary limit that should be sufficient for 8k video frames
4844     // If you see the message below for a valid input stream: increase the limit
4845     const size_t kMaxBufferSize = 64 * 1024 * 1024;
4846     if (max_size > kMaxBufferSize) {
4847         ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
4848         return AMEDIA_ERROR_MALFORMED;
4849     }
4850     if (max_size == 0) {
4851         ALOGE("zero max input size");
4852         return AMEDIA_ERROR_MALFORMED;
4853     }
4854 
4855     // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
4856     const size_t kInitialBuffers = 2;
4857     const size_t kMaxBuffers = 8;
4858     const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
4859     mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
4860     mSrcBuffer = new (std::nothrow) uint8_t[max_size];
4861     if (mSrcBuffer == NULL) {
4862         // file probably specified a bad max size
4863         return AMEDIA_ERROR_MALFORMED;
4864     }
4865 
4866     mStarted = true;
4867 
4868     return AMEDIA_OK;
4869 }
4870 
stop()4871 media_status_t MPEG4Source::stop() {
4872     Mutex::Autolock autoLock(mLock);
4873 
4874     CHECK(mStarted);
4875 
4876     if (mBuffer != NULL) {
4877         mBuffer->release();
4878         mBuffer = NULL;
4879     }
4880 
4881     delete[] mSrcBuffer;
4882     mSrcBuffer = NULL;
4883 
4884     mStarted = false;
4885     mCurrentSampleIndex = 0;
4886 
4887     return AMEDIA_OK;
4888 }
4889 
parseChunk(off64_t * offset)4890 status_t MPEG4Source::parseChunk(off64_t *offset) {
4891     uint32_t hdr[2];
4892     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4893         return ERROR_IO;
4894     }
4895     uint64_t chunk_size = ntohl(hdr[0]);
4896     uint32_t chunk_type = ntohl(hdr[1]);
4897     off64_t data_offset = *offset + 8;
4898 
4899     if (chunk_size == 1) {
4900         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4901             return ERROR_IO;
4902         }
4903         chunk_size = ntoh64(chunk_size);
4904         data_offset += 8;
4905 
4906         if (chunk_size < 16) {
4907             // The smallest valid chunk is 16 bytes long in this case.
4908             return ERROR_MALFORMED;
4909         }
4910     } else if (chunk_size < 8) {
4911         // The smallest valid chunk is 8 bytes long.
4912         return ERROR_MALFORMED;
4913     }
4914 
4915     char chunk[5];
4916     MakeFourCCString(chunk_type, chunk);
4917     ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4918 
4919     off64_t chunk_data_size = *offset + chunk_size - data_offset;
4920 
4921     switch(chunk_type) {
4922 
4923         case FOURCC("traf"):
4924         case FOURCC("moof"): {
4925             off64_t stop_offset = *offset + chunk_size;
4926             *offset = data_offset;
4927             while (*offset < stop_offset) {
4928                 status_t err = parseChunk(offset);
4929                 if (err != OK) {
4930                     return err;
4931                 }
4932             }
4933             if (chunk_type == FOURCC("moof")) {
4934                 // *offset points to the box following this moof. Find the next moof from there.
4935 
4936                 while (true) {
4937                     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4938                         // no more box to the end of file.
4939                         break;
4940                     }
4941                     chunk_size = ntohl(hdr[0]);
4942                     chunk_type = ntohl(hdr[1]);
4943                     if (chunk_size == 1) {
4944                         // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4945                         // which is defined in 4.2 Object Structure.
4946                         // When chunk_size==1, 8 bytes follows as "largesize".
4947                         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4948                             return ERROR_IO;
4949                         }
4950                         chunk_size = ntoh64(chunk_size);
4951                         if (chunk_size < 16) {
4952                             // The smallest valid chunk is 16 bytes long in this case.
4953                             return ERROR_MALFORMED;
4954                         }
4955                     } else if (chunk_size == 0) {
4956                         // next box extends to end of file.
4957                     } else if (chunk_size < 8) {
4958                         // The smallest valid chunk is 8 bytes long in this case.
4959                         return ERROR_MALFORMED;
4960                     }
4961 
4962                     if (chunk_type == FOURCC("moof")) {
4963                         mNextMoofOffset = *offset;
4964                         break;
4965                     } else if (chunk_size == 0) {
4966                         break;
4967                     }
4968                     *offset += chunk_size;
4969                 }
4970             }
4971             break;
4972         }
4973 
4974         case FOURCC("tfhd"): {
4975                 status_t err;
4976                 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4977                     return err;
4978                 }
4979                 *offset += chunk_size;
4980                 break;
4981         }
4982 
4983         case FOURCC("trun"): {
4984                 status_t err;
4985                 if (mLastParsedTrackId == mTrackId) {
4986                     if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4987                         return err;
4988                     }
4989                 }
4990 
4991                 *offset += chunk_size;
4992                 break;
4993         }
4994 
4995         case FOURCC("saiz"): {
4996             status_t err;
4997             if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4998                 return err;
4999             }
5000             *offset += chunk_size;
5001             break;
5002         }
5003         case FOURCC("saio"): {
5004             status_t err;
5005             if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5006                     != OK) {
5007                 return err;
5008             }
5009             *offset += chunk_size;
5010             break;
5011         }
5012 
5013         case FOURCC("senc"): {
5014             status_t err;
5015             if ((err = parseSampleEncryption(data_offset)) != OK) {
5016                 return err;
5017             }
5018             *offset += chunk_size;
5019             break;
5020         }
5021 
5022         case FOURCC("mdat"): {
5023             // parse DRM info if present
5024             ALOGV("MPEG4Source::parseChunk mdat");
5025             // if saiz/saoi was previously observed, do something with the sampleinfos
5026             *offset += chunk_size;
5027             break;
5028         }
5029 
5030         default: {
5031             *offset += chunk_size;
5032             break;
5033         }
5034     }
5035     return OK;
5036 }
5037 
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5038 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5039         off64_t offset, off64_t size) {
5040     ALOGV("parseSampleAuxiliaryInformationSizes");
5041     if (size < 9) {
5042         return -EINVAL;
5043     }
5044     // 14496-12 8.7.12
5045     uint8_t version;
5046     if (mDataSource->readAt(
5047             offset, &version, sizeof(version))
5048             < (ssize_t)sizeof(version)) {
5049         return ERROR_IO;
5050     }
5051 
5052     if (version != 0) {
5053         return ERROR_UNSUPPORTED;
5054     }
5055     offset++;
5056     size--;
5057 
5058     uint32_t flags;
5059     if (!mDataSource->getUInt24(offset, &flags)) {
5060         return ERROR_IO;
5061     }
5062     offset += 3;
5063     size -= 3;
5064 
5065     if (flags & 1) {
5066         if (size < 13) {
5067             return -EINVAL;
5068         }
5069         uint32_t tmp;
5070         if (!mDataSource->getUInt32(offset, &tmp)) {
5071             return ERROR_MALFORMED;
5072         }
5073         mCurrentAuxInfoType = tmp;
5074         offset += 4;
5075         size -= 4;
5076         if (!mDataSource->getUInt32(offset, &tmp)) {
5077             return ERROR_MALFORMED;
5078         }
5079         mCurrentAuxInfoTypeParameter = tmp;
5080         offset += 4;
5081         size -= 4;
5082     }
5083 
5084     uint8_t defsize;
5085     if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5086         return ERROR_MALFORMED;
5087     }
5088     mCurrentDefaultSampleInfoSize = defsize;
5089     offset++;
5090     size--;
5091 
5092     uint32_t smplcnt;
5093     if (!mDataSource->getUInt32(offset, &smplcnt)) {
5094         return ERROR_MALFORMED;
5095     }
5096     mCurrentSampleInfoCount = smplcnt;
5097     offset += 4;
5098     size -= 4;
5099     if (mCurrentDefaultSampleInfoSize != 0) {
5100         ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5101         return OK;
5102     }
5103     if(smplcnt > size) {
5104         ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5105         android_errorWriteLog(0x534e4554, "124525515");
5106         return -EINVAL;
5107     }
5108     if (smplcnt > mCurrentSampleInfoAllocSize) {
5109         uint8_t * newPtr =  (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5110         if (newPtr == NULL) {
5111             ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5112             return NO_MEMORY;
5113         }
5114         mCurrentSampleInfoSizes = newPtr;
5115         mCurrentSampleInfoAllocSize = smplcnt;
5116     }
5117 
5118     mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5119     return OK;
5120 }
5121 
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5122 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5123         off64_t offset, off64_t size) {
5124     ALOGV("parseSampleAuxiliaryInformationOffsets");
5125     if (size < 8) {
5126         return -EINVAL;
5127     }
5128     // 14496-12 8.7.13
5129     uint8_t version;
5130     if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5131         return ERROR_IO;
5132     }
5133     offset++;
5134     size--;
5135 
5136     uint32_t flags;
5137     if (!mDataSource->getUInt24(offset, &flags)) {
5138         return ERROR_IO;
5139     }
5140     offset += 3;
5141     size -= 3;
5142 
5143     uint32_t entrycount;
5144     if (!mDataSource->getUInt32(offset, &entrycount)) {
5145         return ERROR_IO;
5146     }
5147     offset += 4;
5148     size -= 4;
5149     if (entrycount == 0) {
5150         return OK;
5151     }
5152     if (entrycount > UINT32_MAX / 8) {
5153         return ERROR_MALFORMED;
5154     }
5155 
5156     if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5157         uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5158         if (newPtr == NULL) {
5159             ALOGE("failed to realloc %u -> %u",
5160                     mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5161             return NO_MEMORY;
5162         }
5163         mCurrentSampleInfoOffsets = newPtr;
5164         mCurrentSampleInfoOffsetsAllocSize = entrycount;
5165     }
5166     mCurrentSampleInfoOffsetCount = entrycount;
5167 
5168     if (mCurrentSampleInfoOffsets == NULL) {
5169         return OK;
5170     }
5171 
5172     for (size_t i = 0; i < entrycount; i++) {
5173         if (version == 0) {
5174             if (size < 4) {
5175                 ALOGW("b/124526959");
5176                 android_errorWriteLog(0x534e4554, "124526959");
5177                 return -EINVAL;
5178             }
5179             uint32_t tmp;
5180             if (!mDataSource->getUInt32(offset, &tmp)) {
5181                 return ERROR_IO;
5182             }
5183             mCurrentSampleInfoOffsets[i] = tmp;
5184             offset += 4;
5185             size -= 4;
5186         } else {
5187             if (size < 8) {
5188                 ALOGW("b/124526959");
5189                 android_errorWriteLog(0x534e4554, "124526959");
5190                 return -EINVAL;
5191             }
5192             uint64_t tmp;
5193             if (!mDataSource->getUInt64(offset, &tmp)) {
5194                 return ERROR_IO;
5195             }
5196             mCurrentSampleInfoOffsets[i] = tmp;
5197             offset += 8;
5198             size -= 8;
5199         }
5200     }
5201 
5202     // parse clear/encrypted data
5203 
5204     off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5205 
5206     drmoffset += mCurrentMoofOffset;
5207 
5208     return parseClearEncryptedSizes(drmoffset, false, 0);
5209 }
5210 
parseClearEncryptedSizes(off64_t offset,bool isSubsampleEncryption,uint32_t flags)5211 status_t MPEG4Source::parseClearEncryptedSizes(
5212         off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
5213 
5214     int32_t ivlength;
5215     if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5216         return ERROR_MALFORMED;
5217     }
5218 
5219     // only 0, 8 and 16 byte initialization vectors are supported
5220     if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5221         ALOGW("unsupported IV length: %d", ivlength);
5222         return ERROR_MALFORMED;
5223     }
5224 
5225     uint32_t sampleCount = mCurrentSampleInfoCount;
5226     if (isSubsampleEncryption) {
5227         if (!mDataSource->getUInt32(offset, &sampleCount)) {
5228             return ERROR_IO;
5229         }
5230         offset += 4;
5231     }
5232 
5233     // read CencSampleAuxiliaryDataFormats
5234     for (size_t i = 0; i < sampleCount; i++) {
5235         if (i >= mCurrentSamples.size()) {
5236             ALOGW("too few samples");
5237             break;
5238         }
5239         Sample *smpl = &mCurrentSamples.editItemAt(i);
5240         if (!smpl->clearsizes.isEmpty()) {
5241             continue;
5242         }
5243 
5244         memset(smpl->iv, 0, 16);
5245         if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5246             return ERROR_IO;
5247         }
5248 
5249         offset += ivlength;
5250 
5251         bool readSubsamples;
5252         if (isSubsampleEncryption) {
5253             readSubsamples = flags & 2;
5254         } else {
5255             int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5256             if (smplinfosize == 0) {
5257                 smplinfosize = mCurrentSampleInfoSizes[i];
5258             }
5259             readSubsamples = smplinfosize > ivlength;
5260         }
5261 
5262         if (readSubsamples) {
5263             uint16_t numsubsamples;
5264             if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5265                 return ERROR_IO;
5266             }
5267             offset += 2;
5268             for (size_t j = 0; j < numsubsamples; j++) {
5269                 uint16_t numclear;
5270                 uint32_t numencrypted;
5271                 if (!mDataSource->getUInt16(offset, &numclear)) {
5272                     return ERROR_IO;
5273                 }
5274                 offset += 2;
5275                 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5276                     return ERROR_IO;
5277                 }
5278                 offset += 4;
5279                 smpl->clearsizes.add(numclear);
5280                 smpl->encryptedsizes.add(numencrypted);
5281             }
5282         } else {
5283             smpl->clearsizes.add(0);
5284             smpl->encryptedsizes.add(smpl->size);
5285         }
5286     }
5287 
5288     return OK;
5289 }
5290 
parseSampleEncryption(off64_t offset)5291 status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
5292     uint32_t flags;
5293     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5294         return ERROR_MALFORMED;
5295     }
5296     return parseClearEncryptedSizes(offset + 4, true, flags);
5297 }
5298 
parseTrackFragmentHeader(off64_t offset,off64_t size)5299 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5300 
5301     if (size < 8) {
5302         return -EINVAL;
5303     }
5304 
5305     uint32_t flags;
5306     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5307         return ERROR_MALFORMED;
5308     }
5309 
5310     if (flags & 0xff000000) {
5311         return -EINVAL;
5312     }
5313 
5314     if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5315         return ERROR_MALFORMED;
5316     }
5317 
5318     if (mLastParsedTrackId != mTrackId) {
5319         // this is not the right track, skip it
5320         return OK;
5321     }
5322 
5323     mTrackFragmentHeaderInfo.mFlags = flags;
5324     mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5325     offset += 8;
5326     size -= 8;
5327 
5328     ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5329 
5330     if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5331         if (size < 8) {
5332             return -EINVAL;
5333         }
5334 
5335         if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5336             return ERROR_MALFORMED;
5337         }
5338         offset += 8;
5339         size -= 8;
5340     }
5341 
5342     if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5343         if (size < 4) {
5344             return -EINVAL;
5345         }
5346 
5347         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5348             return ERROR_MALFORMED;
5349         }
5350         offset += 4;
5351         size -= 4;
5352     }
5353 
5354     if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5355         if (size < 4) {
5356             return -EINVAL;
5357         }
5358 
5359         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5360             return ERROR_MALFORMED;
5361         }
5362         offset += 4;
5363         size -= 4;
5364     }
5365 
5366     if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5367         if (size < 4) {
5368             return -EINVAL;
5369         }
5370 
5371         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5372             return ERROR_MALFORMED;
5373         }
5374         offset += 4;
5375         size -= 4;
5376     }
5377 
5378     if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5379         if (size < 4) {
5380             return -EINVAL;
5381         }
5382 
5383         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5384             return ERROR_MALFORMED;
5385         }
5386         offset += 4;
5387         size -= 4;
5388     }
5389 
5390     if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5391         mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5392     }
5393 
5394     mTrackFragmentHeaderInfo.mDataOffset = 0;
5395     return OK;
5396 }
5397 
parseTrackFragmentRun(off64_t offset,off64_t size)5398 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5399 
5400     ALOGV("MPEG4Source::parseTrackFragmentRun");
5401     if (size < 8) {
5402         return -EINVAL;
5403     }
5404 
5405     enum {
5406         kDataOffsetPresent                  = 0x01,
5407         kFirstSampleFlagsPresent            = 0x04,
5408         kSampleDurationPresent              = 0x100,
5409         kSampleSizePresent                  = 0x200,
5410         kSampleFlagsPresent                 = 0x400,
5411         kSampleCompositionTimeOffsetPresent = 0x800,
5412     };
5413 
5414     uint32_t flags;
5415     if (!mDataSource->getUInt32(offset, &flags)) {
5416         return ERROR_MALFORMED;
5417     }
5418     // |version| only affects SampleCompositionTimeOffset field.
5419     // If version == 0, SampleCompositionTimeOffset is uint32_t;
5420     // Otherwise, SampleCompositionTimeOffset is int32_t.
5421     // Sample.compositionOffset is defined as int32_t.
5422     uint8_t version = flags >> 24;
5423     flags &= 0xffffff;
5424     ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5425 
5426     if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5427         // These two shall not be used together.
5428         return -EINVAL;
5429     }
5430 
5431     uint32_t sampleCount;
5432     if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5433         return ERROR_MALFORMED;
5434     }
5435     offset += 8;
5436     size -= 8;
5437 
5438     uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5439 
5440     uint32_t firstSampleFlags = 0;
5441 
5442     if (flags & kDataOffsetPresent) {
5443         if (size < 4) {
5444             return -EINVAL;
5445         }
5446 
5447         int32_t dataOffsetDelta;
5448         if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
5449             return ERROR_MALFORMED;
5450         }
5451 
5452         dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
5453 
5454         offset += 4;
5455         size -= 4;
5456     }
5457 
5458     if (flags & kFirstSampleFlagsPresent) {
5459         if (size < 4) {
5460             return -EINVAL;
5461         }
5462 
5463         if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5464             return ERROR_MALFORMED;
5465         }
5466         offset += 4;
5467         size -= 4;
5468     }
5469 
5470     uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5471              sampleCtsOffset = 0;
5472 
5473     size_t bytesPerSample = 0;
5474     if (flags & kSampleDurationPresent) {
5475         bytesPerSample += 4;
5476     } else if (mTrackFragmentHeaderInfo.mFlags
5477             & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5478         sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5479     } else if (mTrex) {
5480         sampleDuration = mTrex->default_sample_duration;
5481     }
5482 
5483     if (flags & kSampleSizePresent) {
5484         bytesPerSample += 4;
5485     } else if (mTrackFragmentHeaderInfo.mFlags
5486             & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5487         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5488     } else {
5489         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5490     }
5491 
5492     if (flags & kSampleFlagsPresent) {
5493         bytesPerSample += 4;
5494     } else if (mTrackFragmentHeaderInfo.mFlags
5495             & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5496         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5497     } else {
5498         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5499     }
5500 
5501     if (flags & kSampleCompositionTimeOffsetPresent) {
5502         bytesPerSample += 4;
5503     } else {
5504         sampleCtsOffset = 0;
5505     }
5506 
5507     if (bytesPerSample != 0) {
5508         if (size < (off64_t)sampleCount * bytesPerSample) {
5509             return -EINVAL;
5510         }
5511     } else {
5512         if (sampleDuration == 0) {
5513             ALOGW("b/123389881 sampleDuration == 0");
5514             android_errorWriteLog(0x534e4554, "124389881 zero");
5515             return -EINVAL;
5516         }
5517 
5518         // apply some quick (vs strict legality) checks
5519         //
5520         static constexpr uint32_t kMaxTrunSampleCount = 10000;
5521         if (sampleCount > kMaxTrunSampleCount) {
5522             ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
5523                   sampleCount, kMaxTrunSampleCount);
5524             android_errorWriteLog(0x534e4554, "124389881 count");
5525             return -EINVAL;
5526         }
5527     }
5528 
5529     Sample tmp;
5530     for (uint32_t i = 0; i < sampleCount; ++i) {
5531         if (flags & kSampleDurationPresent) {
5532             if (!mDataSource->getUInt32(offset, &sampleDuration)) {
5533                 return ERROR_MALFORMED;
5534             }
5535             offset += 4;
5536         }
5537 
5538         if (flags & kSampleSizePresent) {
5539             if (!mDataSource->getUInt32(offset, &sampleSize)) {
5540                 return ERROR_MALFORMED;
5541             }
5542             offset += 4;
5543         }
5544 
5545         if (flags & kSampleFlagsPresent) {
5546             if (!mDataSource->getUInt32(offset, &sampleFlags)) {
5547                 return ERROR_MALFORMED;
5548             }
5549             offset += 4;
5550         }
5551 
5552         if (flags & kSampleCompositionTimeOffsetPresent) {
5553             if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
5554                 return ERROR_MALFORMED;
5555             }
5556             offset += 4;
5557         }
5558 
5559         ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
5560               " flags 0x%08x ctsOffset %" PRIu32, i + 1,
5561                 dataOffset, sampleSize, sampleDuration,
5562                 (flags & kFirstSampleFlagsPresent) && i == 0
5563                     ? firstSampleFlags : sampleFlags, sampleCtsOffset);
5564         tmp.offset = dataOffset;
5565         tmp.size = sampleSize;
5566         tmp.duration = sampleDuration;
5567         tmp.compositionOffset = sampleCtsOffset;
5568         memset(tmp.iv, 0, sizeof(tmp.iv));
5569         if (mCurrentSamples.add(tmp) < 0) {
5570             ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
5571             android_errorWriteLog(0x534e4554, "124389881 allocation");
5572             mCurrentSamples.clear();
5573             return NO_MEMORY;
5574         }
5575 
5576         dataOffset += sampleSize;
5577     }
5578 
5579     mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
5580 
5581     return OK;
5582 }
5583 
getFormat(AMediaFormat * meta)5584 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
5585     Mutex::Autolock autoLock(mLock);
5586     AMediaFormat_copy(meta, mFormat);
5587     return AMEDIA_OK;
5588 }
5589 
parseNALSize(const uint8_t * data) const5590 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
5591     switch (mNALLengthSize) {
5592         case 1:
5593             return *data;
5594         case 2:
5595             return U16_AT(data);
5596         case 3:
5597             return ((size_t)data[0] << 16) | U16_AT(&data[1]);
5598         case 4:
5599             return U32_AT(data);
5600     }
5601 
5602     // This cannot happen, mNALLengthSize springs to life by adding 1 to
5603     // a 2-bit integer.
5604     CHECK(!"Should not be here.");
5605 
5606     return 0;
5607 }
5608 
parseHEVCLayerId(const uint8_t * data,size_t size)5609 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
5610     if (data == nullptr || size < mNALLengthSize + 2) {
5611         return -1;
5612     }
5613 
5614     // HEVC NAL-header (16-bit)
5615     //  1   6      6     3
5616     // |-|uuuuuu|------|iii|
5617     //      ^            ^
5618     //  NAL_type        layer_id + 1
5619     //
5620     // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
5621     enum {
5622         TSA_N = 2,
5623         TSA_R = 3,
5624         STSA_N = 4,
5625         STSA_R = 5,
5626     };
5627 
5628     data += mNALLengthSize;
5629     uint16_t nalHeader = data[0] << 8 | data[1];
5630 
5631     uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
5632     if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
5633         int32_t layerIdPlusOne = nalHeader & 0x7u;
5634         ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
5635         return layerIdPlusOne - 1;
5636     }
5637     return 0;
5638 }
5639 
read(MediaBufferHelper ** out,const ReadOptions * options)5640 media_status_t MPEG4Source::read(
5641         MediaBufferHelper **out, const ReadOptions *options) {
5642     Mutex::Autolock autoLock(mLock);
5643 
5644     CHECK(mStarted);
5645 
5646     if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
5647         *out = nullptr;
5648         return AMEDIA_ERROR_WOULD_BLOCK;
5649     }
5650 
5651     if (mFirstMoofOffset > 0) {
5652         return fragmentedRead(out, options);
5653     }
5654 
5655     *out = NULL;
5656 
5657     int64_t targetSampleTimeUs = -1;
5658 
5659     int64_t seekTimeUs;
5660     ReadOptions::SeekMode mode;
5661     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5662 
5663         if (mIsHeif) {
5664             CHECK(mSampleTable == NULL);
5665             CHECK(mItemTable != NULL);
5666             int32_t imageIndex;
5667             if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
5668                 return AMEDIA_ERROR_MALFORMED;
5669             }
5670 
5671             status_t err;
5672             if (seekTimeUs >= 0) {
5673                 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
5674             } else {
5675                 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
5676             }
5677             if (err != OK) {
5678                 return AMEDIA_ERROR_UNKNOWN;
5679             }
5680         } else {
5681             uint32_t findFlags = 0;
5682             switch (mode) {
5683                 case ReadOptions::SEEK_PREVIOUS_SYNC:
5684                     findFlags = SampleTable::kFlagBefore;
5685                     break;
5686                 case ReadOptions::SEEK_NEXT_SYNC:
5687                     findFlags = SampleTable::kFlagAfter;
5688                     break;
5689                 case ReadOptions::SEEK_CLOSEST_SYNC:
5690                 case ReadOptions::SEEK_CLOSEST:
5691                     findFlags = SampleTable::kFlagClosest;
5692                     break;
5693                 case ReadOptions::SEEK_FRAME_INDEX:
5694                     findFlags = SampleTable::kFlagFrameIndex;
5695                     break;
5696                 default:
5697                     CHECK(!"Should not be here.");
5698                     break;
5699             }
5700             if( mode != ReadOptions::SEEK_FRAME_INDEX) {
5701                 seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
5702             }
5703 
5704             uint32_t sampleIndex;
5705             status_t err = mSampleTable->findSampleAtTime(
5706                     seekTimeUs, 1000000, mTimescale,
5707                     &sampleIndex, findFlags);
5708 
5709             if (mode == ReadOptions::SEEK_CLOSEST
5710                     || mode == ReadOptions::SEEK_FRAME_INDEX) {
5711                 // We found the closest sample already, now we want the sync
5712                 // sample preceding it (or the sample itself of course), even
5713                 // if the subsequent sync sample is closer.
5714                 findFlags = SampleTable::kFlagBefore;
5715             }
5716 
5717             uint32_t syncSampleIndex = sampleIndex;
5718             // assume every audio sample is a sync sample. This works around
5719             // seek issues with files that were incorrectly written with an
5720             // empty or single-sample stss block for the audio track
5721             if (err == OK && !mIsAudio) {
5722                 err = mSampleTable->findSyncSampleNear(
5723                         sampleIndex, &syncSampleIndex, findFlags);
5724             }
5725 
5726             uint64_t sampleTime;
5727             if (err == OK) {
5728                 err = mSampleTable->getMetaDataForSample(
5729                         sampleIndex, NULL, NULL, &sampleTime);
5730             }
5731 
5732             if (err != OK) {
5733                 if (err == ERROR_OUT_OF_RANGE) {
5734                     // An attempt to seek past the end of the stream would
5735                     // normally cause this ERROR_OUT_OF_RANGE error. Propagating
5736                     // this all the way to the MediaPlayer would cause abnormal
5737                     // termination. Legacy behaviour appears to be to behave as if
5738                     // we had seeked to the end of stream, ending normally.
5739                     return AMEDIA_ERROR_END_OF_STREAM;
5740                 }
5741                 ALOGV("end of stream");
5742                 return AMEDIA_ERROR_UNKNOWN;
5743             }
5744 
5745             if (mode == ReadOptions::SEEK_CLOSEST
5746                 || mode == ReadOptions::SEEK_FRAME_INDEX) {
5747                 sampleTime -= mElstShiftStartTicks;
5748                 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
5749             }
5750 
5751 #if 0
5752             uint32_t syncSampleTime;
5753             CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
5754                         syncSampleIndex, NULL, NULL, &syncSampleTime));
5755 
5756             ALOGI("seek to time %lld us => sample at time %lld us, "
5757                  "sync sample at time %lld us",
5758                  seekTimeUs,
5759                  sampleTime * 1000000ll / mTimescale,
5760                  syncSampleTime * 1000000ll / mTimescale);
5761 #endif
5762 
5763             mCurrentSampleIndex = syncSampleIndex;
5764         }
5765 
5766         if (mBuffer != NULL) {
5767             mBuffer->release();
5768             mBuffer = NULL;
5769         }
5770 
5771         // fall through
5772     }
5773 
5774     off64_t offset = 0;
5775     size_t size = 0;
5776     uint64_t cts, stts;
5777     bool isSyncSample;
5778     bool newBuffer = false;
5779     if (mBuffer == NULL) {
5780         newBuffer = true;
5781 
5782         status_t err;
5783         if (!mIsHeif) {
5784             err = mSampleTable->getMetaDataForSample(
5785                     mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
5786             if(err == OK) {
5787                 /* Composition Time Stamp cannot be negative. Some files have video Sample
5788                 * Time(STTS)delta with zero value(b/117402420).  Hence subtract only
5789                 * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
5790                 */
5791                 cts -= std::min(cts, mElstShiftStartTicks);
5792             }
5793 
5794         } else {
5795             err = mItemTable->getImageOffsetAndSize(
5796                     options && options->getSeekTo(&seekTimeUs, &mode) ?
5797                             &mCurrentSampleIndex : NULL, &offset, &size);
5798 
5799             cts = stts = 0;
5800             isSyncSample = 0;
5801             ALOGV("image offset %lld, size %zu", (long long)offset, size);
5802         }
5803 
5804         if (err != OK) {
5805             if (err == ERROR_END_OF_STREAM) {
5806                 return AMEDIA_ERROR_END_OF_STREAM;
5807             }
5808             return AMEDIA_ERROR_UNKNOWN;
5809         }
5810 
5811         err = mBufferGroup->acquire_buffer(&mBuffer);
5812 
5813         if (err != OK) {
5814             CHECK(mBuffer == NULL);
5815             return AMEDIA_ERROR_UNKNOWN;
5816         }
5817         if (size > mBuffer->size()) {
5818             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5819             mBuffer->release();
5820             mBuffer = NULL;
5821             return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
5822         }
5823     }
5824 
5825     if (!mIsAVC && !mIsHEVC && !mIsAC4) {
5826         if (newBuffer) {
5827             if (mIsPcm) {
5828                 // The twos' PCM block reader assumes that all samples has the same size.
5829 
5830                 uint32_t samplesToRead = mSampleTable->getLastSampleIndexInChunk()
5831                                                       - mCurrentSampleIndex + 1;
5832                 if (samplesToRead > kMaxPcmFrameSize) {
5833                     samplesToRead = kMaxPcmFrameSize;
5834                 }
5835 
5836                 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
5837                       samplesToRead, size, mCurrentSampleIndex,
5838                       mSampleTable->getLastSampleIndexInChunk());
5839 
5840                size_t totalSize = samplesToRead * size;
5841                 uint8_t* buf = (uint8_t *)mBuffer->data();
5842                 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
5843                 if (bytesRead < (ssize_t)totalSize) {
5844                     mBuffer->release();
5845                     mBuffer = NULL;
5846 
5847                     return AMEDIA_ERROR_IO;
5848                 }
5849 
5850                 AMediaFormat *meta = mBuffer->meta_data();
5851                 AMediaFormat_clear(meta);
5852                 AMediaFormat_setInt64(
5853                       meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5854                 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5855 
5856                 int32_t byteOrder = 0;
5857                 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
5858                         AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
5859 
5860                 if (isGetBigEndian && byteOrder == 1) {
5861                     // Big-endian -> little-endian
5862                     uint16_t *dstData = (uint16_t *)buf;
5863                     uint16_t *srcData = (uint16_t *)buf;
5864 
5865                     for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
5866                          dstData[j] = ntohs(srcData[j]);
5867                     }
5868                 }
5869 
5870                 mCurrentSampleIndex += samplesToRead;
5871                 mBuffer->set_range(0, totalSize);
5872             } else {
5873                 ssize_t num_bytes_read =
5874                     mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5875 
5876                 if (num_bytes_read < (ssize_t)size) {
5877                     mBuffer->release();
5878                     mBuffer = NULL;
5879 
5880                     return AMEDIA_ERROR_IO;
5881                 }
5882 
5883                 CHECK(mBuffer != NULL);
5884                 mBuffer->set_range(0, size);
5885                 AMediaFormat *meta = mBuffer->meta_data();
5886                 AMediaFormat_clear(meta);
5887                 AMediaFormat_setInt64(
5888                         meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5889                 AMediaFormat_setInt64(
5890                         meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5891 
5892                 if (targetSampleTimeUs >= 0) {
5893                     AMediaFormat_setInt64(
5894                             meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5895                 }
5896 
5897                 if (isSyncSample) {
5898                     AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5899                 }
5900 
5901                 ++mCurrentSampleIndex;
5902             }
5903         }
5904 
5905         *out = mBuffer;
5906         mBuffer = NULL;
5907 
5908         return AMEDIA_OK;
5909 
5910     } else if (mIsAC4) {
5911         CHECK(mBuffer != NULL);
5912         // Make sure there is enough space to write the sync header and the raw frame
5913         if (mBuffer->range_length() < (7 + size)) {
5914             mBuffer->release();
5915             mBuffer = NULL;
5916 
5917             return AMEDIA_ERROR_IO;
5918         }
5919 
5920         uint8_t *dstData = (uint8_t *)mBuffer->data();
5921         size_t dstOffset = 0;
5922         // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
5923         // AC40 sync word, meaning no CRC at the end of the frame
5924         dstData[dstOffset++] = 0xAC;
5925         dstData[dstOffset++] = 0x40;
5926         dstData[dstOffset++] = 0xFF;
5927         dstData[dstOffset++] = 0xFF;
5928         dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
5929         dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
5930         dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
5931 
5932         ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
5933         if (numBytesRead != (ssize_t)size) {
5934             mBuffer->release();
5935             mBuffer = NULL;
5936 
5937             return AMEDIA_ERROR_IO;
5938         }
5939 
5940         mBuffer->set_range(0, dstOffset + size);
5941         AMediaFormat *meta = mBuffer->meta_data();
5942         AMediaFormat_clear(meta);
5943         AMediaFormat_setInt64(
5944                 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5945         AMediaFormat_setInt64(
5946                 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5947 
5948         if (targetSampleTimeUs >= 0) {
5949             AMediaFormat_setInt64(
5950                     meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5951         }
5952 
5953         if (isSyncSample) {
5954             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5955         }
5956 
5957         ++mCurrentSampleIndex;
5958 
5959         *out = mBuffer;
5960         mBuffer = NULL;
5961 
5962         return AMEDIA_OK;
5963     } else {
5964         // Whole NAL units are returned but each fragment is prefixed by
5965         // the start code (0x00 00 00 01).
5966         ssize_t num_bytes_read = 0;
5967         num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
5968 
5969         if (num_bytes_read < (ssize_t)size) {
5970             mBuffer->release();
5971             mBuffer = NULL;
5972 
5973             return AMEDIA_ERROR_IO;
5974         }
5975 
5976         uint8_t *dstData = (uint8_t *)mBuffer->data();
5977         size_t srcOffset = 0;
5978         size_t dstOffset = 0;
5979 
5980         while (srcOffset < size) {
5981             bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5982             size_t nalLength = 0;
5983             if (!isMalFormed) {
5984                 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5985                 srcOffset += mNALLengthSize;
5986                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
5987             }
5988 
5989             if (isMalFormed) {
5990                 //if nallength abnormal,ignore it.
5991                 ALOGW("abnormal nallength, ignore this NAL");
5992                 srcOffset = size;
5993                 break;
5994             }
5995 
5996             if (nalLength == 0) {
5997                 continue;
5998             }
5999 
6000             if (dstOffset > SIZE_MAX - 4 ||
6001                     dstOffset + 4 > SIZE_MAX - nalLength ||
6002                     dstOffset + 4 + nalLength > mBuffer->size()) {
6003                 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6004                 android_errorWriteLog(0x534e4554, "27208621");
6005                 mBuffer->release();
6006                 mBuffer = NULL;
6007                 return AMEDIA_ERROR_MALFORMED;
6008             }
6009 
6010             dstData[dstOffset++] = 0;
6011             dstData[dstOffset++] = 0;
6012             dstData[dstOffset++] = 0;
6013             dstData[dstOffset++] = 1;
6014             memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6015             srcOffset += nalLength;
6016             dstOffset += nalLength;
6017         }
6018         CHECK_EQ(srcOffset, size);
6019         CHECK(mBuffer != NULL);
6020         mBuffer->set_range(0, dstOffset);
6021 
6022         AMediaFormat *meta = mBuffer->meta_data();
6023         AMediaFormat_clear(meta);
6024         AMediaFormat_setInt64(
6025                 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6026         AMediaFormat_setInt64(
6027                 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6028 
6029         if (targetSampleTimeUs >= 0) {
6030             AMediaFormat_setInt64(
6031                     meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6032         }
6033 
6034         if (mIsAVC) {
6035             uint32_t layerId = FindAVCLayerId(
6036                     (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6037             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6038         } else if (mIsHEVC) {
6039             int32_t layerId = parseHEVCLayerId(
6040                     (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6041             if (layerId >= 0) {
6042                 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6043             }
6044         }
6045 
6046         if (isSyncSample) {
6047             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6048         }
6049 
6050         ++mCurrentSampleIndex;
6051 
6052         *out = mBuffer;
6053         mBuffer = NULL;
6054 
6055         return AMEDIA_OK;
6056     }
6057 }
6058 
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6059 media_status_t MPEG4Source::fragmentedRead(
6060         MediaBufferHelper **out, const ReadOptions *options) {
6061 
6062     ALOGV("MPEG4Source::fragmentedRead");
6063 
6064     CHECK(mStarted);
6065 
6066     *out = NULL;
6067 
6068     int64_t targetSampleTimeUs = -1;
6069 
6070     int64_t seekTimeUs;
6071     ReadOptions::SeekMode mode;
6072     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6073 
6074         seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6075         ALOGV("shifted seekTimeUs :%" PRId64 ", mElstShiftStartTicks:%" PRIu64, seekTimeUs,
6076               mElstShiftStartTicks);
6077 
6078         int numSidxEntries = mSegments.size();
6079         if (numSidxEntries != 0) {
6080             int64_t totalTime = 0;
6081             off64_t totalOffset = mFirstMoofOffset;
6082             for (int i = 0; i < numSidxEntries; i++) {
6083                 const SidxEntry *se = &mSegments[i];
6084                 if (totalTime + se->mDurationUs > seekTimeUs) {
6085                     // The requested time is somewhere in this segment
6086                     if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6087                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6088                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6089                         // requested next sync, or closest sync and it was closer to the end of
6090                         // this segment
6091                         totalTime += se->mDurationUs;
6092                         totalOffset += se->mSize;
6093                     }
6094                     break;
6095                 }
6096                 totalTime += se->mDurationUs;
6097                 totalOffset += se->mSize;
6098             }
6099             mCurrentMoofOffset = totalOffset;
6100             mNextMoofOffset = -1;
6101             mCurrentSamples.clear();
6102             mCurrentSampleIndex = 0;
6103             status_t err = parseChunk(&totalOffset);
6104             if (err != OK) {
6105                 return AMEDIA_ERROR_UNKNOWN;
6106             }
6107             mCurrentTime = totalTime * mTimescale / 1000000ll;
6108         } else {
6109             // without sidx boxes, we can only seek to 0
6110             mCurrentMoofOffset = mFirstMoofOffset;
6111             mNextMoofOffset = -1;
6112             mCurrentSamples.clear();
6113             mCurrentSampleIndex = 0;
6114             off64_t tmp = mCurrentMoofOffset;
6115             status_t err = parseChunk(&tmp);
6116             if (err != OK) {
6117                 return AMEDIA_ERROR_UNKNOWN;
6118             }
6119             mCurrentTime = 0;
6120         }
6121 
6122         if (mBuffer != NULL) {
6123             mBuffer->release();
6124             mBuffer = NULL;
6125         }
6126 
6127         // fall through
6128     }
6129 
6130     off64_t offset = 0;
6131     size_t size = 0;
6132     uint64_t cts = 0;
6133     bool isSyncSample = false;
6134     bool newBuffer = false;
6135     if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6136         newBuffer = true;
6137 
6138         if (mBuffer != NULL) {
6139             mBuffer->release();
6140             mBuffer = NULL;
6141         }
6142         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6143             // move to next fragment if there is one
6144             if (mNextMoofOffset <= mCurrentMoofOffset) {
6145                 return AMEDIA_ERROR_END_OF_STREAM;
6146             }
6147             off64_t nextMoof = mNextMoofOffset;
6148             mCurrentMoofOffset = nextMoof;
6149             mCurrentSamples.clear();
6150             mCurrentSampleIndex = 0;
6151             status_t err = parseChunk(&nextMoof);
6152             if (err != OK) {
6153                 return AMEDIA_ERROR_UNKNOWN;
6154             }
6155             if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6156                 return AMEDIA_ERROR_END_OF_STREAM;
6157             }
6158         }
6159 
6160         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6161         offset = smpl->offset;
6162         size = smpl->size;
6163         cts = mCurrentTime + smpl->compositionOffset;
6164         /* Composition Time Stamp cannot be negative. Some files have video Sample
6165         * Time(STTS)delta with zero value(b/117402420).  Hence subtract only
6166         * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
6167         */
6168         cts -= std::min(cts, mElstShiftStartTicks);
6169 
6170         mCurrentTime += smpl->duration;
6171         isSyncSample = (mCurrentSampleIndex == 0);
6172 
6173         status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6174 
6175         if (err != OK) {
6176             CHECK(mBuffer == NULL);
6177             ALOGV("acquire_buffer returned %d", err);
6178             return AMEDIA_ERROR_UNKNOWN;
6179         }
6180         if (size > mBuffer->size()) {
6181             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6182             mBuffer->release();
6183             mBuffer = NULL;
6184             return AMEDIA_ERROR_UNKNOWN;
6185         }
6186     }
6187 
6188     const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6189     AMediaFormat *bufmeta = mBuffer->meta_data();
6190     AMediaFormat_clear(bufmeta);
6191     if (smpl->encryptedsizes.size()) {
6192         // store clear/encrypted lengths in metadata
6193         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6194                 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
6195         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6196                 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
6197         AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6198         AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6199         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6200         AMediaFormat_setInt32(bufmeta,
6201                 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6202         AMediaFormat_setInt32(bufmeta,
6203                 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6204 
6205         void *iv = NULL;
6206         size_t ivlength = 0;
6207         if (!AMediaFormat_getBuffer(mFormat,
6208                 "crypto-iv", &iv, &ivlength)) {
6209             iv = (void *) smpl->iv;
6210             ivlength = 16; // use 16 or the actual size?
6211         }
6212         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6213     }
6214 
6215     if (!mIsAVC && !mIsHEVC) {
6216         if (newBuffer) {
6217             if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6218                 mBuffer->release();
6219                 mBuffer = NULL;
6220 
6221                 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6222                 return AMEDIA_ERROR_MALFORMED;
6223             }
6224 
6225             ssize_t num_bytes_read =
6226                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6227 
6228             if (num_bytes_read < (ssize_t)size) {
6229                 mBuffer->release();
6230                 mBuffer = NULL;
6231 
6232                 ALOGE("i/o error");
6233                 return AMEDIA_ERROR_IO;
6234             }
6235 
6236             CHECK(mBuffer != NULL);
6237             mBuffer->set_range(0, size);
6238             AMediaFormat_setInt64(bufmeta,
6239                     AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6240             AMediaFormat_setInt64(bufmeta,
6241                     AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6242 
6243             if (targetSampleTimeUs >= 0) {
6244                 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6245             }
6246 
6247             if (mIsAVC) {
6248                 uint32_t layerId = FindAVCLayerId(
6249                         (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6250                 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6251             } else if (mIsHEVC) {
6252                 int32_t layerId = parseHEVCLayerId(
6253                         (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6254                 if (layerId >= 0) {
6255                     AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6256                 }
6257             }
6258 
6259             if (isSyncSample) {
6260                 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6261             }
6262 
6263             ++mCurrentSampleIndex;
6264         }
6265 
6266         *out = mBuffer;
6267         mBuffer = NULL;
6268 
6269         return AMEDIA_OK;
6270 
6271     } else {
6272         ALOGV("whole NAL");
6273         // Whole NAL units are returned but each fragment is prefixed by
6274         // the start code (0x00 00 00 01).
6275         ssize_t num_bytes_read = 0;
6276         void *data = NULL;
6277         bool isMalFormed = false;
6278         int32_t max_size;
6279         if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6280                 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6281             isMalFormed = true;
6282         } else {
6283             data = mSrcBuffer;
6284         }
6285 
6286         if (isMalFormed || data == NULL) {
6287             ALOGE("isMalFormed size %zu", size);
6288             if (mBuffer != NULL) {
6289                 mBuffer->release();
6290                 mBuffer = NULL;
6291             }
6292             return AMEDIA_ERROR_MALFORMED;
6293         }
6294         num_bytes_read = mDataSource->readAt(offset, data, size);
6295 
6296         if (num_bytes_read < (ssize_t)size) {
6297             mBuffer->release();
6298             mBuffer = NULL;
6299 
6300             ALOGE("i/o error");
6301             return AMEDIA_ERROR_IO;
6302         }
6303 
6304         uint8_t *dstData = (uint8_t *)mBuffer->data();
6305         size_t srcOffset = 0;
6306         size_t dstOffset = 0;
6307 
6308         while (srcOffset < size) {
6309             isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6310             size_t nalLength = 0;
6311             if (!isMalFormed) {
6312                 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6313                 srcOffset += mNALLengthSize;
6314                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6315                         || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6316                         || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6317             }
6318 
6319             if (isMalFormed) {
6320                 ALOGE("Video is malformed; nalLength %zu", nalLength);
6321                 mBuffer->release();
6322                 mBuffer = NULL;
6323                 return AMEDIA_ERROR_MALFORMED;
6324             }
6325 
6326             if (nalLength == 0) {
6327                 continue;
6328             }
6329 
6330             if (dstOffset > SIZE_MAX - 4 ||
6331                     dstOffset + 4 > SIZE_MAX - nalLength ||
6332                     dstOffset + 4 + nalLength > mBuffer->size()) {
6333                 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6334                 android_errorWriteLog(0x534e4554, "26365349");
6335                 mBuffer->release();
6336                 mBuffer = NULL;
6337                 return AMEDIA_ERROR_MALFORMED;
6338             }
6339 
6340             dstData[dstOffset++] = 0;
6341             dstData[dstOffset++] = 0;
6342             dstData[dstOffset++] = 0;
6343             dstData[dstOffset++] = 1;
6344             memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6345             srcOffset += nalLength;
6346             dstOffset += nalLength;
6347         }
6348         CHECK_EQ(srcOffset, size);
6349         CHECK(mBuffer != NULL);
6350         mBuffer->set_range(0, dstOffset);
6351 
6352         AMediaFormat *bufmeta = mBuffer->meta_data();
6353         AMediaFormat_setInt64(bufmeta,
6354                 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6355         AMediaFormat_setInt64(bufmeta,
6356                 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6357 
6358         if (targetSampleTimeUs >= 0) {
6359             AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6360         }
6361 
6362         if (isSyncSample) {
6363             AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6364         }
6365 
6366         ++mCurrentSampleIndex;
6367 
6368         *out = mBuffer;
6369         mBuffer = NULL;
6370 
6371         return AMEDIA_OK;
6372     }
6373 
6374     return AMEDIA_OK;
6375 }
6376 
findTrackByMimePrefix(const char * mimePrefix)6377 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6378         const char *mimePrefix) {
6379     for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6380         const char *mime;
6381         if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6382                 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6383             return track;
6384         }
6385     }
6386 
6387     return NULL;
6388 }
6389 
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6390 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6391     uint8_t header[8];
6392 
6393     ssize_t n = source->readAt(4, header, sizeof(header));
6394     if (n < (ssize_t)sizeof(header)) {
6395         return false;
6396     }
6397 
6398     if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6399         || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6400         || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6401         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6402         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6403         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6404         || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6405         || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
6406         *confidence = 0.4;
6407 
6408         return true;
6409     }
6410 
6411     return false;
6412 }
6413 
isCompatibleBrand(uint32_t fourcc)6414 static bool isCompatibleBrand(uint32_t fourcc) {
6415     static const uint32_t kCompatibleBrands[] = {
6416         FOURCC("isom"),
6417         FOURCC("iso2"),
6418         FOURCC("avc1"),
6419         FOURCC("hvc1"),
6420         FOURCC("hev1"),
6421         FOURCC("av01"),
6422         FOURCC("vp09"),
6423         FOURCC("3gp4"),
6424         FOURCC("mp41"),
6425         FOURCC("mp42"),
6426         FOURCC("dash"),
6427         FOURCC("nvr1"),
6428 
6429         // Won't promise that the following file types can be played.
6430         // Just give these file types a chance.
6431         FOURCC("qt  "),  // Apple's QuickTime
6432         FOURCC("MSNV"),  // Sony's PSP
6433         FOURCC("wmf "),
6434 
6435         FOURCC("3g2a"),  // 3GPP2
6436         FOURCC("3g2b"),
6437         FOURCC("mif1"),  // HEIF image
6438         FOURCC("heic"),  // HEIF image
6439         FOURCC("msf1"),  // HEIF image sequence
6440         FOURCC("hevc"),  // HEIF image sequence
6441     };
6442 
6443     for (size_t i = 0;
6444          i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
6445          ++i) {
6446         if (kCompatibleBrands[i] == fourcc) {
6447             return true;
6448         }
6449     }
6450 
6451     return false;
6452 }
6453 
6454 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
6455 // compatible brand is present.
6456 // Also try to identify where this file's metadata ends
6457 // (end of the 'moov' atom) and report it to the caller as part of
6458 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)6459 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
6460     // We scan up to 128 bytes to identify this file as an MP4.
6461     static const off64_t kMaxScanOffset = 128ll;
6462 
6463     off64_t offset = 0ll;
6464     bool foundGoodFileType = false;
6465     off64_t moovAtomEndOffset = -1ll;
6466     bool done = false;
6467 
6468     while (!done && offset < kMaxScanOffset) {
6469         uint32_t hdr[2];
6470         if (source->readAt(offset, hdr, 8) < 8) {
6471             return false;
6472         }
6473 
6474         uint64_t chunkSize = ntohl(hdr[0]);
6475         uint32_t chunkType = ntohl(hdr[1]);
6476         off64_t chunkDataOffset = offset + 8;
6477 
6478         if (chunkSize == 1) {
6479             if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
6480                 return false;
6481             }
6482 
6483             chunkSize = ntoh64(chunkSize);
6484             chunkDataOffset += 8;
6485 
6486             if (chunkSize < 16) {
6487                 // The smallest valid chunk is 16 bytes long in this case.
6488                 return false;
6489             }
6490 
6491         } else if (chunkSize < 8) {
6492             // The smallest valid chunk is 8 bytes long.
6493             return false;
6494         }
6495 
6496         // (data_offset - offset) is either 8 or 16
6497         off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
6498         if (chunkDataSize < 0) {
6499             ALOGE("b/23540914");
6500             return false;
6501         }
6502 
6503         char chunkstring[5];
6504         MakeFourCCString(chunkType, chunkstring);
6505         ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
6506                 chunkstring, chunkSize, (long long)offset);
6507         switch (chunkType) {
6508             case FOURCC("ftyp"):
6509             {
6510                 if (chunkDataSize < 8) {
6511                     return false;
6512                 }
6513 
6514                 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
6515                 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
6516                     if (i == 1) {
6517                         // Skip this index, it refers to the minorVersion,
6518                         // not a brand.
6519                         continue;
6520                     }
6521 
6522                     uint32_t brand;
6523                     if (source->readAt(
6524                                 chunkDataOffset + 4 * i, &brand, 4) < 4) {
6525                         return false;
6526                     }
6527 
6528                     brand = ntohl(brand);
6529 
6530                     if (isCompatibleBrand(brand)) {
6531                         foundGoodFileType = true;
6532                         break;
6533                     }
6534                 }
6535 
6536                 if (!foundGoodFileType) {
6537                     return false;
6538                 }
6539 
6540                 break;
6541             }
6542 
6543             case FOURCC("moov"):
6544             {
6545                 moovAtomEndOffset = offset + chunkSize;
6546 
6547                 done = true;
6548                 break;
6549             }
6550 
6551             default:
6552                 break;
6553         }
6554 
6555         offset += chunkSize;
6556     }
6557 
6558     if (!foundGoodFileType) {
6559         return false;
6560     }
6561 
6562     *confidence = 0.4f;
6563 
6564     return true;
6565 }
6566 
CreateExtractor(CDataSource * source,void *)6567 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
6568     return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
6569 }
6570 
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)6571 static CreatorFunc Sniff(
6572         CDataSource *source, float *confidence, void **,
6573         FreeMetaFunc *) {
6574     DataSourceHelper helper(source);
6575     if (BetterSniffMPEG4(&helper, confidence)) {
6576         return CreateExtractor;
6577     }
6578 
6579     if (LegacySniffMPEG4(&helper, confidence)) {
6580         ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
6581         return CreateExtractor;
6582     }
6583 
6584     return NULL;
6585 }
6586 
// NULL-terminated list of file name extensions advertised through the
// ExtractorDef returned by GETEXTRACTORDEF().
static const char *extensions[] = {
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    "m4a", "m4r", "m4v",
    "mov", "mp4", "qt",
    NULL  // terminator
};
6601 
6602 extern "C" {
6603 // This is the only symbol that needs to be exported
6604 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()6605 ExtractorDef GETEXTRACTORDEF() {
6606     return {
6607         EXTRACTORDEF_VERSION,
6608         UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
6609         2, // version
6610         "MP4 Extractor",
6611         { .v3 = {Sniff, extensions} },
6612     };
6613 }
6614 
6615 } // extern "C"
6616 
6617 }  // namespace android
6618