/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Determine coverage of font given its raw "cmap" OpenType table

#include "minikin/CmapCoverage.h"

#include <algorithm>
#include <climits>
#include <cstdint>
#include <memory>
#include <vector>

#include <log/log.h>  // android_errorWriteLog

#include "minikin/Range.h"
#include "minikin/SparseBitSet.h"

#include "MinikinInternal.h"

namespace minikin {

// These could perhaps be optimized to use __builtin_bswap16 and friends.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]);
}

static uint32_t readU24(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 |
           ((uint32_t)data[offset + 2]);
}

static uint32_t readU32(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 |
           ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]);
}

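// Coverage is accumulated as a flat vector of alternating start/end values, where each pair
// denotes a half-open range [start, end). For example, {0x20, 0x7f, 0xa0, 0x100} represents
// U+0020..U+007E and U+00A0..U+00FF. SparseBitSet is constructed directly from this layout.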
// The start must be larger than or equal to coverage.back() if coverage is not empty.
// Returns true if the range is appended. Otherwise returns false as an error.
static bool addRange(std::vector<uint32_t>& coverage, uint32_t start, uint32_t end) {
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
        return true;
    } else if (coverage.back() == start) {
        coverage.back() = end;
        return true;
    } else {
        // Reject unordered range input since SparseBitSet assumes that the given range vector is
        // sorted. OpenType specification says cmap entries are sorted in order of code point
        // values, thus for OpenType compliant font files, we don't reach here.
        android_errorWriteLog(0x534e4554, "32178311");
        return false;
    }
}

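// Variant of addRange() for format 4 subtables: a range that overlaps the previously added one is
// merged by extending that range rather than rejected; only an end code point that goes backwards
// relative to what has already been added is treated as an error.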
// Returns true if the range is appended. Otherwise returns false as an error.
static bool addRangeCmap4(std::vector<uint32_t>& coverage, uint32_t start, uint32_t end) {
    if (!coverage.empty() && coverage.back() > end) {
        // Reject unordered end code points.
        return false;
    }
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
        return true;
    } else {
        coverage.back() = end;
        return true;
    }
}

// Returns Range from given ranges vector. Returns invalidRange if i is out of range.
static inline Range getRange(const std::vector<uint32_t>& r, size_t i) {
    return i + 1 < r.size() ? Range({r[i], r[i + 1]}) : Range::invalidRange();
}

// Merge two sorted lists of ranges into one sorted list.
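// For example, merging {1, 3, 5, 7} with {2, 6, 10, 12} produces {1, 7, 10, 12}.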
static std::vector<uint32_t> mergeRanges(const std::vector<uint32_t>& lRanges,
                                         const std::vector<uint32_t>& rRanges) {
    std::vector<uint32_t> out;

    const size_t lsize = lRanges.size();
    const size_t rsize = rRanges.size();
    out.reserve(lsize + rsize);
    size_t ri = 0;
    size_t li = 0;
    while (li < lsize || ri < rsize) {
        Range left = getRange(lRanges, li);
        Range right = getRange(rRanges, ri);

        if (!right.isValid()) {
            // No ranges left in rRanges. Just put all remaining ranges in lRanges.
            do {
                Range r = getRange(lRanges, li);
                addRange(out, r.getStart(), r.getEnd());  // Input is sorted. Never returns false.
                li += 2;
            } while (li < lsize);
            break;
        } else if (!left.isValid()) {
            // No ranges left in lRanges. Just put all remaining ranges in rRanges.
            do {
                Range r = getRange(rRanges, ri);
                addRange(out, r.getStart(), r.getEnd());  // Input is sorted. Never returns false.
                ri += 2;
            } while (ri < rsize);
            break;
        } else if (!Range::intersects(left, right)) {
            // No intersection. Add smaller range.
            if (left.getStart() < right.getStart()) {
                // Input is sorted. Never returns false.
                addRange(out, left.getStart(), left.getEnd());
                li += 2;
            } else {
                // Input is sorted. Never returns false.
                addRange(out, right.getStart(), right.getEnd());
                ri += 2;
            }
        } else {
            Range merged = Range::merge(left, right);
            li += 2;
            ri += 2;
            left = getRange(lRanges, li);
            right = getRange(rRanges, ri);
            while (Range::intersects(merged, left) || Range::intersects(merged, right)) {
                if (Range::intersects(merged, left)) {
                    merged = Range::merge(merged, left);
                    li += 2;
                    left = getRange(lRanges, li);
                } else {
                    merged = Range::merge(merged, right);
                    ri += 2;
                    right = getRange(rRanges, ri);
                }
            }
            // Input is sorted. Never returns false.
            addRange(out, merged.getStart(), merged.getEnd());
        }
    }

    return out;
}

// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
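// A format 4 subtable has a 14-byte header (format, length, language, segCountX2, searchRange,
// entrySelector, rangeShift) followed by parallel uint16 arrays: endCode[segCount], a reserved
// pad, startCode[segCount], idDelta[segCount], idRangeOffset[segCount], then glyphIdArray. When
// idRangeOffset is 0, code point c maps to glyph (c + idDelta) & 0xffff; otherwise the glyph is
// read from glyphIdArray. Glyph 0 means the code point is not covered.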
static bool getCoverageFormat4(std::vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kSegCountOffset = 6;
    const size_t kEndCountOffset = 14;
    const size_t kHeaderSize = 16;
    const size_t kSegmentSize = 8;  // total size of array elements for one segment
    if (kEndCountOffset > size) {
        return false;
    }
    size_t segCount = readU16(data, kSegCountOffset) >> 1;
    if (kHeaderSize + segCount * kSegmentSize > size) {
        return false;
    }
    for (size_t i = 0; i < segCount; i++) {
        uint32_t end = readU16(data, kEndCountOffset + 2 * i);
        uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i));
        if (end < start) {
            // invalid segment range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }
        uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
        if (rangeOffset == 0) {
            uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
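            // Code point j maps to glyph (j + delta) & 0xffff, which is the missing glyph only
            // when that value is 0. If (end + delta) & 0xffff is larger than the segment length
            // end - start, no code point in [start, end] can wrap around to 0, so the whole
            // segment is covered; otherwise each code point has to be checked individually.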
            if (((end + delta) & 0xffff) > end - start) {
                if (!addRangeCmap4(coverage, start, end + 1)) {
                    return false;
                }
            } else {
                for (uint32_t j = start; j < end + 1; j++) {
                    if (((j + delta) & 0xffff) != 0) {
                        if (!addRangeCmap4(coverage, j, j + 1)) {
                            return false;
                        }
                    }
                }
            }
        } else {
            for (uint32_t j = start; j < end + 1; j++) {
                uint32_t actualRangeOffset =
                        kHeaderSize + 6 * segCount + rangeOffset + (i + j - start) * 2;
                if (actualRangeOffset + 2 > size) {
                    // invalid rangeOffset is considered a "warning" by OpenType Sanitizer
                    continue;
                }
                uint32_t glyphId = readU16(data, actualRangeOffset);
                if (glyphId != 0) {
                    if (!addRangeCmap4(coverage, j, j + 1)) {
                        return false;
                    }
                }
            }
        }
    }
    return true;
}

// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector
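// A format 12 subtable stores nGroups at offset 12, followed by 12-byte SequentialMapGroup
// records: startCharCode, endCharCode and startGlyphID, each uint32. Every group therefore
// contributes one contiguous coverage range.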
static bool getCoverageFormat12(std::vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kNGroupsOffset = 12;
    const size_t kFirstGroupOffset = 16;
    const size_t kGroupSize = 12;
    const size_t kStartCharCodeOffset = 0;
    const size_t kEndCharCodeOffset = 4;
    const size_t kMaxNGroups = 0xfffffff0 / kGroupSize;  // protection against overflow
    // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
    if (kFirstGroupOffset > size) {
        return false;
    }
    uint32_t nGroups = readU32(data, kNGroupsOffset);
    if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
        android_errorWriteLog(0x534e4554, "25645298");
        return false;
    }
    for (uint32_t i = 0; i < nGroups; i++) {
        uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize;
        uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset);
        uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset);
        if (end < start) {
            // invalid group range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }

        // No need to read outside of Unicode code point range.
        if (start > MAX_UNICODE_CODE_POINT) {
            return true;
        }
        if (end > MAX_UNICODE_CODE_POINT) {
            // file is inclusive, vector is exclusive
            if (end == 0xFFFFFFFF) {
                android_errorWriteLog(0x534e4554, "62134807");
            }
            return addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1);
        }
        if (!addRange(coverage, start, end + 1)) {  // file is inclusive, vector is exclusive
            return false;
        }
    }
    return true;
}

// Lower value has higher priority. 0 for the highest priority table.
// kLowestPriority for unsupported tables.
// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it.
constexpr uint8_t kLowestPriority = 255;
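// (platformId, encodingId) meanings: (3, 10) is Windows / Unicode full repertoire, (0, 6) and
// (0, 4) are Unicode full repertoire, (3, 1) is Windows / Unicode BMP, (0, 3) is Unicode BMP,
// and (0, 0)-(0, 2) are legacy Unicode 1.x / ISO 10646 encodings.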
uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) {
    if (platformId == 3 && encodingId == 10) {
        return 0;
    }
    if (platformId == 0 && encodingId == 6) {
        return 1;
    }
    if (platformId == 0 && encodingId == 4) {
        return 2;
    }
    if (platformId == 3 && encodingId == 1) {
        return 3;
    }
    if (platformId == 0 && encodingId == 3) {
        return 4;
    }
    if (platformId == 0 && encodingId == 2) {
        return 5;
    }
    if (platformId == 0 && encodingId == 1) {
        return 6;
    }
    if (platformId == 0 && encodingId == 0) {
        return 7;
    }
    // Tables other than above are not supported.
    return kLowestPriority;
}

// Get merged coverage information from default UVS Table and non-default UVS Table. Note that this
// function assumes code points in both default UVS Table and non-default UVS table are stored in
// ascending order. This is required by the standard.
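// A non-default UVS table is a uint32 record count followed by 5-byte UVSMapping records (uint24
// base code point, uint16 glyph ID); a default UVS table is a uint32 record count followed by
// 4-byte UnicodeRange records (uint24 start code point, uint8 additional count). The offsets
// passed in are relative to the start of the format 14 subtable; 0 means the table is absent.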
static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size,
                          uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset,
                          const SparseBitSet& baseCoverage) {
    // Need to merge supported ranges from default UVS Table and non-default UVS Table.
    // First, collect all supported code points from non default UVS table.
    std::vector<uint32_t> rangesFromNonDefaultUVSTable;
    if (nonDefaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUVSMappingRecordSize = 5;

        const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > nonDefaultUVSTableOffset.
        const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset;
        if (nonDefaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint64_t numRecords = readU32(nonDefaultUVSTable, 0);
        const uint64_t sizeToRead = numRecords * kUVSMappingRecordSize + kHeaderSize;
        if (sizeToRead > nonDefaultUVSTableRemaining) {
            if (sizeToRead > UINT_MAX) {
                android_errorWriteLog(0x534e4554, "70808908");
            }
            return false;
        }
        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i;
            const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset);
            if (!addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1)) {
                return false;
            }
        }
    }

    // Then, construct ranges from the default UVS Table and merge them with the code points
    // collected from the non-default UVS table.
    std::vector<uint32_t> rangesFromDefaultUVSTable;
    if (defaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUnicodeRangeRecordSize = 4;

        const uint8_t* defaultUVSTable = data + defaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > defaultUVSTableOffset.
        const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset;

        if (defaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint64_t numRecords = readU32(defaultUVSTable, 0);
        const uint64_t sizeToRead = numRecords * kUnicodeRangeRecordSize + kHeaderSize;
        if (sizeToRead > defaultUVSTableRemaining) {
            if (sizeToRead > UINT_MAX) {
                android_errorWriteLog(0x534e4554, "70808908");
            }
            return false;
        }

        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i;
            const uint32_t startCp = readU24(defaultUVSTable, recordOffset);
            const uint8_t rangeLength = defaultUVSTable[recordOffset + 3];

            // Then insert the range from the default UVS Table, but exclude code points whose base
            // character is not supported.
            for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) {
                // All code points in the default UVS table map to the glyphs of the corresponding
                // code points without variation selectors. Check the default glyph availability
                // and exclude the code point if it is not supported by the default cmap table.
                if (baseCoverage.get(cp)) {
                    if (!addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */)) {
                        return false;
                    }
                }
            }
        }
    }
    *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable);
    return true;
}

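// cmap format 14 (Unicode Variation Sequences): after a 10-byte header (format, length,
// numVarSelectorRecords) come 11-byte VariationSelectorRecords holding the variation selector
// code point (uint24) and uint32 offsets to its default and non-default UVS tables (0 if absent).
// The output vector is indexed by the variation selector index obtained from getVsIndex().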
static void getCoverageFormat14(std::vector<std::unique_ptr<SparseBitSet>>* out,
                                const uint8_t* data, size_t size,
                                const SparseBitSet& baseCoverage) {
    constexpr size_t kHeaderSize = 10;
    constexpr size_t kRecordSize = 11;
    constexpr size_t kLengthOffset = 2;
    constexpr size_t kNumRecordOffset = 6;

    out->clear();
    if (size < kHeaderSize) {
        return;
    }

    const uint32_t length = readU32(data, kLengthOffset);
    if (size < length) {
        return;
    }

    const uint64_t numRecords = readU32(data, kNumRecordOffset);
    const uint64_t sizeToRead = kHeaderSize + kRecordSize * numRecords;
    if (numRecords == 0 || sizeToRead > length) {
        if (sizeToRead > UINT_MAX) {
            android_errorWriteLog(0x534e4554, "70808908");
        }
        return;
    }

    for (uint32_t i = 0; i < numRecords; ++i) {
        // Insert from the largest code points since it determines the size of the output vector.
        const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1);
        const uint32_t vsCodePoint = readU24(data, recordHeadOffset);
        const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3);
        const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7);
        if (defaultUVSOffset > length || nonDefaultUVSOffset > length) {
            continue;
        }

        const uint16_t vsIndex = getVsIndex(vsCodePoint);
        if (vsIndex == INVALID_VS_INDEX) {
            continue;
        }
        std::vector<uint32_t> ranges;
        if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset,
                           baseCoverage)) {
            continue;
        }
        if (out->size() < vsIndex + 1) {
            out->resize(vsIndex + 1);
        }
        (*out)[vsIndex].reset(new SparseBitSet(ranges.data(), ranges.size() >> 1));
    }

    out->shrink_to_fit();
}

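// The cmap table header is a uint16 version and uint16 numTables, followed by 8-byte encoding
// records: platformID and encodingID (uint16 each) and a uint32 offset from the start of the
// cmap table to the corresponding subtable.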
SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size,
                                       std::vector<std::unique_ptr<SparseBitSet>>* out) {
    constexpr size_t kHeaderSize = 4;
    constexpr size_t kNumTablesOffset = 2;
    constexpr size_t kTableSize = 8;
    constexpr size_t kPlatformIdOffset = 0;
    constexpr size_t kEncodingIdOffset = 2;
    constexpr size_t kOffsetOffset = 4;
    constexpr size_t kFormatOffset = 0;
    constexpr uint32_t kNoTable = UINT32_MAX;

    if (kHeaderSize > cmap_size) {
        return SparseBitSet();
    }
    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
    if (kHeaderSize + numTables * kTableSize > cmap_size) {
        return SparseBitSet();
    }

    uint32_t bestTableOffset = kNoTable;
    uint16_t bestTableFormat = 0;
    uint8_t bestTablePriority = kLowestPriority;
    uint32_t vsTableOffset = kNoTable;
    for (uint32_t i = 0; i < numTables; ++i) {
        const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize;
        const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset);
        const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset);
        const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset);

        if (offset > cmap_size - 2) {
            continue;  // Invalid table: not enough space to read.
        }
        const uint16_t format = readU16(cmap_data, offset + kFormatOffset);

        if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) {
            if (vsTableOffset == kNoTable && format == 14) {
                vsTableOffset = offset;
            } else {
                // Ignore the (0, 5) table if we have already seen another valid one or it's in a
                // format we don't understand.
            }
        } else {
            uint32_t length;
            uint32_t language;

            if (format == 4) {
                constexpr size_t lengthOffset = 2;
                constexpr size_t languageOffset = 4;
                constexpr size_t minTableSize = languageOffset + 2;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU16(cmap_data, offset + lengthOffset);
                language = readU16(cmap_data, offset + languageOffset);
            } else if (format == 12) {
                constexpr size_t lengthOffset = 4;
                constexpr size_t languageOffset = 8;
                constexpr size_t minTableSize = languageOffset + 4;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU32(cmap_data, offset + lengthOffset);
                language = readU32(cmap_data, offset + languageOffset);
            } else {
                continue;
            }

            if (length > cmap_size - offset) {
                continue;  // Invalid table: table length is larger than whole cmap data size.
            }
            if (language != 0) {
                // Unsupported or invalid table: this is either a subtable for the Macintosh
                // platform (which we don't support), or an invalid subtable since language field
                // should be zero for non-Macintosh subtables.
                continue;
            }
            const uint8_t priority = getTablePriority(platformId, encodingId);
            if (priority < bestTablePriority) {
                bestTableOffset = offset;
                bestTablePriority = priority;
                bestTableFormat = format;
            }
        }
        if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) {
            // Already found the highest priority table and variation sequences table. No need to
            // look at remaining tables.
            break;
        }
    }

    SparseBitSet coverage;

    if (bestTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + bestTableOffset;
        const size_t tableSize = cmap_size - bestTableOffset;
        bool success;
        std::vector<uint32_t> coverageVec;
        if (bestTableFormat == 4) {
            success = getCoverageFormat4(coverageVec, tableData, tableSize);
        } else {
            success = getCoverageFormat12(coverageVec, tableData, tableSize);
        }

        if (success) {
            // Use data() rather than &front(): the parsed subtable may yield no ranges, and
            // front() on an empty vector is undefined behavior.
            coverage = SparseBitSet(coverageVec.data(), coverageVec.size() >> 1);
        }
    }

    if (vsTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + vsTableOffset;
        const size_t tableSize = cmap_size - vsTableOffset;
        getCoverageFormat14(out, tableData, tableSize, coverage);
    }
    return coverage;
}

}  // namespace minikin