1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Minikin"
18 
19 #include "minikin/FontCollection.h"
20 
21 #include <algorithm>
22 
23 #include <log/log.h>
24 #include <unicode/unorm2.h>
25 
26 #include "minikin/Emoji.h"
27 
28 #include "Locale.h"
29 #include "LocaleListCache.h"
30 #include "MinikinInternal.h"
31 
32 using std::vector;
33 
34 namespace minikin {
35 
// Returns the larger of the two values; when neither is greater, `b` is returned.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
40 
// U+FE0F: the variation selector that requests the color (emoji) presentation.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;
// U+FE0E: the variation selector that requests the text presentation.
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;

// Counter from which FontCollection::init() draws the id of each new collection.
static std::atomic<uint32_t> gNextCollectionId{0};
45 
FontCollection(std::shared_ptr<FontFamily> && typeface)46 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
47     std::vector<std::shared_ptr<FontFamily>> typefaces;
48     typefaces.push_back(typeface);
49     init(typefaces);
50 }
51 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)52 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : mMaxChar(0) {
53     init(typefaces);
54 }
55 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)56 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
57     mId = gNextCollectionId++;
58     vector<uint32_t> lastChar;
59     size_t nTypefaces = typefaces.size();
60     const FontStyle defaultStyle;
61     for (size_t i = 0; i < nTypefaces; i++) {
62         const std::shared_ptr<FontFamily>& family = typefaces[i];
63         if (family->getClosestMatch(defaultStyle).font == nullptr) {
64             continue;
65         }
66         const SparseBitSet& coverage = family->getCoverage();
67         mFamilies.push_back(family);  // emplace_back would be better
68         if (family->hasVSTable()) {
69             mVSFamilyVec.push_back(family);
70         }
71         mMaxChar = max(mMaxChar, coverage.length());
72         lastChar.push_back(coverage.nextSetBit(0));
73 
74         const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
75         mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
76     }
77     nTypefaces = mFamilies.size();
78     MINIKIN_ASSERT(nTypefaces > 0, "Font collection must have at least one valid typeface");
79     MINIKIN_ASSERT(nTypefaces <= MAX_FAMILY_COUNT,
80                    "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
81     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
82     // TODO: Use variation selector map for mRanges construction.
83     // A font can have a glyph for a base code point and variation selector pair but no glyph for
84     // the base code point without variation selector. The family won't be listed in the range in
85     // this case.
86     for (size_t i = 0; i < nPages; i++) {
87         Range dummy;
88         mRanges.push_back(dummy);
89         Range* range = &mRanges.back();
90         range->start = mFamilyVec.size();
91         for (size_t j = 0; j < nTypefaces; j++) {
92             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
93                 const std::shared_ptr<FontFamily>& family = mFamilies[j];
94                 mFamilyVec.push_back(static_cast<uint8_t>(j));
95                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
96                 lastChar[j] = nextChar;
97             }
98         }
99         range->end = mFamilyVec.size();
100     }
101     // See the comment in Range for more details.
102     LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
103                         "Exceeded the maximum indexable cmap coverage.");
104 }
105 
// Special scores for the font fallback: the lowest possible value marks a family that cannot be
// used, and the highest possible value marks a family that must be used.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
109 
110 // Calculates a font score.
111 // The score of the font family is based on three subscores.
112 //  - Coverage Score: How well the font family covers the given character or variation sequence.
113 //  - Locale Score: How well the font family is appropriate for the locale.
114 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
115 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
116 //
// Then, there is a priority for these three subscores as follows:
118 //   Coverage Score > Locale Score > Variant Score
119 // The returned score reflects this priority order.
120 //
121 // Note that there are two special scores.
122 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
123 //    base character.
124 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
125 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FamilyVariant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const126 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FamilyVariant variant,
127                                          uint32_t localeListId,
128                                          const std::shared_ptr<FontFamily>& fontFamily) const {
129     const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
130     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
131         // No need to calculate other scores.
132         return coverageScore;
133     }
134 
135     const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
136     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
137 
138     // Subscores are encoded into 31 bits representation to meet the subscore priority.
139     // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
140     // then the last 1 bit is for variant score.
141     return coverageScore << 29 | localeScore << 1 | variantScore;
142 }
143 
144 // Calculates a font score based on variation sequence coverage.
145 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
146 //   character.
147 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
148 //   supports the given character or variation sequence.
149 // - Returns 3 if the font family supports the variation sequence.
150 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
151 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
152 // - Returns 1 if the variation selector is not specified or if the font family only supports the
153 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const154 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
155                                            const std::shared_ptr<FontFamily>& fontFamily) const {
156     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
157     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
158         // The font doesn't support either variation sequence or even the base character.
159         return kUnsupportedFontScore;
160     }
161 
162     if ((vs == 0 || hasVSGlyph) && (mFamilies[0] == fontFamily || fontFamily->isCustomFallback())) {
163         // If the first font family supports the given character or variation sequence, always use
164         // it.
165         return kFirstFontScore;
166     }
167 
168     if (vs != 0 && hasVSGlyph) {
169         return 3;
170     }
171 
172     bool colorEmojiRequest;
173     if (vs == EMOJI_STYLE_VS) {
174         colorEmojiRequest = true;
175     } else if (vs == TEXT_STYLE_VS) {
176         colorEmojiRequest = false;
177     } else {
178         switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
179             case EmojiStyle::EMOJI:
180                 colorEmojiRequest = true;
181                 break;
182             case EmojiStyle::TEXT:
183                 colorEmojiRequest = false;
184                 break;
185             case EmojiStyle::EMPTY:
186             case EmojiStyle::DEFAULT:
187             default:
188                 // Do not give any extra score for the default emoji style.
189                 return 1;
190                 break;
191         }
192     }
193 
194     return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
195 }
196 
197 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
198 //
// 1. If only the font's language matches or there is no match between the requested font and the
//    supported font, then the font obtains a score of 0.
// 2. Without a match in language, considering that a subtag may change the font's EmojiStyle over
//    the script, a match in subtag gets a score of 2 and a match in script gains a score of 1.
// 3. Regarding two-element matches, a language-and-subtag match has a score of 4, while a
//    language-and-script match obtains a score of 3 for the same reason as above.
//
// If two locales in the requested list have the same locale score, the font matching with higher
// priority locale gets a higher score. For example, in the case the user requested locale list is
// "ja-Jpan,en-Latn", the font of "ja-Jpan" gets a higher score than the font of "en-Latn".
210 //
211 // To achieve score calculation with priorities, the locale score is determined as follows:
212 //   LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
213 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
214 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)215 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
216                                                  const FontFamily& fontFamily) {
217     const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
218     const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
219 
220     const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
221     uint32_t score = 0;
222     for (size_t i = 0; i < maxCompareNum; ++i) {
223         score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
224     }
225     return score;
226 }
227 
228 // Calculates a font score based on variant ("compact" or "elegant") matching.
229 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
230 //  - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FamilyVariant variant,const FontFamily & fontFamily)231 uint32_t FontCollection::calcVariantMatchingScore(FamilyVariant variant,
232                                                   const FontFamily& fontFamily) {
233     const FamilyVariant familyVariant = fontFamily.variant();
234     if (familyVariant == FamilyVariant::DEFAULT) {
235         return 1;
236     }
237     if (familyVariant == variant) {
238         return 1;
239     }
240     if (variant == FamilyVariant::DEFAULT && familyVariant == FamilyVariant::COMPACT) {
241         // If default is requested, prefer compat variation.
242         return 1;
243     }
244     return 0;
245 }
246 
247 // Implement heuristic for choosing best-match font. Here are the rules:
248 // 1. If first font in the collection has the character, it wins.
249 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
250 // 3. Highest score wins, with ties resolved to the first font.
251 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FamilyVariant variant) const252 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
253                                                                     uint32_t localeListId,
254                                                                     FamilyVariant variant) const {
255     if (ch >= mMaxChar) {
256         return mFamilies[0];
257     }
258 
259     Range range = mRanges[ch >> kLogCharsPerPage];
260 
261     if (vs != 0) {
262         range = {0, static_cast<uint16_t>(mFamilies.size())};
263     }
264 
265     int bestFamilyIndex = -1;
266     uint32_t bestScore = kUnsupportedFontScore;
267     for (size_t i = range.start; i < range.end; i++) {
268         const std::shared_ptr<FontFamily>& family =
269                 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
270         const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
271         if (score == kFirstFontScore) {
272             // If the first font family supports the given character or variation sequence, always
273             // use it.
274             return family;
275         }
276         if (score > bestScore) {
277             bestScore = score;
278             bestFamilyIndex = i;
279         }
280     }
281     if (bestFamilyIndex == -1) {
282         UErrorCode errorCode = U_ZERO_ERROR;
283         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
284         if (U_SUCCESS(errorCode)) {
285             UChar decomposed[4];
286             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
287             if (U_SUCCESS(errorCode) && len > 0) {
288                 int off = 0;
289                 U16_NEXT_UNSAFE(decomposed, off, ch);
290                 return getFamilyForChar(ch, vs, localeListId, variant);
291             }
292         }
293         return mFamilies[0];
294     }
295     return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
296 }
297 
298 // Characters where we want to continue using existing font run for (or stick to the next run if
299 // they start a string), even if the font does not support them explicitly. These are handled
300 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
301 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)302 static bool doesNotNeedFontSupport(uint32_t c) {
303     return c == 0x00AD                      // SOFT HYPHEN
304            || c == 0x034F                   // COMBINING GRAPHEME JOINER
305            || c == 0x061C                   // ARABIC LETTER MARK
306            || (0x200C <= c && c <= 0x200F)  // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
307            || (0x202A <= c && c <= 0x202E)  // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
308            || (0x2066 <= c && c <= 0x2069)  // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
309            || c == 0xFEFF                   // BYTE ORDER MARK
310            || isVariationSelector(c);
311 }
312 
313 // Characters where we want to continue using existing font run instead of
314 // recomputing the best match in the fallback list.
static const uint32_t stickyWhitelist[] = {
        '!',
        ',',
        '-',
        '.',
        ':',
        ';',
        '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN
        0x2642,  // MALE_SIGN
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true if `c` is one of the code points listed in stickyWhitelist.
static bool isStickyWhitelisted(uint32_t c) {
    for (const uint32_t sticky : stickyWhitelist) {
        if (sticky == c) {
            return true;
        }
    }
    return false;
}
332 
isCombining(uint32_t c)333 static inline bool isCombining(uint32_t c) {
334     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
335 }
336 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const337 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
338                                           uint32_t variationSelector) const {
339     if (!isVariationSelector(variationSelector)) {
340         return false;
341     }
342     if (baseCodepoint >= mMaxChar) {
343         return false;
344     }
345 
346     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
347     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
348         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
349             return true;
350         }
351     }
352 
353     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
354     // for <char, text presentation selector> case since we have special fallback rule for the
355     // sequence. Note that we don't need to restrict this to already standardized variation
356     // sequences, since Unicode is adding variation sequences more frequently now and may even move
357     // towards allowing text and emoji variation selectors on any character.
358     if (variationSelector == TEXT_STYLE_VS) {
359         for (size_t i = 0; i < mFamilies.size(); ++i) {
360             if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
361                 return true;
362             }
363         }
364     }
365 
366     return false;
367 }
368 
// U+FFFD, substituted for unpaired surrogates found while itemizing text.
constexpr uint32_t REPLACEMENT_CHARACTER{0xFFFD};
370 
// Splits `text` into runs, each tagged with the font picked for its characters.
//
// The text is walked one code point at a time (unpaired surrogates are treated as
// REPLACEMENT_CHARACTER). A character stays in the current run when it does not need font support,
// or when it is a sticky-whitelisted or combining character that the current family covers.
// Otherwise getFamilyForChar() picks a family, and a new run is started if that family differs
// from the current one (or if this is the first character of the text).
//
// Run offsets are UTF-16 code unit offsets into `text` and `end` is exclusive. At most `runMax`
// runs are returned; an empty `text` yields an empty result.
std::vector<FontCollection::Run> FontCollection::itemize(U16StringPiece text, FontStyle style,
                                                         uint32_t localeListId,
                                                         FamilyVariant familyVariant,
                                                         uint32_t runMax) const {
    const uint16_t* string = text.data();
    const uint32_t string_size = text.size();
    std::vector<Run> result;

    // Family of the most recently started run; nullptr until the first family is assigned.
    const FontFamily* lastFamily = nullptr;
    // Points at the last element of `result`; refreshed after every push_back.
    Run* run = nullptr;

    if (string_size == 0) {
        return result;
    }

    // Sentinel stored in nextCh once the whole text has been consumed.
    const uint32_t kEndOfString = 0xFFFFFFFF;

    uint32_t nextCh = 0;
    uint32_t prevCh = 0;
    // UTF-16 offset at which nextCh starts.
    size_t nextUtf16Pos = 0;
    // UTF-16 offset just past nextCh.
    size_t readLength = 0;
    U16_NEXT(string, readLength, string_size, nextCh);
    if (U_IS_SURROGATE(nextCh)) {
        nextCh = REPLACEMENT_CHARACTER;
    }

    do {
        // Advance by one code point: the look-ahead becomes the current character and a new
        // look-ahead is read (or the end-of-string sentinel is set).
        const uint32_t ch = nextCh;
        const size_t utf16Pos = nextUtf16Pos;
        nextUtf16Pos = readLength;
        if (readLength < string_size) {
            U16_NEXT(string, readLength, string_size, nextCh);
            if (U_IS_SURROGATE(nextCh)) {
                nextCh = REPLACEMENT_CHARACTER;
            }
        } else {
            nextCh = kEndOfString;
        }

        bool shouldContinueRun = false;
        if (doesNotNeedFontSupport(ch)) {
            // Always continue if the character is a format character not needed to be in the font.
            shouldContinueRun = true;
        } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) {
            // Continue using existing font as long as it has coverage and is whitelisted.
            shouldContinueRun = lastFamily->getCoverage().get(ch);
        }

        if (!shouldContinueRun) {
            // The look-ahead is passed as the variation selector only when it actually is one.
            const std::shared_ptr<FontFamily>& family = getFamilyForChar(
                    ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
            if (utf16Pos == 0 || family.get() != lastFamily) {
                size_t start = utf16Pos;
                // Workaround for combining marks and emoji modifiers until we implement
                // per-cluster font selection: if a combining mark or an emoji modifier is found in
                // a different font that also supports the previous character, attach previous
                // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
                // handled properly by this since it's a combining mark too.
                if (utf16Pos != 0 &&
                    (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
                    family != nullptr && family->getCoverage().get(prevCh)) {
                    const size_t prevChLength = U16_LENGTH(prevCh);
                    if (run != nullptr) {
                        // Take the previous character away from the previous run, and drop that
                        // run altogether if it became empty.
                        run->end -= prevChLength;
                        if (run->start == run->end) {
                            result.pop_back();
                        }
                    }
                    start -= prevChLength;
                }
                if (lastFamily == nullptr) {
                    // This is the first family ever assigned. We are either seeing the very first
                    // character (which means start would already be zero), or we have only seen
                    // characters that don't need any font support (which means we need to adjust
                    // start to be 0 to include those characters).
                    start = 0;
                }
                // The end offset is a placeholder here; it is set right below.
                result.push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
                run = &result.back();
                lastFamily = family.get();
            }
        }
        prevCh = ch;
        if (run != nullptr) {
            run->end = nextUtf16Pos;  // exclusive
        }

        // Stop searching the remaining characters once the result holds runMax + 2 runs; at that
        // point the runs in [0, runMax) are finalized.
        // If result.size() equals runMax, the last run may still be expanding. Beyond that, the
        // workaround above may shrink or remove the last run and start the new run earlier, so
        // two extra runs are collected before stopping.
        if (result.size() >= 2 && runMax == result.size() - 2) {
            break;
        }
    } while (nextCh != kEndOfString);

    if (lastFamily == nullptr) {
        // No character needed any font support, so it doesn't really matter which font they end up
        // getting displayed in. We put the whole string in one run, using the first font.
        result.push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)});
    }

    if (result.size() > runMax) {
        // The itemization has terminated since it reaches the runMax. Remove last unfinalized runs.
        result.resize(runMax);
    }
    return result;
}
480 
baseFontFaked(FontStyle style)481 FakedFont FontCollection::baseFontFaked(FontStyle style) {
482     return mFamilies[0]->getClosestMatch(style);
483 }
484 
createCollectionWithVariation(const std::vector<FontVariation> & variations)485 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
486         const std::vector<FontVariation>& variations) {
487     if (variations.empty() || mSupportedAxes.empty()) {
488         return nullptr;
489     }
490 
491     bool hasSupportedAxis = false;
492     for (const FontVariation& variation : variations) {
493         if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
494             hasSupportedAxis = true;
495             break;
496         }
497     }
498     if (!hasSupportedAxis) {
499         // None of variation axes are supported by this font collection.
500         return nullptr;
501     }
502 
503     std::vector<std::shared_ptr<FontFamily>> families;
504     for (const std::shared_ptr<FontFamily>& family : mFamilies) {
505         std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
506         if (newFamily) {
507             families.push_back(newFamily);
508         } else {
509             families.push_back(family);
510         }
511     }
512 
513     return std::shared_ptr<FontCollection>(new FontCollection(families));
514 }
515 
getId() const516 uint32_t FontCollection::getId() const {
517     return mId;
518 }
519 
520 }  // namespace minikin
521