1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Minikin"
18
#include "minikin/FontCollection.h"

#include <algorithm>
#include <utility>

#include <log/log.h>
#include <unicode/unorm2.h>

#include "minikin/Emoji.h"

#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
31
32 using std::vector;
33
34 namespace minikin {
35
// Returns |a| when it compares strictly greater than |b|, and |b| otherwise (so |b| is returned
// when the two compare equal).
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
40
// Variation selectors that explicitly request a presentation style for the preceding character.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;  // U+FE0F: color (emoji) presentation
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;   // U+FE0E: text presentation

// Counter from which FontCollection::init() draws the id of each new collection; starts at 0.
static std::atomic<uint32_t> gNextCollectionId{0};
45
FontCollection(std::shared_ptr<FontFamily> && typeface)46 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
47 std::vector<std::shared_ptr<FontFamily>> typefaces;
48 typefaces.push_back(typeface);
49 init(typefaces);
50 }
51
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)52 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : mMaxChar(0) {
53 init(typefaces);
54 }
55
init(const vector<std::shared_ptr<FontFamily>> & typefaces)56 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
57 mId = gNextCollectionId++;
58 vector<uint32_t> lastChar;
59 size_t nTypefaces = typefaces.size();
60 const FontStyle defaultStyle;
61 for (size_t i = 0; i < nTypefaces; i++) {
62 const std::shared_ptr<FontFamily>& family = typefaces[i];
63 if (family->getClosestMatch(defaultStyle).font == nullptr) {
64 continue;
65 }
66 const SparseBitSet& coverage = family->getCoverage();
67 mFamilies.push_back(family); // emplace_back would be better
68 if (family->hasVSTable()) {
69 mVSFamilyVec.push_back(family);
70 }
71 mMaxChar = max(mMaxChar, coverage.length());
72 lastChar.push_back(coverage.nextSetBit(0));
73
74 const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
75 mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
76 }
77 nTypefaces = mFamilies.size();
78 MINIKIN_ASSERT(nTypefaces > 0, "Font collection must have at least one valid typeface");
79 MINIKIN_ASSERT(nTypefaces <= MAX_FAMILY_COUNT,
80 "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
81 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
82 // TODO: Use variation selector map for mRanges construction.
83 // A font can have a glyph for a base code point and variation selector pair but no glyph for
84 // the base code point without variation selector. The family won't be listed in the range in
85 // this case.
86 for (size_t i = 0; i < nPages; i++) {
87 Range dummy;
88 mRanges.push_back(dummy);
89 Range* range = &mRanges.back();
90 range->start = mFamilyVec.size();
91 for (size_t j = 0; j < nTypefaces; j++) {
92 if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
93 const std::shared_ptr<FontFamily>& family = mFamilies[j];
94 mFamilyVec.push_back(static_cast<uint8_t>(j));
95 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
96 lastChar[j] = nextChar;
97 }
98 }
99 range->end = mFamilyVec.size();
100 }
101 // See the comment in Range for more details.
102 LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
103 "Exceeded the maximum indexable cmap coverage.");
104 }
105
// Sentinel scores used by the font fallback: the lowest possible score marks a family that cannot
// be used, and the highest possible score marks a family that must be used.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
109
110 // Calculates a font score.
111 // The score of the font family is based on three subscores.
112 // - Coverage Score: How well the font family covers the given character or variation sequence.
113 // - Locale Score: How well the font family is appropriate for the locale.
114 // - Variant Score: Whether the font family matches the variant. Note that this variant is not the
115 // one in BCP47. This is our own font variant (e.g., elegant, compact).
116 //
117 // Then, there is a priority for these three subscores as follow:
118 // Coverage Score > Locale Score > Variant Score
119 // The returned score reflects this priority order.
120 //
121 // Note that there are two special scores.
122 // - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
123 // base character.
124 // - kFirstFontScore: When the font is the first font family in the collection and it supports the
125 // given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FamilyVariant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const126 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FamilyVariant variant,
127 uint32_t localeListId,
128 const std::shared_ptr<FontFamily>& fontFamily) const {
129 const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
130 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
131 // No need to calculate other scores.
132 return coverageScore;
133 }
134
135 const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
136 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
137
138 // Subscores are encoded into 31 bits representation to meet the subscore priority.
139 // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
140 // then the last 1 bit is for variant score.
141 return coverageScore << 29 | localeScore << 1 | variantScore;
142 }
143
144 // Calculates a font score based on variation sequence coverage.
145 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
146 // character.
147 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
148 // supports the given character or variation sequence.
149 // - Returns 3 if the font family supports the variation sequence.
150 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
151 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
152 // - Returns 1 if the variation selector is not specified or if the font family only supports the
153 // variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const154 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
155 const std::shared_ptr<FontFamily>& fontFamily) const {
156 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
157 if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
158 // The font doesn't support either variation sequence or even the base character.
159 return kUnsupportedFontScore;
160 }
161
162 if ((vs == 0 || hasVSGlyph) && (mFamilies[0] == fontFamily || fontFamily->isCustomFallback())) {
163 // If the first font family supports the given character or variation sequence, always use
164 // it.
165 return kFirstFontScore;
166 }
167
168 if (vs != 0 && hasVSGlyph) {
169 return 3;
170 }
171
172 bool colorEmojiRequest;
173 if (vs == EMOJI_STYLE_VS) {
174 colorEmojiRequest = true;
175 } else if (vs == TEXT_STYLE_VS) {
176 colorEmojiRequest = false;
177 } else {
178 switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
179 case EmojiStyle::EMOJI:
180 colorEmojiRequest = true;
181 break;
182 case EmojiStyle::TEXT:
183 colorEmojiRequest = false;
184 break;
185 case EmojiStyle::EMPTY:
186 case EmojiStyle::DEFAULT:
187 default:
188 // Do not give any extra score for the default emoji style.
189 return 1;
190 break;
191 }
192 }
193
194 return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
195 }
196
197 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
198 //
199 // 1. If only the font's language matches or there is no matches between requested font and
200 // supported font, then the font obtains a score of 0.
201 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
202 // a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
203 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
204 // language-and-script obtains a socre of 3 with the same reason above.
205 //
206 // If two locales in the requested list have the same locale score, the font matching with higher
207 // priority locale gets a higher score. For example, in the case the user requested locale list is
208 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
209 // "en-Latn".
210 //
211 // To achieve score calculation with priorities, the locale score is determined as follows:
212 // LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
213 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
214 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)215 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
216 const FontFamily& fontFamily) {
217 const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
218 const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
219
220 const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
221 uint32_t score = 0;
222 for (size_t i = 0; i < maxCompareNum; ++i) {
223 score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
224 }
225 return score;
226 }
227
228 // Calculates a font score based on variant ("compact" or "elegant") matching.
229 // - Returns 1 if the font doesn't have variant or the variant matches with the text style.
230 // - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FamilyVariant variant,const FontFamily & fontFamily)231 uint32_t FontCollection::calcVariantMatchingScore(FamilyVariant variant,
232 const FontFamily& fontFamily) {
233 const FamilyVariant familyVariant = fontFamily.variant();
234 if (familyVariant == FamilyVariant::DEFAULT) {
235 return 1;
236 }
237 if (familyVariant == variant) {
238 return 1;
239 }
240 if (variant == FamilyVariant::DEFAULT && familyVariant == FamilyVariant::COMPACT) {
241 // If default is requested, prefer compat variation.
242 return 1;
243 }
244 return 0;
245 }
246
247 // Implement heuristic for choosing best-match font. Here are the rules:
248 // 1. If first font in the collection has the character, it wins.
249 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
250 // 3. Highest score wins, with ties resolved to the first font.
251 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FamilyVariant variant) const252 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
253 uint32_t localeListId,
254 FamilyVariant variant) const {
255 if (ch >= mMaxChar) {
256 return mFamilies[0];
257 }
258
259 Range range = mRanges[ch >> kLogCharsPerPage];
260
261 if (vs != 0) {
262 range = {0, static_cast<uint16_t>(mFamilies.size())};
263 }
264
265 int bestFamilyIndex = -1;
266 uint32_t bestScore = kUnsupportedFontScore;
267 for (size_t i = range.start; i < range.end; i++) {
268 const std::shared_ptr<FontFamily>& family =
269 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
270 const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
271 if (score == kFirstFontScore) {
272 // If the first font family supports the given character or variation sequence, always
273 // use it.
274 return family;
275 }
276 if (score > bestScore) {
277 bestScore = score;
278 bestFamilyIndex = i;
279 }
280 }
281 if (bestFamilyIndex == -1) {
282 UErrorCode errorCode = U_ZERO_ERROR;
283 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
284 if (U_SUCCESS(errorCode)) {
285 UChar decomposed[4];
286 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
287 if (U_SUCCESS(errorCode) && len > 0) {
288 int off = 0;
289 U16_NEXT_UNSAFE(decomposed, off, ch);
290 return getFamilyForChar(ch, vs, localeListId, variant);
291 }
292 }
293 return mFamilies[0];
294 }
295 return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
296 }
297
298 // Characters where we want to continue using existing font run for (or stick to the next run if
299 // they start a string), even if the font does not support them explicitly. These are handled
300 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
301 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)302 static bool doesNotNeedFontSupport(uint32_t c) {
303 return c == 0x00AD // SOFT HYPHEN
304 || c == 0x034F // COMBINING GRAPHEME JOINER
305 || c == 0x061C // ARABIC LETTER MARK
306 || (0x200C <= c && c <= 0x200F) // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
307 || (0x202A <= c && c <= 0x202E) // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
308 || (0x2066 <= c && c <= 0x2069) // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
309 || c == 0xFEFF // BYTE ORDER MARK
310 || isVariationSelector(c);
311 }
312
// Characters for which we want to keep using the existing font run instead of recomputing the
// best match in the fallback list.
static const uint32_t stickyWhitelist[] = {
        '!',    ',', '-', '.', ':', ';', '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN
        0x2642,  // MALE_SIGN
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true if |c| is one of the code points listed in stickyWhitelist.
static bool isStickyWhitelisted(uint32_t c) {
    for (const uint32_t entry : stickyWhitelist) {
        if (entry == c) {
            return true;
        }
    }
    return false;
}
332
isCombining(uint32_t c)333 static inline bool isCombining(uint32_t c) {
334 return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
335 }
336
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const337 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
338 uint32_t variationSelector) const {
339 if (!isVariationSelector(variationSelector)) {
340 return false;
341 }
342 if (baseCodepoint >= mMaxChar) {
343 return false;
344 }
345
346 // Currently mRanges can not be used here since it isn't aware of the variation sequence.
347 for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
348 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
349 return true;
350 }
351 }
352
353 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
354 // for <char, text presentation selector> case since we have special fallback rule for the
355 // sequence. Note that we don't need to restrict this to already standardized variation
356 // sequences, since Unicode is adding variation sequences more frequently now and may even move
357 // towards allowing text and emoji variation selectors on any character.
358 if (variationSelector == TEXT_STYLE_VS) {
359 for (size_t i = 0; i < mFamilies.size(); ++i) {
360 if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
361 return true;
362 }
363 }
364 }
365
366 return false;
367 }
368
369 constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
370
itemize(U16StringPiece text,FontStyle style,uint32_t localeListId,FamilyVariant familyVariant,uint32_t runMax) const371 std::vector<FontCollection::Run> FontCollection::itemize(U16StringPiece text, FontStyle style,
372 uint32_t localeListId,
373 FamilyVariant familyVariant,
374 uint32_t runMax) const {
375 const uint16_t* string = text.data();
376 const uint32_t string_size = text.size();
377 std::vector<Run> result;
378
379 const FontFamily* lastFamily = nullptr;
380 Run* run = nullptr;
381
382 if (string_size == 0) {
383 return result;
384 }
385
386 const uint32_t kEndOfString = 0xFFFFFFFF;
387
388 uint32_t nextCh = 0;
389 uint32_t prevCh = 0;
390 size_t nextUtf16Pos = 0;
391 size_t readLength = 0;
392 U16_NEXT(string, readLength, string_size, nextCh);
393 if (U_IS_SURROGATE(nextCh)) {
394 nextCh = REPLACEMENT_CHARACTER;
395 }
396
397 do {
398 const uint32_t ch = nextCh;
399 const size_t utf16Pos = nextUtf16Pos;
400 nextUtf16Pos = readLength;
401 if (readLength < string_size) {
402 U16_NEXT(string, readLength, string_size, nextCh);
403 if (U_IS_SURROGATE(nextCh)) {
404 nextCh = REPLACEMENT_CHARACTER;
405 }
406 } else {
407 nextCh = kEndOfString;
408 }
409
410 bool shouldContinueRun = false;
411 if (doesNotNeedFontSupport(ch)) {
412 // Always continue if the character is a format character not needed to be in the font.
413 shouldContinueRun = true;
414 } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) {
415 // Continue using existing font as long as it has coverage and is whitelisted.
416 shouldContinueRun = lastFamily->getCoverage().get(ch);
417 }
418
419 if (!shouldContinueRun) {
420 const std::shared_ptr<FontFamily>& family = getFamilyForChar(
421 ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
422 if (utf16Pos == 0 || family.get() != lastFamily) {
423 size_t start = utf16Pos;
424 // Workaround for combining marks and emoji modifiers until we implement
425 // per-cluster font selection: if a combining mark or an emoji modifier is found in
426 // a different font that also supports the previous character, attach previous
427 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
428 // handled properly by this since it's a combining mark too.
429 if (utf16Pos != 0 &&
430 (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
431 family != nullptr && family->getCoverage().get(prevCh)) {
432 const size_t prevChLength = U16_LENGTH(prevCh);
433 if (run != nullptr) {
434 run->end -= prevChLength;
435 if (run->start == run->end) {
436 result.pop_back();
437 }
438 }
439 start -= prevChLength;
440 }
441 if (lastFamily == nullptr) {
442 // This is the first family ever assigned. We are either seeing the very first
443 // character (which means start would already be zero), or we have only seen
444 // characters that don't need any font support (which means we need to adjust
445 // start to be 0 to include those characters).
446 start = 0;
447 }
448 result.push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
449 run = &result.back();
450 lastFamily = family.get();
451 }
452 }
453 prevCh = ch;
454 if (run != nullptr) {
455 run->end = nextUtf16Pos; // exclusive
456 }
457
458 // Stop searching the remaining characters if the result length gets runMax + 2.
459 // When result.size gets runMax + 2 here, the run between [0, runMax) was finalized.
460 // If the result.size() equals to runMax, the run may be still expanding.
461 // if the result.size() equals to runMax + 2, the last run may be removed and the last run
462 // may be exntended the previous run with above workaround.
463 if (result.size() >= 2 && runMax == result.size() - 2) {
464 break;
465 }
466 } while (nextCh != kEndOfString);
467
468 if (lastFamily == nullptr) {
469 // No character needed any font support, so it doesn't really matter which font they end up
470 // getting displayed in. We put the whole string in one run, using the first font.
471 result.push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)});
472 }
473
474 if (result.size() > runMax) {
475 // The itemization has terminated since it reaches the runMax. Remove last unfinalized runs.
476 result.resize(runMax);
477 }
478 return result;
479 }
480
baseFontFaked(FontStyle style)481 FakedFont FontCollection::baseFontFaked(FontStyle style) {
482 return mFamilies[0]->getClosestMatch(style);
483 }
484
createCollectionWithVariation(const std::vector<FontVariation> & variations)485 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
486 const std::vector<FontVariation>& variations) {
487 if (variations.empty() || mSupportedAxes.empty()) {
488 return nullptr;
489 }
490
491 bool hasSupportedAxis = false;
492 for (const FontVariation& variation : variations) {
493 if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
494 hasSupportedAxis = true;
495 break;
496 }
497 }
498 if (!hasSupportedAxis) {
499 // None of variation axes are supported by this font collection.
500 return nullptr;
501 }
502
503 std::vector<std::shared_ptr<FontFamily>> families;
504 for (const std::shared_ptr<FontFamily>& family : mFamilies) {
505 std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
506 if (newFamily) {
507 families.push_back(newFamily);
508 } else {
509 families.push_back(family);
510 }
511 }
512
513 return std::shared_ptr<FontCollection>(new FontCollection(families));
514 }
515
getId() const516 uint32_t FontCollection::getId() const {
517 return mId;
518 }
519
520 } // namespace minikin
521