1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "OptimalLineBreaker.h"
18
19 #include <algorithm>
20 #include <limits>
21
22 #include "minikin/Characters.h"
23 #include "minikin/Layout.h"
24 #include "minikin/Range.h"
25 #include "minikin/U16StringPiece.h"
26
27 #include "HyphenatorMap.h"
28 #include "LayoutUtils.h"
29 #include "LineBreakerUtil.h"
30 #include "Locale.h"
31 #include "LocaleListCache.h"
32 #include "MinikinInternal.h"
33 #include "WordBreaker.h"
34
35 namespace minikin {
36
37 namespace {
38
39 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
40 // constants are larger than any reasonable actual width score.
41 constexpr float SCORE_INFTY = std::numeric_limits<float>::max();
42 constexpr float SCORE_OVERFULL = 1e12f;
43 constexpr float SCORE_DESPERATE = 1e10f;
44
45 // Multiplier for hyphen penalty on last line.
46 constexpr float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
47 // Penalty assigned to each line break (to try to minimize number of lines)
48 // TODO: when we implement full justification (so spaces can shrink and stretch), this is
49 // probably not the most appropriate method.
50 constexpr float LINE_PENALTY_MULTIPLIER = 2.0f;
51
52 // Penalty assigned to shrinking the whitepsace.
53 constexpr float SHRINK_PENALTY_MULTIPLIER = 4.0f;
54
55 // Maximum amount that spaces can shrink, in justified text.
56 constexpr float SHRINKABILITY = 1.0 / 3.0;
57
58 // A single candidate break
59 struct Candidate {
60 uint32_t offset; // offset to text buffer, in code units
61
62 ParaWidth preBreak; // width of text until this point, if we decide to not break here:
63 // preBreak is used as an optimized way to calculate the width
64 // between two candidates. The line width between two line break
65 // candidates i and j is calculated as postBreak(j) - preBreak(i).
66 ParaWidth postBreak; // width of text until this point, if we decide to break here
67 float penalty; // penalty of this break (for example, hyphen penalty)
68 uint32_t preSpaceCount; // preceding space count before breaking
69 uint32_t postSpaceCount; // preceding space count after breaking
70 HyphenationType hyphenType;
71 bool isRtl; // The direction of the bidi run containing or ending in this candidate
72
Candidateminikin::__anon6b174eaf0111::Candidate73 Candidate(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
74 uint32_t preSpaceCount, uint32_t postSpaceCount, HyphenationType hyphenType,
75 bool isRtl)
76 : offset(offset),
77 preBreak(preBreak),
78 postBreak(postBreak),
79 penalty(penalty),
80 preSpaceCount(preSpaceCount),
81 postSpaceCount(postSpaceCount),
82 hyphenType(hyphenType),
83 isRtl(isRtl) {}
84 };
85
86 // A context of line break optimization.
87 struct OptimizeContext {
88 // The break candidates.
89 std::vector<Candidate> candidates;
90
91 // The penalty for the number of lines.
92 float linePenalty = 0.0f;
93
94 // The width of a space. May be 0 if there are no spaces.
95 // Note: if there are multiple different widths for spaces (for example, because of mixing of
96 // fonts), it's only guaranteed to pick one.
97 float spaceWidth = 0.0f;
98
99 // Append desperate break point to the candidates.
pushDesperateminikin::__anon6b174eaf0111::OptimizeContext100 inline void pushDesperate(uint32_t offset, ParaWidth sumOfCharWidths, uint32_t spaceCount,
101 bool isRtl) {
102 candidates.emplace_back(offset, sumOfCharWidths, sumOfCharWidths, SCORE_DESPERATE,
103 spaceCount, spaceCount,
104 HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, isRtl);
105 }
106
107 // Append hyphenation break point to the candidates.
pushHyphenationminikin::__anon6b174eaf0111::OptimizeContext108 inline void pushHyphenation(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
109 float penalty, uint32_t spaceCount, HyphenationType type,
110 bool isRtl) {
111 candidates.emplace_back(offset, preBreak, postBreak, penalty, spaceCount, spaceCount, type,
112 isRtl);
113 }
114
115 // Append word break point to the candidates.
pushWordBreakminikin::__anon6b174eaf0111::OptimizeContext116 inline void pushWordBreak(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
117 float penalty, uint32_t preSpaceCount, uint32_t postSpaceCount,
118 bool isRtl) {
119 candidates.emplace_back(offset, preBreak, postBreak, penalty, preSpaceCount, postSpaceCount,
120 HyphenationType::DONT_BREAK, isRtl);
121 }
122
OptimizeContextminikin::__anon6b174eaf0111::OptimizeContext123 OptimizeContext() {
124 candidates.emplace_back(0, 0.0f, 0.0f, 0.0f, 0, 0, HyphenationType::DONT_BREAK, false);
125 }
126 };
127
128 // Compute the penalty for the run and returns penalty for hyphenation and number of lines.
computePenalties(const Run & run,const LineWidth & lineWidth,HyphenationFrequency frequency,bool justified)129 std::pair<float, float> computePenalties(const Run& run, const LineWidth& lineWidth,
130 HyphenationFrequency frequency, bool justified) {
131 float linePenalty = 0.0;
132 const MinikinPaint* paint = run.getPaint();
133 // a heuristic that seems to perform well
134 float hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0);
135 if (frequency == HyphenationFrequency::Normal) {
136 hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
137 }
138
139 if (justified) {
140 // Make hyphenation more aggressive for fully justified text (so that "normal" in
141 // justified mode is the same as "full" in ragged-right).
142 hyphenPenalty *= 0.25;
143 } else {
144 // Line penalty is zero for justified text.
145 linePenalty = hyphenPenalty * LINE_PENALTY_MULTIPLIER;
146 }
147
148 return std::make_pair(hyphenPenalty, linePenalty);
149 }
150
151 // Represents a desperate break point.
152 struct DesperateBreak {
153 // The break offset.
154 uint32_t offset;
155
156 // The sum of the character width from the beginning of the word.
157 ParaWidth sumOfChars;
158
DesperateBreakminikin::__anon6b174eaf0111::DesperateBreak159 DesperateBreak(uint32_t offset, ParaWidth sumOfChars)
160 : offset(offset), sumOfChars(sumOfChars){};
161 };
162
163 // Retrieves desperate break points from a word.
populateDesperatePoints(const MeasuredText & measured,const Range & range)164 std::vector<DesperateBreak> populateDesperatePoints(const MeasuredText& measured,
165 const Range& range) {
166 std::vector<DesperateBreak> out;
167 ParaWidth width = measured.widths[range.getStart()];
168 for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
169 const float w = measured.widths[i];
170 if (w == 0) {
171 continue; // w == 0 means here is not a grapheme bounds. Don't break here.
172 }
173 out.emplace_back(i, width);
174 width += w;
175 }
176 return out;
177 }
178
179 // Append hyphenation break points and desperate break points.
180 // If an offset is a both candidate for hyphenation and desperate break points, place desperate
181 // break candidate first and hyphenation break points second since the result width of the desperate
182 // break is shorter than hyphenation break.
183 // This is important since DP in computeBreaksOptimal assumes that the result line width is
184 // increased by break offset.
appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter,std::vector<HyphenBreak>::const_iterator endHyIter,const std::vector<DesperateBreak> & desperates,const CharProcessor & proc,float hyphenPenalty,bool isRtl,OptimizeContext * out)185 void appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter,
186 std::vector<HyphenBreak>::const_iterator endHyIter,
187 const std::vector<DesperateBreak>& desperates, const CharProcessor& proc,
188 float hyphenPenalty, bool isRtl, OptimizeContext* out) {
189 auto d = desperates.begin();
190 while (hyIter != endHyIter || d != desperates.end()) {
191 // If both hyphen breaks and desperate breaks point to the same offset, push desperate
192 // breaks first.
193 if (d != desperates.end() && (hyIter == endHyIter || d->offset <= hyIter->offset)) {
194 out->pushDesperate(d->offset, proc.sumOfCharWidthsAtPrevWordBreak + d->sumOfChars,
195 proc.effectiveSpaceCount, isRtl);
196 d++;
197 } else {
198 out->pushHyphenation(hyIter->offset, proc.sumOfCharWidths - hyIter->second,
199 proc.sumOfCharWidthsAtPrevWordBreak + hyIter->first, hyphenPenalty,
200 proc.effectiveSpaceCount, hyIter->type, isRtl);
201 hyIter++;
202 }
203 }
204 }
205
206 // Enumerate all line break candidates.
populateCandidates(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidth,HyphenationFrequency frequency,bool isJustified)207 OptimizeContext populateCandidates(const U16StringPiece& textBuf, const MeasuredText& measured,
208 const LineWidth& lineWidth, HyphenationFrequency frequency,
209 bool isJustified) {
210 const ParaWidth minLineWidth = lineWidth.getMin();
211 CharProcessor proc(textBuf);
212
213 OptimizeContext result;
214
215 const bool doHyphenation = frequency != HyphenationFrequency::None;
216 auto hyIter = std::begin(measured.hyphenBreaks);
217
218 for (const auto& run : measured.runs) {
219 const bool isRtl = run->isRtl();
220 const Range& range = run->getRange();
221
222 // Compute penalty parameters.
223 float hyphenPenalty = 0.0f;
224 if (run->canBreak()) {
225 auto penalties = computePenalties(*run, lineWidth, frequency, isJustified);
226 hyphenPenalty = penalties.first;
227 result.linePenalty = std::max(penalties.second, result.linePenalty);
228 }
229
230 proc.updateLocaleIfNecessary(*run);
231
232 for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
233 MINIKIN_ASSERT(textBuf[i] != CHAR_TAB, "TAB is not supported in optimal line breaker");
234 // Even if the run is not a candidate of line break, treat the end of run as the line
235 // break candidate.
236 const bool canBreak = run->canBreak() || (i + 1) == range.getEnd();
237 proc.feedChar(i, textBuf[i], measured.widths[i], canBreak);
238
239 const uint32_t nextCharOffset = i + 1;
240 if (nextCharOffset != proc.nextWordBreak) {
241 continue; // Wait until word break point.
242 }
243
244 // Add hyphenation and desperate break points.
245 std::vector<HyphenBreak> hyphenedBreaks;
246 std::vector<DesperateBreak> desperateBreaks;
247 const Range contextRange = proc.contextRange();
248
249 auto beginHyIter = hyIter;
250 while (hyIter != std::end(measured.hyphenBreaks) &&
251 hyIter->offset < contextRange.getEnd()) {
252 hyIter++;
253 }
254 if (proc.widthFromLastWordBreak() > minLineWidth) {
255 desperateBreaks = populateDesperatePoints(measured, contextRange);
256 }
257 appendWithMerging(beginHyIter, doHyphenation ? hyIter : beginHyIter, desperateBreaks,
258 proc, hyphenPenalty, isRtl, &result);
259
260 // We skip breaks for zero-width characters inside replacement spans.
261 if (run->getPaint() != nullptr || nextCharOffset == range.getEnd() ||
262 measured.widths[nextCharOffset] > 0) {
263 const float penalty = hyphenPenalty * proc.wordBreakPenalty();
264 result.pushWordBreak(nextCharOffset, proc.sumOfCharWidths, proc.effectiveWidth,
265 penalty, proc.rawSpaceCount, proc.effectiveSpaceCount, isRtl);
266 }
267 }
268 }
269 result.spaceWidth = proc.spaceWidth;
270 return result;
271 }
272
273 class LineBreakOptimizer {
274 public:
LineBreakOptimizer()275 LineBreakOptimizer() {}
276
277 LineBreakResult computeBreaks(const OptimizeContext& context, const U16StringPiece& textBuf,
278 const MeasuredText& measuredText, const LineWidth& lineWidth,
279 BreakStrategy strategy, bool justified);
280
281 private:
282 // Data used to compute optimal line breaks
283 struct OptimalBreaksData {
284 float score; // best score found for this break
285 uint32_t prev; // index to previous break
286 uint32_t lineNumber; // the computed line number of the candidate
287 };
288 LineBreakResult finishBreaksOptimal(const U16StringPiece& textBuf, const MeasuredText& measured,
289 const std::vector<OptimalBreaksData>& breaksData,
290 const std::vector<Candidate>& candidates);
291 };
292
293 // Follow "prev" links in candidates array, and copy to result arrays.
finishBreaksOptimal(const U16StringPiece & textBuf,const MeasuredText & measured,const std::vector<OptimalBreaksData> & breaksData,const std::vector<Candidate> & candidates)294 LineBreakResult LineBreakOptimizer::finishBreaksOptimal(
295 const U16StringPiece& textBuf, const MeasuredText& measured,
296 const std::vector<OptimalBreaksData>& breaksData,
297 const std::vector<Candidate>& candidates) {
298 LineBreakResult result;
299 const uint32_t nCand = candidates.size();
300 uint32_t prevIndex;
301 for (uint32_t i = nCand - 1; i > 0; i = prevIndex) {
302 prevIndex = breaksData[i].prev;
303 const Candidate& cand = candidates[i];
304 const Candidate& prev = candidates[prevIndex];
305
306 result.breakPoints.push_back(cand.offset);
307 result.widths.push_back(cand.postBreak - prev.preBreak);
308 MinikinExtent extent = measured.getExtent(textBuf, Range(prev.offset, cand.offset));
309 result.ascents.push_back(extent.ascent);
310 result.descents.push_back(extent.descent);
311
312 const HyphenEdit edit =
313 packHyphenEdit(editForNextLine(prev.hyphenType), editForThisLine(cand.hyphenType));
314 result.flags.push_back(static_cast<int>(edit));
315 }
316 result.reverse();
317 return result;
318 }
319
computeBreaks(const OptimizeContext & context,const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidth,BreakStrategy strategy,bool justified)320 LineBreakResult LineBreakOptimizer::computeBreaks(const OptimizeContext& context,
321 const U16StringPiece& textBuf,
322 const MeasuredText& measured,
323 const LineWidth& lineWidth,
324 BreakStrategy strategy, bool justified) {
325 const std::vector<Candidate>& candidates = context.candidates;
326 uint32_t active = 0;
327 const uint32_t nCand = candidates.size();
328 const float maxShrink = justified ? SHRINKABILITY * context.spaceWidth : 0.0f;
329
330 std::vector<OptimalBreaksData> breaksData;
331 breaksData.reserve(nCand);
332 breaksData.push_back({0.0, 0, 0}); // The first candidate is always at the first line.
333
334 // "i" iterates through candidates for the end of the line.
335 for (uint32_t i = 1; i < nCand; i++) {
336 const bool atEnd = i == nCand - 1;
337 float best = SCORE_INFTY;
338 uint32_t bestPrev = 0;
339
340 uint32_t lineNumberLast = breaksData[active].lineNumber;
341 float width = lineWidth.getAt(lineNumberLast);
342
343 ParaWidth leftEdge = candidates[i].postBreak - width;
344 float bestHope = 0;
345
346 // "j" iterates through candidates for the beginning of the line.
347 for (uint32_t j = active; j < i; j++) {
348 const uint32_t lineNumber = breaksData[j].lineNumber;
349 if (lineNumber != lineNumberLast) {
350 const float widthNew = lineWidth.getAt(lineNumber);
351 if (widthNew != width) {
352 leftEdge = candidates[i].postBreak - width;
353 bestHope = 0;
354 width = widthNew;
355 }
356 lineNumberLast = lineNumber;
357 }
358 const float jScore = breaksData[j].score;
359 if (jScore + bestHope >= best) continue;
360 const float delta = candidates[j].preBreak - leftEdge;
361
362 // compute width score for line
363
364 // Note: the "bestHope" optimization makes the assumption that, when delta is
365 // non-negative, widthScore will increase monotonically as successive candidate
366 // breaks are considered.
367 float widthScore = 0.0f;
368 float additionalPenalty = 0.0f;
369 if ((atEnd || !justified) && delta < 0) {
370 widthScore = SCORE_OVERFULL;
371 } else if (atEnd && strategy != BreakStrategy::Balanced) {
372 // increase penalty for hyphen on last line
373 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * candidates[j].penalty;
374 } else {
375 widthScore = delta * delta;
376 if (delta < 0) {
377 if (-delta <
378 maxShrink * (candidates[i].postSpaceCount - candidates[j].preSpaceCount)) {
379 widthScore *= SHRINK_PENALTY_MULTIPLIER;
380 } else {
381 widthScore = SCORE_OVERFULL;
382 }
383 }
384 }
385
386 if (delta < 0) {
387 active = j + 1;
388 } else {
389 bestHope = widthScore;
390 }
391
392 const float score = jScore + widthScore + additionalPenalty;
393 if (score <= best) {
394 best = score;
395 bestPrev = j;
396 }
397 }
398 breaksData.push_back({best + candidates[i].penalty + context.linePenalty, // score
399 bestPrev, // prev
400 breaksData[bestPrev].lineNumber + 1}); // lineNumber
401 }
402 return finishBreaksOptimal(textBuf, measured, breaksData, candidates);
403 }
404
405 } // namespace
406
breakLineOptimal(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidth,BreakStrategy strategy,HyphenationFrequency frequency,bool justified)407 LineBreakResult breakLineOptimal(const U16StringPiece& textBuf, const MeasuredText& measured,
408 const LineWidth& lineWidth, BreakStrategy strategy,
409 HyphenationFrequency frequency, bool justified) {
410 if (textBuf.size() == 0) {
411 return LineBreakResult();
412 }
413 const OptimizeContext context =
414 populateCandidates(textBuf, measured, lineWidth, frequency, justified);
415 LineBreakOptimizer optimizer;
416 return optimizer.computeBreaks(context, textBuf, measured, lineWidth, strategy, justified);
417 }
418
419 } // namespace minikin
420