1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "GreedyLineBreak"
18 
19 #include "minikin/Characters.h"
20 #include "minikin/LineBreaker.h"
21 #include "minikin/MeasuredText.h"
22 #include "minikin/Range.h"
23 #include "minikin/U16StringPiece.h"
24 
25 #include "HyphenatorMap.h"
26 #include "LineBreakerUtil.h"
27 #include "Locale.h"
28 #include "LocaleListCache.h"
29 #include "WordBreaker.h"
30 
31 namespace minikin {
32 
33 namespace {
34 
35 constexpr uint32_t NOWHERE = 0xFFFFFFFF;
36 
37 class GreedyLineBreaker {
38 public:
39     // User of this class must keep measured, lineWidthLimit, tabStop alive until the instance is
40     // destructed.
GreedyLineBreaker(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)41     GreedyLineBreaker(const U16StringPiece& textBuf, const MeasuredText& measured,
42                       const LineWidth& lineWidthLimits, const TabStops& tabStops,
43                       bool enableHyphenation)
44             : mLineWidthLimit(lineWidthLimits.getAt(0)),
45               mTextBuf(textBuf),
46               mMeasuredText(measured),
47               mLineWidthLimits(lineWidthLimits),
48               mTabStops(tabStops),
49               mEnableHyphenation(enableHyphenation) {}
50 
51     void process();
52 
53     LineBreakResult getResult() const;
54 
55 private:
56     struct BreakPoint {
BreakPointminikin::__anondb74f7790111::GreedyLineBreaker::BreakPoint57         BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen,
58                    EndHyphenEdit endHyphen)
59                 : offset(offset),
60                   lineWidth(lineWidth),
61                   hyphenEdit(packHyphenEdit(startHyphen, endHyphen)) {}
62 
63         uint32_t offset;
64         float lineWidth;
65         HyphenEdit hyphenEdit;
66     };
67 
getPrevLineBreakOffset()68     inline uint32_t getPrevLineBreakOffset() {
69         return mBreakPoints.empty() ? 0 : mBreakPoints.back().offset;
70     }
71 
72     // Registers the break point and prepares for next line computation.
73     void breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
74                      float remainingNextSumOfCharWidths, EndHyphenEdit thisLineEndHyphen,
75                      StartHyphenEdit nextLineStartHyphen);
76 
77     // Update current line width.
78     void updateLineWidth(uint16_t c, float width);
79 
80     // Break line if current line exceeds the line limit.
81     void processLineBreak(uint32_t offset, WordBreaker* breaker, bool doHyphenation);
82 
83     // Try to break with previous word boundary.
84     // Returns false if unable to break by word boundary.
85     bool tryLineBreakWithWordBreak();
86 
87     // Try to break with hyphenation.
88     // Returns false if unable to hyphenate.
89     //
90     // This method keeps hyphenation until the line width after line break meets the line width
91     // limit.
92     bool tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker);
93 
94     // Do line break with each characters.
95     //
96     // This method only breaks at the first offset which has the longest width for the line width
97     // limit. This method don't keep line breaking even if the rest of the word exceeds the line
98     // width limit.
99     // This method return true if there is no characters to be processed.
100     bool doLineBreakWithGraphemeBounds(const Range& range);
101 
102     // Info about the line currently processing.
103     uint32_t mLineNum = 0;
104     double mLineWidth = 0;
105     double mSumOfCharWidths = 0;
106     double mLineWidthLimit;
107     StartHyphenEdit mStartHyphenEdit = StartHyphenEdit::NO_EDIT;
108 
109     // Previous word break point info.
110     uint32_t mPrevWordBoundsOffset = NOWHERE;
111     double mLineWidthAtPrevWordBoundary = 0;
112     double mSumOfCharWidthsAtPrevWordBoundary = 0;
113     bool mIsPrevWordBreakIsInEmailOrUrl = false;
114 
115     // The hyphenator currently used.
116     const Hyphenator* mHyphenator = nullptr;
117 
118     // Input parameters.
119     const U16StringPiece& mTextBuf;
120     const MeasuredText& mMeasuredText;
121     const LineWidth& mLineWidthLimits;
122     const TabStops& mTabStops;
123     bool mEnableHyphenation;
124 
125     // The result of line breaking.
126     std::vector<BreakPoint> mBreakPoints;
127 
128     MINIKIN_PREVENT_COPY_ASSIGN_AND_MOVE(GreedyLineBreaker);
129 };
130 
breakLineAt(uint32_t offset,float lineWidth,float remainingNextLineWidth,float remainingNextSumOfCharWidths,EndHyphenEdit thisLineEndHyphen,StartHyphenEdit nextLineStartHyphen)131 void GreedyLineBreaker::breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
132                                     float remainingNextSumOfCharWidths,
133                                     EndHyphenEdit thisLineEndHyphen,
134                                     StartHyphenEdit nextLineStartHyphen) {
135     // First, push the break to result.
136     mBreakPoints.emplace_back(offset, lineWidth, mStartHyphenEdit, thisLineEndHyphen);
137 
138     // Update the current line info.
139     mLineWidthLimit = mLineWidthLimits.getAt(++mLineNum);
140     mLineWidth = remainingNextLineWidth;
141     mSumOfCharWidths = remainingNextSumOfCharWidths;
142     mStartHyphenEdit = nextLineStartHyphen;
143     mPrevWordBoundsOffset = NOWHERE;
144     mLineWidthAtPrevWordBoundary = 0;
145     mSumOfCharWidthsAtPrevWordBoundary = 0;
146     mIsPrevWordBreakIsInEmailOrUrl = false;
147 }
148 
tryLineBreakWithWordBreak()149 bool GreedyLineBreaker::tryLineBreakWithWordBreak() {
150     if (mPrevWordBoundsOffset == NOWHERE) {
151         return false;  // No word break point before..
152     }
153 
154     breakLineAt(mPrevWordBoundsOffset,                            // break offset
155                 mLineWidthAtPrevWordBoundary,                     // line width
156                 mLineWidth - mSumOfCharWidthsAtPrevWordBoundary,  // remaining next line width
157                 // remaining next sum of char widths.
158                 mSumOfCharWidths - mSumOfCharWidthsAtPrevWordBoundary, EndHyphenEdit::NO_EDIT,
159                 StartHyphenEdit::NO_EDIT);  // No hyphen modification.
160     return true;
161 }
162 
tryLineBreakWithHyphenation(const Range & range,WordBreaker * breaker)163 bool GreedyLineBreaker::tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker) {
164     if (!mEnableHyphenation || mHyphenator == nullptr) {
165         return false;
166     }
167 
168     Run* targetRun = nullptr;
169     for (const auto& run : mMeasuredText.runs) {
170         if (run->getRange().contains(range)) {
171             targetRun = run.get();
172         }
173     }
174 
175     if (targetRun == nullptr) {
176         return false;  // The target range may lay on multiple run. Unable to hyphenate.
177     }
178 
179     const Range targetRange = breaker->wordRange();
180     if (!range.contains(targetRange)) {
181         return false;
182     }
183 
184     const std::vector<HyphenationType> hyphenResult =
185             hyphenate(mTextBuf.substr(targetRange), *mHyphenator);
186     Range contextRange = range;
187     uint32_t prevOffset = NOWHERE;
188     float prevWidth = 0;
189 
190     // Look up the hyphenation point from the begining.
191     for (uint32_t i = targetRange.getStart(); i < targetRange.getEnd(); ++i) {
192         const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(i)];
193         if (hyph == HyphenationType::DONT_BREAK) {
194             continue;  // Not a hyphenation point.
195         }
196 
197         const float width =
198                 targetRun->measureHyphenPiece(mTextBuf, contextRange.split(i).first,
199                                               mStartHyphenEdit, editForThisLine(hyph), nullptr);
200 
201         if (width <= mLineWidthLimit) {
202             // There are still space, remember current offset and look up next hyphenation point.
203             prevOffset = i;
204             prevWidth = width;
205             continue;
206         }
207 
208         if (prevOffset == NOWHERE) {
209             // Even with hyphenation, the piece is too long for line. Give up and break in
210             // character bounds.
211             doLineBreakWithGraphemeBounds(contextRange);
212         } else {
213             // Previous offset is the longest hyphenation piece. Break with it.
214             const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
215             const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
216             const float remainingCharWidths = targetRun->measureHyphenPiece(
217                     mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
218                     EndHyphenEdit::NO_EDIT, nullptr);
219             breakLineAt(prevOffset, prevWidth,
220                         remainingCharWidths - (mSumOfCharWidths - mLineWidth), remainingCharWidths,
221                         editForThisLine(hyph), nextLineStartHyphenEdit);
222         }
223 
224         if (mLineWidth <= mLineWidthLimit) {
225             // The remaining hyphenation piece is less than line width. No more hyphenation is
226             // needed. Go to next word.
227             return true;
228         }
229 
230         // Even after line break, the remaining hyphenation piece is still too long for the limit.
231         // Keep hyphenating for the rest.
232         i = getPrevLineBreakOffset();
233         contextRange.setStart(i);  // Update the hyphenation start point.
234         prevOffset = NOWHERE;
235     }
236 
237     // Do the same line break at the end of text.
238     // TODO: Remove code duplication. This is the same as in the for loop but extracting function
239     //       may not clear.
240     if (prevOffset == NOWHERE) {
241         doLineBreakWithGraphemeBounds(contextRange);
242     } else {
243         const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
244         const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
245         const float remainingCharWidths = targetRun->measureHyphenPiece(
246                 mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
247                 EndHyphenEdit::NO_EDIT, nullptr);
248 
249         breakLineAt(prevOffset, prevWidth, remainingCharWidths - (mSumOfCharWidths - mLineWidth),
250                     remainingCharWidths, editForThisLine(hyph), nextLineStartHyphenEdit);
251     }
252 
253     return true;
254 }
255 
256 // TODO: Respect trailing line end spaces.
doLineBreakWithGraphemeBounds(const Range & range)257 bool GreedyLineBreaker::doLineBreakWithGraphemeBounds(const Range& range) {
258     double width = mMeasuredText.widths[range.getStart()];
259 
260     // Starting from + 1 since at least one character needs to be assigned to a line.
261     for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
262         const float w = mMeasuredText.widths[i];
263         if (w == 0) {
264             continue;  // w == 0 means here is not a grapheme bounds. Don't break here.
265         }
266         if (width + w > mLineWidthLimit) {
267             // Okay, here is the longest position.
268             breakLineAt(i, width, mLineWidth - width, mSumOfCharWidths - width,
269                         EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
270 
271             // This method only breaks at the first longest offset, since we may want to hyphenate
272             // the rest of the word.
273             return false;
274         } else {
275             width += w;
276         }
277     }
278 
279     // Reaching here means even one character (or cluster) doesn't fit the line.
280     // Give up and break at the end of this range.
281     breakLineAt(range.getEnd(), mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
282     return true;
283 }
284 
updateLineWidth(uint16_t c,float width)285 void GreedyLineBreaker::updateLineWidth(uint16_t c, float width) {
286     if (c == CHAR_TAB) {
287         mSumOfCharWidths = mTabStops.nextTab(mSumOfCharWidths);
288         mLineWidth = mSumOfCharWidths;
289     } else {
290         mSumOfCharWidths += width;
291         if (!isLineEndSpace(c)) {
292             mLineWidth = mSumOfCharWidths;
293         }
294     }
295 }
296 
processLineBreak(uint32_t offset,WordBreaker * breaker,bool doHyphenation)297 void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker,
298                                          bool doHyphenation) {
299     while (mLineWidth > mLineWidthLimit) {
300         const Range lineRange(getPrevLineBreakOffset(), offset);  // The range we need to address.
301         if (tryLineBreakWithWordBreak()) {
302             continue;  // The word in the new line may still be too long for the line limit.
303         } else if (doHyphenation && tryLineBreakWithHyphenation(lineRange, breaker)) {
304             continue;  // TODO: we may be able to return here.
305         } else {
306             if (doLineBreakWithGraphemeBounds(lineRange)) {
307                 return;
308             }
309         }
310     }
311 
312     // There is still spaces, remember current word break point as a candidate and wait next word.
313     const bool isInEmailOrUrl = breaker->breakBadness() != 0;
314     if (mPrevWordBoundsOffset == NOWHERE || mIsPrevWordBreakIsInEmailOrUrl | !isInEmailOrUrl) {
315         mPrevWordBoundsOffset = offset;
316         mLineWidthAtPrevWordBoundary = mLineWidth;
317         mSumOfCharWidthsAtPrevWordBoundary = mSumOfCharWidths;
318         mIsPrevWordBreakIsInEmailOrUrl = isInEmailOrUrl;
319     }
320 }
321 
process()322 void GreedyLineBreaker::process() {
323     WordBreaker wordBreaker;
324     wordBreaker.setText(mTextBuf.data(), mTextBuf.size());
325 
326     // Following two will be initialized after the first iteration.
327     uint32_t localeListId = LocaleListCache::kInvalidListId;
328     uint32_t nextWordBoundaryOffset = 0;
329     for (const auto& run : mMeasuredText.runs) {
330         const Range range = run->getRange();
331 
332         // Update locale if necessary.
333         uint32_t newLocaleListId = run->getLocaleListId();
334         if (localeListId != newLocaleListId) {
335             Locale locale = getEffectiveLocale(newLocaleListId);
336             nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, range.getStart());
337             mHyphenator = HyphenatorMap::lookup(locale);
338             localeListId = newLocaleListId;
339         }
340 
341         for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
342             updateLineWidth(mTextBuf[i], mMeasuredText.widths[i]);
343 
344             if ((i + 1) == nextWordBoundaryOffset) {
345                 // Only process line break at word boundary and the run can break into some pieces.
346                 if (run->canBreak() || nextWordBoundaryOffset == range.getEnd()) {
347                     processLineBreak(i + 1, &wordBreaker, run->canBreak());
348                 }
349                 nextWordBoundaryOffset = wordBreaker.next();
350             }
351         }
352     }
353 
354     if (getPrevLineBreakOffset() != mTextBuf.size() && mPrevWordBoundsOffset != NOWHERE) {
355         // The remaining words in the last line.
356         breakLineAt(mPrevWordBoundsOffset, mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT,
357                     StartHyphenEdit::NO_EDIT);
358     }
359 }
360 
getResult() const361 LineBreakResult GreedyLineBreaker::getResult() const {
362     constexpr int TAB_BIT = 1 << 29;  // Must be the same in StaticLayout.java
363 
364     LineBreakResult out;
365     uint32_t prevBreakOffset = 0;
366     for (const auto& breakPoint : mBreakPoints) {
367         // TODO: compute these during line breaking if these takes longer time.
368         bool hasTabChar = false;
369         for (uint32_t i = prevBreakOffset; i < breakPoint.offset; ++i) {
370             hasTabChar |= mTextBuf[i] == CHAR_TAB;
371         }
372 
373         MinikinExtent extent =
374                 mMeasuredText.getExtent(mTextBuf, Range(prevBreakOffset, breakPoint.offset));
375         out.breakPoints.push_back(breakPoint.offset);
376         out.widths.push_back(breakPoint.lineWidth);
377         out.ascents.push_back(extent.ascent);
378         out.descents.push_back(extent.descent);
379         out.flags.push_back((hasTabChar ? TAB_BIT : 0) | static_cast<int>(breakPoint.hyphenEdit));
380 
381         prevBreakOffset = breakPoint.offset;
382     }
383     return out;
384 }
385 
386 }  // namespace
387 
breakLineGreedy(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)388 LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredText& measured,
389                                 const LineWidth& lineWidthLimits, const TabStops& tabStops,
390                                 bool enableHyphenation) {
391     if (textBuf.size() == 0) {
392         return LineBreakResult();
393     }
394     GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation);
395     lineBreaker.process();
396     return lineBreaker.getResult();
397 }
398 
399 }  // namespace minikin
400