1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * A wrapper around ICU's line break iterator, that gives customized line
19  * break opportunities, as well as identifying words for the purpose of
20  * hyphenation.
21  */
22 
23 #ifndef MINIKIN_WORD_BREAKER_H
24 #define MINIKIN_WORD_BREAKER_H
25 
26 #include <list>
27 #include <mutex>
28 
29 #include <unicode/ubrk.h>
30 
31 #include "minikin/IcuUtils.h"
32 #include "minikin/Macros.h"
33 #include "minikin/Range.h"
34 
35 #include "Locale.h"
36 
37 namespace minikin {
38 
39 // A class interface for providing pooling implementation of ICU's line breaker.
40 // The implementation can be customized for testing purposes.
41 class ICULineBreakerPool {
42 public:
43     struct Slot {
SlotSlot44         Slot() : localeId(0), breaker(nullptr) {}
SlotSlot45         Slot(uint64_t localeId, IcuUbrkUniquePtr&& breaker)
46                 : localeId(localeId), breaker(std::move(breaker)) {}
47 
48         Slot(Slot&& other) = default;
49         Slot& operator=(Slot&& other) = default;
50 
51         // Forbid copy and assignment.
52         Slot(const Slot&) = delete;
53         Slot& operator=(const Slot&) = delete;
54 
55         uint64_t localeId;
56         IcuUbrkUniquePtr breaker;
57     };
~ICULineBreakerPool()58     virtual ~ICULineBreakerPool() {}
59     virtual Slot acquire(const Locale& locale) = 0;
60     virtual void release(Slot&& slot) = 0;
61 };
62 
63 // An singleton implementation of the ICU line breaker pool.
64 // Since creating ICU line breaker instance takes some time. Pool it for later use.
65 class ICULineBreakerPoolImpl : public ICULineBreakerPool {
66 public:
67     Slot acquire(const Locale& locale) override;
68     void release(Slot&& slot) override;
69 
getInstance()70     static ICULineBreakerPoolImpl& getInstance() {
71         static ICULineBreakerPoolImpl pool;
72         return pool;
73     }
74 
75 protected:
76     // protected for testing purposes.
77     static constexpr size_t MAX_POOL_SIZE = 4;
ICULineBreakerPoolImpl()78     ICULineBreakerPoolImpl(){};  // singleton.
getPoolSize()79     size_t getPoolSize() const {
80         std::lock_guard<std::mutex> lock(mMutex);
81         return mPool.size();
82     }
83 
84 private:
85     std::list<Slot> mPool GUARDED_BY(mMutex);
86     mutable std::mutex mMutex;
87 };
88 
89 class WordBreaker {
90 public:
~WordBreaker()91     virtual ~WordBreaker() { finish(); }
92 
93     WordBreaker();
94 
95     void setText(const uint16_t* data, size_t size);
96 
97     // Advance iterator to next word break with current locale. Return offset, or -1 if EOT
98     ssize_t next();
99 
100     // Advance iterator to the break just after "from" with using the new provided locale.
101     // Return offset, or -1 if EOT
102     ssize_t followingWithLocale(const Locale& locale, size_t from);
103 
104     // Current offset of iterator, equal to 0 at BOT or last return from next()
105     ssize_t current() const;
106 
107     // After calling next(), wordStart() and wordEnd() are offsets defining the previous
108     // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation.
109     ssize_t wordStart() const;
110 
111     ssize_t wordEnd() const;
112 
113     // Returns the range from wordStart() to wordEnd().
114     // If wordEnd() <= wordStart(), returns empty range.
wordRange()115     inline Range wordRange() const {
116         const uint32_t start = wordStart();
117         const uint32_t end = wordEnd();
118         return start < end ? Range(start, end) : Range(end, end);
119     }
120 
121     int breakBadness() const;
122 
123     void finish();
124 
125 protected:
126     // protected virtual for testing purpose.
127     // Caller must release the pool.
128     WordBreaker(ICULineBreakerPool* pool);
129 
130 private:
131     int32_t iteratorNext();
132     void detectEmailOrUrl();
133     ssize_t findNextBreakInEmailOrUrl();
134 
135     // Doesn't take ownership. Must not be nullptr. Must be set in constructor.
136     ICULineBreakerPool* mPool;
137 
138     ICULineBreakerPool::Slot mIcuBreaker;
139 
140     UText mUText = UTEXT_INITIALIZER;
141     const uint16_t* mText = nullptr;
142     size_t mTextSize;
143     ssize_t mLast;
144     ssize_t mCurrent;
145 
146     // state for the email address / url detector
147     ssize_t mScanOffset;
148     bool mInEmailOrUrl;
149 };
150 
151 }  // namespace minikin
152 
153 #endif  // MINIKIN_WORD_BREAKER_H
154