1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "LayoutUtils.h"
18
19 #include <gtest/gtest.h>
20
21 #include "UnicodeUtils.h"
22
23 namespace minikin {
24
ExpectNextWordBreakForCache(size_t offset_in,const char * query_str)25 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) {
26 const size_t BUF_SIZE = 256U;
27 uint16_t buf[BUF_SIZE];
28 size_t expected_breakpoint = 0U;
29 size_t size = 0U;
30
31 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
32 EXPECT_EQ(expected_breakpoint, getNextWordBreakForCache(U16StringPiece(buf, size), offset_in))
33 << "Expected position is [" << query_str << "] from offset " << offset_in;
34 }
35
ExpectPrevWordBreakForCache(size_t offset_in,const char * query_str)36 void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) {
37 const size_t BUF_SIZE = 256U;
38 uint16_t buf[BUF_SIZE];
39 size_t expected_breakpoint = 0U;
40 size_t size = 0U;
41
42 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
43 EXPECT_EQ(expected_breakpoint, getPrevWordBreakForCache(U16StringPiece(buf, size), offset_in))
44 << "Expected position is [" << query_str << "] from offset " << offset_in;
45 }
46
// Forward word-break expectations. Each call passes a starting offset and a
// textual test case to ExpectNextWordBreakForCache(); the '|' in the test case
// marks the break position expected for that offset. Offset 1000 is larger
// than the length of every text used here.
TEST(WordBreakTest, goNextWordBreakTest) {
    ExpectNextWordBreakForCache(0, "|");

    // Plain letters only: no word break before the end of the text.
    ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |");
    ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |");
    ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |");
    ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |");
    ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |");
    ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |");

    // A space makes a word break.
    ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'");
    ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'");
    ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'");
    ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |");
    ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |");
    ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |");
    ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |");

    ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'");
    ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'");
    ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'");
    ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |");
    ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |");
    ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |");
    ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |");

    ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'");
    ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'");
    ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'");
    ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |");
    ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |");
    ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |");
    ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |");

    // CJK ideographs make word breaks.
    ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00");
    ExpectNextWordBreakForCache(1, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00");
    ExpectNextWordBreakForCache(2, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00");
    ExpectNextWordBreakForCache(3, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
    ExpectNextWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |");
    ExpectNextWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |");
    ExpectNextWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |");

    ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94");
    ExpectNextWordBreakForCache(1, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94");
    ExpectNextWordBreakForCache(2, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94");
    ExpectNextWordBreakForCache(3, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
    ExpectNextWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |");
    ExpectNextWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |");
    ExpectNextWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |");

    // Mixed case.
    ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
    ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");
    ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");

    // Continue if the trailing character is a Unicode combining character.
    ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00");
    ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00");
    ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |");
    ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |");
    ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |");

    // Surrogate pairs.
    ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |");
    ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |");
    ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |");
    ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |");
    ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |");
    ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |");

    // Broken surrogate pairs.
    // U+D84D is a leading surrogate but there is no trailing surrogate for it.
    ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |");
    ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |");
    ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |");
    ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |");
    ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |");

    ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |");
    ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |");
    ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |");
    ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |");
    ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |");

    // U+DE0D is a trailing surrogate but there is no leading surrogate for it.
    ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |");
    ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |");
    ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |");
    ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |");
    ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |");

    ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |");
    ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |");
    ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |");
    ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |");
    ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |");

    // Regional indicator pair. U+1F1FA U+1F1F8 is the US national flag.
    ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |");
    ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |");
    ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |");
    ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |");

    // Tone marks.
    // CJK ideographic char + Tone mark + CJK ideographic char
    ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444");
    ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444");
    ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |");
    ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |");
    ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |");

    // Variation Selectors.
    // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
    ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B");
    ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B");
    ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |");
    ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |");
    ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |");

    // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
    ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B");
    ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B");
    ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B");
    ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |");
    ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |");
    ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |");
    ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |");

    // CJK ideographic char + Tone mark + Variation Character(VS1)
    ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444");
    ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444");
    ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444");
    ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |");
    ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |");
    ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |");

    // CJK ideographic char + Tone mark + Variation Character(VS17)
    ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444");
    ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444");
    ExpectNextWordBreakForCache(2, "U+4444 U+302D U+E0100 | U+4444");
    ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444");
    ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |");
    ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |");
    ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |");

    // CJK ideographic char + Variation Character(VS1) + Tone mark
    ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444");
    ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444");
    ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444");
    ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |");
    ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |");
    ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |");

    // CJK ideographic char + Variation Character(VS17) + Tone mark
    ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444");
    ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444");
    ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444");
    ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444");
    ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |");
    ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |");
    ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |");

    // The following test cases exercise unusual uses of variation selectors
    // and tone marks in order to catch future behavior changes, e.g.
    // out-of-bounds indexing or crashes. Please feel free to update the test
    // expectations if the behavior change makes sense to you.

    // Isolated Tone marks and Variation Selectors
    ExpectNextWordBreakForCache(0, "U+FE00 |");
    ExpectNextWordBreakForCache(1, "U+FE00 |");
    ExpectNextWordBreakForCache(1000, "U+FE00 |");
    ExpectNextWordBreakForCache(0, "U+E0100 |");
    ExpectNextWordBreakForCache(1000, "U+E0100 |");
    ExpectNextWordBreakForCache(0, "U+302D |");
    ExpectNextWordBreakForCache(1000, "U+302D |");

    // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
    ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B");
    ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B");
    ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B");
    ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |");
    ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |");
    ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |");

    // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
    ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |");
    ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |");
    ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+E0100 U+845B |");

    // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
    ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B");
    ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |");
    ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |");
    ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |");

    // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
    ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B");
    ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B");
    ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B");
    ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B");
    ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |");
    ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |");
    ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |");

    // Tone mark + Tone mark
    ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444");
    ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444");
    ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444");
    ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |");
    ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |");
    ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |");
}
274
// Backward word-break expectations. Each call passes a starting offset and a
// textual test case to ExpectPrevWordBreakForCache(); the '|' in the test case
// marks the break position expected for that offset. Offset 1000 is larger
// than the length of every text used here.
TEST(WordBreakTest, goPrevWordBreakTest) {
    ExpectPrevWordBreakForCache(0, "|");

    // Plain letters only: no word break after the start of the text.
    ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'");
    ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'");
    ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'");
    ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'");
    ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'");
    ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'");

    // A space makes a word break.
    ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'");
    ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'");
    ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'");
    ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'");
    ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'");
    ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'");
    ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'");

    ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'");
    ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'");
    ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'");

    ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'");
    ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'");

    // CJK ideographs make word breaks.
    ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
    ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
    ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00");
    ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00");
    ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00");
    ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
    ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");

    ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
    ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
    ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94");
    ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94");
    ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94");
    ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
    ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");

    // Mixed case.
    ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
    ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");

    // Continue if the trailing character is a Unicode combining character.
    ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00");
    ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00");
    ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00");
    ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00");
    ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00");

    // Surrogate pairs.
    ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618");
    ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618");
    ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618");
    ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618");
    ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618");
    ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618");

    // Broken surrogate pairs.
    // U+D84D is a leading surrogate but there is no trailing surrogate for it.
    ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618");
    ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618");
    ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618");
    ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618");
    ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618");

    ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D");
    ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D");
    ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D");
    ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D");
    ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D");

    // U+DE0D is a trailing surrogate but there is no leading surrogate for it.
    ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618");
    ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618");
    ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618");
    ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618");
    ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618");

    ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D");
    ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D");
    ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D");
    ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D");
    ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D");

    // Regional indicator pair. U+1F1FA U+1F1F8 is the US national flag.
    ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8");
    ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8");
    ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8");
    ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8");

    // Tone marks.
    // CJK ideographic char + Tone mark + CJK ideographic char
    ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444");
    ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444");
    ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444");
    ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444");
    ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444");

    // Variation Selectors.
    // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
    ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B");
    ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B");
    ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B");
    ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B");
    ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B");

    // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
    ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B");
    ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B");
    ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B");
    ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B");
    ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B");
    ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B");
    ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B");

    // CJK ideographic char + Tone mark + Variation Character(VS1)
    ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444");
    ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444");
    ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444");
    ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444");
    ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444");
    ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+FE00 | U+4444");

    // CJK ideographic char + Tone mark + Variation Character(VS17)
    ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444");
    ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444");
    ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444");
    ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444");
    ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444");
    ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444");
    ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444");

    // CJK ideographic char + Variation Character(VS1) + Tone mark
    ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444");
    ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444");
    ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444");
    ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444");
    ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444");
    ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444");

    // CJK ideographic char + Variation Character(VS17) + Tone mark
    ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444");
    ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444");
    ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444");
    ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444");
    ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444");
    ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444");
    ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444");

    // The following test cases exercise unusual uses of variation selectors
    // and tone marks in order to catch future behavior changes, e.g.
    // out-of-bounds indexing or crashes. Please feel free to update the test
    // expectations if the behavior change makes sense to you.

    // Isolated Tone marks and Variation Selectors
    ExpectPrevWordBreakForCache(0, "| U+FE00");
    ExpectPrevWordBreakForCache(1, "| U+FE00");
    ExpectPrevWordBreakForCache(1000, "| U+FE00");
    ExpectPrevWordBreakForCache(0, "| U+E0100");
    ExpectPrevWordBreakForCache(1000, "| U+E0100");
    ExpectPrevWordBreakForCache(0, "| U+302D");
    ExpectPrevWordBreakForCache(1000, "| U+302D");

    // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
    ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B");
    ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B");

    // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
    ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B");
    ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+E0100 | U+845B");

    // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
    ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B");
    ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B");
    ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B");

    // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
    ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B");
    ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B");
    ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B");

    // Tone mark + Tone mark
    ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444");
    ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444");
    ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444");
    ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444");
    ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444");
    ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444");
}
504
505 } // namespace minikin
506