1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "LayoutUtils.h"
18 
19 #include <gtest/gtest.h>
20 
21 #include "UnicodeUtils.h"
22 
23 namespace minikin {
24 
ExpectNextWordBreakForCache(size_t offset_in,const char * query_str)25 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) {
26     const size_t BUF_SIZE = 256U;
27     uint16_t buf[BUF_SIZE];
28     size_t expected_breakpoint = 0U;
29     size_t size = 0U;
30 
31     ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
32     EXPECT_EQ(expected_breakpoint, getNextWordBreakForCache(U16StringPiece(buf, size), offset_in))
33             << "Expected position is [" << query_str << "] from offset " << offset_in;
34 }
35 
ExpectPrevWordBreakForCache(size_t offset_in,const char * query_str)36 void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) {
37     const size_t BUF_SIZE = 256U;
38     uint16_t buf[BUF_SIZE];
39     size_t expected_breakpoint = 0U;
40     size_t size = 0U;
41 
42     ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
43     EXPECT_EQ(expected_breakpoint, getPrevWordBreakForCache(U16StringPiece(buf, size), offset_in))
44             << "Expected position is [" << query_str << "] from offset " << offset_in;
45 }
46 
TEST(WordBreakTest,goNextWordBreakTest)47 TEST(WordBreakTest, goNextWordBreakTest) {
48     ExpectNextWordBreakForCache(0, "|");
49 
50     // Continue for spaces.
51     ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |");
52     ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |");
53     ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |");
54     ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |");
55     ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |");
56     ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |");
57 
58     // Space makes word break.
59     ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'");
60     ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'");
61     ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'");
62     ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |");
63     ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |");
64     ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |");
65     ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |");
66 
67     ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'");
68     ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'");
69     ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'");
70     ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |");
71     ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |");
72     ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |");
73     ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |");
74 
75     ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'");
76     ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'");
77     ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'");
78     ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
79     ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |");
80     ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |");
81     ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |");
82     ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |");
83 
84     // CJK ideographs makes word break.
85     ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00   U+4E00   U+4E00   U+4E00");
86     ExpectNextWordBreakForCache(1, "U+4E00   U+4E00 | U+4E00   U+4E00   U+4E00");
87     ExpectNextWordBreakForCache(2, "U+4E00   U+4E00   U+4E00 | U+4E00   U+4E00");
88     ExpectNextWordBreakForCache(3, "U+4E00   U+4E00   U+4E00   U+4E00 | U+4E00");
89     ExpectNextWordBreakForCache(4, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
90     ExpectNextWordBreakForCache(5, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
91     ExpectNextWordBreakForCache(1000, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
92 
93     ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C   U+4E09   U+56DB   U+4E94");
94     ExpectNextWordBreakForCache(1, "U+4E00   U+4E8C | U+4E09   U+56DB   U+4E94");
95     ExpectNextWordBreakForCache(2, "U+4E00   U+4E8C   U+4E09 | U+56DB   U+4E94");
96     ExpectNextWordBreakForCache(3, "U+4E00   U+4E8C   U+4E09   U+56DB | U+4E94");
97     ExpectNextWordBreakForCache(4, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
98     ExpectNextWordBreakForCache(5, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
99     ExpectNextWordBreakForCache(1000, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
100 
101     ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
102     ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
103     ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
104     ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
105     ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
106     ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");
107     ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");
108 
109     // Continue if trailing characters is Unicode combining characters.
110     ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00");
111     ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00");
112     ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |");
113     ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |");
114     ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |");
115 
116     // Surrogate pairs.
117     ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |");
118     ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |");
119     ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |");
120     ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |");
121     ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |");
122     ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |");
123 
124     // Broken surrogate pairs.
125     // U+D84D is leading surrogate but there is no trailing surrogate for it.
126     ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |");
127     ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |");
128     ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |");
129     ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |");
130     ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |");
131 
132     ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |");
133     ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |");
134     ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |");
135     ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |");
136     ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |");
137 
138     // U+DE0D is trailing surrogate but there is no leading surrogate for it.
139     ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |");
140     ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |");
141     ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |");
142     ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |");
143     ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |");
144 
145     ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |");
146     ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |");
147     ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |");
148     ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |");
149     ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |");
150 
151     // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag.
152     ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |");
153     ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |");
154     ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |");
155     ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |");
156 
157     // Tone marks.
158     // CJK ideographic char + Tone mark + CJK ideographic char
159     ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444");
160     ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444");
161     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |");
162     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |");
163     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |");
164 
165     // Variation Selectors.
166     // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
167     ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B");
168     ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B");
169     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |");
170     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |");
171     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |");
172 
173     // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
174     ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B");
175     ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B");
176     ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B");
177     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |");
178     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |");
179     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |");
180     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |");
181 
182     // CJK ideographic char + Tone mark + Variation Character(VS1)
183     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444");
184     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444");
185     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444");
186     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |");
187     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |");
188     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |");
189 
190     // CJK ideographic char + Tone mark + Variation Character(VS17)
191     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444");
192     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444");
193     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+E0100 | U+4444");
194     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444");
195     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |");
196     ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |");
197     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |");
198 
199     // CJK ideographic char + Variation Character(VS1) + Tone mark
200     ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444");
201     ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444");
202     ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444");
203     ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |");
204     ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |");
205     ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |");
206 
207     // CJK ideographic char + Variation Character(VS17) + Tone mark
208     ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444");
209     ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444");
210     ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444");
211     ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444");
212     ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |");
213     ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |");
214     ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |");
215 
216     // Following test cases are unusual usage of variation selectors and tone
217     // marks for caching up the further behavior changes, e.g. index of bounds
218     // or crashes. Please feel free to update the test expectations if the
219     // behavior change makes sense to you.
220 
221     // Isolated Tone marks and Variation Selectors
222     ExpectNextWordBreakForCache(0, "U+FE00 |");
223     ExpectNextWordBreakForCache(1, "U+FE00 |");
224     ExpectNextWordBreakForCache(1000, "U+FE00 |");
225     ExpectNextWordBreakForCache(0, "U+E0100 |");
226     ExpectNextWordBreakForCache(1000, "U+E0100 |");
227     ExpectNextWordBreakForCache(0, "U+302D |");
228     ExpectNextWordBreakForCache(1000, "U+302D |");
229 
230     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
231     ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B");
232     ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B");
233     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B");
234     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |");
235     ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |");
236     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |");
237 
238     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
239     ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B");
240     ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B");
241     ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B");
242     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B");
243     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B");
244     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |");
245     ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |");
246     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+E0100 U+845B |");
247 
248     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
249     ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B");
250     ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B");
251     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B");
252     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B");
253     ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |");
254     ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |");
255     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |");
256 
257     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
258     ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B");
259     ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B");
260     ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B");
261     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B");
262     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |");
263     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |");
264     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |");
265 
266     // Tone mark. + Tone mark
267     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444");
268     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444");
269     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444");
270     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |");
271     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |");
272     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |");
273 }
274 
TEST(WordBreakTest,goPrevWordBreakTest)275 TEST(WordBreakTest, goPrevWordBreakTest) {
276     ExpectPrevWordBreakForCache(0, "|");
277 
278     // Continue for spaces.
279     ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'");
280     ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'");
281     ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'");
282     ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'");
283     ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'");
284     ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'");
285 
286     // Space makes word break.
287     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'");
288     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'");
289     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'");
290     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'");
291     ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'");
292     ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'");
293     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'");
294 
295     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'");
296     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'");
297     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'");
298     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'");
299     ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'");
300     ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'");
301     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'");
302 
303     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
304     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
305     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
306     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'");
307     ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'");
308     ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
309     ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
310     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
311 
312     // CJK ideographs makes word break.
313     ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
314     ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
315     ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00");
316     ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00");
317     ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00");
318     ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
319     ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
320 
321     ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
322     ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
323     ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94");
324     ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94");
325     ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94");
326     ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
327     ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
328 
329     // Mixed case.
330     ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
331     ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
332     ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
333     ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
334     ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
335     ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
336     ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
337     ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
338 
339     // Continue if trailing characters is Unicode combining characters.
340     ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00");
341     ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00");
342     ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00");
343     ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00");
344     ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00");
345 
346     // Surrogate pairs.
347     ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618");
348     ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618");
349     ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618");
350     ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618");
351     ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618");
352     ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618");
353 
354     // Broken surrogate pairs.
355     // U+D84D is leading surrogate but there is no trailing surrogate for it.
356     ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618");
357     ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618");
358     ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618");
359     ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618");
360     ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618");
361 
362     ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D");
363     ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D");
364     ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D");
365     ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D");
366     ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D");
367 
368     // U+DE0D is trailing surrogate but there is no leading surrogate for it.
369     ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618");
370     ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618");
371     ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618");
372     ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618");
373     ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618");
374 
375     ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D");
376     ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D");
377     ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D");
378     ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D");
379     ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D");
380 
381     // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag.
382     ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8");
383     ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8");
384     ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8");
385     ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8");
386 
387     // Tone marks.
388     // CJK ideographic char + Tone mark + CJK ideographic char
389     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444");
390     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444");
391     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444");
392     ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444");
393     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444");
394 
395     // Variation Selectors.
396     // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
397     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B");
398     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B");
399     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B");
400     ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B");
401     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B");
402 
403     // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
404     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B");
405     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B");
406     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B");
407     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B");
408     ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B");
409     ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B");
410     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B");
411 
412     // CJK ideographic char + Tone mark + Variation Character(VS1)
413     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444");
414     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444");
415     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444");
416     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444");
417     ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444");
418     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+FE00 | U+4444");
419 
420     // CJK ideographic char + Tone mark + Variation Character(VS17)
421     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444");
422     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444");
423     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444");
424     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444");
425     ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444");
426     ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444");
427     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444");
428 
429     // CJK ideographic char + Variation Character(VS1) + Tone mark
430     ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444");
431     ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444");
432     ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444");
433     ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444");
434     ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444");
435     ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444");
436 
437     // CJK ideographic char + Variation Character(VS17) + Tone mark
438     ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444");
439     ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444");
440     ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444");
441     ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444");
442     ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444");
443     ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444");
444     ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444");
445 
446     // Following test cases are unusual usage of variation selectors and tone
447     // marks for caching up the further behavior changes, e.g. index of bounds
448     // or crashes. Please feel free to update the test expectations if the
449     // behavior change makes sense to you.
450 
451     // Isolated Tone marks and Variation Selectors
452     ExpectPrevWordBreakForCache(0, "| U+FE00");
453     ExpectPrevWordBreakForCache(1, "| U+FE00");
454     ExpectPrevWordBreakForCache(1000, "| U+FE00");
455     ExpectPrevWordBreakForCache(0, "| U+E0100");
456     ExpectPrevWordBreakForCache(1000, "| U+E0100");
457     ExpectPrevWordBreakForCache(0, "| U+302D");
458     ExpectPrevWordBreakForCache(1000, "| U+302D");
459 
460     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
461     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B");
462     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B");
463     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B");
464     ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B");
465     ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B");
466     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B");
467 
468     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
469     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B");
470     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B");
471     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B");
472     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B");
473     ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B");
474     ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B");
475     ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B");
476     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+E0100 | U+845B");
477 
478     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
479     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B");
480     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B");
481     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B");
482     ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B");
483     ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B");
484     ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B");
485     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B");
486 
487     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
488     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B");
489     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B");
490     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B");
491     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B");
492     ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B");
493     ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B");
494     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B");
495 
496     // Tone mark. + Tone mark
497     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444");
498     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444");
499     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444");
500     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444");
501     ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444");
502     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444");
503 }
504 
505 }  // namespace minikin
506