1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.text.util;
18 
19 import android.widget.MultiAutoCompleteTextView;
20 
21 import java.util.ArrayList;
22 import java.util.Collection;
23 
24 /**
25  * This class works as a Tokenizer for MultiAutoCompleteTextView for
26  * address list fields, and also provides a method for converting
27  * a string of addresses (such as might be typed into such a field)
28  * into a series of Rfc822Tokens.
29  */
30 public class Rfc822Tokenizer implements MultiAutoCompleteTextView.Tokenizer {
31 
32     /**
33      * This constructor will try to take a string like
34      * "Foo Bar (something) <foo\@google.com>,
35      * blah\@google.com (something)"
36      * and convert it into one or more Rfc822Tokens, output into the supplied
37      * collection.
38      *
39      * It does *not* decode MIME encoded-words; charset conversion
40      * must already have taken place if necessary.
41      * It will try to be tolerant of broken syntax instead of
42      * returning an error.
43      *
44      */
tokenize(CharSequence text, Collection<Rfc822Token> out)45     public static void tokenize(CharSequence text, Collection<Rfc822Token> out) {
46         StringBuilder name = new StringBuilder();
47         StringBuilder address = new StringBuilder();
48         StringBuilder comment = new StringBuilder();
49 
50         int i = 0;
51         int cursor = text.length();
52 
53         while (i < cursor) {
54             char c = text.charAt(i);
55 
56             if (c == ',' || c == ';') {
57                 i++;
58 
59                 while (i < cursor && text.charAt(i) == ' ') {
60                     i++;
61                 }
62 
63                 crunch(name);
64 
65                 if (address.length() > 0) {
66                     out.add(new Rfc822Token(name.toString(),
67                                             address.toString(),
68                                             comment.toString()));
69                 } else if (name.length() > 0) {
70                     out.add(new Rfc822Token(null,
71                                             name.toString(),
72                                             comment.toString()));
73                 }
74 
75                 name.setLength(0);
76                 address.setLength(0);
77                 comment.setLength(0);
78             } else if (c == '"') {
79                 i++;
80 
81                 while (i < cursor) {
82                     c = text.charAt(i);
83 
84                     if (c == '"') {
85                         i++;
86                         break;
87                     } else if (c == '\\') {
88                         if (i + 1 < cursor) {
89                             name.append(text.charAt(i + 1));
90                         }
91                         i += 2;
92                     } else {
93                         name.append(c);
94                         i++;
95                     }
96                 }
97             } else if (c == '(') {
98                 int level = 1;
99                 i++;
100 
101                 while (i < cursor && level > 0) {
102                     c = text.charAt(i);
103 
104                     if (c == ')') {
105                         if (level > 1) {
106                             comment.append(c);
107                         }
108 
109                         level--;
110                         i++;
111                     } else if (c == '(') {
112                         comment.append(c);
113                         level++;
114                         i++;
115                     } else if (c == '\\') {
116                         if (i + 1 < cursor) {
117                             comment.append(text.charAt(i + 1));
118                         }
119                         i += 2;
120                     } else {
121                         comment.append(c);
122                         i++;
123                     }
124                 }
125             } else if (c == '<') {
126                 i++;
127 
128                 while (i < cursor) {
129                     c = text.charAt(i);
130 
131                     if (c == '>') {
132                         i++;
133                         break;
134                     } else {
135                         address.append(c);
136                         i++;
137                     }
138                 }
139             } else if (c == ' ') {
140                 name.append('\0');
141                 i++;
142             } else {
143                 name.append(c);
144                 i++;
145             }
146         }
147 
148         crunch(name);
149 
150         if (address.length() > 0) {
151             out.add(new Rfc822Token(name.toString(),
152                                     address.toString(),
153                                     comment.toString()));
154         } else if (name.length() > 0) {
155             out.add(new Rfc822Token(null,
156                                     name.toString(),
157                                     comment.toString()));
158         }
159     }
160 
161     /**
162      * This method will try to take a string like
163      * "Foo Bar (something) &lt;foo\@google.com&gt;,
164      * blah\@google.com (something)"
165      * and convert it into one or more Rfc822Tokens.
166      * It does *not* decode MIME encoded-words; charset conversion
167      * must already have taken place if necessary.
168      * It will try to be tolerant of broken syntax instead of
169      * returning an error.
170      */
tokenize(CharSequence text)171     public static Rfc822Token[] tokenize(CharSequence text) {
172         ArrayList<Rfc822Token> out = new ArrayList<Rfc822Token>();
173         tokenize(text, out);
174         return out.toArray(new Rfc822Token[out.size()]);
175     }
176 
crunch(StringBuilder sb)177     private static void crunch(StringBuilder sb) {
178         int i = 0;
179         int len = sb.length();
180 
181         while (i < len) {
182             char c = sb.charAt(i);
183 
184             if (c == '\0') {
185                 if (i == 0 || i == len - 1 ||
186                     sb.charAt(i - 1) == ' ' ||
187                     sb.charAt(i - 1) == '\0' ||
188                     sb.charAt(i + 1) == ' ' ||
189                     sb.charAt(i + 1) == '\0') {
190                     sb.deleteCharAt(i);
191                     len--;
192                 } else {
193                     i++;
194                 }
195             } else {
196                 i++;
197             }
198         }
199 
200         for (i = 0; i < len; i++) {
201             if (sb.charAt(i) == '\0') {
202                 sb.setCharAt(i, ' ');
203             }
204         }
205     }
206 
207     /**
208      * {@inheritDoc}
209      */
findTokenStart(CharSequence text, int cursor)210     public int findTokenStart(CharSequence text, int cursor) {
211         /*
212          * It's hard to search backward, so search forward until
213          * we reach the cursor.
214          */
215 
216         int best = 0;
217         int i = 0;
218 
219         while (i < cursor) {
220             i = findTokenEnd(text, i);
221 
222             if (i < cursor) {
223                 i++; // Skip terminating punctuation
224 
225                 while (i < cursor && text.charAt(i) == ' ') {
226                     i++;
227                 }
228 
229                 if (i < cursor) {
230                     best = i;
231                 }
232             }
233         }
234 
235         return best;
236     }
237 
238     /**
239      * {@inheritDoc}
240      */
findTokenEnd(CharSequence text, int cursor)241     public int findTokenEnd(CharSequence text, int cursor) {
242         int len = text.length();
243         int i = cursor;
244 
245         while (i < len) {
246             char c = text.charAt(i);
247 
248             if (c == ',' || c == ';') {
249                 return i;
250             } else if (c == '"') {
251                 i++;
252 
253                 while (i < len) {
254                     c = text.charAt(i);
255 
256                     if (c == '"') {
257                         i++;
258                         break;
259                     } else if (c == '\\' && i + 1 < len) {
260                         i += 2;
261                     } else {
262                         i++;
263                     }
264                 }
265             } else if (c == '(') {
266                 int level = 1;
267                 i++;
268 
269                 while (i < len && level > 0) {
270                     c = text.charAt(i);
271 
272                     if (c == ')') {
273                         level--;
274                         i++;
275                     } else if (c == '(') {
276                         level++;
277                         i++;
278                     } else if (c == '\\' && i + 1 < len) {
279                         i += 2;
280                     } else {
281                         i++;
282                     }
283                 }
284             } else if (c == '<') {
285                 i++;
286 
287                 while (i < len) {
288                     c = text.charAt(i);
289 
290                     if (c == '>') {
291                         i++;
292                         break;
293                     } else {
294                         i++;
295                     }
296                 }
297             } else {
298                 i++;
299             }
300         }
301 
302         return i;
303     }
304 
305     /**
306      * Terminates the specified address with a comma and space.
307      * This assumes that the specified text already has valid syntax.
308      * The Adapter subclass's convertToString() method must make that
309      * guarantee.
310      */
terminateToken(CharSequence text)311     public CharSequence terminateToken(CharSequence text) {
312         return text + ", ";
313     }
314 }
315