1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package libcore.icu;
18 
19 import android.compat.annotation.UnsupportedAppUsage;
20 import android.icu.text.CurrencyMetaInfo;
21 import android.icu.text.CurrencyMetaInfo.CurrencyFilter;
22 import android.icu.text.DateTimePatternGenerator;
23 import android.icu.util.ULocale;
24 
25 import java.util.Collections;
26 import java.util.Date;
27 import java.util.HashMap;
28 import java.util.HashSet;
29 import java.util.LinkedHashSet;
30 import java.util.List;
31 import java.util.Locale;
32 import java.util.Map;
33 import java.util.Map.Entry;
34 import java.util.Set;
35 import libcore.util.BasicLruCache;
36 
37 /**
38  * Makes ICU data accessible to Java.
39  * @hide
40  */
41 public final class ICU {
42 
  // Cache of best date/time patterns. getBestDateTimePattern() builds each key as
  // "<skeleton>\t<language tag>" and synchronizes on this object around every get/put.
  // The static initializer pre-fills it and notes that the size passed here must be >= 8.
  @UnsupportedAppUsage
  private static final BasicLruCache<String, String> CACHED_PATTERNS =
      new BasicLruCache<String, String>(8);

  // Filled in on the first call to getAvailableLocales(); callers only ever receive clones.
  private static Locale[] availableLocalesCache;

  // Filled in on the first call to getISOCountries(); callers only ever receive clones.
  private static String[] isoCountries;

  // Filled in on the first call to getISOLanguages(); callers only ever receive clones.
  private static String[] isoLanguages;
52 
53   static {
54     // Fill CACHED_PATTERNS with the patterns from default locale and en-US initially.
55     // Likely, this is initialized in Zygote and the initial values in the cache can be shared
56     // among app. The cache was filled by LocaleData in the older Android platform, but moved to
57     // here, due to an performance issue http://b/161846393.
58     // It initializes 2 x 4 = 8 values in the CACHED_PATTERNS whose max size should be >= 8.
59     for (Locale locale : new Locale[] {Locale.US, Locale.getDefault()}) {
getTimePattern(locale, false, false)60       getTimePattern(locale, false, false);
getTimePattern(locale, false, true)61       getTimePattern(locale, false, true);
getTimePattern(locale, true, false)62       getTimePattern(locale, true, false);
getTimePattern(locale, true, true)63       getTimePattern(locale, true, true);
64     }
65   }
66 
  // Private and empty: every member of this class is static, so no instance is ever needed.
  private ICU() {
  }
69 
70   /**
71    * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
72    */
getISOLanguages()73   public static String[] getISOLanguages() {
74     if (isoLanguages == null) {
75       isoLanguages = getISOLanguagesNative();
76     }
77     return isoLanguages.clone();
78   }
79 
80   /**
81    * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
82    */
getISOCountries()83   public static String[] getISOCountries() {
84     if (isoCountries == null) {
85       isoCountries = getISOCountriesNative();
86     }
87     return isoCountries.clone();
88   }
89 
  // Slots of the four-element String array that parseLangScriptRegionAndVariants() fills in
  // and localeFromIcuLocaleId() reads back when configuring its Locale.Builder.
  private static final int IDX_LANGUAGE = 0;
  private static final int IDX_SCRIPT = 1;
  private static final int IDX_REGION = 2;
  private static final int IDX_VARIANT = 3;
94 
95   /*
96    * Parse the {Language, Script, Region, Variant*} section of the ICU locale
97    * ID. This is the bit that appears before the keyword separate "@". The general
98    * structure is a series of ASCII alphanumeric strings (subtags)
99    * separated by underscores.
100    *
101    * Each subtag is interpreted according to its position in the list of subtags
102    * AND its length (groan...). The various cases are explained in comments
103    * below.
104    */
parseLangScriptRegionAndVariants(String string, String[] outputArray)105   private static void parseLangScriptRegionAndVariants(String string,
106           String[] outputArray) {
107     final int first = string.indexOf('_');
108     final int second = string.indexOf('_', first + 1);
109     final int third = string.indexOf('_', second + 1);
110 
111     if (first == -1) {
112       outputArray[IDX_LANGUAGE] = string;
113     } else if (second == -1) {
114       // Language and country ("ja_JP") OR
115       // Language and script ("en_Latn") OR
116       // Language and variant ("en_POSIX").
117 
118       outputArray[IDX_LANGUAGE] = string.substring(0, first);
119       final String secondString = string.substring(first + 1);
120 
121       if (secondString.length() == 4) {
122           // 4 Letter ISO script code.
123           outputArray[IDX_SCRIPT] = secondString;
124       } else if (secondString.length() == 2 || secondString.length() == 3) {
125           // 2 or 3 Letter region code.
126           outputArray[IDX_REGION] = secondString;
127       } else {
128           // If we're here, the length of the second half is either 1 or greater
129           // than 5. Assume that ICU won't hand us malformed tags, and therefore
130           // assume the rest of the string is a series of variant tags.
131           outputArray[IDX_VARIANT] = secondString;
132       }
133     } else if (third == -1) {
134       // Language and country and variant ("ja_JP_TRADITIONAL") OR
135       // Language and script and variant ("en_Latn_POSIX") OR
136       // Language and script and region ("en_Latn_US"). OR
137       // Language and variant with multiple subtags ("en_POSIX_XISOP")
138 
139       outputArray[IDX_LANGUAGE] = string.substring(0, first);
140       final String secondString = string.substring(first + 1, second);
141       final String thirdString = string.substring(second + 1);
142 
143       if (secondString.length() == 4) {
144           // The second subtag is a script.
145           outputArray[IDX_SCRIPT] = secondString;
146 
147           // The third subtag can be either a region or a variant, depending
148           // on its length.
149           if (thirdString.length() == 2 || thirdString.length() == 3 ||
150                   thirdString.isEmpty()) {
151               outputArray[IDX_REGION] = thirdString;
152           } else {
153               outputArray[IDX_VARIANT] = thirdString;
154           }
155       } else if (secondString.isEmpty() ||
156               secondString.length() == 2 || secondString.length() == 3) {
157           // The second string is a region, and the third a variant.
158           outputArray[IDX_REGION] = secondString;
159           outputArray[IDX_VARIANT] = thirdString;
160       } else {
161           // Variant with multiple subtags.
162           outputArray[IDX_VARIANT] = string.substring(first + 1);
163       }
164     } else {
165       // Language, script, region and variant with 1 or more subtags
166       // ("en_Latn_US_POSIX") OR
167       // Language, region and variant with 2 or more subtags
168       // (en_US_POSIX_VARIANT).
169       outputArray[IDX_LANGUAGE] = string.substring(0, first);
170       final String secondString = string.substring(first + 1, second);
171       if (secondString.length() == 4) {
172           outputArray[IDX_SCRIPT] = secondString;
173           outputArray[IDX_REGION] = string.substring(second + 1, third);
174           outputArray[IDX_VARIANT] = string.substring(third + 1);
175       } else {
176           outputArray[IDX_REGION] = secondString;
177           outputArray[IDX_VARIANT] = string.substring(second + 1);
178       }
179     }
180   }
181 
182   /**
183    * Returns the appropriate {@code Locale} given a {@code String} of the form returned
184    * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
185    * this method can parse strings that {@code Locale.toString} won't produce.
186    * Used to remove duplication.
187    */
localeFromIcuLocaleId(String localeId)188   public static Locale localeFromIcuLocaleId(String localeId) {
189     // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
190     final int extensionsIndex = localeId.indexOf('@');
191 
192     Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
193     Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
194     Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
195 
196     if (extensionsIndex != -1) {
197       extensionsMap = new HashMap<Character, String>();
198       unicodeKeywordsMap = new HashMap<String, String>();
199       unicodeAttributeSet = new HashSet<String>();
200 
201       // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
202       // containing all "keywords" it could parse. An ICU keyword is a key-value pair
203       // separated by an "=" (ULOC_KEYWORD_ASSIGN).
204       //
205       // Each keyword item can be one of three things :
206       // - A unicode extension attribute list: In this case the item key is "attribute"
207       //   and the value is a hyphen separated list of unicode attributes.
208       // - A unicode extension keyword: In this case, the item key will be larger than
209       //   1 char in length, and the value will be the unicode extension value.
210       // - A BCP-47 extension subtag: In this case, the item key will be exactly one
211       //   char in length, and the value will be a sequence of unparsed subtags that
212       //   represent the extension.
213       //
214       // Note that this implies that unicode extension keywords are "promoted" to
215       // to the same namespace as the top level extension subtags and their values.
216       // There can't be any collisions in practice because the BCP-47 spec imposes
217       // restrictions on their lengths.
218       final String extensionsString = localeId.substring(extensionsIndex + 1);
219       final String[] extensions = extensionsString.split(";");
220       for (String extension : extensions) {
221         // This is the special key for the unicode attributes
222         if (extension.startsWith("attribute=")) {
223           String unicodeAttributeValues = extension.substring("attribute=".length());
224           for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
225             unicodeAttributeSet.add(unicodeAttribute);
226           }
227         } else {
228           final int separatorIndex = extension.indexOf('=');
229 
230           if (separatorIndex == 1) {
231             // This is a BCP-47 extension subtag.
232             final String value = extension.substring(2);
233             final char extensionId = extension.charAt(0);
234 
235             extensionsMap.put(extensionId, value);
236           } else {
237             // This is a unicode extension keyword.
238             unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
239             extension.substring(separatorIndex + 1));
240           }
241         }
242       }
243     }
244 
245     final String[] outputArray = new String[] { "", "", "", "" };
246     if (extensionsIndex == -1) {
247       parseLangScriptRegionAndVariants(localeId, outputArray);
248     } else {
249       parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
250           outputArray);
251     }
252     Locale.Builder builder = new Locale.Builder();
253     builder.setLanguage(outputArray[IDX_LANGUAGE]);
254     builder.setRegion(outputArray[IDX_REGION]);
255     builder.setVariant(outputArray[IDX_VARIANT]);
256     builder.setScript(outputArray[IDX_SCRIPT]);
257     for (String attribute : unicodeAttributeSet) {
258       builder.addUnicodeLocaleAttribute(attribute);
259     }
260     for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) {
261       builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue());
262     }
263 
264     for (Entry<Character, String> extension : extensionsMap.entrySet()) {
265       builder.setExtension(extension.getKey(), extension.getValue());
266     }
267 
268     return builder.build();
269   }
270 
localesFromStrings(String[] localeNames)271   public static Locale[] localesFromStrings(String[] localeNames) {
272     // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
273     // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
274     // both so that we never need to convert back when talking to it.
275     LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
276     for (String localeName : localeNames) {
277       set.add(localeFromIcuLocaleId(localeName));
278     }
279     return set.toArray(new Locale[set.size()]);
280   }
281 
getAvailableLocales()282   public static Locale[] getAvailableLocales() {
283     if (availableLocalesCache == null) {
284       availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
285     }
286     return availableLocalesCache.clone();
287   }
288 
getTimePattern(Locale locale, boolean is24Hour, boolean withSecond)289   /* package */ static String getTimePattern(Locale locale, boolean is24Hour, boolean withSecond) {
290     final String skeleton;
291     if (withSecond) {
292       skeleton = is24Hour ? "Hms" : "hms";
293     } else {
294       skeleton = is24Hour ? "Hm" : "hm";
295     }
296     return getBestDateTimePattern(skeleton, locale);
297   }
298 
299   @UnsupportedAppUsage
getBestDateTimePattern(String skeleton, Locale locale)300   public static String getBestDateTimePattern(String skeleton, Locale locale) {
301     String languageTag = locale.toLanguageTag();
302     String key = skeleton + "\t" + languageTag;
303     synchronized (CACHED_PATTERNS) {
304       String pattern = CACHED_PATTERNS.get(key);
305       if (pattern == null) {
306         pattern = getBestDateTimePattern0(skeleton, locale);
307         CACHED_PATTERNS.put(key, pattern);
308       }
309       return pattern;
310     }
311   }
312 
getBestDateTimePattern0(String skeleton, Locale locale)313   private static String getBestDateTimePattern0(String skeleton, Locale locale) {
314       DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(locale);
315       return dtpg.getBestPattern(skeleton);
316   }
317 
318   @UnsupportedAppUsage
getBestDateTimePatternNative(String skeleton, String languageTag)319   private static String getBestDateTimePatternNative(String skeleton, String languageTag) {
320     return getBestDateTimePattern0(skeleton, Locale.forLanguageTag(languageTag));
321   }
322 
323   @UnsupportedAppUsage
getDateFormatOrder(String pattern)324   public static char[] getDateFormatOrder(String pattern) {
325     char[] result = new char[3];
326     int resultIndex = 0;
327     boolean sawDay = false;
328     boolean sawMonth = false;
329     boolean sawYear = false;
330 
331     for (int i = 0; i < pattern.length(); ++i) {
332       char ch = pattern.charAt(i);
333       if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
334         if (ch == 'd' && !sawDay) {
335           result[resultIndex++] = 'd';
336           sawDay = true;
337         } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
338           result[resultIndex++] = 'M';
339           sawMonth = true;
340         } else if ((ch == 'y') && !sawYear) {
341           result[resultIndex++] = 'y';
342           sawYear = true;
343         }
344       } else if (ch == 'G') {
345         // Ignore the era specifier, if present.
346       } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
347         throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
348       } else if (ch == '\'') {
349         if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
350           ++i;
351         } else {
352           i = pattern.indexOf('\'', i + 1);
353           if (i == -1) {
354             throw new IllegalArgumentException("Bad quoting in " + pattern);
355           }
356           ++i;
357         }
358       } else {
359         // Ignore spaces and punctuation.
360       }
361     }
362     return result;
363   }
364 
365   // --- Errors.
366 
367   // --- Native methods accessing ICU's database.
368 
  /**
   * Implemented in native code. {@code getAvailableLocales()} passes every returned string
   * through {@code localeFromIcuLocaleId}, so the elements are handled as ICU locale IDs.
   */
  private static native String[] getAvailableLocalesNative();
370 
371     /**
372      * Query ICU for the currency being used in the country right now.
373      * @param countryCode ISO 3166 two-letter country code
374      * @return ISO 4217 3-letter currency code if found, otherwise null.
375      */
getCurrencyCode(String countryCode)376   public static String getCurrencyCode(String countryCode) {
377       CurrencyFilter filter = CurrencyFilter.onRegion(countryCode)
378           .withDate(new Date());
379       List<String> currencies = CurrencyMetaInfo.getInstance().currencies(filter);
380       return currencies.isEmpty() ? null : currencies.get(0);
381   }
382 
383 
  /**
   * Implemented in native code.
   * NOTE(review): presumably returns the three-letter ISO 3166 country code for the region
   * of the given BCP-47 language tag; confirm against the native implementation.
   */
  public static native String getISO3Country(String languageTag);

  /**
   * Implemented in native code.
   * NOTE(review): presumably returns the three-letter ISO 639 language code for the language
   * of the given BCP-47 language tag; confirm against the native implementation.
   */
  public static native String getISO3Language(String languageTag);
387 
388   /**
389    * @deprecated Use {@link android.icu.util.ULocale#addLikelySubtags(ULocale)} instead.
390    * The method is only kept for @UnsupportedAppUsage.
391    */
392   @UnsupportedAppUsage
393   @Deprecated
addLikelySubtags(Locale locale)394   public static Locale addLikelySubtags(Locale locale) {
395       return ULocale.addLikelySubtags(ULocale.forLocale(locale)).toLocale();
396   }
397 
398   /**
399    * @return ICU localeID
400    * @deprecated Use {@link android.icu.util.ULocale#addLikelySubtags(ULocale)} instead.
401    * The method is only kept for @UnsupportedAppUsage.
402    */
403   @UnsupportedAppUsage
404   @Deprecated
addLikelySubtags(String locale)405   public static String addLikelySubtags(String locale) {
406       return ULocale.addLikelySubtags(new ULocale(locale)).getName();
407   }
408 
  /**
   * Implemented in native code.
   * NOTE(review): presumably returns the script subtag of the given locale string; the
   * expected input format (ICU locale ID vs. BCP-47 tag) is not visible here, so confirm
   * against the native implementation.
   *
   * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept
   *     around only for the support library.
   */
  @UnsupportedAppUsage
  @Deprecated
  public static native String getScript(String locale);
416 
  // Native sources for getISOLanguages() and getISOCountries(). Each of those methods calls
  // its native counterpart only while its cache field is still null and stores the result.
  private static native String[] getISOLanguagesNative();
  private static native String[] getISOCountriesNative();
419 
  /**
   * Implemented in native code.
   * NOTE(review): presumably sets the default locale used by native ICU; confirm against
   * the native implementation.
   *
   * @param languageTag a BCP-47 language tag as produced by {@code Locale.toLanguageTag()},
   *     e.g. en-US, not en_US
   */
  public static native void setDefaultLocale(String languageTag);
424 
  /**
   * Implemented in native code.
   * NOTE(review): presumably reports the default locale currently held by native ICU;
   * confirm against the native implementation.
   *
   * @return a locale name, not a BCP-47 language tag, e.g. en_US not en-US
   */
  public static native String getDefaultLocale();
429 }
430