1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.icu; 18 19 import android.compat.annotation.UnsupportedAppUsage; 20 import android.icu.text.CurrencyMetaInfo; 21 import android.icu.text.CurrencyMetaInfo.CurrencyFilter; 22 import android.icu.text.DateTimePatternGenerator; 23 import android.icu.util.ULocale; 24 25 import java.util.Collections; 26 import java.util.Date; 27 import java.util.HashMap; 28 import java.util.HashSet; 29 import java.util.LinkedHashSet; 30 import java.util.List; 31 import java.util.Locale; 32 import java.util.Map; 33 import java.util.Map.Entry; 34 import java.util.Set; 35 import libcore.util.BasicLruCache; 36 37 /** 38 * Makes ICU data accessible to Java. 39 * @hide 40 */ 41 public final class ICU { 42 43 @UnsupportedAppUsage 44 private static final BasicLruCache<String, String> CACHED_PATTERNS = 45 new BasicLruCache<String, String>(8); 46 47 private static Locale[] availableLocalesCache; 48 49 private static String[] isoCountries; 50 51 private static String[] isoLanguages; 52 53 static { 54 // Fill CACHED_PATTERNS with the patterns from default locale and en-US initially. 55 // Likely, this is initialized in Zygote and the initial values in the cache can be shared 56 // among app. The cache was filled by LocaleData in the older Android platform, but moved to 57 // here, due to an performance issue http://b/161846393. 58 // It initializes 2 x 4 = 8 values in the CACHED_PATTERNS whose max size should be >= 8. 59 for (Locale locale : new Locale[] {Locale.US, Locale.getDefault()}) { getTimePattern(locale, false, false)60 getTimePattern(locale, false, false); getTimePattern(locale, false, true)61 getTimePattern(locale, false, true); getTimePattern(locale, true, false)62 getTimePattern(locale, true, false); getTimePattern(locale, true, true)63 getTimePattern(locale, true, true); 64 } 65 } 66 ICU()67 private ICU() { 68 } 69 70 /** 71 * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache. 72 */ getISOLanguages()73 public static String[] getISOLanguages() { 74 if (isoLanguages == null) { 75 isoLanguages = getISOLanguagesNative(); 76 } 77 return isoLanguages.clone(); 78 } 79 80 /** 81 * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache. 82 */ getISOCountries()83 public static String[] getISOCountries() { 84 if (isoCountries == null) { 85 isoCountries = getISOCountriesNative(); 86 } 87 return isoCountries.clone(); 88 } 89 90 private static final int IDX_LANGUAGE = 0; 91 private static final int IDX_SCRIPT = 1; 92 private static final int IDX_REGION = 2; 93 private static final int IDX_VARIANT = 3; 94 95 /* 96 * Parse the {Language, Script, Region, Variant*} section of the ICU locale 97 * ID. This is the bit that appears before the keyword separate "@". The general 98 * structure is a series of ASCII alphanumeric strings (subtags) 99 * separated by underscores. 100 * 101 * Each subtag is interpreted according to its position in the list of subtags 102 * AND its length (groan...). The various cases are explained in comments 103 * below. 104 */ parseLangScriptRegionAndVariants(String string, String[] outputArray)105 private static void parseLangScriptRegionAndVariants(String string, 106 String[] outputArray) { 107 final int first = string.indexOf('_'); 108 final int second = string.indexOf('_', first + 1); 109 final int third = string.indexOf('_', second + 1); 110 111 if (first == -1) { 112 outputArray[IDX_LANGUAGE] = string; 113 } else if (second == -1) { 114 // Language and country ("ja_JP") OR 115 // Language and script ("en_Latn") OR 116 // Language and variant ("en_POSIX"). 117 118 outputArray[IDX_LANGUAGE] = string.substring(0, first); 119 final String secondString = string.substring(first + 1); 120 121 if (secondString.length() == 4) { 122 // 4 Letter ISO script code. 123 outputArray[IDX_SCRIPT] = secondString; 124 } else if (secondString.length() == 2 || secondString.length() == 3) { 125 // 2 or 3 Letter region code. 126 outputArray[IDX_REGION] = secondString; 127 } else { 128 // If we're here, the length of the second half is either 1 or greater 129 // than 5. Assume that ICU won't hand us malformed tags, and therefore 130 // assume the rest of the string is a series of variant tags. 131 outputArray[IDX_VARIANT] = secondString; 132 } 133 } else if (third == -1) { 134 // Language and country and variant ("ja_JP_TRADITIONAL") OR 135 // Language and script and variant ("en_Latn_POSIX") OR 136 // Language and script and region ("en_Latn_US"). OR 137 // Language and variant with multiple subtags ("en_POSIX_XISOP") 138 139 outputArray[IDX_LANGUAGE] = string.substring(0, first); 140 final String secondString = string.substring(first + 1, second); 141 final String thirdString = string.substring(second + 1); 142 143 if (secondString.length() == 4) { 144 // The second subtag is a script. 145 outputArray[IDX_SCRIPT] = secondString; 146 147 // The third subtag can be either a region or a variant, depending 148 // on its length. 149 if (thirdString.length() == 2 || thirdString.length() == 3 || 150 thirdString.isEmpty()) { 151 outputArray[IDX_REGION] = thirdString; 152 } else { 153 outputArray[IDX_VARIANT] = thirdString; 154 } 155 } else if (secondString.isEmpty() || 156 secondString.length() == 2 || secondString.length() == 3) { 157 // The second string is a region, and the third a variant. 158 outputArray[IDX_REGION] = secondString; 159 outputArray[IDX_VARIANT] = thirdString; 160 } else { 161 // Variant with multiple subtags. 162 outputArray[IDX_VARIANT] = string.substring(first + 1); 163 } 164 } else { 165 // Language, script, region and variant with 1 or more subtags 166 // ("en_Latn_US_POSIX") OR 167 // Language, region and variant with 2 or more subtags 168 // (en_US_POSIX_VARIANT). 169 outputArray[IDX_LANGUAGE] = string.substring(0, first); 170 final String secondString = string.substring(first + 1, second); 171 if (secondString.length() == 4) { 172 outputArray[IDX_SCRIPT] = secondString; 173 outputArray[IDX_REGION] = string.substring(second + 1, third); 174 outputArray[IDX_VARIANT] = string.substring(third + 1); 175 } else { 176 outputArray[IDX_REGION] = secondString; 177 outputArray[IDX_VARIANT] = string.substring(second + 1); 178 } 179 } 180 } 181 182 /** 183 * Returns the appropriate {@code Locale} given a {@code String} of the form returned 184 * by {@code toString}. This is very lenient, and doesn't care what's between the underscores: 185 * this method can parse strings that {@code Locale.toString} won't produce. 186 * Used to remove duplication. 187 */ localeFromIcuLocaleId(String localeId)188 public static Locale localeFromIcuLocaleId(String localeId) { 189 // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h). 190 final int extensionsIndex = localeId.indexOf('@'); 191 192 Map<Character, String> extensionsMap = Collections.EMPTY_MAP; 193 Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP; 194 Set<String> unicodeAttributeSet = Collections.EMPTY_SET; 195 196 if (extensionsIndex != -1) { 197 extensionsMap = new HashMap<Character, String>(); 198 unicodeKeywordsMap = new HashMap<String, String>(); 199 unicodeAttributeSet = new HashSet<String>(); 200 201 // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string 202 // containing all "keywords" it could parse. An ICU keyword is a key-value pair 203 // separated by an "=" (ULOC_KEYWORD_ASSIGN). 204 // 205 // Each keyword item can be one of three things : 206 // - A unicode extension attribute list: In this case the item key is "attribute" 207 // and the value is a hyphen separated list of unicode attributes. 208 // - A unicode extension keyword: In this case, the item key will be larger than 209 // 1 char in length, and the value will be the unicode extension value. 210 // - A BCP-47 extension subtag: In this case, the item key will be exactly one 211 // char in length, and the value will be a sequence of unparsed subtags that 212 // represent the extension. 213 // 214 // Note that this implies that unicode extension keywords are "promoted" to 215 // to the same namespace as the top level extension subtags and their values. 216 // There can't be any collisions in practice because the BCP-47 spec imposes 217 // restrictions on their lengths. 218 final String extensionsString = localeId.substring(extensionsIndex + 1); 219 final String[] extensions = extensionsString.split(";"); 220 for (String extension : extensions) { 221 // This is the special key for the unicode attributes 222 if (extension.startsWith("attribute=")) { 223 String unicodeAttributeValues = extension.substring("attribute=".length()); 224 for (String unicodeAttribute : unicodeAttributeValues.split("-")) { 225 unicodeAttributeSet.add(unicodeAttribute); 226 } 227 } else { 228 final int separatorIndex = extension.indexOf('='); 229 230 if (separatorIndex == 1) { 231 // This is a BCP-47 extension subtag. 232 final String value = extension.substring(2); 233 final char extensionId = extension.charAt(0); 234 235 extensionsMap.put(extensionId, value); 236 } else { 237 // This is a unicode extension keyword. 238 unicodeKeywordsMap.put(extension.substring(0, separatorIndex), 239 extension.substring(separatorIndex + 1)); 240 } 241 } 242 } 243 } 244 245 final String[] outputArray = new String[] { "", "", "", "" }; 246 if (extensionsIndex == -1) { 247 parseLangScriptRegionAndVariants(localeId, outputArray); 248 } else { 249 parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex), 250 outputArray); 251 } 252 Locale.Builder builder = new Locale.Builder(); 253 builder.setLanguage(outputArray[IDX_LANGUAGE]); 254 builder.setRegion(outputArray[IDX_REGION]); 255 builder.setVariant(outputArray[IDX_VARIANT]); 256 builder.setScript(outputArray[IDX_SCRIPT]); 257 for (String attribute : unicodeAttributeSet) { 258 builder.addUnicodeLocaleAttribute(attribute); 259 } 260 for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) { 261 builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue()); 262 } 263 264 for (Entry<Character, String> extension : extensionsMap.entrySet()) { 265 builder.setExtension(extension.getKey(), extension.getValue()); 266 } 267 268 return builder.build(); 269 } 270 localesFromStrings(String[] localeNames)271 public static Locale[] localesFromStrings(String[] localeNames) { 272 // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera. 273 // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about 274 // both so that we never need to convert back when talking to it. 275 LinkedHashSet<Locale> set = new LinkedHashSet<Locale>(); 276 for (String localeName : localeNames) { 277 set.add(localeFromIcuLocaleId(localeName)); 278 } 279 return set.toArray(new Locale[set.size()]); 280 } 281 getAvailableLocales()282 public static Locale[] getAvailableLocales() { 283 if (availableLocalesCache == null) { 284 availableLocalesCache = localesFromStrings(getAvailableLocalesNative()); 285 } 286 return availableLocalesCache.clone(); 287 } 288 getTimePattern(Locale locale, boolean is24Hour, boolean withSecond)289 /* package */ static String getTimePattern(Locale locale, boolean is24Hour, boolean withSecond) { 290 final String skeleton; 291 if (withSecond) { 292 skeleton = is24Hour ? "Hms" : "hms"; 293 } else { 294 skeleton = is24Hour ? "Hm" : "hm"; 295 } 296 return getBestDateTimePattern(skeleton, locale); 297 } 298 299 @UnsupportedAppUsage getBestDateTimePattern(String skeleton, Locale locale)300 public static String getBestDateTimePattern(String skeleton, Locale locale) { 301 String languageTag = locale.toLanguageTag(); 302 String key = skeleton + "\t" + languageTag; 303 synchronized (CACHED_PATTERNS) { 304 String pattern = CACHED_PATTERNS.get(key); 305 if (pattern == null) { 306 pattern = getBestDateTimePattern0(skeleton, locale); 307 CACHED_PATTERNS.put(key, pattern); 308 } 309 return pattern; 310 } 311 } 312 getBestDateTimePattern0(String skeleton, Locale locale)313 private static String getBestDateTimePattern0(String skeleton, Locale locale) { 314 DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(locale); 315 return dtpg.getBestPattern(skeleton); 316 } 317 318 @UnsupportedAppUsage getBestDateTimePatternNative(String skeleton, String languageTag)319 private static String getBestDateTimePatternNative(String skeleton, String languageTag) { 320 return getBestDateTimePattern0(skeleton, Locale.forLanguageTag(languageTag)); 321 } 322 323 @UnsupportedAppUsage getDateFormatOrder(String pattern)324 public static char[] getDateFormatOrder(String pattern) { 325 char[] result = new char[3]; 326 int resultIndex = 0; 327 boolean sawDay = false; 328 boolean sawMonth = false; 329 boolean sawYear = false; 330 331 for (int i = 0; i < pattern.length(); ++i) { 332 char ch = pattern.charAt(i); 333 if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') { 334 if (ch == 'd' && !sawDay) { 335 result[resultIndex++] = 'd'; 336 sawDay = true; 337 } else if ((ch == 'L' || ch == 'M') && !sawMonth) { 338 result[resultIndex++] = 'M'; 339 sawMonth = true; 340 } else if ((ch == 'y') && !sawYear) { 341 result[resultIndex++] = 'y'; 342 sawYear = true; 343 } 344 } else if (ch == 'G') { 345 // Ignore the era specifier, if present. 346 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 347 throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern); 348 } else if (ch == '\'') { 349 if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') { 350 ++i; 351 } else { 352 i = pattern.indexOf('\'', i + 1); 353 if (i == -1) { 354 throw new IllegalArgumentException("Bad quoting in " + pattern); 355 } 356 ++i; 357 } 358 } else { 359 // Ignore spaces and punctuation. 360 } 361 } 362 return result; 363 } 364 365 // --- Errors. 366 367 // --- Native methods accessing ICU's database. 368 getAvailableLocalesNative()369 private static native String[] getAvailableLocalesNative(); 370 371 /** 372 * Query ICU for the currency being used in the country right now. 373 * @param countryCode ISO 3166 two-letter country code 374 * @return ISO 4217 3-letter currency code if found, otherwise null. 375 */ getCurrencyCode(String countryCode)376 public static String getCurrencyCode(String countryCode) { 377 CurrencyFilter filter = CurrencyFilter.onRegion(countryCode) 378 .withDate(new Date()); 379 List<String> currencies = CurrencyMetaInfo.getInstance().currencies(filter); 380 return currencies.isEmpty() ? null : currencies.get(0); 381 } 382 383 getISO3Country(String languageTag)384 public static native String getISO3Country(String languageTag); 385 getISO3Language(String languageTag)386 public static native String getISO3Language(String languageTag); 387 388 /** 389 * @deprecated Use {@link android.icu.util.ULocale#addLikelySubtags(ULocale)} instead. 390 * The method is only kept for @UnsupportedAppUsage. 391 */ 392 @UnsupportedAppUsage 393 @Deprecated addLikelySubtags(Locale locale)394 public static Locale addLikelySubtags(Locale locale) { 395 return ULocale.addLikelySubtags(ULocale.forLocale(locale)).toLocale(); 396 } 397 398 /** 399 * @return ICU localeID 400 * @deprecated Use {@link android.icu.util.ULocale#addLikelySubtags(ULocale)} instead. 401 * The method is only kept for @UnsupportedAppUsage. 402 */ 403 @UnsupportedAppUsage 404 @Deprecated addLikelySubtags(String locale)405 public static String addLikelySubtags(String locale) { 406 return ULocale.addLikelySubtags(new ULocale(locale)).getName(); 407 } 408 409 /** 410 * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept 411 * around only for the support library. 412 */ 413 @UnsupportedAppUsage 414 @Deprecated getScript(String locale)415 public static native String getScript(String locale); 416 getISOLanguagesNative()417 private static native String[] getISOLanguagesNative(); getISOCountriesNative()418 private static native String[] getISOCountriesNative(); 419 420 /** 421 * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US 422 */ setDefaultLocale(String languageTag)423 public static native void setDefaultLocale(String languageTag); 424 425 /** 426 * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US. 427 */ getDefaultLocale()428 public static native String getDefaultLocale(); 429 } 430