/*
 * Copyright (C) 2007 Esmertec AG.
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.android.mms.pdu;

import android.compat.annotation.UnsupportedAppUsage;

import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Locale;

/**
 * Static lookup tables between IANA MIBenum numbers and charset MIME names.
 *
 * <p>The class holds two parallel arrays ({@code MIBENUM_NUMBERS} and
 * {@code MIME_NAMES}) that are paired up by index once, at class-load time,
 * into a number-to-name map and a name-to-number map. It cannot be
 * instantiated; use {@link #getMimeName(int)} and
 * {@link #getMibEnumValue(String)}.
 */
public class CharacterSets {
    /**
     * IANA assigned MIB enum numbers.
     *
     * From wap-230-wsp-20010705-a.pdf
     * Any-charset = <Octet 128>
     * Equivalent to the special RFC2616 charset value "*"
     */
    public static final int ANY_CHARSET = 0x00;
    public static final int US_ASCII    = 0x03;
    public static final int ISO_8859_1  = 0x04;
    public static final int ISO_8859_2  = 0x05;
    public static final int ISO_8859_3  = 0x06;
    public static final int ISO_8859_4  = 0x07;
    public static final int ISO_8859_5  = 0x08;
    public static final int ISO_8859_6  = 0x09;
    public static final int ISO_8859_7  = 0x0A;
    public static final int ISO_8859_8  = 0x0B;
    public static final int ISO_8859_9  = 0x0C;
    public static final int SHIFT_JIS   = 0x11;
    public static final int UTF_8       = 0x6A;
    public static final int BIG5        = 0x07EA;
    public static final int UCS2        = 0x03E8;
    public static final int UTF_16      = 0x03F7;

    /**
     * Extend charsets.
     *
     * From http://www.iana.org/assignments/character-sets/
     */
    public static final int BIG5_HKSCS      = 0x0835; //2101
    public static final int BOCU_1          = 0x03FC; //1020
    public static final int CESU_8          = 0x03F8; //1016
    public static final int CP864           = 0x0803; //2051
    public static final int EUC_JP          = 0x12;   //18
    public static final int EUC_KR          = 0x26;   //38
    public static final int GB18030         = 0x72;   //114
    public static final int GBK             = 0x71;   //113
    public static final int HZ_GB_2312      = 0x0825; //2085
    public static final int GB_2312         = 0x07E9; //2025
    public static final int ISO_2022_CN     = 0x68;   //104
    public static final int ISO_2022_CN_EXT = 0x69;   //105
    public static final int ISO_2022_JP     = 0x27;   //39
    public static final int ISO_2022_KR     = 0x25;   //37
    public static final int ISO_8859_10     = 0x0D;   //13
    public static final int ISO_8859_13     = 0x6D;   //109
    public static final int ISO_8859_14     = 0x6E;   //110
    public static final int ISO_8859_15     = 0x6F;   //111
    public static final int ISO_8859_16     = 0x70;   //112
    public static final int KOI8_R          = 0x0824; //2084
    public static final int KOI8_U          = 0x0828; //2088
    public static final int MACINTOSH       = 0x07EB; //2027
    public static final int SCSU            = 0x03F3; //1011
    public static final int TIS_620         = 0x08D3; //2259
    public static final int UTF_16BE        = 0x03F5; //1013
    public static final int UTF_16LE        = 0x03F6; //1014
    public static final int UTF_32          = 0x03F9; //1017
    public static final int UTF_32BE        = 0x03FA; //1018
    public static final int UTF_32LE        = 0x03FB; //1019
    public static final int UTF_7           = 0x03F4; //1012
    public static final int WINDOWS_1250    = 0x08CA; //2250
    public static final int WINDOWS_1251    = 0x08CB; //2251
    public static final int WINDOWS_1252    = 0x08CC; //2252
    public static final int WINDOWS_1253    = 0x08CD; //2253
    public static final int WINDOWS_1254    = 0x08CE; //2254
    public static final int WINDOWS_1255    = 0x08CF; //2255
    public static final int WINDOWS_1256    = 0x08D0; //2256
    public static final int WINDOWS_1257    = 0x08D1; //2257
    public static final int WINDOWS_1258    = 0x08D2; //2258

    /**
     * If the encoding of given data is unsupported, use UTF_8 to decode it.
     */
    public static final int DEFAULT_CHARSET = UTF_8;

    /**
     * Array of MIB enum numbers.
     *
     * Entry {@code i} here is paired with entry {@code i} of
     * {@link #MIME_NAMES}; the two arrays must be kept in the same order and
     * at the same length.
     */
    private static final int[] MIBENUM_NUMBERS = {
        ANY_CHARSET,
        US_ASCII,
        ISO_8859_1,
        ISO_8859_2,
        ISO_8859_3,
        ISO_8859_4,
        ISO_8859_5,
        ISO_8859_6,
        ISO_8859_7,
        ISO_8859_8,
        ISO_8859_9,
        SHIFT_JIS,
        UTF_8,
        BIG5,
        UCS2,
        UTF_16,
        BIG5_HKSCS,
        BOCU_1,
        CESU_8,
        CP864,
        EUC_JP,
        EUC_KR,
        GB18030,
        GBK,
        HZ_GB_2312,
        GB_2312,
        ISO_2022_CN,
        ISO_2022_CN_EXT,
        ISO_2022_JP,
        ISO_2022_KR,
        ISO_8859_10,
        ISO_8859_13,
        ISO_8859_14,
        ISO_8859_15,
        ISO_8859_16,
        KOI8_R,
        KOI8_U,
        MACINTOSH,
        SCSU,
        TIS_620,
        UTF_16BE,
        UTF_16LE,
        UTF_32,
        UTF_32BE,
        UTF_32LE,
        UTF_7,
        WINDOWS_1250,
        WINDOWS_1251,
        WINDOWS_1252,
        WINDOWS_1253,
        WINDOWS_1254,
        WINDOWS_1255,
        WINDOWS_1256,
        WINDOWS_1257,
        WINDOWS_1258,
    };

    /**
     * The Well-known-charset Mime name.
     */
    public static final String MIMENAME_ANY_CHARSET = "*";
    public static final String MIMENAME_US_ASCII    = "us-ascii";
    public static final String MIMENAME_ISO_8859_1  = "iso-8859-1";
    public static final String MIMENAME_ISO_8859_2  = "iso-8859-2";
    public static final String MIMENAME_ISO_8859_3  = "iso-8859-3";
    public static final String MIMENAME_ISO_8859_4  = "iso-8859-4";
    public static final String MIMENAME_ISO_8859_5  = "iso-8859-5";
    public static final String MIMENAME_ISO_8859_6  = "iso-8859-6";
    public static final String MIMENAME_ISO_8859_7  = "iso-8859-7";
    public static final String MIMENAME_ISO_8859_8  = "iso-8859-8";
    public static final String MIMENAME_ISO_8859_9  = "iso-8859-9";
    public static final String MIMENAME_SHIFT_JIS   = "shift_JIS";
    public static final String MIMENAME_UTF_8       = "utf-8";
    public static final String MIMENAME_BIG5        = "big5";
    public static final String MIMENAME_UCS2        = "iso-10646-ucs-2";
    public static final String MIMENAME_UTF_16      = "utf-16";

    /**
     * Extend charsets.
     *
     * From http://www.iana.org/assignments/character-sets/
     */
    public static final String MIMENAME_BIG5_HKSCS      = "Big5-HKSCS";
    public static final String MIMENAME_BOCU_1          = "BOCU-1";
    public static final String MIMENAME_CESU_8          = "CESU-8";
    public static final String MIMENAME_CP864           = "cp864";
    public static final String MIMENAME_EUC_JP          = "EUC-JP";
    public static final String MIMENAME_EUC_KR          = "EUC-KR";
    public static final String MIMENAME_GB18030         = "GB18030";
    public static final String MIMENAME_GBK             = "GBK";
    public static final String MIMENAME_HZ_GB_2312      = "HZ-GB-2312";
    public static final String MIMENAME_GB_2312         = "GB2312";
    public static final String MIMENAME_ISO_2022_CN     = "ISO-2022-CN";
    public static final String MIMENAME_ISO_2022_CN_EXT = "ISO-2022-CN-EXT";
    public static final String MIMENAME_ISO_2022_JP     = "ISO-2022-JP";
    public static final String MIMENAME_ISO_2022_KR     = "ISO-2022-KR";
    public static final String MIMENAME_ISO_8859_10     = "ISO-8859-10";
    public static final String MIMENAME_ISO_8859_13     = "ISO-8859-13";
    public static final String MIMENAME_ISO_8859_14     = "ISO-8859-14";
    public static final String MIMENAME_ISO_8859_15     = "ISO-8859-15";
    public static final String MIMENAME_ISO_8859_16     = "ISO-8859-16";
    public static final String MIMENAME_KOI8_R          = "KOI8-R";
    public static final String MIMENAME_KOI8_U          = "KOI8-U";
    public static final String MIMENAME_MACINTOSH       = "macintosh";
    public static final String MIMENAME_SCSU            = "SCSU";
    public static final String MIMENAME_TIS_620         = "TIS-620";
    public static final String MIMENAME_UTF_16BE        = "UTF-16BE";
    public static final String MIMENAME_UTF_16LE        = "UTF-16LE";
    public static final String MIMENAME_UTF_32          = "UTF-32";
    public static final String MIMENAME_UTF_32BE        = "UTF-32BE";
    public static final String MIMENAME_UTF_32LE        = "UTF-32LE";
    public static final String MIMENAME_UTF_7           = "UTF-7";
    public static final String MIMENAME_WINDOWS_1250    = "windows-1250";
    public static final String MIMENAME_WINDOWS_1251    = "windows-1251";
    public static final String MIMENAME_WINDOWS_1252    = "windows-1252";
    public static final String MIMENAME_WINDOWS_1253    = "windows-1253";
    public static final String MIMENAME_WINDOWS_1254    = "windows-1254";
    public static final String MIMENAME_WINDOWS_1255    = "windows-1255";
    public static final String MIMENAME_WINDOWS_1256    = "windows-1256";
    public static final String MIMENAME_WINDOWS_1257    = "windows-1257";
    public static final String MIMENAME_WINDOWS_1258    = "windows-1258";

    /** MIME name corresponding to {@link #DEFAULT_CHARSET}. */
    public static final String DEFAULT_CHARSET_NAME = MIMENAME_UTF_8;

    /**
     * Array of the names of character sets.
     *
     * Entry {@code i} here is paired with entry {@code i} of
     * {@link #MIBENUM_NUMBERS}.
     */
    private static final String[] MIME_NAMES = {
        MIMENAME_ANY_CHARSET,
        MIMENAME_US_ASCII,
        MIMENAME_ISO_8859_1,
        MIMENAME_ISO_8859_2,
        MIMENAME_ISO_8859_3,
        MIMENAME_ISO_8859_4,
        MIMENAME_ISO_8859_5,
        MIMENAME_ISO_8859_6,
        MIMENAME_ISO_8859_7,
        MIMENAME_ISO_8859_8,
        MIMENAME_ISO_8859_9,
        MIMENAME_SHIFT_JIS,
        MIMENAME_UTF_8,
        MIMENAME_BIG5,
        MIMENAME_UCS2,
        MIMENAME_UTF_16,
        MIMENAME_BIG5_HKSCS,
        MIMENAME_BOCU_1,
        MIMENAME_CESU_8,
        MIMENAME_CP864,
        MIMENAME_EUC_JP,
        MIMENAME_EUC_KR,
        MIMENAME_GB18030,
        MIMENAME_GBK,
        MIMENAME_HZ_GB_2312,
        MIMENAME_GB_2312,
        MIMENAME_ISO_2022_CN,
        MIMENAME_ISO_2022_CN_EXT,
        MIMENAME_ISO_2022_JP,
        MIMENAME_ISO_2022_KR,
        MIMENAME_ISO_8859_10,
        MIMENAME_ISO_8859_13,
        MIMENAME_ISO_8859_14,
        MIMENAME_ISO_8859_15,
        MIMENAME_ISO_8859_16,
        MIMENAME_KOI8_R,
        MIMENAME_KOI8_U,
        MIMENAME_MACINTOSH,
        MIMENAME_SCSU,
        MIMENAME_TIS_620,
        MIMENAME_UTF_16BE,
        MIMENAME_UTF_16LE,
        MIMENAME_UTF_32,
        MIMENAME_UTF_32BE,
        MIMENAME_UTF_32LE,
        MIMENAME_UTF_7,
        MIMENAME_WINDOWS_1250,
        MIMENAME_WINDOWS_1251,
        MIMENAME_WINDOWS_1252,
        MIMENAME_WINDOWS_1253,
        MIMENAME_WINDOWS_1254,
        MIMENAME_WINDOWS_1255,
        MIMENAME_WINDOWS_1256,
        MIMENAME_WINDOWS_1257,
        MIMENAME_WINDOWS_1258,
    };

    /** MIBenum number to the MIME name exactly as spelled in {@link #MIME_NAMES}. */
    private static final HashMap<Integer, String> MIBENUM_TO_NAME_MAP;

    /**
     * MIME name to MIBenum number. Keys are stored lower-cased with
     * {@link Locale#ROOT} so that lookups can ignore case; callers must
     * normalize the query the same way before calling {@code get}.
     */
    private static final HashMap<String, Integer> NAME_TO_MIBENUM_MAP;

    static {
        // Create the HashMaps.
        MIBENUM_TO_NAME_MAP = new HashMap<>();
        NAME_TO_MIBENUM_MAP = new HashMap<>();
        // The two tables are paired by index, so they must have equal length.
        assert (MIBENUM_NUMBERS.length == MIME_NAMES.length);
        for (int i = 0; i < MIBENUM_NUMBERS.length; i++) {
            MIBENUM_TO_NAME_MAP.put(MIBENUM_NUMBERS[i], MIME_NAMES[i]);
            // Locale.ROOT keeps the folding independent of the device locale
            // (e.g. the Turkish dotless-i rule would otherwise break "ISO-...").
            NAME_TO_MIBENUM_MAP.put(
                    MIME_NAMES[i].toLowerCase(Locale.ROOT), MIBENUM_NUMBERS[i]);
        }
    }

    private CharacterSets() {} // Non-instantiatable

    /**
     * Map an MIBEnum number to the name of the charset which this number
     * is assigned to by IANA.
     *
     * @param mibEnumValue An IANA assigned MIBEnum number.
     * @return The name string of the charset, spelled exactly as the
     *         corresponding {@code MIMENAME_*} constant.
     * @throws UnsupportedEncodingException if the number is not one of the
     *         MIBenum values listed in this class.
     */
    @UnsupportedAppUsage
    public static String getMimeName(int mibEnumValue)
            throws UnsupportedEncodingException {
        String name = MIBENUM_TO_NAME_MAP.get(mibEnumValue);
        if (name == null) {
            throw new UnsupportedEncodingException(
                    "Unsupported charset MIBenum: " + mibEnumValue);
        }
        return name;
    }

    /**
     * Map a well-known charset name to its assigned MIBEnum number.
     *
     * <p>The name is matched without regard to letter case, so
     * {@code "UTF-8"} and {@code "utf-8"} resolve to the same number.
     *
     * @param mimeName The charset name; may be {@code null}.
     * @return The MIBEnum number assigned by IANA for this charset, or
     *         {@code -1} if {@code mimeName} is {@code null}.
     * @throws UnsupportedEncodingException if {@code mimeName} is non-null
     *         and is not one of the charset names listed in this class.
     */
    @UnsupportedAppUsage
    public static int getMibEnumValue(String mimeName)
            throws UnsupportedEncodingException {
        if (mimeName == null) {
            // Existing contract: a missing name is reported as -1, not as an exception.
            return -1;
        }

        Integer mibEnumValue =
                NAME_TO_MIBENUM_MAP.get(mimeName.toLowerCase(Locale.ROOT));
        if (mibEnumValue == null) {
            throw new UnsupportedEncodingException(
                    "Unsupported charset name: " + mimeName);
        }
        return mibEnumValue;
    }
}