1 /*
2  * Copyright (C) 2007 Esmertec AG.
3  * Copyright (C) 2007 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 package com.google.android.mms.pdu;
19 
20 import android.compat.annotation.UnsupportedAppUsage;
21 
22 import java.io.UnsupportedEncodingException;
23 import java.util.HashMap;
24 
25 public class CharacterSets {
26     /**
27      * IANA assigned MIB enum numbers.
28      *
29      * From wap-230-wsp-20010705-a.pdf
30      * Any-charset = <Octet 128>
31      * Equivalent to the special RFC2616 charset value "*"
32      */
33     public static final int ANY_CHARSET = 0x00;
34     public static final int US_ASCII    = 0x03;
35     public static final int ISO_8859_1  = 0x04;
36     public static final int ISO_8859_2  = 0x05;
37     public static final int ISO_8859_3  = 0x06;
38     public static final int ISO_8859_4  = 0x07;
39     public static final int ISO_8859_5  = 0x08;
40     public static final int ISO_8859_6  = 0x09;
41     public static final int ISO_8859_7  = 0x0A;
42     public static final int ISO_8859_8  = 0x0B;
43     public static final int ISO_8859_9  = 0x0C;
44     public static final int SHIFT_JIS   = 0x11;
45     public static final int UTF_8       = 0x6A;
46     public static final int BIG5        = 0x07EA;
47     public static final int UCS2        = 0x03E8;
48     public static final int UTF_16      = 0x03F7;
49 
50     /**
51      * Extend charsets.
52      *
53      * From http://www.iana.org/assignments/character-sets/
54      */
55     public static final int BIG5_HKSCS = 0x0835; //2101
56     public static final int BOCU_1 = 0x03FC; //1020
57     public static final int CESU_8 = 0x03F8; //1016
58     public static final int CP864 = 0x0803; //2051
59     public static final int EUC_JP = 0x12; //18
60     public static final int EUC_KR = 0x26; //38
61     public static final int GB18030 = 0x72; //114
62     public static final int GBK = 0x71; //113
63     public static final int HZ_GB_2312 = 0x0825; //2085
64     public static final int GB_2312 = 0x07E9; //2025
65     public static final int ISO_2022_CN = 0x68; //104
66     public static final int ISO_2022_CN_EXT = 0x69; //105
67     public static final int ISO_2022_JP = 0x27; //39
68     public static final int ISO_2022_KR = 0x25; //37
69     public static final int ISO_8859_10 = 0x0D; //13
70     public static final int ISO_8859_13 = 0x6D; //109
71     public static final int ISO_8859_14 = 0x6E; //110
72     public static final int ISO_8859_15 = 0x6F; //111
73     public static final int ISO_8859_16 = 0x70; //112
74     public static final int KOI8_R = 0x0824; //2084
75     public static final int KOI8_U = 0x0828; //2088
76     public static final int MACINTOSH = 0x07EB; //2027
77     public static final int SCSU = 0x03F3; //1011
78     public static final int TIS_620 = 0x08D3; //2259
79     public static final int UTF_16BE = 0x03F5; //1013
80     public static final int UTF_16LE = 0x03F6; //1014
81     public static final int UTF_32 = 0x03F9; //1017
82     public static final int UTF_32BE = 0x03FA; //1018
83     public static final int UTF_32LE = 0x03FB; //1019
84     public static final int UTF_7 = 0x03F4; //1012
85     public static final int WINDOWS_1250 = 0x08CA; //2250
86     public static final int WINDOWS_1251 = 0x08CB; //2251
87     public static final int WINDOWS_1252 = 0x08CC; //2252
88     public static final int WINDOWS_1253 = 0x08CD; //2253
89     public static final int WINDOWS_1254 = 0x08CE; //2254
90     public static final int WINDOWS_1255 = 0x08CF; //2255
91     public static final int WINDOWS_1256 = 0x08D0; //2256
92     public static final int WINDOWS_1257 = 0x08D1; //2257
93     public static final int WINDOWS_1258 = 0x08D2; //2258
94 
95     /**
96      * If the encoding of given data is unsupported, use UTF_8 to decode it.
97      */
98     public static final int DEFAULT_CHARSET = UTF_8;
99 
100     /**
101      * Array of MIB enum numbers.
102      */
103     private static final int[] MIBENUM_NUMBERS = {
104         ANY_CHARSET,
105         US_ASCII,
106         ISO_8859_1,
107         ISO_8859_2,
108         ISO_8859_3,
109         ISO_8859_4,
110         ISO_8859_5,
111         ISO_8859_6,
112         ISO_8859_7,
113         ISO_8859_8,
114         ISO_8859_9,
115         SHIFT_JIS,
116         UTF_8,
117         BIG5,
118         UCS2,
119         UTF_16,
120         BIG5_HKSCS,
121         BOCU_1,
122         CESU_8,
123         CP864,
124         EUC_JP,
125         EUC_KR,
126         GB18030,
127         GBK,
128         HZ_GB_2312,
129         GB_2312,
130         ISO_2022_CN,
131         ISO_2022_CN_EXT,
132         ISO_2022_JP,
133         ISO_2022_KR,
134         ISO_8859_10,
135         ISO_8859_13,
136         ISO_8859_14,
137         ISO_8859_15,
138         ISO_8859_16,
139         KOI8_R,
140         KOI8_U,
141         MACINTOSH,
142         SCSU,
143         TIS_620,
144         UTF_16BE,
145         UTF_16LE,
146         UTF_32,
147         UTF_32BE,
148         UTF_32LE,
149         UTF_7,
150         WINDOWS_1250,
151         WINDOWS_1251,
152         WINDOWS_1252,
153         WINDOWS_1253,
154         WINDOWS_1254,
155         WINDOWS_1255,
156         WINDOWS_1256,
157         WINDOWS_1257,
158         WINDOWS_1258,
159     };
160 
161     /**
162      * The Well-known-charset Mime name.
163      */
164     public static final String MIMENAME_ANY_CHARSET = "*";
165     public static final String MIMENAME_US_ASCII    = "us-ascii";
166     public static final String MIMENAME_ISO_8859_1  = "iso-8859-1";
167     public static final String MIMENAME_ISO_8859_2  = "iso-8859-2";
168     public static final String MIMENAME_ISO_8859_3  = "iso-8859-3";
169     public static final String MIMENAME_ISO_8859_4  = "iso-8859-4";
170     public static final String MIMENAME_ISO_8859_5  = "iso-8859-5";
171     public static final String MIMENAME_ISO_8859_6  = "iso-8859-6";
172     public static final String MIMENAME_ISO_8859_7  = "iso-8859-7";
173     public static final String MIMENAME_ISO_8859_8  = "iso-8859-8";
174     public static final String MIMENAME_ISO_8859_9  = "iso-8859-9";
175     public static final String MIMENAME_SHIFT_JIS   = "shift_JIS";
176     public static final String MIMENAME_UTF_8       = "utf-8";
177     public static final String MIMENAME_BIG5        = "big5";
178     public static final String MIMENAME_UCS2        = "iso-10646-ucs-2";
179     public static final String MIMENAME_UTF_16      = "utf-16";
180 
181     /**
182      * Extend charsets.
183      *
184      * From http://www.iana.org/assignments/character-sets/
185      */
186     public static final String MIMENAME_BIG5_HKSCS = "Big5-HKSCS";
187     public static final String MIMENAME_BOCU_1 = "BOCU-1";
188     public static final String MIMENAME_CESU_8 = "CESU-8";
189     public static final String MIMENAME_CP864 = "cp864";
190     public static final String MIMENAME_EUC_JP = "EUC-JP";
191     public static final String MIMENAME_EUC_KR = "EUC-KR";
192     public static final String MIMENAME_GB18030 = "GB18030";
193     public static final String MIMENAME_GBK = "GBK";
194     public static final String MIMENAME_HZ_GB_2312 = "HZ-GB-2312";
195     public static final String MIMENAME_GB_2312 = "GB2312";
196     public static final String MIMENAME_ISO_2022_CN = "ISO-2022-CN";
197     public static final String MIMENAME_ISO_2022_CN_EXT = "ISO-2022-CN-EXT";
198     public static final String MIMENAME_ISO_2022_JP = "ISO-2022-JP";
199     public static final String MIMENAME_ISO_2022_KR = "ISO-2022-KR";
200     public static final String MIMENAME_ISO_8859_10 = "ISO-8859-10";
201     public static final String MIMENAME_ISO_8859_13 = "ISO-8859-13";
202     public static final String MIMENAME_ISO_8859_14 = "ISO-8859-14";
203     public static final String MIMENAME_ISO_8859_15 = "ISO-8859-15";
204     public static final String MIMENAME_ISO_8859_16 = "ISO-8859-16";
205     public static final String MIMENAME_KOI8_R = "KOI8-R";
206     public static final String MIMENAME_KOI8_U = "KOI8-U";
207     public static final String MIMENAME_MACINTOSH = "macintosh";
208     public static final String MIMENAME_SCSU = "SCSU";
209     public static final String MIMENAME_TIS_620 = "TIS-620";
210     public static final String MIMENAME_UTF_16BE = "UTF-16BE";
211     public static final String MIMENAME_UTF_16LE = "UTF-16LE";
212     public static final String MIMENAME_UTF_32 = "UTF-32";
213     public static final String MIMENAME_UTF_32BE = "UTF-32BE";
214     public static final String MIMENAME_UTF_32LE = "UTF-32LE";
215     public static final String MIMENAME_UTF_7 = "UTF-7";
216     public static final String MIMENAME_WINDOWS_1250 = "windows-1250";
217     public static final String MIMENAME_WINDOWS_1251 = "windows-1251";
218     public static final String MIMENAME_WINDOWS_1252 = "windows-1252";
219     public static final String MIMENAME_WINDOWS_1253 = "windows-1253";
220     public static final String MIMENAME_WINDOWS_1254 = "windows-1254";
221     public static final String MIMENAME_WINDOWS_1255 = "windows-1255";
222     public static final String MIMENAME_WINDOWS_1256 = "windows-1256";
223     public static final String MIMENAME_WINDOWS_1257 = "windows-1257";
224     public static final String MIMENAME_WINDOWS_1258 = "windows-1258";
225 
226     public static final String DEFAULT_CHARSET_NAME = MIMENAME_UTF_8;
227 
228     /**
229      * Array of the names of character sets.
230      */
231     private static final String[] MIME_NAMES = {
232         MIMENAME_ANY_CHARSET,
233         MIMENAME_US_ASCII,
234         MIMENAME_ISO_8859_1,
235         MIMENAME_ISO_8859_2,
236         MIMENAME_ISO_8859_3,
237         MIMENAME_ISO_8859_4,
238         MIMENAME_ISO_8859_5,
239         MIMENAME_ISO_8859_6,
240         MIMENAME_ISO_8859_7,
241         MIMENAME_ISO_8859_8,
242         MIMENAME_ISO_8859_9,
243         MIMENAME_SHIFT_JIS,
244         MIMENAME_UTF_8,
245         MIMENAME_BIG5,
246         MIMENAME_UCS2,
247         MIMENAME_UTF_16,
248         MIMENAME_BIG5_HKSCS,
249         MIMENAME_BOCU_1,
250         MIMENAME_CESU_8,
251         MIMENAME_CP864,
252         MIMENAME_EUC_JP,
253         MIMENAME_EUC_KR,
254         MIMENAME_GB18030,
255         MIMENAME_GBK,
256         MIMENAME_HZ_GB_2312,
257         MIMENAME_GB_2312,
258         MIMENAME_ISO_2022_CN,
259         MIMENAME_ISO_2022_CN_EXT,
260         MIMENAME_ISO_2022_JP,
261         MIMENAME_ISO_2022_KR,
262         MIMENAME_ISO_8859_10,
263         MIMENAME_ISO_8859_13,
264         MIMENAME_ISO_8859_14,
265         MIMENAME_ISO_8859_15,
266         MIMENAME_ISO_8859_16,
267         MIMENAME_KOI8_R,
268         MIMENAME_KOI8_U,
269         MIMENAME_MACINTOSH,
270         MIMENAME_SCSU,
271         MIMENAME_TIS_620,
272         MIMENAME_UTF_16BE,
273         MIMENAME_UTF_16LE,
274         MIMENAME_UTF_32,
275         MIMENAME_UTF_32BE,
276         MIMENAME_UTF_32LE,
277         MIMENAME_UTF_7,
278         MIMENAME_WINDOWS_1250,
279         MIMENAME_WINDOWS_1251,
280         MIMENAME_WINDOWS_1252,
281         MIMENAME_WINDOWS_1253,
282         MIMENAME_WINDOWS_1254,
283         MIMENAME_WINDOWS_1255,
284         MIMENAME_WINDOWS_1256,
285         MIMENAME_WINDOWS_1257,
286         MIMENAME_WINDOWS_1258,
287     };
288 
289     private static final HashMap<Integer, String> MIBENUM_TO_NAME_MAP;
290     private static final HashMap<String, Integer> NAME_TO_MIBENUM_MAP;
291 
292     static {
293         // Create the HashMaps.
294         MIBENUM_TO_NAME_MAP = new HashMap<Integer, String>();
295         NAME_TO_MIBENUM_MAP = new HashMap<String, Integer>();
assert(MIBENUM_NUMBERS.length == MIME_NAMES.length)296         assert(MIBENUM_NUMBERS.length == MIME_NAMES.length);
297         int count = MIBENUM_NUMBERS.length - 1;
298         for(int i = 0; i <= count; i++) {
MIBENUM_TO_NAME_MAP.put(MIBENUM_NUMBERS[i], MIME_NAMES[i])299             MIBENUM_TO_NAME_MAP.put(MIBENUM_NUMBERS[i], MIME_NAMES[i]);
NAME_TO_MIBENUM_MAP.put(MIME_NAMES[i], MIBENUM_NUMBERS[i])300             NAME_TO_MIBENUM_MAP.put(MIME_NAMES[i], MIBENUM_NUMBERS[i]);
301         }
302     }
303 
CharacterSets()304     private CharacterSets() {} // Non-instantiatable
305 
306     /**
307      * Map an MIBEnum number to the name of the charset which this number
308      * is assigned to by IANA.
309      *
310      * @param mibEnumValue An IANA assigned MIBEnum number.
311      * @return The name string of the charset.
312      * @throws UnsupportedEncodingException
313      */
314     @UnsupportedAppUsage
getMimeName(int mibEnumValue)315     public static String getMimeName(int mibEnumValue)
316             throws UnsupportedEncodingException {
317         String name = MIBENUM_TO_NAME_MAP.get(mibEnumValue);
318         if (name == null) {
319             throw new UnsupportedEncodingException();
320         }
321         return name;
322     }
323 
324     /**
325      * Map a well-known charset name to its assigned MIBEnum number.
326      *
327      * @param mimeName The charset name.
328      * @return The MIBEnum number assigned by IANA for this charset.
329      * @throws UnsupportedEncodingException
330      */
331     @UnsupportedAppUsage
getMibEnumValue(String mimeName)332     public static int getMibEnumValue(String mimeName)
333             throws UnsupportedEncodingException {
334         if(null == mimeName) {
335             return -1;
336         }
337 
338         Integer mibEnumValue = NAME_TO_MIBENUM_MAP.get(mimeName);
339         if (mibEnumValue == null) {
340             throw new UnsupportedEncodingException();
341         }
342         return mibEnumValue;
343     }
344 }
345