1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.dex;
18 
19 import com.android.dex.util.ByteInput;
20 import java.io.UTFDataFormatException;
21 
22 /**
23  * Modified UTF-8 as described in the dex file format spec.
24  */
25 public final class Mutf8 {
Mutf8()26     private Mutf8() {}
27 
28     /**
29      * Decodes bytes from {@code in} into {@code out} until a delimiter 0x00 is
30      * encountered. Returns a new string containing the decoded characters.
31      */
decode(ByteInput in, char[] out)32     public static String decode(ByteInput in, char[] out) throws UTFDataFormatException {
33         int s = 0;
34         while (true) {
35             char a = (char) (in.readByte() & 0xff);
36             if (a == 0) {
37                 return new String(out, 0, s);
38             }
39             out[s] = a;
40             if (a < '\u0080') {
41                 s++;
42             } else if ((a & 0xe0) == 0xc0) {
43                 int b = in.readByte() & 0xff;
44                 if ((b & 0xC0) != 0x80) {
45                     throw new UTFDataFormatException("bad second byte");
46                 }
47                 out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
48             } else if ((a & 0xf0) == 0xe0) {
49                 int b = in.readByte() & 0xff;
50                 int c = in.readByte() & 0xff;
51                 if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
52                     throw new UTFDataFormatException("bad second or third byte");
53                 }
54                 out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
55             } else {
56                 throw new UTFDataFormatException("bad byte");
57             }
58         }
59     }
60 
61     /**
62      * Returns the number of bytes the modified UTF8 representation of 's' would take.
63      */
countBytes(String s, boolean shortLength)64     private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
65         long result = 0;
66         final int length = s.length();
67         for (int i = 0; i < length; ++i) {
68             char ch = s.charAt(i);
69             if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
70                 ++result;
71             } else if (ch <= 2047) {
72                 result += 2;
73             } else {
74                 result += 3;
75             }
76             if (shortLength && result > 65535) {
77                 throw new UTFDataFormatException("String more than 65535 UTF bytes long");
78             }
79         }
80         return result;
81     }
82 
83     /**
84      * Encodes the modified UTF-8 bytes corresponding to {@code s} into  {@code
85      * dst}, starting at {@code offset}.
86      */
encode(byte[] dst, int offset, String s)87     public static void encode(byte[] dst, int offset, String s) {
88         final int length = s.length();
89         for (int i = 0; i < length; i++) {
90             char ch = s.charAt(i);
91             if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
92                 dst[offset++] = (byte) ch;
93             } else if (ch <= 2047) {
94                 dst[offset++] = (byte) (0xc0 | (0x1f & (ch >> 6)));
95                 dst[offset++] = (byte) (0x80 | (0x3f & ch));
96             } else {
97                 dst[offset++] = (byte) (0xe0 | (0x0f & (ch >> 12)));
98                 dst[offset++] = (byte) (0x80 | (0x3f & (ch >> 6)));
99                 dst[offset++] = (byte) (0x80 | (0x3f & ch));
100             }
101         }
102     }
103 
104     /**
105      * Returns an array containing the <i>modified UTF-8</i> form of {@code s}.
106      */
encode(String s)107     public static byte[] encode(String s) throws UTFDataFormatException {
108         int utfCount = (int) countBytes(s, true);
109         byte[] result = new byte[utfCount];
110         encode(result, 0, s);
111         return result;
112     }
113 }
114