1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 
17 package com.android.inputmethod.latin.dicttool;
18 
19 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
20 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
21 import com.android.inputmethod.latin.makedict.DictDecoder;
22 import com.android.inputmethod.latin.makedict.DictionaryHeader;
23 import com.android.inputmethod.latin.makedict.FormatSpec;
24 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
25 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
26 import com.android.inputmethod.latin.makedict.FusionDictionary;
27 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
28 
29 import java.io.BufferedInputStream;
30 import java.io.BufferedOutputStream;
31 import java.io.BufferedReader;
32 import java.io.File;
33 import java.io.FileInputStream;
34 import java.io.FileNotFoundException;
35 import java.io.FileOutputStream;
36 import java.io.IOException;
37 import java.io.InputStream;
38 import java.io.InputStreamReader;
39 import java.io.OutputStream;
40 import java.util.HashMap;
41 
42 import javax.annotation.Nonnull;
43 import javax.annotation.Nullable;
44 
45 /**
46  * Class grouping utilities for offline dictionary making.
47  *
48  * Those should not be used on-device, essentially because they are quite
49  * liberal about I/O and performance.
50  */
51 public final class BinaryDictOffdeviceUtils {
52     // Prefix and suffix are arbitrary, the values do not really matter
53     private final static String PREFIX = "dicttool";
54     private final static String SUFFIX = ".tmp";
55     private final static int COPY_BUFFER_SIZE = 8192;
56 
57     public static class DecoderChainSpec<T> {
58         public final static int COMPRESSION = 1;
59         public final static int ENCRYPTION = 2;
60 
61         private final static int[][] VALID_DECODER_CHAINS = {
62             { }, { COMPRESSION }, { ENCRYPTION, COMPRESSION }
63         };
64 
65         private final int mDecoderSpecIndex;
66         public T mResult;
67 
DecoderChainSpec()68         public DecoderChainSpec() {
69             mDecoderSpecIndex = 0;
70             mResult = null;
71         }
72 
DecoderChainSpec(final DecoderChainSpec<T> src)73         private DecoderChainSpec(final DecoderChainSpec<T> src) {
74             mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
75             mResult = src.mResult;
76         }
77 
getStepDescription(final int step)78         private String getStepDescription(final int step) {
79             switch (step) {
80             case COMPRESSION:
81                 return "compression";
82             case ENCRYPTION:
83                 return "encryption";
84             default:
85                 return "unknown";
86             }
87         }
88 
describeChain()89         public String describeChain() {
90             final StringBuilder s = new StringBuilder("raw");
91             for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
92                 s.append(" > ");
93                 s.append(getStepDescription(step));
94             }
95             return s.toString();
96         }
97 
98         /**
99          * Returns the next sequential spec. If exhausted, return null.
100          */
next()101         public DecoderChainSpec next() {
102             if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) {
103                 return null;
104             }
105             return new DecoderChainSpec(this);
106         }
107 
getStream(final File src)108         public InputStream getStream(final File src) throws FileNotFoundException, IOException {
109             InputStream input = new BufferedInputStream(new FileInputStream(src));
110             for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
111                 switch (step) {
112                 case COMPRESSION:
113                     input = Compress.getUncompressedStream(input);
114                     break;
115                 case ENCRYPTION:
116                     input = Crypt.getDecryptedStream(input);
117                     break;
118                 }
119             }
120             return input;
121         }
122     }
123 
124     public interface InputProcessor<T> {
125         @Nonnull
process(@onnull final InputStream input)126         public T process(@Nonnull final InputStream input)
127                 throws IOException, UnsupportedFormatException;
128     }
129 
130     public static class CopyProcessor implements InputProcessor<File> {
131         @Override @Nonnull
process(@onnull final InputStream input)132         public File process(@Nonnull final InputStream input) throws IOException,
133                 UnsupportedFormatException {
134             final File dst = File.createTempFile(PREFIX, SUFFIX);
135             dst.deleteOnExit();
136             try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) {
137                 copy(input, output);
138                 output.flush();
139                 output.close();
140                 if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
141                         || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) {
142                     return dst;
143                 }
144             }
145             throw new UnsupportedFormatException("Input stream not at the expected format");
146         }
147     }
148 
149     public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
150         // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
151         // than this. Revisit this if needed later.
152         private final int MAX_HEADER_LENGTH = 32 * 1024;
153         @Override @Nonnull
process(final InputStream input)154         public DictionaryHeader process(final InputStream input) throws IOException,
155                 UnsupportedFormatException {
156             // Do everything as curtly and ad-hoc as possible for performance.
157             final byte[] tmpBuffer = new byte[12];
158             if (tmpBuffer.length != input.read(tmpBuffer)) {
159                 throw new UnsupportedFormatException("File too short, not a dictionary");
160             }
161             // Ad-hoc check for the magic number. See FormatSpec.java as well as
162             // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader().
163             final int MAGIC_NUMBER_START_OFFSET = 0;
164             final int VERSION_START_OFFSET = 4;
165             final int HEADER_SIZE_OFFSET = 8;
166             final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24)
167                     + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16)
168                     + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8)
169                     + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF);
170             if (magicNumber != FormatSpec.MAGIC_NUMBER) {
171                 throw new UnsupportedFormatException("Wrong magic number");
172             }
173             final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
174                     + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
175             if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
176                     && version != FormatSpec.VERSION202) {
177                 throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
178             }
179             final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
180                     + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
181                     + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
182                     + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
183             if (totalHeaderSize > MAX_HEADER_LENGTH) {
184                 throw new UnsupportedFormatException("Header too large");
185             }
186             final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
187             readStreamExhaustively(input, headerBuffer);
188             final HashMap<String, String> attributes =
189                     BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
190             return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
191                     new FormatOptions(version, false /* hasTimestamp */));
192         }
193     }
194 
readStreamExhaustively(final InputStream inputStream, final byte[] outBuffer)195     private static void readStreamExhaustively(final InputStream inputStream,
196             final byte[] outBuffer) throws IOException, UnsupportedFormatException {
197         int readBytes = 0;
198         int readBytesLastCycle = -1;
199         while (readBytes != outBuffer.length) {
200             readBytesLastCycle = inputStream.read(outBuffer, readBytes,
201                     outBuffer.length - readBytes);
202             if (readBytesLastCycle == -1)
203                 throw new UnsupportedFormatException("File shorter than specified in the header"
204                         + " (expected " + outBuffer.length + ", read " + readBytes + ")");
205             readBytes += readBytesLastCycle;
206         }
207     }
208 
copy(final InputStream input, final OutputStream output)209     public static void copy(final InputStream input, final OutputStream output) throws IOException {
210         final byte[] buffer = new byte[COPY_BUFFER_SIZE];
211         for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
212             output.write(buffer, 0, readBytes);
213         }
214     }
215 
216     /**
217      * Process a dictionary, decrypting/uncompressing it on the fly as necessary.
218      *
219      * This will execute the given processor repeatedly with the possible alternatives
220      * for dictionary format until the processor does not throw an exception.
221      * If the processor succeeds for none of the possible formats, the method returns null.
222      */
223     @Nullable
decodeDictionaryForProcess(@onnull final File src, @Nonnull final InputProcessor<T> processor)224     public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src,
225             @Nonnull final InputProcessor<T> processor) {
226         @Nonnull DecoderChainSpec spec = new DecoderChainSpec();
227         while (null != spec) {
228             try {
229                 final InputStream input = spec.getStream(src);
230                 spec.mResult = processor.process(input);
231                 try {
232                     input.close();
233                 } catch (IOException e) {
234                     // CipherInputStream doesn't like being closed without having read the
235                     // entire stream, for some reason. But we don't want to because it's a waste
236                     // of resources. We really, really don't care about this.
237                     // However on close() CipherInputStream does throw this exception, wrapped
238                     // in an IOException so we need to catch it.
239                     if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
240                         throw e;
241                     }
242                 }
243                 return spec;
244             } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
245                 // If the format is not the right one for this file, the processor will throw one
246                 // of these exceptions. In our case, that means we should try the next spec,
247                 // since it may still be at another format we haven't tried yet.
248                 // TODO: stop using exceptions for this non-exceptional case.
249             }
250             spec = spec.next();
251         }
252         return null;
253     }
254 
255     /**
256      * Get a decoder chain spec with a raw dictionary file. This makes a new file on the
257      * disk ready for any treatment the client wants.
258      */
259     @Nullable
getRawDictionaryOrNull(@onnull final File src)260     public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) {
261         return decodeDictionaryForProcess(src, new CopyProcessor());
262     }
263 
getDictionary(final String filename, final boolean report)264     static FusionDictionary getDictionary(final String filename, final boolean report) {
265         final File file = new File(filename);
266         if (report) {
267             System.out.println("Dictionary : " + file.getAbsolutePath());
268             System.out.println("Size : " + file.length() + " bytes");
269         }
270         try {
271             final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file);
272             if (null == decodedSpec) {
273                 throw new RuntimeException("Does not seem to be a dictionary file " + filename);
274             }
275             if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) {
276                 if (report) {
277                     System.out.println("Format : Combined format");
278                     System.out.println("Packaging : " + decodedSpec.describeChain());
279                     System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
280                 }
281                 try (final BufferedReader reader = new BufferedReader(
282                         new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) {
283                     return CombinedInputOutput.readDictionaryCombined(reader);
284                 }
285             }
286             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
287                     decodedSpec.mResult, 0, decodedSpec.mResult.length(),
288                     DictDecoder.USE_BYTEARRAY);
289             if (report) {
290                 System.out.println("Format : Binary dictionary format");
291                 System.out.println("Packaging : " + decodedSpec.describeChain());
292                 System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
293             }
294             return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
295         } catch (final IOException | UnsupportedFormatException e) {
296             throw new RuntimeException("Can't read file " + filename, e);
297         }
298     }
299 }
300