1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_JNI_DATA_UTILS_H
18 #define LATINIME_JNI_DATA_UTILS_H
19 
20 #include <vector>
21 
22 #include "defines.h"
23 #include "dictionary/header/header_read_write_utils.h"
24 #include "dictionary/interface/dictionary_header_structure_policy.h"
25 #include "dictionary/property/ngram_context.h"
26 #include "dictionary/property/word_property.h"
27 #include "jni.h"
28 #include "utils/char_utils.h"
29 
30 namespace latinime {
31 
32 class JniDataUtils {
33  public:
jintarrayToVector(JNIEnv * env,jintArray array,std::vector<int> * const outVector)34     static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
35         if (!array) {
36             outVector->clear();
37             return;
38         }
39         const jsize arrayLength = env->GetArrayLength(array);
40         outVector->resize(arrayLength);
41         env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
42     }
43 
constructAttributeMap(JNIEnv * env,jobjectArray attributeKeyStringArray,jobjectArray attributeValueStringArray)44     static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env,
45             jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) {
46         DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
47         const int keyCount = env->GetArrayLength(attributeKeyStringArray);
48         for (int i = 0; i < keyCount; i++) {
49             jstring keyString = static_cast<jstring>(
50                     env->GetObjectArrayElement(attributeKeyStringArray, i));
51             const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
52             char keyChars[keyUtf8Length + 1];
53             env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
54             env->DeleteLocalRef(keyString);
55             keyChars[keyUtf8Length] = '\0';
56             DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
57             HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
58 
59             jstring valueString = static_cast<jstring>(
60                     env->GetObjectArrayElement(attributeValueStringArray, i));
61             const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
62             char valueChars[valueUtf8Length + 1];
63             env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
64             env->DeleteLocalRef(valueString);
65             valueChars[valueUtf8Length] = '\0';
66             DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
67             HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
68             attributeMap[key] = value;
69         }
70         return attributeMap;
71     }
72 
outputCodePoints(JNIEnv * env,jintArray intArrayToOutputCodePoints,const int start,const int maxLength,const int * const codePoints,const int codePointCount,const bool needsNullTermination)73     static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
74             const int maxLength, const int *const codePoints, const int codePointCount,
75             const bool needsNullTermination) {
76         const int codePointBufSize = std::min(maxLength, codePointCount);
77         int outputCodePonts[codePointBufSize];
78         int outputCodePointCount = 0;
79         for (int i = 0; i < codePointBufSize; ++i) {
80             const int codePoint = codePoints[i];
81             int codePointToOutput = codePoint;
82             if (!CharUtils::isInUnicodeSpace(codePoint)) {
83                 if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
84                     // Just skip Beginning-of-Sentence marker.
85                     continue;
86                 }
87                 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
88             } else if (codePoint >= 0x01 && codePoint <= 0x1F) {
89                 // Control code.
90                 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
91             }
92             outputCodePonts[outputCodePointCount++] = codePointToOutput;
93         }
94         env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
95                 outputCodePonts);
96         if (needsNullTermination && outputCodePointCount < maxLength) {
97             env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
98                     1 /* len */, &CODE_POINT_NULL);
99         }
100     }
101 
constructNgramContext(JNIEnv * env,jobjectArray prevWordCodePointArrays,jbooleanArray isBeginningOfSentenceArray,const size_t prevWordCount)102     static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
103             jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
104         int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
105         int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
106         bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
107         for (size_t i = 0; i < prevWordCount; ++i) {
108             prevWordCodePointCount[i] = 0;
109             isBeginningOfSentence[i] = false;
110             jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
111             if (!prevWord) {
112                 continue;
113             }
114             jsize prevWordLength = env->GetArrayLength(prevWord);
115             if (prevWordLength > MAX_WORD_LENGTH) {
116                 continue;
117             }
118             env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
119             env->DeleteLocalRef(prevWord);
120             prevWordCodePointCount[i] = prevWordLength;
121             jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
122             env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
123                     &isBeginningOfSentenceBoolean);
124             isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
125         }
126         return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
127                 prevWordCount);
128     }
129 
putBooleanToArray(JNIEnv * env,jbooleanArray array,const int index,const jboolean value)130     static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
131             const jboolean value) {
132         env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
133     }
134 
putIntToArray(JNIEnv * env,jintArray array,const int index,const int value)135     static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
136         env->SetIntArrayRegion(array, index, 1 /* len */, &value);
137     }
138 
putFloatToArray(JNIEnv * env,jfloatArray array,const int index,const float value)139     static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index,
140             const float value) {
141         env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
142     }
143 
144     static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty,
145             jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo,
146             jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
147             jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
148             jobject outShortcutProbabilities);
149 
150  private:
151     DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
152 
153     static const int CODE_POINT_REPLACEMENT_CHARACTER;
154     static const int CODE_POINT_NULL;
155 };
156 } // namespace latinime
157 #endif // LATINIME_JNI_DATA_UTILS_H
158