1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
19 * Do not edit this file other than updating policy's interface.
20 *
21 * This file was generated from
22 * dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
23 */
24
25 #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
26
27 #include "dictionary/header/header_policy.h"
28 #include "dictionary/property/ngram_property.h"
29 #include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
30 #include "dictionary/structure/backward/v402/content/bigram_dict_content.h"
31 #include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
32 #include "dictionary/structure/backward/v402/ver4_dict_constants.h"
33 #include "dictionary/utils/forgetting_curve_utils.h"
34
35 namespace latinime {
36 namespace backward {
37 namespace v402 {
38
getNextBigram(int * const outBigramPos,int * const outProbability,bool * const outHasNext,int * const bigramEntryPos) const39 void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
40 bool *const outHasNext, int *const bigramEntryPos) const {
41 const BigramEntry bigramEntry =
42 mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
43 if (outBigramPos) {
44 // Lookup target PtNode position.
45 *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
46 bigramEntry.getTargetTerminalId());
47 }
48 if (outProbability) {
49 if (bigramEntry.hasHistoricalInfo()) {
50 *outProbability =
51 ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
52 mHeaderPolicy);
53 } else {
54 *outProbability = bigramEntry.getProbability();
55 }
56 }
57 if (outHasNext) {
58 *outHasNext = bigramEntry.hasNext();
59 }
60 }
61
addNewEntry(const int terminalId,const int newTargetTerminalId,const NgramProperty * const ngramProperty,bool * const outAddedNewEntry)62 bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
63 const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
64 // 1. The word has no bigrams yet.
65 // 2. The word has bigrams, and there is the target in the list.
66 // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
67 // 4. The word has bigrams. We have to append new bigram entry to the list.
68 // 5. Same as 4, but the list is the last entry of the content file.
69 if (outAddedNewEntry) {
70 *outAddedNewEntry = false;
71 }
72 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
73 if (bigramListPos == NOT_A_DICT_POS) {
74 // Case 1. PtNode that doesn't have a bigram list.
75 // Create new bigram list.
76 if (!mBigramDictContent->createNewBigramList(terminalId)) {
77 return false;
78 }
79 const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
80 newTargetTerminalId);
81 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
82 ngramProperty);
83 // Write an entry.
84 const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
85 if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
86 return false;
87 }
88 if (outAddedNewEntry) {
89 *outAddedNewEntry = true;
90 }
91 return true;
92 }
93
94 int tailEntryPos = NOT_A_DICT_POS;
95 const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
96 &tailEntryPos);
97 if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
98 // Case 4, 5.
99 // Add new entry to the bigram list.
100 if (tailEntryPos == NOT_A_DICT_POS) {
101 // Case 4. Create new bigram list.
102 if (!mBigramDictContent->createNewBigramList(terminalId)) {
103 return false;
104 }
105 const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
106 // Copy existing bigram list.
107 if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
108 return false;
109 }
110 }
111 // Write new entry at the tail position of the bigram content.
112 const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
113 newTargetTerminalId);
114 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
115 &newBigramEntry, ngramProperty);
116 if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
117 return false;
118 }
119 // Update has next flag of the tail entry.
120 if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
121 return false;
122 }
123 if (outAddedNewEntry) {
124 *outAddedNewEntry = true;
125 }
126 return true;
127 }
128
129 // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
130 const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
131 if (!originalBigramEntry.isValid()) {
132 // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
133 // entry is updated.
134 if (outAddedNewEntry) {
135 *outAddedNewEntry = true;
136 }
137 }
138 const BigramEntry updatedBigramEntry =
139 originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
140 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
141 &updatedBigramEntry, ngramProperty);
142 return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
143 }
144
removeEntry(const int terminalId,const int targetTerminalId)145 bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
146 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
147 if (bigramListPos == NOT_A_DICT_POS) {
148 // Bigram list doesn't exist.
149 return false;
150 }
151 const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
152 nullptr /* outTailEntryPos */);
153 if (entryPosToUpdate == NOT_A_DICT_POS) {
154 // Bigram entry doesn't exist.
155 return false;
156 }
157 const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
158 if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
159 // Bigram entry doesn't exist.
160 return false;
161 }
162 // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
163 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
164 return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
165 }
166
updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,int * const outBigramCount)167 bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
168 int *const outBigramCount) {
169 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
170 if (bigramListPos == NOT_A_DICT_POS) {
171 // Bigram list doesn't exist.
172 return true;
173 }
174 bool hasNext = true;
175 int readingPos = bigramListPos;
176 while (hasNext) {
177 const int entryPos = readingPos;
178 const BigramEntry bigramEntry =
179 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
180 hasNext = bigramEntry.hasNext();
181 if (!bigramEntry.isValid()) {
182 continue;
183 }
184 const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
185 bigramEntry.getTargetTerminalId());
186 if (targetPtNodePos == NOT_A_DICT_POS) {
187 // Invalidate bigram entry.
188 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
189 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
190 return false;
191 }
192 } else if (bigramEntry.hasHistoricalInfo()) {
193 const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
194 bigramEntry.getHistoricalInfo(), mHeaderPolicy);
195 if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
196 const BigramEntry updatedBigramEntry =
197 bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
198 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
199 return false;
200 }
201 *outBigramCount += 1;
202 } else {
203 // Remove entry.
204 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
205 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
206 return false;
207 }
208 }
209 } else {
210 *outBigramCount += 1;
211 }
212 }
213 return true;
214 }
215
getBigramEntryConut(const int terminalId)216 int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
217 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
218 if (bigramListPos == NOT_A_DICT_POS) {
219 // Bigram list doesn't exist.
220 return 0;
221 }
222 int bigramCount = 0;
223 bool hasNext = true;
224 int readingPos = bigramListPos;
225 while (hasNext) {
226 const BigramEntry bigramEntry =
227 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
228 hasNext = bigramEntry.hasNext();
229 if (bigramEntry.isValid()) {
230 bigramCount++;
231 }
232 }
233 return bigramCount;
234 }
235
getEntryPosToUpdate(const int targetTerminalIdToFind,const int bigramListPos,int * const outTailEntryPos) const236 int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
237 const int bigramListPos, int *const outTailEntryPos) const {
238 if (outTailEntryPos) {
239 *outTailEntryPos = NOT_A_DICT_POS;
240 }
241 bool hasNext = true;
242 int invalidEntryPos = NOT_A_DICT_POS;
243 int readingPos = bigramListPos;
244 while (hasNext) {
245 const int entryPos = readingPos;
246 const BigramEntry bigramEntry =
247 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
248 hasNext = bigramEntry.hasNext();
249 if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
250 // Entry with same target is found.
251 return entryPos;
252 } else if (!bigramEntry.isValid()) {
253 // Invalid entry that can be reused is found.
254 invalidEntryPos = entryPos;
255 }
256 if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
257 if (outTailEntryPos) {
258 *outTailEntryPos = entryPos;
259 }
260 }
261 }
262 return invalidEntryPos;
263 }
264
createUpdatedBigramEntryFrom(const BigramEntry * const originalBigramEntry,const NgramProperty * const ngramProperty) const265 const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
266 const BigramEntry *const originalBigramEntry,
267 const NgramProperty *const ngramProperty) const {
268 // TODO: Consolidate historical info and probability.
269 if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
270 const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo();
271 const HistoricalInfo updatedHistoricalInfo =
272 ForgettingCurveUtils::createUpdatedHistoricalInfo(
273 originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
274 &historicalInfoForUpdate, mHeaderPolicy);
275 return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
276 } else {
277 return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability());
278 }
279 }
280
updateHasNextFlag(const bool hasNext,const int bigramEntryPos)281 bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
282 const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
283 const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
284 return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
285 }
286
287 } // namespace v402
288 } // namespace backward
289 } // namespace latinime
290