1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
19  * Do not edit this file other than updating policy's interface.
20  *
21  * This file was generated from
22  *   dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
23  */
24 
25 #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
26 
27 #include "dictionary/header/header_policy.h"
28 #include "dictionary/property/ngram_property.h"
29 #include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
30 #include "dictionary/structure/backward/v402/content/bigram_dict_content.h"
31 #include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
32 #include "dictionary/structure/backward/v402/ver4_dict_constants.h"
33 #include "dictionary/utils/forgetting_curve_utils.h"
34 
35 namespace latinime {
36 namespace backward {
37 namespace v402 {
38 
getNextBigram(int * const outBigramPos,int * const outProbability,bool * const outHasNext,int * const bigramEntryPos) const39 void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
40         bool *const outHasNext, int *const bigramEntryPos) const {
41     const BigramEntry bigramEntry =
42             mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
43     if (outBigramPos) {
44         // Lookup target PtNode position.
45         *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
46                 bigramEntry.getTargetTerminalId());
47     }
48     if (outProbability) {
49         if (bigramEntry.hasHistoricalInfo()) {
50             *outProbability =
51                     ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
52                             mHeaderPolicy);
53         } else {
54             *outProbability = bigramEntry.getProbability();
55         }
56     }
57     if (outHasNext) {
58         *outHasNext = bigramEntry.hasNext();
59     }
60 }
61 
addNewEntry(const int terminalId,const int newTargetTerminalId,const NgramProperty * const ngramProperty,bool * const outAddedNewEntry)62 bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
63         const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
64     // 1. The word has no bigrams yet.
65     // 2. The word has bigrams, and there is the target in the list.
66     // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
67     // 4. The word has bigrams. We have to append new bigram entry to the list.
68     // 5. Same as 4, but the list is the last entry of the content file.
69     if (outAddedNewEntry) {
70         *outAddedNewEntry = false;
71     }
72     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
73     if (bigramListPos == NOT_A_DICT_POS) {
74         // Case 1. PtNode that doesn't have a bigram list.
75         // Create new bigram list.
76         if (!mBigramDictContent->createNewBigramList(terminalId)) {
77             return false;
78         }
79         const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
80                 newTargetTerminalId);
81         const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
82                 ngramProperty);
83         // Write an entry.
84         const int writingPos =  mBigramDictContent->getBigramListHeadPos(terminalId);
85         if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
86             return false;
87         }
88         if (outAddedNewEntry) {
89             *outAddedNewEntry = true;
90         }
91         return true;
92     }
93 
94     int tailEntryPos = NOT_A_DICT_POS;
95     const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
96             &tailEntryPos);
97     if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
98         // Case 4, 5.
99         // Add new entry to the bigram list.
100         if (tailEntryPos == NOT_A_DICT_POS) {
101             // Case 4. Create new bigram list.
102             if (!mBigramDictContent->createNewBigramList(terminalId)) {
103                 return false;
104             }
105             const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
106             // Copy existing bigram list.
107             if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
108                 return false;
109             }
110         }
111         // Write new entry at the tail position of the bigram content.
112         const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
113                 newTargetTerminalId);
114         const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
115                 &newBigramEntry, ngramProperty);
116         if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
117             return false;
118         }
119         // Update has next flag of the tail entry.
120         if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
121             return false;
122         }
123         if (outAddedNewEntry) {
124             *outAddedNewEntry = true;
125         }
126         return true;
127     }
128 
129     // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
130     const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
131     if (!originalBigramEntry.isValid()) {
132         // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
133         // entry is updated.
134         if (outAddedNewEntry) {
135             *outAddedNewEntry = true;
136         }
137     }
138     const BigramEntry updatedBigramEntry =
139             originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
140     const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
141             &updatedBigramEntry, ngramProperty);
142     return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
143 }
144 
removeEntry(const int terminalId,const int targetTerminalId)145 bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
146     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
147     if (bigramListPos == NOT_A_DICT_POS) {
148         // Bigram list doesn't exist.
149         return false;
150     }
151     const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
152             nullptr /* outTailEntryPos */);
153     if (entryPosToUpdate == NOT_A_DICT_POS) {
154         // Bigram entry doesn't exist.
155         return false;
156     }
157     const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
158     if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
159         // Bigram entry doesn't exist.
160         return false;
161     }
162     // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
163     const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
164     return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
165 }
166 
updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,int * const outBigramCount)167 bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
168         int *const outBigramCount) {
169     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
170     if (bigramListPos == NOT_A_DICT_POS) {
171         // Bigram list doesn't exist.
172         return true;
173     }
174     bool hasNext = true;
175     int readingPos = bigramListPos;
176     while (hasNext) {
177         const int entryPos = readingPos;
178         const BigramEntry bigramEntry =
179                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
180         hasNext = bigramEntry.hasNext();
181         if (!bigramEntry.isValid()) {
182             continue;
183         }
184         const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
185                 bigramEntry.getTargetTerminalId());
186         if (targetPtNodePos == NOT_A_DICT_POS) {
187             // Invalidate bigram entry.
188             const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
189             if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
190                 return false;
191             }
192         } else if (bigramEntry.hasHistoricalInfo()) {
193             const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
194                     bigramEntry.getHistoricalInfo(), mHeaderPolicy);
195             if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
196                 const BigramEntry updatedBigramEntry =
197                         bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
198                 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
199                     return false;
200                 }
201                 *outBigramCount += 1;
202             } else {
203                 // Remove entry.
204                 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
205                 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
206                     return false;
207                 }
208             }
209         } else {
210             *outBigramCount += 1;
211         }
212     }
213     return true;
214 }
215 
getBigramEntryConut(const int terminalId)216 int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
217     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
218     if (bigramListPos == NOT_A_DICT_POS) {
219         // Bigram list doesn't exist.
220         return 0;
221     }
222     int bigramCount = 0;
223     bool hasNext = true;
224     int readingPos = bigramListPos;
225     while (hasNext) {
226         const BigramEntry bigramEntry =
227                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
228         hasNext = bigramEntry.hasNext();
229         if (bigramEntry.isValid()) {
230             bigramCount++;
231         }
232     }
233     return bigramCount;
234 }
235 
getEntryPosToUpdate(const int targetTerminalIdToFind,const int bigramListPos,int * const outTailEntryPos) const236 int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
237         const int bigramListPos, int *const outTailEntryPos) const {
238     if (outTailEntryPos) {
239         *outTailEntryPos = NOT_A_DICT_POS;
240     }
241     bool hasNext = true;
242     int invalidEntryPos = NOT_A_DICT_POS;
243     int readingPos = bigramListPos;
244     while (hasNext) {
245         const int entryPos = readingPos;
246         const BigramEntry bigramEntry =
247                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
248         hasNext = bigramEntry.hasNext();
249         if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
250             // Entry with same target is found.
251             return entryPos;
252         } else if (!bigramEntry.isValid()) {
253             // Invalid entry that can be reused is found.
254             invalidEntryPos = entryPos;
255         }
256         if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
257             if (outTailEntryPos) {
258                 *outTailEntryPos = entryPos;
259             }
260         }
261     }
262     return invalidEntryPos;
263 }
264 
createUpdatedBigramEntryFrom(const BigramEntry * const originalBigramEntry,const NgramProperty * const ngramProperty) const265 const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
266         const BigramEntry *const originalBigramEntry,
267         const NgramProperty *const ngramProperty) const {
268     // TODO: Consolidate historical info and probability.
269     if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
270         const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo();
271         const HistoricalInfo updatedHistoricalInfo =
272                 ForgettingCurveUtils::createUpdatedHistoricalInfo(
273                         originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
274                         &historicalInfoForUpdate, mHeaderPolicy);
275         return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
276     } else {
277         return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability());
278     }
279 }
280 
updateHasNextFlag(const bool hasNext,const int bigramEntryPos)281 bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
282     const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
283     const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
284     return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
285 }
286 
287 } // namespace v402
288 } // namespace backward
289 } // namespace latinime
290