1 /* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H 18 #define LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H 19 20 #include <algorithm> 21 22 #include "defines.h" 23 #include "dictionary/property/historical_info.h" 24 #include "utils/ngram_utils.h" 25 #include "utils/time_keeper.h" 26 27 namespace latinime { 28 29 class DynamicLanguageModelProbabilityUtils { 30 public: computeRawProbabilityFromCounts(const int count,const int contextCount,const NgramType ngramType)31 static float computeRawProbabilityFromCounts(const int count, const int contextCount, 32 const NgramType ngramType) { 33 const int minCount = ASSUMED_MIN_COUNTS[static_cast<int>(ngramType)]; 34 return static_cast<float>(count) / static_cast<float>(std::max(contextCount, minCount)); 35 } 36 backoff(const int ngramProbability,const NgramType ngramType)37 static float backoff(const int ngramProbability, const NgramType ngramType) { 38 const int probability = 39 ngramProbability + ENCODED_BACKOFF_WEIGHTS[static_cast<int>(ngramType)]; 40 return std::min(std::max(probability, NOT_A_PROBABILITY), MAX_PROBABILITY); 41 } 42 getDecayedProbability(const int probability,const HistoricalInfo historicalInfo)43 static int getDecayedProbability(const int probability, const HistoricalInfo historicalInfo) { 44 const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp(); 45 if (elapsedTime < 0) { 46 AKLOGE("The elapsed time is negatime value. Timestamp overflow?"); 47 return NOT_A_PROBABILITY; 48 } 49 // TODO: Improve this logic. 50 // We don't modify probability depending on the elapsed time. 51 return probability; 52 } 53 shouldRemoveEntryDuringGC(const HistoricalInfo historicalInfo)54 static int shouldRemoveEntryDuringGC(const HistoricalInfo historicalInfo) { 55 // TODO: Improve this logic. 56 const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp(); 57 return elapsedTime > DURATION_TO_DISCARD_ENTRY_IN_SECONDS; 58 } 59 getPriorityToPreventFromEviction(const HistoricalInfo historicalInfo)60 static int getPriorityToPreventFromEviction(const HistoricalInfo historicalInfo) { 61 // TODO: Improve this logic. 62 // More recently input entries get higher priority. 63 return historicalInfo.getTimestamp(); 64 } 65 66 private: 67 DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils); 68 69 static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram."); 70 71 static const int ASSUMED_MIN_COUNTS[]; 72 static const int ENCODED_BACKOFF_WEIGHTS[]; 73 static const int DURATION_TO_DISCARD_ENTRY_IN_SECONDS; 74 }; 75 76 } // namespace latinime 77 #endif /* LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H */ 78