1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_H
18 #define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
19 
20 #include <memory>
21 
22 #include "defines.h"
23 #include "dictionary/property/historical_info.h"
24 #include "dictionary/property/word_attributes.h"
25 #include "dictionary/property/word_property.h"
26 #include "dictionary/utils/binary_dictionary_shortcut_iterator.h"
27 #include "utils/int_array_view.h"
28 
29 namespace latinime {
30 
31 class DicNode;
32 class DicNodeVector;
33 class DictionaryHeaderStructurePolicy;
34 class MultiBigramMap;
35 class NgramListener;
36 class NgramContext;
37 class UnigramProperty;
38 
39 /*
40  * This class abstracts the structure of dictionaries.
41  * Implement this policy to support additional dictionaries.
42  */
43 class DictionaryStructureWithBufferPolicy {
44  public:
45     typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
46 
~DictionaryStructureWithBufferPolicy()47     virtual ~DictionaryStructureWithBufferPolicy() {}
48 
49     virtual int getRootPosition() const = 0;
50 
51     virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
52             DicNodeVector *const childDicNodes) const = 0;
53 
54     virtual int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
55             int *const outCodePoints) const = 0;
56 
57     virtual int getWordId(const CodePointArrayView wordCodePoints,
58             const bool forceLowerCaseSearch) const = 0;
59 
60     virtual const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
61             const int wordId, MultiBigramMap *const multiBigramMap) const = 0;
62 
63     // TODO: Remove
64     virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0;
65 
66     virtual int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const = 0;
67 
68     virtual void iterateNgramEntries(const WordIdArrayView prevWordIds,
69             NgramListener *const listener) const = 0;
70 
71     virtual BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const = 0;
72 
73     virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
74 
75     // Returns whether the update was success or not.
76     virtual bool addUnigramEntry(const CodePointArrayView wordCodePoints,
77             const UnigramProperty *const unigramProperty) = 0;
78 
79     // Returns whether the update was success or not.
80     virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
81 
82     // Returns whether the update was success or not.
83     virtual bool addNgramEntry(const NgramProperty *const ngramProperty) = 0;
84 
85     // Returns whether the update was success or not.
86     virtual bool removeNgramEntry(const NgramContext *const ngramContext,
87             const CodePointArrayView wordCodePoints) = 0;
88 
89     // Returns whether the update was success or not.
90     virtual bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
91             const CodePointArrayView wordCodePoints, const bool isValidWord,
92             const HistoricalInfo historicalInfo) = 0;
93 
94     // Returns whether the flush was success or not.
95     virtual bool flush(const char *const filePath) = 0;
96 
97     // Returns whether the GC and flush were success or not.
98     virtual bool flushWithGC(const char *const filePath) = 0;
99 
100     virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
101 
102     // Currently, this method is used only for testing. You may want to consider creating new
103     // dedicated method instead of this if you want to use this in the production.
104     virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
105             const int maxResultLength) = 0;
106 
107     virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0;
108 
109     // Method to iterate all words in the dictionary.
110     // The returned token has to be used to get the next word. If token is 0, this method newly
111     // starts iterating the dictionary.
112     virtual int getNextWordAndNextToken(const int token, int *const outCodePoints,
113             int *const outCodePointCount) = 0;
114 
115     virtual bool isCorrupted() const = 0;
116 
117  protected:
DictionaryStructureWithBufferPolicy()118     DictionaryStructureWithBufferPolicy() {}
119 
120  private:
121     DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy);
122 };
123 } // namespace latinime
124 #endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */
125