1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "suggest/core/dictionary/digraph_utils.h"
18 
19 #include <cstdlib>
20 
21 #include "defines.h"
22 #include "dictionary/interface/dictionary_header_structure_policy.h"
23 #include "utils/char_utils.h"
24 
25 namespace latinime {
26 
27 const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
28         { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
29         { 'o', 'e', 0x00F6 },   // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
30         { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
31 const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
32         { DIGRAPH_TYPE_GERMAN_UMLAUT };
33 
hasDigraphForCodePoint(const DictionaryHeaderStructurePolicy * const headerPolicy,const int compositeGlyphCodePoint)34 /* static */ bool DigraphUtils::hasDigraphForCodePoint(
35         const DictionaryHeaderStructurePolicy *const headerPolicy,
36         const int compositeGlyphCodePoint) {
37     const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(headerPolicy);
38     if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
39         return true;
40     }
41     return false;
42 }
43 
44 // Returns the digraph type associated with the given dictionary.
getDigraphTypeForDictionary(const DictionaryHeaderStructurePolicy * const headerPolicy)45 /* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
46         const DictionaryHeaderStructurePolicy *const headerPolicy) {
47     if (headerPolicy->requiresGermanUmlautProcessing()) {
48         return DIGRAPH_TYPE_GERMAN_UMLAUT;
49     }
50     return DIGRAPH_TYPE_NONE;
51 }
52 
53 // Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
54 // (which specifies the first or second codepoint in the digraph).
getDigraphCodePointForIndex(const int compositeGlyphCodePoint,const DigraphCodePointIndex digraphCodePointIndex)55 /* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
56         const DigraphCodePointIndex digraphCodePointIndex) {
57     if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
58         return NOT_A_CODE_POINT;
59     }
60     const DigraphUtils::digraph_t *const digraph =
61             DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint);
62     if (!digraph) {
63         return NOT_A_CODE_POINT;
64     }
65     if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) {
66         return digraph->first;
67     } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) {
68         return digraph->second;
69     }
70     ASSERT(false);
71     return NOT_A_CODE_POINT;
72 }
73 
74 // Retrieves the set of all digraphs associated with the given digraph type.
75 // Returns the size of the digraph array, or 0 if none exist.
getAllDigraphsForDigraphTypeAndReturnSize(const DigraphUtils::DigraphType digraphType,const DigraphUtils::digraph_t ** const digraphs)76 /* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
77         const DigraphUtils::DigraphType digraphType,
78         const DigraphUtils::digraph_t **const digraphs) {
79     if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) {
80         *digraphs = GERMAN_UMLAUT_DIGRAPHS;
81         return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
82     }
83     return 0;
84 }
85 
86 /**
87  * Returns the digraph for the input composite glyph codepoint, or nullptr if none exists.
88  * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
89  */
getDigraphForCodePoint(const int compositeGlyphCodePoint)90 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
91         const int compositeGlyphCodePoint) {
92     for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) {
93         const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint(
94                 USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint);
95         if (digraph) {
96             return digraph;
97         }
98     }
99     return nullptr;
100 }
101 
102 /**
103  * Returns the digraph for the input composite glyph codepoint, or nullptr if none exists.
104  * digraphType: the type of digraphs supported.
105  * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
106  */
getDigraphForDigraphTypeAndCodePoint(const DigraphUtils::DigraphType digraphType,const int compositeGlyphCodePoint)107 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
108         const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
109     const DigraphUtils::digraph_t *digraphs = nullptr;
110     const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint);
111     const int digraphsSize =
112             DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs);
113     for (int i = 0; i < digraphsSize; i++) {
114         if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) {
115             return &digraphs[i];
116         }
117     }
118     return nullptr;
119 }
120 
121 } // namespace latinime
122