1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.quicksearchbox.util; 18 19 /** 20 * This class represents the matrix used in the Levenshtein distance algorithm, together 21 * with the algorithm itself which operates on the matrix. 22 * 23 * We also track of the individual operations applied to transform the source string into the 24 * target string so we can trace the path taken through the matrix afterwards, in order to 25 * perform the formatting as required. 26 */ 27 public class LevenshteinDistance { 28 public static final int EDIT_DELETE = 0; 29 public static final int EDIT_INSERT = 1; 30 public static final int EDIT_REPLACE = 2; 31 public static final int EDIT_UNCHANGED = 3; 32 33 private final Token[] mSource; 34 private final Token[] mTarget; 35 private final int[][] mEditTypeTable; 36 private final int[][] mDistanceTable; 37 LevenshteinDistance(Token[] source, Token[] target)38 public LevenshteinDistance(Token[] source, Token[] target) { 39 final int sourceSize = source.length; 40 final int targetSize = target.length; 41 final int[][] editTab = new int[sourceSize+1][targetSize+1]; 42 final int[][] distTab = new int[sourceSize+1][targetSize+1]; 43 editTab[0][0] = EDIT_UNCHANGED; 44 distTab[0][0] = 0; 45 for (int i = 1; i <= sourceSize; ++i) { 46 editTab[i][0] = EDIT_DELETE; 47 distTab[i][0] = i; 48 } 49 for (int i = 1; i <= targetSize; ++i) { 50 editTab[0][i] = EDIT_INSERT; 51 distTab[0][i] = i; 52 } 53 mEditTypeTable = editTab; 54 mDistanceTable = distTab; 55 mSource = source; 56 mTarget = target; 57 } 58 59 /** 60 * Implementation of Levenshtein distance algorithm. 61 * 62 * @return The Levenshtein distance. 63 */ calculate()64 public int calculate() { 65 final Token[] src = mSource; 66 final Token[] trg = mTarget; 67 final int sourceLen = src.length; 68 final int targetLen = trg.length; 69 final int[][] distTab = mDistanceTable; 70 final int[][] editTab = mEditTypeTable; 71 for (int s = 1; s <= sourceLen; ++s) { 72 Token sourceToken = src[s-1]; 73 for (int t = 1; t <= targetLen; ++t) { 74 Token targetToken = trg[t-1]; 75 int cost = sourceToken.prefixOf(targetToken) ? 0 : 1; 76 77 int distance = distTab[s-1][t] + 1; 78 int type = EDIT_DELETE; 79 80 int d = distTab[s][t - 1]; 81 if (d + 1 < distance ) { 82 distance = d + 1; 83 type = EDIT_INSERT; 84 } 85 86 d = distTab[s - 1][t - 1]; 87 if (d + cost < distance) { 88 distance = d + cost; 89 type = cost == 0 ? EDIT_UNCHANGED : EDIT_REPLACE; 90 } 91 distTab[s][t] = distance; 92 editTab[s][t] = type; 93 } 94 } 95 return distTab[sourceLen][targetLen]; 96 } 97 98 /** 99 * Gets the list of operations which were applied to each target token; {@link #calculate} must 100 * have been called on this object before using this method. 101 * @return A list of {@link EditOperation}s indicating the origin of each token in the target 102 * string. The position of the token indicates the position in the source string of the 103 * token that was unchanged/replaced, or the position in the source after which a target 104 * token was inserted. 105 */ getTargetOperations()106 public EditOperation[] getTargetOperations() { 107 final int trgLen = mTarget.length; 108 final EditOperation[] ops = new EditOperation[trgLen]; 109 int targetPos = trgLen; 110 int sourcePos = mSource.length; 111 final int[][] editTab = mEditTypeTable; 112 while (targetPos > 0) { 113 int editType = editTab[sourcePos][targetPos]; 114 switch (editType) { 115 case LevenshteinDistance.EDIT_DELETE: 116 sourcePos--; 117 break; 118 case LevenshteinDistance.EDIT_INSERT: 119 targetPos--; 120 ops[targetPos] = new EditOperation(editType, sourcePos); 121 break; 122 case LevenshteinDistance.EDIT_UNCHANGED: 123 case LevenshteinDistance.EDIT_REPLACE: 124 targetPos--; 125 sourcePos--; 126 ops[targetPos] = new EditOperation(editType, sourcePos); 127 break; 128 } 129 } 130 131 return ops; 132 } 133 134 public static final class EditOperation { 135 private final int mType; 136 private final int mPosition; EditOperation(int type, int position)137 public EditOperation(int type, int position) { 138 mType = type; 139 mPosition = position; 140 } getType()141 public int getType() { 142 return mType; 143 } getPosition()144 public int getPosition() { 145 return mPosition; 146 } 147 } 148 149 public static final class Token implements CharSequence { 150 private final char[] mContainer; 151 public final int mStart; 152 public final int mEnd; 153 Token(char[] container, int start, int end)154 public Token(char[] container, int start, int end) { 155 mContainer = container; 156 mStart = start; 157 mEnd = end; 158 } 159 length()160 public int length() { 161 return mEnd - mStart; 162 } 163 164 @Override toString()165 public String toString() { 166 // used in tests only. 167 return subSequence(0, length()); 168 } 169 prefixOf(final Token that)170 public boolean prefixOf(final Token that) { 171 final int len = length(); 172 if (len > that.length()) return false; 173 final int thisStart = mStart; 174 final int thatStart = that.mStart; 175 final char[] thisContainer = mContainer; 176 final char[] thatContainer = that.mContainer; 177 for (int i = 0; i < len; ++i) { 178 if (thisContainer[thisStart + i] != thatContainer[thatStart + i]) { 179 return false; 180 } 181 } 182 return true; 183 } 184 charAt(int index)185 public char charAt(int index) { 186 return mContainer[index + mStart]; 187 } 188 subSequence(int start, int end)189 public String subSequence(int start, int end) { 190 return new String(mContainer, mStart + start, length()); 191 } 192 193 } 194 } 195