BrightSide Workbench Full Report + Source Code
Soundex.java
Go to the documentation of this file.
1 /*
2  * Turró i Cutiller Foundation. License notice.
3  * Copyright (C) 2011 Lluis Turró Cutiller <http://www.turro.org/>
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Affero General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Affero General Public License for more details.
14  *
15  * You should have received a copy of the GNU Affero General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  */
18 package org.turro.contacts.proposal;
19 
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.logging.Level;
23 import java.util.logging.Logger;
24 import org.apache.commons.codec.EncoderException;
25 import org.apache.commons.codec.StringEncoder;
26 import org.turro.elephant.context.ElephantContext;
27 
32 public class Soundex {
33 
34  private static StringEncoder encoder =
35  new org.apache.commons.codec.language.RefinedSoundex();
36 
37  public static int difference(String s1, String s2) {
38  s1 = replaceSpecialLetters(replacePunctuations(s1.toLowerCase()));
39  s2 = replaceSpecialLetters(replacePunctuations(s2.toLowerCase()));
40  return differenceWord(s1, s2);
41  }
42 
43  public static String replacePunctuations(String s) {
44  return s.replaceAll("[àáäâ]", "a")
45  .replaceAll("[èéëê]", "e")
46  .replaceAll("[ìíïî]", "i")
47  .replaceAll("[òóöô]", "o")
48  .replaceAll("[ùúüû]", "u");
49  }
50 
51  public static String replaceSpecialLetters(String s) {
52  return s.replaceAll("ñ", "ny")
53  .replaceAll("ç", "s")
54  .replaceAll("[^A-Za-z0-9]", "");
55  }
56 
57  private static int differenceEncoded(String es1, String es2) {
58  if (es1 == null || es2 == null) {
59  return 0;
60  }
61  String words1[] = es1.split("[ \\.,\\(\\)]+"),
62  words2[] = es2.split("[ \\.,\\(\\)]+");
63  double count, diffPhrase = 0.0d;
64  ArrayList<Double> diffWords = new ArrayList<Double>();
65  for(String word1 : words1) {
66  double diffW = 0;
67  for(String word2 : words2) {
68  diffW = Math.max(diffW, differenceWord(word1, word2));
69  }
70  diffWords.add(diffW);
71  }
72  count = Math.min(words1.length, words2.length);
73  Double[] diffArray = diffWords.toArray(new Double[0]);
74  Arrays.sort(diffArray);
75  for(int i = 0; i < count; i++) {
76  diffPhrase += diffArray[diffArray.length - i - 1];
77  }
78  return (int) (diffPhrase / count);
79  }
80 
81  private static int differenceWord(String es1, String es2) {
82  if (es1 == null || es2 == null) {
83  return 0;
84  }
85  try {
86  es1 = encoder.encode(es1);
87  es2 = encoder.encode(es2);
88  } catch (EncoderException ex) {
89  Logger.getLogger(Soundex.class.getName()).log(Level.SEVERE, ElephantContext.logMsg(null), ex);
90  }
91  double lengthToMatch = Math.min(es1.length(), es2.length()),
92  maxLength = Math.max(es1.length(), es2.length());
93  double diff = 0;
94  for (int i = 0; i < lengthToMatch; i++) {
95  if (es1.charAt(i) == es2.charAt(i)) {
96  diff++;
97  }
98  }
99  return (int) ((diff * 100.0d / lengthToMatch) *
100  (lengthToMatch / maxLength));
101  }
102 
103  private Soundex() {
104  }
105 
106 }
static String replacePunctuations(String s)
Definition: Soundex.java:43
static int difference(String s1, String s2)
Definition: Soundex.java:37
static String replaceSpecialLetters(String s)
Definition: Soundex.java:51