18 package org.turro.contacts.proposal;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.logging.Level;
23 import java.util.logging.Logger;
24 import org.apache.commons.codec.EncoderException;
25 import org.apache.commons.codec.StringEncoder;
26 import org.turro.elephant.context.ElephantContext;
// Phonetic encoder shared by the whole class: differenceWord() passes both of
// its inputs through encoder.encode() before comparing them.
// NOTE(review): not declared final although no reassignment is visible in this
// extract - confirm against the full file before tightening it.
34 private static StringEncoder encoder =
35 new org.apache.commons.codec.language.RefinedSoundex();
40 return differenceWord(s1, s2);
44 return s.replaceAll(
"[àáäâ]",
"a")
45 .replaceAll(
"[èéëê]",
"e")
46 .replaceAll(
"[ìíïî]",
"i")
47 .replaceAll(
"[òóöô]",
"o")
48 .replaceAll(
"[ùúüû]",
"u");
52 return s.replaceAll(
"ñ",
"ny")
54 .replaceAll(
"[^A-Za-z0-9]",
"");
/**
 * Scores the similarity of two phrases word by word.
 *
 * Both inputs are split into words on runs of spaces, periods, commas and
 * parentheses. Every word of the first phrase is compared with every word of
 * the second through differenceWord(), keeping the highest score found for
 * that word. The result is the mean of the {@code count} highest scores,
 * where {@code count} is the smaller of the two word counts.
 *
 * NOTE(review): the null branch, the declaration of diffW and the code that
 * fills diffWords are not visible in this extract; presumably each word's
 * best score is appended to diffWords - confirm against the full file.
 *
 * @param es1 first phrase; null is checked for explicitly
 * @param es2 second phrase; null is checked for explicitly
 * @return the averaged score, truncated to an int
 */
57 private static int differenceEncoded(String es1, String es2) {
58 if (es1 ==
null || es2 ==
null) {
// Tokenise both phrases with the same delimiter set.
61 String words1[] = es1.split(
"[ \\.,\\(\\)]+"),
62 words2[] = es2.split(
"[ \\.,\\(\\)]+");
63 double count, diffPhrase = 0.0d;
64 ArrayList<Double> diffWords =
new ArrayList<Double>();
// For each word of the first phrase, keep the best score against any word
// of the second phrase.
65 for(String word1 : words1) {
67 for(String word2 : words2) {
68 diffW = Math.max(diffW, differenceWord(word1, word2));
// Only as many scores as the shorter phrase has words contribute to the result.
72 count = Math.min(words1.length, words2.length);
73 Double[] diffArray = diffWords.toArray(
new Double[0]);
// Ascending sort, so the highest scores sit at the end of the array.
74 Arrays.sort(diffArray);
// Walk backwards from the end to sum the `count` highest scores.
75 for(
int i = 0; i < count; i++) {
76 diffPhrase += diffArray[diffArray.length - i - 1];
// Mean of the selected scores; the cast drops the fractional part.
78 return (
int) (diffPhrase / count);
/**
 * Scores the similarity of two single words after phonetic encoding.
 *
 * Both arguments are replaced by their encoder.encode() output, then the two
 * encodings are compared character by character over the length of the
 * shorter one. The returned value is (diff * 100 / lengthToMatch) scaled by
 * lengthToMatch / maxLength, so a difference in encoded length lowers the
 * score.
 *
 * NOTE(review): the null branch, the opening of the try block, the rest of the
 * catch block, the declaration of diff and the body of the comparison loop are
 * not visible in this extract; presumably diff counts the positions at which
 * both encodings hold the same character - confirm against the full file.
 *
 * @param es1 first word; null is checked for explicitly
 * @param es2 second word; null is checked for explicitly
 * @return the score truncated to an int
 */
81 private static int differenceWord(String es1, String es2) {
82 if (es1 ==
null || es2 ==
null) {
// Compare phonetic encodings rather than the raw spellings.
86 es1 = encoder.encode(es1);
87 es2 = encoder.encode(es2);
88 }
catch (EncoderException ex) {
// An encoding failure is logged at SEVERE level together with the exception.
89 Logger.getLogger(Soundex.class.getName()).log(Level.SEVERE, ElephantContext.logMsg(
null), ex);
// Only the overlapping prefix can be compared position by position; the
// longer length is kept to scale the score in the return expression.
91 double lengthToMatch = Math.min(es1.length(), es2.length()),
92 maxLength = Math.max(es1.length(), es2.length());
94 for (
int i = 0; i < lengthToMatch; i++) {
95 if (es1.charAt(i) == es2.charAt(i)) {
// Ratio over the compared prefix, multiplied by the length ratio (at most 1);
// the cast drops the fractional part.
99 return (
int) ((diff * 100.0d / lengthToMatch) *
100 (lengthToMatch / maxLength));