001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.language;
019    
020    import org.apache.commons.codec.EncoderException;
021    import org.apache.commons.codec.StringEncoder;
022    
023    /**
024     * Encodes a string into a double metaphone value.
025     * This Implementation is based on the algorithm by <CITE>Lawrence Philips</CITE>.
026     * <ul>
027     * <li>Original Article: <a 
028     * href="http://www.cuj.com/documents/s=8038/cuj0006philips/">
029     * http://www.cuj.com/documents/s=8038/cuj0006philips/</a></li>
030     * <li>Original Source Code: <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip">
031     * ftp://ftp.cuj.com/pub/2000/1806/philips.zip</a></li>
032     * </ul>
033     * 
034     * @author Apache Software Foundation
035     * @version $Id: DoubleMetaphone.java 800153 2009-08-02 22:45:30Z ggregory $
036     */
037    public class DoubleMetaphone implements StringEncoder {
038    
    /**
     * "Vowels" to test for. Note that 'Y' is included alongside A, E, I, O and U.
     */
    private static final String VOWELS = "AEIOUY";

    /**
     * Prefixes when present which are not pronounced.
     * NOTE(review): presumably consulted by isSilentStart(String); when that
     * method reports a silent start, encoding begins at index 1 instead of 0.
     */
    private static final String[] SILENT_START = 
    { "GN", "KN", "PN", "WR", "PS" };
    /**
     * Single-character criteria: L, R, N, M, B, H, F, V, W and the space character.
     * NOTE(review): presumably used by one of the "CH" condition helpers
     * defined elsewhere in this class - confirm.
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE = 
    { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
    /**
     * Two-letter sequences tested directly after a 'G' at index 0 by the 'G'
     * handler (the "-ges-, -gep-, -gel-, -gie- at beginning" rule).
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = 
    { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
    /**
     * Single letters tested directly after a 'J' by the 'J' handler; when one of
     * them follows, the handler's final fallback rule does not append 'J'.
     */
    private static final String[] L_T_K_S_N_M_B_Z = 
    { "L", "T", "K", "S", "N", "M", "B", "Z" };

    /**
     * Maximum length of an encoding, default is 4. The value is read through
     * {@link #getMaxCodeLen()} and passed to the result accumulator each time a
     * value is encoded.
     */
    protected int maxCodeLen = 4;
060    
061        /**
062         * Creates an instance of this DoubleMetaphone encoder
063         */
064        public DoubleMetaphone() {
065            super();
066        }
067        
068        /**
069         * Encode a value with Double Metaphone
070         *
071         * @param value String to encode
072         * @return an encoded string
073         */
074        public String doubleMetaphone(String value) {
075            return doubleMetaphone(value, false);
076        }
077        
078        /**
079         * Encode a value with Double Metaphone, optionally using the alternate
080         * encoding.
081         *
082         * @param value String to encode
083         * @param alternate use alternate encode
084         * @return an encoded string
085         */
086        public String doubleMetaphone(String value, boolean alternate) {
087            value = cleanInput(value);
088            if (value == null) {
089                return null;
090            }
091            
092            boolean slavoGermanic = isSlavoGermanic(value);
093            int index = isSilentStart(value) ? 1 : 0;
094            
095            DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
096            
097            while (!result.isComplete() && index <= value.length() - 1) {
098                switch (value.charAt(index)) {
099                case 'A':
100                case 'E':
101                case 'I':
102                case 'O':
103                case 'U':
104                case 'Y':
105                    index = handleAEIOUY(value, result, index);
106                    break;
107                case 'B':
108                    result.append('P');
109                    index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
110                    break;
111                case '\u00C7':
112                    // A C with a Cedilla
113                    result.append('S');
114                    index++;
115                    break; 
116                case 'C':
117                    index = handleC(value, result, index);
118                    break;
119                case 'D':
120                    index = handleD(value, result, index);
121                    break;
122                case 'F':
123                    result.append('F');
124                    index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
125                    break;
126                case 'G':
127                    index = handleG(value, result, index, slavoGermanic);
128                    break;
129                case 'H':
130                    index = handleH(value, result, index);
131                    break;
132                case 'J':
133                    index = handleJ(value, result, index, slavoGermanic);
134                    break;
135                case 'K':
136                    result.append('K');
137                    index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
138                    break;
139                case 'L':
140                    index = handleL(value, result, index);
141                    break;
142                case 'M':
143                    result.append('M');
144                    index = conditionM0(value, index) ? index + 2 : index + 1;
145                    break;
146                case 'N':
147                    result.append('N');
148                    index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
149                    break;
150                case '\u00D1':
151                    // N with a tilde (spanish ene)
152                    result.append('N');
153                    index++;
154                    break;
155                case 'P':
156                    index = handleP(value, result, index);
157                    break;
158                case 'Q':
159                    result.append('K');
160                    index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
161                    break;
162                case 'R':
163                    index = handleR(value, result, index, slavoGermanic);
164                    break;
165                case 'S':
166                    index = handleS(value, result, index, slavoGermanic);
167                    break;
168                case 'T':
169                    index = handleT(value, result, index);
170                    break;
171                case 'V':
172                    result.append('F');
173                    index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
174                    break;
175                case 'W':
176                    index = handleW(value, result, index);
177                    break;
178                case 'X':
179                    index = handleX(value, result, index);
180                    break;
181                case 'Z':
182                    index = handleZ(value, result, index, slavoGermanic);
183                    break;
184                default:
185                    index++;
186                    break;
187                }
188            }
189    
190            return alternate ? result.getAlternate() : result.getPrimary();
191        }
192        
193        /**
194         * Encode the value using DoubleMetaphone.  It will only work if 
195         * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
196         *
197         * @param obj Object to encode (should be of type String)
198         * @return An encoded Object (will be of type String)
199         * @throws EncoderException encode parameter is not of type String
200         */
201        public Object encode(Object obj) throws EncoderException {
202            if (!(obj instanceof String)) {
203                throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); 
204            } 
205            return doubleMetaphone((String) obj);
206        }
207    
208        /**
209         * Encode the value using DoubleMetaphone.
210         *
211         * @param value String to encode
212         * @return An encoded String
213         */
214        public String encode(String value) {
215            return doubleMetaphone(value);   
216        }
217    
218        /**
219         * Check if the Double Metaphone values of two <code>String</code> values
220         * are equal.
221         * 
222         * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
223         * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
224         * @return <code>true</code> if the encoded <code>String</code>s are equal;
225         *          <code>false</code> otherwise.
226         * @see #isDoubleMetaphoneEqual(String,String,boolean)
227         */
228        public boolean isDoubleMetaphoneEqual(String value1, String value2) {
229            return isDoubleMetaphoneEqual(value1, value2, false);
230        }
231        
232        /**
233         * Check if the Double Metaphone values of two <code>String</code> values
234         * are equal, optionally using the alternate value.
235         * 
236         * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
237         * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
238         * @param alternate use the alternate value if <code>true</code>.
239         * @return <code>true</code> if the encoded <code>String</code>s are equal;
240         *          <code>false</code> otherwise.
241         */
242        public boolean isDoubleMetaphoneEqual(String value1, 
243                                              String value2, 
244                                              boolean alternate) {
245            return doubleMetaphone(value1, alternate).equals(doubleMetaphone
246                                                             (value2, alternate));
247        }
248        
249        /**
250         * Returns the maxCodeLen.
251         * @return int
252         */
253        public int getMaxCodeLen() {
254            return this.maxCodeLen;
255        }
256    
257        /**
258         * Sets the maxCodeLen.
259         * @param maxCodeLen The maxCodeLen to set
260         */
261        public void setMaxCodeLen(int maxCodeLen) {
262            this.maxCodeLen = maxCodeLen;
263        }
264    
265        //-- BEGIN HANDLERS --//
266    
267        /**
268         * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases
269         */
270        private int handleAEIOUY(String value, DoubleMetaphoneResult result, int 
271                                 index) {
272            if (index == 0) {
273                result.append('A');
274            }
275            return index + 1;
276        }
277        
278        /**
279         * Handles 'C' cases
280         */
281        private int handleC(String value, 
282                            DoubleMetaphoneResult result, 
283                            int index) {
284            if (conditionC0(value, index)) {  // very confusing, moved out
285                result.append('K');
286                index += 2;
287            } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
288                result.append('S');
289                index += 2;
290            } else if (contains(value, index, 2, "CH")) {
291                index = handleCH(value, result, index);
292            } else if (contains(value, index, 2, "CZ") && 
293                       !contains(value, index - 2, 4, "WICZ")) {
294                //-- "Czerny" --//
295                result.append('S', 'X');
296                index += 2;
297            } else if (contains(value, index + 1, 3, "CIA")) {
298                //-- "focaccia" --//
299                result.append('X');
300                index += 3;
301            } else if (contains(value, index, 2, "CC") && 
302                       !(index == 1 && charAt(value, 0) == 'M')) {
303                //-- double "cc" but not "McClelland" --//
304                return handleCC(value, result, index);
305            } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
306                result.append('K');
307                index += 2;
308            } else if (contains(value, index, 2, "CI", "CE", "CY")) {
309                //-- Italian vs. English --//
310                if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
311                    result.append('S', 'X');
312                } else {
313                    result.append('S');
314                }
315                index += 2;
316            } else {
317                result.append('K');
318                if (contains(value, index + 1, 2, " C", " Q", " G")) { 
319                    //-- Mac Caffrey, Mac Gregor --//
320                    index += 3;
321                } else if (contains(value, index + 1, 1, "C", "K", "Q") && 
322                           !contains(value, index + 1, 2, "CE", "CI")) {
323                    index += 2;
324                } else {
325                    index++;
326                }
327            }
328            
329            return index;
330        }
331    
332        /**
333         * Handles 'CC' cases
334         */
335        private int handleCC(String value, 
336                             DoubleMetaphoneResult result, 
337                             int index) {
338            if (contains(value, index + 2, 1, "I", "E", "H") && 
339                !contains(value, index + 2, 2, "HU")) {
340                //-- "bellocchio" but not "bacchus" --//
341                if ((index == 1 && charAt(value, index - 1) == 'A') || 
342                    contains(value, index - 1, 5, "UCCEE", "UCCES")) {
343                    //-- "accident", "accede", "succeed" --//
344                    result.append("KS");
345                } else {
346                    //-- "bacci", "bertucci", other Italian --//
347                    result.append('X');
348                }
349                index += 3;
350            } else {    // Pierce's rule
351                result.append('K');
352                index += 2;
353            }
354            
355            return index;
356        }
357        
358        /**
359         * Handles 'CH' cases
360         */
361        private int handleCH(String value, 
362                             DoubleMetaphoneResult result, 
363                             int index) {
364            if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
365                result.append('K', 'X');
366                return index + 2;
367            } else if (conditionCH0(value, index)) {
368                //-- Greek roots ("chemistry", "chorus", etc.) --//
369                result.append('K');
370                return index + 2;
371            } else if (conditionCH1(value, index)) {
372                //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
373                result.append('K');
374                return index + 2;
375            } else {
376                if (index > 0) {
377                    if (contains(value, 0, 2, "MC")) {
378                        result.append('K');
379                    } else {
380                        result.append('X', 'K');
381                    }
382                } else {
383                    result.append('X');
384                }
385                return index + 2;
386            }
387        }
388    
389        /**
390         * Handles 'D' cases
391         */
392        private int handleD(String value, 
393                            DoubleMetaphoneResult result, 
394                            int index) {
395            if (contains(value, index, 2, "DG")) {
396                //-- "Edge" --//
397                if (contains(value, index + 2, 1, "I", "E", "Y")) {
398                    result.append('J');
399                    index += 3;
400                    //-- "Edgar" --//
401                } else {
402                    result.append("TK");
403                    index += 2;
404                }
405            } else if (contains(value, index, 2, "DT", "DD")) {
406                result.append('T');
407                index += 2;
408            } else {
409                result.append('T');
410                index++;
411            }
412            return index;
413        }
414    
415        /**
416         * Handles 'G' cases
417         */
418        private int handleG(String value, 
419                            DoubleMetaphoneResult result, 
420                            int index, 
421                            boolean slavoGermanic) {
422            if (charAt(value, index + 1) == 'H') {
423                index = handleGH(value, result, index);
424            } else if (charAt(value, index + 1) == 'N') {
425                if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
426                    result.append("KN", "N");
427                } else if (!contains(value, index + 2, 2, "EY") && 
428                           charAt(value, index + 1) != 'Y' && !slavoGermanic) {
429                    result.append("N", "KN");
430                } else {
431                    result.append("KN");
432                }
433                index = index + 2;
434            } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
435                result.append("KL", "L");
436                index += 2;
437            } else if (index == 0 && (charAt(value, index + 1) == 'Y' || contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
438                //-- -ges-, -gep-, -gel-, -gie- at beginning --//
439                result.append('K', 'J');
440                index += 2;
441            } else if ((contains(value, index + 1, 2, "ER") || 
442                        charAt(value, index + 1) == 'Y') &&
443                       !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
444                       !contains(value, index - 1, 1, "E", "I") && 
445                       !contains(value, index - 1, 3, "RGY", "OGY")) {
446                //-- -ger-, -gy- --//
447                result.append('K', 'J');
448                index += 2;
449            } else if (contains(value, index + 1, 1, "E", "I", "Y") || 
450                       contains(value, index - 1, 4, "AGGI", "OGGI")) {
451                //-- Italian "biaggi" --//
452                if ((contains(value, 0 ,4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || contains(value, index + 1, 2, "ET")) {
453                    //-- obvious germanic --//
454                    result.append('K');
455                } else if (contains(value, index + 1, 3, "IER")) {
456                    result.append('J');
457                } else {
458                    result.append('J', 'K');
459                }
460                index += 2;
461            } else if (charAt(value, index + 1) == 'G') {
462                index += 2;
463                result.append('K');
464            } else {
465                index++;
466                result.append('K');
467            }
468            return index;
469        }
470        
471        /**
472         * Handles 'GH' cases
473         */
474        private int handleGH(String value, 
475                             DoubleMetaphoneResult result, 
476                             int index) {
477            if (index > 0 && !isVowel(charAt(value, index - 1))) {
478                result.append('K');
479                index += 2;
480            } else if (index == 0) {
481                if (charAt(value, index + 2) == 'I') {
482                    result.append('J');
483                } else {
484                    result.append('K');
485                }
486                index += 2;
487            } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
488                       (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
489                       (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
490                //-- Parker's rule (with some further refinements) - "hugh"
491                index += 2;
492            } else {
493                if (index > 2 && charAt(value, index - 1) == 'U' && 
494                    contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
495                    //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
496                    result.append('F');
497                } else if (index > 0 && charAt(value, index - 1) != 'I') {
498                    result.append('K');
499                }
500                index += 2;
501            }
502            return index;
503        }
504    
505        /**
506         * Handles 'H' cases
507         */
508        private int handleH(String value, 
509                            DoubleMetaphoneResult result, 
510                            int index) {
511            //-- only keep if first & before vowel or between 2 vowels --//
512            if ((index == 0 || isVowel(charAt(value, index - 1))) && 
513                isVowel(charAt(value, index + 1))) {
514                result.append('H');
515                index += 2;
516                //-- also takes car of "HH" --//
517            } else {
518                index++;
519            }
520            return index;
521        }
522        
523        /**
524         * Handles 'J' cases
525         */
526        private int handleJ(String value, DoubleMetaphoneResult result, int index, 
527                            boolean slavoGermanic) {
528            if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
529                    //-- obvious Spanish, "Jose", "San Jacinto" --//
530                    if ((index == 0 && (charAt(value, index + 4) == ' ') || 
531                         value.length() == 4) || contains(value, 0, 4, "SAN ")) {
532                        result.append('H');
533                    } else {
534                        result.append('J', 'H');
535                    }
536                    index++;
537                } else {
538                    if (index == 0 && !contains(value, index, 4, "JOSE")) {
539                        result.append('J', 'A');
540                    } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && 
541                                  (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
542                        result.append('J', 'H');
543                    } else if (index == value.length() - 1) {
544                        result.append('J', ' ');
545                    } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && !contains(value, index - 1, 1, "S", "K", "L")) {
546                        result.append('J');
547                    }
548    
549                    if (charAt(value, index + 1) == 'J') {
550                        index += 2;
551                    } else {
552                        index++;
553                    }
554                }
555            return index;
556        }
557        
558        /**
559         * Handles 'L' cases
560         */
561        private int handleL(String value, 
562                            DoubleMetaphoneResult result, 
563                            int index) {
564            if (charAt(value, index + 1) == 'L') {
565                if (conditionL0(value, index)) {
566                    result.appendPrimary('L');
567                } else {
568                    result.append('L');
569                }
570                index += 2;
571            } else {
572                index++;
573                result.append('L');
574            }
575            return index;
576        }
577    
578        /**
579         * Handles 'P' cases
580         */
581        private int handleP(String value, 
582                            DoubleMetaphoneResult result, 
583                            int index) {
584            if (charAt(value, index + 1) == 'H') {
585                result.append('F');
586                index += 2;
587            } else {
588                result.append('P');
589                index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
590            }
591            return index;
592        }
593    
594        /**
595         * Handles 'R' cases
596         */
597        private int handleR(String value, 
598                            DoubleMetaphoneResult result, 
599                            int index, 
600                            boolean slavoGermanic) {
601            if (index == value.length() - 1 && !slavoGermanic && 
602                contains(value, index - 2, 2, "IE") && 
603                !contains(value, index - 4, 2, "ME", "MA")) {
604                result.appendAlternate('R');
605            } else {
606                result.append('R');
607            }
608            return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
609        }
610    
611        /**
612         * Handles 'S' cases
613         */
614        private int handleS(String value, 
615                            DoubleMetaphoneResult result, 
616                            int index, 
617                            boolean slavoGermanic) {
618            if (contains(value, index - 1, 3, "ISL", "YSL")) {
619                //-- special cases "island", "isle", "carlisle", "carlysle" --//
620                index++;
621            } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
622                //-- special case "sugar-" --//
623                result.append('X', 'S');
624                index++;
625            } else if (contains(value, index, 2, "SH")) {
626                if (contains(value, index + 1, 4, 
627                             "HEIM", "HOEK", "HOLM", "HOLZ")) {
628                    //-- germanic --//
629                    result.append('S');
630                } else {
631                    result.append('X');
632                }
633                index += 2;
634            } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
635                //-- Italian and Armenian --//
636                if (slavoGermanic) {
637                    result.append('S');
638                } else {
639                    result.append('S', 'X');
640                }
641                index += 3;
642            } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || contains(value, index + 1, 1, "Z")) {
643                //-- german & anglicisations, e.g. "smith" match "schmidt" //
644                // "snider" match "schneider" --//
645                //-- also, -sz- in slavic language altho in hungarian it //
646                //   is pronounced "s" --//
647                result.append('S', 'X');
648                index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
649            } else if (contains(value, index, 2, "SC")) {
650                index = handleSC(value, result, index);
651            } else {
652                if (index == value.length() - 1 && contains(value, index - 2, 
653                                                            2, "AI", "OI")){
654                    //-- french e.g. "resnais", "artois" --//
655                    result.appendAlternate('S');
656                } else {
657                    result.append('S');
658                }
659                index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
660            }
661            return index;
662        }
663    
664        /**
665         * Handles 'SC' cases
666         */
667        private int handleSC(String value, 
668                             DoubleMetaphoneResult result, 
669                             int index) {
670            if (charAt(value, index + 2) == 'H') {
671                //-- Schlesinger's rule --//
672                if (contains(value, index + 3, 
673                             2, "OO", "ER", "EN", "UY", "ED", "EM")) {
674                    //-- Dutch origin, e.g. "school", "schooner" --//
675                    if (contains(value, index + 3, 2, "ER", "EN")) {
676                        //-- "schermerhorn", "schenker" --//
677                        result.append("X", "SK");
678                    } else {
679                        result.append("SK");
680                    }
681                } else {
682                    if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
683                        result.append('X', 'S');
684                    } else {
685                        result.append('X');
686                    }
687                }
688            } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
689                result.append('S');
690            } else {
691                result.append("SK");
692            }
693            return index + 3;
694        }
695    
696        /**
697         * Handles 'T' cases
698         */
699        private int handleT(String value, 
700                            DoubleMetaphoneResult result, 
701                            int index) {
702            if (contains(value, index, 4, "TION")) {
703                result.append('X');
704                index += 3;
705            } else if (contains(value, index, 3, "TIA", "TCH")) {
706                result.append('X');
707                index += 3;
708            } else if (contains(value, index, 2, "TH") || contains(value, index, 
709                                                                   3, "TTH")) {
710                if (contains(value, index + 2, 2, "OM", "AM") || 
711                    //-- special case "thomas", "thames" or germanic --//
712                    contains(value, 0, 4, "VAN ", "VON ") || 
713                    contains(value, 0, 3, "SCH")) {
714                    result.append('T');
715                } else {
716                    result.append('0', 'T');
717                }
718                index += 2;
719            } else {
720                result.append('T');
721                index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
722            }
723            return index;
724        }
725    
726        /**
727         * Handles 'W' cases
728         */
729        private int handleW(String value, 
730                            DoubleMetaphoneResult result, 
731                            int index) {
732            if (contains(value, index, 2, "WR")) {
733                //-- can also be in middle of word --//
734                result.append('R');
735                index += 2;
736            } else {
737                if (index == 0 && (isVowel(charAt(value, index + 1)) || 
738                                   contains(value, index, 2, "WH"))) {
739                    if (isVowel(charAt(value, index + 1))) {
740                        //-- Wasserman should match Vasserman --//
741                        result.append('A', 'F');
742                    } else {
743                        //-- need Uomo to match Womo --//
744                        result.append('A');
745                    }
746                    index++;
747                } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
748                           contains(value, index - 1, 
749                                    5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
750                           contains(value, 0, 3, "SCH")) {
751                    //-- Arnow should match Arnoff --//
752                    result.appendAlternate('F');
753                    index++;
754                } else if (contains(value, index, 4, "WICZ", "WITZ")) {
755                    //-- Polish e.g. "filipowicz" --//
756                    result.append("TS", "FX");
757                    index += 4;
758                } else {
759                    index++;
760                }
761            }
762            return index;
763        }
764        
765        /**
766         * Handles 'X' cases
767         */
768        private int handleX(String value, 
769                            DoubleMetaphoneResult result, 
770                            int index) {
771            if (index == 0) {
772                result.append('S');
773                index++;
774            } else {
775                if (!((index == value.length() - 1) && 
776                      (contains(value, index - 3, 3, "IAU", "EAU") || 
777                       contains(value, index - 2, 2, "AU", "OU")))) {
778                    //-- French e.g. breaux --//
779                    result.append("KS");
780                }
781                index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
782            }
783            return index;
784        }
785    
786        /**
787         * Handles 'Z' cases
788         */
789        private int handleZ(String value, DoubleMetaphoneResult result, int index, 
790                            boolean slavoGermanic) {
791            if (charAt(value, index + 1) == 'H') {
792                //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
793                result.append('J');
794                index += 2;
795            } else {
796                if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
797                    result.append("S", "TS");
798                } else {
799                    result.append('S');
800                }
801                index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
802            }
803            return index;
804        }
805    
806        //-- BEGIN CONDITIONS --//
807    
808        /**
809         * Complex condition 0 for 'C'
810         */
811        private boolean conditionC0(String value, int index) {
812            if (contains(value, index, 4, "CHIA")) {
813                return true;
814            } else if (index <= 1) {
815                return false;
816            } else if (isVowel(charAt(value, index - 2))) {
817                return false;
818            } else if (!contains(value, index - 1, 3, "ACH")) {
819                return false;
820            } else {
821                char c = charAt(value, index + 2);
822                return (c != 'I' && c != 'E') ||
823                        contains(value, index - 2, 6, "BACHER", "MACHER");
824            }
825        }
826        
827        /**
828         * Complex condition 0 for 'CH'
829         */
830        private boolean conditionCH0(String value, int index) {
831            if (index != 0) {
832                return false;
833            } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && 
834                       !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
835                return false;
836            } else if (contains(value, 0, 5, "CHORE")) {
837                return false;
838            } else {
839                return true;
840            }
841        }
842        
843        /**
844         * Complex condition 1 for 'CH'
845         */
846        private boolean conditionCH1(String value, int index) {
847            return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 
848                                                                       3, "SCH")) ||
849                    contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
850                    contains(value, index + 2, 1, "T", "S") ||
851                    ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
852                     (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
853        }
854        
855        /**
856         * Complex condition 0 for 'L'
857         */
858        private boolean conditionL0(String value, int index) {
859            if (index == value.length() - 3 && 
860                contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
861                return true;
862            } else if ((contains(value, value.length() - 2, 2, "AS", "OS") || 
863                        contains(value, value.length() - 1, 1, "A", "O")) &&
864                       contains(value, index - 1, 4, "ALLE")) {
865                return true;
866            } else {
867                return false;
868            }
869        }
870        
871        /**
872         * Complex condition 0 for 'M'
873         */
874        private boolean conditionM0(String value, int index) {
875            if (charAt(value, index + 1) == 'M') {
876                return true;
877            }
878            return contains(value, index - 1, 3, "UMB") &&
879                    ((index + 1) == value.length() - 1 || contains(value,
880                            index + 2, 2, "ER"));
881        }
882        
883        //-- BEGIN HELPER FUNCTIONS --//
884    
885        /**
886         * Determines whether or not a value is of slavo-germanic orgin. A value is
887         * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
888         */
889        private boolean isSlavoGermanic(String value) {
890            return value.indexOf('W') > -1 || value.indexOf('K') > -1 || 
891                value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
892        }
893    
894        /**
895         * Determines whether or not a character is a vowel or not
896         */
897        private boolean isVowel(char ch) {
898            return VOWELS.indexOf(ch) != -1;
899        }
900    
901        /**
902         * Determines whether or not the value starts with a silent letter.  It will
903         * return <code>true</code> if the value starts with any of 'GN', 'KN',
904         * 'PN', 'WR' or 'PS'.
905         */    
906        private boolean isSilentStart(String value) {
907            boolean result = false;
908            for (int i = 0; i < SILENT_START.length; i++) {
909                if (value.startsWith(SILENT_START[i])) {
910                    result = true;
911                    break;
912                }
913            }
914            return result;
915        }
916    
917        /**
918         * Cleans the input
919         */    
920        private String cleanInput(String input) {
921            if (input == null) {
922                return null;
923            }
924            input = input.trim();
925            if (input.length() == 0) {
926                return null;
927            }
928            return input.toUpperCase(java.util.Locale.ENGLISH);
929        }
930    
931        /**
932         * Gets the character at index <code>index</code> if available, otherwise
933         * it returns <code>Character.MIN_VALUE</code> so that there is some sort
934         * of a default
935         */    
936        protected char charAt(String value, int index) {
937            if (index < 0 || index >= value.length()) {
938                return Character.MIN_VALUE;
939            } 
940            return value.charAt(index);
941        }
942    
943        /**
944         * Shortcut method with 1 criteria
945         */    
946        private static boolean contains(String value, int start, int length, 
947                                        String criteria) {
948            return contains(value, start, length, 
949                            new String[] { criteria });
950        }
951    
952        /**
953         * Shortcut method with 2 criteria
954         */    
955        private static boolean contains(String value, int start, int length, 
956                                        String criteria1, String criteria2) {
957            return contains(value, start, length, 
958                            new String[] { criteria1, criteria2 });
959        }
960    
961        /**
962         * Shortcut method with 3 criteria
963         */    
964        private static boolean contains(String value, int start, int length, 
965                                        String criteria1, String criteria2, 
966                                        String criteria3) {
967            return contains(value, start, length, 
968                            new String[] { criteria1, criteria2, criteria3 });
969        }
970    
971        /**
972         * Shortcut method with 4 criteria
973         */    
974        private static boolean contains(String value, int start, int length, 
975                                        String criteria1, String criteria2, 
976                                        String criteria3, String criteria4) {
977            return contains(value, start, length, 
978                            new String[] { criteria1, criteria2, criteria3, 
979                                           criteria4 });
980        }
981    
982        /**
983         * Shortcut method with 5 criteria
984         */    
985        private static boolean contains(String value, int start, int length, 
986                                        String criteria1, String criteria2, 
987                                        String criteria3, String criteria4, 
988                                        String criteria5) {
989            return contains(value, start, length, 
990                            new String[] { criteria1, criteria2, criteria3, 
991                                           criteria4, criteria5 });
992        }
993    
994        /**
995         * Shortcut method with 6 criteria
996         */    
997        private static boolean contains(String value, int start, int length, 
998                                        String criteria1, String criteria2, 
999                                        String criteria3, String criteria4, 
1000                                        String criteria5, String criteria6) {
1001            return contains(value, start, length, 
1002                            new String[] { criteria1, criteria2, criteria3, 
1003                                           criteria4, criteria5, criteria6 });
1004        }
1005        
1006        /**
1007         * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and
1008         * matching up to length <code>length</code>
1009         */
1010        protected static boolean contains(String value, int start, int length, 
1011                                          String[] criteria) {
1012            boolean result = false;
1013            if (start >= 0 && start + length <= value.length()) {
1014                String target = value.substring(start, start + length);
1015    
1016                for (int i = 0; i < criteria.length; i++) {
1017                    if (target.equals(criteria[i])) {
1018                        result = true;
1019                        break;
1020                    }
1021                }
1022            }
1023            return result;
1024        }
1025        
1026        //-- BEGIN INNER CLASSES --//
1027        
1028        /**
1029         * Inner class for storing results, since there is the optional alternate
1030         * encoding.
1031         */
1032        public class DoubleMetaphoneResult {
1033    
1034            private StringBuffer primary = new StringBuffer(getMaxCodeLen());
1035            private StringBuffer alternate = new StringBuffer(getMaxCodeLen());
1036            private int maxLength;
1037    
1038            public DoubleMetaphoneResult(int maxLength) {
1039                this.maxLength = maxLength;
1040            }
1041    
1042            public void append(char value) {
1043                appendPrimary(value);
1044                appendAlternate(value);
1045            }
1046    
1047            public void append(char primary, char alternate) {
1048                appendPrimary(primary);
1049                appendAlternate(alternate);
1050            }
1051    
1052            public void appendPrimary(char value) {
1053                if (this.primary.length() < this.maxLength) {
1054                    this.primary.append(value);
1055                }
1056            }
1057    
1058            public void appendAlternate(char value) {
1059                if (this.alternate.length() < this.maxLength) {
1060                    this.alternate.append(value);
1061                }
1062            }
1063    
1064            public void append(String value) {
1065                appendPrimary(value);
1066                appendAlternate(value);
1067            }
1068    
1069            public void append(String primary, String alternate) {
1070                appendPrimary(primary);
1071                appendAlternate(alternate);
1072            }
1073    
1074            public void appendPrimary(String value) {
1075                int addChars = this.maxLength - this.primary.length();
1076                if (value.length() <= addChars) {
1077                    this.primary.append(value);
1078                } else {
1079                    this.primary.append(value.substring(0, addChars));
1080                }
1081            }
1082    
1083            public void appendAlternate(String value) {
1084                int addChars = this.maxLength - this.alternate.length();
1085                if (value.length() <= addChars) {
1086                    this.alternate.append(value);
1087                } else {
1088                    this.alternate.append(value.substring(0, addChars));
1089                }
1090            }
1091    
1092            public String getPrimary() {
1093                return this.primary.toString();
1094            }
1095    
1096            public String getAlternate() {
1097                return this.alternate.toString();
1098            }
1099    
1100            public boolean isComplete() {
1101                return this.primary.length() >= this.maxLength && 
1102                    this.alternate.length() >= this.maxLength;
1103            }
1104        }
1105    }