简体   繁体   中英

Soundex in C#: how to encode the first letter as a digit too

As you can see, I have set my values to "SMITH" and "Smythe" in my Main method. The output for these values should be 25030, but for some reason they are encoded as 250300. I think this is because the first character of the word is handled before any encoding takes place: e.g. SMITH starts with "S", so the code starts with the letter "S" itself rather than with a digit. How do I make that "S" become a digit or a value?

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace SoundDexFinal
{
    /// <summary>
    /// Console demo: prints the codes produced for two similar-sounding names,
    /// followed by the number of positions in which the two codes agree.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            string value1 = "SMITH";
            string value2 = "Smythe";

            // Named "encoder" so the local does not shadow the soundex type.
            soundex encoder = new soundex();
            Console.WriteLine(encoder.GetSoundex(value1));      // Outputs "S50300"
            Console.WriteLine(encoder.GetSoundex(value2));      // Outputs "S50300"
            Console.WriteLine(encoder.Compare(value1, value2)); // Outputs "6"

            // Waits for Enter; presumably here to keep the console window open.
            Console.ReadLine();
        }
    }

        namespace SoundDexFinal
    {
        /// <summary>
        /// Soundex-style phonetic encoder.
        /// </summary>
        /// <remarks>
        /// This is a variant of classic Soundex: vowels and H, W, Y are kept as
        /// the digit '0' instead of being dropped, and every code is exactly
        /// six characters long. The first letter of the input is stored
        /// verbatim, so it never merges with the code of the letter after it.
        /// </remarks>
        class soundex
        {
            // Every code is padded or truncated to exactly this many characters.
            private const int CodeLength = 6;

            // Stands in for letters that have no code; stripped before returning.
            private const char Placeholder = '.';

            /// <summary>
            /// Builds the phonetic code for <paramref name="value"/>.
            /// </summary>
            /// <param name="value">Text to encode; non-letter characters are skipped.</param>
            /// <returns>
            /// A six-character code: the first letter (upper-cased) followed by
            /// digits, right-padded with '0'. An input without letters yields "000000".
            /// </returns>
            /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
            public string GetSoundex(string value)
            {
                if (value == null)
                {
                    throw new ArgumentNullException("value");
                }

                // Invariant casing keeps the result independent of the current
                // culture (under tr-TR, ToUpper() turns 'i' into 'İ', which has no code).
                string upper = value.ToUpperInvariant();

                StringBuilder code = new StringBuilder();
                foreach (char ch in upper)
                {
                    if (char.IsLetter(ch))
                    {
                        AddCharacter(code, ch);
                    }
                }

                RemovePlaceholders(code);
                FixLength(code);
                return code.ToString();
            }

            // Appends the first letter as-is; every later letter is appended as its
            // code unless that code equals the character appended just before it.
            private static void AddCharacter(StringBuilder code, char ch)
            {
                if (code.Length == 0)
                {
                    code.Append(ch);
                    return;
                }

                char digit = GetSoundexDigit(ch);
                if (digit != code[code.Length - 1])
                {
                    code.Append(digit);
                }
            }

            // Maps an upper-case letter to its code digit, or to Placeholder when
            // the letter is outside A-Z.
            private static char GetSoundexDigit(char ch)
            {
                switch (ch)
                {
                    case 'A': case 'E': case 'I': case 'O':
                    case 'U': case 'H': case 'W': case 'Y':
                        return '0';
                    case 'B': case 'F': case 'P': case 'V':
                        return '1';
                    case 'C': case 'G': case 'J': case 'K':
                    case 'Q': case 'S': case 'X': case 'Z':
                        return '2';
                    case 'D': case 'T':
                        return '3';
                    case 'L':
                        return '4';
                    case 'M': case 'N':
                        return '5';
                    case 'R':
                        return '6';
                    default:
                        return Placeholder;
                }
            }

            // Drops the placeholders only after encoding, so an uncoded letter still
            // separates two equal codes on either side of it.
            private static void RemovePlaceholders(StringBuilder code)
            {
                code.Replace(Placeholder.ToString(), "");
            }

            // Right-pads with '0' or truncates so the code is exactly CodeLength long.
            private static void FixLength(StringBuilder code)
            {
                if (code.Length < CodeLength)
                {
                    code.Append('0', CodeLength - code.Length);
                }
                else
                {
                    code.Length = CodeLength;
                }
            }

            /// <summary>
            /// Counts the positions at which the codes of the two values agree.
            /// </summary>
            /// <param name="value1">First text to encode.</param>
            /// <param name="value2">Second text to encode.</param>
            /// <returns>A number from 0 (no position matches) to 6 (identical codes).</returns>
            /// <exception cref="ArgumentNullException">Either argument is null.</exception>
            public int Compare(string value1, string value2)
            {
                string code1 = GetSoundex(value1);
                string code2 = GetSoundex(value2);

                // Both codes are exactly CodeLength long, so indexing is safe.
                int matches = 0;
                for (int i = 0; i < CodeLength; i++)
                {
                    if (code1[i] == code2[i])
                    {
                        matches++;
                    }
                }

                return matches;
            }
        }
    }
}
}

You are calling the FixLength function, which appends extra '0' characters to the end of the string whenever its length is less than 6.

That's the reason you are getting "250300" instead of "25030".

Per the discussion, changing the AddCharacter method like so will achieve what you're after:

// Appends the code for ch unless it equals the character appended just before it.
// The first character is run through GetSoundexDigit like every other one, so the
// result starts with a code instead of the original letter.
private void AddCharacter(StringBuilder soundex, char ch)
{
    string digit = GetSoundexDigit(ch);

    bool repeatsPrevious = soundex.Length > 0
        && soundex[soundex.Length - 1].ToString() == digit;

    if (!repeatsPrevious)
    {
        soundex.Append(digit);
    }
}

But I wouldn't call the result "soundex" anymore, since with this change it no longer is a Soundex code.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM