简体   繁体   中英

CSV appending specific strings in Dictionary

There are two CSV files I am looking to consolidate into 1.

A.CSV

WBS Element,Purchasing Document,Purchase order text,Val/COArea Crcy
ABC123,,,75000
ABC124,4200028630,Service,1069.2
ABC124,4200041490,Service,25518.24
ABC124,4200041490,Service,-1890.24
ABC126,4200028630,Service,2268
ABC126,4200028630,Service,-2268
ABC126,4200029435,Service,25149.65
ABC137,,,4146.2

B.CSV

WBS Element,Ref Document Number,Val/COArea Crcy,Name
ABC124,1000060610,0,Slab Locates & Steel Differential
ABC124,1000081223,0,NOCN339A&3921
ABC124,1000081223,0,Slab Locates & Steel Differential
ABC126,1000067757,0,Structural Steel
ABC 137,4200041490,0,Service
ABC 137,4200028630,5393.52,Service
ABC 137,4200029435,0,Service

I want to make 1 CSV file that combines both of these. The lines starting with WBS Element are joined together. The WBS Elements from each file are then placed on the same line if they match. If A has a WBS Element B does not, then the section for B is just "," and vice versa.

Sample target output:

WBS Element,Purchasing Document,Purchase order text,Val/COArea Crcy,WBS Element,Ref Document Number,Val/COArea Crcy,Name
ABC123,,,75000,,,,
ABC124,4200028630,Service,1069.2,ABC124,1000060610,0,Slab Locates & Steel Differential

I have the following code:

/// <summary>
/// Reads Input\a.csv and Input\b.csv, writes their combined header line to
/// Output\output.csv, and indexes the data rows of each file by their first
/// column (the WBS Element).
/// </summary>
/// <remarks>
/// Only the combined header is written. The per-key row data collected in the
/// two dictionaries is not merged into the output by this method.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
    {
        // The using blocks close all three files even when reading or writing throws.
        using (StreamReader a = new StreamReader(@"Input\a.csv"))
        using (StreamReader b = new StreamReader(@"Input\b.csv"))
        using (StreamWriter output = new StreamWriter(@"Output\output.csv"))
        {
            output.WriteLine(a.ReadLine() + "," + b.ReadLine());

            // Each file is read to its own end. Reading both in one lock-step
            // loop would silently drop the remaining rows of the longer file.
            Dictionary<string, string> Adict = ReadRowsByKey(a);
            Dictionary<string, string> Bdict = ReadRowsByKey(b);
        }
    }

    /// <summary>
    /// Reads the remaining lines of <paramref name="reader"/> and groups them by
    /// their first comma-separated field.
    /// </summary>
    /// <param name="reader">Reader positioned after the header line.</param>
    /// <returns>
    /// A dictionary keyed by the first field. Each value holds one entry per row
    /// with that key: the row's remaining fields joined by "," and terminated by
    /// "\n". Every row uses the same format, whether it is the first or a later
    /// row for its key.
    /// </returns>
    private static Dictionary<string, string> ReadRowsByKey(StreamReader reader)
    {
        Dictionary<string, string> rowsByKey = new Dictionary<string, string>();

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Blank lines carry no key and no data.
            if (line.Trim().Length == 0)
            {
                continue;
            }

            // Plain split: quoted fields containing commas are not supported.
            string[] tokens = line.Split(',');
            string key = tokens[0];
            string fields = string.Join(",", tokens, 1, tokens.Length - 1) + "\n";

            string existing;
            if (rowsByKey.TryGetValue(key, out existing))
            {
                rowsByKey[key] = existing + fields;
            }
            else
            {
                rowsByKey.Add(key, fields);
            }
        }

        return rowsByKey;
    }
}

I am stuck now: I don't know how to loop through each Dictionary, compare the keys, and then join (insert?) only the lines that have a matching key.

First of all, I think you should make a class to hold the data for each WBS Element. The class I made for this problem is really simple:

/// <summary>
/// Holds the values collected for one WBS Element key: three values taken from
/// an A.CSV row and three taken from a B.CSV row. A member stays null until a
/// row from the corresponding file has been stored.
/// </summary>
class WbsElement
{
    /// <summary>"Purchasing Document" column of A.CSV.</summary>
    public string PurchasingDocument { get; set; }

    /// <summary>"Purchase order text" column of A.CSV.</summary>
    public string PurchaseOrderText { get; set; }

    /// <summary>"Val/COArea Crcy" column of A.CSV.</summary>
    public string ValCoAreaCrcyA { get; set; }

    /// <summary>"Val/COArea Crcy" column of B.CSV.</summary>
    public string ValCoAreaCrcyB { get; set; }

    /// <summary>"Ref Document Number" column of B.CSV.</summary>
    public string RefDocumentNumber { get; set; }

    /// <summary>"Name" column of B.CSV.</summary>
    public string Name { get; set; }
}

It has some members that you can use to store the data. Then I took your code and changed it to this:

/// <summary>
/// Merges A.CSV and B.CSV into output.csv, one output row per distinct value of
/// the first column (the WBS Element).
/// </summary>
/// <remarks>
/// Only one row per key and per file is kept: a later row with the same key
/// overwrites the values stored by an earlier one. Rows are written in the
/// dictionary's enumeration order. Lines are split on ',' only, so quoted
/// fields containing commas are not supported.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
    {
        Dictionary<string, WbsElement> newDict = new Dictionary<string, WbsElement>();

        // The using blocks close all three files even when reading or writing throws.
        using (StreamReader a = new StreamReader(@"A.CSV"))
        using (StreamReader b = new StreamReader(@"B.CSV"))
        using (StreamWriter output = new StreamWriter(@"output.csv"))
        {
            output.WriteLine(a.ReadLine() + "," + b.ReadLine());

            string line;

            //section for A
            // Read A to its own end. Testing b.EndOfStream here as well would
            // discard every A row whenever B contains nothing but its header.
            while ((line = a.ReadLine()) != null)
            {
                if (line.Trim().Length == 0)
                {
                    continue; // blank line: no key, no data
                }

                string[] tokens = line.Split(',');
                WbsElement element;
                if (!newDict.TryGetValue(tokens[0], out element))
                {
                    element = new WbsElement();
                    newDict.Add(tokens[0], element);
                }

                element.PurchasingDocument = FieldOrEmpty(tokens, 1);
                element.PurchaseOrderText = FieldOrEmpty(tokens, 2);
                element.ValCoAreaCrcyA = FieldOrEmpty(tokens, 3);
            }

            //section for B
            while ((line = b.ReadLine()) != null)
            {
                if (line.Trim().Length == 0)
                {
                    continue;
                }

                string[] tokens = line.Split(',');
                WbsElement element;
                if (!newDict.TryGetValue(tokens[0], out element))
                {
                    element = new WbsElement();
                    newDict.Add(tokens[0], element);
                }

                element.RefDocumentNumber = FieldOrEmpty(tokens, 1);
                element.ValCoAreaCrcyB = FieldOrEmpty(tokens, 2);
                element.Name = FieldOrEmpty(tokens, 3);
            }

            foreach (KeyValuePair<string, WbsElement> keyValuePair in newDict)
            {
                WbsElement element = keyValuePair.Value;

                // A row stored from a file always sets non-null values, so a null
                // member means that file had no row for this key. In that case the
                // key column of that file's section is left empty as well.
                string keyA = element.PurchasingDocument != null ? keyValuePair.Key : string.Empty;
                string keyB = element.RefDocumentNumber != null ? keyValuePair.Key : string.Empty;

                // string.Format renders null arguments as empty strings.
                output.WriteLine(string.Format("{0},{1},{2},{3},{4},{5},{6},{7}",
                                 keyA, element.PurchasingDocument,
                                 element.PurchaseOrderText, element.ValCoAreaCrcyA,
                                 keyB,
                                 element.RefDocumentNumber, element.ValCoAreaCrcyB,
                                 element.Name));
            }
        }
    }

    /// <summary>
    /// Returns the field at <paramref name="index"/>, or an empty string when the
    /// line has fewer fields than that.
    /// </summary>
    private static string FieldOrEmpty(string[] tokens, int index)
    {
        return index < tokens.Length ? tokens[index] : string.Empty;
    }

I make a new dictionary that stores the key plus one instance of the class I made. When I find the same key again, I just store the new information in the existing instance. At the end of the application I write all the collected data to the output stream. The class is the key to making this easy.

In case you want it generic for different length of data input, you could use this:

/// <summary>
/// Merges A.CSV and B.CSV into output.csv for any number of columns, one output
/// row per distinct value of the first column.
/// </summary>
/// <remarks>
/// Every output row holds the A fields followed by the B fields. When a key is
/// missing from one file, or a row is shorter than its header, that file's
/// section is padded with empty fields so the columns stay aligned with the
/// combined header. Only the last row per key and per file is kept. Lines are
/// split on ',' only, so quoted fields containing commas are not supported.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
    {
        // Per key: slot 0 holds the fields of the A row, slot 1 those of the B row.
        // A slot stays null while its file has no row for the key.
        Dictionary<string, string[][]> newDict = new Dictionary<string, string[][]>();

        // The using blocks close all three files even when reading or writing throws.
        using (StreamReader a = new StreamReader(@"A.CSV"))
        using (StreamReader b = new StreamReader(@"B.CSV"))
        using (StreamWriter output = new StreamWriter(@"output.csv"))
        {
            // ReadLine returns null for an empty file; treat that as an empty header.
            string aHeader = a.ReadLine() ?? string.Empty;
            string bHeader = b.ReadLine() ?? string.Empty;
            int aLength = aHeader.Split(',').Length;
            int bLength = bHeader.Split(',').Length;

            output.WriteLine(aHeader + "," + bHeader);

            // Each file is read to its own end, independently of the other.
            StoreRows(a, newDict, 0);
            StoreRows(b, newDict, 1);

            foreach (KeyValuePair<string, string[][]> keyValuePair in newDict)
            {
                output.WriteLine(JoinPadded(keyValuePair.Value[0], aLength)
                                 + "," + JoinPadded(keyValuePair.Value[1], bLength));
            }
        }
    }

    /// <summary>
    /// Reads the remaining lines of <paramref name="reader"/> and stores each
    /// line's fields in the given slot of the entry for its first field.
    /// </summary>
    private static void StoreRows(StreamReader reader, Dictionary<string, string[][]> rowsByKey, int slot)
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (line.Trim().Length == 0)
            {
                continue; // blank line: no key, no data
            }

            string[] tokens = line.Split(',');
            string[][] sides;
            if (!rowsByKey.TryGetValue(tokens[0], out sides))
            {
                sides = new string[2][];
                rowsByKey.Add(tokens[0], sides);
            }

            sides[slot] = tokens;
        }
    }

    /// <summary>
    /// Joins <paramref name="tokens"/> with "," and appends empty fields until at
    /// least <paramref name="width"/> fields are present. A null array yields
    /// <paramref name="width"/> empty fields. Rows longer than the width are
    /// emitted unchanged.
    /// </summary>
    private static string JoinPadded(string[] tokens, int width)
    {
        int present = tokens == null ? 0 : tokens.Length;
        string[] padded = new string[Math.Max(width, present)];
        for (int i = 0; i < padded.Length; i++)
        {
            padded[i] = i < present ? tokens[i] : string.Empty;
        }

        return string.Join(",", padded);
    }

It uses a list to keep track of input data.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM