简体   繁体   中英

Sorting a single dataframe into multiple dataframes in C++

I want to read a CSV file whose content looks like this:

fruit  time  price
apple  2022  18
banana 2022  30
apple  2023  19
banana 2023  32

I want to group the rows by category and process each category as a separate dataframe, like this:

dataframe 1:
fruit  time  price
apple  2022  18
apple  2023  19

dataframe 2:
fruit  time  price
banana 2022  30
banana 2023  32

May I know how I can do this in C++? Thank you.

To read a csv file, the code is as follows:

#include <iostream>
#include<fstream>
using namespace std;
// Prints every whitespace-separated token of "my_file.csv" to standard
// output, each followed by a single space.
//
// The stream extraction itself is the loop condition, so the loop ends as
// soon as a read fails. Testing eof() before reading would print the last
// token twice when the file ends with a newline, and would never terminate
// if the file could not be opened.
void read()
{
    std::ifstream fin("my_file.csv");
    if (!fin.is_open())
    {
        std::cerr << "could not open my_file.csv\n";
        return;
    }

    std::string line;  // holds one token at a time, not a whole line
    while (fin >> line)
    {
        std::cout << line << " ";
    }
}

// Entry point: echo the tokens of the CSV file, then exit successfully.
int main()
{
    read();
    // Falling off the end of main is equivalent to `return 0;`.
}

I am not sure how to do the filtering part.

Here is a more complete example (note: avoid "using namespace std;").

#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>
#include <cstdint>
#include <map>
#include <vector>
#include <set>

// In-memory copy of the sample CSV data, used in place of a real input file.
// The first line is the column header; every following line is one record.
std::istringstream file_stream(
    "fruit  time  price\n"
    "apple  2022  18\n"
    "banana 2022  30\n"
    "apple  2023  19\n"
    "banana 2023  32\n"
);

// One record of the input data: a fruit name, a year and a price.
//
// The scalar members carry default initializers so that a default-constructed
// record holds zeros instead of indeterminate values. The type remains an
// aggregate, so `fruit_info_t{ name, year, price }` keeps working.
struct fruit_info_t
{
    std::string name;
    std::uint32_t year{ 0 };
    double price{ 0.0 };
};

// Stream insertion for fruit_info_t: writes name, year and price in that
// order, with two tab characters between fields and no trailing newline.
std::ostream& operator<<(std::ostream& os, const fruit_info_t& info)
{
    os << info.name;
    os << "\t\t" << info.year;
    os << "\t\t" << info.price;
    return os;
}

// Maps a fruit name to the list of all records that carry that name, so the
// records of one category can be looked up by name.
using fruits_t = std::map<std::string, std::vector<fruit_info_t>>;

// Small in-memory database that groups fruit records by name.
//
// Records are added one at a time with AddFruit(). Afterwards the distinct
// names can be listed with UniqueFruitNames() and the records of one name
// fetched with GetFruits().
class Fruits
{
public:
    // Adds one record, creating the category for its name on first use.
    void AddFruit(const fruit_info_t& fruit)
    {
        // insert() leaves the set unchanged when the name is already present.
        m_keys.insert(fruit.name);

        // operator[] creates an empty vector the first time a name is seen.
        m_fruits[fruit.name].push_back(fruit);
    }

    // Returns the set of distinct fruit names added so far.
    const auto& UniqueFruitNames() const noexcept
    {
        return m_keys;
    }

    // Returns all records that share the given name.
    // Throws std::out_of_range if no record with that name has been added.
    const auto& GetFruits(const std::string& unique_fruit_name) const
    {
        return m_fruits.at(unique_fruit_name);
    }

    // Sorts the records of every category by name, then year, then price.
    //
    // The name set is not sorted here: std::set already keeps its elements
    // ordered, and std::sort cannot be applied to it because set iterators
    // are neither random-access nor mutable.
    void Sort()
    {
        for (auto& entry : m_fruits)
        {
            auto& records = entry.second;
            std::sort(records.begin(), records.end(), [](const fruit_info_t& lhs, const fruit_info_t& rhs)
                {
                    // Lexicographic comparison: a later field is consulted only
                    // when all earlier fields are equal. This gives std::sort
                    // the strict weak ordering it requires.
                    if (lhs.name != rhs.name) return lhs.name < rhs.name;
                    if (lhs.year != rhs.year) return lhs.year < rhs.year;
                    return lhs.price < rhs.price;
                });
        }
    }

private:
    std::set<std::string> m_keys;  // distinct fruit names
    fruits_t m_fruits;             // records grouped by fruit name
};

// Reads whitespace-separated records from `stream` into a Fruits database.
//
// The first line is treated as the column header and discarded. Each
// following record consists of three fields: name, year and price.
// Reading stops at the first record that does not supply all three fields.
// std::stoul and std::stod throw std::invalid_argument or std::out_of_range
// when the year or price field cannot be converted to a number.
auto load(std::istream& stream)
{
    Fruits fruits;

    // Skip the header line.
    std::string header;
    std::getline(stream, header);

    std::string name;
    std::string year;
    std::string price;

    while (stream >> name >> year >> price)
    {
        // std::stoul returns unsigned long, which is wider than std::uint32_t
        // on LP64 platforms. Without the explicit cast, the braced
        // initializer below would contain a narrowing conversion.
        const auto parsed_year = static_cast<std::uint32_t>(std::stoul(year));

        // The price member is a double, so parse at double precision
        // instead of going through float.
        const double parsed_price = std::stod(price);

        fruits.AddFruit({ name, parsed_year, parsed_price });
    }

    return fruits;
}

// Loads the sample data, sorts it and prints one table per fruit name.
int main()
{
    // Replace file_stream with a std::ifstream to read from a real file.
    // The string stream keeps the sample data inside the source so the
    // example is self-contained.
    auto fruits = load(file_stream);

    // Sort the data before output.
    fruits.Sort();

    for (const auto& unique_fruit_name : fruits.UniqueFruitNames())
    {
        std::cout << "dataframe : " << unique_fruit_name << "\n";
        std::cout << "fruit\t\ttime\t\tprice\n";

        // Bind by reference: GetFruits returns a reference to the stored
        // vector, and plain `auto` would copy it for every category.
        const auto& infos = fruits.GetFruits(unique_fruit_name);

        for (const auto& info : infos)
        {
            std::cout << info << "\n";
        }
    }

    return 0;
}

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM