简体   繁体   中英

Can't write a function for building Huffman tree

I have a struct element, which holds the information about one node of the tree.

// One node of the tree; nodes live in a flat array and refer to each other by index.
struct element
{
    // Letter stored in this node.
    char ch;
    // Index of the left child in the array.
    int left;
    // Index of the right child in the array.
    int right;
    // Number of times the letter occurs in the word.
    int count;
};

Also, I have a function MakeAlphabet, which builds the alphabet of a std::string:

std::vector<element> MakeAlphabet(std::string str)  //РАБОТАЕТ!
{
    std::vector<element> alphabet;
    ranges::sort(str);

    for(auto ch : str | ranges::views::unique)
    {
        alphabet.push_back(element{ch, -1, -1, static_cast<int>(ranges::count(str, ch))});
    }
    return alphabet;
};

Finally, I've got a function MakeBinaryTree :

std::vector<element> MakeBinaryTree(std::string str)
{
    std::vector<element> result;
    std::vector<element> alphabet = MakeAlphabet(str);
    std::sort(alphabet.begin(), alphabet.end());
    result = alphabet;
   
    int size = alphabet.size();
    result = alphabet;

    for(int i = 0; i < size; i+=2)  //I think problem is here!
    {   
        alphabet.push_back(element{-1, i, i+1, (alphabet[i].count + alphabet[i+1].count)});
        alphabet.erase(alphabet.begin(), alphabet.begin() + 1);
        result.push_back(*(alphabet.end()-1));
    }
    return result;
};

When checking the root of the resulting tree (it must match the number of letters in the word), the result is almost always incorrect.

UPD: I have overloaded the comparison operators that std::sort(alphabet.begin(), alphabet.end()); relies on:

// Orders elements by ascending occurrence count; the letter is ignored.
bool operator<(const element &first, const element &second)
{
    return second.count > first.count;
}

// Two elements compare equal when their occurrence counts match; the letter is ignored.
bool operator==(const element &first, const element &second)
{
    const bool same_count = first.count == second.count;
    return same_count;
}

The array must be sorted; then the two nodes with the smallest counts are removed and combined into a new node, which is put back into the array. The array is sorted again, and so on, until a single node remains. Thus only one array is needed, i.e. alphabet.

Store pointers to element (element*) in the array instead of element values, because pointers are easy to move from the array into the tree.

Create a node class

// Common base of every Huffman tree node.
class node
{
public:
    // Number of occurrences of all letters found in the subtree rooted at this node.
    int count = 0;

    // Nodes are created with `new` and handled through node*, so deletion through
    // the base pointer must be well defined.
    virtual ~node() = default;
};

// Internal node: joins two subtrees. It does not delete its children.
class inner_node: public node
{
public:
    node *left = nullptr;
    node *right = nullptr;
};

// Leaf node: a single letter of the alphabet.
class element: public node
{
public:
    char ch = '\0';   // Letter
};

// Builds the alphabet of `str`: one heap-allocated leaf per distinct character,
// in ascending character order, with `count` set to its number of occurrences.
// The caller owns the returned nodes.
std::vector<node*> MakeAlphabet(std::string str)
{
    std::vector<node*> alphabet;
    // After sorting, equal characters form contiguous runs; the length of a run
    // is the occurrence count of its character.
    std::sort(str.begin(), str.end());

    std::string::size_type run_begin = 0;
    while (run_begin < str.size())
    {
        std::string::size_type run_end = run_begin;
        while (run_end < str.size() && str[run_end] == str[run_begin])
        {
            ++run_end;
        }

        element *e = new element();
        e->ch = str[run_begin];
        e->count = static_cast<int>(run_end - run_begin);
        alphabet.push_back(e);

        run_begin = run_end;
    }
    return alphabet;
}

// Builds the Huffman tree for `str`.
// Returns a vector holding only the root (empty when `str` is empty). The root's
// count equals str.size(). The caller owns every node of the tree.
std::vector<node*> MakeBinaryTree(std::string str)
{
    std::vector<node*> alphabet = MakeAlphabet(str);

    // Compare by count, not by pointer value; heaviest first so that the two
    // lightest nodes sit at the back, where removal is cheap.
    const auto heavier_first = [](const node *lhs, const node *rhs)
    {
        return lhs->count > rhs->count;
    };

    // keep going until there is only the huffman tree root left in the vector
    while(alphabet.size() > 1)
    {
        std::sort(alphabet.begin(), alphabet.end(), heavier_first);

        // Take the two lightest nodes and remove them
        node *first = alphabet.back();
        alphabet.pop_back();
        node *second = alphabet.back();
        alphabet.pop_back();

        // Create the joining tree node and put it back in the vector
        inner_node *n = new inner_node();
        n->left = first;
        n->right = second;
        n->count = first->count + second->count;
        alphabet.push_back(n);
    }
    return alphabet;
}

Use a priority_queue instead of a vector.

Go through Huffman Encoding once if you want.

I have re-implemented the MakeBinaryTree function using a priority_queue .

Everything else remains the same.

Sample Case:-

character   Frequency
    a            5
    b           9
    c           12
    d           13
    e           16
    f           45

Expected Result:-

结果树

Final Output in std::vector<element> :

Index       Character   Frequency
   0          .      100
   1          f       45
   2          .       55
   3          .       25
   4          .       30
   5          c       12
   6          d       13
   7          .       14
   8          e       16
   9          a        5
  10          b        9

. indicates an internal node.

Working Code:-

#include <iostream>
#include <vector>
#include <utility>
#include <string>
#include <queue>
#include <map>
#include <stack>
#include <string_view>
#include <deque>
#include <array>
#include <algorithm>
#include <range/v3/algorithm/count.hpp>
#include <range/v3/action/sort.hpp>
#include <range/v3/view/unique.hpp>
#include <exception>
#include <gtest/gtest.h>

// Node of the binary (Huffman) tree.
struct element
{
    char ch;         // letter of a leaf; internal nodes are printed as '.'
    element* left;   // left child, or null for a leaf
    element* right;  // right child, or null for a leaf
    int count;       // occurrences of the letter, or the sum over both children
};

// Deliberately inverted: an element is "less" when its count is larger, which makes
// std::priority_queue (a max heap by default) behave as a min heap on count.
bool operator<(const element &first, const element &second)
{
    return second.count < first.count;
}

// Two elements compare equal when their occurrence counts match; the letter is ignored.
bool operator==(const element &first, const element &second)
{
    const bool same_count = first.count == second.count;
    return same_count;
}

std::vector<element> MakeAlphabet(std::string str)  //РАБОТАЕТ!
{
    std::vector<element> alphabet;
    ranges::sort(str);

    for(auto ch : str | ranges::views::unique)
    {
        alphabet.push_back(element{ch, NULL, NULL, static_cast<int>(ranges::count(str, ch))});
    }
    return alphabet;
};


std::vector<element> MakeBinaryTree(std::string str)    //НЕ работает.
{
    std::vector<element> result;
    std::vector<element> alphabet = MakeAlphabet(str);
    std::priority_queue<element> min_heap;
    //Initialize Min Heap
    for(auto x:alphabet)
        min_heap.push(x);

    // Form Huffman Encoding Tree
    while(min_heap.size()>1){
        element *lc = (element*)malloc(sizeof(element));*lc=min_heap.top();min_heap.pop();
        element *rc = (element*)malloc(sizeof(element));*rc=min_heap.top();min_heap.pop();
        min_heap.push(element{'.',lc,rc,lc->count + rc->count});
    }
    std::queue<element> prefix_traversal;
    prefix_traversal.push(min_heap.top());

    // Convert Tree to Vector using BFS
    while(prefix_traversal.size()>0){
        element top = prefix_traversal.front();
        prefix_traversal.pop();
        result.push_back(top);
        if(top.left != NULL)
            prefix_traversal.push(*top.left);
        if(top.right != NULL)
            prefix_traversal.push(*top.right);

    }
    return result;
};



int main(){
    std::string s;
    for(int i=0;i<13;i+=1)s+="d";
    for(int i=0;i<9;i+=1)s+="b";
    for(int i=0;i<5;i+=1)s+="a";
    for(int i=0;i<12;i+=1)s+="c";   
    for(int i=0;i<16;i+=1)s+="e";
    for(int i=0;i<45;i+=1)s+="f";
    std::vector<element> result = MakeBinaryTree(s);
    printf("Index   \tCharacter\tFrequency\n");
    for(int i=0;i<result.size(); i+=1){
        // std::cout<<"Index "<<i<<" "<<result[i].ch<<" "<<result[i].count<<std::endl;
        printf("%4d\t\t%3c\t\t%4d\n",i,result[i].ch,result[i].count);
    }
    return 1;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM