From f0e53b33c60e986f7423536783cdd738f1e3b8dc Mon Sep 17 00:00:00 2001
From: Aronya Baksy
Date: Thu, 7 Oct 2021 18:41:03 +0530
Subject: [PATCH] Huffman Tree implementation

---
 Data Structures/Huffman/README.md   |   3 +
 Data Structures/Huffman/huffman.cpp | 120 ++++++++++++++++++++++++++++
 Data Structures/Huffman/huffman.hpp |  74 +++++++++++++++++
 Data Structures/Huffman/main.cpp    |  12 +++
 4 files changed, 209 insertions(+)
 create mode 100644 Data Structures/Huffman/README.md
 create mode 100644 Data Structures/Huffman/huffman.cpp
 create mode 100644 Data Structures/Huffman/huffman.hpp
 create mode 100644 Data Structures/Huffman/main.cpp

diff --git a/Data Structures/Huffman/README.md b/Data Structures/Huffman/README.md
new file mode 100644
index 00000000..b06a1152
--- /dev/null
+++ b/Data Structures/Huffman/README.md
@@ -0,0 +1,3 @@
+# Huffman Tree
+
+Huffman coding is a lossless data compression algorithm. The idea is to assign variable-length codes to input characters; the lengths of the assigned codes are based on the frequencies of the corresponding characters. The most frequent character gets the shortest code and the least frequent character gets the longest code.
\ No newline at end of file
diff --git a/Data Structures/Huffman/huffman.cpp b/Data Structures/Huffman/huffman.cpp
new file mode 100644
index 00000000..46211faa
--- /dev/null
+++ b/Data Structures/Huffman/huffman.cpp
@@ -0,0 +1,120 @@
+#include "huffman.hpp"
+
+std::map<char, std::string> codes; //Store mapping between character and its Huffman code
+std::map<char, int> freqMap;       //Store mapping between character and its frequency in the string
+
+void buildCharFreqMap(std::string s)
+{
+    for (char c : s)
+    {
+        ++freqMap[c]; //operator[] starts a missing character at 0, so its first occurrence gives 1
+    }
+}
+
+HuffmanTreeNode *buildHuffmanTree(std::string s)
+{
+    freqMap.clear(); //Drop counts left over from a previous call
+    buildCharFreqMap(s);
+    if (freqMap.empty()) //Empty input: nothing to build, and top()/pop() on an empty heap is undefined
+        return nullptr;
+
+    //min-heap implementation using priority queue
+    std::priority_queue<HuffmanTreeNode *, std::vector<HuffmanTreeNode *>, compare> minHeap;
+
+    // Insert all the characters one by one into the min heap
+    for (const auto &entry : freqMap)
+    {
+        minHeap.push(new HuffmanTreeNode(entry.first, entry.second));
+    }
+
+    while (minHeap.size() > 1)
+    {
+        //Pick the two nodes with least frequency from the min heap
+        HuffmanTreeNode *left = minHeap.top();
+        minHeap.pop();
+        HuffmanTreeNode *right = minHeap.top();
+        minHeap.pop();
+
+        //Create an internal node with value '$' and sum of the two min frequency nodes
+        HuffmanTreeNode *internalNode = new HuffmanTreeNode('$', left->freq + right->freq);
+        internalNode->left = left;
+        internalNode->right = right;
+
+        //Push the newly created internal node into the min-heap
+        minHeap.push(internalNode);
+    }
+
+    // Return the root of the Huffman tree (the only node left in the min-heap)
+    return minHeap.top();
+}
+
+void storeCodes(HuffmanTreeNode *root, std::string str)
+{
+    if (root == nullptr)
+        return;
+    if (root->left == nullptr && root->right == nullptr)
+    {
+        //Leaves are recognised by their missing children, not by data != '$',
+        //so an input that itself contains '$' still gets a code for that character.
+        //A tree that is a single leaf would get the empty code; give it "0" instead.
+        codes[root->data] = str.empty() ? "0" : str;
+        return;
+    }
+    storeCodes(root->left, str + "0");  //Descending into the left subtree appends '0' to the code
+    storeCodes(root->right, str + "1"); //Descending into the right subtree appends '1' to the code
+}
+
+std::string HuffmanEncode(HuffmanTreeNode *root, std::string str)
+{
+    codes.clear();        //Drop codes left over from a previously encoded tree
+    storeCodes(root, ""); //Generate the codes for each character
+    //Print the codes for each character
+    for (const auto &x : codes)
+    {
+        std::cout << x.first << ' ' << x.second << '\n';
+    }
+    std::string encodedString = "";
+    for (char c : str)
+    {
+        //For each character in the original string, append its code to the encoded string.
+        //find() skips a character that is not in the tree without inserting an empty code.
+        auto it = codes.find(c);
+        if (it != codes.end())
+        {
+            encodedString += it->second;
+        }
+    }
+    return encodedString;
+}
+
+std::string HuffmanDecode(HuffmanTreeNode *root, std::string str)
+{
+    std::string decodedString = "";
+    //Check if Huffman tree exists
+    if (root == nullptr)
+    {
+        std::cout << "Please construct Huffman tree first!" << '\n';
+        return "";
+    }
+    //A tree that is a single leaf has no branch to follow: each bit stands for that one character
+    const bool singleLeaf = (root->left == nullptr && root->right == nullptr);
+    HuffmanTreeNode *curr = root;
+    for (char x : str)
+    {
+        if (x != '0' && x != '1')
+        {
+            std::cout << "Invalid input string for decode!";
+            break;
+        }
+        if (!singleLeaf)
+        {
+            curr = (x == '0') ? curr->left : curr->right; //0 moves left, 1 moves right
+        }
+        if (curr->left == nullptr && curr->right == nullptr) //If leaf node, add character to decoded string
+        {                                                    //and back-track to the root
+            decodedString += curr->data;
+            curr = root;
+        }
+    }
+    return decodedString;
+}
diff --git a/Data Structures/Huffman/huffman.hpp b/Data Structures/Huffman/huffman.hpp
new file mode 100644
index 00000000..5aa31c79
--- /dev/null
+++ b/Data Structures/Huffman/huffman.hpp
@@ -0,0 +1,74 @@
+#ifndef HUFFMAN_H
+#define HUFFMAN_H
+
+#include <iostream>
+#include <map>
+#include <queue>
+#include <string>
+#include <vector>
+
+/*
+One node of the Huffman tree data structure
+Contains the character (`data`), the frequency in the data (`freq`)
+and two pointers to the left and right nodes
+*/
+struct HuffmanTreeNode
+{
+    char data;
+    unsigned int freq;
+    struct HuffmanTreeNode *left;
+    struct HuffmanTreeNode *right;
+
+    HuffmanTreeNode(char data, unsigned int freq)
+    {
+        this->data = data;
+        this->freq = freq;
+        this->left = nullptr;
+        this->right = nullptr;
+    }
+};
+
+/*
+Takes in a string and adds the count of each of its characters
+to the global character-frequency std::map
+*/
+void buildCharFreqMap(std::string s);
+
+/*
+Comparator used for building min heap
+*/
+struct compare
+{
+    bool operator()(const HuffmanTreeNode *left, const HuffmanTreeNode *right) const
+    {
+        return left->freq > right->freq;
+    }
+};
+
+/*
+Builds the Huffman tree data structure by inserting nodes into a min heap,
+taking the two minimum frequency nodes at a time and creating a new node from them
+Repeat this process until size of min heap is 1
+Returns nullptr for an empty string. Nodes are allocated with new and are not
+freed by any function declared here, so the caller owns the returned tree
+*/
+HuffmanTreeNode *buildHuffmanTree(std::string str);
+
+/*
+Store the Huffman code of every leaf below root; str is the code prefix built so far
+This is achieved using a DFS traversal of the Huffman tree
+*/
+void storeCodes(HuffmanTreeNode *root, std::string str);
+
+/*
+Encode the string character by character
+using the mapping created in storeCodes()
+*/
+std::string HuffmanEncode(HuffmanTreeNode *root, std::string str);
+
+/*
+Decode the encoded string using a traversal of the Huffman tree
+*/
+std::string HuffmanDecode(HuffmanTreeNode *root, std::string str);
+
+#endif
\ No newline at end of file
diff --git a/Data Structures/Huffman/main.cpp b/Data Structures/Huffman/main.cpp
new file mode 100644
index 00000000..d7f741dd
--- /dev/null
+++ b/Data Structures/Huffman/main.cpp
@@ -0,0 +1,12 @@
+#include "huffman.hpp"
+
+int main()
+{
+    std::string myString = "ABRACADABRA";
+    HuffmanTreeNode *root = buildHuffmanTree(myString);
+    std::string encodedString = HuffmanEncode(root, myString);
+    std::string decodedString = HuffmanDecode(root, encodedString);
+    std::cout << encodedString << '\n'
+              << decodedString << '\n';
+    return 0;
+}
\ No newline at end of file