Skip to main content

Session 4

Huffman's Coding Algorithm

Huffman Coding is a popular greedy algorithm used for 8. It is particularly effective for sequences where certain characters appear more frequently than others.

How the Algorithm Works

The core idea behind Huffman Coding is to assign variable-length binary codes to characters based on their frequency:

  • Frequency Analysis: The algorithm counts how many times each character occurs in the dataset.
  • Tree Construction: It builds a binary tree (the Huffman Tree) from the bottom up, starting with the least frequent characters.
  • Binary Encoding: In this tree, more frequent characters are placed closer to the root, resulting in shorter binary strings. Conversely, rarer characters receive longer codes.

Key Benefits

  • Significant Reduction: Huffman algorithms can typically compress data size by 70–80%, depending on the redundancy of the source material.
  • Optimal Prefix Codes: It ensures that no code is a prefix of another, which prevents ambiguity during the decompression (decoding) process.
  • Efficiency: By utilizing a greedy approach, the algorithm consistently finds the most mathematically optimal way to represent characters as binary strings for a given frequency set.

Objective

The main objectives of this session are to:

  • Implement Huffman’s Coding algorithm.
  • Test the implementation on different problem instances
  • Construct the optimal binary prefix code

Question 1

Problem Statement

Apply Huffman’s algorithm to construct an optimal binary prefix code for the letters and its frequencies in the following table.

Letters A B I M S X Z
Frequency 10 7 15 8 10 5 2

Show the complete steps.

Explanation

Using your data: A(10), B(7), I(15), M(8), S(10), X(5), Z(2).

  • Initialize: Create a leaf node for each character.
  • Merge (Greedy Step): Pick the two smallest: Z(2) and X(5).
    • Create a parent node with frequency 2 + 5 = 7.
  • Repeat: Now pick the next two smallest from the remaining list. This continues until only one node (the Root) remains.
  • Assign Bits: Starting from the Root, assign 0 to every left branch and 1 to every right branch.

Implementation

Python

huffman.py
		import heapq
 
class Node:
    def __init__(self, char, freq):
        self.char = char
        self.freq = freq
        self.left = None
        self.right = None
 
    def __lt__(self, other):
        return self.freq < other.freq
 
def solve_huffman():
    # 1. Initialize data from the problem table
    data = {'A': 10, 'B': 7, 'I': 15, 'M': 8, 'S': 10, 'X': 5, 'Z': 2}
    heap = [Node(c, f) for c, f in data.items()]
    heapq.heapify(heap)
 
    print("--- STEP-BY-STEP MERGING (GREEDY STEPS) ---")
    step = 1
    while len(heap) > 1:
        # Extract two smallest frequencies
        n1 = heapq.heappop(heap)
        n2 = heapq.heappop(heap)
 
        # Merge them
        merged = Node(None, n1.freq + n2.freq)
        merged.left = n1
        merged.right = n2
 
        print(f"Step {step}: Merged {n1.char or 'Node'}({n1.freq}) and "
              f"{n2.char or 'Node'}({n2.freq}) -> New Node({merged.freq})")
 
        heapq.heappush(heap, merged)
        step += 1
 
    # 2. Generate Codes and Print Table
    root = heap[0]
    codes = {}
 
    def generate_codes(node, current_code):
        if node:
            if node.char:
                codes[node.char] = current_code
            generate_codes(node.left, current_code + "0")
            generate_codes(node.right, current_code + "1")
 
    generate_codes(root, "")
 
    print("\n--- FINAL OPTIMAL BINARY PREFIX CODES ---")
    print(f"{'Letter':<10} | {'Frequency':<10} | {'Huffman Code':<15}")
    print("-" * 40)
    for char in sorted(data.keys()):
        print(f"{char:<10} | {data[char]:<10} | {codes[char]:<15}")
 
solve_huffman()
	

C Language

huffman.c
		#include <stdio.h>
#include <stdlib.h>
 
struct Node {
    char data;
    int freq;
    struct Node *left, *right;
};
 
struct Node* createNode(char data, int freq) {
    struct Node* n = (struct Node*)malloc(sizeof(struct Node));
    n->data = data; n->freq = freq;
    n->left = n->right = NULL;
    return n;
}
 
void printCodes(struct Node* root, int arr[], int top) {
    if (root->left) { arr[top] = 0; printCodes(root->left, arr, top + 1); }
    if (root->right) { arr[top] = 1; printCodes(root->right, arr, top + 1); }
    if (!root->left && !root->right) {
        printf("%-7c | %-7d | ", root->data, root->freq);
        for (int i = 0; i < top; i++) printf("%d", arr[i]);
        printf("\n");
    }
}
 
int main() {
    char letters[] = {'A', 'B', 'I', 'M', 'S', 'X', 'Z'};
    int freqs[] = {10, 7, 15, 8, 10, 5, 2};
    struct Node* nodes[7];
 
    for (int i = 0; i < 7; i++) nodes[i] = createNode(letters[i], freqs[i]);
 
    printf("--- STEP-BY-STEP MERGING ---\n");
    int size = 7;
    while (size > 1) {
        int m1 = 0, m2 = 1;
        if (nodes[m1]->freq > nodes[m2]->freq) { int t = m1; m1 = m2; m2 = t; }
        for (int i = 2; i < size; i++) {
            if (nodes[i]->freq < nodes[m1]->freq) { m2 = m1; m1 = i; }
            else if (nodes[i]->freq < nodes[m2]->freq) { m2 = i; }
        }
 
        struct Node* parent = createNode('$', nodes[m1]->freq + nodes[m2]->freq);
        parent->left = nodes[m1]; parent->right = nodes[m2];
        printf("Merged %d + %d = %d\n", nodes[m1]->freq, nodes[m2]->freq, parent->freq);
 
        nodes[m1] = parent;
        nodes[m2] = nodes[size - 1];
        size--;
    }
 
    printf("\n--- FINAL TABLE ---\n%-7s | %-7s | %-12s\n", "Letter", "Freq", "Code");
    printf("-----------------------------\n");
    int path[10];
    printCodes(nodes[0], path, 0);
    return 0;
}
	

Rust

huffman.rs
		use std::cmp::Reverse;
use std::collections::BinaryHeap;
 
#[derive(Eq, PartialEq)]
struct Node {
    freq: i32,
    char: Option<char>,
    left: Option<Box<Node>>,
    right: Option<Box<Node>>,
}
 
impl Ord for Node {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.freq.cmp(&other.freq)
    }
}
impl PartialOrd for Node {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
 
fn traverse(node: &Node, code: String) {
    if let Some(c) = node.char {
        println!("{:<7} | {:<7} | {}", c, node.freq, code);
    } else {
        if let Some(ref l) = node.left {
            traverse(l, code.clone() + "0");
        }
        if let Some(ref r) = node.right {
            traverse(r, code + "1");
        }
    }
}
 
fn main() {
    let mut heap = BinaryHeap::new();
    let items = vec![
        ('A', 10),
        ('B', 7),
        ('I', 15),
        ('M', 8),
        ('S', 10),
        ('X', 5),
        ('Z', 2),
    ];
 
    for (c, f) in items {
        heap.push(Reverse(Node {
            freq: f,
            char: Some(c),
            left: None,
            right: None,
        }));
    }
 
    println!("--- MERGING STEPS ---");
    while heap.len() > 1 {
        let Reverse(n1) = heap.pop().unwrap();
        let Reverse(n2) = heap.pop().unwrap();
        let parent = Node {
            freq: n1.freq + n2.freq,
            char: None,
            left: Some(Box::new(n1)),
            right: Some(Box::new(n2)),
        };
        println!(
            "Merged: {} + {} = {}",
            parent.left.as_ref().unwrap().freq,
            parent.right.as_ref().unwrap().freq,
            parent.freq
        );
        heap.push(Reverse(parent));
    }
 
    let Reverse(root) = heap.pop().unwrap();
    println!(
        "\n--- FINAL TABLE ---\n{:<7} | {:<7} | {}",
        "Letter", "Freq", "Code"
    );
    println!("-----------------------------");
    traverse(&root, "".to_string());
}
	

Sample Output

		--- STEP-BY-STEP MERGING (GREEDY STEPS) ---
Step 1: Merged Z(2) and X(5) -> New Node(7)
Step 2: Merged B(7) and Node(7) -> New Node(14)
Step 3: Merged M(8) and A(10) -> New Node(18)
Step 4: Merged S(10) and Node(14) -> New Node(24)
Step 5: Merged I(15) and Node(18) -> New Node(33)
Step 6: Merged Node(24) and Node(33) -> New Node(57)
 
--- FINAL OPTIMAL BINARY PREFIX CODES ---
Letter     | Frequency  | Huffman Code
----------------------------------------
A          | 10         | 111
B          | 7          | 010
I          | 15         | 10
M          | 8          | 110
S          | 10         | 00
X          | 5          | 0111
Z          | 2          | 0110
	

Question 2

Problem Statement

Implement Huffman's coding algorithm and run it on the problem instance of Q1.

Implementation

Question 3

Problem Statement

What is an optimal binary tree and Huffman code for the following set of frequencies? Find out the average number of bits required per character. Show complete steps.

A:15 B:25 C:5 D:7 E:10 F:13 G:9

Initial Frequencies - Sorted

Char C D G E F A B
Freq 5 7 9 10 13 15 25

Total Frequency = 84

Huffman Tree

Step Nodes in Pool (Sorted) Merged Nodes New Node
1 C:5, D:7, G:9, E:10, F:13, A:15, B:25 C(5) + D(7) N1:12
2 N1:12, G:9, E:10, F:13, A:15, B:25 → Sort: G:9, E:10, N1:12... G(9) + E(10) N2:19
3 N1:12, F:13, A:15, N2:19, B:25 N1(12)+F(13) N3:25
4 A:15, N2:19, N3:25, B:25 A(15)+N2(19) N4:34
5 N3:25, B:25, N4:34 N3(25)+B(25) N5:50
6 N4:34, N5:50 N4(34)+N5(50) Root:84

Assigning Codes

Convention: 0 = Left Child, 1 = Right Child

		Root(84)
 ├─ 0 N4(34)
    ├─ 0 A(15)    00
    └─ 1 N2(19)
         ├─ 0 G(9)   010
         └─ 1 E(10)  011
 └─ 1 N5(50)
      ├─ 0 N3(25)
    ├─ 0 N1(12)
    ├─ 0 C(5)   1000
    └─ 1 D(7)   1001
    └─ 1 F(13)  101
      └─ 1 B(25)    11
	

Huffman Codes

Char Frequency Code Length
A 15 00 2
B 25 11 2
C 5 1000 4
D 7 1001 4
E 10 011 3
F 13 101 3
G 9 010 3

Bits/Character

		Total Bits = Σ (Frequency × Code Length)
           = (15×2) + (25×2) + (5×4) + (7×4) + (10×3) + (13×3) + (9×3)
           = 30 + 50 + 20 + 28 + 30 + 39 + 27 = 224
 
Average Bits = Total Bits / Total Frequency
             = 224 / 84 2.67 bits/character
	

Implementations

Python

huffman.py
		import heapq
 
class Node:
    def __init__(self, freq, char=None):
        self.freq = freq
        self.char = char
        self.left = None
        self.right = None
 
    # Required for heapq to compare nodes by frequency
    def __lt__(self, other):
        return self.freq < other.freq
 
def build_huffman_tree(freq_dict):
    """Builds Huffman Tree using a Min-Heap"""
    heap = [Node(f, c) for c, f in freq_dict.items()]
    heapq.heapify(heap)
 
    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        merged = Node(left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(heap, merged)
 
    return heap[0] if heap else None
 
def generate_codes(root, current_code="", codes=None):
    """DFS traversal to assign 0/1 codes"""
    if codes is None:
        codes = {}
    if root is None:
        return
    if root.char is not None:  # Leaf node
        codes[root.char] = current_code
        return
    generate_codes(root.left, current_code + "0", codes)
    generate_codes(root.right, current_code + "1", codes)
    return codes
 
def main():
    freq = {'A': 15, 'B': 25, 'C': 5, 'D': 7, 'E': 10, 'F': 13, 'G': 9}
    root = build_huffman_tree(freq)
    codes = generate_codes(root)
 
    # Calculate average bits
    total_bits = sum(freq[c] * len(codes[c]) for c in codes)
    total_freq = sum(freq.values())
    avg_bits = total_bits / total_freq
 
    print("Huffman Codes:")
    for char in sorted(codes.keys()):
        print(f"{char}: {codes[char]}")
    print(f"\nAverage bits per character: {avg_bits:.2f}")
 
if __name__ == "__main__":
    main()
	

C Language

huffman.c
		#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
typedef struct Node {
    int freq;
    char data;
    struct Node *left, *right;
} Node;
 
// Simulated Priority Queue
Node* pq[20];
int pq_size = 0;
int active_nodes = 0;
 
Node* newNode(int freq, char data) {
    Node* n = (Node*)malloc(sizeof(Node));
    n->freq = freq;
    n->data = data;
    n->left = n->right = NULL;
    pq[pq_size++] = n;
    active_nodes++;
    return n;
}
 
// Extract minimum frequency node
Node* extractMin() {
    int minIdx = -1;
    for (int i = 0; i < pq_size; i++) {
        if (pq[i] != NULL) {
            if (minIdx == -1 || pq[i]->freq < pq[minIdx]->freq)
                minIdx = i;
        }
    }
    if (minIdx == -1) return NULL;
    Node* minNode = pq[minIdx];
    pq[minIdx] = NULL;
    active_nodes--;
    return minNode;
}
 
void printCodes(Node* root, char arr[], int top) {
    if (root->left) {
        arr[top] = '0';
        printCodes(root->left, arr, top + 1);
    }
    if (root->right) {
        arr[top] = '1';
        printCodes(root->right, arr, top + 1);
    }
    if (root->data != '') { // Leaf node
        arr[top] = '';
        printf("%c: %s\n", root->data, arr);
    }
}
 
// Calculate total weighted path length for average
int calculateTotalBits(Node* root, int depth) {
    if (root == NULL) return 0;
    if (root->data != '') return root->freq * depth;
    return calculateTotalBits(root->left, depth + 1) +
           calculateTotalBits(root->right, depth + 1);
}
 
int main() {
    char chars[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G'};
    int freqs[] = {15, 25, 5, 7, 10, 13, 9};
    int n = 7, totalFreq = 0;
 
    for (int i = 0; i < n; i++) {
        newNode(freqs[i], chars[i]);
        totalFreq += freqs[i];
    }
 
    // Build Huffman Tree
    while (active_nodes > 1) {
        Node* left = extractMin();
        Node* right = extractMin();
        if (!left || !right) break;
 
        Node* merged = newNode(left->freq + right->freq, '');
        merged->left = left;
        merged->right = right;
    }
 
    Node* root = extractMin();
    char code[50];
    printf("Huffman Codes:\n");
    printCodes(root, code, 0);
 
    int totalBits = calculateTotalBits(root, 0);
    double avgBits = (double)totalBits / totalFreq;
    printf("\nAverage bits per character: %.2f\n", avgBits);
 
    return 0;
}
	

Rust

huffman.rs
		use std::cmp::Ordering;
use std::collections::BinaryHeap;
 
#[derive(Debug)]
struct Node {
    freq: usize,
    ch: Option<char>,
    left: Option<Box<Node>>,
    right: Option<Box<Node>>,
}
 
// Implement Ord for BinaryHeap (defaults to max-heap)
// We reverse comparison to simulate a min-heap based on frequency
impl PartialEq for Node {
    fn eq(&self, other: &Self) -> bool {
        self.freq == other.freq
    }
}
impl Eq for Node {}
impl PartialOrd for Node {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(other.freq.cmp(&self.freq)) // Reverse order
    }
}
impl Ord for Node {
    fn cmp(&self, other: &Self) -> Ordering {
        other.freq.cmp(&self.freq)
    }
}
 
fn build_tree(freqs: &[(char, usize)]) -> Box<Node> {
    let mut heap = BinaryHeap::new();
    for &(ch, f) in freqs {
        heap.push(Box::new(Node {
            freq: f,
            ch: Some(ch),
            left: None,
            right: None,
        }));
    }
 
    while heap.len() > 1 {
        let left = heap.pop().unwrap();
        let right = heap.pop().unwrap();
        heap.push(Box::new(Node {
            freq: left.freq + right.freq,
            ch: None,
            left: Some(left),
            right: Some(right),
        }));
    }
    heap.pop().unwrap()
}
 
fn generate_codes(node: &Box<Node>, current: &str, codes: &mut Vec<(char, String)>) {
    if let Some(ch) = node.ch {
        codes.push((ch, current.to_string()));
        return;
    }
    if let Some(ref left) = node.left {
        generate_codes(left, &format!("{}0", current), codes);
    }
    if let Some(ref right) = node.right {
        generate_codes(right, &format!("{}1", current), codes);
    }
}
 
fn main() {
    let freqs = [
        ('A', 15),
        ('B', 25),
        ('C', 5),
        ('D', 7),
        ('E', 10),
        ('F', 13),
        ('G', 9),
    ];
    let root = build_tree(&freqs);
 
    let mut codes: Vec<(char, String)> = Vec::new();
    generate_codes(&root, "", &mut codes);
    codes.sort_by_key(|c| c.0); // Sort alphabetically for clean output
 
    let total_freq: usize = freqs.iter().map(|(_, f)| f).sum();
    let total_bits: usize = codes
        .iter()
        .map(|(ch, code)| {
            let f = freqs.iter().find(|(c, _)| *c == *ch).unwrap().1;
            f * code.len()
        })
        .sum();
 
    println!("Huffman Codes:");
    for (ch, code) in &codes {
        println!("{}: {}", ch, code);
    }
    println!(
        "\nAverage bits per character: {:.2}",
        total_bits as f64 / total_freq as f64
    );
}
	

Sample Output

		Huffman Codes:
A: 00
B: 11
C: 1000
D: 1001
E: 011
F: 101
G: 010
 
Average bits per character: 2.67
	

Question 4

Problem Statement

A file contains only colon, spaces, new line character and digits in the following frequency: colon(80), space(500), new line(110), commas(500), 0(300), 1(200), 2(150), 3(60), 4(180), 5(240), 6(170), 7(200), 8(202).

Using the Huffman’s algorithm to construct an optimal binary prefix code.

Frequencies - Sorted

Char Frequency
3 60
colon 80
new line 110
2 150
6 170
4 180
1 200
7 200
8 202
5 240
0 300
space 500
comma 500

Total Frequency = 2892

Merge Steps

  • We are using min-heap approach
  • \n - New line
Step Two Smallest Nodes Merged Node (Freq) Pool After Merge (Sorted)
1 3(60) + colon(80) N1:140 \n:110, N1:140, 2:150, 6:170, 4:180, 1:200, 7:200, 8:202, 5:240, 0:300, space:500, comma:500
2 \n(110) + N1(140) N2:250 2:150, 6:170, 4:180, 1:200, 7:200, 8:202, 5:240, N2:250, 0:300, space:500, comma:500
3 2(150) + 6(170) N3:320 4:180, 1:200, 7:200, 8:202, 5:240, N2:250, 0:300, N3:320, space:500, comma:500
4 4(180) + 1(200) N4:380 7:200, 8:202, 5:240, N2:250, 0:300, N3:320, N4:380, space:500, comma:500
5 7(200) + 8(202) N5:402 5:240, N2:250, 0:300, N3:320, N4:380, N5:402, space:500, comma:500
6 5(240) + N2(250) N6:490 0:300, N3:320, N4:380, N5:402, N6:490, space:500, comma:500
7 0(300) + N3(320) N7:620 N4:380, N5:402, N6:490, N7:620, space:500, comma:500
8 N4(380) + N5(402) N8:782 N6:490, N7:620, N8:782, space:500, comma:500
9 N6(490) + space(500) N9:990 N7:620, N8:782, N9:990, comma:500
10 comma(500) + N7(620) N10:1120 N8:782, N9:990, N10:1120
11 N8(782) + N9(990) N11:1772 N10:1120, N11:1772
12 N10(1120) + N11(1772) Root:2892 Tree Complete

Tree

		Root(2892)
 ├─ 0 N10(1120)
    ├─ 0 comma(500)            00
    └─ 1 N7(620)
         ├─ 0 0(300)           010
         └─ 1 N3(320)
              ├─ 0 2(150)      0110
              └─ 1 6(170)      0111
 └─ 1 N11(1772)
      ├─ 0 N8(782)
    ├─ 0 N4(380)
    ├─ 0 4(180)     1000
    └─ 1 1(200)     1001
    └─ 1 N5(402)
         ├─ 0 7(200)     1010
         └─ 1 8(202)     1011
      └─ 1 N9(990)
           ├─ 0 N6(490)
    ├─ 0 5(240)     1100
    └─ 1 N2(250)
         ├─ 0 \n(110)   11010
         └─ 1 N1(140)
              ├─ 0 3(60)   110110
              └─ 1 colon(80)  110111
           └─ 1 space(500)     111
	

Huffman Codes

Char Freq Code Length
space 500 00 2
0 300 010 3
2 150 0110 4
6 170 0111 4
4 180 1000 4
1 200 1001 4
7 200 1010 4
8 202 1011 4
5 240 1100 4
\n 110 11010 5
3 60 110110 6
: 80 110111 6
, 500 111 3

Bits/Character

		Total Bits = Σ(Frequency × Code Length) 
=> (500×2) + (300×3) + (150×4) + (170×4) + (180×4) + (200×4)  (200×4) + (202×4) + (240×4) + (110×5) + (60×6) + (80×6) + (500×3) 
=> 1000 + 900 + 600 + 680 + 720 + 800 + 800 + 808 + 960 + 550 + 360 + 480 + 1500 
=> 10,158 bits
 
Average Bits = Total Bits / Total Frequency 
=> 10,158 / 2,892 3.51 bits/character
	

Implementations

Python

huffman.py
		import heapq
 
 
class Node:
    """Huffman Tree Node"""
 
    def __init__(self, freq, char=None):
        self.freq = freq
        self.char = char  # None for internal nodes
        self.left = None
        self.right = None
 
    def __lt__(self, other):
        """Enable comparison for heapq (min-heap by frequency)"""
        return self.freq < other.freq
 
 
def build_huffman_tree(freq_dict):
    """Build Huffman tree using priority queue (min-heap)"""
    heap = [Node(f, c) for c, f in freq_dict.items()]
    heapq.heapify(heap)
 
    while len(heap) > 1:
        # Extract two smallest frequency nodes
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
 
        # Create merged internal node
        merged = Node(left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(heap, merged)
 
    return heap[0] if heap else None
 
 
def generate_codes(node, current_code="", codes=None):
    """DFS traversal to assign binary codes (0=left, 1=right)"""
    if codes is None:
        codes = {}
    if node is None:
        return codes
    if node.char is not None:  # Leaf node
        codes[node.char] = current_code
        return codes
    generate_codes(node.left, current_code + "0", codes)
    generate_codes(node.right, current_code + "1", codes)
    return codes
 
 
def calculate_avg_bits(codes, freq_dict):
    """Calculate weighted average bits per character"""
    total_bits = sum(freq_dict[c] * len(codes[c]) for c in codes)
    total_freq = sum(freq_dict.values())
    return total_bits / total_freq
 
 
def main():
    # Input frequencies
    freq = {
        ":": 80,
        " ": 500,
        "\n": 110,
        ",": 500,
        "0": 300,
        "1": 200,
        "2": 150,
        "3": 60,
        "4": 180,
        "5": 240,
        "6": 170,
        "7": 200,
        "8": 202,
    }
 
    # Build tree and generate codes
    root = build_huffman_tree(freq)
    codes = generate_codes(root)
 
    # Display results
    print("Huffman Codes:")
    for char in sorted(codes.keys(), key=lambda c: freq[c], reverse=True):
        display = repr(char) if char in [" ", "\n", ":", ","] else char
        print(f"{display:>6} (freq={freq[char]:4d}): {codes[char]}")
 
    avg = calculate_avg_bits(codes, freq)
    print(f"\nAverage bits per character: {avg:.2f}")
    print(f"Total frequency: {sum(freq.values())}")
    print(f"Total bits for encoded file: {sum(freq[c] * len(codes[c]) for c in codes)}")
 
 
if __name__ == "__main__":
    main()
	

C Language

huffman.c
		#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
#define MAX_CHARS 13
#define MAX_CODE_LEN 20
 
// Huffman Tree Node
typedef struct Node {
    int freq;
    char data;
    struct Node *left, *right;
} Node;
 
// Priority Queue (simple array-based min-heap simulation)
Node* pq[100];
int pq_size = 0;
 
// Create new node
Node* newNode(int freq, char data) {
    Node* n = (Node*)malloc(sizeof(Node));
    n->freq = freq;
    n->data = data;
    n->left = n->right = NULL;
    return n;
}
 
// Insert into priority queue (maintain sorted order for simplicity)
void pq_insert(Node* node) {
    int i = pq_size - 1;
    while (i >= 0 && pq[i]->freq > node->freq) {
        pq[i + 1] = pq[i];
        i--;
    }
    pq[i + 1] = node;
    pq_size++;
}
 
// Extract minimum frequency node
Node* pq_extract_min() {
    if (pq_size == 0) return NULL;
    return pq[pq_size--];
}
 
// Generate and print codes via DFS
void printCodes(Node* root, char arr[], int top, int* total_bits, int freq_map[], char chars[]) {
    if (root->left) {
        arr[top] = '0';
        printCodes(root->left, arr, top + 1, total_bits, freq_map, chars);
    }
    if (root->right) {
        arr[top] = '1';
        printCodes(root->right, arr, top + 1, total_bits, freq_map, chars);
    }
    // Leaf node: print code and accumulate bits
    if (root->data != '') {
        arr[top] = '';
        // Find frequency for this character
        for (int i = 0; i < MAX_CHARS; i++) {
            if (chars[i] == root->data) {
                int len = top;
                *total_bits += freq_map[i] * len;
                // Handle special character display
                if (root->data == ' ') printf("  ' '  ");
                else if (root->data == '\n') printf(" '\n' ");
                else if (root->data == ':') printf("  ':'  ");
                else if (root->data == ',') printf("  ','  ");
                else printf("  '%c'  ", root->data);
                printf("(freq=%4d): %s\n", freq_map[i], arr);
                break;
            }
        }
    }
}
 
int main() {
    // Character-frequency pairs
    char chars[MAX_CHARS] = {':', ' ', '\n', ',', '0', '1', '2', '3', '4', '5', '6', '7', '8'};
    int freqs[MAX_CHARS] = {80, 500, 110, 500, 300, 200, 150, 60, 180, 240, 170, 200, 202};
    int total_freq = 0;
 
    // Initialize priority queue with leaf nodes
    for (int i = 0; i < MAX_CHARS; i++) {
        pq_insert(newNode(freqs[i], chars[i]));
        total_freq += freqs[i];
    }
 
    // Build Huffman Tree
    while (pq_size > 1) {
        Node* left = pq_extract_min();
        Node* right = pq_extract_min();
        Node* merged = newNode(left->freq + right->freq, '');
        merged->left = left;
        merged->right = right;
        pq_insert(merged);
    }
 
    Node* root = pq_extract_min();
    char code[MAX_CODE_LEN];
    int total_bits = 0;
 
    printf("Huffman Codes:\n");
    printCodes(root, code, 0, &total_bits, freqs, chars);
 
    double avg_bits = (double)total_bits / total_freq;
    printf("\nAverage bits per character: %.2f\n", avg_bits);
    printf("Total frequency: %d\n", total_freq);
    printf("Total bits for encoded file: %d\n", total_bits);
 
    return 0;
}
	

Rust

huffman.rs
		use std::cmp::Ordering;
use std::collections::BinaryHeap;
 
// Huffman Tree Node
#[derive(Debug)]
struct Node {
    freq: usize,
    ch: Option<char>,
    left: Option<Box<Node>>,
    right: Option<Box<Node>>,
}
 
// Implement ordering for BinaryHeap (min-heap by frequency)
impl PartialEq for Node {
    fn eq(&self, other: &Self) -> bool {
        self.freq == other.freq
    }
}
impl Eq for Node {}
impl PartialOrd for Node {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Reverse for min-heap behavior
        Some(other.freq.cmp(&self.freq))
    }
}
impl Ord for Node {
    fn cmp(&self, other: &Self) -> Ordering {
        other.freq.cmp(&self.freq)
    }
}
 
fn build_tree(freqs: &[(char, usize)]) -> Box<Node> {
    let mut heap = BinaryHeap::new();
 
    // Push all leaf nodes
    for &(ch, f) in freqs {
        heap.push(Box::new(Node {
            freq: f,
            ch: Some(ch),
            left: None,
            right: None,
        }));
    }
 
    // Merge nodes until one root remains
    while heap.len() > 1 {
        let left = heap.pop().unwrap();
        let right = heap.pop().unwrap();
        heap.push(Box::new(Node {
            freq: left.freq + right.freq,
            ch: None,
            left: Some(left),
            right: Some(right),
        }));
    }
    heap.pop().unwrap()
}
 
fn generate_codes(node: &Box<Node>, current: &str, codes: &mut Vec<(char, String)>) {
    if let Some(ch) = node.ch {
        codes.push((ch, current.to_string()));
        return;
    }
    if let Some(ref left) = node.left {
        generate_codes(left, &format!("{}0", current), codes);
    }
    if let Some(ref right) = node.right {
        generate_codes(right, &format!("{}1", current), codes);
    }
}
 
fn display_char(ch: char) -> String {
    match ch {
        ' ' => "' '".to_string(),
        '\n' => "'\n'".to_string(),
        ':' => "':'".to_string(),
        ',' => "','".to_string(),
        _ => format!("'{}'", ch),
    }
}
 
fn main() {
    // Input: (character, frequency) pairs
    let freqs = [
        (':', 80),
        (' ', 500),
        ('\n', 110),
        (',', 500),
        ('0', 300),
        ('1', 200),
        ('2', 150),
        ('3', 60),
        ('4', 180),
        ('5', 240),
        ('6', 170),
        ('7', 200),
        ('8', 202),
    ];
 
    let root = build_tree(&freqs);
    let mut codes: Vec<(char, String)> = Vec::new();
    generate_codes(&root, "", &mut codes);
 
    // Sort by frequency (descending) for display
    codes.sort_by(|a, b| {
        let fa = freqs.iter().find(|(c, _)| *c == a.0).unwrap().1;
        let fb = freqs.iter().find(|(c, _)| *c == b.0).unwrap().1;
        fb.cmp(&fa)
    });
 
    // Calculate statistics
    let total_freq: usize = freqs.iter().map(|(_, f)| f).sum();
    let total_bits: usize = codes
        .iter()
        .map(|(ch, code)| {
            let f = freqs.iter().find(|(c, _)| *c == *ch).unwrap().1;
            f * code.len()
        })
        .sum();
 
    println!("Huffman Codes:");
    for (ch, code) in &codes {
        let f = freqs.iter().find(|(c, _)| *c == *ch).unwrap().1;
        println!("{:>6} (freq={:4o}): {}", display_char(*ch), f, code);
    }
 
    let avg_bits = total_bits as f64 / total_freq as f64;
    println!("\nAverage bits per character: {}", avg_bits);
    println!("Total frequency: {}", total_freq);
    println!("Total bits for encoded file: {}", total_bits);
}
	

Sample Output

		Huffman Codes:
' ' (freq= 500): 00
',' (freq= 500): 111
0 (freq= 300): 010
5 (freq= 240): 1100
8 (freq= 202): 1011
1 (freq= 200): 1001
7 (freq= 200): 1010
4 (freq= 180): 1000
6 (freq= 170): 0111
2 (freq= 150): 0110
'\n' (freq= 110): 11010
':' (freq=  80): 110111
3 (freq=  60): 110110
   
 
Average bits per character: 3.512448132780083
Total frequency: 2892
Total bits for encoded file: 10,158