哈夫曼编码是一种广泛应用的编码技术,由美国计算机科学家David A. Huffman于1952年提出。它是一种变长编码,具有平均编码长度最短、平均信息熵最小的特点。在数据压缩、通信等领域具有广泛的应用。本文将介绍哈夫曼编码的原理,并探讨其在C语言中的实现与应用。
一、哈夫曼编码原理
哈夫曼编码是一种前缀编码,其基本思想是:根据字符出现的频率,构造一棵最优的哈夫曼树,然后根据树的结构生成对应的编码。具体步骤如下:
1. 统计字符频率,构造一个包含所有字符的叶子节点,并按照频率从小到大排序。
2. 选择频率最小的两个叶子节点,合并成一个父节点,其频率为两个叶子节点频率之和。
3. 将新构造的父节点插入到排序后的序列中,并重新排序。
4. 重复步骤2和3,直到只剩下一个节点,即为哈夫曼树的根节点。
5. 遍历哈夫曼树,从根节点到叶子节点,每次向左移动表示0,向右移动表示1,得到每个叶子节点的编码。
二、哈夫曼编码在C语言中的实现
以下是一个C语言实现的哈夫曼编码示例:
```c
include
include
include
define MAX_TREE_HT 100
// 哈夫曼树的节点结构
struct MinHeapNode {
char data;
unsigned freq;
struct MinHeapNode left, right;
};
// 哈夫曼树结构
struct MinHeap {
unsigned size;
unsigned capacity;
struct MinHeapNode array;
};
// 创建一个新的节点
struct MinHeapNode newNode(char data, unsigned freq) {
struct MinHeapNode temp = (struct MinHeapNode)malloc(sizeof(struct MinHeapNode));
temp->left = temp->right = NULL;
temp->data = data;
temp->freq = freq;
return temp;
}
// 创建一个新的最小堆
struct MinHeap createMinHeap(unsigned capacity) {
struct MinHeap minHeap = (struct MinHeap)malloc(sizeof(struct MinHeap));
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (struct MinHeapNode)malloc(minHeap->capacity sizeof(struct MinHeapNode));
return minHeap;
}
// 交换两个节点
void swapMinHeapNode(struct MinHeapNode a, struct MinHeapNode b) {
struct MinHeapNode t = a;
a = b;
b = t;
}
// 最小堆的标准堆调整函数
void minHeapify(struct MinHeap minHeap, int idx) {
int smallest = idx;
int left = 2 idx + 1;
int right = 2 idx + 2;
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
smallest = left;
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
smallest = right;
if (smallest != idx) {
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
minHeapify(minHeap, smallest);
}
}
// 判断最小堆是否为空
int isSizeOne(struct MinHeap minHeap) {
return (minHeap->size == 1);
}
// 提取最小频率的节点
struct MinHeapNode extractMin(struct MinHeap minHeap) {
struct MinHeapNode temp = minHeap->array[0];
minHeap->array[0] = minHeap->array[minHeap->size - 1];
--minHeap->size;
minHeapify(minHeap, 0);
return temp;
}
// 插入一个新节点到最小堆
void insertMinHeap(struct MinHeap minHeap, struct MinHeapNode minHeapNode) {
++minHeap->size;
int i = minHeap->size - 1;
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
minHeap->array[i] = minHeap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
minHeap->array[i] = minHeapNode;
}
// 构建最小堆
void buildMinHeap(struct MinHeap minHeap) {
int n = minHeap->size - 1;
int i;
for (i = (n - 1) / 2; i >= 0; --i)
minHeapify(minHeap, i);
}
// 判断节点是否是叶子节点
int isLeaf(struct MinHeapNode root) {
return !(root->left) && !(root->right);
}
// 创建一个最小堆,包含所有叶子节点
struct MinHeap createAndBuildMinHeap(char data[], int freq[], int size) {
struct MinHeap minHeap = createMinHeap(size);
for (int i = 0; i < size; ++i)
minHeap->array[i] = newNode(data[i], freq[i]);
minHeap->size = size;
buildMinHeap(minHeap);
return minHeap;
}
// 构建哈夫曼树
struct MinHeapNode buildHuffmanTree(char data[], int freq[], int size) {
struct MinHeapNode left, right, top;
struct MinHeap minHeap = createAndBuildMinHeap(data, freq, size);
while (!isSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
// 打印哈夫曼编码
void printCodes(struct MinHeapNode root, int arr[], int top) {
if (root->left) {
arr[top] = 0;
printCodes(root->left, arr, top + 1);
}
if (root->right) {
arr[top] = 1;
printCodes(root->right, arr, top + 1);
}
if (isLeaf(root)) {
printf(\