首页 » 软件开发 » 哈夫曼编码压缩算法学习(词频节点的是压缩编码)「哈夫曼编码压缩率计算」

哈夫曼编码压缩算法学习(词频节点的是压缩编码)「哈夫曼编码压缩率计算」

雨夜梧桐 2024-07-24 03:30:41 软件开发 0

扫一扫用手机浏览

文章目录 [+]

2、先统计词频: { A:5, B:1,C:3,D:1}

3、取出最小词频 B,D 作为二叉树的两个叶子节点,构造一个二叉树,然后塞回去

新节点的权重为两个字母的词频和,则新的是:{ A:5, C:3, node1:2 }

哈夫曼编码压缩算法学习(词频节点的是压缩编码) 哈夫曼编码压缩算法学习(词频节点的是压缩编码) 软件开发
(图片来自网络侵删)

4、继续取最小权重的两个来构造二叉树,小的放左边,大的放右边,那么我们这轮就应该取 C 和 node1

新节点的权重为两个字母的词频和,则新的是:{ A:5, node2:5 }

哈夫曼编码压缩算法学习(词频节点的是压缩编码) 哈夫曼编码压缩算法学习(词频节点的是压缩编码) 软件开发
(图片来自网络侵删)

5、继续该过程:

新节点的权重为两个字母的词频和,则新的是:{ node3:5 }

6、当根节点唯一之后,开始进行编码,左分支记0,右分支记1,得到两个字典表格

1:压缩表格 {B: "000", D: "001", C: "01", A: "1"}

2:解压表格 {1: "A", 000: "B", 001: "D", 01: "C"}

因为D和B词频一样,所以谁左谁右,并不一定,所以可能有的人得到的是 B=000 D=001 有的人得到的是 D=000 B=001

7、根据上边的表格1进行压缩:

ABCCAAACAD = 10000101111011001(b) 17bit位,也就是三个字节(38=24)

压缩前字节长度为:10,压缩后字节长度为 3 个字节:[1, 11, 217]

8、解密过程就简单了,直接对照表格2进行解密即可

1(A) 000(B) 01(C) 01(C) 1(A) 1(A) 1(A) 01(C) 1(A) 001(D)

下面 送上代码,保存成 html ,右键 chrome 打开,按下F12 可以看到效果。

<!DOCTYPE html><html lang="en"><head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>哈夫曼编码学习</title></head><body> 请输入一个字符串:<br> <textarea style="width: 400px;height: 300px;">我塔夺苯基本三梅花三弄基本三仴仴人孺阿附顶替要琴个全仴dwefwefsgvzsdgrsgerhrehj23413241fsfeg</textarea><br> <button onclick="doZip()">编码</button><br> <script> Array.prototype.first = function () { if (this.length > 0) return this[0]; } Array.prototype.take = function (len) { var ret = []; for (var c = 0; c < len; c++) ret.push(this[c]); return ret; } Array.prototype.skip = function (len) { var ret = []; for (var c = len; c < this.length; c++) ret.push(this[c]); return ret; } function doZip() { let text = document.getElementsByTagName("textarea")[0].value; console.log(0, text); //字频统计 function GetWordRate() { let wordRate = {}; for (let ch of text) { if (!wordRate[ch]) wordRate[ch] = 1; else wordRate[ch]++; } return wordRate; } //构造哈夫曼树 function MakeTree(wordRate) { //var node = { ch:?, left:?, right:?, weight:? }; 节点数据格式:字符,左节点,右节点,权重 let tree = []; for (var k in wordRate) tree.push({ ch: k, weight: wordRate[k], left: null, right: null }); return _makeTree(tree); function _makeTree(tree) { if (!tree) return tree; if (tree.length <= 1) return tree.first(); let _tree = []; let _sortArr = tree.sort((a, b) => a.weight - b.weight); //从小到大排序 let min1 = _sortArr.first(); //最小 let min2 = _sortArr.skip(1).first(); //第二小 let other = _sortArr.skip(2).take(_sortArr.length - 2); let root = { ch: '', left: min1, right: min2, weight: (min1.weight + min2.weight) }; _tree.push(root); for (let c = 0; c < other.length; c++) _tree.push(other[c]); if (_tree.length == 1) return _tree.first(); else return _makeTree(_tree); } } / code 就是上一级的编码 / function MakeTable(tree, code) { let retCompress = {}; let retUnCompress = {}; if (tree.ch != '') { retCompress[tree.ch] = code; retUnCompress[code] = tree.ch; return { tbCompress:retCompress,tbUnCompress:retUnCompress }; } if (tree.left) { let tb = MakeTable(tree.left, code + '0'); for(let k in tb.tbCompress) retCompress[k] = tb.tbCompress[k]; for(let k in tb.tbUnCompress) retUnCompress[k] = tb.tbUnCompress[k]; } if (tree.right) { let tb = MakeTable(tree.right, code + '1'); for(let k in tb.tbCompress) retCompress[k] = tb.tbCompress[k]; for(let k in tb.tbUnCompress) retUnCompress[k] = tb.tbUnCompress[k]; } return { tbCompress:retCompress,tbUnCompress:retUnCompress }; } function main() { let wordRate = GetWordRate(); //取字频 console.log(1, wordRate); let tree = MakeTree(wordRate); //生成哈夫曼树 console.log(2, tree); let { tbCompress, tbUnCompress } = MakeTable(tree, ""); //生成哈夫曼编码表 console.log(3, tbCompress, tbUnCompress); var oldStr = []; for(var c=0;c<text.length;c++) oldStr.push(text.charCodeAt(c).toString(2)); var newStr = []; var compressData = []; for(var c=0;c<text.length;c++) { newStr.push(tbCompress[text[c]]); compressData.push(text[c]); } newStr = [...newStr.join("")]; console.log(4, newStr.join("")); //把二进制字符串转换成真正的二进制 //8位对齐,整个字符串,前面补0 newStr = newStr.join(""); var n = 8 - newStr.length % 8; if (n==1) newStr = "1" + newStr; else newStr = "".padStart(n-1, "0") + "1" + newStr; console.log(4, newStr.length, n, newStr); var data = []; for(var c=0;c<newStr.length;c+=8) { var binStr = newStr.substring(c, c+8); data.push(parseInt(binStr,2)); } console.log(5, data); //把二进制恢复成二进制字符串 newStr = [] for(var d of data) { var binStr = d.toString(2).padStart(8,'0'); newStr.push(binStr); } newStr = newStr.join(""); newStr = newStr.replace(/^01/g,''); //去掉最前边的0000+1 console.log(6, newStr); var text2 = []; var word = []; for(var c=0;c<newStr.length;c++) { word.push(newStr[c]); if (word.join("") in tbUnCompress) { text2.push(tbUnCompress[word.join("")]); word = []; } } console.log(5, text2.join("")); } main(); } </script></body></html>

效果如下:

标签:

相关文章