学习来源:日撸 Java 三百行(21-30天,树与二叉树)_闵帆的博客-CSDN博客
28. Huffman 编码 (节点定义与文件读取)
1.定义了一个内嵌类. 如果是实际项目, 我就为其单独写一个文件了, 这里仅仅是为了方便.
2.每个节点的内容包括: 字符 (仅对叶节点有效)、权重 (用的整数, 即该字符出现的次数)、指向子节点和父节点的引用. 这里指向父节点的引用是必须的.
3.NUM_CHARS 是指 ASCII 字符集的字符个数. 为方便起见, 仅支持 ASCII.
4.inputText 的引入只是想把程序尽可能细分成独立的模块, 这样便于学习和调试.
5.alphabet 仅存 inputText 出现过的字符.
6.alphabetLength 完全可以用 alphabet.length() 代替, 但我就喜欢写成独立的变量.
7.charCounts 要为所有的节点负责, 其元素对应于 HuffmanNode 里面的 weight. 为了节约, 可以把其中一个省掉.
8.charMapping 是为了从 ASCII 里面的顺序映射到 alphabet 里面的顺序. 这也是我只采用 ASCII 字符集 (仅 256 字符) 的原因.
9.huffmanCodes 将各个字符映射为一个字符串, 其实应该是二进制串. 我这里不是想偷懒么.
10.nodes 要先把所有的节点存储在一个数组里面, 然后再链接它们. 这是常用招数.
11.构造方法仅初始化了 charMapping, 读入了文件.
12.readText 采用了最简单粗暴的方式. 还可以有其它的逐行读入的方式.
13.要自己弄个文本文件, 里面存放一个字符串 abcdedgsgs 之类, 或者几行英文文本.
package datastructure.tree;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collectors;
/**
 * Huffman tree, encoding, and decoding. For simplicity, only characters whose
 * code is below {@link #NUM_CHARS} are supported.
 *
 * <p>
 * Expected call order: construct the object (which reads the text file), then
 * {@link #constructAlphabet()}, {@link #constructTree()} and
 * {@link #generateCodes()}; after that {@link #coding(String)} and
 * {@link #decoding(String)} can be used.
 *
 * @author Rui Chen 1369097405@qq.com.
 */
public class Huffman {
    /**
     * An inner class for Huffman nodes.
     */
    class HuffmanNode {
        /**
         * The char. Only valid for leaf nodes.
         */
        char character;

        /**
         * Weight. It can also be double.
         */
        int weight;

        /**
         * The left child.
         */
        HuffmanNode leftChild;

        /**
         * The right child.
         */
        HuffmanNode rightChild;

        /**
         * The parent. It helps constructing the Huffman code of each character.
         */
        HuffmanNode parent;

        /**
         * Creates a node with all of its fields given explicitly.
         *
         * @param paraCharacter
         *            The character (meaningful for leaves only).
         * @param paraWeight
         *            The weight of the node.
         * @param paraLeftChild
         *            The left child, or null.
         * @param paraRightChild
         *            The right child, or null.
         * @param paraParent
         *            The parent, or null.
         */
        public HuffmanNode(char paraCharacter, int paraWeight, HuffmanNode paraLeftChild,
                HuffmanNode paraRightChild, HuffmanNode paraParent) {
            character = paraCharacter;
            weight = paraWeight;
            leftChild = paraLeftChild;
            rightChild = paraRightChild;
            parent = paraParent;
        }// Of HuffmanNode

        /**
         * Formats the node as "(character, weight)".
         *
         * @return The string representation.
         */
        @Override
        public String toString() {
            return "(" + character + ", " + weight + ")";
        }// Of toString
    }// Of class HuffmanNode

    /**
     * The number of characters. 256 for ASCII.
     */
    public static final int NUM_CHARS = 256;

    /**
     * The input text. It is stored in a string for simplicity.
     */
    String inputText;

    /**
     * The length of the alphabet, also the number of leaves.
     */
    int alphabetLength;

    /**
     * The alphabet.
     */
    char[] alphabet;

    /**
     * The count of chars. The length is 2 * alphabetLength - 1 to include
     * non-leaf nodes.
     */
    int[] charCounts;

    /**
     * The mapping of chars to the indices in the alphabet. -1 marks a char
     * that does not occur in the input text.
     */
    int[] charMapping;

    /**
     * Codes for each char in the alphabet. It should have the same length as
     * alphabet.
     */
    String[] huffmanCodes;

    /**
     * All nodes. The last node is the root.
     */
    HuffmanNode[] nodes;

    /**
     * The first constructor. Allocates the char mapping and reads the text.
     *
     * @param paraFilename
     *            The text filename.
     */
    public Huffman(String paraFilename) {
        charMapping = new int[NUM_CHARS];
        readText(paraFilename);
    }// Of the first constructor

    /**
     * Read text. Lines are joined with a single '\n', so a trailing line
     * break of the file is not part of the text. The text is also printed.
     *
     * @param paraFilename
     *            The text filename.
     * @throws java.io.UncheckedIOException
     *             If the file cannot be read.
     */
    public void readText(String paraFilename) {
        // try-with-resources guarantees that the reader is closed.
        try (java.io.BufferedReader tempReader = Files.newBufferedReader(Paths.get(paraFilename),
                StandardCharsets.UTF_8)) {
            inputText = tempReader.lines().collect(Collectors.joining("\n"));
        } catch (java.io.IOException ee) {
            // Report the failure to the caller instead of exiting the JVM
            // with a "success" status.
            throw new java.io.UncheckedIOException("Cannot read text file: " + paraFilename, ee);
        } // Of try

        System.out.println("The text is:\r\n" + inputText);
    }// Of readText

    /**
     * Construct the alphabet. The results are stored in the member variables
     * charMapping, alphabet, alphabetLength and charCounts.
     *
     * @throws IllegalStateException
     *             If the input text is empty.
     * @throws IllegalArgumentException
     *             If the text contains a char whose code is not below
     *             NUM_CHARS.
     */
    public void constructAlphabet() {
        if (inputText == null || inputText.isEmpty()) {
            throw new IllegalStateException("The input text is empty; no alphabet can be built.");
        } // Of if

        // Initialize.
        Arrays.fill(charMapping, -1);

        // The count for each char. At most NUM_CHARS chars.
        int[] tempCharCounts = new int[NUM_CHARS];

        // Step 1. Scan the string to obtain the counts.
        for (int i = 0; i < inputText.length(); i++) {
            int tempCharIndex = inputText.charAt(i);
            if (tempCharIndex >= NUM_CHARS) {
                throw new IllegalArgumentException("Unsupported character with code " + tempCharIndex
                        + " at position " + i + ". Only codes below " + NUM_CHARS
                        + " are supported.");
            } // Of if
            System.out.print("" + tempCharIndex + " ");
            tempCharCounts[tempCharIndex]++;
        } // Of for i

        // Step 2. Scan to determine the size of the alphabet. The bound must
        // be NUM_CHARS (not 255), otherwise the last char is not counted here
        // but is still stored in Step 3.
        alphabetLength = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabetLength++;
            } // Of if
        } // Of for i

        // Step 3. Compress to the alphabet
        alphabet = new char[alphabetLength];
        charCounts = new int[2 * alphabetLength - 1];
        int tempCounter = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabet[tempCounter] = (char) i;
                charCounts[tempCounter] = tempCharCounts[i];
                charMapping[i] = tempCounter;
                tempCounter++;
            } // Of if
        } // Of for i

        System.out.println("The alphabet is: " + Arrays.toString(alphabet));
        System.out.println("Their counts are: " + Arrays.toString(charCounts));
        System.out.println("The char mappings are: " + Arrays.toString(charMapping));
    }// Of constructAlphabet

    /**
     * Construct the tree. Leaves occupy nodes[0 .. alphabetLength - 1]; every
     * further node joins the two unprocessed nodes with the smallest weights.
     *
     * @throws IllegalStateException
     *             If constructAlphabet() has not been called.
     */
    public void constructTree() {
        if (alphabet == null) {
            throw new IllegalStateException("constructAlphabet() must be called first.");
        } // Of if

        // Step 1. Allocate space.
        nodes = new HuffmanNode[alphabetLength * 2 - 1];
        boolean[] tempProcessed = new boolean[alphabetLength * 2 - 1];

        // Step 2. Initialize leaves.
        for (int i = 0; i < alphabetLength; i++) {
            nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null, null);
        } // Of for i

        // Step 3. Construct the tree.
        for (int i = alphabetLength; i < 2 * alphabetLength - 1; i++) {
            // Step 3.1 Select the first minimal as the left child.
            int tempLeft = selectMinimalUnprocessed(tempProcessed, i);
            tempProcessed[tempLeft] = true;

            // Step 3.2 Select the second minimal as the right child.
            int tempRight = selectMinimalUnprocessed(tempProcessed, i);
            tempProcessed[tempRight] = true;
            System.out.println("Selecting " + tempLeft + " and " + tempRight);

            // Step 3.3 Construct the new node.
            charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
            nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight], null);

            // Step 3.4 Link with children.
            nodes[tempLeft].parent = nodes[i];
            nodes[tempRight].parent = nodes[i];
            System.out.println("The children of " + i + " are " + tempLeft + " and " + tempRight);
        } // Of for i
    }// Of constructTree

    /**
     * Find the unprocessed node with the smallest count among the first
     * paraLimit nodes. Ties are resolved in favor of the smallest index.
     *
     * @param paraProcessed
     *            Flags of the nodes that already have a parent.
     * @param paraLimit
     *            Only indices below this value are considered.
     * @return The index of the selected node, or -1 if there is none.
     */
    private int selectMinimalUnprocessed(boolean[] paraProcessed, int paraLimit) {
        int resultIndex = -1;
        for (int j = 0; j < paraLimit; j++) {
            if (paraProcessed[j]) {
                continue;
            } // Of if
            if (resultIndex < 0 || charCounts[j] < charCounts[resultIndex]) {
                resultIndex = j;
            } // Of if
        } // Of for j
        return resultIndex;
    }// Of selectMinimalUnprocessed

    /**
     * Get the root of the binary tree.
     *
     * @return The root.
     */
    public HuffmanNode getRoot() {
        return nodes[nodes.length - 1];
    }// Of getRoot

    /**
     * Pre-order visit. Prints each visited node as "(character, weight) ".
     *
     * @param paraNode
     *            The root of the subtree to visit.
     */
    public void preOrderVisit(HuffmanNode paraNode) {
        System.out.print("(" + paraNode.character + ", " + paraNode.weight + ") ");

        if (paraNode.leftChild != null) {
            preOrderVisit(paraNode.leftChild);
        } // Of if

        if (paraNode.rightChild != null) {
            preOrderVisit(paraNode.rightChild);
        } // Of if
    }// Of preOrderVisit

    /**
     * Generate codes for each character in the alphabet. A left branch
     * contributes '0' and a right branch contributes '1'.
     */
    public void generateCodes() {
        huffmanCodes = new String[alphabetLength];
        for (int i = 0; i < alphabetLength; i++) {
            // Walk from the leaf to the root, so the bits are collected in
            // reverse order.
            StringBuilder tempReversedCode = new StringBuilder();
            HuffmanNode tempNode = nodes[i];
            while (tempNode.parent != null) {
                tempReversedCode.append(tempNode == tempNode.parent.leftChild ? '0' : '1');
                tempNode = tempNode.parent;
            } // Of while

            // With a single symbol the leaf is the root. An empty code would
            // lose the length of the text, so use one bit instead.
            String tempCharCode = (tempReversedCode.length() == 0) ? "0"
                    : tempReversedCode.reverse().toString();

            huffmanCodes[i] = tempCharCode;
            System.out.println("The code of " + alphabet[i] + " is " + tempCharCode);
        } // Of for i
    }// Of generateCodes

    /**
     * Encode the given string.
     *
     * @param paraString
     *            The given string.
     * @return The concatenated codes of all chars of the string.
     * @throws IllegalArgumentException
     *             If a char of the string does not belong to the alphabet.
     */
    public String coding(String paraString) {
        StringBuilder resultCode = new StringBuilder();
        for (int i = 0; i < paraString.length(); i++) {
            char tempChar = paraString.charAt(i);
            // From the original char to the location in the alphabet.
            int tempIndex = (tempChar < NUM_CHARS) ? charMapping[tempChar] : -1;
            if (tempIndex < 0) {
                throw new IllegalArgumentException("The character with code " + (int) tempChar
                        + " at position " + i + " is not in the alphabet.");
            } // Of if

            // From the location in the alphabet to the code.
            resultCode.append(huffmanCodes[tempIndex]);
        } // Of for i
        return resultCode.toString();
    }// Of coding

    /**
     * Decode the given string. '0' selects the left child; any other char
     * selects the right child. Trailing bits that do not reach a leaf are
     * ignored.
     *
     * @param paraString
     *            The given string.
     * @return The decoded text.
     */
    public String decoding(String paraString) {
        StringBuilder resultText = new StringBuilder();
        HuffmanNode tempRoot = getRoot();
        // With a single symbol the root itself is the leaf; every bit then
        // stands for one occurrence of that symbol.
        boolean tempSingleLeaf = (tempRoot.leftChild == null);

        HuffmanNode tempNode = tempRoot;
        for (int i = 0; i < paraString.length(); i++) {
            if (!tempSingleLeaf) {
                if (paraString.charAt(i) == '0') {
                    tempNode = tempNode.leftChild;
                } else {
                    tempNode = tempNode.rightChild;
                } // Of if
                System.out.println(tempNode);
            } // Of if

            if (tempNode.leftChild == null) {
                System.out.println("Decode one:" + tempNode);
                // Decode one char.
                resultText.append(tempNode.character);

                // Return to the root.
                tempNode = tempRoot;
            } // Of if
        } // Of for i

        return resultText.toString();
    }// Of decoding

    /**
     * The entrance of the program.
     *
     * @param args
     *            Optionally, args[0] is the text filename. Without it the
     *            default sample file is used.
     */
    public static void main(String args[]) {
        String tempFilename = (args != null && args.length > 0) ? args[0]
                : "D:/chenrui/temp/huffmantext-small.txt";
        Huffman tempHuffman = new Huffman(tempFilename);
        tempHuffman.constructAlphabet();

        tempHuffman.constructTree();

        HuffmanNode tempRoot = tempHuffman.getRoot();
        System.out.println("The root is: " + tempRoot);
        System.out.println("Preorder visit:");
        tempHuffman.preOrderVisit(tempHuffman.getRoot());

        tempHuffman.generateCodes();

        String tempCoded = tempHuffman.coding("abcdb");
        System.out.println("Coded: " + tempCoded);
        String tempDecoded = tempHuffman.decoding(tempCoded);
        System.out.println("Decoded: " + tempDecoded);
    }// Of main
}// Of class Huffman
运行截图:
31. 整数矩阵及其运算
这个代码以前有基础. 原想着写矩阵连通性, 把这个当成开胃菜的, 后来发现这个的代码量已经够了. 良心发现, 把这个做成一天的工作.
1.矩阵对象的创建.
2.getRows 等: getter, setter 在 java 里面很常用. 主要是为了访问控制.
3.整数矩阵的加法、乘法.
4.Exception 的抛出与捕获机制.
5.用 this 调用其它的构造方法以减少冗余代码.
6.代码看起来多, 但矩阵运算我们以前写过.
7.把数据类型修改成 double, 获得 DoubleMatrix.java, 以后会很有用.
8.getIdentityMatrix: 单位矩阵.
9.resultMatrix.data[i][i]: 成员变量的访问权限: 在同一类里面是可以直接使用的.
package matrix;
import java.util.Arrays;
/**
* Int matrix. For efficiency we do not define ObjectMatrix. One can revise it
* to obtain DoubleMatrix.
*
* @author Rui Chen 1369097405@qq.com.
*/
public class IntMatrix {
/**
* The data.
*/
int[][] data;
/**
*********************
* The first constructor.
*
* @param paraRows
* The number of rows.
* @param paraColumns
* The number of columns.
*********************
*/
public IntMatrix(int paraRows, int paraColumns) {
data = new int[paraRows][paraColumns];
}// Of the first constructor
/**
*********************
* The second constructor. Construct a copy of the given matrix.
*
* @param paraMatrix
* The given matrix.
*********************
*/
public IntMatrix(int[][] paraMatrix) {
data = new int[paraMatrix.length][paraMatrix[0].length];
// Copy elements.
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < data[0].length; j++) {
data[i][j] = paraMatrix[i][j];
} // Of for j
} // Of for i
}// Of the second constructor
/**
*********************
* The third constructor. Construct a copy of the given matrix.
*
* @param paraMatrix
* The given matrix.
*********************
*/
public IntMatrix(IntMatrix paraMatrix) {
this(paraMatrix.getData());
}// Of the third constructor
/**
*********************
* Get identity matrix. The values at the diagonal are all 1.
*
* @param paraRows
* The given rows.
*********************
*/
public static IntMatrix getIdentityMatrix(int paraRows) {
IntMatrix resultMatrix = new IntMatrix(paraRows, paraRows);
for (int i = 0; i < paraRows; i++) {
// According to access control, resultMatrix.data can be visited
// directly.
resultMatrix.data[i][i] = 1;
} // Of for i
return resultMatrix;
}// Of getIdentityMatrix
/**
*********************
* Overrides the method claimed in Object, the superclass of any class.
*********************
*/
public String toString() {
return Arrays.deepToString(data);
}// Of toString
/**
*********************
* Get my data. Warning, the reference to the data instead of a copy of the
* data is returned.
*
* @return The data matrix.
*********************
*/
public int[][] getData() {
return data;
}// Of getData
/**
*********************
* Getter.
*
* @return The number of rows.
*********************
*/
public int getRows() {
return data.length;
}// Of getRows
/**
*********************
* Getter.
*
* @return The number of columns.
*********************
*/
public int getColumns() {
return data[0].length;
}// Of getColumns
/**
*********************
* Set one the value of one element.
*
* @param paraRow
* The row of the element.
* @param paraColumn
* The column of the element.
* @param paraValue
* The new value.
*********************
*/
public void setValue(int paraRow, int paraColumn, int paraValue) {
data[paraRow][paraColumn] = paraValue;
}// Of setValue
/**
*********************
* Get the value of one element.
*
* @param paraRow
* The row of the element.
* @param paraColumn
* The column of the element.
*********************
*/
public int getValue(int paraRow, int paraColumn) {
return data[paraRow][paraColumn];
}// Of getValue
/**
*********************
* Add another matrix to me.
*
* @param paraMatrix
* The other matrix.
*********************
*/
public void add(IntMatrix paraMatrix) throws Exception {
// Step 1. Get the data of the given matrix.
int[][] tempData = paraMatrix.getData();
// Step 2. Size check.
if (data.length != tempData.length) {
throw new Exception("Cannot add matrices. Rows not match: " + data.length + " vs. "
+ tempData.length + ".");
} // Of if
if (data[0].length != tempData[0].length) {
throw new Exception("Cannot add matrices. Rows not match: " + data[0].length + " vs. "
+ tempData[0].length + ".");
} // Of if
// Step 3. Add to me.
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < data[0].length; j++) {
data[i][j] += tempData[i][j];
} // Of for j
} // Of for i
}// Of add
/**
*********************
* Add two existing matrices.
*
* @param paraMatrix1
* The first matrix.
* @param paraMatrix2
* The second matrix.
* @return A new matrix.
*********************
*/
public static IntMatrix add(IntMatrix paraMatrix1, IntMatrix paraMatrix2) throws Exception {
// Step 1. Clone the first matrix.
IntMatrix resultMatrix = new IntMatrix(paraMatrix1);
// Step 2. Add the second one.
resultMatrix.add(paraMatrix2);
return resultMatrix;
}// Of add
/**
*********************
* Multiply two existing matrices.
*
* @param paraMatrix1
* The first matrix.
* @param paraMatrix2
* The second matrix.
* @return A new matrix.
*********************
*/
public static IntMatrix multiply(IntMatrix paraMatrix1, IntMatrix paraMatrix2)
throws Exception {
// Step 1. Check size.
int[][] tempData1 = paraMatrix1.getData();
int[][] tempData2 = paraMatrix2.getData();
if (tempData1[0].length != tempData2.length) {
throw new Exception("Cannot multiply matrices: " + tempData1[0].length + " vs. "
+ tempData2.length + ".");
} // Of if
// Step 2. Allocate space.
int[][] resultData = new int[tempData1.length][tempData2[0].length];
// Step 3. Multiply.
for (int i = 0; i < tempData1.length; i++) {
for (int j = 0; j < tempData2[0].length; j++) {
for (int k = 0; k < tempData1[0].length; k++) {
resultData[i][j] += tempData1[i][k] * tempData2[k][j];
} // Of for k
} // Of for j
} // Of for i
// Step 4. Construct the matrix object.
IntMatrix resultMatrix = new IntMatrix(resultData);
return resultMatrix;
}// Of multiply
/**
*********************
* The entrance of the program.
*
* @param args
* Not used now.
*********************
*/
public static void main(String args[]) {
IntMatrix tempMatrix1 = new IntMatrix(3, 3);
tempMatrix1.setValue(0, 1, 1);
tempMatrix1.setValue(1, 0, 1);
tempMatrix1.setValue(1, 2, 1);
tempMatrix1.setValue(2, 1, 1);
System.out.println("The original matrix is: " + tempMatrix1);
IntMatrix tempMatrix2 = null;
try {
tempMatrix2 = IntMatrix.multiply(tempMatrix1, tempMatrix1);
} catch (Exception ee) {
System.out.println(ee);
} // Of try
System.out.println("The square matrix is: " + tempMatrix2);
IntMatrix tempMatrix3 = new IntMatrix(tempMatrix2);
try {
tempMatrix3.add(tempMatrix1);
} catch (Exception ee) {
System.out.println(ee);
} // Of try
System.out.println("The connectivity matrix is: " + tempMatrix3);
}// Of main
}// Of class IntMatrix
运行截图: