-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLZSS.C
321 lines (293 loc) · 10.8 KB
/
LZSS.C
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
/************************** Start of LZSS.C *************************
*
* This is the LZSS module, which implements an LZ77 style compression
* algorithm. As iplemented here it uses a 12 bit index into the sliding
* window, and a 4 bit length, which is adjusted to reflect phrase lengths
* of between 2 and 17 bytes.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <windows.h>
#include "bitio.h"
/*
* Various constants used to define the compression parameters. The
* INDEX_BIT_COUNT tells how many bits we allocate to indices into the
* text window. This directly determines the WINDOW_SIZE. The
* LENGTH_BIT_COUNT tells how many bits we allocate for the length of
* an encode phrase. This determines the size of the look ahead buffer.
* The TREE_ROOT is a special node in the tree that always points to
* the root node of the binary phrase tree. END_OF_STREAM is a special
* index used to flag the fact that the file has been completely
* encoded, and there is no more data. UNUSED is the null index for
* the tree. MOD_WINDOW() is a macro used to perform arithmetic on tree
* indices.
*
*/
#define INDEX_BIT_COUNT 12
#define LENGTH_BIT_COUNT 4
#define WINDOW_SIZE ( 1 << INDEX_BIT_COUNT )
#define RAW_LOOK_AHEAD_SIZE ( 1 << LENGTH_BIT_COUNT )
#define BREAK_EVEN ( ( 1 + INDEX_BIT_COUNT + LENGTH_BIT_COUNT ) / 9 )
#define LOOK_AHEAD_SIZE ( RAW_LOOK_AHEAD_SIZE + BREAK_EVEN )
#define TREE_ROOT WINDOW_SIZE
#define END_OF_STREAM 0
#define UNUSED 0
#define MOD_WINDOW( a ) ( ( a ) & ( WINDOW_SIZE - 1 ) )
//char *CompressionName = "LZSS Encoder";
//char *Usage = "in-file out-file\n\n";
/*
* These are the two global data structures used in this program.
* The window[] array is exactly that, the window of previously seen
* text, as well as the current look ahead text. The tree[] structure
* contains the binary tree of all of the strings in the window sorted
* in order.
*/
unsigned char window[ WINDOW_SIZE ];
struct {
int parent;
int smaller_child;
int larger_child;
} tree[ WINDOW_SIZE + 1 ];
/*
* Function prototypes for both ANSI C compilers and their K&R brethren.
*/
void InitTree( int r );
void ContractNode( int old_node, int new_node );
void ReplaceNode( int old_node, int new_node );
int FindNextNode( int node );
void DeleteString( int p );
int AddString( int new_node, int *match_position );
void CompressFile( FILE *input, BIT_FILE *output, int argc, char *argv[] );
void ExpandFile( BIT_FILE *input, FILE *output, int argc, char *argv[] );
/*
* Since the tree is static data, it comes up with every node
* initialized to 0, which is good, since 0 is the UNUSED code.
* However, to make the tree really usable, a single phrase has to be
* added to the tree so it has a root node. That is done right here.
*/
void InitTree(int r)
{
tree[ TREE_ROOT ].larger_child = r;
tree[ r ].parent = TREE_ROOT;
tree[ r ].larger_child = UNUSED;
tree[ r ].smaller_child = UNUSED;
}
/*
* This routine is used when a node is being deleted. The link to
* its descendant is broken by pulling the descendant in to overlay
* the existing link.
*/
void ContractNode(int old_node,int new_node)
{
tree[ new_node ].parent = tree[ old_node ].parent;
if ( tree[ tree[ old_node ].parent ].larger_child == old_node )
tree[ tree[ old_node ].parent ].larger_child = new_node;
else
tree[ tree[ old_node ].parent ].smaller_child = new_node;
tree[ old_node ].parent = UNUSED;
}
/*
* This routine is also used when a node is being deleted. However,
* in this case, it is being replaced by a node that was not previously
* in the tree.
*/
void ReplaceNode(int old_node,int new_node)
{
int parent;
parent = tree[ old_node ].parent;
if ( tree[ parent ].smaller_child == old_node )
tree[ parent ].smaller_child = new_node;
else
tree[ parent ].larger_child = new_node;
tree[ new_node ] = tree[ old_node ];
tree[ tree[ new_node ].smaller_child ].parent = new_node;
tree[ tree[ new_node ].larger_child ].parent = new_node;
tree[ old_node ].parent = UNUSED;
}
/*
* This routine is used to find the next smallest node after the node
* argument. It assumes that the node has a smaller child. We find
* the next smallest child by going to the smaller_child node, then
* going to the end of the larger_child descendant chain.
*/
int FindNextNode(int node)
{
int next;
next = tree[ node ].smaller_child;
while ( tree[ next ].larger_child != UNUSED )
next = tree[ next ].larger_child;
return( next );
}
/*
* This routine performs the classic binary tree deletion algorithm.
* If the node to be deleted has a null link in either direction, we
* just pull the non-null link up one to replace the existing link.
* If both links exist, we instead delete the next link in order, which
* is guaranteed to have a null link, then replace the node to be deleted
* with the next link.
*/
void DeleteString(int p)
{
int replacement;
if ( tree[ p ].parent == UNUSED )
return;
if ( tree[ p ].larger_child == UNUSED )
ContractNode( p, tree[ p ].smaller_child );
else if ( tree[ p ].smaller_child == UNUSED )
ContractNode( p, tree[ p ].larger_child );
else {
replacement = FindNextNode( p );
DeleteString( replacement );
ReplaceNode( p, replacement );
}
}
/*
* This where most of the work done by the encoder takes place. This
* routine is responsible for adding the new node to the binary tree.
* It also has to find the best match among all the existing nodes in
* the tree, and return that to the calling routine. To make matters
* even more complicated, if the new_node has a duplicate in the tree,
* the old_node is deleted, for reasons of efficiency.
*/
int AddString(int new_node,int *match_position)
{
int i;
int test_node;
int delta;
int match_length;
int *child;
if ( new_node == END_OF_STREAM )
return( 0 );
test_node = tree[ TREE_ROOT ].larger_child;
match_length = 0;
for ( ; ; ) {
for ( i = 0 ; i < LOOK_AHEAD_SIZE ; i++ ) {
delta = window[ MOD_WINDOW( new_node + i ) ] -
window[ MOD_WINDOW( test_node + i ) ];
if ( delta != 0 )
break;
}
if ( i >= match_length ) {
match_length = i;
*match_position = test_node;
if ( match_length >= LOOK_AHEAD_SIZE ) {
ReplaceNode( test_node, new_node );
return( match_length );
}
}
if ( delta >= 0 )
child = &tree[ test_node ].larger_child;
else
child = &tree[ test_node ].smaller_child;
if ( *child == UNUSED ) {
*child = new_node;
tree[ new_node ].parent = test_node;
tree[ new_node ].larger_child = UNUSED;
tree[ new_node ].smaller_child = UNUSED;
return( match_length );
}
test_node = *child;
}
}
/*
* This is the compression routine. It has to first load up the look
* ahead buffer, then go into the main compression loop. The main loop
* decides whether to output a single character or an index/length
* token that defines a phrase. Once the character or phrase has been
* sent out, another loop has to run. The second loop reads in new
* characters, deletes the strings that are overwritten by the new
* character, then adds the strings that are created by the new
* character.
*/
int CompressFile_lzss_encoding(FILE *input,BIT_FILE *output,int argc,char *argv[])
{
int i;
int c;
int look_ahead_bytes;
int current_position;
int replace_count;
int match_length;
int match_position;
DWORD milliseconds = GetTickCount();
current_position = 1;
for ( i = 0 ; i < LOOK_AHEAD_SIZE ; i++ ) {
if ( ( c = getc( input ) ) == EOF )
break;
window[ current_position + i ] = (unsigned char) c;
}
look_ahead_bytes = i;
InitTree( current_position );
match_length = 0;
match_position = 0;
while ( look_ahead_bytes > 0 ) {
if ( match_length > look_ahead_bytes )
match_length = look_ahead_bytes;
if ( match_length <= BREAK_EVEN ) {
replace_count = 1;
OutputBit( output, 1 );
OutputBits( output,
(unsigned long) window[ current_position ], 8 );
} else {
OutputBit( output, 0 );
OutputBits( output,
(unsigned long) match_position, INDEX_BIT_COUNT );
OutputBits( output,
(unsigned long) ( match_length - ( BREAK_EVEN + 1 ) ),
LENGTH_BIT_COUNT );
replace_count = match_length;
}
for ( i = 0 ; i < replace_count ; i++ ) {
DeleteString( MOD_WINDOW( current_position + LOOK_AHEAD_SIZE ) );
if ( ( c = getc( input ) ) == EOF )
look_ahead_bytes--;
else
window[ MOD_WINDOW( current_position + LOOK_AHEAD_SIZE ) ]
= (unsigned char) c;
current_position = MOD_WINDOW( current_position + 1 );
if ( look_ahead_bytes )
match_length = AddString( current_position, &match_position );
}
};
OutputBit( output, 0 );
OutputBits( output, (unsigned long) END_OF_STREAM, INDEX_BIT_COUNT );
return (GetTickCount() - milliseconds);
}
/*
* This is the expansion routine for the LZSS algorithm. All it has
* to do is read in flag bits, decide whether to read in a character or
* a index/length pair, and take the appropriate action.
*/
int ExpandFile_lzss_encoding(BIT_FILE *input, FILE *output,int argc,char *argv[])
{
int i;
int current_position;
int c;
int match_length;
int match_position;
DWORD milliseconds = GetTickCount();
current_position = 1;
for ( ; ; ) {
if ( InputBit( input ) ) {
c = (int) InputBits( input, 8 );
putc( c, output );
window[ current_position ] = (unsigned char) c;
current_position = MOD_WINDOW( current_position + 1 );
} else {
match_position = (int) InputBits( input, INDEX_BIT_COUNT );
if ( match_position == END_OF_STREAM )
break;
match_length = (int) InputBits( input, LENGTH_BIT_COUNT );
match_length += BREAK_EVEN;
for ( i = 0 ; i <= match_length ; i++ ) {
c = window[ MOD_WINDOW( match_position + i ) ];
putc( c, output );
window[ current_position ] = (unsigned char) c;
current_position = MOD_WINDOW( current_position + 1 );
}
}
}
return (GetTickCount() - milliseconds);
}
/************************** End of LZSS.C *************************/