You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lzhuf.c 19KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764
  1. /*
  2. ----------------------------------------------------------------------------
  3. M. LZHuf Compression
  4. This is the LZHuf compression algorithm as used in DPBOX and F6FBB.
  5. ----------------------------------------------------------------------------
  6. */
  7. /**************************************************************
  8. lzhuf.c
  9. written by Haruyasu Yoshizaki 11/20/1988
  10. some minor changes 4/6/1989
  11. comments translated by Haruhiko Okumura 4/7/1989
  12. minor beautifications and adjustments for compiling under Linux
  13. by Markus Gutschke <gutschk@math.uni-muenster.de>
  14. 1997-01-27
  15. Modifications to allow use as a filter by Ken Yap <ken_yap@users.sourceforge.net>.
  16. 1997-07-01
  17. Small mod to cope with running on big-endian machines
  18. by Jim Hague <jim.hague@acm.org>
  19. 1998-02-06
  20. Make compression statistics report shorter
  21. by Ken Yap <ken_yap@users.sourceforge.net>.
  22. 2001-04-25
  23. **************************************************************/
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include <ctype.h>
  28. #include <errno.h>
  /*
   * Diagnostics configuration.
   *
   * Without VERBOSE, Fprintf((...)) expands to nothing and wterr is a null
   * pointer constant.  With VERBOSE, Fprintf((stream, fmt, ...)) forwards its
   * parenthesised argument list to fprintf, wterr is the write-error message,
   * and codesize counts the bytes emitted by the encoder.
   */
  #ifndef VERBOSE
  #define Fprintf(x)
  #define wterr 0
  #else
  #define Fprintf(x) fprintf x
  #if defined(ENCODE) || defined(DECODE)
  static char wterr[] = "Can't write.";
  #ifdef ENCODE
  static unsigned long int codesize = 0;
  #endif
  #endif
  #endif
  /*
   * Progress threshold used by Encode() and Decode().  Both functions update
   * it unconditionally, so it must exist whether or not VERBOSE is defined;
   * declaring it only under VERBOSE made non-VERBOSE builds fail to compile.
   */
  #if defined(ENCODE) || defined(DECODE)
  static unsigned long int printcount = 0;
  #endif
  /* Input and output streams used by Encode() and Decode().  They are defined */
  /* here only when MAIN is defined; otherwise this is an extern declaration   */
  /* and the definition has to be supplied by another translation unit.        */
  42. #ifndef MAIN
  43. extern
  44. #endif
  45. FILE *infile, *outfile;
  46. #if defined(ENCODE) || defined(DECODE)
  /* Size of the uncompressed text in bytes: Encode() derives it from the input */
  /* file and writes it as the stream header; Decode() reads it back from there. */
  47. static unsigned long int textsize = 0;
  /*
   * Report a fatal error and terminate the process with EXIT_FAILURE.
   *
   * message: text to print on stderr (printed only when Fprintf is active;
   *          it is never written through, hence const).
   * Does not return.
   */
  static void Error(const char *message)
  {
    (void)message;	/* unused when Fprintf expands to nothing */
    Fprintf((stderr, "\n%s\n", message));
    exit(EXIT_FAILURE);
  }
  53. /* These will be a complete waste of time on a little-endian */
  54. /* system, but each conversion is only done once, so the cost does not matter. */
  /*
   * Interpret the first four bytes of ul's in-memory representation as a
   * little-endian (i86 order) 32-bit number and return it as a host value.
   * Any further bytes of ul are ignored.
   */
  static unsigned long i86ul_to_host(unsigned long ul)
  {
    const unsigned char *raw = (const unsigned char *)&ul;
    unsigned long value = 0;
    int idx = 4;

    /* Walk from the most significant stored byte down to the least. */
    while (idx-- > 0)
      value = (value << 8) + raw[idx];
    return value;
  }
  /*
   * Build an unsigned long whose first four bytes in memory hold the low
   * 32 bits of ul in little-endian (i86) order, ready to be written out
   * byte-for-byte.  Bits of ul above the low 32 are dropped.
   *
   * The result starts out as zero so that, where unsigned long is wider than
   * four bytes, the remaining bytes are well defined instead of indeterminate.
   */
  static unsigned long host_to_i86ul(unsigned long ul)
  {
    unsigned long out = 0;
    unsigned char *dst = (unsigned char *)&out;
    int i;

    for (i = 0; i < 4; i++) {
      dst[i] = (unsigned char)(ul & 0xff);
      ul >>= 8;
    }
    return out;
  }
  84. #endif
  85. /********** LZSS compression **********/
  /* Size of the ring buffer text_buf.  Positions are wrapped with "& (N - 1)", */
  /* so N has to be a power of two. */
  86. #define N 4096 /* buffer size */
  87. /* Attention: When using this file for f6fbb-type compressed data exchange,
  88. set N to 2048 ! (DL8HBS) */
  /* Upper bound on the match length and on the number of bytes read ahead. */
  89. #define F 60 /* lookahead buffer size */
  /* Matches no longer than THRESHOLD are emitted as single literal bytes. */
  90. #define THRESHOLD 2
  /* Index value that marks "no node" in lson[], rson[] and dad[]. */
  91. #define NIL N /* leaf of tree */
  92. #if defined(ENCODE) || defined(DECODE)
  /* Ring buffer of N bytes followed by F - 1 extra bytes.  Encode() mirrors the */
  /* first F - 1 bytes into the tail (text_buf[s + N]) so that InsertNode() can  */
  /* compare up to F bytes from any position without wrapping the index.         */
  93. static unsigned char
  94. text_buf[N + F - 1];
  95. #endif
  96. #ifdef ENCODE
  /* match_position / match_length: result of the latest InsertNode() call.     */
  /* lson[], rson[], dad[]: left child, right child and parent index of every   */
  /* buffer position; rson[N + 1 .. N + 256] are the tree roots, one per value  */
  /* of the first byte of a string (see InitTree() and InsertNode()).           */
  97. static int match_position, match_length,
  98. lson[N + 1], rson[N + 257], dad[N + 1];
  99. static void InitTree(void) /* initialize trees */
  100. {
  101. int i;
  102. for (i = N + 1; i <= N + 256; i++)
  103. rson[i] = NIL; /* root */
  104. for (i = 0; i < N; i++)
  105. dad[i] = NIL; /* node */
  106. }
  107. static void InsertNode(int r) /* insert to tree */
  108. {
  109. int i, p, cmp;
  110. unsigned char *key;
  111. unsigned c;
  112. cmp = 1;
  113. key = &text_buf[r];
  114. p = N + 1 + key[0];
  115. rson[r] = lson[r] = NIL;
  116. match_length = 0;
  117. for ( ; ; ) {
  118. if (cmp >= 0) {
  119. if (rson[p] != NIL)
  120. p = rson[p];
  121. else {
  122. rson[p] = r;
  123. dad[r] = p;
  124. return;
  125. }
  126. } else {
  127. if (lson[p] != NIL)
  128. p = lson[p];
  129. else {
  130. lson[p] = r;
  131. dad[r] = p;
  132. return;
  133. }
  134. }
  135. for (i = 1; i < F; i++)
  136. if ((cmp = key[i] - text_buf[p + i]) != 0)
  137. break;
  138. if (i > THRESHOLD) {
  139. if (i > match_length) {
  140. match_position = ((r - p) & (N - 1)) - 1;
  141. if ((match_length = i) >= F)
  142. break;
  143. }
  144. if (i == match_length) {
  145. if ((c = ((r - p) & (N - 1)) - 1) < match_position) {
  146. match_position = c;
  147. }
  148. }
  149. }
  150. }
  151. dad[r] = dad[p];
  152. lson[r] = lson[p];
  153. rson[r] = rson[p];
  154. dad[lson[p]] = r;
  155. dad[rson[p]] = r;
  156. if (rson[dad[p]] == p)
  157. rson[dad[p]] = r;
  158. else
  159. lson[dad[p]] = r;
  160. dad[p] = NIL; /* remove p */
  161. }
  162. static void DeleteNode(int p) /* remove from tree */
  163. {
  164. int q;
  165. if (dad[p] == NIL)
  166. return; /* not registered */
  167. if (rson[p] == NIL)
  168. q = lson[p];
  169. else
  170. if (lson[p] == NIL)
  171. q = rson[p];
  172. else {
  173. q = lson[p];
  174. if (rson[q] != NIL) {
  175. do {
  176. q = rson[q];
  177. } while (rson[q] != NIL);
  178. rson[dad[q]] = lson[q];
  179. dad[lson[q]] = dad[q];
  180. lson[q] = lson[p];
  181. dad[lson[p]] = q;
  182. }
  183. rson[q] = rson[p];
  184. dad[rson[p]] = q;
  185. }
  186. dad[q] = dad[p];
  187. if (rson[dad[p]] == p)
  188. rson[dad[p]] = q;
  189. else
  190. lson[dad[p]] = q;
  191. dad[p] = NIL;
  192. }
  193. #endif
  194. /* Huffman coding */
  /* Number of symbols: the 256 byte values plus one symbol per match length */
  /* THRESHOLD + 1 .. F (Encode() emits 255 - THRESHOLD + match_length).     */
  195. #define N_CHAR (256 - THRESHOLD + F)
  196. /* kinds of characters (character code = 0..N_CHAR-1) */
  /* Number of nodes in the tree: N_CHAR leaves plus N_CHAR - 1 internal nodes. */
  197. #define T (N_CHAR * 2 - 1) /* size of table */
  198. #define R (T - 1) /* position of root */
  /* update() calls reconst() when freq[R] equals this value. */
  199. #define MAX_FREQ 0x8000 /* updates tree when the */
  200. /* root frequency comes to this value. */
  /* Shorthand byte type used by the tables and bit-buffer counters below. */
  201. typedef unsigned char uchar;
  202. /* table for encoding and decoding the upper 6 bits of position */
  203. /* for encoding */
  204. #ifdef ENCODE
  /*
   * Encoder table: number of bits in the prefix code for each value of the
   * upper 6 bits of a match position (index 0..63).  Read-only, hence const.
   */
  static const unsigned char p_len[64] = {
    0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
  };
  /*
   * Encoder table: prefix code, left-aligned in a byte, for each value of the
   * upper 6 bits of a match position (index 0..63).  Read-only, hence const.
   */
  static const unsigned char p_code[64] = {
    0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
    0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
    0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
    0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
    0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
    0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
  };
  225. #endif
  226. #ifdef DECODE
  227. /* for decoding */
  /*
   * Decoder table: maps the next 8 bits of the stream to the upper 6 bits of
   * the match position they encode.  Read-only, hence const.
   */
  static const unsigned char d_code[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
    0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
    0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
    0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
    0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
    0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
    0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
    0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
    0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
    0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
    0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
    0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
    0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
    0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
    0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  };
  /*
   * Decoder table: length in bits of the position prefix code that starts
   * with the given 8 bits of the stream.  Read-only, hence const.
   */
  static const unsigned char d_len[256] = {
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
  };
  296. #endif
  297. #if defined(ENCODE) || defined(DECODE)
  /* Huffman tree state shared by encoder and decoder.  StartHuff() fills all   */
  /* three arrays and sets the extra element freq[T] to 0xffff -- presumably a  */
  /* sentinel that stops the upward scan in update(); confirm before changing.  */
  298. static unsigned freq[T + 1]; /* frequency table */
  299. static int prnt[T + N_CHAR]; /* pointers to parent nodes, except for the */
  300. /* elements [T..T + N_CHAR - 1] which are used to get */
  301. /* the positions of leaves corresponding to the codes. */
  302. static int son[T]; /* pointers to child nodes (son[], son[] + 1) */
  303. #endif
  304. #ifdef DECODE
  /* Input bit buffer used by GetBit() and GetByte(): bits are consumed from   */
  /* bit 15 downwards, and getlen counts the unread bits currently held.       */
  305. static unsigned getbuf = 0;
  306. static uchar getlen = 0;
  307. static int GetBit(void) /* get one bit */
  308. {
  309. int i;
  310. while (getlen <= 8) {
  311. if ((i = getc(infile)) < 0) i = 0;
  312. getbuf |= i << (8 - getlen);
  313. getlen += 8;
  314. }
  315. i = getbuf;
  316. getbuf <<= 1;
  317. getlen--;
  318. return ((signed short)i < 0);
  319. }
  320. static int GetByte(void) /* get one byte */
  321. {
  322. unsigned short i;
  323. while (getlen <= 8) {
  324. if ((signed short)(i = getc(infile)) < 0) i = 0;
  325. getbuf |= i << (8 - getlen);
  326. getlen += 8;
  327. }
  328. i = getbuf;
  329. getbuf <<= 8;
  330. getlen -= 8;
  331. return i >> 8;
  332. }
  333. #endif
  334. #ifdef ENCODE
  /* Output bit buffer used by Putcode() and EncodeEnd(): pending bits sit at  */
  /* bit 15 downwards, and putlen counts how many bits are pending.            */
  335. static unsigned putbuf = 0;
  336. static uchar putlen = 0;
  337. static void Putcode(int l, unsigned c) /* output c bits of code */
  338. {
  339. putbuf |= c >> putlen;
  340. if ((putlen += l) >= 8) {
  341. if (putc(putbuf >> 8, outfile) == EOF) {
  342. Error(wterr);
  343. }
  344. if ((putlen -= 8) >= 8) {
  345. if (putc(putbuf, outfile) == EOF) {
  346. Error(wterr);
  347. }
  348. #ifdef VERBOSE
  349. codesize += 2;
  350. #endif
  351. putlen -= 8;
  352. putbuf = c << (l - putlen);
  353. } else {
  354. putbuf <<= 8;
  355. #ifdef VERBOSE
  356. codesize++;
  357. #endif
  358. }
  359. }
  360. }
  361. #endif
  362. /* initialization of tree */
  363. #if defined(ENCODE) || defined(DECODE)
  364. static void StartHuff(void)
  365. {
  366. int i, j;
  367. for (i = 0; i < N_CHAR; i++) {
  368. freq[i] = 1;
  369. son[i] = i + T;
  370. prnt[i + T] = i;
  371. }
  372. i = 0; j = N_CHAR;
  373. while (j <= R) {
  374. freq[j] = freq[i] + freq[i + 1];
  375. son[j] = i;
  376. prnt[i] = prnt[i + 1] = j;
  377. i += 2; j++;
  378. }
  379. freq[T] = 0xffff;
  380. prnt[R] = 0;
  381. }
  382. /* reconstruction of tree */
  383. static void reconst(void)
  384. {
  385. int i, j, k;
  386. unsigned f, l;
  387. /* collect leaf nodes in the first half of the table */
  388. /* and replace the freq by (freq + 1) / 2. */
  389. j = 0;
  390. for (i = 0; i < T; i++) {
  391. if (son[i] >= T) {
  392. freq[j] = (freq[i] + 1) / 2;
  393. son[j] = son[i];
  394. j++;
  395. }
  396. }
  397. /* begin constructing tree by connecting sons */
  398. for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
  399. k = i + 1;
  400. f = freq[j] = freq[i] + freq[k];
  401. for (k = j - 1; f < freq[k]; k--);
  402. k++;
  403. l = (j - k) * 2;
  404. memmove(&freq[k + 1], &freq[k], l);
  405. freq[k] = f;
  406. memmove(&son[k + 1], &son[k], l);
  407. son[k] = i;
  408. }
  409. /* connect prnt */
  410. for (i = 0; i < T; i++) {
  411. if ((k = son[i]) >= T) {
  412. prnt[k] = i;
  413. } else {
  414. prnt[k] = prnt[k + 1] = i;
  415. }
  416. }
  417. }
  418. /* increment frequency of given code by one, and update tree */
  419. static void update(int c)
  420. {
  421. int i, j, k, l;
  422. if (freq[R] == MAX_FREQ) {
  423. reconst();
  424. }
  425. c = prnt[c + T];
  426. do {
  427. k = ++freq[c];
  428. /* if the order is disturbed, exchange nodes */
  429. if (k > freq[l = c + 1]) {
  430. while (k > freq[++l]);
  431. l--;
  432. freq[c] = freq[l];
  433. freq[l] = k;
  434. i = son[c];
  435. prnt[i] = l;
  436. if (i < T) prnt[i + 1] = l;
  437. j = son[l];
  438. son[l] = i;
  439. prnt[j] = c;
  440. if (j < T) prnt[j + 1] = c;
  441. son[c] = j;
  442. c = l;
  443. }
  444. } while ((c = prnt[c]) != 0); /* repeat up to root */
  445. }
  446. #endif
  447. #ifdef ENCODE
  448. #if 0
  449. static unsigned code, len;
  450. #endif
  451. static void EncodeChar(unsigned c)
  452. {
  453. unsigned i;
  454. int j, k;
  455. i = 0;
  456. j = 0;
  457. k = prnt[c + T];
  458. /* travel from leaf to root */
  459. do {
  460. i >>= 1;
  461. /* if node's address is odd-numbered, choose bigger brother node */
  462. if (k & 1) i += 0x8000;
  463. j++;
  464. } while ((k = prnt[k]) != R);
  465. Putcode(j, i);
  466. #if 0
  467. code = i;
  468. len = j;
  469. #endif
  470. update(c);
  471. }
  472. static void EncodePosition(unsigned c)
  473. {
  474. unsigned i;
  475. /* output upper 6 bits by table lookup */
  476. i = c >> 6;
  477. Putcode(p_len[i], (unsigned)p_code[i] << 8);
  478. /* output lower 6 bits verbatim */
  479. Putcode(6, (c & 0x3f) << 10);
  480. }
  481. static void EncodeEnd(void)
  482. {
  483. if (putlen) {
  484. if (putc(putbuf >> 8, outfile) == EOF) {
  485. Error(wterr);
  486. }
  487. #ifdef VERBOSE
  488. codesize++;
  489. #endif
  490. }
  491. }
  492. #endif
  493. #ifdef DECODE
  494. static int DecodeChar(void)
  495. {
  496. unsigned c;
  497. c = son[R];
  498. /* travel from root to leaf, */
  499. /* choosing the smaller child node (son[]) if the read bit is 0, */
  500. /* the bigger (son[]+1} if 1 */
  501. while (c < T) {
  502. c += GetBit();
  503. c = son[c];
  504. }
  505. c -= T;
  506. update(c);
  507. return c;
  508. }
  509. static int DecodePosition(void)
  510. {
  511. unsigned i, j, c;
  512. /* recover upper 6 bits from table */
  513. i = GetByte();
  514. c = (unsigned)d_code[i] << 6;
  515. j = d_len[i];
  516. /* read lower 6 bits verbatim */
  517. j -= 2;
  518. while (j--) {
  519. i = (i << 1) + GetBit();
  520. }
  521. return c | (i & 0x3f);
  522. }
  523. #endif
  524. #ifdef ENCODE
  525. /* compression */
  526. void Encode(void) /* compression */
  527. {
  528. int i, c, len, r, s, last_match_length;
  529. unsigned long tw;
  530. fseek(infile, 0L, 2);
  531. textsize = ftell(infile);
  532. #ifdef VERBOSE
  533. if ((signed long)textsize < 0)
  534. Fprintf((stderr, "Errno: %d", errno));
  535. #endif
  536. tw = host_to_i86ul(textsize);
  537. if (fwrite(&tw, sizeof tw, 1, outfile) < 1)
  538. Error(wterr); /* output size of text */
  539. if (textsize == 0)
  540. return;
  541. rewind(infile);
  542. textsize = 0; /* rewind and re-read */
  543. StartHuff();
  544. InitTree();
  545. s = 0;
  546. r = N - F;
  547. for (i = s; i < r; i++)
  548. text_buf[i] = ' ';
  549. for (len = 0; len < F && (c = getc(infile)) != EOF; len++)
  550. text_buf[r + len] = c;
  551. textsize = len;
  552. for (i = 1; i <= F; i++)
  553. InsertNode(r - i);
  554. InsertNode(r);
  555. do {
  556. if (match_length > len)
  557. match_length = len;
  558. if (match_length <= THRESHOLD) {
  559. match_length = 1;
  560. EncodeChar(text_buf[r]);
  561. } else {
  562. EncodeChar(255 - THRESHOLD + match_length);
  563. EncodePosition(match_position);
  564. }
  565. last_match_length = match_length;
  566. for (i = 0; i < last_match_length &&
  567. (c = getc(infile)) != EOF; i++) {
  568. DeleteNode(s);
  569. text_buf[s] = c;
  570. if (s < F - 1)
  571. text_buf[s + N] = c;
  572. s = (s + 1) & (N - 1);
  573. r = (r + 1) & (N - 1);
  574. InsertNode(r);
  575. }
  576. if ((textsize += i) > printcount) {
  577. #if defined(VERBOSE) && defined(EXTRAVERBOSE)
  578. Fprintf((stderr, "%12ld\r", textsize));
  579. #endif
  580. printcount += 1024;
  581. }
  582. while (i++ < last_match_length) {
  583. DeleteNode(s);
  584. s = (s + 1) & (N - 1);
  585. r = (r + 1) & (N - 1);
  586. if (--len) InsertNode(r);
  587. }
  588. } while (len > 0);
  589. EncodeEnd();
  590. #ifdef LONG_REPORT
  591. Fprintf((stderr, "input size %ld bytes\n", codesize));
  592. Fprintf((stderr, "output size %ld bytes\n", textsize));
  593. Fprintf((stderr, "input/output %.3f\n", (double)codesize / textsize));
  594. #else
  595. Fprintf((stderr, "input/output = %ld/%ld = %.3f\n", codesize, textsize,
  596. (double)codesize / textsize));
  597. #endif
  598. }
  599. #endif
  600. #ifdef DECODE
  601. void Decode(void) /* recover */
  602. {
  603. int i, j, k, r, c;
  604. unsigned long int count;
  605. unsigned long tw;
  606. if (fread(&tw, sizeof tw, 1, infile) < 1)
  607. Error("Can't read"); /* read size of text */
  608. textsize = i86ul_to_host(tw);
  609. if (textsize == 0)
  610. return;
  611. StartHuff();
  612. for (i = 0; i < N - F; i++)
  613. text_buf[i] = ' ';
  614. r = N - F;
  615. for (count = 0; count < textsize; ) {
  616. c = DecodeChar();
  617. if (c < 256) {
  618. if (putc(c, outfile) == EOF) {
  619. Error(wterr);
  620. }
  621. text_buf[r++] = c;
  622. r &= (N - 1);
  623. count++;
  624. } else {
  625. i = (r - DecodePosition() - 1) & (N - 1);
  626. j = c - 255 + THRESHOLD;
  627. for (k = 0; k < j; k++) {
  628. c = text_buf[(i + k) & (N - 1)];
  629. if (putc(c, outfile) == EOF) {
  630. Error(wterr);
  631. }
  632. text_buf[r++] = c;
  633. r &= (N - 1);
  634. count++;
  635. }
  636. }
  637. if (count > printcount) {
  638. #if defined(VERBOSE) && defined(EXTRAVERBOSE)
  639. Fprintf((stderr, "%12ld\r", count));
  640. #endif
  641. printcount += 1024;
  642. }
  643. }
  644. Fprintf((stderr, "%12ld\n", count));
  645. }
  646. #endif
  647. #ifdef MAIN
  648. int main(int argc, char *argv[])
  649. {
  650. char *s;
  651. FILE *f;
  652. int c;
  653. if (argc == 2) {
  654. outfile = stdout;
  655. if ((f = tmpfile()) == NULL) {
  656. perror("tmpfile");
  657. return EXIT_FAILURE;
  658. }
  659. while ((c = getchar()) != EOF)
  660. fputc(c, f);
  661. rewind(infile = f);
  662. }
  663. else if (argc != 4) {
  664. Fprintf((stderr, "'lzhuf e file1 file2' encodes file1 into file2.\n"
  665. "'lzhuf d file2 file1' decodes file2 into file1.\n"));
  666. return EXIT_FAILURE;
  667. }
  668. if (argc == 4) {
  669. if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL)
  670. || (s = argv[2], (infile = fopen(s, "rb")) == NULL)
  671. || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) {
  672. Fprintf((stderr, "??? %s\n", s));
  673. return EXIT_FAILURE;
  674. }
  675. }
  676. if (toupper(*argv[1]) == 'E')
  677. Encode();
  678. else
  679. Decode();
  680. fclose(infile);
  681. fclose(outfile);
  682. return EXIT_SUCCESS;
  683. }
  684. #endif