You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

deflate.c 27KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. /*
  2. * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. */
  19. FILE_LICENCE ( GPL2_OR_LATER );
  20. #include <string.h>
  21. #include <strings.h>
  22. #include <errno.h>
  23. #include <assert.h>
  24. #include <ctype.h>
  25. #include <ipxe/uaccess.h>
  26. #include <ipxe/deflate.h>
  27. /** @file
  28. *
  29. * DEFLATE decompression algorithm
  30. *
  31. * This file implements the decompression half of the DEFLATE
  32. * algorithm specified in RFC 1951.
  33. *
  34. * Portions of this code are derived from wimboot's xca.c.
  35. *
  36. */
  37. /**
  38. * Byte reversal table
  39. *
  40. * For some insane reason, the DEFLATE format stores some values in
  41. * bit-reversed order.
  *
  * Indexed by byte value; each entry holds that byte with its bit
  * order reversed. Entries 1-255 are filled in by deflate_init();
  * entry 0 is never written and keeps its static zero value.
  42. */
  43. static uint8_t deflate_reverse[256];
  44. /** Literal/length base values
  45. *
  46. * We include entries only for literal/length codes 257-284. Code 285
  47. * does not fit the pattern (it represents a length of 258; following
  48. * the pattern from the earlier codes would give a length of 259), and
  49. * has no extra bits. Codes 286-287 are invalid, but can occur. We
  50. * treat any code greater than 284 as meaning "length 258, no extra
  51. * bits".
  *
  * The table is indexed by ( code - 257 ) and is populated by
  * deflate_init().
  52. */
  53. static uint8_t deflate_litlen_base[28];
  54. /** Distance base values
  55. *
  56. * We include entries for all possible codes 0-31, avoiding the need
  57. * to check for undefined codes 30 and 31 before performing the
  58. * lookup. Codes 30 and 31 are never initialised, and will therefore
  59. * be treated as meaning "14 extra bits, base distance 0".
  *
  * Entries 0-29 are populated by deflate_init().
  60. */
  61. static uint16_t deflate_distance_base[32];
  62. /** Code length map
  *
  * Entry N is the code length alphabet symbol to which the Nth code
  * length value read from a dynamic block header is assigned.
  */
  63. static uint8_t deflate_codelen_map[19] = {
  64. 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
  65. };
  66. /** Static Huffman alphabet length patterns
  *
  * Each entry is a fill byte and a count of bytes to fill within the
  * decompressor's length table. Lengths are stored two per byte (one
  * per 4-bit nibble), so each fill byte repeats the same length in
  * both nibbles and each count is half the number of symbols in the
  * range. The list is terminated by an entry with a zero count.
  */
  67. static struct deflate_static_length_pattern deflate_static_length_patterns[] = {
  68. /* Literal/length code lengths */
  69. { 0x88, ( ( ( 143 - 0 ) + 1 ) / 2 ) },
  70. { 0x99, ( ( ( 255 - 144 ) + 1 ) / 2 ) },
  71. { 0x77, ( ( ( 279 - 256 ) + 1 ) / 2 ) },
  72. { 0x88, ( ( ( 287 - 280 ) + 1 ) / 2 ) },
  73. /* Distance code lengths */
  74. { 0x55, ( ( ( 31 - 0 ) + 1 ) / 2 ) },
  75. /* End marker */
  76. { 0, 0 }
  77. };
  /**
   * Transcribe binary value (for debugging)
   *
   * @v value		Value
   * @v bits		Length of value (in bits)
   * @ret string		Transcribed value
   *
   * The least significant @c bits bits of @c value are written as a
   * string of '0' and '1' characters, most significant bit first.
   *
   * The result is held in a single static buffer, so it is overwritten
   * by the next call and must be consumed before calling again.
   */
  static const char * deflate_bin ( unsigned long value, unsigned int bits ) {
      static char buf[ ( 8 * sizeof ( value ) ) + 1 /* NUL */ ];
      char *out = buf;

      /* Sanity check: the digits plus the NUL must fit in the buffer */
      assert ( bits < sizeof ( buf ) );

      /* Transcribe value.  The mask is built from an unsigned long
       * so that bit positions beyond the width of an int are tested
       * correctly (shifting a plain int that far is undefined).
       */
      while ( bits-- )
          *(out++) = ( ( value & ( 1UL << bits ) ) ? '1' : '0' );
      *out = '\0';

      return buf;
  }
  96. /**
  97. * Set Huffman symbol length
  98. *
  99. * @v deflate Decompressor
  100. * @v index Index within lengths
  101. * @v bits Symbol length (in bits)
  102. */
  103. static void deflate_set_length ( struct deflate *deflate, unsigned int index,
  104. unsigned int bits ) {
  105. deflate->lengths[ index / 2 ] |= ( bits << ( 4 * ( index % 2 ) ) );
  106. }
  107. /**
  108. * Get Huffman symbol length
  109. *
  110. * @v deflate Decompressor
  111. * @v index Index within lengths
  112. * @ret bits Symbol length (in bits)
  113. */
  114. static unsigned int deflate_length ( struct deflate *deflate,
  115. unsigned int index ) {
  116. return ( ( deflate->lengths[ index / 2 ] >> ( 4 * ( index % 2 ) ) )
  117. & 0x0f );
  118. }
  119. /**
  120. * Determine Huffman alphabet name (for debugging)
  121. *
  122. * @v deflate Decompressor
  123. * @v alphabet Huffman alphabet
  124. * @ret name Alphabet name
  125. */
  126. static const char * deflate_alphabet_name ( struct deflate *deflate,
  127. struct deflate_alphabet *alphabet ){
  128. if ( alphabet == &deflate->litlen ) {
  129. return "litlen";
  130. } else if ( alphabet == &deflate->distance_codelen ) {
  131. return "distance/codelen";
  132. } else {
  133. return "<UNKNOWN>";
  134. }
  135. }
  136. /**
  137. * Dump Huffman alphabet (for debugging)
  138. *
  139. * @v deflate Decompressor
  140. * @v alphabet Huffman alphabet
  141. */
  142. static void deflate_dump_alphabet ( struct deflate *deflate,
  143. struct deflate_alphabet *alphabet ) {
  144. struct deflate_huf_symbols *huf_sym;
  145. unsigned int bits;
  146. unsigned int huf;
  147. unsigned int i;
  148. /* Do nothing unless debugging is enabled */
  149. if ( ! DBG_EXTRA )
  150. return;
  151. /* Dump symbol table for each utilised length */
  152. for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
  153. sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
  154. huf_sym = &alphabet->huf[ bits - 1 ];
  155. if ( huf_sym->freq == 0 )
  156. continue;
  157. huf = ( huf_sym->start >> huf_sym->shift );
  158. DBGC2 ( alphabet, "DEFLATE %p \"%s\" length %d start \"%s\" "
  159. "freq %d:", deflate,
  160. deflate_alphabet_name ( deflate, alphabet ), bits,
  161. deflate_bin ( huf, huf_sym->bits ), huf_sym->freq );
  162. for ( i = 0 ; i < huf_sym->freq ; i++ ) {
  163. DBGC2 ( alphabet, " %03x",
  164. huf_sym->raw[ huf + i ] );
  165. }
  166. DBGC2 ( alphabet, "\n" );
  167. }
  168. /* Dump quick lookup table */
  169. DBGC2 ( alphabet, "DEFLATE %p \"%s\" quick lookup:", deflate,
  170. deflate_alphabet_name ( deflate, alphabet ) );
  171. for ( i = 0 ; i < ( sizeof ( alphabet->lookup ) /
  172. sizeof ( alphabet->lookup[0] ) ) ; i++ ) {
  173. DBGC2 ( alphabet, " %d", ( alphabet->lookup[i] + 1 ) );
  174. }
  175. DBGC2 ( alphabet, "\n" );
  176. }
  177. /**
  178. * Construct Huffman alphabet
  179. *
  180. * @v deflate Decompressor
  181. * @v alphabet Huffman alphabet
  182. * @v count Number of symbols
  183. * @v offset Starting offset within length table
  184. * @ret rc Return status code
  *
  * Symbol lengths are read from the decompressor's length table for
  * indices offset to ( offset + count - 1 ). Returns -EINVAL if the
  * lengths describe too many codes for some length, or if the
  * resulting code space is not completely used; returns 0 otherwise.
  185. */
  186. static int deflate_alphabet ( struct deflate *deflate,
  187. struct deflate_alphabet *alphabet,
  188. unsigned int count, unsigned int offset ) {
  189. struct deflate_huf_symbols *huf_sym;
  190. unsigned int huf;
  191. unsigned int cum_freq;
  192. unsigned int bits;
  193. unsigned int raw;
  194. unsigned int adjustment;
  195. unsigned int prefix;
  196. int complete;
  197. /* Clear symbol table */
  198. memset ( alphabet->huf, 0, sizeof ( alphabet->huf ) );
  199. /* Count number of symbols with each Huffman-coded length */
  200. for ( raw = 0 ; raw < count ; raw++ ) {
  201. bits = deflate_length ( deflate, ( raw + offset ) );
  /* A length of zero marks a symbol that is not used */
  202. if ( bits )
  203. alphabet->huf[ bits - 1 ].freq++;
  204. }
  205. /* Populate Huffman-coded symbol table */
  /* huf tracks the first code value available at the current
   * length; cum_freq tracks how many raw symbols belong to shorter
   * lengths, i.e. where this length's symbols start in raw[].
   */
  206. huf = 0;
  207. cum_freq = 0;
  208. for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
  209. sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
  210. huf_sym = &alphabet->huf[ bits - 1 ];
  211. huf_sym->bits = bits;
  /* start holds the first code of this length, left-aligned
   * within 16 bits
   */
  212. huf_sym->shift = ( 16 - bits );
  213. huf_sym->start = ( huf << huf_sym->shift );
  214. huf_sym->raw = &alphabet->raw[cum_freq];
  215. huf += huf_sym->freq;
  /* There are only ( 1 << bits ) distinct codes of this length */
  216. if ( huf > ( 1U << bits ) ) {
  217. DBGC ( alphabet, "DEFLATE %p \"%s\" has too many "
  218. "symbols with lengths <=%d\n", deflate,
  219. deflate_alphabet_name ( deflate, alphabet ),
  220. bits );
  221. return -EINVAL;
  222. }
  /* Extend the running code value by one bit for the next length */
  223. huf <<= 1;
  224. cum_freq += huf_sym->freq;
  225. }
  /* On loop exit bits is one more than the longest length and huf
   * has been doubled once more, so a fully-used code space gives
   * exactly ( 1 << bits ).
   */
  226. complete = ( huf == ( 1U << bits ) );
  227. /* Populate raw symbol table */
  /* Each length's raw pointer is advanced as its symbols are
   * stored; the pointers are rewound in the next loop.
   */
  228. for ( raw = 0 ; raw < count ; raw++ ) {
  229. bits = deflate_length ( deflate, ( raw + offset ) );
  230. if ( bits ) {
  231. huf_sym = &alphabet->huf[ bits - 1 ];
  232. *(huf_sym->raw++) = raw;
  233. }
  234. }
  235. /* Adjust Huffman-coded symbol table raw pointers and populate
  236. * quick lookup table.
  237. */
  238. for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
  239. sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
  240. huf_sym = &alphabet->huf[ bits - 1 ];
  241. /* Adjust raw pointer */
  242. huf_sym->raw -= huf_sym->freq; /* Reset to first symbol */
  /* Bias the pointer by the first code value of this length, so
   * that raw[] can be indexed directly by a code value.
   */
  243. adjustment = ( huf_sym->start >> huf_sym->shift );
  244. huf_sym->raw -= adjustment; /* Adjust for quick indexing */
  245. /* Populate quick lookup table */
  /* Every prefix from this length's start upwards is claimed
   * here; later (longer) lengths overwrite the tail of the
   * range with their own index.
   */
  246. for ( prefix = ( huf_sym->start >> DEFLATE_HUFFMAN_QL_SHIFT ) ;
  247. prefix < ( 1 << DEFLATE_HUFFMAN_QL_BITS ) ; prefix++ ) {
  248. alphabet->lookup[prefix] = ( bits - 1 );
  249. }
  250. }
  251. /* Dump alphabet (for debugging) */
  252. deflate_dump_alphabet ( deflate, alphabet );
  253. /* Check that there are no invalid codes */
  254. if ( ! complete ) {
  255. DBGC ( alphabet, "DEFLATE %p \"%s\" is incomplete\n", deflate,
  256. deflate_alphabet_name ( deflate, alphabet ) );
  257. return -EINVAL;
  258. }
  259. return 0;
  260. }
  261. /**
  262. * Attempt to accumulate bits from input stream
  263. *
  264. * @v deflate Decompressor
  265. * @v in Compressed input data
  266. * @v target Number of bits to accumulate
  267. * @ret excess Number of excess bits accumulated (may be negative)
  268. */
  269. static int deflate_accumulate ( struct deflate *deflate,
  270. struct deflate_chunk *in,
  271. unsigned int target ) {
  272. uint8_t byte;
  273. while ( deflate->bits < target ) {
  274. /* Check for end of input */
  275. if ( in->offset >= in->len )
  276. break;
  277. /* Acquire byte from input */
  278. copy_from_user ( &byte, in->data, in->offset++,
  279. sizeof ( byte ) );
  280. deflate->accumulator = ( deflate->accumulator |
  281. ( byte << deflate->bits ) );
  282. deflate->rotalumucca = ( deflate->rotalumucca |
  283. ( deflate_reverse[byte] <<
  284. ( 24 - deflate->bits ) ) );
  285. deflate->bits += 8;
  286. /* Sanity check */
  287. assert ( deflate->bits <=
  288. ( 8 * sizeof ( deflate->accumulator ) ) );
  289. }
  290. return ( deflate->bits - target );
  291. }
  292. /**
  293. * Consume accumulated bits from the input stream
  294. *
  295. * @v deflate Decompressor
  296. * @v count Number of accumulated bits to consume
  297. * @ret data Consumed bits
  298. */
  299. static int deflate_consume ( struct deflate *deflate, unsigned int count ) {
  300. int data;
  301. /* Sanity check */
  302. assert ( count <= deflate->bits );
  303. /* Extract data and consume bits */
  304. data = ( deflate->accumulator & ( ( 1 << count ) - 1 ) );
  305. deflate->accumulator >>= count;
  306. deflate->rotalumucca <<= count;
  307. deflate->bits -= count;
  308. return data;
  309. }
  /**
   * Attempt to extract a fixed number of bits from input stream
   *
   * @v deflate		Decompressor
   * @v in		Compressed input data
   * @v target		Number of bits to extract
   * @ret data		Extracted bits (or negative if not yet accumulated)
   *
   * A request for zero bits succeeds immediately with a value of zero
   * and does not touch the input.
   */
  static int deflate_extract ( struct deflate *deflate, struct deflate_chunk *in,
                               unsigned int target ) {
      int shortfall;
      int value;

      /* Zero-width fields need no input at all */
      if ( ! target )
          return 0;

      /* Make sure enough bits are on hand; give up for now if not */
      shortfall = deflate_accumulate ( deflate, in, target );
      if ( shortfall < 0 )
          return shortfall;

      /* Take the bits */
      value = deflate_consume ( deflate, target );
      DBGCP ( deflate, "DEFLATE %p extracted %s = %#x = %d\n", deflate,
              deflate_bin ( value, target ), value, value );

      return value;
  }
  335. /**
  336. * Attempt to decode a Huffman-coded symbol from input stream
  337. *
  338. * @v deflate Decompressor
  339. * @v in Compressed input data
  340. * @v alphabet Huffman alphabet
  341. * @ret code Raw code (or negative if not yet accumulated)
  342. */
  343. static int deflate_decode ( struct deflate *deflate,
  344. struct deflate_chunk *in,
  345. struct deflate_alphabet *alphabet ) {
  346. struct deflate_huf_symbols *huf_sym;
  347. uint16_t huf;
  348. unsigned int lookup_index;
  349. int excess;
  350. unsigned int raw;
  351. /* Attempt to accumulate maximum required number of bits.
  352. * There may be fewer bits than this remaining in the stream,
  353. * even if the stream still contains some complete
  354. * Huffman-coded symbols.
  355. */
  356. deflate_accumulate ( deflate, in, DEFLATE_HUFFMAN_BITS );
  357. /* Normalise the bit-reversed accumulated value to 16 bits */
  358. huf = ( deflate->rotalumucca >> 16 );
  359. /* Find symbol set for this length */
  360. lookup_index = ( huf >> DEFLATE_HUFFMAN_QL_SHIFT );
  361. huf_sym = &alphabet->huf[ alphabet->lookup[ lookup_index ] ];
  362. while ( huf < huf_sym->start )
  363. huf_sym--;
  364. /* Calculate number of excess bits, and return if not yet complete */
  365. excess = ( deflate->bits - huf_sym->bits );
  366. if ( excess < 0 )
  367. return excess;
  368. /* Consume bits */
  369. deflate_consume ( deflate, huf_sym->bits );
  370. /* Look up raw symbol */
  371. raw = huf_sym->raw[ huf >> huf_sym->shift ];
  372. DBGCP ( deflate, "DEFLATE %p decoded %s = %#x = %d\n", deflate,
  373. deflate_bin ( ( huf >> huf_sym->shift ), huf_sym->bits ),
  374. raw, raw );
  375. return raw;
  376. }
  377. /**
  378. * Discard bits up to the next byte boundary
  379. *
  380. * @v deflate Decompressor
  381. */
  382. static void deflate_discard_to_byte ( struct deflate *deflate ) {
  383. deflate_consume ( deflate, ( deflate->bits & 7 ) );
  384. }
  385. /**
  386. * Copy data to output buffer (if available)
  387. *
  388. * @v out Output data buffer
  389. * @v start Source data
  390. * @v offset Starting offset within source data
  391. * @v len Length to copy
  392. */
  393. static void deflate_copy ( struct deflate_chunk *out,
  394. userptr_t start, size_t offset, size_t len ) {
  395. size_t out_offset = out->offset;
  396. size_t copy_len;
  397. /* Copy data one byte at a time, to allow for overlap */
  398. if ( out_offset < out->len ) {
  399. copy_len = ( out->len - out_offset );
  400. if ( copy_len > len )
  401. copy_len = len;
  402. while ( copy_len-- ) {
  403. memcpy_user ( out->data, out_offset++,
  404. start, offset++, 1 );
  405. }
  406. }
  407. out->offset += len;
  408. }
  409. /**
  410. * Inflate compressed data
  411. *
  412. * @v deflate Decompressor
  413. * @v in Compressed input data
  414. * @v out Output data buffer
  415. * @ret rc Return status code
  416. *
  417. * The caller can use deflate_finished() to determine whether a
  418. * successful return indicates that the decompressor is merely waiting
  419. * for more input.
  420. *
  421. * Data will not be written beyond the specified end of the output
  422. * data buffer, but the offset within the output data buffer will be
  423. * updated to reflect the amount that should have been written. The
  424. * caller can use this to find the length of the decompressed data
  425. * before allocating the output data buffer.
  *
  * The function is a resumable state machine. Whenever the input runs
  * out part-way through a field, the address of the current label is
  * stored in deflate->resume and zero is returned; the next call jumps
  * straight back to that label. deflate->resume is cleared once the
  * stream has been fully processed.
  *
  * Returns zero both when waiting for more input and when finished,
  * -ENOTSUP for an unsupported ZLIB header or block type, -EINVAL for
  * a bad literal block length or duplicate distance, or the error
  * returned by deflate_alphabet().
  426. */
  427. int deflate_inflate ( struct deflate *deflate,
  428. struct deflate_chunk *in,
  429. struct deflate_chunk *out ) {
  430. /* This could be implemented more neatly if gcc offered a
  431. * means for enforcing tail recursion.
  432. */
  /* Re-enter at the saved label if a previous call was interrupted;
   * otherwise start from the beginning for the selected format.
   */
  433. if ( deflate->resume ) {
  434. goto *(deflate->resume);
  435. } else switch ( deflate->format ) {
  436. case DEFLATE_RAW: goto block_header;
  437. case DEFLATE_ZLIB: goto zlib_header;
  438. default: assert ( 0 );
  439. }
  440. zlib_header: {
  441. int header;
  442. int cm;
  443. /* Extract header */
  444. header = deflate_extract ( deflate, in, ZLIB_HEADER_BITS );
  445. if ( header < 0 ) {
  446. deflate->resume = &&zlib_header;
  447. return 0;
  448. }
  449. /* Parse header */
  450. cm = ( ( header >> ZLIB_HEADER_CM_LSB ) & ZLIB_HEADER_CM_MASK );
  451. if ( cm != ZLIB_HEADER_CM_DEFLATE ) {
  452. DBGC ( deflate, "DEFLATE %p unsupported ZLIB "
  453. "compression method %d\n", deflate, cm );
  454. return -ENOTSUP;
  455. }
  456. if ( header & ( 1 << ZLIB_HEADER_FDICT_BIT ) ) {
  457. DBGC ( deflate, "DEFLATE %p unsupported ZLIB preset "
  458. "dictionary\n", deflate );
  459. return -ENOTSUP;
  460. }
  461. /* Process first block header */
  462. goto block_header;
  463. }
  464. block_header: {
  465. int header;
  466. int bfinal;
  467. int btype;
  468. /* Extract block header */
  469. header = deflate_extract ( deflate, in, DEFLATE_HEADER_BITS );
  470. if ( header < 0 ) {
  471. deflate->resume = &&block_header;
  472. return 0;
  473. }
  474. /* Parse header */
  /* The header is saved so that block_done can test the final
   * block flag later.
   */
  475. deflate->header = header;
  476. bfinal = ( header & ( 1 << DEFLATE_HEADER_BFINAL_BIT ) );
  477. btype = ( header >> DEFLATE_HEADER_BTYPE_LSB );
  478. DBGC ( deflate, "DEFLATE %p found %sblock type %#x\n",
  479. deflate, ( bfinal ? "final " : "" ), btype );
  480. switch ( btype ) {
  481. case DEFLATE_HEADER_BTYPE_LITERAL:
  482. goto literal_block;
  483. case DEFLATE_HEADER_BTYPE_STATIC:
  484. goto static_block;
  485. case DEFLATE_HEADER_BTYPE_DYNAMIC:
  486. goto dynamic_block;
  487. default:
  488. DBGC ( deflate, "DEFLATE %p unsupported block type "
  489. "%#x\n", deflate, btype );
  490. return -ENOTSUP;
  491. }
  492. }
  493. literal_block: {
  494. /* Discard any bits up to the next byte boundary */
  495. deflate_discard_to_byte ( deflate );
  /* Falls through to literal_len */
  496. }
  497. literal_len: {
  498. int len;
  499. /* Extract LEN field */
  500. len = deflate_extract ( deflate, in, DEFLATE_LITERAL_LEN_BITS );
  501. if ( len < 0 ) {
  502. deflate->resume = &&literal_len;
  503. return 0;
  504. }
  505. /* Record length of literal data */
  506. deflate->remaining = len;
  507. DBGC2 ( deflate, "DEFLATE %p literal block length %#04zx\n",
  508. deflate, deflate->remaining );
  /* Falls through to literal_nlen */
  509. }
  510. literal_nlen: {
  511. int nlen;
  512. /* Extract NLEN field */
  513. nlen = deflate_extract ( deflate, in, DEFLATE_LITERAL_LEN_BITS);
  514. if ( nlen < 0 ) {
  515. deflate->resume = &&literal_nlen;
  516. return 0;
  517. }
  518. /* Verify NLEN */
  /* NLEN must be the bitwise complement of LEN within the width
   * of the field.
   */
  519. if ( ( ( deflate->remaining ^ ~nlen ) &
  520. ( ( 1 << DEFLATE_LITERAL_LEN_BITS ) - 1 ) ) != 0 ) {
  521. DBGC ( deflate, "DEFLATE %p invalid len/nlen "
  522. "%#04zx/%#04x\n", deflate,
  523. deflate->remaining, nlen );
  524. return -EINVAL;
  525. }
  /* Falls through to literal_data */
  526. }
  527. literal_data: {
  528. size_t in_remaining;
  529. size_t len;
  530. /* Calculate available amount of literal data */
  531. in_remaining = ( in->len - in->offset );
  532. len = deflate->remaining;
  533. if ( len > in_remaining )
  534. len = in_remaining;
  535. /* Copy data to output buffer */
  536. deflate_copy ( out, in->data, in->offset, len );
  537. /* Consume data from input buffer */
  538. in->offset += len;
  539. deflate->remaining -= len;
  540. /* Finish processing if we are blocked */
  541. if ( deflate->remaining ) {
  542. deflate->resume = &&literal_data;
  543. return 0;
  544. }
  545. /* Otherwise, finish block */
  546. goto block_done;
  547. }
  548. static_block: {
  549. struct deflate_static_length_pattern *pattern;
  550. uint8_t *lengths = deflate->lengths;
  551. /* Construct static Huffman lengths as per RFC 1951 */
  552. for ( pattern = deflate_static_length_patterns ;
  553. pattern->count ; pattern++ ) {
  554. memset ( lengths, pattern->fill, pattern->count );
  555. lengths += pattern->count;
  556. }
  557. deflate->litlen_count = 288;
  558. deflate->distance_count = 32;
  559. goto construct_alphabets;
  560. }
  561. dynamic_block:
  562. dynamic_header: {
  563. int header;
  564. unsigned int hlit;
  565. unsigned int hdist;
  566. unsigned int hclen;
  567. /* Extract block header */
  568. header = deflate_extract ( deflate, in, DEFLATE_DYNAMIC_BITS );
  569. if ( header < 0 ) {
  570. deflate->resume = &&dynamic_header;
  571. return 0;
  572. }
  573. /* Parse header */
  574. hlit = ( ( header >> DEFLATE_DYNAMIC_HLIT_LSB ) &
  575. DEFLATE_DYNAMIC_HLIT_MASK );
  576. hdist = ( ( header >> DEFLATE_DYNAMIC_HDIST_LSB ) &
  577. DEFLATE_DYNAMIC_HDIST_MASK );
  578. hclen = ( ( header >> DEFLATE_DYNAMIC_HCLEN_LSB ) &
  579. DEFLATE_DYNAMIC_HCLEN_MASK );
  580. deflate->litlen_count = ( hlit + 257 );
  581. deflate->distance_count = ( hdist + 1 );
  582. deflate->length_index = 0;
  583. deflate->length_target = ( hclen + 4 );
  584. DBGC2 ( deflate, "DEFLATE %p dynamic block %d codelen, %d "
  585. "litlen, %d distance\n", deflate,
  586. deflate->length_target, deflate->litlen_count,
  587. deflate->distance_count );
  588. /* Prepare for decoding code length code lengths */
  /* Lengths are ORed into the table by deflate_set_length(), so
   * the table must start out as all zeros.
   */
  589. memset ( &deflate->lengths, 0, sizeof ( deflate->lengths ) );
  /* Falls through to dynamic_codelen */
  590. }
  591. dynamic_codelen: {
  592. int len;
  593. unsigned int index;
  594. int rc;
  595. /* Extract all code lengths */
  596. while ( deflate->length_index < deflate->length_target ) {
  597. /* Extract code length length */
  598. len = deflate_extract ( deflate, in,
  599. DEFLATE_CODELEN_BITS );
  600. if ( len < 0 ) {
  601. deflate->resume = &&dynamic_codelen;
  602. return 0;
  603. }
  604. /* Store code length */
  605. index = deflate_codelen_map[deflate->length_index++];
  606. deflate_set_length ( deflate, index, len );
  607. DBGCP ( deflate, "DEFLATE %p codelen for %d is %d\n",
  608. deflate, index, len );
  609. }
  610. /* Generate code length alphabet */
  /* The code length alphabet temporarily occupies the same
   * structure that later holds the distance alphabet.
   */
  611. if ( ( rc = deflate_alphabet ( deflate,
  612. &deflate->distance_codelen,
  613. ( DEFLATE_CODELEN_MAX_CODE + 1 ),
  614. 0 ) ) != 0 )
  615. return rc;
  616. /* Prepare for decoding literal/length/distance code lengths */
  617. memset ( &deflate->lengths, 0, sizeof ( deflate->lengths ) );
  618. deflate->length_index = 0;
  619. deflate->length_target = ( deflate->litlen_count +
  620. deflate->distance_count );
  621. deflate->length = 0;
  /* Falls through to dynamic_litlen_distance */
  622. }
  623. dynamic_litlen_distance: {
  624. int len;
  625. int index;
  626. /* Decode literal/length/distance code length */
  627. len = deflate_decode ( deflate, in, &deflate->distance_codelen);
  628. if ( len < 0 ) {
  629. deflate->resume = &&dynamic_litlen_distance;
  630. return 0;
  631. }
  632. /* Prepare for extra bits */
  633. if ( len < 16 ) {
  /* Plain code length: store it once, no extra bits */
  634. deflate->length = len;
  635. deflate->extra_bits = 0;
  636. deflate->dup_len = 1;
  637. } else {
  /* Repeat codes 16, 17 and 18: code 16 repeats the
   * previously stored length, while codes 17 and 18
   * repeat a length of zero.
   */
  638. static const uint8_t dup_len[3] = { 3, 3, 11 };
  639. static const uint8_t extra_bits[3] = { 2, 3, 7 };
  640. index = ( len - 16 );
  641. deflate->dup_len = dup_len[index];
  642. deflate->extra_bits = extra_bits[index];
  643. if ( index )
  644. deflate->length = 0;
  645. }
  /* Falls through to dynamic_litlen_distance_extra */
  646. }
  647. dynamic_litlen_distance_extra: {
  648. int extra;
  649. unsigned int dup_len;
  650. /* Extract extra bits */
  651. extra = deflate_extract ( deflate, in, deflate->extra_bits );
  652. if ( extra < 0 ) {
  653. deflate->resume = &&dynamic_litlen_distance_extra;
  654. return 0;
  655. }
  656. /* Store code lengths */
  /* Repeats are silently truncated at the end of the table */
  657. dup_len = ( deflate->dup_len + extra );
  658. while ( ( deflate->length_index < deflate->length_target ) &&
  659. dup_len-- ) {
  660. deflate_set_length ( deflate, deflate->length_index++,
  661. deflate->length );
  662. }
  663. /* Process next literal/length or distance code
  664. * length, if more are required.
  665. */
  666. if ( deflate->length_index < deflate->length_target )
  667. goto dynamic_litlen_distance;
  668. /* Construct alphabets */
  669. goto construct_alphabets;
  670. }
  671. construct_alphabets: {
  /* Distance code lengths follow directly after the
   * literal/length code lengths in the length table.
   */
  672. unsigned int distance_offset = deflate->litlen_count;
  673. unsigned int distance_count = deflate->distance_count;
  674. int rc;
  675. /* Generate literal/length alphabet */
  676. if ( ( rc = deflate_alphabet ( deflate, &deflate->litlen,
  677. deflate->litlen_count, 0 ) ) !=0)
  678. return rc;
  679. /* Handle degenerate case of a single distance code
  680. * (for which it is impossible to construct a valid,
  681. * complete Huffman alphabet). RFC 1951 states:
  682. *
  683. * If only one distance code is used, it is encoded
  684. * using one bit, not zero bits; in this case there
  685. * is a single code length of one, with one unused
  686. * code. One distance code of zero bits means that
  687. * there are no distance codes used at all (the data
  688. * is all literals).
  689. *
  690. * If we have only a single distance code, then we
  691. * instead use two distance codes both with length 1.
  692. * This results in a valid Huffman alphabet. The code
  693. * "0" will mean distance code 0 (which is either
  694. * correct or irrelevant), and the code "1" will mean
  695. * distance code 1 (which is always irrelevant).
  696. */
  697. if ( deflate->distance_count == 1 ) {
  /* The literal/length alphabet has already been built, so
   * the start of the length table can safely be reused.
   */
  698. deflate->lengths[0] = 0x11;
  699. distance_offset = 0;
  700. distance_count = 2;
  701. }
  702. /* Generate distance alphabet */
  703. if ( ( rc = deflate_alphabet ( deflate,
  704. &deflate->distance_codelen,
  705. distance_count,
  706. distance_offset ) ) != 0 )
  707. return rc;
  /* Falls through to lzhuf_litlen */
  708. }
  709. lzhuf_litlen: {
  710. int code;
  711. uint8_t byte;
  712. unsigned int extra;
  713. unsigned int bits;
  714. /* Decode Huffman codes */
  715. while ( 1 ) {
  716. /* Decode Huffman code */
  717. code = deflate_decode ( deflate, in, &deflate->litlen );
  718. if ( code < 0 ) {
  719. deflate->resume = &&lzhuf_litlen;
  720. return 0;
  721. }
  722. /* Handle according to code type */
  723. if ( code < DEFLATE_LITLEN_END ) {
  724. /* Literal value: copy to output buffer */
  725. byte = code;
  726. DBGCP ( deflate, "DEFLATE %p literal %#02x "
  727. "('%c')\n", deflate, byte,
  728. ( isprint ( byte ) ? byte : '.' ) );
  729. deflate_copy ( out, virt_to_user ( &byte ), 0,
  730. sizeof ( byte ) );
  731. } else if ( code == DEFLATE_LITLEN_END ) {
  732. /* End of block */
  733. goto block_done;
  734. } else {
  735. /* Length code: process extra bits */
  736. extra = ( code - DEFLATE_LITLEN_END - 1 );
  737. if ( extra < 28 ) {
  738. bits = ( extra / 4 );
  739. if ( bits )
  740. bits--;
  741. deflate->extra_bits = bits;
  742. deflate->dup_len =
  743. deflate_litlen_base[extra];
  744. } else {
  /* Codes beyond the base table: fixed
   * length of 258 with no extra bits
   */
  745. deflate->extra_bits = 0;
  746. deflate->dup_len = 258;
  747. }
  748. goto lzhuf_litlen_extra;
  749. }
  750. }
  751. }
  752. lzhuf_litlen_extra: {
  753. int extra;
  754. /* Extract extra bits */
  755. extra = deflate_extract ( deflate, in, deflate->extra_bits );
  756. if ( extra < 0 ) {
  757. deflate->resume = &&lzhuf_litlen_extra;
  758. return 0;
  759. }
  760. /* Update duplicate length */
  761. deflate->dup_len += extra;
  /* Falls through to lzhuf_distance */
  762. }
  763. lzhuf_distance: {
  764. int code;
  765. unsigned int extra;
  766. unsigned int bits;
  767. /* Decode Huffman code */
  768. code = deflate_decode ( deflate, in,
  769. &deflate->distance_codelen );
  770. if ( code < 0 ) {
  771. deflate->resume = &&lzhuf_distance;
  772. return 0;
  773. }
  774. /* Process extra bits */
  775. extra = code;
  776. bits = ( extra / 2 );
  777. if ( bits )
  778. bits--;
  779. deflate->extra_bits = bits;
  780. deflate->dup_distance = deflate_distance_base[extra];
  /* Falls through to lzhuf_distance_extra */
  781. }
  782. lzhuf_distance_extra: {
  783. int extra;
  784. size_t dup_len;
  785. size_t dup_distance;
  786. /* Extract extra bits */
  787. extra = deflate_extract ( deflate, in, deflate->extra_bits );
  788. if ( extra < 0 ) {
  789. deflate->resume = &&lzhuf_distance_extra;
  790. return 0;
  791. }
  792. /* Update duplicate distance */
  793. dup_distance = ( deflate->dup_distance + extra );
  794. dup_len = deflate->dup_len;
  795. DBGCP ( deflate, "DEFLATE %p duplicate length %zd distance "
  796. "%zd\n", deflate, dup_len, dup_distance );
  797. /* Sanity check */
  /* The distance must not reach back before the start of the
   * output produced so far.
   */
  798. if ( dup_distance > out->offset ) {
  799. DBGC ( deflate, "DEFLATE %p bad distance %zd (max "
  800. "%zd)\n", deflate, dup_distance, out->offset );
  801. return -EINVAL;
  802. }
  803. /* Copy data, allowing for overlap */
  804. deflate_copy ( out, out->data, ( out->offset - dup_distance ),
  805. dup_len );
  806. /* Process next literal/length symbol */
  807. goto lzhuf_litlen;
  808. }
  809. block_done: {
  810. DBGCP ( deflate, "DEFLATE %p end of block\n", deflate );
  811. /* If this was not the final block, process next block header */
  812. if ( ! ( deflate->header & ( 1 << DEFLATE_HEADER_BFINAL_BIT ) ))
  813. goto block_header;
  814. /* Otherwise, process footer (if any) */
  815. switch ( deflate->format ) {
  816. case DEFLATE_RAW: goto finished;
  817. case DEFLATE_ZLIB: goto zlib_footer;
  818. default: assert ( 0 );
  819. }
  820. }
  821. zlib_footer: {
  822. /* Discard any bits up to the next byte boundary */
  823. deflate_discard_to_byte ( deflate );
  /* Falls through to zlib_adler32 */
  824. }
  825. zlib_adler32: {
  826. int excess;
  827. /* Accumulate the 32 bits of checksum. We don't check
  828. * the value, stop processing immediately afterwards,
  829. * and so don't have to worry about the nasty corner
  830. * cases involved in calling deflate_extract() to
  831. * obtain a full 32 bits.
  832. */
  833. excess = deflate_accumulate ( deflate, in, ZLIB_ADLER32_BITS );
  834. if ( excess < 0 ) {
  835. deflate->resume = &&zlib_adler32;
  836. return 0;
  837. }
  838. /* Finish processing */
  839. goto finished;
  840. }
  841. finished: {
  842. /* Mark as finished and terminate */
  843. DBGCP ( deflate, "DEFLATE %p finished\n", deflate );
  844. deflate->resume = NULL;
  845. return 0;
  846. }
  847. }
  848. /**
  849. * Initialise decompressor
  850. *
  851. * @v deflate Decompressor
  852. * @v format Compression format code
  853. */
  854. void deflate_init ( struct deflate *deflate, enum deflate_format format ) {
  855. static int global_init_done;
  856. uint8_t i;
  857. uint8_t bit;
  858. uint8_t byte;
  859. unsigned int base;
  860. unsigned int bits;
  861. /* Perform global initialisation if required */
  862. if ( ! global_init_done ) {
  863. /* Initialise byte reversal table */
  864. for ( i = 255 ; i ; i-- ) {
  865. for ( bit = 1, byte = 0 ; bit ; bit <<= 1 ) {
  866. byte <<= 1;
  867. if ( i & bit )
  868. byte |= 1;
  869. }
  870. deflate_reverse[i] = byte;
  871. }
  872. /* Initialise literal/length extra bits table */
  873. base = 3;
  874. for ( i = 0 ; i < 28 ; i++ ) {
  875. bits = ( i / 4 );
  876. if ( bits )
  877. bits--;
  878. deflate_litlen_base[i] = base;
  879. base += ( 1 << bits );
  880. }
  881. assert ( base == 259 ); /* sic */
  882. /* Initialise distance extra bits table */
  883. base = 1;
  884. for ( i = 0 ; i < 30 ; i++ ) {
  885. bits = ( i / 2 );
  886. if ( bits )
  887. bits--;
  888. deflate_distance_base[i] = base;
  889. base += ( 1 << bits );
  890. }
  891. assert ( base == 32769 );
  892. /* Record global initialisation as complete */
  893. global_init_done = 1;
  894. }
  895. /* Initialise structure */
  896. memset ( deflate, 0, sizeof ( *deflate ) );
  897. deflate->format = format;
  898. }