You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

deflate.c 27KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049
  1. /*
  2. * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. #include <string.h>
  25. #include <strings.h>
  26. #include <errno.h>
  27. #include <assert.h>
  28. #include <ctype.h>
  29. #include <ipxe/uaccess.h>
  30. #include <ipxe/deflate.h>
  31. /** @file
  32. *
  33. * DEFLATE decompression algorithm
  34. *
  35. * This file implements the decompression half of the DEFLATE
  36. * algorithm specified in RFC 1951.
  37. *
  38. * Portions of this code are derived from wimboot's xca.c.
  39. *
  40. */
  /**
   * Bit-reversed byte lookup table
   *
   * The DEFLATE format stores some values in bit-reversed order, so
   * the decompressor keeps a second, bit-reversed copy of its input
   * accumulator.  Entry N of this table holds the value of byte N
   * with its eight bits reversed.
   *
   * Entries 1-255 are filled in by deflate_init().  Entry 0 is never
   * written and relies on static zero-initialisation (the reversal of
   * zero is zero).
   */
  static uint8_t deflate_reverse[256];
  /** Literal/length base values
   *
   * Indexed by ( code - 257 ), covering literal/length codes 257-284
   * only.  The table is filled in by deflate_init().
   *
   * Code 285 does not fit the pattern of the earlier codes (it
   * represents a length of 258, where the pattern would give 259)
   * and has no extra bits, so it has no entry here.  Codes 286-287
   * are invalid but can occur.  Any code greater than 284 is treated
   * by the decoder as meaning "length 258, no extra bits".
   */
  static uint8_t deflate_litlen_base[28];
  /** Distance base values
   *
   * Indexed directly by distance code.  The table has an entry for
   * every possible code 0-31, so that the decoder does not need to
   * check for the undefined codes 30 and 31 before performing the
   * lookup.
   *
   * deflate_init() fills in entries 0-29 only.  Entries 30 and 31
   * keep their static zero value, so those codes end up being
   * treated as "14 extra bits, base distance 0".
   */
  static uint16_t deflate_distance_base[32];
  /** Code length map
   *
   * The code length code lengths of a dynamic block are transmitted
   * in a fixed, permuted order.  Entry N of this table gives the code
   * length symbol to which the Nth transmitted length applies.
   *
   * The table is only ever read, so it is declared const.
   */
  static const uint8_t deflate_codelen_map[19] = {
      16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
  };
  70. /** Static Huffman alphabet length patterns */
  71. static struct deflate_static_length_pattern deflate_static_length_patterns[] = {
  72. /* Literal/length code lengths */
  73. { 0x88, ( ( ( 143 - 0 ) + 1 ) / 2 ) },
  74. { 0x99, ( ( ( 255 - 144 ) + 1 ) / 2 ) },
  75. { 0x77, ( ( ( 279 - 256 ) + 1 ) / 2 ) },
  76. { 0x88, ( ( ( 287 - 280 ) + 1 ) / 2 ) },
  77. /* Distance code lengths */
  78. { 0x55, ( ( ( 31 - 0 ) + 1 ) / 2 ) },
  79. /* End marker */
  80. { 0, 0 }
  81. };
  /**
   * Transcribe binary value (for debugging)
   *
   * @v value       Value
   * @v bits        Length of value (in bits)
   * @ret string    Transcribed value
   *
   * The least significant @c bits bits of the value are written as
   * '0' and '1' characters, most significant bit first.
   *
   * The returned string lives in a single static buffer, and is
   * overwritten by the next call.
   */
  static const char * deflate_bin ( unsigned long value, unsigned int bits ) {
      static char buf[ ( 8 * sizeof ( value ) ) + 1 /* NUL */ ];
      char *out = buf;

      /* Sanity check */
      assert ( bits < sizeof ( buf ) );

      /* Transcribe value.  The mask must be built from an unsigned
       * long: shifting a plain int by 31 or more places is
       * undefined, and the sanity check above permits any bit
       * position within an unsigned long.
       */
      while ( bits-- )
          *(out++) = ( ( value & ( 1UL << bits ) ) ? '1' : '0' );
      *out = '\0';

      return buf;
  }
  100. /**
  101. * Set Huffman symbol length
  102. *
  103. * @v deflate Decompressor
  104. * @v index Index within lengths
  105. * @v bits Symbol length (in bits)
  106. */
  107. static void deflate_set_length ( struct deflate *deflate, unsigned int index,
  108. unsigned int bits ) {
  109. deflate->lengths[ index / 2 ] |= ( bits << ( 4 * ( index % 2 ) ) );
  110. }
  111. /**
  112. * Get Huffman symbol length
  113. *
  114. * @v deflate Decompressor
  115. * @v index Index within lengths
  116. * @ret bits Symbol length (in bits)
  117. */
  118. static unsigned int deflate_length ( struct deflate *deflate,
  119. unsigned int index ) {
  120. return ( ( deflate->lengths[ index / 2 ] >> ( 4 * ( index % 2 ) ) )
  121. & 0x0f );
  122. }
  123. /**
  124. * Determine Huffman alphabet name (for debugging)
  125. *
  126. * @v deflate Decompressor
  127. * @v alphabet Huffman alphabet
  128. * @ret name Alphabet name
  129. */
  130. static const char * deflate_alphabet_name ( struct deflate *deflate,
  131. struct deflate_alphabet *alphabet ){
  132. if ( alphabet == &deflate->litlen ) {
  133. return "litlen";
  134. } else if ( alphabet == &deflate->distance_codelen ) {
  135. return "distance/codelen";
  136. } else {
  137. return "<UNKNOWN>";
  138. }
  139. }
  140. /**
  141. * Dump Huffman alphabet (for debugging)
  142. *
  143. * @v deflate Decompressor
  144. * @v alphabet Huffman alphabet
  145. */
  146. static void deflate_dump_alphabet ( struct deflate *deflate,
  147. struct deflate_alphabet *alphabet ) {
  148. struct deflate_huf_symbols *huf_sym;
  149. unsigned int bits;
  150. unsigned int huf;
  151. unsigned int i;
  152. /* Do nothing unless debugging is enabled */
  153. if ( ! DBG_EXTRA )
  154. return;
  155. /* Dump symbol table for each utilised length */
  156. for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
  157. sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
  158. huf_sym = &alphabet->huf[ bits - 1 ];
  159. if ( huf_sym->freq == 0 )
  160. continue;
  161. huf = ( huf_sym->start >> huf_sym->shift );
  162. DBGC2 ( alphabet, "DEFLATE %p \"%s\" length %d start \"%s\" "
  163. "freq %d:", deflate,
  164. deflate_alphabet_name ( deflate, alphabet ), bits,
  165. deflate_bin ( huf, huf_sym->bits ), huf_sym->freq );
  166. for ( i = 0 ; i < huf_sym->freq ; i++ ) {
  167. DBGC2 ( alphabet, " %03x",
  168. huf_sym->raw[ huf + i ] );
  169. }
  170. DBGC2 ( alphabet, "\n" );
  171. }
  172. /* Dump quick lookup table */
  173. DBGC2 ( alphabet, "DEFLATE %p \"%s\" quick lookup:", deflate,
  174. deflate_alphabet_name ( deflate, alphabet ) );
  175. for ( i = 0 ; i < ( sizeof ( alphabet->lookup ) /
  176. sizeof ( alphabet->lookup[0] ) ) ; i++ ) {
  177. DBGC2 ( alphabet, " %d", ( alphabet->lookup[i] + 1 ) );
  178. }
  179. DBGC2 ( alphabet, "\n" );
  180. }
  181. /**
  182. * Construct Huffman alphabet
  183. *
  184. * @v deflate Decompressor
  185. * @v alphabet Huffman alphabet
  186. * @v count Number of symbols
  187. * @v offset Starting offset within length table
  188. * @ret rc Return status code
  189. */
  190. static int deflate_alphabet ( struct deflate *deflate,
  191. struct deflate_alphabet *alphabet,
  192. unsigned int count, unsigned int offset ) {
  193. struct deflate_huf_symbols *huf_sym;
  194. unsigned int huf;
  195. unsigned int cum_freq;
  196. unsigned int bits;
  197. unsigned int raw;
  198. unsigned int adjustment;
  199. unsigned int prefix;
  200. int complete;
  201. /* Clear symbol table */
  202. memset ( alphabet->huf, 0, sizeof ( alphabet->huf ) );
  203. /* Count number of symbols with each Huffman-coded length */
  204. for ( raw = 0 ; raw < count ; raw++ ) {
  205. bits = deflate_length ( deflate, ( raw + offset ) );
  206. if ( bits )
  207. alphabet->huf[ bits - 1 ].freq++;
  208. }
  209. /* Populate Huffman-coded symbol table */
  210. huf = 0;
  211. cum_freq = 0;
  212. for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
  213. sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
  214. huf_sym = &alphabet->huf[ bits - 1 ];
  215. huf_sym->bits = bits;
  216. huf_sym->shift = ( 16 - bits );
  217. huf_sym->start = ( huf << huf_sym->shift );
  218. huf_sym->raw = &alphabet->raw[cum_freq];
  219. huf += huf_sym->freq;
  220. if ( huf > ( 1U << bits ) ) {
  221. DBGC ( alphabet, "DEFLATE %p \"%s\" has too many "
  222. "symbols with lengths <=%d\n", deflate,
  223. deflate_alphabet_name ( deflate, alphabet ),
  224. bits );
  225. return -EINVAL;
  226. }
  227. huf <<= 1;
  228. cum_freq += huf_sym->freq;
  229. }
  230. complete = ( huf == ( 1U << bits ) );
  231. /* Populate raw symbol table */
  232. for ( raw = 0 ; raw < count ; raw++ ) {
  233. bits = deflate_length ( deflate, ( raw + offset ) );
  234. if ( bits ) {
  235. huf_sym = &alphabet->huf[ bits - 1 ];
  236. *(huf_sym->raw++) = raw;
  237. }
  238. }
  239. /* Adjust Huffman-coded symbol table raw pointers and populate
  240. * quick lookup table.
  241. */
  242. for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
  243. sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
  244. huf_sym = &alphabet->huf[ bits - 1 ];
  245. /* Adjust raw pointer */
  246. huf_sym->raw -= huf_sym->freq; /* Reset to first symbol */
  247. adjustment = ( huf_sym->start >> huf_sym->shift );
  248. huf_sym->raw -= adjustment; /* Adjust for quick indexing */
  249. /* Populate quick lookup table */
  250. for ( prefix = ( huf_sym->start >> DEFLATE_HUFFMAN_QL_SHIFT ) ;
  251. prefix < ( 1 << DEFLATE_HUFFMAN_QL_BITS ) ; prefix++ ) {
  252. alphabet->lookup[prefix] = ( bits - 1 );
  253. }
  254. }
  255. /* Dump alphabet (for debugging) */
  256. deflate_dump_alphabet ( deflate, alphabet );
  257. /* Check that there are no invalid codes */
  258. if ( ! complete ) {
  259. DBGC ( alphabet, "DEFLATE %p \"%s\" is incomplete\n", deflate,
  260. deflate_alphabet_name ( deflate, alphabet ) );
  261. return -EINVAL;
  262. }
  263. return 0;
  264. }
  265. /**
  266. * Attempt to accumulate bits from input stream
  267. *
  268. * @v deflate Decompressor
  269. * @v in Compressed input data
  270. * @v target Number of bits to accumulate
  271. * @ret excess Number of excess bits accumulated (may be negative)
  272. */
  273. static int deflate_accumulate ( struct deflate *deflate,
  274. struct deflate_chunk *in,
  275. unsigned int target ) {
  276. uint8_t byte;
  277. while ( deflate->bits < target ) {
  278. /* Check for end of input */
  279. if ( in->offset >= in->len )
  280. break;
  281. /* Acquire byte from input */
  282. copy_from_user ( &byte, in->data, in->offset++,
  283. sizeof ( byte ) );
  284. deflate->accumulator = ( deflate->accumulator |
  285. ( byte << deflate->bits ) );
  286. deflate->rotalumucca = ( deflate->rotalumucca |
  287. ( deflate_reverse[byte] <<
  288. ( 24 - deflate->bits ) ) );
  289. deflate->bits += 8;
  290. /* Sanity check */
  291. assert ( deflate->bits <=
  292. ( 8 * sizeof ( deflate->accumulator ) ) );
  293. }
  294. return ( deflate->bits - target );
  295. }
  296. /**
  297. * Consume accumulated bits from the input stream
  298. *
  299. * @v deflate Decompressor
  300. * @v count Number of accumulated bits to consume
  301. * @ret data Consumed bits
  302. */
  303. static int deflate_consume ( struct deflate *deflate, unsigned int count ) {
  304. int data;
  305. /* Sanity check */
  306. assert ( count <= deflate->bits );
  307. /* Extract data and consume bits */
  308. data = ( deflate->accumulator & ( ( 1 << count ) - 1 ) );
  309. deflate->accumulator >>= count;
  310. deflate->rotalumucca <<= count;
  311. deflate->bits -= count;
  312. return data;
  313. }
  /**
   * Attempt to extract a fixed number of bits from input stream
   *
   * @v deflate     Decompressor
   * @v in          Compressed input data
   * @v target      Number of bits to extract
   * @ret data      Extracted bits (or negative if not yet accumulated)
   *
   * Nothing is consumed when the input does not yet hold enough
   * bits, so the caller can simply retry once more input is
   * available.
   */
  static int deflate_extract ( struct deflate *deflate, struct deflate_chunk *in,
                               unsigned int target ) {
      int excess;
      int data;

      /* Extracting zero bits always succeeds and touches nothing */
      if ( ! target )
          return 0;

      /* Attempt to accumulate bits */
      excess = deflate_accumulate ( deflate, in, target );
      if ( excess >= 0 ) {

          /* Extract data and consume bits */
          data = deflate_consume ( deflate, target );
          DBGCP ( deflate, "DEFLATE %p extracted %s = %#x = %d\n", deflate,
                  deflate_bin ( data, target ), data, data );
          return data;
      }

      /* Not enough input yet: report the (negative) shortfall */
      return excess;
  }
  339. /**
  340. * Attempt to decode a Huffman-coded symbol from input stream
  341. *
  342. * @v deflate Decompressor
  343. * @v in Compressed input data
  344. * @v alphabet Huffman alphabet
  345. * @ret code Raw code (or negative if not yet accumulated)
  346. */
  347. static int deflate_decode ( struct deflate *deflate,
  348. struct deflate_chunk *in,
  349. struct deflate_alphabet *alphabet ) {
  350. struct deflate_huf_symbols *huf_sym;
  351. uint16_t huf;
  352. unsigned int lookup_index;
  353. int excess;
  354. unsigned int raw;
  355. /* Attempt to accumulate maximum required number of bits.
  356. * There may be fewer bits than this remaining in the stream,
  357. * even if the stream still contains some complete
  358. * Huffman-coded symbols.
  359. */
  360. deflate_accumulate ( deflate, in, DEFLATE_HUFFMAN_BITS );
  361. /* Normalise the bit-reversed accumulated value to 16 bits */
  362. huf = ( deflate->rotalumucca >> 16 );
  363. /* Find symbol set for this length */
  364. lookup_index = ( huf >> DEFLATE_HUFFMAN_QL_SHIFT );
  365. huf_sym = &alphabet->huf[ alphabet->lookup[ lookup_index ] ];
  366. while ( huf < huf_sym->start )
  367. huf_sym--;
  368. /* Calculate number of excess bits, and return if not yet complete */
  369. excess = ( deflate->bits - huf_sym->bits );
  370. if ( excess < 0 )
  371. return excess;
  372. /* Consume bits */
  373. deflate_consume ( deflate, huf_sym->bits );
  374. /* Look up raw symbol */
  375. raw = huf_sym->raw[ huf >> huf_sym->shift ];
  376. DBGCP ( deflate, "DEFLATE %p decoded %s = %#x = %d\n", deflate,
  377. deflate_bin ( ( huf >> huf_sym->shift ), huf_sym->bits ),
  378. raw, raw );
  379. return raw;
  380. }
  381. /**
  382. * Discard bits up to the next byte boundary
  383. *
  384. * @v deflate Decompressor
  385. */
  386. static void deflate_discard_to_byte ( struct deflate *deflate ) {
  387. deflate_consume ( deflate, ( deflate->bits & 7 ) );
  388. }
  389. /**
  390. * Copy data to output buffer (if available)
  391. *
  392. * @v out Output data buffer
  393. * @v start Source data
  394. * @v offset Starting offset within source data
  395. * @v len Length to copy
  396. */
  397. static void deflate_copy ( struct deflate_chunk *out,
  398. userptr_t start, size_t offset, size_t len ) {
  399. size_t out_offset = out->offset;
  400. size_t copy_len;
  401. /* Copy data one byte at a time, to allow for overlap */
  402. if ( out_offset < out->len ) {
  403. copy_len = ( out->len - out_offset );
  404. if ( copy_len > len )
  405. copy_len = len;
  406. while ( copy_len-- ) {
  407. memcpy_user ( out->data, out_offset++,
  408. start, offset++, 1 );
  409. }
  410. }
  411. out->offset += len;
  412. }
  413. /**
  414. * Inflate compressed data
  415. *
  416. * @v deflate Decompressor
  417. * @v in Compressed input data
  418. * @v out Output data buffer
  419. * @ret rc Return status code
  420. *
  421. * The caller can use deflate_finished() to determine whether a
  422. * successful return indicates that the decompressor is merely waiting
  423. * for more input.
  424. *
  425. * Data will not be written beyond the specified end of the output
  426. * data buffer, but the offset within the output data buffer will be
  427. * updated to reflect the amount that should have been written. The
  428. * caller can use this to find the length of the decompressed data
  429. * before allocating the output data buffer.
  430. */
  431. int deflate_inflate ( struct deflate *deflate,
  432. struct deflate_chunk *in,
  433. struct deflate_chunk *out ) {
  434. /* This could be implemented more neatly if gcc offered a
  435. * means for enforcing tail recursion.
  436. */
  437. if ( deflate->resume ) {
  438. goto *(deflate->resume);
  439. } else switch ( deflate->format ) {
  440. case DEFLATE_RAW: goto block_header;
  441. case DEFLATE_ZLIB: goto zlib_header;
  442. default: assert ( 0 );
  443. }
  444. zlib_header: {
  445. int header;
  446. int cm;
  447. /* Extract header */
  448. header = deflate_extract ( deflate, in, ZLIB_HEADER_BITS );
  449. if ( header < 0 ) {
  450. deflate->resume = &&zlib_header;
  451. return 0;
  452. }
  453. /* Parse header */
  454. cm = ( ( header >> ZLIB_HEADER_CM_LSB ) & ZLIB_HEADER_CM_MASK );
  455. if ( cm != ZLIB_HEADER_CM_DEFLATE ) {
  456. DBGC ( deflate, "DEFLATE %p unsupported ZLIB "
  457. "compression method %d\n", deflate, cm );
  458. return -ENOTSUP;
  459. }
  460. if ( header & ( 1 << ZLIB_HEADER_FDICT_BIT ) ) {
  461. DBGC ( deflate, "DEFLATE %p unsupported ZLIB preset "
  462. "dictionary\n", deflate );
  463. return -ENOTSUP;
  464. }
  465. /* Process first block header */
  466. goto block_header;
  467. }
  468. block_header: {
  469. int header;
  470. int bfinal;
  471. int btype;
  472. /* Extract block header */
  473. header = deflate_extract ( deflate, in, DEFLATE_HEADER_BITS );
  474. if ( header < 0 ) {
  475. deflate->resume = &&block_header;
  476. return 0;
  477. }
  478. /* Parse header */
  479. deflate->header = header;
  480. bfinal = ( header & ( 1 << DEFLATE_HEADER_BFINAL_BIT ) );
  481. btype = ( header >> DEFLATE_HEADER_BTYPE_LSB );
  482. DBGC ( deflate, "DEFLATE %p found %sblock type %#x\n",
  483. deflate, ( bfinal ? "final " : "" ), btype );
  484. switch ( btype ) {
  485. case DEFLATE_HEADER_BTYPE_LITERAL:
  486. goto literal_block;
  487. case DEFLATE_HEADER_BTYPE_STATIC:
  488. goto static_block;
  489. case DEFLATE_HEADER_BTYPE_DYNAMIC:
  490. goto dynamic_block;
  491. default:
  492. DBGC ( deflate, "DEFLATE %p unsupported block type "
  493. "%#x\n", deflate, btype );
  494. return -ENOTSUP;
  495. }
  496. }
  497. literal_block: {
  498. /* Discard any bits up to the next byte boundary */
  499. deflate_discard_to_byte ( deflate );
  500. }
  501. literal_len: {
  502. int len;
  503. /* Extract LEN field */
  504. len = deflate_extract ( deflate, in, DEFLATE_LITERAL_LEN_BITS );
  505. if ( len < 0 ) {
  506. deflate->resume = &&literal_len;
  507. return 0;
  508. }
  509. /* Record length of literal data */
  510. deflate->remaining = len;
  511. DBGC2 ( deflate, "DEFLATE %p literal block length %#04zx\n",
  512. deflate, deflate->remaining );
  513. }
  514. literal_nlen: {
  515. int nlen;
  516. /* Extract NLEN field */
  517. nlen = deflate_extract ( deflate, in, DEFLATE_LITERAL_LEN_BITS);
  518. if ( nlen < 0 ) {
  519. deflate->resume = &&literal_nlen;
  520. return 0;
  521. }
  522. /* Verify NLEN */
  523. if ( ( ( deflate->remaining ^ ~nlen ) &
  524. ( ( 1 << DEFLATE_LITERAL_LEN_BITS ) - 1 ) ) != 0 ) {
  525. DBGC ( deflate, "DEFLATE %p invalid len/nlen "
  526. "%#04zx/%#04x\n", deflate,
  527. deflate->remaining, nlen );
  528. return -EINVAL;
  529. }
  530. }
  531. literal_data: {
  532. size_t in_remaining;
  533. size_t len;
  534. /* Calculate available amount of literal data */
  535. in_remaining = ( in->len - in->offset );
  536. len = deflate->remaining;
  537. if ( len > in_remaining )
  538. len = in_remaining;
  539. /* Copy data to output buffer */
  540. deflate_copy ( out, in->data, in->offset, len );
  541. /* Consume data from input buffer */
  542. in->offset += len;
  543. deflate->remaining -= len;
  544. /* Finish processing if we are blocked */
  545. if ( deflate->remaining ) {
  546. deflate->resume = &&literal_data;
  547. return 0;
  548. }
  549. /* Otherwise, finish block */
  550. goto block_done;
  551. }
  552. static_block: {
  553. struct deflate_static_length_pattern *pattern;
  554. uint8_t *lengths = deflate->lengths;
  555. /* Construct static Huffman lengths as per RFC 1950 */
  556. for ( pattern = deflate_static_length_patterns ;
  557. pattern->count ; pattern++ ) {
  558. memset ( lengths, pattern->fill, pattern->count );
  559. lengths += pattern->count;
  560. }
  561. deflate->litlen_count = 288;
  562. deflate->distance_count = 32;
  563. goto construct_alphabets;
  564. }
  565. dynamic_block:
  566. dynamic_header: {
  567. int header;
  568. unsigned int hlit;
  569. unsigned int hdist;
  570. unsigned int hclen;
  571. /* Extract block header */
  572. header = deflate_extract ( deflate, in, DEFLATE_DYNAMIC_BITS );
  573. if ( header < 0 ) {
  574. deflate->resume = &&dynamic_header;
  575. return 0;
  576. }
  577. /* Parse header */
  578. hlit = ( ( header >> DEFLATE_DYNAMIC_HLIT_LSB ) &
  579. DEFLATE_DYNAMIC_HLIT_MASK );
  580. hdist = ( ( header >> DEFLATE_DYNAMIC_HDIST_LSB ) &
  581. DEFLATE_DYNAMIC_HDIST_MASK );
  582. hclen = ( ( header >> DEFLATE_DYNAMIC_HCLEN_LSB ) &
  583. DEFLATE_DYNAMIC_HCLEN_MASK );
  584. deflate->litlen_count = ( hlit + 257 );
  585. deflate->distance_count = ( hdist + 1 );
  586. deflate->length_index = 0;
  587. deflate->length_target = ( hclen + 4 );
  588. DBGC2 ( deflate, "DEFLATE %p dynamic block %d codelen, %d "
  589. "litlen, %d distance\n", deflate,
  590. deflate->length_target, deflate->litlen_count,
  591. deflate->distance_count );
  592. /* Prepare for decoding code length code lengths */
  593. memset ( &deflate->lengths, 0, sizeof ( deflate->lengths ) );
  594. }
  595. dynamic_codelen: {
  596. int len;
  597. unsigned int index;
  598. int rc;
  599. /* Extract all code lengths */
  600. while ( deflate->length_index < deflate->length_target ) {
  601. /* Extract code length length */
  602. len = deflate_extract ( deflate, in,
  603. DEFLATE_CODELEN_BITS );
  604. if ( len < 0 ) {
  605. deflate->resume = &&dynamic_codelen;
  606. return 0;
  607. }
  608. /* Store code length */
  609. index = deflate_codelen_map[deflate->length_index++];
  610. deflate_set_length ( deflate, index, len );
  611. DBGCP ( deflate, "DEFLATE %p codelen for %d is %d\n",
  612. deflate, index, len );
  613. }
  614. /* Generate code length alphabet */
  615. if ( ( rc = deflate_alphabet ( deflate,
  616. &deflate->distance_codelen,
  617. ( DEFLATE_CODELEN_MAX_CODE + 1 ),
  618. 0 ) ) != 0 )
  619. return rc;
  620. /* Prepare for decoding literal/length/distance code lengths */
  621. memset ( &deflate->lengths, 0, sizeof ( deflate->lengths ) );
  622. deflate->length_index = 0;
  623. deflate->length_target = ( deflate->litlen_count +
  624. deflate->distance_count );
  625. deflate->length = 0;
  626. }
  627. dynamic_litlen_distance: {
  628. int len;
  629. int index;
  630. /* Decode literal/length/distance code length */
  631. len = deflate_decode ( deflate, in, &deflate->distance_codelen);
  632. if ( len < 0 ) {
  633. deflate->resume = &&dynamic_litlen_distance;
  634. return 0;
  635. }
  636. /* Prepare for extra bits */
  637. if ( len < 16 ) {
  638. deflate->length = len;
  639. deflate->extra_bits = 0;
  640. deflate->dup_len = 1;
  641. } else {
  642. static const uint8_t dup_len[3] = { 3, 3, 11 };
  643. static const uint8_t extra_bits[3] = { 2, 3, 7 };
  644. index = ( len - 16 );
  645. deflate->dup_len = dup_len[index];
  646. deflate->extra_bits = extra_bits[index];
  647. if ( index )
  648. deflate->length = 0;
  649. }
  650. }
  651. dynamic_litlen_distance_extra: {
  652. int extra;
  653. unsigned int dup_len;
  654. /* Extract extra bits */
  655. extra = deflate_extract ( deflate, in, deflate->extra_bits );
  656. if ( extra < 0 ) {
  657. deflate->resume = &&dynamic_litlen_distance_extra;
  658. return 0;
  659. }
  660. /* Store code lengths */
  661. dup_len = ( deflate->dup_len + extra );
  662. while ( ( deflate->length_index < deflate->length_target ) &&
  663. dup_len-- ) {
  664. deflate_set_length ( deflate, deflate->length_index++,
  665. deflate->length );
  666. }
  667. /* Process next literal/length or distance code
  668. * length, if more are required.
  669. */
  670. if ( deflate->length_index < deflate->length_target )
  671. goto dynamic_litlen_distance;
  672. /* Construct alphabets */
  673. goto construct_alphabets;
  674. }
  675. construct_alphabets: {
  676. unsigned int distance_offset = deflate->litlen_count;
  677. unsigned int distance_count = deflate->distance_count;
  678. int rc;
  679. /* Generate literal/length alphabet */
  680. if ( ( rc = deflate_alphabet ( deflate, &deflate->litlen,
  681. deflate->litlen_count, 0 ) ) !=0)
  682. return rc;
  683. /* Handle degenerate case of a single distance code
  684. * (for which it is impossible to construct a valid,
  685. * complete Huffman alphabet). RFC 1951 states:
  686. *
  687. * If only one distance code is used, it is encoded
  688. * using one bit, not zero bits; in this case there
  689. * is a single code length of one, with one unused
  690. * code. One distance code of zero bits means that
  691. * there are no distance codes used at all (the data
  692. * is all literals).
  693. *
  694. * If we have only a single distance code, then we
  695. * instead use two distance codes both with length 1.
  696. * This results in a valid Huffman alphabet. The code
  697. * "0" will mean distance code 0 (which is either
  698. * correct or irrelevant), and the code "1" will mean
  699. * distance code 1 (which is always irrelevant).
  700. */
  701. if ( deflate->distance_count == 1 ) {
  702. deflate->lengths[0] = 0x11;
  703. distance_offset = 0;
  704. distance_count = 2;
  705. }
  706. /* Generate distance alphabet */
  707. if ( ( rc = deflate_alphabet ( deflate,
  708. &deflate->distance_codelen,
  709. distance_count,
  710. distance_offset ) ) != 0 )
  711. return rc;
  712. }
  713. lzhuf_litlen: {
  714. int code;
  715. uint8_t byte;
  716. unsigned int extra;
  717. unsigned int bits;
  718. /* Decode Huffman codes */
  719. while ( 1 ) {
  720. /* Decode Huffman code */
  721. code = deflate_decode ( deflate, in, &deflate->litlen );
  722. if ( code < 0 ) {
  723. deflate->resume = &&lzhuf_litlen;
  724. return 0;
  725. }
  726. /* Handle according to code type */
  727. if ( code < DEFLATE_LITLEN_END ) {
  728. /* Literal value: copy to output buffer */
  729. byte = code;
  730. DBGCP ( deflate, "DEFLATE %p literal %#02x "
  731. "('%c')\n", deflate, byte,
  732. ( isprint ( byte ) ? byte : '.' ) );
  733. deflate_copy ( out, virt_to_user ( &byte ), 0,
  734. sizeof ( byte ) );
  735. } else if ( code == DEFLATE_LITLEN_END ) {
  736. /* End of block */
  737. goto block_done;
  738. } else {
  739. /* Length code: process extra bits */
  740. extra = ( code - DEFLATE_LITLEN_END - 1 );
  741. if ( extra < 28 ) {
  742. bits = ( extra / 4 );
  743. if ( bits )
  744. bits--;
  745. deflate->extra_bits = bits;
  746. deflate->dup_len =
  747. deflate_litlen_base[extra];
  748. } else {
  749. deflate->extra_bits = 0;
  750. deflate->dup_len = 258;
  751. }
  752. goto lzhuf_litlen_extra;
  753. }
  754. }
  755. }
  756. lzhuf_litlen_extra: {
  757. int extra;
  758. /* Extract extra bits */
  759. extra = deflate_extract ( deflate, in, deflate->extra_bits );
  760. if ( extra < 0 ) {
  761. deflate->resume = &&lzhuf_litlen_extra;
  762. return 0;
  763. }
  764. /* Update duplicate length */
  765. deflate->dup_len += extra;
  766. }
  767. lzhuf_distance: {
  768. int code;
  769. unsigned int extra;
  770. unsigned int bits;
  771. /* Decode Huffman code */
  772. code = deflate_decode ( deflate, in,
  773. &deflate->distance_codelen );
  774. if ( code < 0 ) {
  775. deflate->resume = &&lzhuf_distance;
  776. return 0;
  777. }
  778. /* Process extra bits */
  779. extra = code;
  780. bits = ( extra / 2 );
  781. if ( bits )
  782. bits--;
  783. deflate->extra_bits = bits;
  784. deflate->dup_distance = deflate_distance_base[extra];
  785. }
  786. lzhuf_distance_extra: {
  787. int extra;
  788. size_t dup_len;
  789. size_t dup_distance;
  790. /* Extract extra bits */
  791. extra = deflate_extract ( deflate, in, deflate->extra_bits );
  792. if ( extra < 0 ) {
  793. deflate->resume = &&lzhuf_distance_extra;
  794. return 0;
  795. }
  796. /* Update duplicate distance */
  797. dup_distance = ( deflate->dup_distance + extra );
  798. dup_len = deflate->dup_len;
  799. DBGCP ( deflate, "DEFLATE %p duplicate length %zd distance "
  800. "%zd\n", deflate, dup_len, dup_distance );
  801. /* Sanity check */
  802. if ( dup_distance > out->offset ) {
  803. DBGC ( deflate, "DEFLATE %p bad distance %zd (max "
  804. "%zd)\n", deflate, dup_distance, out->offset );
  805. return -EINVAL;
  806. }
  807. /* Copy data, allowing for overlap */
  808. deflate_copy ( out, out->data, ( out->offset - dup_distance ),
  809. dup_len );
  810. /* Process next literal/length symbol */
  811. goto lzhuf_litlen;
  812. }
  813. block_done: {
  814. DBGCP ( deflate, "DEFLATE %p end of block\n", deflate );
  815. /* If this was not the final block, process next block header */
  816. if ( ! ( deflate->header & ( 1 << DEFLATE_HEADER_BFINAL_BIT ) ))
  817. goto block_header;
  818. /* Otherwise, process footer (if any) */
  819. switch ( deflate->format ) {
  820. case DEFLATE_RAW: goto finished;
  821. case DEFLATE_ZLIB: goto zlib_footer;
  822. default: assert ( 0 );
  823. }
  824. }
  825. zlib_footer: {
  826. /* Discard any bits up to the next byte boundary */
  827. deflate_discard_to_byte ( deflate );
  828. }
  829. zlib_adler32: {
  830. int excess;
  831. /* Accumulate the 32 bits of checksum. We don't check
  832. * the value, stop processing immediately afterwards,
  833. * and so don't have to worry about the nasty corner
  834. * cases involved in calling deflate_extract() to
  835. * obtain a full 32 bits.
  836. */
  837. excess = deflate_accumulate ( deflate, in, ZLIB_ADLER32_BITS );
  838. if ( excess < 0 ) {
  839. deflate->resume = &&zlib_adler32;
  840. return 0;
  841. }
  842. /* Finish processing */
  843. goto finished;
  844. }
  845. finished: {
  846. /* Mark as finished and terminate */
  847. DBGCP ( deflate, "DEFLATE %p finished\n", deflate );
  848. deflate->resume = NULL;
  849. return 0;
  850. }
  851. }
  852. /**
  853. * Initialise decompressor
  854. *
  855. * @v deflate Decompressor
  856. * @v format Compression format code
  857. */
  858. void deflate_init ( struct deflate *deflate, enum deflate_format format ) {
  859. static int global_init_done;
  860. uint8_t i;
  861. uint8_t bit;
  862. uint8_t byte;
  863. unsigned int base;
  864. unsigned int bits;
  865. /* Perform global initialisation if required */
  866. if ( ! global_init_done ) {
  867. /* Initialise byte reversal table */
  868. for ( i = 255 ; i ; i-- ) {
  869. for ( bit = 1, byte = 0 ; bit ; bit <<= 1 ) {
  870. byte <<= 1;
  871. if ( i & bit )
  872. byte |= 1;
  873. }
  874. deflate_reverse[i] = byte;
  875. }
  876. /* Initialise literal/length extra bits table */
  877. base = 3;
  878. for ( i = 0 ; i < 28 ; i++ ) {
  879. bits = ( i / 4 );
  880. if ( bits )
  881. bits--;
  882. deflate_litlen_base[i] = base;
  883. base += ( 1 << bits );
  884. }
  885. assert ( base == 259 ); /* sic */
  886. /* Initialise distance extra bits table */
  887. base = 1;
  888. for ( i = 0 ; i < 30 ; i++ ) {
  889. bits = ( i / 2 );
  890. if ( bits )
  891. bits--;
  892. deflate_distance_base[i] = base;
  893. base += ( 1 << bits );
  894. }
  895. assert ( base == 32769 );
  896. /* Record global initialisation as complete */
  897. global_init_done = 1;
  898. }
  899. /* Initialise structure */
  900. memset ( deflate, 0, sizeof ( *deflate ) );
  901. deflate->format = format;
  902. }