Browse Source

[zbin] Use LZMA compression

LZMA provides significantly better compression (by ~15%) than the
current NRV2B algorithm.

We use a raw LZMA stream (aka LZMA1) to avoid the need for code to
parse the LZMA2 block headers.  We use parameters {lc=2,lp=0,pb=0} to
reduce the stack space required by the decompressor to acceptable
levels (around 8kB).  Using lc=3 or pb=2 would give marginally better
compression, but at the cost of substantially increasing the required
stack space.

The build process now requires the liblzma headers to be present on
the build system, since we do not include a copy of an LZMA compressor
within the iPXE source tree.  The decompressor is written from scratch
(based on XZ Embedded) and is entirely self-contained within the
iPXE source.

The branch-call-jump (BCJ) filter used to improve the compressibility
is specific to iPXE.  We choose not to use liblzma's built-in BCJ
filter since the algorithm is complex and undocumented.  Our BCJ
filter achieves approximately the same results (on typical iPXE
binaries) with a substantially simpler algorithm.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 9 years ago
parent
commit
5350b65a3c
4 changed files with 1004 additions and 17 deletions
  1. 4
    7
      src/Makefile.housekeeping
  2. 905
    0
      src/arch/i386/prefix/unlzma.S
  3. 9
    0
      src/arch/i386/prefix/unlzma16.S
  4. 86
    10
      src/util/zbin.c

+ 4
- 7
src/Makefile.housekeeping View File

@@ -1220,15 +1220,12 @@ endif # defined(BIN)
1220 1220
 #
1221 1221
 # The compression utilities
1222 1222
 #
1223
-$(NRV2B) : util/nrv2b.c $(MAKEDEPS)
1224
-	$(QM)$(ECHO) "  [HOSTCC] $@"
1225
-	$(Q)$(HOST_CC) $(HOST_CFLAGS) -DENCODE -DDECODE -DMAIN -DVERBOSE \
1226
-		       -DNDEBUG -DBITSIZE=32 -DENDIAN=0 -o $@ $<
1227
-CLEANUP	+= $(NRV2B)
1228 1223
 
1229
-$(ZBIN) : util/zbin.c util/nrv2b.c $(MAKEDEPS)
1224
+ZBIN_LDFLAGS := -llzma
1225
+
1226
+$(ZBIN) : util/zbin.c $(MAKEDEPS)
1230 1227
 	$(QM)$(ECHO) "  [HOSTCC] $@"
1231
-	$(Q)$(HOST_CC) $(HOST_CFLAGS) -o $@ $<
1228
+	$(Q)$(HOST_CC) $(HOST_CFLAGS) $< $(ZBIN_LDFLAGS) -o $@
1232 1229
 CLEANUP += $(ZBIN)
1233 1230
 
1234 1231
 ###############################################################################

+ 905
- 0
src/arch/i386/prefix/unlzma.S View File

@@ -0,0 +1,905 @@
1
+/*
2
+ * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17
+ * 02110-1301, USA.
18
+ */
19
+
20
+FILE_LICENCE ( GPL2_OR_LATER );
21
+
22
+/****************************************************************************
23
+ *
24
+ * This file provides the decompress() and decompress16() functions
25
+ * which can be called in order to decompress an LZMA-compressed
26
+ * image.  The code is modelled on the public-domain "XZ Embedded"
27
+ * implementation as used by the Linux kernel.  Symbol names are
28
+ * chosen to match the XZ Embedded implementation where possible, for
29
+ * ease of reference.
30
+ *
31
+ * This code is optimised for size rather than speed, since the amount
32
+ * of data to be decompressed is trivially small by modern standards.
33
+ *
34
+ * The same basic assembly code is used to compile both decompress()
35
+ * and decompress16().
36
+ *
37
+ * Note that these functions require large amounts of stack space.
38
+ *
39
+ ****************************************************************************
40
+ */
41
+
42
+	.text
43
+	.arch i586
44
+	.section ".prefix.lib", "ax", @progbits
45
+
46
+#ifdef CODE16
47
+#define ADDR16
48
+#define ADDR32 addr32
49
+#define decompress decompress16
50
+	.code16
51
+#else /* CODE16 */
52
+#define ADDR16 addr16
53
+#define ADDR32
54
+	.code32
55
+#endif /* CODE16 */
56
+
57
+/****************************************************************************
58
+ * Debugging (via 0xe9 debug port)
59
+ ****************************************************************************
60
+ */
61
+
62
+#define DEBUG 0
63
+
64
+#if DEBUG
65
+	.macro	print_character, char
66
+	pushw	%ax
67
+	movb 	$\char, %al
68
+	outb	%al, $0xe9
69
+	popw	%ax
70
+	.endm
71
+
72
+	.macro	print_hex_nibble
73
+	cmpb	$10, %al
74
+	sbb	$0x69, %al
75
+	das
76
+	outb	%al, $0xe9
77
+	.endm
78
+
79
+	.macro	print_hex_byte, reg
80
+	pushfl
81
+	pushw	%ax
82
+	movb	\reg, %al
83
+	pushw	%ax
84
+	shrb	$4, %al
85
+	print_hex_nibble
86
+	popw	%ax
87
+	andb	$0x0f, %al
88
+	print_hex_nibble
89
+	popw	%ax
90
+	popfl
91
+	.endm
92
+
93
+	.macro	print_hex_word, reg
94
+	pushw	%ax
95
+	movw	\reg, %ax
96
+	print_hex_byte %ah
97
+	print_hex_byte %al
98
+	popw	%ax
99
+	.endm
100
+
101
+	.macro	print_hex_dword, reg
102
+	pushl	%eax
103
+	movl	\reg, %eax
104
+	rorl	$16, %eax
105
+	print_hex_word %ax
106
+	rorl	$16, %eax
107
+	print_hex_word %ax
108
+	popl	%eax
109
+	.endm
110
+#else
111
+	.macro	print_character, char
112
+	.endm
113
+	.macro	print_hex_byte, reg
114
+	.endm
115
+	.macro	print_hex_word, reg
116
+	.endm
117
+	.macro	print_hex_dword, reg
118
+	.endm
119
+#endif
120
+
121
+/****************************************************************************
122
+ * LZMA parameters and data structures
123
+ ****************************************************************************
124
+ */
125
+
126
+/* LZMA decompressor states (as used in XZ Embedded) */
127
+#define STATE_LIT_LIT 0x00
128
+#define STATE_MATCH_LIT_LIT 0x01
129
+#define STATE_REP_LIT_LIT 0x02
130
+#define STATE_SHORTREP_LIT_LIT 0x03
131
+#define STATE_MATCH_LIT 0x04
132
+#define STATE_REP_LIT 0x05
133
+#define STATE_SHORTREP_LIT 0x06
134
+#define STATE_LIT_MATCH 0x07
135
+#define STATE_LIT_LONGREP 0x08
136
+#define STATE_LIT_SHORTREP 0x09
137
+#define STATE_NONLIT_MATCH 0x0a
138
+#define STATE_NONLIT_REP 0x0b
139
+
140
+/* LZMA maximum decompressor state in which most recent symbol was a literal */
141
+#define STATE_LIT_MAX 0x06
142
+
143
+/* LZMA number of literal context bits ("lc=" parameter) */
144
+#define LZMA_LC 2
145
+
146
+	.struct	0
147
+lzma_len_dec:
148
+choice:		.word	0
149
+choice2:	.word	0
150
+low:		.rept	( 1 << 3 )
151
+		.word	0
152
+		.endr
153
+mid:		.rept	( 1 << 3 )
154
+		.word	0
155
+		.endr
156
+high:		.rept	( 1 << 8 )
157
+		.word	0
158
+		.endr
159
+	.equ	sizeof__lzma_len_dec, . - lzma_len_dec
160
+	.previous
161
+
162
+	.struct	0
163
+lzma_dec:
164
+in_start:	.long	0
165
+out_start:	.long	0
166
+rc_code:	.long	0
167
+rc_range:	.long	0
168
+len:		.word	0
169
+reps:
170
+rep0:		.long	0
171
+rep1:		.long	0
172
+rep2:		.long	0
173
+rep3:		.long	0
174
+probs:
175
+is_match:	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
176
+is_rep:		.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
177
+is_rep0:	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
178
+is_rep1:	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
179
+is_rep2:	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
180
+is_rep0_long:	.word	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
181
+dist_slot:	.rept	( 4 * ( 1 << 6 ) )
182
+		.word	0
183
+		.endr
184
+dist_special:	.rept	( ( 1 << ( 14 / 2 ) ) - 14 )
185
+		.word	0
186
+		.endr
187
+dist_align:	.rept	( 1 << 4 )
188
+		.word	0
189
+		.endr
190
+match_len_dec:	.space	sizeof__lzma_len_dec
191
+rep_len_dec:	.space	sizeof__lzma_len_dec
192
+literal:	.rept	( ( 1 << LZMA_LC ) * 0x300 )
193
+		.word	0
194
+		.endr
195
+	.align	4
196
+	.equ	sizeof__lzma_dec, . - lzma_dec
197
+	.previous
198
+
199
+/*****************************************************************************
200
+ * Normalise range encoder
201
+ *
202
+ * Parameters:
203
+ *   %ss:%ebp : LZMA parameter block
204
+ *   %ds:%esi : compressed input data pointer
205
+ * Returns:
206
+ *   %ds:%esi : compressed input data pointer (possibly updated)
207
+ *   %eax : current range
208
+ * Corrupts:
209
+ *   %eax
210
+ *****************************************************************************
211
+ */
212
+rc_normalise:
213
+	/* Check if rc_range is less than 1<<24 */
214
+	testb	$0xff, (rc_range+3)(%ebp)
215
+	jnz	1f
216
+	/* If it is, shift in a new byte from the compressed input data */
217
+	shll	$8, rc_range(%ebp)
218
+	shll	$8, rc_code(%ebp)
219
+	ADDR32 lodsb
220
+	movb	%al, (rc_code+0)(%ebp)
221
+1:	/* Return current range */
222
+	movl	rc_range(%ebp), %eax
223
+	ret
224
+	.size	rc_normalise, . - rc_normalise
225
+
226
+/*****************************************************************************
227
+ * Decode single range-encoded bit using a probability estimate
228
+ *
229
+ * Parameters:
230
+ *   %ss:%ebp : LZMA parameter block
231
+ *   %ds:%esi : compressed input data pointer
232
+ *   %ebx : probability estimate pointer (offset from %ebp)
233
+ * Returns:
234
+ *   %ds:%esi : compressed input data pointer (possibly updated)
235
+ *   CF : decoded bit
236
+ *   ZF : inverse of decoded bit
237
+ * Corrupts:
238
+ *   none
239
+ *****************************************************************************
240
+ */
241
+rc_bit:
242
+	/* Preserve registers */
243
+	pushl	%eax
244
+	pushl	%edx
245
+	/* Perform normalisation */
246
+	call	rc_normalise
247
+	/* Calculate bound in %eax and probability estimate in %dx */
248
+	shrl	$11, %eax
249
+	movzwl	(%ebp,%ebx), %edx
250
+	mul	%edx /* will zero %edx */
251
+	movw	(%ebp,%ebx), %dx
252
+	/* Compare code against bound */
253
+	cmpl	%eax, rc_code(%ebp)
254
+	jae	2f
255
+1:	/* Code is less than bound */
256
+	movl	%eax, rc_range(%ebp)
257
+	negw	%dx
258
+	addw	$(1<<11), %dx
259
+	shrw	$5, %dx
260
+	addw	%dx, (%ebp,%ebx)
261
+	xorw	%ax, %ax	/* Clear CF, set ZF */
262
+	jmp	99f
263
+2:	/* Code is greater than or equal to bound */
264
+	subl	%eax, rc_range(%ebp)
265
+	subl	%eax, rc_code(%ebp)
266
+	shrw	$5, %dx
267
+	subw	%dx, (%ebp,%ebx)
268
+	incw	%dx		/* Clear ZF (%dx is 11-bit; can never wrap) */
269
+	stc			/* Set CF */
270
+99:	/* Restore registers and return */
271
+	popl	%edx
272
+	popl	%eax
273
+	ret
274
+	.size	rc_bit, . - rc_bit
275
+
276
+/*****************************************************************************
277
+ * Decode MSB-first bittree
278
+ *
279
+ * Parameters:
280
+ *   %ss:%ebp : LZMA parameter block
281
+ *   %ds:%esi : compressed input data pointer
282
+ *   %ebx : probability estimate set pointer (offset from %ebp)
283
+ *   %cx : number of bits to decode
284
+ * Returns:
285
+ *   %ds:%esi : compressed input data pointer (possibly updated)
286
+ *   %eax : decoded bittree
287
+ * Corrupts:
288
+ *   none
289
+ *****************************************************************************
290
+ */
291
+rc_bittree:
292
+	/* Preserve registers */
293
+	pushl	%edi
294
+	pushw	%cx
295
+	movl	%ebx, %edi
296
+	/* Initialise registers */
297
+	movl	$1, %eax
298
+1:	/* Decode bit */
299
+	leaw	(%edi,%eax,2), %bx	/* high word always zero anyway */
300
+	call	rc_bit
301
+	rclw	%ax
302
+	ADDR16 loop 1b
303
+	/* Restore registers, clear unwanted high bit of result, and return */
304
+	movl	%edi, %ebx
305
+	popw	%cx
306
+	popl	%edi
307
+	btrw	%cx, %ax
308
+	ret
309
+	.size	rc_bittree, . - rc_bittree
310
+
311
+/*****************************************************************************
312
+ * Decode LSB-first bittree
313
+ *
314
+ * Parameters:
315
+ *   %ss:%ebp : LZMA parameter block
316
+ *   %ds:%esi : compressed input data pointer
317
+ *   %ebx : probability estimate set pointer (offset from %ebp)
318
+ *   %cx : number of bits to decode
319
+ * Returns:
320
+ *   %ds:%esi : compressed input data pointer (possibly updated)
321
+ *   %eax : decoded bittree
322
+ * Corrupts:
323
+ *   none
324
+ *****************************************************************************
325
+ */
326
+rc_bittree_reverse:
327
+	/* Preserve registers */
328
+	pushw	%cx
329
+	/* Decode bittree */
330
+	call	rc_bittree
331
+1:	/* Reverse result */
332
+	rcrb	%al
333
+	rclb	%ah
334
+	ADDR16 loop 1b
335
+	shrw	$8, %ax
336
+	/* Restore registers and return */
337
+	popw	%cx
338
+	ret
339
+	.size	rc_bittree_reverse, . - rc_bittree_reverse
340
+
341
+/*****************************************************************************
342
+ * Decode MSB-first bittree with optional match byte
343
+ *
344
+ * Parameters:
345
+ *   %ss:%ebp : LZMA parameter block
346
+ *   %ds:%esi : compressed input data pointer
347
+ *   %ebx : probability estimate set pointer (offset from %ebp)
348
+ *   %cl : match byte
349
+ *   %ch : 1 to use match byte, 0 to ignore match byte
350
+ * Returns:
351
+ *   %ds:%esi : compressed input data pointer (possibly updated)
352
+ *   %eax : decoded bittree
353
+ * Corrupts:
354
+ *   none
355
+ *****************************************************************************
356
+ */
357
+rc_bittree_match:
358
+	/* Preserve registers */
359
+	pushl	%edi
360
+	pushw	%cx
361
+	pushw	%dx
362
+	movl	%ebx, %edi
363
+	/* Initialise registers */
364
+	movl	$1, %eax
365
+1:	/* Decode bit */
366
+	rolb	$1, %cl
367
+	movw	%cx, %dx
368
+	andb	%dh, %dl		/* match_bit in %dl */
369
+	movw	%dx, %bx
370
+	addb	%bl, %bh
371
+	xorb	%bl, %bl
372
+	addw	%ax, %bx		/* offset + match_bit + symbol */
373
+	leaw	(%edi,%ebx,2), %bx	/* high word always zero anyway */
374
+	call	rc_bit
375
+	rclw	%ax
376
+	movb	%al, %dh
377
+	notb	%dh
378
+	xorb	%dh, %dl
379
+	andb	%dl, %ch		/* offset &= ~( match_bit ^ bit ): keep offset only while bit == match_bit */
380
+	testb	%ah, %ah		/* initial 1 reaches %ah after eight decoded bits */
381
+	jz	1b
382
+	/* Restore registers, clear unwanted high bit of result, and return */
383
+	movl	%edi, %ebx
384
+	popw	%dx
385
+	popw	%cx
386
+	popl	%edi
387
+	xorb	%ah, %ah
388
+	ret
389
+	.size	rc_bittree_match, . - rc_bittree_match
390
+
391
+/*****************************************************************************
392
+ * Decode direct bits (no probability estimates)
393
+ *
394
+ * Parameters:
395
+ *   %ss:%ebp : LZMA parameter block
396
+ *   %ds:%esi : compressed input data pointer
397
+ *   %cx : number of bits to decode
398
+ * Returns:
399
+ *   %ds:%esi : compressed input data pointer (possibly updated)
400
+ *   %eax : decoded bits
401
+ * Corrupts:
402
+ *   none
403
+ *****************************************************************************
404
+ */
405
+rc_direct:
406
+	/* Preserve registers */
407
+	pushl	%ebx
408
+	pushw	%cx
409
+	pushl	%edx
410
+	/* Initialise registers */
411
+	xorl	%edx, %edx
412
+1:	/* Perform normalisation */
413
+	call	rc_normalise
414
+	/* Decode bit */
415
+	shrl	$1, %eax
416
+	movl	%eax, rc_range(%ebp)
417
+	movl	rc_code(%ebp), %ebx
418
+	subl	%eax, %ebx
419
+	js	2f
420
+	movl	%ebx, rc_code(%ebp)
421
+2:	rcll	%ebx
422
+	rcll	%edx
423
+	xorb	$1, %dl
424
+	ADDR16 loop 1b
425
+	/* Restore registers and return */
426
+	movl	%edx, %eax
427
+	popl	%edx
428
+	popw	%cx
429
+	popl	%ebx
430
+	ret
431
+	.size	rc_direct, . - rc_direct
432
+
433
+/*****************************************************************************
434
+ * Decode an LZMA literal
435
+ *
436
+ * Parameters:
437
+ *   %ss:%ebp : LZMA parameter block
438
+ *   %ds:%esi : compressed input data pointer
439
+ *   %es:%edi : uncompressed output data pointer
440
+ *   %edx : LZMA state
441
+ * Returns:
442
+ *   %ds:%esi : compressed input data pointer (possibly updated)
443
+ *   %es:%edi : uncompressed output data pointer (updated)
444
+ *   %edx : LZMA state
445
+ *   CF : end of payload marker found (always zero)
446
+ * Corrupts:
447
+ *   %eax
448
+ *   %ebx
449
+ *   %ecx
450
+ *****************************************************************************
451
+ *
452
+ * Literals are coded as an eight-bit tree, using a match byte if the
453
+ * previous symbol was not a literal.
454
+ *
455
+ */
456
+lzma_literal:
457
+	/* Get most recent output byte, unless we are at the start of the output */
458
+	xorl	%ebx, %ebx
459
+	cmpl	%edi, out_start(%ebp)	/* input pointer is already past the header; test output instead */
460
+	je	1f
461
+	movb	%es:-1(%edi), %bh
462
+1:	/* Locate probability estimate set (selected by top LZMA_LC bits of previous byte) */
463
+	shrb	$( 8 - LZMA_LC ), %bh
464
+	shlb	$1, %bh
465
+	leaw	literal(%ebx,%ebx,2), %bx
466
+	/* Get match byte, if applicable (only when previous symbol was not a literal) */
467
+	xorw	%cx, %cx
468
+	cmpb	$STATE_LIT_MAX, %dl
469
+	jbe	1f
470
+	movl	rep0(%ebp), %eax
471
+	notl	%eax			/* %eax = -( rep0 + 1 ) */
472
+	movb	%es:(%edi,%eax), %cl
473
+	movb	$1, %ch
474
+1:	/* Decode bittree */
475
+	call	rc_bittree_match
476
+	/* Store output byte */
477
+	ADDR32 stosb
478
+	print_hex_byte %al
479
+	print_character ' '
480
+	/* Update LZMA state: 0-3 => 0, 4-9 => state-3, 10-11 => state-6 */
481
+	subb	$3, %dl
482
+	jns	1f
483
+	xorb	%dl, %dl
484
+1:	cmpb	$7, %dl
485
+	jb	1f
486
+	subb	$3, %dl
487
+1:	/* Clear CF and return */
488
+	clc
489
+	ret
490
+	.size	lzma_literal, . - lzma_literal
491
+
492
+/*****************************************************************************
493
+ * Decode an LZMA length
494
+ *
495
+ * Parameters:
496
+ *   %ss:%ebp : LZMA parameter block
497
+ *   %ds:%esi : compressed input data pointer
498
+ *   %ebx : length parameter pointer (offset from %ebp)
499
+ * Returns:
500
+ *   %ds:%esi : compressed input data pointer (possibly updated)
501
+ * Corrupts:
502
+ *   none (%ebx is restored on exit; the decoded length is stored in len)
503
+ *****************************************************************************
504
+ *
505
+ * Lengths are encoded as:
506
+ *
507
+ *   "0" + 3 bits    : lengths 2-9 ("low")
508
+ *   "10" + 3 bits   : lengths 10-17 ("mid")
509
+ *   "11" + 8 bits   : lengths 18-273 ("high")
510
+ */
511
+lzma_len:
512
+	/* Preserve registers */
513
+	pushl	%eax
514
+	pushl	%ecx
515
+	pushl	%edi
516
+	movl	%ebx, %edi
517
+	/* Start by assuming three bits and a base length of 2 */
518
+	movw	$3, %cx
519
+	movw	$2, len(%ebp)
520
+	/* Check low-length choice bit */
521
+	leal	choice(%edi), %ebx
522
+	call	rc_bit
523
+	leal	low(%edi), %ebx
524
+	jz	1f
525
+	/* Check high-length choice bit */
526
+	leal	choice2(%edi), %ebx
527
+	call	rc_bit
528
+	leal	mid(%edi), %ebx
529
+	movb	$10, len(%ebp)
530
+	jz	1f
531
+	leal	high(%edi), %ebx
532
+	movb	$8, %cl
533
+	movb	$18, len(%ebp)
534
+1:	/* Get encoded length */
535
+	call	rc_bittree
536
+	addw	%ax, len(%ebp)
537
+	/* Restore registers and return */
538
+	movl	%edi, %ebx
539
+	popl	%edi
540
+	popl	%ecx
541
+	popl	%eax
542
+	ret
543
+	.size	lzma_len, . - lzma_len
544
+
545
+/*****************************************************************************
546
+ * Copy (possibly repeated) matched data
547
+ *
548
+ * Parameters:
549
+ *   %ss:%ebp : LZMA parameter block
550
+ *   %ds:%esi : compressed input data pointer
551
+ *   %es:%edi : uncompressed output data pointer
552
+ *   %cl : repeated match distance index (for repeated matches)
553
+ *   %eax : match distance (for non-repeated matches)
554
+ * Returns:
555
+ *   %ds:%esi : compressed input data pointer (possibly updated)
556
+ *   %es:%edi : uncompressed output data pointer
557
+ *   CF : match distance is out of range
558
+ * Corrupts:
559
+ *   %eax
560
+ *   %ebx
561
+ *   %ecx
562
+ *****************************************************************************
563
+ */
564
+match:	/* Update repeated match list */
565
+	print_character '['
566
+	movl	$3, %ecx
567
+	jmp	1f
568
+match_rep:
569
+	print_character '['
570
+	print_character 'R'
571
+	print_hex_byte %cl
572
+	print_character '='
573
+	movzbl	%cl, %ecx
574
+	movl	reps(%ebp,%ecx,4), %eax
575
+	jcxz	2f
576
+1:	movl	(reps-4)(%ebp,%ecx,4), %ebx
577
+	movl	%ebx, reps(%ebp,%ecx,4)
578
+	loop	1b
579
+	movl	%eax, rep0(%ebp)
580
+2:	/* Preserve registers */
581
+	pushl	%esi
582
+	/* Get stored match length */
583
+	movzwl	len(%ebp), %ecx
584
+	print_hex_dword	%eax
585
+	print_character '+'
586
+	print_hex_word %cx
587
+	print_character ']'
588
+	print_character ' '
589
+	/* Abort with CF set if match distance is out of range */
590
+	movl	out_start(%ebp), %esi
591
+	negl	%esi
592
+	leal	-1(%edi,%esi), %esi
593
+	cmpl	%eax, %esi
594
+	jc	99f
595
+	/* Perform copy */
596
+	notl	%eax
597
+	leal	(%edi,%eax), %esi
598
+	ADDR32 es rep movsb
599
+99:	/* Restore registers and return */
600
+	popl	%esi
601
+	ret
602
+	.size	match, . - match
603
+
604
+/*****************************************************************************
605
+ * Decode an LZMA match
606
+ *
607
+ * Parameters:
608
+ *   %ss:%ebp : LZMA parameter block
609
+ *   %ds:%esi : compressed input data pointer
610
+ *   %es:%edi : uncompressed output data pointer
611
+ *   %edx : LZMA state
612
+ * Returns:
613
+ *   %ds:%esi : compressed input data pointer (possibly updated)
614
+ *   %es:%edi : uncompressed output data pointer
615
+ *   %edx : LZMA state
616
+ *   CF : end of payload marker found
617
+ * Corrupts:
618
+ *   %eax
619
+ *   %ebx
620
+ *   %ecx
621
+ *****************************************************************************
622
+ *
623
+ * Matches are encoded as an LZMA length followed by a 6-bit "distance
624
+ * slot" code, 0-26 fixed-probability bits, and 0-5 context encoded
625
+ * bits.
626
+ */
627
+lzma_match:
628
+	/* Preserve registers */
629
+	pushl	%edi
630
+	/* Update LZMA state */
631
+	cmpb	$STATE_LIT_MAX, %dl
632
+	movb	$STATE_LIT_MATCH, %dl
633
+	jbe	1f
634
+	movb	$STATE_NONLIT_MATCH, %dl
635
+1:	/* Decode length */
636
+	movl	$match_len_dec, %ebx
637
+	call	lzma_len
638
+	/* Decode distance slot */
639
+	movw	len(%ebp), %bx
640
+	subw	$2, %bx
641
+	cmpw	$4, %bx
642
+	jb	1f
643
+	movw	$3, %bx
644
+1:	shlw	$7, %bx
645
+	addw	$dist_slot, %bx
646
+	movw	$6, %cx
647
+	call	rc_bittree
648
+	/* Distance slots 0-3 are literal distances */
649
+	cmpb	$4, %al
650
+	jb	99f
651
+	/* Determine initial bits: 10/11 for even/odd distance codes */
652
+	movl	%eax, %edi
653
+	andw	$1, %di
654
+	orw	$2, %di
655
+	/* Determine number of context-encoded bits */
656
+	movw	%ax, %cx
657
+	shrb	$1, %cl
658
+	decb	%cl
659
+	/* Select context to be used in absence of fixed-probability bits */
660
+	movl	%edi, %ebx
661
+	shlw	%cl, %bx
662
+	subw	%ax, %bx
663
+	leaw	(dist_special-2)(%ebx,%ebx), %bx
664
+	/* Decode fixed-probability bits, if any */
665
+	cmpb	$6, %cl
666
+	jb	1f
667
+	subb	$4, %cl
668
+	shll	%cl, %edi
669
+	call	rc_direct
670
+	orl	%eax, %edi
671
+	/* Select context to be used in presence of fixed-probability bits */
672
+	movb	$4, %cl
673
+	movl	$dist_align, %ebx
674
+1:	/* Decode context-encoded bits */
675
+	shll	%cl, %edi
676
+	call	rc_bittree_reverse
677
+	orl	%edi, %eax
678
+99:	/* Restore registers and tail-call */
679
+	popl	%edi
680
+	jmp	match
681
+	.size	lzma_match, . - lzma_match
682
+
683
+/*****************************************************************************
684
+ * Decode an LZMA repeated match
685
+ *
686
+ * Parameters:
687
+ *   %ss:%ebp : LZMA parameter block
688
+ *   %ds:%esi : compressed input data pointer
689
+ *   %es:%edi : uncompressed output data pointer
690
+ *   %edx : LZMA state
691
+ * Returns:
692
+ *   %ds:%esi : compressed input data pointer (possibly updated)
693
+ *   %es:%edi : uncompressed output data pointer
694
+ *   %edx : LZMA state
695
+ *   CF : end of payload marker found
696
+ * Corrupts:
697
+ *   %eax
698
+ *   %ebx
699
+ *   %ecx
700
+ *****************************************************************************
701
+ *
702
+ * Repeated matches are encoded as:
703
+ *
704
+ *   "00"	 : shortrep0 (implicit length 1)
705
+ *   "01" + len  : longrep0
706
+ *   "10" + len  : longrep1
707
+ *   "110" + len : longrep2
708
+ *   "111" + len : longrep3
709
+ */
710
+lzma_rep_match:
711
+	/* Initially assume longrep0: %cl = rep index, %ch = state to enter */
712
+	movw	$(STATE_LIT_LONGREP << 8), %cx
713
+	/* Get is_rep0 bit */
714
+	leal	is_rep0(,%edx,2), %ebx
715
+	call	rc_bit
716
+	jnz	1f
717
+	/* Get is_rep0_long bit */
718
+	leal	is_rep0_long(,%edx,2), %ebx
719
+	call	rc_bit
720
+	jnz	98f
721
+	movw	$1, len(%ebp)		/* shortrep0 has an implicit length of 1 */
722
+	movb	$STATE_LIT_SHORTREP, %ch
723
+	jmp	99f
724
+1:	/* Get is_rep1 bit */
725
+	incb	%cl
726
+	leal	is_rep1(,%edx,2), %ebx
727
+	call	rc_bit
728
+	jz	98f
729
+	/* Get is_rep2 bit */
730
+	incb	%cl
731
+	leal	is_rep2(,%edx,2), %ebx
732
+	call	rc_bit
733
+	adcb	$0, %cl			/* CF is the decoded bit: selects rep2 or rep3 */
734
+98:	/* Decode length */
735
+	movl	$rep_len_dec, %ebx
736
+	call	lzma_len
737
+99:	/* Update LZMA state */
738
+	cmpb	$STATE_LIT_MAX, %dl
739
+	movb	%ch, %dl
740
+	jbe	1f
741
+	movb	$STATE_NONLIT_REP, %dl
742
+1:	/* Tail call */
743
+	jmp	match_rep
744
+	.size	lzma_rep_match, . - lzma_rep_match
745
+
746
+/*****************************************************************************
747
+ * Decode one LZMA symbol
748
+ *
749
+ * Parameters:
750
+ *   %ss:%ebp : LZMA parameter block
751
+ *   %ds:%esi : compressed input data pointer
752
+ *   %es:%edi : uncompressed output data pointer
753
+ *   %edx : LZMA state
754
+ * Returns:
755
+ *   %ds:%esi : compressed input data pointer (possibly updated)
756
+ *   %es:%edi : uncompressed output data pointer (updated)
757
+ *   %edx : LZMA state
758
+ *   CF : end of payload marker found
759
+ * Corrupts:
760
+ *   %eax
761
+ *   %ebx
762
+ *   %ecx
763
+ *****************************************************************************
764
+ */
765
+lzma_decode:
766
+	/* Get is_match bit */
767
+	leal	is_match(,%edx,2), %ebx
768
+	call	rc_bit
769
+	jz	lzma_literal
770
+	/* Get is_rep bit */
771
+	leal	is_rep(,%edx,2), %ebx
772
+	call	rc_bit
773
+	jz	lzma_match
774
+	jmp	lzma_rep_match
775
+	.size	lzma_decode, . - lzma_decode
776
+
777
+/****************************************************************************
778
+ * Undo effect of branch-call-jump (BCJ) filter
779
+ *
780
+ * Parameters:
781
+ *   %es:%esi : start of uncompressed output data (note %es)
782
+ *   %es:%edi : end of uncompressed output data
783
+ * Returns:
784
+ * Corrupts:
785
+ *   %eax
786
+ *   %ebx
787
+ *   %ecx
788
+ *   %edx
789
+ *   %esi
790
+ *****************************************************************************
791
+ */
792
+bcj_filter:
793
+	/* Store (negative) start of data in %edx */
794
+	movl	%esi, %edx
795
+	negl	%edx
796
+	/* Calculate limit in %ecx */
797
+	leal	-5(%edi,%edx), %ecx
798
+1:	/* Calculate offset in %ebx */
799
+	leal	(%esi,%edx), %ebx
800
+	/* Check for end of data */
801
+	cmpl	%ecx, %ebx
802
+	ja	99f
803
+	/* Check for an opcode which would be followed by a rel32 address */
804
+	ADDR32 es lodsb
805
+	andb	$0xfe, %al
806
+	cmpb	$0xe8, %al
807
+	jne	1b
808
+	/* Get current jump target value in %eax */
809
+	ADDR32 es lodsl
810
+	/* Convert absolute addresses in the range [0,limit) back to
811
+	 * relative addresses in the range [-offset,limit-offset).
812
+	 */
813
+	cmpl	%ecx, %eax
814
+	jae	2f
815
+	subl	%ebx,%es:-4(%esi)
816
+2:	/* Convert negative numbers in the range [-offset,0) back to
817
+	 * positive numbers in the range [limit-offset,limit).
818
+	 */
819
+	notl	%eax	/* Range is now [0,offset) */
820
+	cmpl	%ebx, %eax
821
+	jae	1b
822
+	addl	%ecx,%es:-4(%esi)
823
+	jmp	1b
824
+99:	/* Return */
825
+	ret
826
+	.size	bcj_filter, . - bcj_filter
827
+
828
+/****************************************************************************
829
+ * decompress (real-mode or 16/32-bit protected-mode near call)
830
+ *
831
+ * Decompress data
832
+ *
833
+ * Parameters (passed via registers):
834
+ *   %ds:%esi : Start of compressed input data
835
+ *   %es:%edi : Start of output buffer
836
+ * Returns:
837
+ *   %ds:%esi - End of compressed input data
838
+ *   %es:%edi - End of decompressed output data
839
+ *   All other registers are preserved
840
+ *
841
+ * NOTE: It would be possible to build a smaller version of the
842
+ * decompression code for -DKEEP_IT_REAL by using 16-bit registers
843
+ * where possible.
844
+ ****************************************************************************
845
+ */
846
+	.globl	decompress
847
+decompress:
848
+	/* Preserve registers */
849
+	pushl	%eax
850
+	pushl	%ebx
851
+	pushl	%ecx
852
+	pushl	%edx
853
+	pushl	%ebp
854
+	/* Allocate parameter block */
855
+	subl	$sizeof__lzma_dec, %esp
856
+	movl	%esp, %ebp
857
+	/* Zero parameter block and set all probabilities to 0.5 */
858
+	pushl	%edi
859
+	pushw	%es
860
+	pushw	%ss
861
+	popw	%es
862
+	movl	%ebp, %edi
863
+	xorl	%eax, %eax
864
+	movl	$( sizeof__lzma_dec / 4 ), %ecx
865
+	ADDR32 rep stosl
866
+	leal	probs(%ebp), %edi
867
+	movw	$( ( 1 << 11 ) / 2 ), %ax
868
+	movl	$( ( sizeof__lzma_dec - probs ) / 2 ), %ecx
869
+	ADDR32 rep stosw
870
+	popw	%es
871
+	popl	%edi
872
+	/* Initialise remaining parameters */
873
+	movl	%esi, in_start(%ebp)
874
+	movl	%edi, out_start(%ebp)
875
+	print_character '\n'
876
+	ADDR32 lodsb	/* discard initial byte */
877
+	print_hex_byte %al
878
+	ADDR32 lodsl
879
+	bswapl	%eax
880
+	print_hex_dword %eax
881
+	print_character '\n'
882
+	movl	%eax, rc_code(%ebp)
883
+	decl	rc_range(%ebp)
884
+	movl	$STATE_LIT_LIT, %edx
885
+1:	/* Decompress until we reach end of buffer */
886
+	call	lzma_decode
887
+	jnc	1b
888
+	print_character '\n'
889
+	/* Undo BCJ filter */
890
+	pushl	%esi
891
+	movl	out_start(%ebp), %esi
892
+	call	bcj_filter
893
+	popl	%esi
894
+	/* Restore registers and return */
895
+	addl	$sizeof__lzma_dec, %esp
896
+	popl	%ebp
897
+	popl	%edx
898
+	popl	%ecx
899
+	popl	%ebx
900
+	popl	%eax
901
+	ret
902
+
903
+	/* Specify minimum amount of stack space required */
904
+	.globl	_min_decompress_stack
905
+	.equ	_min_decompress_stack, ( sizeof__lzma_dec + 512 /* margin */ )

+ 9
- 0
src/arch/i386/prefix/unlzma16.S View File

@@ -0,0 +1,9 @@
1
+/*
2
+ * 16-bit version of the decompressor
3
+ *
4
+ */
5
+
6
+FILE_LICENCE ( GPL2_OR_LATER )
7
+
8
+#define CODE16
9
+#include "unlzma.S"

+ 86
- 10
src/util/zbin.c View File

@@ -1,13 +1,21 @@
1
+#include <stdint.h>
1 2
 #include <stdio.h>
3
+#include <stdlib.h>
4
+#include <string.h>
5
+#include <errno.h>
2 6
 #include <sys/stat.h>
3
-
4
-#define ENCODE
5
-#define VERBOSE
6
-#include "nrv2b.c"
7
-FILE *infile, *outfile;
7
+#include <lzma.h>
8 8
 
9 9
 #define DEBUG 0
10 10
 
11
+/* LZMA filter choices.  Must match those used by unlzma.S */
12
+#define LZMA_LC 2
13
+#define LZMA_LP 0
14
+#define LZMA_PB 0
15
+
16
+/* LZMA preset choice.  This is a policy decision */
17
+#define LZMA_PRESET ( LZMA_PRESET_DEFAULT | LZMA_PRESET_EXTREME )
18
+
11 19
 struct input_file {
12 20
 	void *buf;
13 21
 	size_t len;
@@ -177,13 +185,75 @@ static int process_zinfo_copy ( struct input_file *input,
177 185
 	return 0;
178 186
 }
179 187
 
188
+#define OPCODE_CALL 0xe8
189
+#define OPCODE_JMP 0xe9
190
+
191
+static void bcj_filter ( void *data, size_t len ) {
192
+	struct {
193
+		uint8_t opcode;
194
+		int32_t target;
195
+	} __attribute__ (( packed )) *jump;
196
+	ssize_t limit = ( len - sizeof ( *jump ) );
197
+	ssize_t offset;
198
+
199
+	/* liblzma does include an x86 BCJ filter, but it's hideously
200
+	 * convoluted and undocumented.  This BCJ filter is
201
+	 * substantially simpler and achieves the same compression (at
202
+	 * the cost of requiring the decompressor to know the size of
203
+	 * the decompressed data, which we already have in iPXE).
204
+	 */
205
+	for ( offset = 0 ; offset <= limit ; offset++ ) {
206
+		jump = ( data + offset );
207
+
208
+		/* Skip instructions that are not followed by a rel32 address */
209
+		if ( ( jump->opcode != OPCODE_CALL ) &&
210
+		     ( jump->opcode != OPCODE_JMP ) )
211
+			continue;
212
+
213
+		/* Convert rel32 address to an absolute address.  To
214
+		 * avoid false positives (which damage the compression
215
+		 * ratio), we should check that the jump target is
216
+		 * within the range [0,limit).
217
+		 *
218
+		 * Some output values would then end up being mapped
219
+		 * from two distinct input values, making the
220
+		 * transformation irreversible.  To solve this, we
221
+		 * transform such values back into the part of the
222
+		 * range which would otherwise correspond to no input
223
+		 * values.
224
+		 */
225
+		if ( ( jump->target >= -offset ) &&
226
+		     ( jump->target < ( limit - offset ) ) ) {
227
+			/* Convert relative addresses in the range
228
+			 * [-offset,limit-offset) to absolute
229
+			 * addresses in the range [0,limit).
230
+			 */
231
+			jump->target += offset;
232
+		} else if ( ( jump->target >= ( limit - offset ) ) &&
233
+			    ( jump->target < limit ) ) {
234
+			/* Convert positive numbers in the range
235
+			 * [limit-offset,limit) to negative numbers in
236
+			 * the range [-offset,0).
237
+			 */
238
+			jump->target -= limit;
239
+		}
240
+		offset += sizeof ( jump->target );
241
+	};
242
+}
243
+
180 244
 static int process_zinfo_pack ( struct input_file *input,
181 245
 				struct output_file *output,
182 246
 				union zinfo_record *zinfo ) {
183 247
 	struct zinfo_pack *pack = &zinfo->pack;
184 248
 	size_t offset = pack->offset;
185 249
 	size_t len = pack->len;
186
-	unsigned long packed_len;
250
+	size_t packed_len = 0;
251
+	size_t remaining = ( output->max_len - output->len );
252
+	lzma_options_lzma options;
253
+	const lzma_filter filters[] = {
254
+		{ .id = LZMA_FILTER_LZMA1, .options = &options },
255
+		{ .id = LZMA_VLI_UNKNOWN }
256
+	};
187 257
 
188 258
 	if ( ( offset + len ) > input->len ) {
189 259
 		fprintf ( stderr, "Input buffer overrun on pack\n" );
@@ -196,9 +266,15 @@ static int process_zinfo_pack ( struct input_file *input,
196 266
 		return -1;
197 267
 	}
198 268
 
199
-	if ( ucl_nrv2b_99_compress ( ( input->buf + offset ), len,
200
-				     ( output->buf + output->len ),
201
-				     &packed_len, 0 ) != UCL_E_OK ) {
269
+	bcj_filter ( ( input->buf + offset ), len );
270
+
271
+	lzma_lzma_preset ( &options, LZMA_PRESET );
272
+	options.lc = LZMA_LC;
273
+	options.lp = LZMA_LP;
274
+	options.pb = LZMA_PB;
275
+	if ( lzma_raw_buffer_encode ( filters, NULL, ( input->buf + offset ),
276
+				      len, ( output->buf + output->len ),
277
+				      &packed_len, remaining ) != LZMA_OK ) {
202 278
 		fprintf ( stderr, "Compression failure\n" );
203 279
 		return -1;
204 280
 	}
@@ -206,7 +282,7 @@ static int process_zinfo_pack ( struct input_file *input,
206 282
 	if ( DEBUG ) {
207 283
 		fprintf ( stderr, "PACK [%#zx,%#zx) to [%#zx,%#zx)\n",
208 284
 			  offset, ( offset + len ), output->len,
209
-			  ( size_t )( output->len + packed_len ) );
285
+			  ( output->len + packed_len ) );
210 286
 	}
211 287
 
212 288
 	output->len += packed_len;

Loading…
Cancel
Save