瀏覽代碼

[crypto] Replace AES implementation

Replace the AES implementation from AXTLS with a dedicated iPXE
implementation which is slightly smaller and around 1000% faster.
This implementation has been verified using the existing self-tests
based on the NIST AES test vectors.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 9 年之前
父節點
當前提交
09824eca31
共有 5 個檔案被更改,包括 836 行新增635 行删除
  1. 804
    0
      src/crypto/aes.c
  2. 0
    457
      src/crypto/axtls/aes.c
  3. 0
    165
      src/crypto/axtls_aes.c
  4. 31
    12
      src/include/ipxe/aes.h
  5. 1
    1
      src/include/ipxe/errfile.h

+ 804
- 0
src/crypto/aes.c 查看文件

@@ -0,0 +1,804 @@
1
+/*
2
+ * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17
+ * 02110-1301, USA.
18
+ *
19
+ * You can also choose to distribute this program under the terms of
20
+ * the Unmodified Binary Distribution Licence (as given in the file
21
+ * COPYING.UBDL), provided that you have satisfied its requirements.
22
+ */
23
+
24
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
+
26
+/** @file
27
+ *
28
+ * AES algorithm
29
+ *
30
+ */
31
+
32
+#include <stdint.h>
33
+#include <string.h>
34
+#include <errno.h>
35
+#include <assert.h>
36
+#include <byteswap.h>
37
+#include <ipxe/rotate.h>
38
+#include <ipxe/crypto.h>
39
+#include <ipxe/ecb.h>
40
+#include <ipxe/cbc.h>
41
+#include <ipxe/aes.h>
42
+
43
+/** AES strides
44
+ *
45
+ * These are the strides (modulo 16) used to walk through the AES
46
+ * input state bytes in order of byte position after [Inv]ShiftRows.
47
+ */
48
+enum aes_stride {
49
+	/** Input stride for ShiftRows
50
+	 *
51
+	 *    0 4 8 c
52
+	 *     \ \ \
53
+	 *    1 5 9 d
54
+	 *     \ \ \
55
+	 *    2 6 a e
56
+	 *     \ \ \
57
+	 *    3 7 b f
58
+	 */
59
+	AES_STRIDE_SHIFTROWS = +5,
60
+	/** Input stride for InvShiftRows
61
+	 *
62
+	 *    0 4 8 c
63
+	 *     / / /
64
+	 *    1 5 9 d
65
+	 *     / / /
66
+	 *    2 6 a e
67
+	 *     / / /
68
+	 *    3 7 b f
69
+	 */
70
+	AES_STRIDE_INVSHIFTROWS = -3,
71
+};
72
+
73
+/** A single AES lookup table entry
74
+ *
75
+ * This represents the product (in the Galois field GF(2^8)) of an
76
+ * eight-byte vector multiplier with a single scalar multiplicand.
77
+ *
78
+ * The vector multipliers used for AES will be {1,1,1,3,2,1,1,3} for
79
+ * MixColumns and {1,9,13,11,14,9,13,11} for InvMixColumns.  This
80
+ * allows for the result of multiplying any single column of the
81
+ * [Inv]MixColumns matrix by a scalar value to be obtained simply by
82
+ * extracting the relevant four-byte subset from the lookup table
83
+ * entry.
84
+ *
85
+ * For example, to find the result of multiplying the second column of
86
+ * the MixColumns matrix by the scalar value 0x80:
87
+ *
88
+ * MixColumns column[0]: {			      2,    1,    1,    3 }
89
+ * MixColumns column[1]: {			3,    2,    1,    1	  }
90
+ * MixColumns column[2]: {		  1,    3,    2,    1		  }
91
+ * MixColumns column[3]: {	    1,    1,    3,    2			  }
92
+ * Vector multiplier:	 {    1,    1,    1,    3,    2,    1,    1,    3 }
93
+ * Scalar multiplicand:	   0x80
94
+ * Lookup table entry:	 { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
95
+ *
96
+ * The second column of the MixColumns matrix is {3,2,1,1}.  The
97
+ * product of this column with the scalar value 0x80 can be obtained
98
+ * by extracting the relevant four-byte subset of the lookup table
99
+ * entry:
100
+ *
101
+ * MixColumns column[1]: {			3,    2,    1,    1	  }
102
+ * Vector multiplier:	 {    1,    1,    1,    3,    2,    1,    1,    3 }
103
+ * Lookup table entry:	 { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
104
+ * Product:		 {		     0x9b, 0x1b, 0x80, 0x80	  }
105
+ *
106
+ * The column lookups require only seven bytes of the eight-byte
107
+ * entry: the remaining (first) byte is used to hold the scalar
108
+ * multiplicand itself (i.e. the first byte of the vector multiplier
109
+ * is always chosen to be 1).
110
+ */
111
+union aes_table_entry {
112
+	/** Viewed as an array of bytes */
113
+	uint8_t byte[8];
114
+} __attribute__ (( packed ));
115
+
116
+/** An AES lookup table
117
+ *
118
+ * This represents the products (in the Galois field GF(2^8)) of a
119
+ * constant eight-byte vector multiplier with all possible 256 scalar
120
+ * multiplicands.
121
+ *
122
+ * The entries are indexed by the AES [Inv]SubBytes S-box input
122
+ * values (denoted N).  This allows for the result of multiplying
124
+ * any single column of the [Inv]MixColumns matrix by S(N) to be
125
+ * obtained simply by extracting the relevant four-byte subset from
126
+ * the Nth table entry.  For example:
127
+ *
128
+ * Input byte (N):	   0x3a
129
+ * SubBytes output S(N):   0x80
130
+ * MixColumns column[1]: {			3,    2,    1,    1	  }
131
+ * Vector multiplier:	 {    1,    1,    1,    3,    2,    1,    1,    3 }
132
+ * Table entry[0x3a]:	 { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
133
+ * Product:		 {		     0x9b, 0x1b, 0x80, 0x80	  }
134
+ *
135
+ * Since the first byte of the eight-byte vector multiplier is always
136
+ * chosen to be 1, the value of S(N) may be looked up by extracting
137
+ * the first byte of the Nth table entry.
138
+ */
139
+struct aes_table {
140
+	/** Table entries (products of S(N)), indexed by N */
141
+	union aes_table_entry entry[256];
142
+} __attribute__ (( aligned ( 8 ) ));
143
+
144
+/** AES MixColumns lookup table */
145
+static struct aes_table aes_mixcolumns;
146
+
147
+/** AES InvMixColumns lookup table */
148
+static struct aes_table aes_invmixcolumns;
149
+
150
+/**
151
+ * Multiply [Inv]MixColumns matrix column by scalar multiplicand
152
+ *
153
+ * @v entry		AES lookup table entry for scalar multiplicand
154
+ * @v column		[Inv]MixColumns matrix column index
155
+ * @ret product		Product of matrix column with scalar multiplicand
156
+ */
157
+static inline __attribute__ (( always_inline )) uint32_t
158
+aes_entry_column ( const union aes_table_entry *entry, unsigned int column ) {
159
+	const uint8_t *first __attribute__ (( may_alias ));
160
+
161
+	/* Locate start of relevant four-byte subset */
162
+	first = &entry->byte[ 4 - column ];
163
+
164
+	/* Extract this four-byte subset */
165
+	return ( *( ( uint32_t * ) first ) );
166
+}
167
+
168
+/**
169
+ * Multiply [Inv]MixColumns matrix column by S-boxed input byte
170
+ *
171
+ * @v table		AES lookup table
172
+ * @v stride		AES row shift stride
173
+ * @v in		AES input state
174
+ * @v offset		Output byte offset (after [Inv]ShiftRows)
175
+ * @ret product		Product of matrix column with S(input byte)
176
+ *
177
+ * Note that the specified offset is not the offset of the input byte;
178
+ * it is the offset of the output byte which corresponds to the input
179
+ * byte.  This output byte offset is used to calculate both the input
180
+ * byte offset and to select the appropriate matrix column.
181
+ *
182
+ * With a compile-time constant offset, this function will optimise
183
+ * down to a single "movzbl" (to extract the input byte) and will
184
+ * generate a single x86 memory reference expression which can then be
185
+ * used directly within a single "xorl" instruction.
186
+ */
187
+static inline __attribute__ (( always_inline )) uint32_t
188
+aes_column ( const struct aes_table *table, size_t stride,
189
+	     const union aes_matrix *in, size_t offset ) {
190
+	const union aes_table_entry *entry;
191
+	unsigned int byte;
192
+
193
+	/* Extract input byte corresponding to this output byte offset
194
+	 * (i.e. perform [Inv]ShiftRows).
195
+	 */
196
+	byte = in->byte[ ( stride * offset ) & 0xf ];
197
+
198
+	/* Locate lookup table entry for this input byte (i.e. perform
199
+	 * [Inv]SubBytes).
200
+	 */
201
+	entry = &table->entry[byte];
202
+
203
+	/* Multiply appropriate matrix column by this input byte
204
+	 * (i.e. perform [Inv]MixColumns).
205
+	 */
206
+	return aes_entry_column ( entry, ( offset & 0x3 ) );
207
+}
208
+
209
+/**
210
+ * Calculate intermediate round output column
211
+ *
212
+ * @v table		AES lookup table
213
+ * @v stride		AES row shift stride
214
+ * @v in		AES input state
215
+ * @v key		AES round key
216
+ * @v column		Column index
217
+ * @ret output		Output column value
218
+ */
219
+static inline __attribute__ (( always_inline )) uint32_t
220
+aes_output ( const struct aes_table *table, size_t stride,
221
+	     const union aes_matrix *in, const union aes_matrix *key,
222
+	     unsigned int column ) {
223
+	size_t offset = ( column * 4 );
224
+
225
+	/* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
226
+	 * AddRoundKey for this column.  The loop is unrolled to allow
227
+	 * for the required compile-time constant optimisations.
228
+	 */
229
+	return ( aes_column ( table, stride, in, ( offset + 0 ) ) ^
230
+		 aes_column ( table, stride, in, ( offset + 1 ) ) ^
231
+		 aes_column ( table, stride, in, ( offset + 2 ) ) ^
232
+		 aes_column ( table, stride, in, ( offset + 3 ) ) ^
233
+		 key->column[column] );
234
+}
235
+
236
+/**
237
+ * Perform a single intermediate round
238
+ *
239
+ * @v table		AES lookup table
240
+ * @v stride		AES row shift stride
241
+ * @v in		AES input state
242
+ * @v out		AES output state
243
+ * @v key		AES round key
244
+ */
245
+static inline __attribute__ (( always_inline )) void
246
+aes_round ( const struct aes_table *table, size_t stride,
247
+	    const union aes_matrix *in, union aes_matrix *out,
248
+	    const union aes_matrix *key ) {
249
+
250
+	/* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
251
+	 * AddRoundKey for all columns.  The loop is unrolled to allow
252
+	 * for the required compile-time constant optimisations.
253
+	 */
254
+	out->column[0] = aes_output ( table, stride, in, key, 0 );
255
+	out->column[1] = aes_output ( table, stride, in, key, 1 );
256
+	out->column[2] = aes_output ( table, stride, in, key, 2 );
257
+	out->column[3] = aes_output ( table, stride, in, key, 3 );
258
+}
259
+
260
+/**
261
+ * Perform encryption intermediate rounds
262
+ *
263
+ * @v in		AES input state
264
+ * @v out		AES output state
265
+ * @v key		Round keys
266
+ * @v rounds		Number of rounds (must be odd)
267
+ *
268
+ * This function is deliberately marked as non-inlinable to ensure
269
+ * maximal availability of registers for GCC's register allocator,
270
+ * which has a tendency to otherwise spill performance-critical
271
+ * registers to the stack.
272
+ */
273
+static __attribute__ (( noinline )) void
274
+aes_encrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
275
+		     const union aes_matrix *key, unsigned int rounds ) {
276
+	union aes_matrix *tmp;
277
+
278
+	/* Perform intermediate rounds */
279
+	do {
280
+		/* Perform one intermediate round */
281
+		aes_round ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
282
+			    in, out, key++ );
283
+
284
+		/* Swap input and output states for next round */
285
+		tmp = in;
286
+		in = out;
287
+		out = tmp;
288
+
289
+	} while ( --rounds );
290
+}
291
+
292
+/**
293
+ * Perform decryption intermediate rounds
294
+ *
295
+ * @v in		AES input state
296
+ * @v out		AES output state
297
+ * @v key		Round keys
298
+ * @v rounds		Number of rounds (must be odd)
299
+ *
300
+ * As with aes_encrypt_rounds(), this function is deliberately marked
301
+ * as non-inlinable.
302
+ *
303
+ * This function could potentially use the same binary code as is used
304
+ * for encryption.  To compensate for the difference between ShiftRows
305
+ * and InvShiftRows, half of the input byte offsets would have to be
306
+ * modifiable at runtime (half by an offset of +4/-4, half by an
307
+ * offset of -4/+4 for ShiftRows/InvShiftRows).  This can be
308
+ * accomplished in x86 assembly within the number of available
309
+ * registers, but GCC's register allocator struggles to do so,
310
+ * resulting in a significant performance decrease due to registers
311
+ * being spilled to the stack.  We therefore use two separate but very
312
+ * similar binary functions based on the same C source.
313
+ */
314
+static __attribute__ (( noinline )) void
315
+aes_decrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
316
+		     const union aes_matrix *key, unsigned int rounds ) {
317
+	union aes_matrix *tmp;
318
+
319
+	/* Perform intermediate rounds */
320
+	do {
321
+		/* Perform one intermediate round */
322
+		aes_round ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS,
323
+			    in, out, key++ );
324
+
325
+		/* Swap input and output states for next round */
326
+		tmp = in;
327
+		in = out;
328
+		out = tmp;
329
+
330
+	} while ( --rounds );
331
+}
332
+
333
+/**
334
+ * Perform standalone AddRoundKey
335
+ *
336
+ * @v state		AES state
337
+ * @v key		AES round key
338
+ */
339
+static inline __attribute__ (( always_inline )) void
340
+aes_addroundkey ( union aes_matrix *state, const union aes_matrix *key ) {
341
+
342
+	state->column[0] ^= key->column[0];
343
+	state->column[1] ^= key->column[1];
344
+	state->column[2] ^= key->column[2];
345
+	state->column[3] ^= key->column[3];
346
+}
347
+
348
+/**
349
+ * Perform final round
350
+ *
351
+ * @v table		AES lookup table
352
+ * @v stride		AES row shift stride
353
+ * @v in		AES input state
354
+ * @v out		AES output state
355
+ * @v key		AES round key
356
+ */
357
+static void aes_final ( const struct aes_table *table, size_t stride,
358
+			const union aes_matrix *in, union aes_matrix *out,
359
+			const union aes_matrix *key ) {
360
+	const union aes_table_entry *entry;
361
+	unsigned int byte;
362
+	size_t out_offset;
363
+	size_t in_offset;
364
+
365
+	/* Perform [Inv]ShiftRows and [Inv]SubBytes */
366
+	for ( out_offset = 0, in_offset = 0 ; out_offset < 16 ;
367
+	      out_offset++, in_offset = ( ( in_offset + stride ) & 0xf ) ) {
368
+
369
+		/* Extract input byte (i.e. perform [Inv]ShiftRows) */
370
+		byte = in->byte[in_offset];
371
+
372
+		/* Locate lookup table entry for this input byte
373
+		 * (i.e. perform [Inv]SubBytes).
374
+		 */
375
+		entry = &table->entry[byte];
376
+
377
+		/* Store output byte */
378
+		out->byte[out_offset] = entry->byte[0];
379
+	}
380
+
381
+	/* Perform AddRoundKey */
382
+	aes_addroundkey ( out, key );
383
+}
384
+
385
+/**
386
+ * Encrypt data
387
+ *
388
+ * @v ctx		Context
389
+ * @v src		Data to encrypt
390
+ * @v dst		Buffer for encrypted data
391
+ * @v len		Length of data
392
+ */
393
+static void aes_encrypt ( void *ctx, const void *src, void *dst, size_t len ) {
394
+	struct aes_context *aes = ctx;
395
+	union aes_matrix buffer[2];
396
+	union aes_matrix *in = &buffer[0];
397
+	union aes_matrix *out = &buffer[1];
398
+	unsigned int rounds = aes->rounds;
399
+
400
+	/* Sanity check */
401
+	assert ( len == sizeof ( *in ) );
402
+
403
+	/* Initialise input state */
404
+	memcpy ( in, src, sizeof ( *in ) );
405
+
406
+	/* Perform initial round (AddRoundKey) */
407
+	aes_addroundkey ( in, &aes->encrypt.key[0] );
408
+
409
+	/* Perform intermediate rounds (ShiftRows, SubBytes,
410
+	 * MixColumns, AddRoundKey).
411
+	 */
412
+	aes_encrypt_rounds ( in, out, &aes->encrypt.key[1], ( rounds - 2 ) );
413
+	in = out;
414
+
415
+	/* Perform final round (ShiftRows, SubBytes, AddRoundKey) */
416
+	out = dst;
417
+	aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS, in, out,
418
+		    &aes->encrypt.key[ rounds - 1 ] );
419
+}
420
+
421
+/**
422
+ * Decrypt data
423
+ *
424
+ * @v ctx		Context
425
+ * @v src		Data to decrypt
426
+ * @v dst		Buffer for decrypted data
427
+ * @v len		Length of data
428
+ */
429
+static void aes_decrypt ( void *ctx, const void *src, void *dst, size_t len ) {
430
+	struct aes_context *aes = ctx;
431
+	union aes_matrix buffer[2];
432
+	union aes_matrix *in = &buffer[0];
433
+	union aes_matrix *out = &buffer[1];
434
+	unsigned int rounds = aes->rounds;
435
+
436
+	/* Sanity check */
437
+	assert ( len == sizeof ( *in ) );
438
+
439
+	/* Initialise input state */
440
+	memcpy ( in, src, sizeof ( *in ) );
441
+
442
+	/* Perform initial round (AddRoundKey) */
443
+	aes_addroundkey ( in, &aes->decrypt.key[0] );
444
+
445
+	/* Perform intermediate rounds (InvShiftRows, InvSubBytes,
446
+	 * InvMixColumns, AddRoundKey).
447
+	 */
448
+	aes_decrypt_rounds ( in, out, &aes->decrypt.key[1], ( rounds - 2 ) );
449
+	in = out;
450
+
451
+	/* Perform final round (InvShiftRows, InvSubBytes, AddRoundKey) */
452
+	out = dst;
453
+	aes_final ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS, in, out,
454
+		    &aes->decrypt.key[ rounds - 1 ] );
455
+}
456
+
457
+/**
458
+ * Multiply a polynomial by (x) modulo (x^8 + x^4 + x^3 + x + 1) in GF(2^8)
459
+ *
460
+ * @v poly		Polynomial to be multiplied
461
+ * @ret result		Result
462
+ */
463
+static __attribute__ (( const )) unsigned int aes_double ( unsigned int poly ) {
464
+
465
+	/* Multiply polynomial by (x), placing the resulting x^8
466
+	 * coefficient in the LSB (i.e. rotate byte left by one).
467
+	 */
468
+	poly = rol8 ( poly, 1 );
469
+
470
+	/* If coefficient of x^8 (in LSB) is non-zero, then reduce by
471
+	 * subtracting (x^8 + x^4 + x^3 + x + 1) in GF(2^8).
472
+	 */
473
+	if ( poly & 0x01 ) {
474
+		poly ^= 0x01; /* Subtract x^8 (currently in LSB) */
475
+		poly ^= 0x1b; /* Subtract (x^4 + x^3 + x + 1) */
476
+	}
477
+
478
+	return poly;
479
+}
480
+
481
+/**
482
+ * Fill in MixColumns lookup table entry
483
+ *
484
+ * @v entry		AES lookup table entry for scalar multiplicand
485
+ *
486
+ * The MixColumns lookup table vector multiplier is {1,1,1,3,2,1,1,3}.
487
+ */
488
+static void aes_mixcolumns_entry ( union aes_table_entry *entry ) {
489
+	unsigned int scalar_x_1;
490
+	unsigned int scalar_x;
491
+	unsigned int scalar;
492
+
493
+	/* Retrieve scalar multiplicand */
494
+	scalar = entry->byte[0];
495
+	entry->byte[1] = scalar;
496
+	entry->byte[2] = scalar;
497
+	entry->byte[5] = scalar;
498
+	entry->byte[6] = scalar;
499
+
500
+	/* Calculate scalar multiplied by (x) */
501
+	scalar_x = aes_double ( scalar );
502
+	entry->byte[4] = scalar_x;
503
+
504
+	/* Calculate scalar multiplied by (x + 1) */
505
+	scalar_x_1 = ( scalar_x ^ scalar );
506
+	entry->byte[3] = scalar_x_1;
507
+	entry->byte[7] = scalar_x_1;
508
+}
509
+
510
+/**
511
+ * Fill in InvMixColumns lookup table entry
512
+ *
513
+ * @v entry		AES lookup table entry for scalar multiplicand
514
+ *
515
+ * The InvMixColumns lookup table vector multiplier is {1,9,13,11,14,9,13,11}.
516
+ */
517
+static void aes_invmixcolumns_entry ( union aes_table_entry *entry ) {
518
+	unsigned int scalar_x3_x2_x;
519
+	unsigned int scalar_x3_x2_1;
520
+	unsigned int scalar_x3_x2;
521
+	unsigned int scalar_x3_x_1;
522
+	unsigned int scalar_x3_1;
523
+	unsigned int scalar_x3;
524
+	unsigned int scalar_x2;
525
+	unsigned int scalar_x;
526
+	unsigned int scalar;
527
+
528
+	/* Retrieve scalar multiplicand */
529
+	scalar = entry->byte[0];
530
+
531
+	/* Calculate scalar multiplied by (x) */
532
+	scalar_x = aes_double ( scalar );
533
+
534
+	/* Calculate scalar multiplied by (x^2) */
535
+	scalar_x2 = aes_double ( scalar_x );
536
+
537
+	/* Calculate scalar multiplied by (x^3) */
538
+	scalar_x3 = aes_double ( scalar_x2 );
539
+
540
+	/* Calculate scalar multiplied by (x^3 + 1) */
541
+	scalar_x3_1 = ( scalar_x3 ^ scalar );
542
+	entry->byte[1] = scalar_x3_1;
543
+	entry->byte[5] = scalar_x3_1;
544
+
545
+	/* Calculate scalar multiplied by (x^3 + x + 1) */
546
+	scalar_x3_x_1 = ( scalar_x3_1 ^ scalar_x );
547
+	entry->byte[3] = scalar_x3_x_1;
548
+	entry->byte[7] = scalar_x3_x_1;
549
+
550
+	/* Calculate scalar multiplied by (x^3 + x^2) */
551
+	scalar_x3_x2 = ( scalar_x3 ^ scalar_x2 );
552
+
553
+	/* Calculate scalar multiplied by (x^3 + x^2 + 1) */
554
+	scalar_x3_x2_1 = ( scalar_x3_x2 ^ scalar );
555
+	entry->byte[2] = scalar_x3_x2_1;
556
+	entry->byte[6] = scalar_x3_x2_1;
557
+
558
+	/* Calculate scalar multiplied by (x^3 + x^2 + x) */
559
+	scalar_x3_x2_x = ( scalar_x3_x2 ^ scalar_x );
560
+	entry->byte[4] = scalar_x3_x2_x;
561
+}
562
+
563
+/**
564
+ * Generate AES lookup tables
565
+ *
566
+ */
567
+static void aes_generate ( void ) {
568
+	union aes_table_entry *entry;
569
+	union aes_table_entry *inventry;
570
+	unsigned int poly = 0x01;
571
+	unsigned int invpoly = 0x01;
572
+	unsigned int transformed;
573
+	unsigned int i;
574
+
575
+	/* Iterate over non-zero values of GF(2^8) using generator (x + 1) */
576
+	do {
577
+
578
+		/* Multiply polynomial by (x + 1) */
579
+		poly ^= aes_double ( poly );
580
+
581
+		/* Divide inverse polynomial by (x + 1).  This code
582
+		 * fragment is taken directly from the Wikipedia page
583
+		 * on the Rijndael S-box.  An explanation of why it
584
+		 * works would be greatly appreciated.
585
+		 */
586
+		invpoly ^= ( invpoly << 1 );
587
+		invpoly ^= ( invpoly << 2 );
588
+		invpoly ^= ( invpoly << 4 );
589
+		if ( invpoly & 0x80 )
590
+			invpoly ^= 0x09;
591
+		invpoly &= 0xff;
592
+
593
+		/* Apply affine transformation */
594
+		transformed = ( 0x63 ^ invpoly ^ rol8 ( invpoly, 1 ) ^
595
+				rol8 ( invpoly, 2 ) ^ rol8 ( invpoly, 3 ) ^
596
+				rol8 ( invpoly, 4 ) );
597
+
598
+		/* Populate S-box (within MixColumns lookup table) */
599
+		aes_mixcolumns.entry[poly].byte[0] = transformed;
600
+
601
+	} while ( poly != 0x01 );
602
+
603
+	/* Populate zeroth S-box entry (which has no inverse) */
604
+	aes_mixcolumns.entry[0].byte[0] = 0x63;
605
+
606
+	/* Fill in MixColumns and InvMixColumns lookup tables */
607
+	for ( i = 0 ; i < 256 ; i++ ) {
608
+
609
+		/* Fill in MixColumns lookup table entry */
610
+		entry = &aes_mixcolumns.entry[i];
611
+		aes_mixcolumns_entry ( entry );
612
+
613
+		/* Populate inverse S-box (within InvMixColumns lookup table) */
614
+		inventry = &aes_invmixcolumns.entry[ entry->byte[0] ];
615
+		inventry->byte[0] = i;
616
+
617
+		/* Fill in InvMixColumns lookup table entry */
618
+		aes_invmixcolumns_entry ( inventry );
619
+	}
620
+}
621
+
622
+/**
623
+ * Rotate key column
624
+ *
625
+ * @v column		Key column
626
+ * @ret column		Updated key column
627
+ */
628
+static inline __attribute__ (( always_inline )) uint32_t
629
+aes_key_rotate ( uint32_t column ) {
630
+
631
+	return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
632
+		 ror32 ( column, 8 ) : rol32 ( column, 8 ) );
633
+}
634
+
635
+/**
636
+ * Apply S-box to key column
637
+ *
638
+ * @v column		Key column
639
+ * @ret column		Updated key column
640
+ */
641
+static uint32_t aes_key_sbox ( uint32_t column ) {
642
+	unsigned int i;
643
+	uint8_t byte;
644
+
645
+	for ( i = 0 ; i < 4 ; i++ ) {
646
+		byte = ( column & 0xff );
647
+		byte = aes_mixcolumns.entry[byte].byte[0];
648
+		column = ( ( column & ~0xff ) | byte );
649
+		column = rol32 ( column, 8 );
650
+	}
651
+	return column;
652
+}
653
+
654
+/**
655
+ * Apply schedule round constant to key column
656
+ *
657
+ * @v column		Key column
658
+ * @v rcon		Round constant
659
+ * @ret column		Updated key column
660
+ */
661
+static inline __attribute__ (( always_inline )) uint32_t
662
+aes_key_rcon ( uint32_t column, unsigned int rcon ) {
663
+
664
+	return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
665
+		 ( column ^ rcon ) : ( column ^ ( rcon << 24 ) ) );
666
+}
667
+
668
+/**
669
+ * Set key
670
+ *
671
+ * @v ctx		Context
672
+ * @v key		Key
673
+ * @v keylen		Key length
674
+ * @ret rc		Return status code
675
+ */
676
+static int aes_setkey ( void *ctx, const void *key, size_t keylen ) {
677
+	struct aes_context *aes = ctx;
678
+	union aes_matrix *enc;
679
+	union aes_matrix *dec;
680
+	union aes_matrix temp;
681
+	union aes_matrix zero;
682
+	unsigned int rcon = 0x01;
683
+	unsigned int rounds;
684
+	size_t offset = 0;
685
+	uint32_t *prev;
686
+	uint32_t *next;
687
+	uint32_t *end;
688
+	uint32_t tmp;
689
+
690
+	/* Generate lookup tables, if not already done */
691
+	if ( ! aes_mixcolumns.entry[0].byte[0] )
692
+		aes_generate();
693
+
694
+	/* Validate key length and calculate number of rounds (i.e. round keys) */
695
+	switch ( keylen ) {
696
+	case ( 128 / 8 ) :
697
+		rounds = 11;
698
+		break;
699
+	case ( 192 / 8 ) :
700
+		rounds = 13;
701
+		break;
702
+	case ( 256 / 8 ) :
703
+		rounds = 15;
704
+		break;
705
+	default:
706
+		DBGC ( aes, "AES %p unsupported key length (%zd bits)\n",
707
+		       aes, ( keylen * 8 ) );
708
+		return -EINVAL;
709
+	}
710
+	aes->rounds = rounds;
711
+	enc = aes->encrypt.key;
712
+	end = enc[rounds].column;
713
+
714
+	/* Copy raw key */
715
+	memcpy ( enc, key, keylen );
716
+	prev = enc->column;
717
+	next = ( ( ( void * ) prev ) + keylen );
718
+	tmp = next[-1];
719
+
720
+	/* Construct expanded key */
721
+	while ( next < end ) {
722
+
723
+		/* If this is the first column of an expanded key
724
+		 * block, or the middle column of an AES-256 key
725
+		 * block, then apply the S-box.
726
+		 */
727
+		if ( ( offset == 0 ) || ( ( offset | keylen ) == 48 ) )
728
+			tmp = aes_key_sbox ( tmp );
729
+
730
+		/* If this is the first column of an expanded key
731
+		 * block then rotate and apply the round constant.
732
+		 */
733
+		if ( offset == 0 ) {
734
+			tmp = aes_key_rotate ( tmp );
735
+			tmp = aes_key_rcon ( tmp, rcon );
736
+			rcon = aes_double ( rcon );
737
+		}
738
+
739
+		/* XOR with previous key column */
740
+		tmp ^= *prev;
741
+
742
+		/* Store column */
743
+		*next = tmp;
744
+
745
+		/* Move to next column */
746
+		offset += sizeof ( *next );
747
+		if ( offset == keylen )
748
+			offset = 0;
749
+		next++;
750
+		prev++;
751
+	}
752
+	DBGC2 ( aes, "AES %p expanded %zd-bit key:\n", aes, ( keylen * 8 ) );
753
+	DBGC2_HDA ( aes, 0, &aes->encrypt, ( rounds * sizeof ( *enc ) ) );
754
+
755
+	/* Convert to decryption key */
756
+	memset ( &zero, 0, sizeof ( zero ) );
757
+	dec = &aes->decrypt.key[ rounds - 1 ];
758
+	memcpy ( dec--, enc++, sizeof ( *dec ) );
759
+	while ( dec > aes->decrypt.key ) {
760
+		/* Perform InvMixColumns (by reusing the encryption
761
+		 * final-round code to perform ShiftRows+SubBytes and
762
+		 * reusing the decryption intermediate-round code to
763
+		 * perform InvShiftRows+InvSubBytes+InvMixColumns, all
764
+		 * with a zero encryption key).
765
+		 */
766
+		aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
767
+			    enc++, &temp, &zero );
768
+		aes_decrypt_rounds ( &temp, dec--, &zero, 1 );
769
+	}
770
+	memcpy ( dec--, enc++, sizeof ( *dec ) );
771
+	DBGC2 ( aes, "AES %p inverted %zd-bit key:\n", aes, ( keylen * 8 ) );
772
+	DBGC2_HDA ( aes, 0, &aes->decrypt, ( rounds * sizeof ( *dec ) ) );
773
+
774
+	return 0;
775
+}
776
+
777
+/**
778
+ * Set initialisation vector
779
+ *
780
+ * @v ctx		Context
781
+ * @v iv		Initialisation vector
782
+ */
783
+static void aes_setiv ( void *ctx __unused, const void *iv __unused ) {
784
+	/* Nothing to do */
785
+}
786
+
787
+/** Basic AES algorithm */
788
+struct cipher_algorithm aes_algorithm = {
789
+	.name = "aes",
790
+	.ctxsize = sizeof ( struct aes_context ),
791
+	.blocksize = AES_BLOCKSIZE,
792
+	.setkey = aes_setkey,
793
+	.setiv = aes_setiv,
794
+	.encrypt = aes_encrypt,
795
+	.decrypt = aes_decrypt,
796
+};
797
+
798
+/* AES in Electronic Codebook mode */
799
+ECB_CIPHER ( aes_ecb, aes_ecb_algorithm,
800
+	     aes_algorithm, struct aes_context, AES_BLOCKSIZE );
801
+
802
+/* AES in Cipher Block Chaining mode */
803
+CBC_CIPHER ( aes_cbc, aes_cbc_algorithm,
804
+	     aes_algorithm, struct aes_context, AES_BLOCKSIZE );

+ 0
- 457
src/crypto/axtls/aes.c 查看文件

@@ -1,457 +0,0 @@
1
-/*
2
- * Copyright (c) 2007, Cameron Rich
3
- *
4
- * All rights reserved.
5
- *
6
- * Redistribution and use in source and binary forms, with or without
7
- * modification, are permitted provided that the following conditions are met:
8
- *
9
- * * Redistributions of source code must retain the above copyright notice,
10
- *   this list of conditions and the following disclaimer.
11
- * * Redistributions in binary form must reproduce the above copyright notice,
12
- *   this list of conditions and the following disclaimer in the documentation
13
- *   and/or other materials provided with the distribution.
14
- * * Neither the name of the axTLS project nor the names of its contributors
15
- *   may be used to endorse or promote products derived from this software
16
- *   without specific prior written permission.
17
- *
18
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
- */
30
-
31
-/**
32
- * AES implementation - this is a small code version. There are much faster
33
- * versions around but they are much larger in size (i.e. they use large 
34
- * submix tables).
35
- */
36
-
37
-#include <string.h>
38
-#include "os_port.h"
39
-#include "crypto.h"
40
-
41
-/* all commented out in skeleton mode */
42
-#ifndef CONFIG_SSL_SKELETON_MODE
43
-
44
/*
 * Rotate a 32-bit word right by one, two or three bytes.  The operand
 * is expected to be an unsigned 32-bit value; each macro evaluates its
 * argument twice.
 */
#define rot1(x) (((x) >> 8) | ((x) << 24))
#define rot2(x) (((x) >> 16) | ((x) << 16))
#define rot3(x) (((x) >> 24) | ((x) <<  8))

/*
 * mul2() doubles four packed GF(2^8) elements in one go.  The trick is
 * credited to Dr B. R. Gladman <brg@gladman.uk.net>.
 *
 * Any byte whose top bit is set must be reduced by xoring with 0x1b
 * after the shift.  Per byte:
 *
 *   t          = x & 0x80          isolate the top bit
 *   t - (t>>7) = 0x7f or 0x00      all-ones below the top bit if it was set
 *   ... & 0x1b = 0x1b or 0x00      the reduction value to apply
 *   (x+x)&0xfe                     the shift, with the bit carried in
 *                                  from the neighbouring byte cleared
 *
 * The second argument is a scratch lvalue that receives the top-bit mask.
 */
#define mt  0x80808080
#define ml  0x7f7f7f7f
#define mh  0xfefefefe
#define mm  0x1b1b1b1b
#define mul2(x,t)	((t) = ((x) & mt), \
			((((t) - ((t) >> 7)) & mm) ^ (((x) + (x)) & mh)))

/*
 * Apply InvMixColumns to one column packed into a 32-bit word.
 *
 * f2, f4, f8 and f9 are scratch lvalues.  They are first loaded with
 * 2*x, 4*x, 8*x and 9*x, then recombined into 14*x (f8), 11*x (f2) and
 * 13*x (f4); the result is 14*x ^ rot3(11*x) ^ rot2(13*x) ^ rot1(9*x).
 */
#define inv_mix_col(x,f2,f4,f8,f9) (\
			(f2) = mul2(x,f2), \
			(f4) = mul2(f2,f4), \
			(f8) = mul2(f4,f8), \
			(f9) = (x) ^ (f8), \
			(f8) = ((f2) ^ (f4) ^ (f8)), \
			(f2) ^= (f9), \
			(f4) ^= (f9), \
			(f8) ^= rot3(f2), \
			(f8) ^= rot2(f4), \
			(f8) ^ rot1(f9))
77
-
78
-/*
79
- * AES S-box
80
- */
81
-static const uint8_t aes_sbox[256] =
82
-{
83
-	0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
84
-	0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
85
-	0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
86
-	0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
87
-	0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
88
-	0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
89
-	0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
90
-	0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
91
-	0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
92
-	0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
93
-	0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
94
-	0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
95
-	0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
96
-	0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
97
-	0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
98
-	0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
99
-	0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
100
-	0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
101
-	0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
102
-	0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
103
-	0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
104
-	0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
105
-	0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
106
-	0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
107
-	0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
108
-	0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
109
-	0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
110
-	0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
111
-	0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
112
-	0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
113
-	0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
114
-	0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
115
-};
116
-
117
-/*
118
- * AES is-box
119
- */
120
-static const uint8_t aes_isbox[256] = 
121
-{
122
-    0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
123
-    0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
124
-    0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
125
-    0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
126
-    0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
127
-    0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
128
-    0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
129
-    0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
130
-    0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
131
-    0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
132
-    0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
133
-    0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
134
-    0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
135
-    0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
136
-    0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
137
-    0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
138
-    0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
139
-    0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
140
-    0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
141
-    0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
142
-    0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
143
-    0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
144
-    0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
145
-    0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
146
-    0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
147
-    0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
148
-    0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
149
-    0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
150
-    0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
151
-    0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
152
-    0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
153
-    0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
154
-};
155
-
156
-static const unsigned char Rcon[30]=
157
-{
158
-	0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
159
-	0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
160
-	0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
161
-	0xb3,0x7d,0xfa,0xef,0xc5,0x91,
162
-};
163
-
164
-/* ----- static functions ----- */
165
-static void AES_encrypt(const AES_CTX *ctx, uint32_t *data);
166
-static void AES_decrypt(const AES_CTX *ctx, uint32_t *data);
167
-
168
/*
 * Double a single GF(2^8) element, reducing by the AES polynomial
 * x^8+x^4+x^3+x+1 (0x1b after the shifted-out bit is dropped).  The
 * result is truncated to one byte by the return type.
 */
static unsigned char AES_xtime(uint32_t x)
{
	uint32_t doubled = x << 1;

	if (x & 0x80)
		doubled ^= 0x1b;

	return doubled;
}
174
-
175
-/**
176
- * Set up AES with the key/iv and cipher size.
177
- */
178
-void AES_set_key(AES_CTX *ctx, const uint8_t *key, 
179
-        const uint8_t *iv, AES_MODE mode)
180
-{
181
-    int i, ii;
182
-    uint32_t *W, tmp, tmp2;
183
-    const unsigned char *ip;
184
-    int words;
185
-
186
-    switch (mode)
187
-    {
188
-        case AES_MODE_128:
189
-            i = 10;
190
-            words = 4;
191
-            break;
192
-
193
-        case AES_MODE_256:
194
-            i = 14;
195
-            words = 8;
196
-            break;
197
-
198
-        default:        /* fail silently */
199
-            return;
200
-    }
201
-
202
-    ctx->rounds = i;
203
-    ctx->key_size = words;
204
-    W = ctx->ks;
205
-    for (i = 0; i < words; i+=2)
206
-    {
207
-        W[i+0]=	((uint32_t)key[ 0]<<24)|
208
-            ((uint32_t)key[ 1]<<16)|
209
-            ((uint32_t)key[ 2]<< 8)|
210
-            ((uint32_t)key[ 3]    );
211
-        W[i+1]=	((uint32_t)key[ 4]<<24)|
212
-            ((uint32_t)key[ 5]<<16)|
213
-            ((uint32_t)key[ 6]<< 8)|
214
-            ((uint32_t)key[ 7]    );
215
-        key += 8;
216
-    }
217
-
218
-    ip = Rcon;
219
-    ii = 4 * (ctx->rounds+1);
220
-    for (i = words; i<ii; i++)
221
-    {
222
-        tmp = W[i-1];
223
-
224
-        if ((i % words) == 0)
225
-        {
226
-            tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]<< 8;
227
-            tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
228
-            tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
229
-            tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ];
230
-            tmp=tmp2^(((unsigned int)*ip)<<24);
231
-            ip++;
232
-        }
233
-
234
-        if ((words == 8) && ((i % words) == 4))
235
-        {
236
-            tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]    ;
237
-            tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
238
-            tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
239
-            tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ]<<24;
240
-            tmp=tmp2;
241
-        }
242
-
243
-        W[i]=W[i-words]^tmp;
244
-    }
245
-
246
-    /* copy the iv across */
247
-    memcpy(ctx->iv, iv, 16);
248
-}
249
-
250
-/**
251
- * Change a key for decryption.
252
- */
253
-void AES_convert_key(AES_CTX *ctx)
254
-{
255
-    int i;
256
-    uint32_t *k,w,t1,t2,t3,t4;
257
-
258
-    k = ctx->ks;
259
-    k += 4;
260
-
261
-    for (i= ctx->rounds*4; i > 4; i--)
262
-    {
263
-        w= *k;
264
-        w = inv_mix_col(w,t1,t2,t3,t4);
265
-        *k++ =w;
266
-    }
267
-}
268
-
269
-/**
270
- * Encrypt a byte sequence (with a block size 16) using the AES cipher.
271
- */
272
-void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
273
-{
274
-    int i;
275
-    uint32_t tin[4], tout[4], iv[4];
276
-
277
-    memcpy(iv, ctx->iv, AES_IV_SIZE);
278
-    for (i = 0; i < 4; i++)
279
-        tout[i] = ntohl(iv[i]);
280
-
281
-    for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
282
-    {
283
-        uint32_t msg_32[4];
284
-        uint32_t out_32[4];
285
-        memcpy(msg_32, msg, AES_BLOCKSIZE);
286
-        msg += AES_BLOCKSIZE;
287
-
288
-        for (i = 0; i < 4; i++)
289
-            tin[i] = ntohl(msg_32[i])^tout[i];
290
-
291
-        AES_encrypt(ctx, tin);
292
-
293
-        for (i = 0; i < 4; i++)
294
-        {
295
-            tout[i] = tin[i];
296
-            out_32[i] = htonl(tout[i]);
297
-        }
298
-
299
-        memcpy(out, out_32, AES_BLOCKSIZE);
300
-        out += AES_BLOCKSIZE;
301
-    }
302
-
303
-    for (i = 0; i < 4; i++)
304
-        iv[i] = htonl(tout[i]);
305
-    memcpy(ctx->iv, iv, AES_IV_SIZE);
306
-}
307
-
308
-/**
309
- * Decrypt a byte sequence (with a block size 16) using the AES cipher.
310
- */
311
-void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
312
-{
313
-    int i;
314
-    uint32_t tin[4], xor[4], tout[4], data[4], iv[4];
315
-
316
-    memcpy(iv, ctx->iv, AES_IV_SIZE);
317
-    for (i = 0; i < 4; i++)
318
-        xor[i] = ntohl(iv[i]);
319
-
320
-    for (length -= 16; length >= 0; length -= 16)
321
-    {
322
-        uint32_t msg_32[4];
323
-        uint32_t out_32[4];
324
-        memcpy(msg_32, msg, AES_BLOCKSIZE);
325
-        msg += AES_BLOCKSIZE;
326
-
327
-        for (i = 0; i < 4; i++)
328
-        {
329
-            tin[i] = ntohl(msg_32[i]);
330
-            data[i] = tin[i];
331
-        }
332
-
333
-        AES_decrypt(ctx, data);
334
-
335
-        for (i = 0; i < 4; i++)
336
-        {
337
-            tout[i] = data[i]^xor[i];
338
-            xor[i] = tin[i];
339
-            out_32[i] = htonl(tout[i]);
340
-        }
341
-
342
-        memcpy(out, out_32, AES_BLOCKSIZE);
343
-        out += AES_BLOCKSIZE;
344
-    }
345
-
346
-    for (i = 0; i < 4; i++)
347
-        iv[i] = htonl(xor[i]);
348
-    memcpy(ctx->iv, iv, AES_IV_SIZE);
349
-}
350
-
351
-/**
352
- * Encrypt a single block (16 bytes) of data
353
- */
354
-static void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
355
-{
356
-    /* To make this code smaller, generate the sbox entries on the fly.
357
-     * This will have a really heavy effect upon performance.
358
-     */
359
-    uint32_t tmp[4];
360
-    uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
361
-    int curr_rnd;
362
-    int rounds = ctx->rounds; 
363
-    const uint32_t *k = ctx->ks;
364
-
365
-    /* Pre-round key addition */
366
-    for (row = 0; row < 4; row++)
367
-        data[row] ^= *(k++);
368
-
369
-    /* Encrypt one block. */
370
-    for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
371
-    {
372
-        /* Perform ByteSub and ShiftRow operations together */
373
-        for (row = 0; row < 4; row++)
374
-        {
375
-            a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
376
-            a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
377
-            a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF]; 
378
-            a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
379
-
380
-            /* Perform MixColumn iff not last round */
381
-            if (curr_rnd < (rounds - 1))
382
-            {
383
-                tmp1 = a0 ^ a1 ^ a2 ^ a3;
384
-                old_a0 = a0;
385
-                a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
386
-                a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
387
-                a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
388
-                a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
389
-            }
390
-
391
-            tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
392
-        }
393
-
394
-        /* KeyAddition - note that it is vital that this loop is separate from
395
-           the MixColumn operation, which must be atomic...*/ 
396
-        for (row = 0; row < 4; row++)
397
-            data[row] = tmp[row] ^ *(k++);
398
-    }
399
-}
400
-
401
-/**
402
- * Decrypt a single block (16 bytes) of data
403
- */
404
-static void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
405
-{ 
406
-    uint32_t tmp[4];
407
-    uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
408
-    uint32_t a0, a1, a2, a3, row;
409
-    int curr_rnd;
410
-    int rounds = ctx->rounds;
411
-    const uint32_t *k = ctx->ks + ((rounds+1)*4);
412
-
413
-    /* pre-round key addition */
414
-    for (row=4; row > 0;row--)
415
-        data[row-1] ^= *(--k);
416
-
417
-    /* Decrypt one block */
418
-    for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
419
-    {
420
-        /* Perform ByteSub and ShiftRow operations together */
421
-        for (row = 4; row > 0; row--)
422
-        {
423
-            a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
424
-            a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
425
-            a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
426
-            a3 = aes_isbox[(data[row%4])&0xFF];
427
-
428
-            /* Perform MixColumn iff not last round */
429
-            if (curr_rnd<(rounds-1))
430
-            {
431
-                /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
432
-                   are quite large compared to encryption; this 
433
-                   operation slows decryption down noticeably. */
434
-                xt0 = AES_xtime(a0^a1);
435
-                xt1 = AES_xtime(a1^a2);
436
-                xt2 = AES_xtime(a2^a3);
437
-                xt3 = AES_xtime(a3^a0);
438
-                xt4 = AES_xtime(xt0^xt1);
439
-                xt5 = AES_xtime(xt1^xt2);
440
-                xt6 = AES_xtime(xt4^xt5);
441
-
442
-                xt0 ^= a1^a2^a3^xt4^xt6;
443
-                xt1 ^= a0^a2^a3^xt5^xt6;
444
-                xt2 ^= a0^a1^a3^xt4^xt6;
445
-                xt3 ^= a0^a1^a2^xt5^xt6;
446
-                tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
447
-            }
448
-            else
449
-                tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
450
-        }
451
-
452
-        for (row = 4; row > 0; row--)
453
-            data[row-1] = tmp[row-1] ^ *(--k);
454
-    }
455
-}
456
-
457
-#endif

+ 0
- 165
src/crypto/axtls_aes.c 查看文件

@@ -1,165 +0,0 @@
1
-/*
2
- * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3
- *
4
- * This program is free software; you can redistribute it and/or
5
- * modify it under the terms of the GNU General Public License as
6
- * published by the Free Software Foundation; either version 2 of the
7
- * License, or any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful, but
10
- * WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
- * General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program; if not, write to the Free Software
16
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17
- * 02110-1301, USA.
18
- */
19
-
20
-FILE_LICENCE ( GPL2_OR_LATER );
21
-
22
-#include <string.h>
23
-#include <errno.h>
24
-#include <assert.h>
25
-#include <byteswap.h>
26
-#include <ipxe/crypto.h>
27
-#include <ipxe/ecb.h>
28
-#include <ipxe/cbc.h>
29
-#include <ipxe/aes.h>
30
-#include "crypto/axtls/crypto.h"
31
-
32
-/** @file
33
- *
34
- * AES algorithm
35
- *
36
- */
37
-
38
-/**
39
- * Set key
40
- *
41
- * @v ctx		Context
42
- * @v key		Key
43
- * @v keylen		Key length
44
- * @ret rc		Return status code
45
- */
46
-static int aes_setkey ( void *ctx, const void *key, size_t keylen ) {
47
-	struct aes_context *aes_ctx = ctx;
48
-	AES_MODE mode;
49
-	void *iv;
50
-
51
-	switch ( keylen ) {
52
-	case ( 128 / 8 ):
53
-		mode = AES_MODE_128;
54
-		break;
55
-	case ( 256 / 8 ):
56
-		mode = AES_MODE_256;
57
-		break;
58
-	default:
59
-		return -EINVAL;
60
-	}
61
-
62
-	/* IV is not a relevant concept at this stage; use a dummy
63
-	 * value that will have no side-effects.
64
-	 */
65
-	iv = &aes_ctx->axtls_ctx.iv;
66
-
67
-	AES_set_key ( &aes_ctx->axtls_ctx, key, iv, mode );
68
-
69
-	aes_ctx->decrypting = 0;
70
-
71
-	return 0;
72
-}
73
-
74
-/**
75
- * Set initialisation vector
76
- *
77
- * @v ctx		Context
78
- * @v iv		Initialisation vector
79
- */
80
-static void aes_setiv ( void *ctx __unused, const void *iv __unused ) {
81
-	/* Nothing to do */
82
-}
83
-
84
-/**
85
- * Call AXTLS' AES_encrypt() or AES_decrypt() functions
86
- *
87
- * @v axtls_ctx		AXTLS AES context
88
- * @v src		Data to process
89
- * @v dst		Buffer for output
90
- * @v func		AXTLS AES function to call
91
- */
92
-static void aes_call_axtls ( AES_CTX *axtls_ctx, const void *src, void *dst,
93
-			     void ( * func ) ( const AES_CTX *axtls_ctx,
94
-					       uint32_t *data ) ){
95
-	const uint32_t *srcl = src;
96
-	uint32_t *dstl = dst;
97
-	unsigned int i;
98
-
99
-	/* AXTLS' AES_encrypt() and AES_decrypt() functions both
100
-	 * expect to deal with an array of four dwords in host-endian
101
-	 * order.
102
-	 */
103
-	for ( i = 0 ; i < 4 ; i++ )
104
-		dstl[i] = ntohl ( srcl[i] );
105
-	func ( axtls_ctx, dstl );
106
-	for ( i = 0 ; i < 4 ; i++ )
107
-		dstl[i] = htonl ( dstl[i] );
108
-}
109
-
110
-/**
111
- * Encrypt data
112
- *
113
- * @v ctx		Context
114
- * @v src		Data to encrypt
115
- * @v dst		Buffer for encrypted data
116
- * @v len		Length of data
117
- */
118
-static void aes_encrypt ( void *ctx, const void *src, void *dst,
119
-			  size_t len ) {
120
-	struct aes_context *aes_ctx = ctx;
121
-
122
-	assert ( len == AES_BLOCKSIZE );
123
-	if ( aes_ctx->decrypting )
124
-		assert ( 0 );
125
-	aes_call_axtls ( &aes_ctx->axtls_ctx, src, dst, axtls_aes_encrypt );
126
-}
127
-
128
-/**
129
- * Decrypt data
130
- *
131
- * @v ctx		Context
132
- * @v src		Data to decrypt
133
- * @v dst		Buffer for decrypted data
134
- * @v len		Length of data
135
- */
136
-static void aes_decrypt ( void *ctx, const void *src, void *dst,
137
-			  size_t len ) {
138
-	struct aes_context *aes_ctx = ctx;
139
-
140
-	assert ( len == AES_BLOCKSIZE );
141
-	if ( ! aes_ctx->decrypting ) {
142
-		AES_convert_key ( &aes_ctx->axtls_ctx );
143
-		aes_ctx->decrypting = 1;
144
-	}
145
-	aes_call_axtls ( &aes_ctx->axtls_ctx, src, dst, axtls_aes_decrypt );
146
-}
147
-
148
-/** Basic AES algorithm */
149
-struct cipher_algorithm aes_algorithm = {
150
-	.name = "aes",
151
-	.ctxsize = sizeof ( struct aes_context ),
152
-	.blocksize = AES_BLOCKSIZE,
153
-	.setkey = aes_setkey,
154
-	.setiv = aes_setiv,
155
-	.encrypt = aes_encrypt,
156
-	.decrypt = aes_decrypt,
157
-};
158
-
159
-/* AES in Electronic Codebook mode */
160
-ECB_CIPHER ( aes_ecb, aes_ecb_algorithm,
161
-	     aes_algorithm, struct aes_context, AES_BLOCKSIZE );
162
-
163
-/* AES in Cipher Block Chaining mode */
164
-CBC_CIPHER ( aes_cbc, aes_cbc_algorithm,
165
-	     aes_algorithm, struct aes_context, AES_BLOCKSIZE );

+ 31
- 12
src/include/ipxe/aes.h 查看文件

@@ -1,30 +1,49 @@
1 1
 #ifndef _IPXE_AES_H
2 2
 #define _IPXE_AES_H
3 3
 
4
-FILE_LICENCE ( GPL2_OR_LATER );
4
+/** @file
5
+ *
6
+ * AES algorithm
7
+ *
8
+ */
5 9
 
6
-struct cipher_algorithm;
10
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
7 11
 
8
-/** Basic AES blocksize */
12
+#include <ipxe/crypto.h>
13
+
14
+/** AES blocksize */
9 15
 #define AES_BLOCKSIZE 16
10 16
 
11
-#include "crypto/axtls/crypto.h"
17
+/** Maximum number of AES rounds */
18
+#define AES_MAX_ROUNDS 15
19
+
20
+/** AES matrix */
21
+union aes_matrix {
22
+	/** Viewed as an array of bytes */
23
+	uint8_t byte[16];
24
+	/** Viewed as an array of four-byte columns */
25
+	uint32_t column[4];
26
+} __attribute__ (( packed ));
27
+
28
+/** AES round keys */
29
+struct aes_round_keys {
30
+	/** Round keys */
31
+	union aes_matrix key[AES_MAX_ROUNDS];
32
+};
12 33
 
13 34
 /** AES context */
14 35
 struct aes_context {
15
-	/** AES context for AXTLS */
16
-	AES_CTX axtls_ctx;
17
-	/** Cipher is being used for decrypting */
18
-	int decrypting;
36
+	/** Encryption keys */
37
+	struct aes_round_keys encrypt;
38
+	/** Decryption keys */
39
+	struct aes_round_keys decrypt;
40
+	/** Number of rounds */
41
+	unsigned int rounds;
19 42
 };
20 43
 
21 44
 /** AES context size */
22 45
 #define AES_CTX_SIZE sizeof ( struct aes_context )
23 46
 
24
-/* AXTLS functions */
25
-extern void axtls_aes_encrypt ( const AES_CTX *ctx, uint32_t *data );
26
-extern void axtls_aes_decrypt ( const AES_CTX *ctx, uint32_t *data );
27
-
28 47
 extern struct cipher_algorithm aes_algorithm;
29 48
 extern struct cipher_algorithm aes_ecb_algorithm;
30 49
 extern struct cipher_algorithm aes_cbc_algorithm;

+ 1
- 1
src/include/ipxe/errfile.h 查看文件

@@ -264,7 +264,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
264 264
 #define ERRFILE_imgmgmt		      ( ERRFILE_OTHER | 0x00050000 )
265 265
 #define ERRFILE_pxe_tftp	      ( ERRFILE_OTHER | 0x00060000 )
266 266
 #define ERRFILE_pxe_udp		      ( ERRFILE_OTHER | 0x00070000 )
267
-#define ERRFILE_axtls_aes	      ( ERRFILE_OTHER | 0x00080000 )
267
+#define ERRFILE_aes	 	      ( ERRFILE_OTHER | 0x00080000 )
268 268
 #define ERRFILE_cipher		      ( ERRFILE_OTHER | 0x00090000 )
269 269
 #define ERRFILE_image_cmd	      ( ERRFILE_OTHER | 0x000a0000 )
270 270
 #define ERRFILE_uri_test	      ( ERRFILE_OTHER | 0x000b0000 )

Loading…
取消
儲存