|  | @@ -0,0 +1,804 @@
 | 
		
	
		
			
			|  | 1 | +/*
 | 
		
	
		
			
			|  | 2 | + * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
 | 
		
	
		
			
			|  | 3 | + *
 | 
		
	
		
			
			|  | 4 | + * This program is free software; you can redistribute it and/or
 | 
		
	
		
			
			|  | 5 | + * modify it under the terms of the GNU General Public License as
 | 
		
	
		
			
			|  | 6 | + * published by the Free Software Foundation; either version 2 of the
 | 
		
	
		
			
			|  | 7 | + * License, or any later version.
 | 
		
	
		
			
			|  | 8 | + *
 | 
		
	
		
			
			|  | 9 | + * This program is distributed in the hope that it will be useful, but
 | 
		
	
		
			
			|  | 10 | + * WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
		
	
		
			
			|  | 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
		
	
		
			
			|  | 12 | + * General Public License for more details.
 | 
		
	
		
			
			|  | 13 | + *
 | 
		
	
		
			
			|  | 14 | + * You should have received a copy of the GNU General Public License
 | 
		
	
		
			
			|  | 15 | + * along with this program; if not, write to the Free Software
 | 
		
	
		
			
			|  | 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | 
		
	
		
			
			|  | 17 | + * 02110-1301, USA.
 | 
		
	
		
			
			|  | 18 | + *
 | 
		
	
		
			
			|  | 19 | + * You can also choose to distribute this program under the terms of
 | 
		
	
		
			
			|  | 20 | + * the Unmodified Binary Distribution Licence (as given in the file
 | 
		
	
		
			
			|  | 21 | + * COPYING.UBDL), provided that you have satisfied its requirements.
 | 
		
	
		
			
			|  | 22 | + */
 | 
		
	
		
			
			|  | 23 | +
 | 
		
	
		
			
			|  | 24 | +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 | 
		
	
		
			
			|  | 25 | +
 | 
		
	
		
			
			|  | 26 | +/** @file
 | 
		
	
		
			
			|  | 27 | + *
 | 
		
	
		
			
			|  | 28 | + * AES algorithm
 | 
		
	
		
			
			|  | 29 | + *
 | 
		
	
		
			
			|  | 30 | + */
 | 
		
	
		
			
			|  | 31 | +
 | 
		
	
		
			
			|  | 32 | +#include <stdint.h>
 | 
		
	
		
			
			|  | 33 | +#include <string.h>
 | 
		
	
		
			
			|  | 34 | +#include <errno.h>
 | 
		
	
		
			
			|  | 35 | +#include <assert.h>
 | 
		
	
		
			
			|  | 36 | +#include <byteswap.h>
 | 
		
	
		
			
			|  | 37 | +#include <ipxe/rotate.h>
 | 
		
	
		
			
			|  | 38 | +#include <ipxe/crypto.h>
 | 
		
	
		
			
			|  | 39 | +#include <ipxe/ecb.h>
 | 
		
	
		
			
			|  | 40 | +#include <ipxe/cbc.h>
 | 
		
	
		
			
			|  | 41 | +#include <ipxe/aes.h>
 | 
		
	
		
			
			|  | 42 | +
 | 
		
	
		
			
/** AES strides
 *
 * These are the strides (modulo 16) used to walk through the AES
 * input state bytes in order of byte position after [Inv]ShiftRows.
 *
 * The input byte for output byte offset N is in->byte[(stride*N)&0xf]
 * (see aes_column() and aes_final()).  Note that -3 is congruent to
 * +13 modulo 16, so conversion to an unsigned stride preserves the
 * intended walk order.
 */
enum aes_stride {
	/** Input stride for ShiftRows
	 *
	 *    0 4 8 c
	 *     \ \ \
	 *    1 5 9 d
	 *     \ \ \
	 *    2 6 a e
	 *     \ \ \
	 *    3 7 b f
	 */
	AES_STRIDE_SHIFTROWS = +5,
	/** Input stride for InvShiftRows
	 *
	 *    0 4 8 c
	 *     / / /
	 *    1 5 9 d
	 *     / / /
	 *    2 6 a e
	 *     / / /
	 *    3 7 b f
	 */
	AES_STRIDE_INVSHIFTROWS = -3,
};
 | 
		
	
		
			
			|  | 72 | +
 | 
		
	
		
			
/** A single AES lookup table entry
 *
 * This represents the product (in the Galois field GF(2^8)) of an
 * eight-byte vector multiplier with a single scalar multiplicand.
 *
 * The vector multipliers used for AES will be {1,1,1,3,2,1,1,3} for
 * MixColumns and {1,9,13,11,14,9,13,11} for InvMixColumns.  This
 * allows for the result of multiplying any single column of the
 * [Inv]MixColumns matrix by a scalar value to be obtained simply by
 * extracting the relevant four-byte subset from the lookup table
 * entry.
 *
 * For example, to find the result of multiplying the second column of
 * the MixColumns matrix by the scalar value 0x80:
 *
 * MixColumns column[0]: {			      2,    1,    1,    3 }
 * MixColumns column[1]: {			3,    2,    1,    1	  }
 * MixColumns column[2]: {		  1,    3,    2,    1		  }
 * MixColumns column[3]: {	    1,    1,    3,    2			  }
 * Vector multiplier:	 {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Scalar multiplicand:	   0x80
 * Lookup table entry:	 { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 *
 * The second column of the MixColumns matrix is {3,2,1,1}.  The
 * product of this column with the scalar value 0x80 can be obtained
 * by extracting the relevant four-byte subset of the lookup table
 * entry:
 *
 * MixColumns column[1]: {			3,    2,    1,    1	  }
 * Vector multiplier:	 {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Lookup table entry:	 { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 * Product:		 {		     0x9b, 0x1b, 0x80, 0x80	  }
 *
 * The column lookups require only seven bytes of the eight-byte
 * entry: the remaining (first) byte is used to hold the scalar
 * multiplicand itself (i.e. the first byte of the vector multiplier
 * is always chosen to be 1).
 */
union aes_table_entry {
	/** Viewed as an array of bytes
	 *
	 * byte[0] holds the scalar multiplicand itself; see
	 * aes_entry_column() for how four-byte column subsets are
	 * extracted from byte[1..7].
	 */
	uint8_t byte[8];
} __attribute__ (( packed ));
 | 
		
	
		
			
			|  | 115 | +
 | 
		
	
		
			
/** An AES lookup table
 *
 * This represents the products (in the Galois field GF(2^8)) of a
 * constant eight-byte vector multiplier with all possible 256 scalar
 * multiplicands.
 *
 * The entries are indexed by the AES [Inv]SubBytes S-box output
 * values (denoted S(N)).  This allows for the result of multiplying
 * any single column of the [Inv]MixColumns matrix by S(N) to be
 * obtained simply by extracting the relevant four-byte subset from
 * the Nth table entry.  For example:
 *
 * Input byte (N):	   0x3a
 * SubBytes output S(N):   0x80
 * MixColumns column[1]: {			3,    2,    1,    1	  }
 * Vector multiplier:	 {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Table entry[0x3a]:	 { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 * Product:		 {		     0x9b, 0x1b, 0x80, 0x80	  }
 *
 * Since the first byte of the eight-byte vector multiplier is always
 * chosen to be 1, the value of S(N) may be looked up by extracting
 * the first byte of the Nth table entry.
 */
struct aes_table {
	/** Table entries, indexed by S(N) */
	union aes_table_entry entry[256];
} __attribute__ (( aligned ( 8 ) ));
 | 
		
	
		
			
			|  | 143 | +
 | 
		
	
		
			
/** AES MixColumns lookup table
 *
 * Entries are filled in at run time via aes_mixcolumns_entry();
 * generation presumably happens during key scheduling — the caller is
 * outside this excerpt.
 */
static struct aes_table aes_mixcolumns;

/** AES InvMixColumns lookup table
 *
 * Entries are filled in at run time via aes_invmixcolumns_entry().
 */
static struct aes_table aes_invmixcolumns;
 | 
		
	
		
			
			|  | 149 | +
 | 
		
	
		
			
/**
 * Multiply [Inv]MixColumns matrix column by scalar multiplicand
 *
 * @v entry		AES lookup table entry for scalar multiplicand
 * @v column		[Inv]MixColumns matrix column index (0-3)
 * @ret product		Product of matrix column with scalar multiplicand
 *
 * The product is returned as the four entry bytes in host memory
 * order, matching the layout of an aes_matrix column.
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_entry_column ( const union aes_table_entry *entry, unsigned int column ) {
	const uint8_t *first __attribute__ (( may_alias ));

	/* Locate start of relevant four-byte subset.  The may_alias
	 * attribute permits the 32-bit read through a byte pointer
	 * without violating strict-aliasing rules.
	 */
	first = &entry->byte[ 4 - column ];

	/* Extract this four-byte subset.
	 *
	 * NOTE(review): for columns 1-3 this is a misaligned 32-bit
	 * load (entry is only 8-byte aligned); this appears to rely
	 * on the platform supporting unaligned loads (true on x86) —
	 * confirm for other architectures.
	 */
	return ( *( ( uint32_t * ) first ) );
}
 | 
		
	
		
			
			|  | 167 | +
 | 
		
	
		
			
/**
 * Multiply [Inv]MixColumns matrix column by S-boxed input byte
 *
 * @v table		AES lookup table
 * @v stride		AES row shift stride
 * @v in		AES input state
 * @v offset		Output byte offset (after [Inv]ShiftRows)
 * @ret product		Product of matrix column with S(input byte)
 *
 * Note that the specified offset is not the offset of the input byte;
 * it is the offset of the output byte which corresponds to the input
 * byte.  This output byte offset is used to calculate both the input
 * byte offset and to select the appropriate matrix column.
 *
 * With a compile-time constant offset, this function will optimise
 * down to a single "movzbl" (to extract the input byte) and will
 * generate a single x86 memory reference expression which can then be
 * used directly within a single "xorl" instruction.
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_column ( const struct aes_table *table, size_t stride,
	     const union aes_matrix *in, size_t offset ) {
	const union aes_table_entry *entry;
	unsigned int byte;

	/* Extract input byte corresponding to this output byte offset
	 * (i.e. perform [Inv]ShiftRows).  The "& 0xf" wraps the walk
	 * within the 16-byte state.
	 */
	byte = in->byte[ ( stride * offset ) & 0xf ];

	/* Locate lookup table entry for this input byte (i.e. perform
	 * [Inv]SubBytes).
	 */
	entry = &table->entry[byte];

	/* Multiply appropriate matrix column by this input byte
	 * (i.e. perform [Inv]MixColumns).
	 */
	return aes_entry_column ( entry, ( offset & 0x3 ) );
}
 | 
		
	
		
			
			|  | 208 | +
 | 
		
	
		
			
/**
 * Calculate intermediate round output column
 *
 * @v table		AES lookup table
 * @v stride		AES row shift stride
 * @v in		AES input state
 * @v key		AES round key
 * @v column		Column index (0-3)
 * @ret output		Output column value
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_output ( const struct aes_table *table, size_t stride,
	     const union aes_matrix *in, const union aes_matrix *key,
	     unsigned int column ) {
	/* Byte offset of the first output byte in this column */
	size_t offset = ( column * 4 );

	/* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
	 * AddRoundKey for this column.  The loop is unrolled to allow
	 * for the required compile-time constant optimisations.
	 */
	return ( aes_column ( table, stride, in, ( offset + 0 ) ) ^
		 aes_column ( table, stride, in, ( offset + 1 ) ) ^
		 aes_column ( table, stride, in, ( offset + 2 ) ) ^
		 aes_column ( table, stride, in, ( offset + 3 ) ) ^
		 key->column[column] );
}
 | 
		
	
		
			
			|  | 235 | +
 | 
		
	
		
			
/**
 * Perform a single intermediate round
 *
 * @v table		AES lookup table
 * @v stride		AES row shift stride
 * @v in		AES input state
 * @v out		AES output state (must not overlap @c in)
 * @v key		AES round key
 */
static inline __attribute__ (( always_inline )) void
aes_round ( const struct aes_table *table, size_t stride,
	    const union aes_matrix *in, union aes_matrix *out,
	    const union aes_matrix *key ) {

	/* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
	 * AddRoundKey for all columns.  The loop is unrolled to allow
	 * for the required compile-time constant optimisations.
	 */
	out->column[0] = aes_output ( table, stride, in, key, 0 );
	out->column[1] = aes_output ( table, stride, in, key, 1 );
	out->column[2] = aes_output ( table, stride, in, key, 2 );
	out->column[3] = aes_output ( table, stride, in, key, 3 );
}
 | 
		
	
		
			
			|  | 259 | +
 | 
		
	
		
			
			|  | 260 | +/**
 | 
		
	
		
			
			|  | 261 | + * Perform encryption intermediate rounds
 | 
		
	
		
			
			|  | 262 | + *
 | 
		
	
		
			
			|  | 263 | + * @v in		AES input state
 | 
		
	
		
			
			|  | 264 | + * @v out		AES output state
 | 
		
	
		
			
			|  | 265 | + * @v key		Round keys
 | 
		
	
		
			
			|  | 266 | + * @v rounds		Number of rounds (must be odd)
 | 
		
	
		
			
			|  | 267 | + *
 | 
		
	
		
			
			|  | 268 | + * This function is deliberately marked as non-inlinable to ensure
 | 
		
	
		
			
			|  | 269 | + * maximal availability of registers for GCC's register allocator,
 | 
		
	
		
			
			|  | 270 | + * which has a tendency to otherwise spill performance-critical
 | 
		
	
		
			
			|  | 271 | + * registers to the stack.
 | 
		
	
		
			
			|  | 272 | + */
 | 
		
	
		
			
			|  | 273 | +static __attribute__ (( noinline )) void
 | 
		
	
		
			
			|  | 274 | +aes_encrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
 | 
		
	
		
			
			|  | 275 | +		     const union aes_matrix *key, unsigned int rounds ) {
 | 
		
	
		
			
			|  | 276 | +	union aes_matrix *tmp;
 | 
		
	
		
			
			|  | 277 | +
 | 
		
	
		
			
			|  | 278 | +	/* Perform intermediate rounds */
 | 
		
	
		
			
			|  | 279 | +	do {
 | 
		
	
		
			
			|  | 280 | +		/* Perform one intermediate round */
 | 
		
	
		
			
			|  | 281 | +		aes_round ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
 | 
		
	
		
			
			|  | 282 | +			    in, out, key++ );
 | 
		
	
		
			
			|  | 283 | +
 | 
		
	
		
			
			|  | 284 | +		/* Swap input and output states for next round */
 | 
		
	
		
			
			|  | 285 | +		tmp = in;
 | 
		
	
		
			
			|  | 286 | +		in = out;
 | 
		
	
		
			
			|  | 287 | +		out = tmp;
 | 
		
	
		
			
			|  | 288 | +
 | 
		
	
		
			
			|  | 289 | +	} while ( --rounds );
 | 
		
	
		
			
			|  | 290 | +}
 | 
		
	
		
			
			|  | 291 | +
 | 
		
	
		
			
			|  | 292 | +/**
 | 
		
	
		
			
			|  | 293 | + * Perform decryption intermediate rounds
 | 
		
	
		
			
			|  | 294 | + *
 | 
		
	
		
			
			|  | 295 | + * @v in		AES input state
 | 
		
	
		
			
			|  | 296 | + * @v out		AES output state
 | 
		
	
		
			
			|  | 297 | + * @v key		Round keys
 | 
		
	
		
			
			|  | 298 | + * @v rounds		Number of rounds (must be odd)
 | 
		
	
		
			
			|  | 299 | + *
 | 
		
	
		
			
			|  | 300 | + * As with aes_encrypt_rounds(), this function is deliberately marked
 | 
		
	
		
			
			|  | 301 | + * as non-inlinable.
 | 
		
	
		
			
			|  | 302 | + *
 | 
		
	
		
			
			|  | 303 | + * This function could potentially use the same binary code as is used
 | 
		
	
		
			
			|  | 304 | + * for encryption.  To compensate for the difference between ShiftRows
 | 
		
	
		
			
			|  | 305 | + * and InvShiftRows, half of the input byte offsets would have to be
 | 
		
	
		
			
			|  | 306 | + * modifiable at runtime (half by an offset of +4/-4, half by an
 | 
		
	
		
			
			|  | 307 | + * offset of -4/+4 for ShiftRows/InvShiftRows).  This can be
 | 
		
	
		
			
			|  | 308 | + * accomplished in x86 assembly within the number of available
 | 
		
	
		
			
			|  | 309 | + * registers, but GCC's register allocator struggles to do so,
 | 
		
	
		
			
			|  | 310 | + * resulting in a significant performance decrease due to registers
 | 
		
	
		
			
			|  | 311 | + * being spilled to the stack.  We therefore use two separate but very
 | 
		
	
		
			
			|  | 312 | + * similar binary functions based on the same C source.
 | 
		
	
		
			
			|  | 313 | + */
 | 
		
	
		
			
			|  | 314 | +static __attribute__ (( noinline )) void
 | 
		
	
		
			
			|  | 315 | +aes_decrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
 | 
		
	
		
			
			|  | 316 | +		     const union aes_matrix *key, unsigned int rounds ) {
 | 
		
	
		
			
			|  | 317 | +	union aes_matrix *tmp;
 | 
		
	
		
			
			|  | 318 | +
 | 
		
	
		
			
			|  | 319 | +	/* Perform intermediate rounds */
 | 
		
	
		
			
			|  | 320 | +	do {
 | 
		
	
		
			
			|  | 321 | +		/* Perform one intermediate round */
 | 
		
	
		
			
			|  | 322 | +		aes_round ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS,
 | 
		
	
		
			
			|  | 323 | +			    in, out, key++ );
 | 
		
	
		
			
			|  | 324 | +
 | 
		
	
		
			
			|  | 325 | +		/* Swap input and output states for next round */
 | 
		
	
		
			
			|  | 326 | +		tmp = in;
 | 
		
	
		
			
			|  | 327 | +		in = out;
 | 
		
	
		
			
			|  | 328 | +		out = tmp;
 | 
		
	
		
			
			|  | 329 | +
 | 
		
	
		
			
			|  | 330 | +	} while ( --rounds );
 | 
		
	
		
			
			|  | 331 | +}
 | 
		
	
		
			
			|  | 332 | +
 | 
		
	
		
			
			|  | 333 | +/**
 | 
		
	
		
			
			|  | 334 | + * Perform standalone AddRoundKey
 | 
		
	
		
			
			|  | 335 | + *
 | 
		
	
		
			
			|  | 336 | + * @v state		AES state
 | 
		
	
		
			
			|  | 337 | + * @v key		AES round key
 | 
		
	
		
			
			|  | 338 | + */
 | 
		
	
		
			
			|  | 339 | +static inline __attribute__ (( always_inline )) void
 | 
		
	
		
			
			|  | 340 | +aes_addroundkey ( union aes_matrix *state, const union aes_matrix *key ) {
 | 
		
	
		
			
			|  | 341 | +
 | 
		
	
		
			
			|  | 342 | +	state->column[0] ^= key->column[0];
 | 
		
	
		
			
			|  | 343 | +	state->column[1] ^= key->column[1];
 | 
		
	
		
			
			|  | 344 | +	state->column[2] ^= key->column[2];
 | 
		
	
		
			
			|  | 345 | +	state->column[3] ^= key->column[3];
 | 
		
	
		
			
			|  | 346 | +}
 | 
		
	
		
			
			|  | 347 | +
 | 
		
	
		
			
/**
 * Perform final round
 *
 * @v table		AES lookup table
 * @v stride		AES row shift stride
 * @v in		AES input state
 * @v out		AES output state
 * @v key		AES round key
 *
 * The final AES round omits [Inv]MixColumns: each output byte is the
 * plain S-box value of its (row-shifted) input byte, followed by
 * AddRoundKey.
 */
static void aes_final ( const struct aes_table *table, size_t stride,
			const union aes_matrix *in, union aes_matrix *out,
			const union aes_matrix *key ) {
	const union aes_table_entry *entry;
	unsigned int byte;
	size_t out_offset;
	size_t in_offset;

	/* Perform [Inv]ShiftRows and [Inv]SubBytes.  The input offset
	 * advances by the row shift stride (modulo 16) for each
	 * successive output byte.
	 */
	for ( out_offset = 0, in_offset = 0 ; out_offset < 16 ;
	      out_offset++, in_offset = ( ( in_offset + stride ) & 0xf ) ) {

		/* Extract input byte (i.e. perform [Inv]ShiftRows) */
		byte = in->byte[in_offset];

		/* Locate lookup table entry for this input byte
		 * (i.e. perform [Inv]SubBytes).
		 */
		entry = &table->entry[byte];

		/* Store output byte: the first entry byte holds the
		 * raw S-box value S(N), since the first element of
		 * the vector multiplier is always 1.
		 */
		out->byte[out_offset] = entry->byte[0];
	}

	/* Perform AddRoundKey */
	aes_addroundkey ( out, key );
}
 | 
		
	
		
			
			|  | 384 | +
 | 
		
	
		
			
			|  | 385 | +/**
 | 
		
	
		
			
			|  | 386 | + * Encrypt data
 | 
		
	
		
			
			|  | 387 | + *
 | 
		
	
		
			
			|  | 388 | + * @v ctx		Context
 | 
		
	
		
			
			|  | 389 | + * @v src		Data to encrypt
 | 
		
	
		
			
			|  | 390 | + * @v dst		Buffer for encrypted data
 | 
		
	
		
			
			|  | 391 | + * @v len		Length of data
 | 
		
	
		
			
			|  | 392 | + */
 | 
		
	
		
			
			|  | 393 | +static void aes_encrypt ( void *ctx, const void *src, void *dst, size_t len ) {
 | 
		
	
		
			
			|  | 394 | +	struct aes_context *aes = ctx;
 | 
		
	
		
			
			|  | 395 | +	union aes_matrix buffer[2];
 | 
		
	
		
			
			|  | 396 | +	union aes_matrix *in = &buffer[0];
 | 
		
	
		
			
			|  | 397 | +	union aes_matrix *out = &buffer[1];
 | 
		
	
		
			
			|  | 398 | +	unsigned int rounds = aes->rounds;
 | 
		
	
		
			
			|  | 399 | +
 | 
		
	
		
			
			|  | 400 | +	/* Sanity check */
 | 
		
	
		
			
			|  | 401 | +	assert ( len == sizeof ( *in ) );
 | 
		
	
		
			
			|  | 402 | +
 | 
		
	
		
			
			|  | 403 | +	/* Initialise input state */
 | 
		
	
		
			
			|  | 404 | +	memcpy ( in, src, sizeof ( *in ) );
 | 
		
	
		
			
			|  | 405 | +
 | 
		
	
		
			
			|  | 406 | +	/* Perform initial round (AddRoundKey) */
 | 
		
	
		
			
			|  | 407 | +	aes_addroundkey ( in, &aes->encrypt.key[0] );
 | 
		
	
		
			
			|  | 408 | +
 | 
		
	
		
			
			|  | 409 | +	/* Perform intermediate rounds (ShiftRows, SubBytes,
 | 
		
	
		
			
			|  | 410 | +	 * MixColumns, AddRoundKey).
 | 
		
	
		
			
			|  | 411 | +	 */
 | 
		
	
		
			
			|  | 412 | +	aes_encrypt_rounds ( in, out, &aes->encrypt.key[1], ( rounds - 2 ) );
 | 
		
	
		
			
			|  | 413 | +	in = out;
 | 
		
	
		
			
			|  | 414 | +
 | 
		
	
		
			
			|  | 415 | +	/* Perform final round (ShiftRows, SubBytes, AddRoundKey) */
 | 
		
	
		
			
			|  | 416 | +	out = dst;
 | 
		
	
		
			
			|  | 417 | +	aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS, in, out,
 | 
		
	
		
			
			|  | 418 | +		    &aes->encrypt.key[ rounds - 1 ] );
 | 
		
	
		
			
			|  | 419 | +}
 | 
		
	
		
			
			|  | 420 | +
 | 
		
	
		
			
			|  | 421 | +/**
 | 
		
	
		
			
			|  | 422 | + * Decrypt data
 | 
		
	
		
			
			|  | 423 | + *
 | 
		
	
		
			
			|  | 424 | + * @v ctx		Context
 | 
		
	
		
			
			|  | 425 | + * @v src		Data to decrypt
 | 
		
	
		
			
			|  | 426 | + * @v dst		Buffer for decrypted data
 | 
		
	
		
			
			|  | 427 | + * @v len		Length of data
 | 
		
	
		
			
			|  | 428 | + */
 | 
		
	
		
			
			|  | 429 | +static void aes_decrypt ( void *ctx, const void *src, void *dst, size_t len ) {
 | 
		
	
		
			
			|  | 430 | +	struct aes_context *aes = ctx;
 | 
		
	
		
			
			|  | 431 | +	union aes_matrix buffer[2];
 | 
		
	
		
			
			|  | 432 | +	union aes_matrix *in = &buffer[0];
 | 
		
	
		
			
			|  | 433 | +	union aes_matrix *out = &buffer[1];
 | 
		
	
		
			
			|  | 434 | +	unsigned int rounds = aes->rounds;
 | 
		
	
		
			
			|  | 435 | +
 | 
		
	
		
			
			|  | 436 | +	/* Sanity check */
 | 
		
	
		
			
			|  | 437 | +	assert ( len == sizeof ( *in ) );
 | 
		
	
		
			
			|  | 438 | +
 | 
		
	
		
			
			|  | 439 | +	/* Initialise input state */
 | 
		
	
		
			
			|  | 440 | +	memcpy ( in, src, sizeof ( *in ) );
 | 
		
	
		
			
			|  | 441 | +
 | 
		
	
		
			
			|  | 442 | +	/* Perform initial round (AddRoundKey) */
 | 
		
	
		
			
			|  | 443 | +	aes_addroundkey ( in, &aes->decrypt.key[0] );
 | 
		
	
		
			
			|  | 444 | +
 | 
		
	
		
			
			|  | 445 | +	/* Perform intermediate rounds (InvShiftRows, InvSubBytes,
 | 
		
	
		
			
			|  | 446 | +	 * InvMixColumns, AddRoundKey).
 | 
		
	
		
			
			|  | 447 | +	 */
 | 
		
	
		
			
			|  | 448 | +	aes_decrypt_rounds ( in, out, &aes->decrypt.key[1], ( rounds - 2 ) );
 | 
		
	
		
			
			|  | 449 | +	in = out;
 | 
		
	
		
			
			|  | 450 | +
 | 
		
	
		
			
			|  | 451 | +	/* Perform final round (InvShiftRows, InvSubBytes, AddRoundKey) */
 | 
		
	
		
			
			|  | 452 | +	out = dst;
 | 
		
	
		
			
			|  | 453 | +	aes_final ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS, in, out,
 | 
		
	
		
			
			|  | 454 | +		    &aes->decrypt.key[ rounds - 1 ] );
 | 
		
	
		
			
			|  | 455 | +}
 | 
		
	
		
			
			|  | 456 | +
 | 
		
	
		
			
			|  | 457 | +/**
 | 
		
	
		
			
			|  | 458 | + * Multiply a polynomial by (x) modulo (x^8 + x^4 + x^3 + x^2 + 1) in GF(2^8)
 | 
		
	
		
			
			|  | 459 | + *
 | 
		
	
		
			
			|  | 460 | + * @v poly		Polynomial to be multiplied
 | 
		
	
		
			
			|  | 461 | + * @ret result		Result
 | 
		
	
		
			
			|  | 462 | + */
 | 
		
	
		
			
			|  | 463 | +static __attribute__ (( const )) unsigned int aes_double ( unsigned int poly ) {
 | 
		
	
		
			
			|  | 464 | +
 | 
		
	
		
			
			|  | 465 | +	/* Multiply polynomial by (x), placing the resulting x^8
 | 
		
	
		
			
			|  | 466 | +	 * coefficient in the LSB (i.e. rotate byte left by one).
 | 
		
	
		
			
			|  | 467 | +	 */
 | 
		
	
		
			
			|  | 468 | +	poly = rol8 ( poly, 1 );
 | 
		
	
		
			
			|  | 469 | +
 | 
		
	
		
			
			|  | 470 | +	/* If coefficient of x^8 (in LSB) is non-zero, then reduce by
 | 
		
	
		
			
			|  | 471 | +	 * subtracting (x^8 + x^4 + x^3 + x^2 + 1) in GF(2^8).
 | 
		
	
		
			
			|  | 472 | +	 */
 | 
		
	
		
			
			|  | 473 | +	if ( poly & 0x01 ) {
 | 
		
	
		
			
			|  | 474 | +		poly ^= 0x01; /* Subtract x^8 (currently in LSB) */
 | 
		
	
		
			
			|  | 475 | +		poly ^= 0x1b; /* Subtract (x^4 + x^3 + x^2 + 1) */
 | 
		
	
		
			
			|  | 476 | +	}
 | 
		
	
		
			
			|  | 477 | +
 | 
		
	
		
			
			|  | 478 | +	return poly;
 | 
		
	
		
			
			|  | 479 | +}
 | 
		
	
		
			
			|  | 480 | +
 | 
		
	
		
			
			|  | 481 | +/**
 | 
		
	
		
			
			|  | 482 | + * Fill in MixColumns lookup table entry
 | 
		
	
		
			
			|  | 483 | + *
 | 
		
	
		
			
			|  | 484 | + * @v entry		AES lookup table entry for scalar multiplicand
 | 
		
	
		
			
			|  | 485 | + *
 | 
		
	
		
			
			|  | 486 | + * The MixColumns lookup table vector multiplier is {1,1,1,3,2,1,1,3}.
 | 
		
	
		
			
			|  | 487 | + */
 | 
		
	
		
			
			|  | 488 | +static void aes_mixcolumns_entry ( union aes_table_entry *entry ) {
 | 
		
	
		
			
			|  | 489 | +	unsigned int scalar_x_1;
 | 
		
	
		
			
			|  | 490 | +	unsigned int scalar_x;
 | 
		
	
		
			
			|  | 491 | +	unsigned int scalar;
 | 
		
	
		
			
			|  | 492 | +
 | 
		
	
		
			
			|  | 493 | +	/* Retrieve scalar multiplicand */
 | 
		
	
		
			
			|  | 494 | +	scalar = entry->byte[0];
 | 
		
	
		
			
			|  | 495 | +	entry->byte[1] = scalar;
 | 
		
	
		
			
			|  | 496 | +	entry->byte[2] = scalar;
 | 
		
	
		
			
			|  | 497 | +	entry->byte[5] = scalar;
 | 
		
	
		
			
			|  | 498 | +	entry->byte[6] = scalar;
 | 
		
	
		
			
			|  | 499 | +
 | 
		
	
		
			
			|  | 500 | +	/* Calculate scalar multiplied by (x) */
 | 
		
	
		
			
			|  | 501 | +	scalar_x = aes_double ( scalar );
 | 
		
	
		
			
			|  | 502 | +	entry->byte[4] = scalar_x;
 | 
		
	
		
			
			|  | 503 | +
 | 
		
	
		
			
			|  | 504 | +	/* Calculate scalar multiplied by (x + 1) */
 | 
		
	
		
			
			|  | 505 | +	scalar_x_1 = ( scalar_x ^ scalar );
 | 
		
	
		
			
			|  | 506 | +	entry->byte[3] = scalar_x_1;
 | 
		
	
		
			
			|  | 507 | +	entry->byte[7] = scalar_x_1;
 | 
		
	
		
			
			|  | 508 | +}
 | 
		
	
		
			
			|  | 509 | +
 | 
		
	
		
			
			|  | 510 | +/**
 | 
		
	
		
			
			|  | 511 | + * Fill in InvMixColumns lookup table entry
 | 
		
	
		
			
			|  | 512 | + *
 | 
		
	
		
			
			|  | 513 | + * @v entry		AES lookup table entry for scalar multiplicand
 | 
		
	
		
			
			|  | 514 | + *
 | 
		
	
		
			
			|  | 515 | + * The InvMixColumns lookup table vector multiplier is {1,9,13,11,14,9,13,11}.
 | 
		
	
		
			
			|  | 516 | + */
 | 
		
	
		
			
			|  | 517 | +static void aes_invmixcolumns_entry ( union aes_table_entry *entry ) {
 | 
		
	
		
			
			|  | 518 | +	unsigned int scalar_x3_x2_x;
 | 
		
	
		
			
			|  | 519 | +	unsigned int scalar_x3_x2_1;
 | 
		
	
		
			
			|  | 520 | +	unsigned int scalar_x3_x2;
 | 
		
	
		
			
			|  | 521 | +	unsigned int scalar_x3_x_1;
 | 
		
	
		
			
			|  | 522 | +	unsigned int scalar_x3_1;
 | 
		
	
		
			
			|  | 523 | +	unsigned int scalar_x3;
 | 
		
	
		
			
			|  | 524 | +	unsigned int scalar_x2;
 | 
		
	
		
			
			|  | 525 | +	unsigned int scalar_x;
 | 
		
	
		
			
			|  | 526 | +	unsigned int scalar;
 | 
		
	
		
			
			|  | 527 | +
 | 
		
	
		
			
			|  | 528 | +	/* Retrieve scalar multiplicand */
 | 
		
	
		
			
			|  | 529 | +	scalar = entry->byte[0];
 | 
		
	
		
			
			|  | 530 | +
 | 
		
	
		
			
			|  | 531 | +	/* Calculate scalar multiplied by (x) */
 | 
		
	
		
			
			|  | 532 | +	scalar_x = aes_double ( scalar );
 | 
		
	
		
			
			|  | 533 | +
 | 
		
	
		
			
			|  | 534 | +	/* Calculate scalar multiplied by (x^2) */
 | 
		
	
		
			
			|  | 535 | +	scalar_x2 = aes_double ( scalar_x );
 | 
		
	
		
			
			|  | 536 | +
 | 
		
	
		
			
			|  | 537 | +	/* Calculate scalar multiplied by (x^3) */
 | 
		
	
		
			
			|  | 538 | +	scalar_x3 = aes_double ( scalar_x2 );
 | 
		
	
		
			
			|  | 539 | +
 | 
		
	
		
			
			|  | 540 | +	/* Calculate scalar multiplied by (x^3 + 1) */
 | 
		
	
		
			
			|  | 541 | +	scalar_x3_1 = ( scalar_x3 ^ scalar );
 | 
		
	
		
			
			|  | 542 | +	entry->byte[1] = scalar_x3_1;
 | 
		
	
		
			
			|  | 543 | +	entry->byte[5] = scalar_x3_1;
 | 
		
	
		
			
			|  | 544 | +
 | 
		
	
		
			
			|  | 545 | +	/* Calculate scalar multiplied by (x^3 + x + 1) */
 | 
		
	
		
			
			|  | 546 | +	scalar_x3_x_1 = ( scalar_x3_1 ^ scalar_x );
 | 
		
	
		
			
			|  | 547 | +	entry->byte[3] = scalar_x3_x_1;
 | 
		
	
		
			
			|  | 548 | +	entry->byte[7] = scalar_x3_x_1;
 | 
		
	
		
			
			|  | 549 | +
 | 
		
	
		
			
			|  | 550 | +	/* Calculate scalar multiplied by (x^3 + x^2) */
 | 
		
	
		
			
			|  | 551 | +	scalar_x3_x2 = ( scalar_x3 ^ scalar_x2 );
 | 
		
	
		
			
			|  | 552 | +
 | 
		
	
		
			
			|  | 553 | +	/* Calculate scalar multiplied by (x^3 + x^2 + 1) */
 | 
		
	
		
			
			|  | 554 | +	scalar_x3_x2_1 = ( scalar_x3_x2 ^ scalar );
 | 
		
	
		
			
			|  | 555 | +	entry->byte[2] = scalar_x3_x2_1;
 | 
		
	
		
			
			|  | 556 | +	entry->byte[6] = scalar_x3_x2_1;
 | 
		
	
		
			
			|  | 557 | +
 | 
		
	
		
			
			|  | 558 | +	/* Calculate scalar multiplied by (x^3 + x^2 + x) */
 | 
		
	
		
			
			|  | 559 | +	scalar_x3_x2_x = ( scalar_x3_x2 ^ scalar_x );
 | 
		
	
		
			
			|  | 560 | +	entry->byte[4] = scalar_x3_x2_x;
 | 
		
	
		
			
			|  | 561 | +}
 | 
		
	
		
			
			|  | 562 | +
 | 
		
	
		
			
/**
 * Generate AES lookup tables
 *
 * Populates the S-box, inverse S-box, MixColumns, and InvMixColumns
 * lookup tables at run time, avoiding the need to store the
 * precomputed tables in the binary image.
 */
static void aes_generate ( void ) {
	union aes_table_entry *entry;
	union aes_table_entry *inventry;
	/* Polynomial and its multiplicative inverse: both start at 1,
	 * and are walked in lockstep so that (poly * invpoly) == 1
	 * holds on every iteration.
	 */
	unsigned int poly = 0x01;
	unsigned int invpoly = 0x01;
	unsigned int transformed;
	unsigned int i;

	/* Iterate over non-zero values of GF(2^8) using generator (x + 1) */
	do {

		/* Multiply polynomial by (x + 1) */
		poly ^= aes_double ( poly );

		/* Divide inverse polynomial by (x + 1).  This code
		 * fragment is taken directly from the Wikipedia page
		 * on the Rijndael S-box.  An explanation of why it
		 * works would be greatly appreciated.
		 */
		invpoly ^= ( invpoly << 1 );
		invpoly ^= ( invpoly << 2 );
		invpoly ^= ( invpoly << 4 );
		if ( invpoly & 0x80 )
			invpoly ^= 0x09;
		invpoly &= 0xff;

		/* Apply affine transformation (the fixed bit-level
		 * transform defined by the AES specification for the
		 * S-box: XOR of 0x63 with four rotations of the
		 * multiplicative inverse).
		 */
		transformed = ( 0x63 ^ invpoly ^ rol8 ( invpoly, 1 ) ^
				rol8 ( invpoly, 2 ) ^ rol8 ( invpoly, 3 ) ^
				rol8 ( invpoly, 4 ) );

		/* Populate S-box (within MixColumns lookup table):
		 * S(poly) is the affine transform of poly's inverse.
		 */
		aes_mixcolumns.entry[poly].byte[0] = transformed;

	} while ( poly != 0x01 );

	/* Populate zeroth S-box entry (which has no inverse) */
	aes_mixcolumns.entry[0].byte[0] = 0x63;

	/* Fill in MixColumns and InvMixColumns lookup tables */
	for ( i = 0 ; i < 256 ; i++ ) {

		/* Fill in MixColumns lookup table entry (expands the
		 * S-box value already stored in byte[0] into the full
		 * vector of multiples).
		 */
		entry = &aes_mixcolumns.entry[i];
		aes_mixcolumns_entry ( entry );

		/* Populate inverse S-box (within InvMixColumns lookup
		 * table): since entry->byte[0] == S(i), storing i at
		 * index S(i) yields S^-1.
		 */
		inventry = &aes_invmixcolumns.entry[ entry->byte[0] ];
		inventry->byte[0] = i;

		/* Fill in InvMixColumns lookup table entry */
		aes_invmixcolumns_entry ( inventry );
	}
}
 | 
		
	
		
			
			|  | 621 | +
 | 
		
	
		
			
			|  | 622 | +/**
 | 
		
	
		
			
			|  | 623 | + * Rotate key column
 | 
		
	
		
			
			|  | 624 | + *
 | 
		
	
		
			
			|  | 625 | + * @v column		Key column
 | 
		
	
		
			
			|  | 626 | + * @ret column		Updated key column
 | 
		
	
		
			
			|  | 627 | + */
 | 
		
	
		
			
			|  | 628 | +static inline __attribute__ (( always_inline )) uint32_t
 | 
		
	
		
			
			|  | 629 | +aes_key_rotate ( uint32_t column ) {
 | 
		
	
		
			
			|  | 630 | +
 | 
		
	
		
			
			|  | 631 | +	return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
 | 
		
	
		
			
			|  | 632 | +		 ror32 ( column, 8 ) : rol32 ( column, 8 ) );
 | 
		
	
		
			
			|  | 633 | +}
 | 
		
	
		
			
			|  | 634 | +
 | 
		
	
		
			
			|  | 635 | +/**
 | 
		
	
		
			
			|  | 636 | + * Apply S-box to key column
 | 
		
	
		
			
			|  | 637 | + *
 | 
		
	
		
			
			|  | 638 | + * @v column		Key column
 | 
		
	
		
			
			|  | 639 | + * @ret column		Updated key column
 | 
		
	
		
			
			|  | 640 | + */
 | 
		
	
		
			
			|  | 641 | +static uint32_t aes_key_sbox ( uint32_t column ) {
 | 
		
	
		
			
			|  | 642 | +	unsigned int i;
 | 
		
	
		
			
			|  | 643 | +	uint8_t byte;
 | 
		
	
		
			
			|  | 644 | +
 | 
		
	
		
			
			|  | 645 | +	for ( i = 0 ; i < 4 ; i++ ) {
 | 
		
	
		
			
			|  | 646 | +		byte = ( column & 0xff );
 | 
		
	
		
			
			|  | 647 | +		byte = aes_mixcolumns.entry[byte].byte[0];
 | 
		
	
		
			
			|  | 648 | +		column = ( ( column & ~0xff ) | byte );
 | 
		
	
		
			
			|  | 649 | +		column = rol32 ( column, 8 );
 | 
		
	
		
			
			|  | 650 | +	}
 | 
		
	
		
			
			|  | 651 | +	return column;
 | 
		
	
		
			
			|  | 652 | +}
 | 
		
	
		
			
			|  | 653 | +
 | 
		
	
		
			
			|  | 654 | +/**
 | 
		
	
		
			
			|  | 655 | + * Apply schedule round constant to key column
 | 
		
	
		
			
			|  | 656 | + *
 | 
		
	
		
			
			|  | 657 | + * @v column		Key column
 | 
		
	
		
			
			|  | 658 | + * @v rcon		Round constant
 | 
		
	
		
			
			|  | 659 | + * @ret column		Updated key column
 | 
		
	
		
			
			|  | 660 | + */
 | 
		
	
		
			
			|  | 661 | +static inline __attribute__ (( always_inline )) uint32_t
 | 
		
	
		
			
			|  | 662 | +aes_key_rcon ( uint32_t column, unsigned int rcon ) {
 | 
		
	
		
			
			|  | 663 | +
 | 
		
	
		
			
			|  | 664 | +	return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
 | 
		
	
		
			
			|  | 665 | +		 ( column ^ rcon ) : ( column ^ ( rcon << 24 ) ) );
 | 
		
	
		
			
			|  | 666 | +}
 | 
		
	
		
			
			|  | 667 | +
 | 
		
	
		
			
/**
 * Set key
 *
 * @v ctx		Context
 * @v key		Key
 * @v keylen		Key length (16, 24, or 32 bytes)
 * @ret rc		Return status code
 *
 * Expands the raw key into the full encryption key schedule, then
 * derives the corresponding decryption key schedule in place.
 */
static int aes_setkey ( void *ctx, const void *key, size_t keylen ) {
	struct aes_context *aes = ctx;
	union aes_matrix *enc;
	union aes_matrix *dec;
	union aes_matrix temp;
	union aes_matrix zero;
	unsigned int rcon = 0x01;
	unsigned int rounds;
	/* Byte offset of the current column within its key-length-sized
	 * block of the expanded key.
	 */
	size_t offset = 0;
	uint32_t *prev;
	uint32_t *next;
	uint32_t *end;
	uint32_t tmp;

	/* Generate lookup tables, if not already done (byte[0] of the
	 * zeroth entry is always 0x63 once generated, so a zero value
	 * indicates the tables are still empty).
	 */
	if ( ! aes_mixcolumns.entry[0].byte[0] )
		aes_generate();

	/* Validate key length and calculate number of intermediate rounds */
	switch ( keylen ) {
	case ( 128 / 8 ) :
		rounds = 11;
		break;
	case ( 192 / 8 ) :
		rounds = 13;
		break;
	case ( 256 / 8 ) :
		rounds = 15;
		break;
	default:
		DBGC ( aes, "AES %p unsupported key length (%zd bits)\n",
		       aes, ( keylen * 8 ) );
		return -EINVAL;
	}
	aes->rounds = rounds;
	enc = aes->encrypt.key;
	/* One-past-the-end sentinel for the expanded key columns */
	end = enc[rounds].column;

	/* Copy raw key, then walk the expanded key one 32-bit column
	 * at a time: "prev" trails "next" by exactly one key length.
	 */
	memcpy ( enc, key, keylen );
	prev = enc->column;
	next = ( ( ( void * ) prev ) + keylen );
	tmp = next[-1];

	/* Construct expanded key */
	while ( next < end ) {

		/* If this is the first column of an expanded key
		 * block, or the middle column of an AES-256 key
		 * block, then apply the S-box.  (The "(offset|keylen)
		 * == 48" test matches only offset 16 with keylen 32.)
		 */
		if ( ( offset == 0 ) || ( ( offset | keylen ) == 48 ) )
			tmp = aes_key_sbox ( tmp );

		/* If this is the first column of an expanded key
		 * block then rotate and apply the round constant.
		 */
		if ( offset == 0 ) {
			tmp = aes_key_rotate ( tmp );
			tmp = aes_key_rcon ( tmp, rcon );
			rcon = aes_double ( rcon );
		}

		/* XOR with previous key column */
		tmp ^= *prev;

		/* Store column */
		*next = tmp;

		/* Move to next column, wrapping the block offset at
		 * the key length.
		 */
		offset += sizeof ( *next );
		if ( offset == keylen )
			offset = 0;
		next++;
		prev++;
	}
	DBGC2 ( aes, "AES %p expanded %zd-bit key:\n", aes, ( keylen * 8 ) );
	DBGC2_HDA ( aes, 0, &aes->encrypt, ( rounds * sizeof ( *enc ) ) );

	/* Convert to decryption key: the decryption schedule is the
	 * encryption schedule reversed, with InvMixColumns applied to
	 * all but the first and last round keys.
	 */
	memset ( &zero, 0, sizeof ( zero ) );
	dec = &aes->decrypt.key[ rounds - 1 ];
	/* First encryption round key becomes last decryption round key */
	memcpy ( dec--, enc++, sizeof ( *dec ) );
	while ( dec > aes->decrypt.key ) {
		/* Perform InvMixColumns (by reusing the encryption
		 * final-round code to perform ShiftRows+SubBytes and
		 * reusing the decryption intermediate-round code to
		 * perform InvShiftRows+InvSubBytes+InvMixColumns, all
		 * with a zero encryption key).
		 */
		aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
			    enc++, &temp, &zero );
		aes_decrypt_rounds ( &temp, dec--, &zero, 1 );
	}
	/* Last encryption round key becomes first decryption round key */
	memcpy ( dec--, enc++, sizeof ( *dec ) );
	DBGC2 ( aes, "AES %p inverted %zd-bit key:\n", aes, ( keylen * 8 ) );
	DBGC2_HDA ( aes, 0, &aes->decrypt, ( rounds * sizeof ( *dec ) ) );

	return 0;
}
 | 
		
	
		
			
			|  | 776 | +
 | 
		
	
		
			
/**
 * Set initialisation vector
 *
 * @v ctx		Context
 * @v iv		Initialisation vector
 *
 * The basic AES block cipher is stateless between blocks, so there is
 * no initialisation vector to record; the CBC wrapper maintains its
 * own chaining state.
 */
static void aes_setiv ( void *ctx __unused, const void *iv __unused ) {
	/* Nothing to do */
}
 | 
		
	
		
			
			|  | 786 | +
 | 
		
	
		
			
/** Basic AES algorithm (raw single-block ECB-style operation) */
struct cipher_algorithm aes_algorithm = {
	.name = "aes",
	/* Per-instance context holds the expanded key schedules */
	.ctxsize = sizeof ( struct aes_context ),
	.blocksize = AES_BLOCKSIZE,
	.setkey = aes_setkey,
	/* No-op: the raw block cipher carries no chaining state */
	.setiv = aes_setiv,
	.encrypt = aes_encrypt,
	.decrypt = aes_decrypt,
};
 | 
		
	
		
			
			|  | 797 | +
 | 
		
	
		
			
/* AES in Electronic Codebook mode: wraps the basic AES algorithm to
 * expose it as the "aes_ecb" cipher algorithm.
 */
ECB_CIPHER ( aes_ecb, aes_ecb_algorithm,
	     aes_algorithm, struct aes_context, AES_BLOCKSIZE );
 | 
		
	
		
			
			|  | 801 | +
 | 
		
	
		
			
/* AES in Cipher Block Chaining mode: wraps the basic AES algorithm to
 * expose it as the "aes_cbc" cipher algorithm, with chaining state
 * maintained by the CBC wrapper.
 */
CBC_CIPHER ( aes_cbc, aes_cbc_algorithm,
	     aes_algorithm, struct aes_context, AES_BLOCKSIZE );
 |