|  | @@ -0,0 +1,169 @@
 | 
		
	
		
			
			|  | 1 | +/*
 | 
		
	
		
			
			|  | 2 | + * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
 | 
		
	
		
			
			|  | 3 | + *
 | 
		
	
		
			
			|  | 4 | + * This program is free software; you can redistribute it and/or
 | 
		
	
		
			
			|  | 5 | + * modify it under the terms of the GNU General Public License as
 | 
		
	
		
			
			|  | 6 | + * published by the Free Software Foundation; either version 2 of the
 | 
		
	
		
			
			|  | 7 | + * License, or (at your option) any later version.
 | 
		
	
		
			
			|  | 8 | + *
 | 
		
	
		
			
			|  | 9 | + * This program is distributed in the hope that it will be useful, but
 | 
		
	
		
			
			|  | 10 | + * WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
		
	
		
			
			|  | 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
		
	
		
			
			|  | 12 | + * General Public License for more details.
 | 
		
	
		
			
			|  | 13 | + *
 | 
		
	
		
			
			|  | 14 | + * You should have received a copy of the GNU General Public License
 | 
		
	
		
			
			|  | 15 | + * along with this program; if not, write to the Free Software
 | 
		
	
		
			
			|  | 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | 
		
	
		
			
			|  | 17 | + * 02110-1301, USA.
 | 
		
	
		
			
			|  | 18 | + */
 | 
		
	
		
			
			|  | 19 | +
 | 
		
	
		
			
			|  | 20 | +FILE_LICENCE ( GPL2_OR_LATER );
 | 
		
	
		
			
			|  | 21 | +
 | 
		
	
		
			
			|  | 22 | +/** @file
 | 
		
	
		
			
			|  | 23 | + *
 | 
		
	
		
			
			|  | 24 | + * TCP/IP checksum
 | 
		
	
		
			
			|  | 25 | + *
 | 
		
	
		
			
			|  | 26 | + */
 | 
		
	
		
			
			|  | 27 | +
 | 
		
	
		
			
			|  | 28 | +#include <limits.h>
 | 
		
	
		
			
			|  | 29 | +#include <ipxe/tcpip.h>
 | 
		
	
		
			
			|  | 30 | +
 | 
		
	
		
			
/* Address of the end of the unrolled checksum loop.  The label itself
 * is emitted by the inline assembly in x86_tcpip_continue_chksum();
 * it is declared here so that C code can pass its address to that
 * assembly as an operand.
 */
extern char x86_tcpip_loop_end[];
 | 
		
	
		
			
			|  | 32 | +
 | 
		
	
		
			
/**
 * Calculate continued TCP/IP checksum
 *
 * @v partial		Checksum of already-summed data, in network byte order
 * @v data		Data buffer
 * @v len		Length of data buffer
 * @ret cksum		Updated checksum, in network byte order
 *
 * The running sum starts from the complement of @c partial, the
 * buffer is added to it using add-with-carry instructions, and the
 * complement of the folded 16-bit sum is returned.
 */
uint16_t x86_tcpip_continue_chksum ( uint16_t partial,
				     const void *data, size_t len ) {
	/* Running sum, seeded with the complement of the partial checksum */
	unsigned long sum = ( ( ~partial ) & 0xffff );
	unsigned long initial_word_count;
	unsigned long loop_count;
	unsigned long loop_partial_count;
	unsigned long final_word_count;
	unsigned long final_byte;
	/* Outputs of the assembly below that are never read
	 * afterwards; each is tied to one of the assembly's input
	 * operands.
	 */
	unsigned long discard_S;
	unsigned long discard_c;
	unsigned long discard_a;
	unsigned long discard_r1;
	unsigned long discard_r2;

	/* Calculate number of initial 16-bit words required to bring
	 * the main loop into alignment.  (We don't care about the
	 * speed for data aligned to less than 16 bits, since this
	 * situation won't occur in practice.)
	 *
	 * Buffers shorter than one native word skip the alignment
	 * step, which keeps the subtraction from len below from
	 * wrapping.
	 */
	if ( len >= sizeof ( sum ) ) {
		initial_word_count = ( ( -( ( intptr_t ) data ) &
					 ( sizeof ( sum ) - 1 ) ) >> 1 );
	} else {
		initial_word_count = 0;
	}
	len -= ( initial_word_count * 2 );

	/* Calculate number of iterations of the main loop.  This loop
	 * processes native machine words (32-bit or 64-bit), and is
	 * unrolled 16 times.  We calculate an overall iteration
	 * count, and a starting point for the first iteration.
	 *
	 * loop_count is the number of complete 16-word passes, and
	 * loop_partial_count is the number of native words left over
	 * (0-15), which are handled by entering the unrolled loop
	 * part-way through.
	 */
	loop_count = ( len / ( sizeof ( sum ) * 16 ) );
	loop_partial_count =
		( ( len % ( sizeof ( sum ) * 16 ) ) / sizeof ( sum ) );

	/* Calculate number of 16-bit words remaining after the main
	 * loop completes.
	 */
	final_word_count = ( ( len % sizeof ( sum ) ) / 2 );

	/* Calculate whether or not a final byte remains at the end */
	final_byte = ( len & 1 );

	/* Calculate the checksum
	 *
	 * Operand map:
	 *
	 *   %0  sum (running total; "Q" so that %h0 is addressable)
	 *   %1  data pointer ("S", advanced by each lods)
	 *   %2  value loaded by lods ("a", initially zero)
	 *   %3  counter used by every "loop" instruction ("c"),
	 *       initially initial_word_count + 1
	 *   %4  loop_partial_count, converted in place into the
	 *       address at which to enter the unrolled loop
	 *   %5  address of x86_tcpip_loop_end
	 *   %12 loop_count + 1
	 *   %13 final_word_count + 1
	 *   %14 final_byte
	 *
	 * Each counted section is entered at its "loop" instruction,
	 * which decrements the counter before testing it; this is why
	 * the counts are passed in as N + 1 in order to obtain N
	 * iterations (including N == 0).
	 *
	 * The carry flag is handed from each adc to the next.  None
	 * of the instructions in between (lods, loop, mov, jmp, push,
	 * pop) modify it.
	 */
	__asm__ ( /* Calculate position at which to jump into the
		   * unrolled loop.
		   *
		   * The entry point is loop_partial_count steps
		   * before x86_tcpip_loop_end, where one step is the
		   * size of a single "lods;adc" pair.
		   */
		  "imul $( -x86_tcpip_loop_step_size ), %4\n\t"
		  "add %5, %4\n\t"

		  /* Clear carry flag before starting checksumming */
		  "clc\n\t"

		  /* Checksum initial words */
		  "jmp 2f\n\t"
		  "\n1:\n\t"
		  "lodsw\n\t"
		  "adcw %w2, %w0\n\t"
		  "\n2:\n\t"
		  "loop 1b\n\t"

		  /* Main "lods;adc" loop, unrolled x16
		   *
		   * The counter is reloaded with loop_count + 1 and
		   * the first pass starts at the computed entry
		   * point, so that it covers only the left-over
		   * native words.
		   */
		  "mov %12, %3\n\t"
		  "jmp *%4\n\t"
		  "\nx86_tcpip_loop_start:\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "\nx86_tcpip_loop_end:\n\t"
		  "loop x86_tcpip_loop_start\n\t"
		  /* Size in bytes of one "lods;adc" pair.
		   *
		   * NOTE(review): GNU as treats '/' as a comment
		   * character on some SVR4-derived targets unless
		   * --divide is given; confirm against the supported
		   * toolchains.
		   */
		  ".equ x86_tcpip_loop_step_size, "
		  "  ( ( x86_tcpip_loop_end - x86_tcpip_loop_start ) / 16 )\n\t"

		  /* Checksum remaining whole words */
		  "mov %13, %3\n\t"
		  "jmp 2f\n\t"
		  "\n1:\n\t"
		  "lodsw\n\t"
		  "adcw %w2, %w0\n\t"
		  "\n2:\n\t"
		  "loop 1b\n\t"

		  /* Checksum final byte if applicable
		   *
		   * With final_byte == 1 the "loop" decrements the
		   * counter to zero and falls through to add the
		   * byte; with final_byte == 0 the counter wraps to a
		   * non-zero value and the branch skips the addition.
		   * The byte is added to the low byte of the sum and
		   * any carry is propagated into the next byte.
		   */
		  "mov %14, %3\n\t"
		  "loop 1f\n\t"
		  "adcb (%1), %b0\n\t"
		  "adcb $0, %h0\n\t"
		  "\n1:\n\t"

		  /* Fold down to a uint16_t
		   *
		   * The native-width sum is pushed once and popped
		   * back 16 bits at a time, adding each piece (with
		   * carry) into the low 16 bits of the sum.
		   */
		  "push %0\n\t"
		  "popw %w0\n\t"
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
#if ULONG_MAX > 0xffffffffUL /* 64-bit only */
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
#endif /* 64-bit only */

		  /* Consume CF
		   *
		   * Adding the outstanding carry may itself carry
		   * out of 16 bits, so the addition is done twice.
		   */
		  "adcw $0, %w0\n\t"
		  "adcw $0, %w0\n\t"

		  : "=&Q" ( sum ), "=&S" ( discard_S ), "=&a" ( discard_a ),
		    "=&c" ( discard_c ), "=&r" ( discard_r1 ),
		    "=&r" ( discard_r2 )
		  : "0" ( sum ), "1" ( data ), "2" ( 0 ),
		    "3" ( initial_word_count + 1 ), "4" ( loop_partial_count ),
		    "5" ( x86_tcpip_loop_end ), "g" ( loop_count + 1 ),
		    "g" ( final_word_count + 1 ), "g" ( final_byte ) );

	/* Only the low 16 bits of sum are meaningful after folding */
	return ( ~sum & 0xffff );
}
 |