|
@@ -0,0 +1,175 @@
|
|
1
|
+/*
|
|
2
|
+ * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
|
|
3
|
+ *
|
|
4
|
+ * This program is free software; you can redistribute it and/or
|
|
5
|
+ * modify it under the terms of the GNU General Public License as
|
|
6
|
+ * published by the Free Software Foundation; either version 2 of the
|
|
7
|
+ * License, or (at your option) any later version.
|
|
8
|
+ *
|
|
9
|
+ * This program is distributed in the hope that it will be useful, but
|
|
10
|
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
12
|
+ * General Public License for more details.
|
|
13
|
+ *
|
|
14
|
+ * You should have received a copy of the GNU General Public License
|
|
15
|
+ * along with this program; if not, write to the Free Software
|
|
16
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
17
|
+ * 02110-1301, USA.
|
|
18
|
+ *
|
|
19
|
+ * You can also choose to distribute this program under the terms of
|
|
20
|
+ * the Unmodified Binary Distribution Licence (as given in the file
|
|
21
|
+ * COPYING.UBDL), provided that you have satisfied its requirements.
|
|
22
|
+ */
|
|
23
|
+
|
|
24
|
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
|
25
|
+
|
|
26
|
+/** @file
|
|
27
|
+ *
|
|
28
|
+ * TCP/IP checksum
|
|
29
|
+ *
|
|
30
|
+ */
|
|
31
|
+
|
|
32
|
+#include <strings.h>
|
|
33
|
+#include <ipxe/tcpip.h>
|
|
34
|
+
|
|
35
|
+/** Alignment, in bytes, used by main checksumming loop (the inline assembly hard-codes this as the 16-byte "ldp" step size) */
|
|
36
|
+#define TCPIP_CHKSUM_ALIGN 16
|
|
37
|
+
|
|
38
|
+/** Number of steps in each iteration of the unrolled main checksumming loop (the inline assembly hard-codes this as four "ldp" steps) */
|
|
39
|
+#define TCPIP_CHKSUM_UNROLL 4
|
|
40
|
+
|
|
41
|
+/**
|
|
42
|
+ * Calculate continued TCP/IP checkum
|
|
43
|
+ *
|
|
44
|
+ * @v sum Checksum of already-summed data, in network byte order
|
|
45
|
+ * @v data Data buffer
|
|
46
|
+ * @v len Length of data buffer
|
|
47
|
+ * @ret sum Updated checksum, in network byte order
|
|
48
|
+ */
|
|
49
|
+uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
|
|
50
|
+ size_t len ) {
|
|
51
|
+ intptr_t start;
|
|
52
|
+ intptr_t end;
|
|
53
|
+ intptr_t mid;
|
|
54
|
+ unsigned int pre;
|
|
55
|
+ unsigned int post;
|
|
56
|
+ unsigned int first;
|
|
57
|
+ uint64_t discard_low;
|
|
58
|
+ uint64_t discard_high;
|
|
59
|
+
|
|
60
|
+ /* Avoid potentially undefined shift operation */
|
|
61
|
+ if ( len == 0 )
|
|
62
|
+ return sum;
|
|
63
|
+
|
|
64
|
+ /* Find maximally-aligned midpoint. For short blocks of data,
|
|
65
|
+ * this may be aligned to fewer than 16 bytes.
|
|
66
|
+ */
|
|
67
|
+ start = ( ( intptr_t ) data );
|
|
68
|
+ end = ( start + len );
|
|
69
|
+ mid = ( end &
|
|
70
|
+ ~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );
|
|
71
|
+
|
|
72
|
+ /* Calculate pre- and post-alignment lengths */
|
|
73
|
+ pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
|
|
74
|
+ post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
|
|
75
|
+
|
|
76
|
+ /* Calculate number of steps in first iteration of unrolled loop */
|
|
77
|
+ first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
|
|
78
|
+ ( TCPIP_CHKSUM_UNROLL - 1 ) );
|
|
79
|
+
|
|
80
|
+ /* Calculate checksum */
|
|
81
|
+ __asm__ ( /* Invert sum */
|
|
82
|
+ "eor %w0, %w0, #0xffff\n\t"
|
|
83
|
+ /* Clear carry flag */
|
|
84
|
+ "cmn xzr, xzr\n\t"
|
|
85
|
+ /* Byteswap and sum pre-alignment byte, if applicable */
|
|
86
|
+ "tbz %w4, #0, 1f\n\t"
|
|
87
|
+ "ldrb %w2, [%1], #1\n\t"
|
|
88
|
+ "rev16 %w0, %w0\n\t"
|
|
89
|
+ "rev16 %w2, %w2\n\t"
|
|
90
|
+ "adcs %0, %0, %2\n\t"
|
|
91
|
+ "\n1:\n\t"
|
|
92
|
+ /* Sum pre-alignment halfword, if applicable */
|
|
93
|
+ "tbz %w4, #1, 1f\n\t"
|
|
94
|
+ "ldrh %w2, [%1], #2\n\t"
|
|
95
|
+ "adcs %0, %0, %2\n\t"
|
|
96
|
+ "\n1:\n\t"
|
|
97
|
+ /* Sum pre-alignment word, if applicable */
|
|
98
|
+ "tbz %w4, #2, 1f\n\t"
|
|
99
|
+ "ldr %w2, [%1], #4\n\t"
|
|
100
|
+ "adcs %0, %0, %2\n\t"
|
|
101
|
+ "\n1:\n\t"
|
|
102
|
+ /* Sum pre-alignment doubleword, if applicable */
|
|
103
|
+ "tbz %w4, #3, 1f\n\t"
|
|
104
|
+ "ldr %2, [%1], #8\n\t"
|
|
105
|
+ "adcs %0, %0, %2\n\t"
|
|
106
|
+ "\n1:\n\t"
|
|
107
|
+ /* Jump into unrolled (x4) main loop */
|
|
108
|
+ "adr %2, 2f\n\t"
|
|
109
|
+ "sub %2, %2, %5, lsl #3\n\t"
|
|
110
|
+ "sub %2, %2, %5, lsl #2\n\t"
|
|
111
|
+ "br %2\n\t"
|
|
112
|
+ "\n1:\n\t"
|
|
113
|
+ "ldp %2, %3, [%1], #16\n\t"
|
|
114
|
+ "adcs %0, %0, %2\n\t"
|
|
115
|
+ "adcs %0, %0, %3\n\t"
|
|
116
|
+ "ldp %2, %3, [%1], #16\n\t"
|
|
117
|
+ "adcs %0, %0, %2\n\t"
|
|
118
|
+ "adcs %0, %0, %3\n\t"
|
|
119
|
+ "ldp %2, %3, [%1], #16\n\t"
|
|
120
|
+ "adcs %0, %0, %2\n\t"
|
|
121
|
+ "adcs %0, %0, %3\n\t"
|
|
122
|
+ "ldp %2, %3, [%1], #16\n\t"
|
|
123
|
+ "adcs %0, %0, %2\n\t"
|
|
124
|
+ "adcs %0, %0, %3\n\t"
|
|
125
|
+ "\n2:\n\t"
|
|
126
|
+ "sub %2, %1, %6\n\t"
|
|
127
|
+ "cbnz %2, 1b\n\t"
|
|
128
|
+ /* Sum post-alignment doubleword, if applicable */
|
|
129
|
+ "tbz %w7, #3, 1f\n\t"
|
|
130
|
+ "ldr %2, [%1], #8\n\t"
|
|
131
|
+ "adcs %0, %0, %2\n\t"
|
|
132
|
+ "\n1:\n\t"
|
|
133
|
+ /* Sum post-alignment word, if applicable */
|
|
134
|
+ "tbz %w7, #2, 1f\n\t"
|
|
135
|
+ "ldr %w2, [%1], #4\n\t"
|
|
136
|
+ "adcs %0, %0, %2\n\t"
|
|
137
|
+ "\n1:\n\t"
|
|
138
|
+ /* Sum post-alignment halfword, if applicable */
|
|
139
|
+ "tbz %w7, #1, 1f\n\t"
|
|
140
|
+ "ldrh %w2, [%1], #2\n\t"
|
|
141
|
+ "adcs %0, %0, %2\n\t"
|
|
142
|
+ "\n1:\n\t"
|
|
143
|
+ /* Sum post-alignment byte, if applicable */
|
|
144
|
+ "tbz %w7, #0, 1f\n\t"
|
|
145
|
+ "ldrb %w2, [%1], #1\n\t"
|
|
146
|
+ "adcs %0, %0, %2\n\t"
|
|
147
|
+ "\n1:\n\t"
|
|
148
|
+ /* Fold down to a uint32_t plus carry flag */
|
|
149
|
+ "lsr %2, %0, #32\n\t"
|
|
150
|
+ "adcs %w0, %w0, %w2\n\t"
|
|
151
|
+ /* Fold down to a uint16_t plus carry in bit 16 */
|
|
152
|
+ "ubfm %2, %0, #0, #15\n\t"
|
|
153
|
+ "ubfm %3, %0, #16, #31\n\t"
|
|
154
|
+ "adc %w0, %w2, %w3\n\t"
|
|
155
|
+ /* Fold down to a uint16_t */
|
|
156
|
+ "tbz %w0, #16, 1f\n\t"
|
|
157
|
+ "mov %w2, #0xffff\n\t"
|
|
158
|
+ "sub %w0, %w0, %w2\n\t"
|
|
159
|
+ "tbz %w0, #16, 1f\n\t"
|
|
160
|
+ "sub %w0, %w0, %w2\n\t"
|
|
161
|
+ "\n1:\n\t"
|
|
162
|
+ /* Byteswap back, if applicable */
|
|
163
|
+ "tbz %w4, #0, 1f\n\t"
|
|
164
|
+ "rev16 %w0, %w0\n\t"
|
|
165
|
+ "\n1:\n\t"
|
|
166
|
+ /* Invert sum */
|
|
167
|
+ "eor %w0, %w0, #0xffff\n\t"
|
|
168
|
+ : "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
|
|
169
|
+ "=&r" ( discard_high )
|
|
170
|
+ : "r" ( pre ), "r" ( first ), "r" ( end - post ),
|
|
171
|
+ "r" ( post )
|
|
172
|
+ : "cc" );
|
|
173
|
+
|
|
174
|
+ return sum;
|
|
175
|
+}
|