Просмотр исходного кода

[arm] Add optimised TCP/IP checksumming for 64-bit ARM

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 8 лет назад
Родитель
Сommit
47931a4de5

src/arch/arm/include/bits/tcpip.h → src/arch/arm32/include/bits/tcpip.h Просмотреть файл


+ 175
- 0
src/arch/arm64/core/arm64_tcpip.c Просмотреть файл

@@ -0,0 +1,175 @@
1
+/*
2
+ * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17
+ * 02110-1301, USA.
18
+ *
19
+ * You can also choose to distribute this program under the terms of
20
+ * the Unmodified Binary Distribution Licence (as given in the file
21
+ * COPYING.UBDL), provided that you have satisfied its requirements.
22
+ */
23
+
24
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
+
26
+/** @file
27
+ *
28
+ * TCP/IP checksum
29
+ *
30
+ */
31
+
32
+#include <strings.h>
33
+#include <ipxe/tcpip.h>
34
+
35
+/** Alignment used by main checksumming loop */
36
+#define TCPIP_CHKSUM_ALIGN 16
37
+
38
+/** Number of steps in each iteration of the unrolled main checksumming loop */
39
+#define TCPIP_CHKSUM_UNROLL 4
40
+
41
+/**
42
+ * Calculate continued TCP/IP checksum
43
+ *
44
+ * @v sum		Checksum of already-summed data, in network byte order
45
+ * @v data		Data buffer
46
+ * @v len		Length of data buffer, in bytes (if zero, sum is returned unchanged)
47
+ * @ret sum		Updated checksum, in network byte order
48
+ */
49
+uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
50
+				 size_t len ) {
51
+	intptr_t start;
52
+	intptr_t end;
53
+	intptr_t mid;
54
+	unsigned int pre;
55
+	unsigned int post;
56
+	unsigned int first;
57
+	uint64_t discard_low;
58
+	uint64_t discard_high;
59
+
60
+	/* Nothing to sum; this also avoids a potentially undefined shift below, since start ^ end would be zero */
61
+	if ( len == 0 )
62
+		return sum;
63
+
64
+	/* Find maximally-aligned midpoint by clearing the bits of end below the
65
+	 * highest bit in which start and end differ (assuming flsl() returns that bit's 1-based index).  For short blocks of data, this may be aligned to fewer than 16 bytes.
66
+	 */
67
+	start = ( ( intptr_t ) data );
68
+	end = ( start + len );
69
+	mid = ( end &
70
+		~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );
71
+
72
+	/* Calculate pre- and post-alignment lengths: the bytes before and after the midpoint that do not fill a whole 16-byte step (each 0-15) */
73
+	pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
74
+	post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
75
+
76
+	/* Calculate number of 16-byte steps (0-3) in first, partial iteration of unrolled loop */
77
+	first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
78
+		  ( TCPIP_CHKSUM_UNROLL - 1 ) );
79
+
80
+	/* Calculate checksum.  Operands: %0 sum, %1 data pointer (post-incremented by each load), %2 and %3 scratch, %4 pre, %5 first, %6 end address of main loop (end - post), %7 post.  NOTE(review): the asm reads the data buffer but declares no memory input operand or memory clobber -- confirm the compiler cannot move stores to the buffer past it. */
81
+	__asm__ ( /* Invert the low 16 bits of sum */
82
+		  "eor %w0, %w0, #0xffff\n\t"
83
+		  /* Clear carry flag (0 + 0); the carry is then chained through every adcs below */
84
+		  "cmn xzr, xzr\n\t"
85
+		  /* Byteswap and sum pre-alignment byte, if applicable (bit 0 of pre); the running sum stays byteswapped until the end */
86
+		  "tbz %w4, #0, 1f\n\t"
87
+		  "ldrb %w2, [%1], #1\n\t"
88
+		  "rev16 %w0, %w0\n\t"
89
+		  "rev16 %w2, %w2\n\t"
90
+		  "adcs %0, %0, %2\n\t"
91
+		  "\n1:\n\t"
92
+		  /* Sum pre-alignment halfword, if applicable (bit 1 of pre) */
93
+		  "tbz %w4, #1, 1f\n\t"
94
+		  "ldrh %w2, [%1], #2\n\t"
95
+		  "adcs %0, %0, %2\n\t"
96
+		  "\n1:\n\t"
97
+		  /* Sum pre-alignment word, if applicable (bit 2 of pre) */
98
+		  "tbz %w4, #2, 1f\n\t"
99
+		  "ldr %w2, [%1], #4\n\t"
100
+		  "adcs %0, %0, %2\n\t"
101
+		  "\n1:\n\t"
102
+		  /* Sum pre-alignment doubleword, if applicable (bit 3 of pre) */
103
+		  "tbz %w4, #3, 1f\n\t"
104
+		  "ldr %2, [%1], #8\n\t"
105
+		  "adcs %0, %0, %2\n\t"
106
+		  "\n1:\n\t"
107
+		  /* Jump into unrolled (x4) main loop: each step (ldp plus two adcs) is 12 bytes of code, so branch to label 2 minus first * ( 8 + 4 ) */
108
+		  "adr %2, 2f\n\t"
109
+		  "sub %2, %2, %5, lsl #3\n\t"
110
+		  "sub %2, %2, %5, lsl #2\n\t"
111
+		  "br %2\n\t"
112
+		  "\n1:\n\t"
113
+		  "ldp %2, %3, [%1], #16\n\t"
114
+		  "adcs %0, %0, %2\n\t"
115
+		  "adcs %0, %0, %3\n\t"
116
+		  "ldp %2, %3, [%1], #16\n\t"
117
+		  "adcs %0, %0, %2\n\t"
118
+		  "adcs %0, %0, %3\n\t"
119
+		  "ldp %2, %3, [%1], #16\n\t"
120
+		  "adcs %0, %0, %2\n\t"
121
+		  "adcs %0, %0, %3\n\t"
122
+		  "ldp %2, %3, [%1], #16\n\t"
123
+		  "adcs %0, %0, %2\n\t"
124
+		  "adcs %0, %0, %3\n\t"
125
+		  "\n2:\n\t"
126
+		  "sub %2, %1, %6\n\t"
127
+		  "cbnz %2, 1b\n\t"
128
+		  /* Sum post-alignment doubleword, if applicable (bit 3 of post) */
129
+		  "tbz %w7, #3, 1f\n\t"
130
+		  "ldr %2, [%1], #8\n\t"
131
+		  "adcs %0, %0, %2\n\t"
132
+		  "\n1:\n\t"
133
+		  /* Sum post-alignment word, if applicable (bit 2 of post) */
134
+		  "tbz %w7, #2, 1f\n\t"
135
+		  "ldr %w2, [%1], #4\n\t"
136
+		  "adcs %0, %0, %2\n\t"
137
+		  "\n1:\n\t"
138
+		  /* Sum post-alignment halfword, if applicable (bit 1 of post) */
139
+		  "tbz %w7, #1, 1f\n\t"
140
+		  "ldrh %w2, [%1], #2\n\t"
141
+		  "adcs %0, %0, %2\n\t"
142
+		  "\n1:\n\t"
143
+		  /* Sum post-alignment byte, if applicable (bit 0 of post) */
144
+		  "tbz %w7, #0, 1f\n\t"
145
+		  "ldrb %w2, [%1], #1\n\t"
146
+		  "adcs %0, %0, %2\n\t"
147
+		  "\n1:\n\t"
148
+		  /* Fold down to a uint32_t plus carry flag (add high and low words, including the pending carry) */
149
+		  "lsr %2, %0, #32\n\t"
150
+		  "adcs %w0, %w0, %w2\n\t"
151
+		  /* Fold down to a uint16_t plus carry in bit 16 (add the two halfwords and the pending carry) */
152
+		  "ubfm %2, %0, #0, #15\n\t"
153
+		  "ubfm %3, %0, #16, #31\n\t"
154
+		  "adc %w0, %w2, %w3\n\t"
155
+		  /* Fold down to a uint16_t by subtracting 0xffff while bit 16 is set (at most twice) */
156
+		  "tbz %w0, #16, 1f\n\t"
157
+		  "mov %w2, #0xffff\n\t"
158
+		  "sub %w0, %w0, %w2\n\t"
159
+		  "tbz %w0, #16, 1f\n\t"
160
+		  "sub %w0, %w0, %w2\n\t"
161
+		  "\n1:\n\t"
162
+		  /* Byteswap back, if applicable (undoes the swap made for an odd pre) */
163
+		  "tbz %w4, #0, 1f\n\t"
164
+		  "rev16 %w0, %w0\n\t"
165
+		  "\n1:\n\t"
166
+		  /* Invert the low 16 bits of sum again */
167
+		  "eor %w0, %w0, #0xffff\n\t"
168
+		  : "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
169
+		    "=&r" ( discard_high )
170
+		  : "r" ( pre ), "r" ( first ), "r" ( end - post ),
171
+		    "r" ( post )
172
+		  : "cc" );
173
+
174
+	return sum;
175
+}

+ 15
- 0
src/arch/arm64/include/bits/tcpip.h Просмотреть файл

@@ -0,0 +1,15 @@
1
+#ifndef _BITS_TCPIP_H
2
+#define _BITS_TCPIP_H
3
+
4
+/** @file
5
+ *
6
+ * Transport-network layer interface
7
+ *
8
+ */
9
+
10
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
11
+
12
+extern uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
13
+					size_t len );
14
+
15
+#endif /* _BITS_TCPIP_H */

Загрузка…
Отмена
Сохранить