Selaa lähdekoodia

Added __constant_memcpy() based on version in Linux's string.h.

Added memswap().
tags/v0.9.3
Michael Brown 19 vuotta sitten
vanhempi
commit
510ccb2900
1 muutettua tiedostoa jossa 135 lisäystä ja 12 poistoa
  1. 135
    12
      src/arch/i386/include/bits/string.h

+ 135
- 12
src/arch/i386/include/bits/string.h Näytä tiedosto

@@ -3,6 +3,9 @@
3 3
 /*
4 4
  * Taken from Linux /usr/include/asm/string.h
5 5
  * All except memcpy, memmove, memset and memcmp removed.
6
+ *
7
+ * Non-standard memswap() function added because it saves quite a bit
8
+ * of code (mbrown@fensystems.co.uk).
6 9
  */
7 10
 
8 11
 /*
@@ -19,19 +22,122 @@
19 22
  */
20 23
 
21 24
 #define __HAVE_ARCH_MEMCPY
22
-static inline void * memcpy(void *dest, const void *src, size_t n)
23
-{
24
-int d0, d1, d2;
25
-__asm__ __volatile__ (
26
-	"cld\n\t"
27
-	"rep\n\t"
28
-	"movsb"
29
-	: "=&c" (d0), "=&S" (d1), "=&D" (d2)
30
-	:"0" (n), "1" (src), "2" (dest)
31
-	: "memory");
32
-return dest; 
25
+static inline __attribute__ (( always_inline )) void *
26
+__memcpy ( void *dest, const void *src, size_t len ) {
27
+	int d0, d1, d2;
28
+	__asm__ __volatile__ ( "rep ; movsb"
29
+			       : "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
30
+			       : "0" ( len ), "1" ( src ), "2" ( dest )
31
+			       : "memory" );
32
+	return dest; 
33
+}
34
+
35
+static inline __attribute__ (( always_inline )) void *
36
+__constant_memcpy ( void *dest, const void *src, size_t len ) {
37
+	const void *esi;
38
+	void *edi;
39
+
40
+	switch ( len ) {
41
+	case 0 : /* 0 bytes */
42
+		return dest;
43
+	/*
44
+	 * Single-register moves; these are always better than a
45
+	 * string operation.  We can clobber an arbitrary two
46
+	 * registers (one for data, one for source; dest can re-use the source register)
47
+	 * instead of being restricted to esi and edi.  There's also a
48
+	 * much greater potential for optimising with nearby code.
49
+	 *
50
+	 */
51
+	case 1 : /* 4 bytes */
52
+		* ( uint8_t  * ) ( dest + 0 ) = * ( uint8_t  * ) ( src + 0 );
53
+		return dest;
54
+	case 2 : /* 6 bytes */
55
+		* ( uint16_t * ) ( dest + 0 ) = * ( uint16_t * ) ( src + 0 );
56
+		return dest;
57
+	case 4 : /* 4 bytes */
58
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
59
+		return dest;
60
+	/*
61
+	 * Double-register moves; these are probably still a win.
62
+	 *
63
+	 */
64
+	case 3 : /* 12 bytes */
65
+		* ( uint16_t * ) ( dest + 0 ) = * ( uint16_t * ) ( src + 0 );
66
+		* ( uint8_t  * ) ( dest + 2 ) = * ( uint8_t  * ) ( src + 2 );
67
+		return dest;
68
+	case 5 : /* 10 bytes */
69
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
70
+		* ( uint8_t  * ) ( dest + 4 ) = * ( uint8_t  * ) ( src + 4 );
71
+		return dest;
72
+	case 6 : /* 12 bytes */
73
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
74
+		* ( uint16_t * ) ( dest + 4 ) = * ( uint16_t * ) ( src + 4 );
75
+		return dest;
76
+	case 8 : /* 10 bytes */
77
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
78
+		* ( uint32_t * ) ( dest + 4 ) = * ( uint32_t * ) ( src + 4 );
79
+		return dest;
80
+	}
81
+
82
+	/* Even if we have to load up esi and edi ready for a string
83
+	 * operation, we can sometimes save space by using multiple
84
+	 * single-byte "movs" operations instead of loading up ecx and
85
+	 * using "rep movsb".
86
+	 *
87
+	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
88
+	 * to allow for saving/restoring ecx 50% of the time.
89
+	 *
90
+	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
91
+	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
92
+	 * but "movsl" moves twice as much data, so it balances out).
93
+	 *
94
+	 * The cutoff point therefore occurs around 26 bytes; the byte
95
+	 * requirements for each method are:
96
+	 *
97
+	 * len		   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
98
+	 * #bytes (ecx)	    8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
99
+	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
100
+	 */
101
+
102
+	esi = src;
103
+	edi = dest;
104
+	
105
+	if ( len >= 26 )
106
+		return __memcpy ( dest, src, len );
107
+	
108
+	if ( len >= 6*4 )
109
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
110
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
111
+	if ( len >= 5*4 )
112
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
113
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
114
+	if ( len >= 4*4 )
115
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
116
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
117
+	if ( len >= 3*4 )
118
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
119
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
120
+	if ( len >= 2*4 )
121
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
122
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
123
+	if ( len >= 1*4 )
124
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
125
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
126
+	if ( ( len % 4 ) >= 2 )
127
+		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
128
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
129
+	if ( ( len % 2 ) >= 1 )
130
+		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
131
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
132
+
133
+	return dest;
33 134
 }
34 135
 
136
+#define memcpy( dest, src, len )			\
137
+	( __builtin_constant_p ( (len) ) ?		\
138
+	  __constant_memcpy ( (dest), (src), (len) ) :	\
139
+	  __memcpy ( (dest), (src), (len) ) )
140
+
35 141
 #define __HAVE_ARCH_MEMMOVE
36 142
 static inline void * memmove(void * dest,const void * src, size_t n)
37 143
 {
@@ -59,7 +165,7 @@ return dest;
59 165
 }
60 166
 
61 167
 #define __HAVE_ARCH_MEMSET
62
-static inline void *memset(void *s, int c,size_t count)
168
+static inline void * memset(void *s, int c,size_t count)
63 169
 {
64 170
 int d0, d1;
65 171
 __asm__ __volatile__(
@@ -72,6 +178,23 @@ __asm__ __volatile__(
72 178
 return s;
73 179
 }
74 180
 
181
+#define __HAVE_ARCH_MEMSWAP
182
+static inline void * memswap(void *dest, void *src, size_t n)
183
+{
184
+int d0, d1, d2, d3;
185
+__asm__ __volatile__(
186
+	"\n1:\t"
187
+	"movb (%%edi),%%al\n\t"
188
+	"xchgb (%%esi),%%al\n\t"
189
+	"incl %%esi\n\t"
190
+	"stosb\n\t"
191
+	"loop 1b"
192
+	: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=&a" (d3)
193
+	: "0" (n), "1" (src), "2" (dest)
194
+	: "memory" );
195
+return dest;
196
+}
197
+
75 198
 #define __HAVE_ARCH_STRNCMP
76 199
 static inline int strncmp(const char * cs,const char * ct,size_t count)
77 200
 {

Loading…
Peruuta
Tallenna