@@ -3,6 +3,9 @@
 /*
  * Taken from Linux /usr/include/asm/string.h
  * All except memcpy, memmove, memset and memcmp removed.
+ *
+ * Non-standard memswap() function added because it saves quite a bit
+ * of code (mbrown@fensystems.co.uk).
  */
 
 /*
@@ -19,19 +22,122 @@
  */
 
 #define __HAVE_ARCH_MEMCPY
-static inline void * memcpy(void *dest, const void *src, size_t n)
-{
-int d0, d1, d2;
-__asm__ __volatile__ (
-	"cld\n\t"
-	"rep\n\t"
-	"movsb"
-	: "=&c" (d0), "=&S" (d1), "=&D" (d2)
-	:"0" (n), "1" (src), "2" (dest)
-	: "memory");
-return dest;
+static inline __attribute__ (( always_inline )) void *
+__memcpy ( void *dest, const void *src, size_t len ) {
+	int d0, d1, d2;
+	__asm__ __volatile__ ( "rep ; movsb"
+			       : "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
+			       : "0" ( len ), "1" ( src ), "2" ( dest )
+			       : "memory" );
+	return dest;
+}
+
+static inline __attribute__ (( always_inline )) void *
+__constant_memcpy ( void *dest, const void *src, size_t len ) {
+	const void *esi;
+	void *edi;
+
+	switch ( len ) {
+	case 0 : /* 0 bytes */
+		return dest;
+	/*
+	 * Single-register moves; these are always better than a
+	 * string operation.  We can clobber an arbitrary two
+	 * registers (data, source, dest can re-use source register)
+	 * instead of being restricted to esi and edi.  There's also a
+	 * much greater potential for optimising with nearby code.
+	 *
+	 */
+	case 1 : /* 4 bytes */
+		* ( uint8_t  * ) ( dest + 0 ) = * ( uint8_t  * ) ( src + 0 );
+		return dest;
+	case 2 : /* 6 bytes */
+		* ( uint16_t * ) ( dest + 0 ) = * ( uint16_t * ) ( src + 0 );
+		return dest;
+	case 4 : /* 4 bytes */
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
+		return dest;
+	/*
+	 * Double-register moves; these are probably still a win.
+	 *
+	 */
+	case 3 : /* 12 bytes */
+		* ( uint16_t * ) ( dest + 0 ) = * ( uint16_t * ) ( src + 0 );
+		* ( uint8_t  * ) ( dest + 2 ) = * ( uint8_t  * ) ( src + 2 );
+		return dest;
+	case 5 : /* 10 bytes */
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
+		* ( uint8_t  * ) ( dest + 4 ) = * ( uint8_t  * ) ( src + 4 );
+		return dest;
+	case 6 : /* 12 bytes */
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
+		* ( uint16_t * ) ( dest + 4 ) = * ( uint16_t * ) ( src + 4 );
+		return dest;
+	case 8 : /* 10 bytes */
+		* ( uint32_t * ) ( dest + 0 ) = * ( uint32_t * ) ( src + 0 );
+		* ( uint32_t * ) ( dest + 4 ) = * ( uint32_t * ) ( src + 4 );
+		return dest;
+	}
+
+	/* Even if we have to load up esi and edi ready for a string
+	 * operation, we can sometimes save space by using multiple
+	 * single-byte "movs" operations instead of loading up ecx and
+	 * using "rep movsb".
+	 *
+	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
+	 * to allow for saving/restoring ecx 50% of the time.
+	 *
+	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
+	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
+	 * but "movsl" moves twice as much data, so it balances out).
+	 *
+	 * The cutoff point therefore occurs around 26 bytes; the byte
+	 * requirements for each method are:
+	 *
+	 * len              16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+	 * #bytes (ecx)      8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
+	 * #bytes (no ecx)   4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
+	 */
+
+	esi = src;
+	edi = dest;
+
+	if ( len >= 26 )
+		return __memcpy ( dest, src, len );
+
+	if ( len >= 6*4 )
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	if ( len >= 5*4 )
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	if ( len >= 4*4 )
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	if ( len >= 3*4 )
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	if ( len >= 2*4 )
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	if ( len >= 1*4 )
+		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	if ( ( len % 4 ) >= 2 )
+		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	if ( ( len % 2 ) >= 1 )
+		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+
+	return dest;
 }
 
+#define memcpy( dest, src, len )			\
+	( __builtin_constant_p ( (len) ) ?		\
+	  __constant_memcpy ( (dest), (src), (len) ) :	\
+	  __memcpy ( (dest), (src), (len) ) )
+
 #define __HAVE_ARCH_MEMMOVE
 static inline void * memmove(void * dest,const void * src, size_t n)
 {
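
Note on the memcpy() macro added above: gcc folds __builtin_constant_p(len)
at compile time, so for a constant length the conditional disappears and
only __constant_memcpy() is emitted, where the switch on the now-constant
len collapses to a single case. A minimal sketch of what a call site then
compiles down to (copy_foo() and struct foo are made-up names for
illustration, and this assumes the header above is in scope):

	#include <stdint.h>

	struct foo {
		uint32_t lo;
		uint32_t hi;
	};	/* sizeof ( struct foo ) == 8 */

	static void copy_foo ( struct foo *dst, const struct foo *src ) {
		/* len is the compile-time constant 8, so gcc selects
		 * __constant_memcpy() and emits only the two 32-bit
		 * load/store pairs of "case 8" above: no ecx setup and
		 * no rep prefix. */
		memcpy ( dst, src, sizeof ( *dst ) );
	}

The 26-byte cutoff is likewise resolved at compile time: for a constant
len of 20, say, the 6*4 test fails and the 5*4 through 1*4 tests each
emit one 1-byte "movsl", giving the 5 code bytes shown in the
"#bytes (no ecx)" row of the table, against 8 for loading ecx and using
"rep movsb".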
@@ -59,7 +165,7 @@ return dest;
 }
 
 #define __HAVE_ARCH_MEMSET
-static inline void *memset(void *s, int c,size_t count)
+static inline void * memset(void *s, int c,size_t count)
 {
 int d0, d1;
 __asm__ __volatile__(
@@ -72,6 +178,23 @@ __asm__ __volatile__(
 return s;
 }
 
+#define __HAVE_ARCH_MEMSWAP
+static inline void * memswap(void *dest, void *src, size_t n)
+{
+int d0, d1, d2, d3;
+__asm__ __volatile__(
+	"\n1:\t"
+	"movb (%%edi),%%al\n\t"
+	"xchgb (%%esi),%%al\n\t"
+	"incl %%esi\n\t"
+	"stosb\n\t"
+	"loop 1b"
+	: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=&a" (d3)
+	: "0" (n), "1" (src), "2" (dest)
+	: "memory" );
+return dest;
+}
+
 #define __HAVE_ARCH_STRNCMP
 static inline int strncmp(const char * cs,const char * ct,size_t count)
 {
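
Note on the memswap() added above: it works bytewise. "movb" loads the
destination byte into al, "xchgb" swaps al with the source byte (writing
the old destination byte into *esi), "incl %esi" advances the source
pointer, and "stosb" stores al (now the old source byte) through edi,
advancing it; "loop 1b" decrements ecx and repeats while it is non-zero.
A plain-C equivalent for comparison, as a sketch only, not proposed for
the tree; memswap_ref() is a made-up name. As written it also tolerates
n == 0, which the "loop"-based asm does not, since the loop body runs
once before ecx is tested:

	#include <stddef.h>
	#include <stdint.h>

	/* Reference version: swap n bytes between dest and src,
	 * returning dest, as the asm above does for n > 0. */
	static inline void * memswap_ref ( void *dest, void *src, size_t n ) {
		uint8_t *d = dest;
		uint8_t *s = src;

		while ( n-- ) {
			uint8_t tmp = *d;
			*d++ = *s;
			*s++ = tmp;
		}
		return dest;
	}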
|