Browse Source

Accelerate memcpy() by around 32% on large, dword-aligned copies.

tags/v0.9.3
Michael Brown 17 years ago
parent
commit
bd95927386
2 changed files with 70 additions and 0 deletions
  1. 63
    0
      src/arch/i386/core/i386_string.c
  2. 7
    0
      src/arch/i386/include/bits/string.h

+ 63
- 0
src/arch/i386/core/i386_string.c View File

@@ -0,0 +1,63 @@
1
+/*
2
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
+ */
18
+
19
+/** @file
20
+ *
21
+ * Optimised string operations
22
+ *
23
+ */
24
+
25
+#include <string.h>
26
+
27
+/**
28
+ * Copy memory area using the x86 movsl/movsw/movsb string instructions
29
+ *
30
+ * @v dest		Destination address
31
+ * @v src		Source address
32
+ * @v len		Length, in bytes
33
+ * @ret dest		Destination address (the value passed in, unchanged)
34
+ */
35
+__attribute__ (( regparm ( 3 ) )) void * __memcpy ( void *dest,
36
+						    const void *src,
37
+						    size_t len ) {
38
+	void *edi = dest;	/* Working copy of dest; rewritten by each asm via "D" */
39
+	const void *esi = src;	/* Working copy of src; rewritten by each asm via "S" */
40
+	int discard_ecx;	/* Receives the "c" output; never read afterwards */
41
+
42
+	/* We often do large dword-aligned and dword-length block
43
+	 * moves.  Using movsl rather than movsb speeds these up by
44
+	 * around 32%.  The 0-3 bytes left over are copied further down.
45
+	 */
46
+	if ( len >> 2 ) {	/* At least one whole dword to copy */
47
+		__asm__ __volatile__ ( "rep movsl"
48
+				       : "=&D" ( edi ), "=&S" ( esi ),
49
+				         "=&c" ( discard_ecx )
50
+				       : "0" ( edi ), "1" ( esi ),
51
+				         "2" ( len >> 2 )	/* Repeat count, in dwords */
52
+				       : "memory" );
53
+	}
54
+	if ( len & 0x02 ) {	/* Bit 1 of len set: copy one 16-bit word */
55
+		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
56
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
57
+	}
58
+	if ( len & 0x01 ) {	/* Bit 0 of len set: copy one final byte */
59
+		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
60
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
61
+	}
62
+	return dest;	/* Return the caller's pointer, not the updated edi */
63
+}

+ 7
- 0
src/arch/i386/include/bits/string.h View File

@@ -22,6 +22,12 @@
22 22
  */
23 23
 
24 24
 #define __HAVE_ARCH_MEMCPY
25
+
26
+extern __attribute__ (( regparm ( 3 ) )) void * __memcpy ( void *dest,
27
+							   const void *src,
28
+							   size_t len );	/* Out-of-line version; defined in core/i386_string.c */
29
+
30
+#if 0
25 31
 static inline __attribute__ (( always_inline )) void *
26 32
 __memcpy ( void *dest, const void *src, size_t len ) {
27 33
 	int d0, d1, d2;
@@ -31,6 +37,7 @@ __memcpy ( void *dest, const void *src, size_t len ) {
31 37
 			       : "memory" );
32 38
 	return dest; 
33 39
 }
40
+#endif
34 41
 
35 42
 static inline __attribute__ (( always_inline )) void *
36 43
 __constant_memcpy ( void *dest, const void *src, size_t len ) {

Loading…
Cancel
Save