123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- #ifndef ETHERBOOT_BITS_STRING_H
- #define ETHERBOOT_BITS_STRING_H
- /*
- * Taken from Linux /usr/include/asm/string.h
- * All except memcpy, memmove, memset, strncmp and strlen removed.
- *
- * Non-standard memswap() function added because it saves quite a bit
- * of code (mbrown@fensystems.co.uk).
- */
-
- /*
- * This string-include defines all string functions as inline
- * functions. Use gcc. It also assumes ds=es=data space, this should be
- * normal. Most of the string-functions are rather heavily hand-optimized,
- * see especially strtok,strstr,str[c]spn. They should work, but are not
- * very easy to understand. Everything is done entirely within the register
- * set, making the functions fast and clean. String instructions have been
- * used through-out, making for "slightly" unclear code :-)
- *
- * NO Copyright (C) 1991, 1992 Linus Torvalds,
- * consider these trivial functions to be PD.
- */
-
#define __HAVE_ARCH_MEMCPY

/*
 * Generic copy routine for lengths that are not handled inline.
 * Only the prototype is given here; the inline definition below is
 * compiled out, so the real definition is not part of this header.
 */
extern void * __memcpy ( void *dest, const void *src, size_t len );

#if 0
/*
 * Disabled inline variant of __memcpy(): a single "rep movsb" with
 * the byte count in ecx, the source in esi and the destination in
 * edi.  The three outputs exist only to tell the compiler that those
 * registers are modified.  Returns dest.
 */
static inline __attribute__ (( always_inline )) void *
__memcpy ( void *dest, const void *src, size_t len ) {
	int discard_ecx;
	int discard_esi;
	int discard_edi;

	__asm__ __volatile__ ( "rep ; movsb"
			       : "=&c" ( discard_ecx ),
				 "=&S" ( discard_esi ),
				 "=&D" ( discard_edi )
			       : "0" ( len ), "1" ( src ), "2" ( dest )
			       : "memory" );
	return dest;
}
#endif
-
/*
 * Emit one string-move instruction ("movsl", "movsw" or "movsb")
 * that copies from esi to edi and advances both cursors.  Helper for
 * __constant_memcpy() only; it is undefined again right after the
 * function.
 */
#define __CONSTANT_MEMCPY_MOVS( insn )					\
	__asm__ __volatile__ ( insn					\
			       : "=&D" ( edi ), "=&S" ( esi )		\
			       : "0" ( edi ), "1" ( esi )		\
			       : "memory" )

/*
 * Copy len bytes from src to dest and return dest.
 *
 * The function is forced inline and is laid out as a series of tests
 * on len alone, so that (presumably) for a constant len only the one
 * selected copy sequence survives optimisation:
 *
 *   - lengths 0-6 and 8 are copied with plain register moves through
 *     an aliasing-safe union view of the buffers;
 *   - lengths of 26 or more are passed to __memcpy();
 *   - everything else is copied with an unrolled run of "movs"
 *     instructions.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	/* Byte / word / dword views of the first eight bytes */
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *to = dest;
	const union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *from = src;
	const void *esi;
	void *edi;

	/* Register moves.  These always beat a string operation for
	 * one register's worth of data and are probably still a win
	 * for two: any two scratch registers can be used instead of
	 * being tied to esi and edi, and the moves can be optimised
	 * together with nearby code.
	 *
	 * The "N bytes" notes are carried over from the original
	 * annotations; they appear to be code-size estimates, not
	 * copy lengths.
	 */
	switch ( len ) {
	case 0 :
		/* Nothing to copy */
		return dest;
	case 1 : /* 4 bytes */
		to->u8[0] = from->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		to->u16[0] = from->u16[0];
		return dest;
	case 3 : /* 12 bytes */
		to->u16[0] = from->u16[0];
		to->u8[2] = from->u8[2];
		return dest;
	case 4 : /* 4 bytes */
		to->u32[0] = from->u32[0];
		return dest;
	case 5 : /* 10 bytes */
		to->u32[0] = from->u32[0];
		to->u8[4] = from->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		to->u32[0] = from->u32[0];
		to->u16[2] = from->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		to->u32[0] = from->u32[0];
		to->u32[1] = from->u32[1];
		return dest;
	}

	/* Once esi and edi have to be loaded anyway, a run of
	 * individual "movs" instructions can still be smaller than
	 * loading ecx and issuing "rep movsb".
	 *
	 * "load ecx, rep movsb" costs 7 bytes, plus on average 1 byte
	 * for saving/restoring ecx half of the time.
	 *
	 * "movsl" and "movsb" cost 1 byte each, "movsw" costs 2.
	 * (In 16-bit mode "movsl" is 2 bytes and "movsw" is 1, but
	 * "movsl" moves twice as much data, so it balances out.)
	 *
	 * The break-even point is therefore around 26 bytes:
	 *
	 * len		   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)	    8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */
	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	esi = src;
	edi = dest;

	/* One "movsl" per complete dword (at most six below the cutoff) */
	if ( ( len / 4 ) >= 6 )
		__CONSTANT_MEMCPY_MOVS ( "movsl" );
	if ( ( len / 4 ) >= 5 )
		__CONSTANT_MEMCPY_MOVS ( "movsl" );
	if ( ( len / 4 ) >= 4 )
		__CONSTANT_MEMCPY_MOVS ( "movsl" );
	if ( ( len / 4 ) >= 3 )
		__CONSTANT_MEMCPY_MOVS ( "movsl" );
	if ( ( len / 4 ) >= 2 )
		__CONSTANT_MEMCPY_MOVS ( "movsl" );
	if ( ( len / 4 ) >= 1 )
		__CONSTANT_MEMCPY_MOVS ( "movsl" );

	/* Trailing word, then trailing byte */
	if ( len & 2 )
		__CONSTANT_MEMCPY_MOVS ( "movsw" );
	if ( len & 1 )
		__CONSTANT_MEMCPY_MOVS ( "movsb" );

	return dest;
}

#undef __CONSTANT_MEMCPY_MOVS
-
/*
 * memcpy() front end.  If the compiler reports the length as a
 * compile-time constant the copy goes through __constant_memcpy();
 * any other length goes to __memcpy().
 */
#define memcpy( dest, src, len )					\
	( __builtin_constant_p ( (len) )				\
	  ? __constant_memcpy ( (dest), (src), (len) )			\
	  : __memcpy ( (dest), (src), (len) ) )
-
#define __HAVE_ARCH_MEMMOVE
/*
 * Copy n bytes from src to dest; the two regions may overlap.
 *
 * If dest lies below src the bytes are copied in ascending address
 * order.  Otherwise the direction flag is set, the copy runs from the
 * last byte of each region down to the first, and the direction flag
 * is cleared again before returning.
 *
 * Returns dest.
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
	/* Dummy outputs: mark ecx, esi and edi as modified by the asm */
	int ecx_out, esi_out, edi_out;

	if (dest >= src) {
		/* Descending copy, starting at the final byte */
		const char *src_last = (const char *)src + (n - 1);
		char *dest_last = (char *)dest + (n - 1);

		__asm__ __volatile__(
			"std\n\t"
			"rep\n\t"
			"movsb\n\t"
			"cld"
			: "=&c" (ecx_out), "=&S" (esi_out), "=&D" (edi_out)
			: "0" (n), "1" (src_last), "2" (dest_last)
			: "memory");
		return dest;
	}

	/* dest is below src: ascending copy */
	__asm__ __volatile__(
		"cld\n\t"
		"rep\n\t"
		"movsb"
		: "=&c" (ecx_out), "=&S" (esi_out), "=&D" (edi_out)
		: "0" (n), "1" (src), "2" (dest)
		: "memory");
	return dest;
}
-
#define __HAVE_ARCH_MEMSET
/*
 * Fill count bytes starting at s with the low byte of c, using
 * "rep stosb" (count in ecx, destination in edi, fill value in eax).
 *
 * Returns s.
 */
static inline void * memset(void *s, int c,size_t count)
{
	/* Dummy outputs: mark ecx and edi as modified by the asm */
	int ecx_out, edi_out;

	__asm__ __volatile__(
		"cld\n\t"
		"rep\n\t"
		"stosb"
		: "=&c" (ecx_out), "=&D" (edi_out)
		: "0" (count), "1" (s), "a" (c)
		: "memory");
	return s;
}
-
#define __HAVE_ARCH_MEMSWAP
/*
 * Exchange the contents of the n-byte buffers at dest and src, one
 * byte at a time (non-standard helper).
 *
 * Each iteration loads a byte from (edi), exchanges it with the byte
 * at (esi), advances esi, then stores the exchanged byte back through
 * edi with "stosb"; ecx holds the remaining count.
 *
 * A length of zero is a no-op.  It has to be filtered out before the
 * asm: "loop" decrements ecx before testing it, so entering the loop
 * with ecx == 0 would wrap the counter instead of terminating.
 *
 * Returns dest.
 */
static inline void * memswap(void *dest, void *src, size_t n)
{
	/* Dummy outputs: mark ecx, esi, edi and eax as modified */
	int d0, d1, d2, d3;

	if (n == 0)
		return dest;

	__asm__ __volatile__(
		"\n1:\t"
		"movb (%%edi),%%al\n\t"
		"xchgb (%%esi),%%al\n\t"
		"incl %%esi\n\t"
		"stosb\n\t"
		"loop 1b"
		: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=&a" (d3)
		: "0" (n), "1" (src), "2" (dest)
		: "memory" );
	return dest;
}
-
#define __HAVE_ARCH_STRNCMP
/*
 * Compare at most count bytes of the strings cs and ct.
 *
 * Returns 0 if no difference is found within count bytes or before a
 * terminating NUL, -1 if the first differing byte of cs is below the
 * corresponding byte of ct (unsigned comparison), and 1 otherwise.
 *
 * NOTE: the remaining count is tested with a signed branch ("js")
 * after each decrement, so a count above 0x80000000 is treated like
 * an exhausted count and yields 0 immediately.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
	register int __res;
	/* Dummy outputs: mark esi, edi and ecx as modified by the asm */
	int d0, d1, d2;
	__asm__ __volatile__(
		"1:\tdecl %3\n\t"
		"js 2f\n\t"
		"lodsb\n\t"
		"scasb\n\t"
		"jne 3f\n\t"
		"testb %%al,%%al\n\t"
		"jne 1b\n"
		"2:\txorl %%eax,%%eax\n\t"
		"jmp 4f\n"
		/* sbb turns the carry from scasb into 0 or -1; or-ing in
		 * bit 0 then gives 1 or -1 */
		"3:\tsbbl %%eax,%%eax\n\t"
		"orb $1,%%al\n"
		"4:"
		:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
		:"1" (cs),"2" (ct),"3" (count)
		/* Both strings are read through esi/edi without being
		 * listed as memory operands, so the compiler must be told
		 * to complete any pending stores first */
		:"memory");
	return __res;
}
-
#define __HAVE_ARCH_STRLEN
/*
 * Return the number of bytes in s before its terminating NUL.
 *
 * "repne scasb" scans from edi for the zero byte held in al while
 * counting ecx down from 0xffffffff; inverting the final count and
 * subtracting one (for the NUL itself) gives the length.
 */
static inline size_t strlen(const char * s)
{
	/* Dummy output: marks edi as modified by the asm */
	int d0;
	register int __res;
	__asm__ __volatile__(
		"repne\n\t"
		"scasb\n\t"
		"notl %0\n\t"
		"decl %0"
		:"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)
		/* The string is read through edi without being listed as
		 * a memory operand, so the compiler must be told to
		 * complete any pending stores first */
		:"memory");
	return __res;
}
-
- #endif /* ETHERBOOT_BITS_STRING_H */
|