您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

string.h 6.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  #ifndef X86_BITS_STRING_H
  #define X86_BITS_STRING_H
  /*
   * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License as
   * published by the Free Software Foundation; either version 2 of the
   * License, or any later version.
   *
   * This program is distributed in the hope that it will be useful, but
   * WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   * 02110-1301, USA.
   */
  FILE_LICENCE ( GPL2_OR_LATER );

  /** @file
   *
   * Optimised string operations
   *
   */
  /*
   * Out-of-line copy routines.  Only the prototypes are visible in this
   * header; the definitions live elsewhere in the project.
   *
   * NOTE(review): judging by its name, __memcpy_reverse() presumably
   * copies starting from the highest address -- confirm against its
   * definition.
   */
  extern void * __memcpy ( void *dest, const void *src, size_t len );
  extern void * __memcpy_reverse ( void *dest, const void *src, size_t len );
  /**
   * Copy memory area (where length is a compile-time constant)
   *
   * @v dest        Destination address
   * @v src         Source address
   * @v len         Length
   * @ret dest      Destination address
   *
   * Lengths 0-6 and 8 are copied with plain integer loads and stores.
   * Lengths of 26 bytes or more are handed to __memcpy().  Every other
   * length is copied with an unrolled run of "movs" string instructions.
   *
   * NOTE(review): the "N bytes" remark beside each switch case appears
   * to record the size of the generated instructions, not the number of
   * bytes copied -- confirm against the original author's intent.
   */
  static inline __attribute__ (( always_inline )) void *
  __constant_memcpy ( void *dest, const void *src, size_t len ) {
      /* May-alias views of the first eight bytes of each buffer, so
       * that the fixed-size cases below can use 8-, 16- and 32-bit
       * accesses without violating strict aliasing.
       */
      union {
          uint32_t u32[2];
          uint16_t u16[4];
          uint8_t u8[8];
      } __attribute__ (( __may_alias__ )) *dest_u = dest;
      const union {
          uint32_t u32[2];
          uint16_t u16[4];
          uint8_t u8[8];
      } __attribute__ (( __may_alias__ )) *src_u = src;
      const void *esi;
      void *edi;

      switch ( len ) {
      case 0 : /* 0 bytes */
          return dest;

      /*
       * Single-register moves; these are always better than a
       * string operation.  We can clobber an arbitrary two
       * registers (data, source, dest can re-use source register)
       * instead of being restricted to esi and edi.  There's also a
       * much greater potential for optimising with nearby code.
       *
       */
      case 1 : /* 4 bytes */
          dest_u->u8[0] = src_u->u8[0];
          return dest;
      case 2 : /* 6 bytes */
          dest_u->u16[0] = src_u->u16[0];
          return dest;
      case 4 : /* 4 bytes */
          dest_u->u32[0] = src_u->u32[0];
          return dest;

      /*
       * Double-register moves; these are probably still a win.
       *
       */
      case 3 : /* 12 bytes */
          dest_u->u16[0] = src_u->u16[0];
          dest_u->u8[2] = src_u->u8[2];
          return dest;
      case 5 : /* 10 bytes */
          dest_u->u32[0] = src_u->u32[0];
          dest_u->u8[4] = src_u->u8[4];
          return dest;
      case 6 : /* 12 bytes */
          dest_u->u32[0] = src_u->u32[0];
          dest_u->u16[2] = src_u->u16[2];
          return dest;
      case 8 : /* 10 bytes */
          dest_u->u32[0] = src_u->u32[0];
          dest_u->u32[1] = src_u->u32[1];
          return dest;
      }

      /* Even if we have to load up esi and edi ready for a string
       * operation, we can sometimes save space by using multiple
       * single-byte "movs" operations instead of loading up ecx and
       * using "rep movsb".
       *
       * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
       * to allow for saving/restoring ecx 50% of the time.
       *
       * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
       * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
       * but "movsl" moves twice as much data, so it balances out).
       *
       * The cutoff point therefore occurs around 26 bytes; the byte
       * requirements for each method are:
       *
       * len              16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
       * #bytes (ecx)      8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
       * #bytes (no ecx)   4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
       */

      esi = src;
      edi = dest;

      if ( len >= 26 )
          return __memcpy ( dest, src, len );

      /* Unrolled copy: up to six dword moves, then at most one word
       * move and one byte move for the remainder.  Each statement
       * takes edi/esi as inputs and writes them back as outputs, so
       * the advanced pointers feed the next instruction.
       */
      if ( len >= 6*4 )
          __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );
      if ( len >= 5*4 )
          __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );
      if ( len >= 4*4 )
          __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );
      if ( len >= 3*4 )
          __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );
      if ( len >= 2*4 )
          __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );
      if ( len >= 1*4 )
          __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );
      if ( ( len % 4 ) >= 2 )
          __asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );
      if ( ( len % 2 ) >= 1 )
          __asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
                                 : "0" ( edi ), "1" ( esi ) : "memory" );

      return dest;
  }
  /**
   * Copy memory area
   *
   * @v dest        Destination address
   * @v src         Source address
   * @v len         Length
   * @ret dest      Destination address
   *
   * Dispatches on whether the compiler can see the length as a
   * constant: constant lengths go to the inlined __constant_memcpy(),
   * everything else to the out-of-line __memcpy().
   */
  static inline __attribute__ (( always_inline )) void *
  memcpy ( void *dest, const void *src, size_t len ) {
      if ( __builtin_constant_p ( len ) ) {
          return __constant_memcpy ( dest, src, len );
      } else {
          return __memcpy ( dest, src, len );
      }
  }
  /* Out-of-line overlapping copy, used when the relative position of
   * the two buffers is not known at compile time.
   */
  extern void * __memmove ( void *dest, const void *src, size_t len );

  /**
   * Copy (possibly overlapping) memory area
   *
   * @v dest        Destination address
   * @v src         Source address
   * @v len         Length
   * @ret dest      Destination address
   *
   * If the compiler can determine the distance between the two
   * buffers, the copy direction is chosen at compile time; otherwise
   * the decision is left to __memmove().
   */
  static inline __attribute__ (( always_inline )) void *
  memmove ( void *dest, const void *src, size_t len ) {
      /* Signed distance from source to destination (arithmetic on
       * void pointers is a GNU C extension).
       */
      ssize_t offset = ( dest - src );

      if ( __builtin_constant_p ( offset ) ) {
          if ( offset <= 0 ) {
              /* Destination does not lie above the source, so
               * memcpy() is used.  NOTE(review): this relies on
               * memcpy() copying in ascending address order.
               */
              return memcpy ( dest, src, len );
          } else {
              /* Destination lies above the source */
              return __memcpy_reverse ( dest, src, len );
          }
      } else {
          return __memmove ( dest, src, len );
      }
  }
  /**
   * Fill memory region
   *
   * @v dest        Destination address
   * @v fill        Fill pattern
   * @v len         Length
   * @ret dest      Destination address
   *
   * Implemented as a single "rep stosb".  The instruction stores the
   * low byte of the accumulator, so only the low eight bits of @c fill
   * are written.
   *
   * NOTE(review): assumes the direction flag is clear on entry, as is
   * usual for compiled code -- confirm for callers reached from
   * hand-written assembly.
   */
  static inline void * memset ( void *dest, int fill, size_t len ) {
      /* Dummy outputs: they tell the compiler that the destination
       * and count registers are modified by the instruction.
       */
      void *discard_D;
      size_t discard_c;

      __asm__ __volatile__ ( "rep stosb"
                             : "=&D" ( discard_D ), "=&c" ( discard_c )
                             : "0" ( dest ), "1" ( len ), "a" ( fill )
                             : "memory" );
      return dest;
  }
  #endif /* X86_BITS_STRING_H */