You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

arm64_string.c 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /*
  2. * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. /** @file
  24. *
  25. * Optimised string operations
  26. *
  27. */
  28. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  29. #include <string.h>
  30. /**
  31. * Copy memory area
  32. *
  33. * @v dest Destination address
  34. * @v src Source address
  35. * @v len Length
  36. * @ret dest Destination address
  37. */
  38. void arm64_memcpy ( void *dest, const void *src, size_t len ) {
  39. void *discard_dest;
  40. void *discard_end;
  41. const void *discard_src;
  42. size_t discard_offset;
  43. unsigned long discard_data;
  44. unsigned long discard_low;
  45. unsigned long discard_high;
  46. /* If length is too short for an "ldp"/"stp" instruction pair,
  47. * then just copy individual bytes.
  48. */
  49. if ( len < 16 ) {
  50. __asm__ __volatile__ ( "cbz %0, 2f\n\t"
  51. "\n1:\n\t"
  52. "sub %0, %0, #1\n\t"
  53. "ldrb %w1, [%3, %0]\n\t"
  54. "strb %w1, [%2, %0]\n\t"
  55. "cbnz %0, 1b\n\t"
  56. "\n2:\n\t"
  57. : "=&r" ( discard_offset ),
  58. "=&r" ( discard_data )
  59. : "r" ( dest ), "r" ( src ), "0" ( len )
  60. : "memory" );
  61. return;
  62. }
  63. /* Use "ldp"/"stp" to copy 16 bytes at a time: one initial
  64. * potentially unaligned access, multiple destination-aligned
  65. * accesses, one final potentially unaligned access.
  66. */
  67. __asm__ __volatile__ ( "ldp %3, %4, [%1], #16\n\t"
  68. "stp %3, %4, [%0], #16\n\t"
  69. "and %3, %0, #15\n\t"
  70. "sub %0, %0, %3\n\t"
  71. "sub %1, %1, %3\n\t"
  72. "bic %2, %5, #15\n\t"
  73. "b 2f\n\t"
  74. "\n1:\n\t"
  75. "ldp %3, %4, [%1], #16\n\t"
  76. "stp %3, %4, [%0], #16\n\t"
  77. "\n2:\n\t"
  78. "cmp %0, %2\n\t"
  79. "bne 1b\n\t"
  80. "ldp %3, %4, [%6, #-16]\n\t"
  81. "stp %3, %4, [%5, #-16]\n\t"
  82. : "=&r" ( discard_dest ),
  83. "=&r" ( discard_src ),
  84. "=&r" ( discard_end ),
  85. "=&r" ( discard_low ),
  86. "=&r" ( discard_high )
  87. : "r" ( dest + len ), "r" ( src + len ),
  88. "0" ( dest ), "1" ( src )
  89. : "memory", "cc" );
  90. }
  91. /**
  92. * Zero memory region
  93. *
  94. * @v dest Destination region
  95. * @v len Length
  96. */
  97. void arm64_bzero ( void *dest, size_t len ) {
  98. size_t discard_offset;
  99. void *discard_dest;
  100. void *discard_end;
  101. /* If length is too short for an "stp" instruction, then just
  102. * zero individual bytes.
  103. */
  104. if ( len < 16 ) {
  105. __asm__ __volatile__ ( "cbz %0, 2f\n\t"
  106. "\n1:\n\t"
  107. "sub %0, %0, #1\n\t"
  108. "strb wzr, [%1, %0]\n\t"
  109. "cbnz %0, 1b\n\t"
  110. "\n2:\n\t"
  111. : "=&r" ( discard_offset )
  112. : "r" ( dest ), "0" ( len )
  113. : "memory" );
  114. return;
  115. }
  116. /* Use "stp" to zero 16 bytes at a time: one initial
  117. * potentially unaligned access, multiple aligned accesses,
  118. * one final potentially unaligned access.
  119. */
  120. __asm__ __volatile__ ( "stp xzr, xzr, [%0], #16\n\t"
  121. "bic %0, %0, #15\n\t"
  122. "bic %1, %2, #15\n\t"
  123. "b 2f\n\t"
  124. "\n1:\n\t"
  125. "stp xzr, xzr, [%0], #16\n\t"
  126. "\n2:\n\t"
  127. "cmp %0, %1\n\t"
  128. "bne 1b\n\t"
  129. "stp xzr, xzr, [%2, #-16]\n\t"
  130. : "=&r" ( discard_dest ),
  131. "=&r" ( discard_end )
  132. : "r" ( dest + len ), "0" ( dest )
  133. : "memory", "cc" );
  134. }
  /**
   * Fill memory region
   *
   * @v dest        Destination region
   * @v len         Length
   * @v character   Fill character
   *
   * The first two parameters are in the same positions as those of
   * arm64_bzero(), so the zero-fill case can forward them unchanged.
   */
  void arm64_memset ( void *dest, size_t len, int character ) {
      size_t remaining;

      /* Non-zero fill: store the low byte of the fill character one
       * byte at a time, counting the offset down from len to zero
       * (a zero length skips the loop entirely).  No attempt is
       * made to use wider stores here.
       */
      if ( character != 0 ) {
          __asm__ __volatile__ (
              "cbz %[remaining], 2f\n\t"
              "\n1:\n\t"
              "sub %[remaining], %[remaining], #1\n\t"
              "strb %w[fill], [%[dest], %[remaining]]\n\t"
              "cbnz %[remaining], 1b\n\t"
              "\n2:\n\t"
              : [remaining] "=&r" ( remaining )
              : [dest] "r" ( dest ), [fill] "r" ( character ),
                "0" ( len )
              : "memory" );
          return;
      }

      /* Zero fill: hand off to the dedicated zeroing routine */
      arm64_bzero ( dest, len );
  }
  166. /**
  167. * Copy (possibly overlapping) memory region forwards
  168. *
  169. * @v dest Destination region
  170. * @v src Source region
  171. * @v len Length
  172. */
  173. void arm64_memmove_forwards ( void *dest, const void *src, size_t len ) {
  174. void *discard_dest;
  175. const void *discard_src;
  176. unsigned long discard_data;
  177. /* Assume memmove() is not performance-critical, and perform a
  178. * bytewise copy for simplicity.
  179. */
  180. __asm__ __volatile__ ( "b 2f\n\t"
  181. "\n1:\n\t"
  182. "ldrb %w2, [%1], #1\n\t"
  183. "strb %w2, [%0], #1\n\t"
  184. "\n2:\n\t"
  185. "cmp %0, %3\n\t"
  186. "bne 1b\n\t"
  187. : "=&r" ( discard_dest ),
  188. "=&r" ( discard_src ),
  189. "=&r" ( discard_data )
  190. : "r" ( dest + len ), "0" ( dest ), "1" ( src )
  191. : "memory" );
  192. }
  193. /**
  194. * Copy (possibly overlapping) memory region backwards
  195. *
  196. * @v dest Destination region
  197. * @v src Source region
  198. * @v len Length
  199. */
  200. void arm64_memmove_backwards ( void *dest, const void *src, size_t len ) {
  201. size_t discard_offset;
  202. unsigned long discard_data;
  203. /* Assume memmove() is not performance-critical, and perform a
  204. * bytewise copy for simplicity.
  205. */
  206. __asm__ __volatile__ ( "cbz %0, 2f\n\t"
  207. "\n1:\n\t"
  208. "sub %0, %0, #1\n\t"
  209. "ldrb %w1, [%3, %0]\n\t"
  210. "strb %w1, [%2, %0]\n\t"
  211. "cbnz %0, 1b\n\t"
  212. "\n2:\n\t"
  213. : "=&r" ( discard_offset ),
  214. "=&r" ( discard_data )
  215. : "r" ( dest ), "r" ( src ), "0" ( len )
  216. : "memory" );
  217. }
  /**
   * Copy (possibly overlapping) memory region
   *
   * @v dest        Destination region
   * @v src         Source region
   * @v len         Length
   *
   * The copy direction is chosen from the relative position of the
   * two regions.
   */
  void arm64_memmove ( void *dest, const void *src, size_t len ) {

      /* Destination above source: copy from the last byte down */
      if ( dest > src ) {
          arm64_memmove_backwards ( dest, src, len );
          return;
      }

      /* Destination at or below source: copy from the first byte up */
      arm64_memmove_forwards ( dest, src, len );
  }