You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

memset.S 3.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. /*
  2. * Copyright (C) 1999-2002 Hewlett-Packard Co.
  3. * Contributed by Stephane Eranian <eranian@hpl.hp.com>
  4. *
  5. * This file is part of the ELILO, the EFI Linux boot loader.
  6. *
  7. * ELILO is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2, or (at your option)
  10. * any later version.
  11. *
  12. * ELILO is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with ELILO; see the file COPYING. If not, write to the Free
  19. * Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20. * 02111-1307, USA.
  21. *
  22. * Please check out the elilo.txt for complete documentation on how
  23. * to use this program.
  24. *
  25. * This code is derived from the Linux/ia64 source code.
  26. */
  /*
   * memset -- byte fill for IA-64 (GNU as syntax, C-preprocessed).
   *
   * Stores the low byte of in1 into each of the in2 bytes starting at in0.
   *
   * Inputs:
   *	in0 (r32): address of buffer
   *	in1 (r33): byte value to use for storing; mux1 @brcst replicates its
   *		   low byte across the whole register before any store
   *	in2 (r34): length of the buffer, in bytes
   *
   * Return:
   *	r8: the original buffer address (in0 as passed by the caller), as
   *	    the ISO C memset() contract requires.  Callers that ignored the
   *	    result before are unaffected.
   *
   * Strategy:
   *	len == 0 : return immediately
   *	len <= 16: one st1 per byte in a br.cloop counted loop
   *	len >  16: store 1/2/4/8 bytes as needed until buf is 8-byte aligned,
   *		   then 16 bytes per iteration through two pointers (buf and
   *		   buf+8), then an 8/4/2/1 tail selected by the low bits of
   *		   the remaining length
   *
   * The "len > 16" test is a signed compare (cmp.lt), so a length with bit 63
   * set is handled by the byte loop rather than the long path.
   *
   * ar.lc is saved on entry and restored on every path that modifies it.
   */

// arguments
#define buf		r32
#define val		r33
#define len		r34

// local registers
#define saved_pfs	r14
#define cnt		r18
#define buf2		r19
#define saved_lc	r20
#define tmp		r21

	.text
	.global memset
	.proc memset
memset:
	.prologue
	.save ar.pfs, saved_pfs
	alloc saved_pfs=ar.pfs,3,0,0,0	// frame: 3 inputs, 0 locals, 0 outputs, 0 rotating
	cmp.eq p8,p0=r0,len		// p8 = (len == 0)
	.save ar.lc, saved_lc
	mov saved_lc=ar.lc		// preserve ar.lc (slow)
	;;
	.body
	mov r8=buf			// return value: buf is advanced by the stores
					// below, so capture it before any exit path
	adds tmp=-1,len			// br.cloop is repeat/until: count = len-1
	tbit.nz p6,p0=buf,0		// p6 = buf has odd alignment
(p8)	br.ret.spnt.few rp		// zero length: nothing to store

	cmp.lt p7,p0=16,len		// p7 = (len > 16) -> long memset
	mux1 val=val,@brcst		// broadcast the fill byte across val
(p7)	br.cond.dptk.few long_memset
	;;
	mov ar.lc=tmp			// initialize lc for small count
	;;				// avoid RAW and WAW on ar.lc
1:					// worst case 15 cycles, avg 8 cycles
	st1 [buf]=val,1
	br.cloop.dptk.few 1b
	;;				// avoid RAW on ar.lc
	mov ar.lc=saved_lc
	mov ar.pfs=saved_pfs
	br.ret.sptk.few rp		// end of short memset

	// At this point we know we have more than 16 bytes to store,
	// so we focus on alignment.  Each step tests one low bit of the
	// (already advanced) buf and, if set, stores that many bytes and
	// takes them off len.
long_memset:
(p6)	st1 [buf]=val,1			// 1-byte aligned
(p6)	adds len=-1,len;;		// sync because buf is modified
	tbit.nz p6,p0=buf,1
	;;
(p6)	st2 [buf]=val,2			// 2-byte aligned
(p6)	adds len=-2,len;;
	tbit.nz p6,p0=buf,2
	;;
(p6)	st4 [buf]=val,4			// 4-byte aligned
(p6)	adds len=-4,len;;
	tbit.nz p6,p0=buf,3
	;;
(p6)	st8 [buf]=val,8			// 8-byte aligned
(p6)	adds len=-8,len;;
	shr.u cnt=len,4			// number of 128-bit (2x64bit) words
	;;
	cmp.eq p6,p0=r0,cnt
	adds tmp=-1,cnt			// loop count - 1 for br.cloop
(p6)	br.cond.dpnt.few .dotail	// we have less than 16 bytes left
	;;
	adds buf2=8,buf			// setup second base pointer
	mov ar.lc=tmp
	;;
2:					// 16 bytes/iteration
	st8 [buf]=val,16
	st8 [buf2]=val,16
	br.cloop.dptk.few 2b
	;;
.dotail:				// tail correction based on len only
	tbit.nz p6,p0=len,3
	;;
(p6)	st8 [buf]=val,8			// at least 8 bytes
	tbit.nz p6,p0=len,2
	;;
(p6)	st4 [buf]=val,4			// at least 4 bytes
	tbit.nz p6,p0=len,1
	;;
(p6)	st2 [buf]=val,2			// at least 2 bytes
	tbit.nz p6,p0=len,0
	mov ar.lc=saved_lc		// restore the caller's loop counter
	;;
(p6)	st1 [buf]=val			// only 1 byte left
	// ar.pfs is not written anywhere in this routine after the alloc,
	// so this path returns without restoring it.
	br.ret.dptk.few rp
	.endp memset