You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

arm64_tcpip.c 5.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. /*
  2. * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. /** @file
  25. *
  26. * TCP/IP checksum
  27. *
  28. */
  29. #include <strings.h>
  30. #include <ipxe/tcpip.h>
  /** Alignment used by main checksumming loop
   *
   * Each step of the main loop in tcpip_continue_chksum() loads one
   * 16-byte register pair, so this must stay at 16.  It is also used
   * as a bit mask ( TCPIP_CHKSUM_ALIGN - 1 ) and must therefore be a
   * power of two.
   */
  #define TCPIP_CHKSUM_ALIGN 16

  /** Number of steps in each iteration of the unrolled main checksumming loop
   *
   * Must match the number of load-and-add steps written out in the
   * inline assembly of tcpip_continue_chksum().  It is also used as a
   * bit mask ( TCPIP_CHKSUM_UNROLL - 1 ) and must therefore be a
   * power of two.
   */
  #define TCPIP_CHKSUM_UNROLL 4
  35. /**
  36. * Calculate continued TCP/IP checkum
  37. *
  38. * @v sum Checksum of already-summed data, in network byte order
  39. * @v data Data buffer
  40. * @v len Length of data buffer
  41. * @ret sum Updated checksum, in network byte order
  42. */
  43. uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
  44. size_t len ) {
  45. intptr_t start;
  46. intptr_t end;
  47. intptr_t mid;
  48. unsigned int pre;
  49. unsigned int post;
  50. unsigned int first;
  51. uint64_t discard_low;
  52. uint64_t discard_high;
  53. /* Avoid potentially undefined shift operation */
  54. if ( len == 0 )
  55. return sum;
  56. /* Find maximally-aligned midpoint. For short blocks of data,
  57. * this may be aligned to fewer than 16 bytes.
  58. */
  59. start = ( ( intptr_t ) data );
  60. end = ( start + len );
  61. mid = ( end &
  62. ~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );
  63. /* Calculate pre- and post-alignment lengths */
  64. pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
  65. post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
  66. /* Calculate number of steps in first iteration of unrolled loop */
  67. first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
  68. ( TCPIP_CHKSUM_UNROLL - 1 ) );
  69. /* Calculate checksum */
  70. __asm__ ( /* Invert sum */
  71. "eor %w0, %w0, #0xffff\n\t"
  72. /* Clear carry flag */
  73. "cmn xzr, xzr\n\t"
  74. /* Byteswap and sum pre-alignment byte, if applicable */
  75. "tbz %w4, #0, 1f\n\t"
  76. "ldrb %w2, [%1], #1\n\t"
  77. "rev16 %w0, %w0\n\t"
  78. "rev16 %w2, %w2\n\t"
  79. "adcs %0, %0, %2\n\t"
  80. "\n1:\n\t"
  81. /* Sum pre-alignment halfword, if applicable */
  82. "tbz %w4, #1, 1f\n\t"
  83. "ldrh %w2, [%1], #2\n\t"
  84. "adcs %0, %0, %2\n\t"
  85. "\n1:\n\t"
  86. /* Sum pre-alignment word, if applicable */
  87. "tbz %w4, #2, 1f\n\t"
  88. "ldr %w2, [%1], #4\n\t"
  89. "adcs %0, %0, %2\n\t"
  90. "\n1:\n\t"
  91. /* Sum pre-alignment doubleword, if applicable */
  92. "tbz %w4, #3, 1f\n\t"
  93. "ldr %2, [%1], #8\n\t"
  94. "adcs %0, %0, %2\n\t"
  95. "\n1:\n\t"
  96. /* Jump into unrolled (x4) main loop */
  97. "adr %2, 2f\n\t"
  98. "sub %2, %2, %5, lsl #3\n\t"
  99. "sub %2, %2, %5, lsl #2\n\t"
  100. "br %2\n\t"
  101. "\n1:\n\t"
  102. "ldp %2, %3, [%1], #16\n\t"
  103. "adcs %0, %0, %2\n\t"
  104. "adcs %0, %0, %3\n\t"
  105. "ldp %2, %3, [%1], #16\n\t"
  106. "adcs %0, %0, %2\n\t"
  107. "adcs %0, %0, %3\n\t"
  108. "ldp %2, %3, [%1], #16\n\t"
  109. "adcs %0, %0, %2\n\t"
  110. "adcs %0, %0, %3\n\t"
  111. "ldp %2, %3, [%1], #16\n\t"
  112. "adcs %0, %0, %2\n\t"
  113. "adcs %0, %0, %3\n\t"
  114. "\n2:\n\t"
  115. "sub %2, %1, %6\n\t"
  116. "cbnz %2, 1b\n\t"
  117. /* Sum post-alignment doubleword, if applicable */
  118. "tbz %w7, #3, 1f\n\t"
  119. "ldr %2, [%1], #8\n\t"
  120. "adcs %0, %0, %2\n\t"
  121. "\n1:\n\t"
  122. /* Sum post-alignment word, if applicable */
  123. "tbz %w7, #2, 1f\n\t"
  124. "ldr %w2, [%1], #4\n\t"
  125. "adcs %0, %0, %2\n\t"
  126. "\n1:\n\t"
  127. /* Sum post-alignment halfword, if applicable */
  128. "tbz %w7, #1, 1f\n\t"
  129. "ldrh %w2, [%1], #2\n\t"
  130. "adcs %0, %0, %2\n\t"
  131. "\n1:\n\t"
  132. /* Sum post-alignment byte, if applicable */
  133. "tbz %w7, #0, 1f\n\t"
  134. "ldrb %w2, [%1], #1\n\t"
  135. "adcs %0, %0, %2\n\t"
  136. "\n1:\n\t"
  137. /* Fold down to a uint32_t plus carry flag */
  138. "lsr %2, %0, #32\n\t"
  139. "adcs %w0, %w0, %w2\n\t"
  140. /* Fold down to a uint16_t plus carry in bit 16 */
  141. "ubfm %2, %0, #0, #15\n\t"
  142. "ubfm %3, %0, #16, #31\n\t"
  143. "adc %w0, %w2, %w3\n\t"
  144. /* Fold down to a uint16_t */
  145. "tbz %w0, #16, 1f\n\t"
  146. "mov %w2, #0xffff\n\t"
  147. "sub %w0, %w0, %w2\n\t"
  148. "tbz %w0, #16, 1f\n\t"
  149. "sub %w0, %w0, %w2\n\t"
  150. "\n1:\n\t"
  151. /* Byteswap back, if applicable */
  152. "tbz %w4, #0, 1f\n\t"
  153. "rev16 %w0, %w0\n\t"
  154. "\n1:\n\t"
  155. /* Invert sum */
  156. "eor %w0, %w0, #0xffff\n\t"
  157. : "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
  158. "=&r" ( discard_high )
  159. : "r" ( pre ), "r" ( first ), "r" ( end - post ),
  160. "r" ( post )
  161. : "cc" );
  162. return sum;
  163. }