Experimentation reveals that gcc ignores -mrtd for the implicit arithmetic functions (e.g. __udivdi3), but not for the implicit memcpy() and memset() functions. Mark the implicit arithmetic functions with __attribute__((cdecl)) to compensate for this. (Note: we cannot mark with with __cdecl, because we define __cdecl to incorporate regparm(0) as well.)

18 years ago · 4ce8d61a5c
--- a/src/Makefile
+++ b/src/Makefile
@@ -145,6 +145,7 @@ DEBUG_TARGETS	+= dbg%.o c s
 
				
				 
			
 
				
				 # SRCDIRS lists all directories containing source files.
			
 
				
				 #
			
 
				
				+SRCDIRS		+= libgcc
			
 
				
				 SRCDIRS		+= core
			
 
				
				 SRCDIRS		+= proto
			
 
				
				 SRCDIRS		+= net net/tcp net/udp
			
--- a/src/arch/i386/core/udivmod64.c
+++ b/src/arch/i386/core/udivmod64.c
@@ -1,336 +0,0 @@
 
				
				-/*
			
 
				
				- * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
			
 
				
				- *
			
 
				
				- * This program is free software; you can redistribute it and/or
			
 
				
				- * modify it under the terms of the GNU General Public License as
			
 
				
				- * published by the Free Software Foundation; either version 2 of the
			
 
				
				- * License, or any later version.
			
 
				
				- *
			
 
				
				- * This program is distributed in the hope that it will be useful, but
			
 
				
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				
				- * General Public License for more details.
			
 
				
				- *
			
 
				
				- * You should have received a copy of the GNU General Public License
			
 
				
				- * along with this program; if not, write to the Free Software
			
 
				
				- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
			
 
				
				- */
			
 
				
				-
			
 
				
				-/** @file
			
 
				
				- *
			
 
				
				- * 64-bit division
			
 
				
				- *
			
 
				
				- * The x86 CPU (386 upwards) has a divl instruction which will perform
			
 
				
				- * unsigned division of a 64-bit dividend by a 32-bit divisor.  If the
			
 
				
				- * resulting quotient does not fit in 32 bits, then a CPU exception
			
 
				
				- * will occur.
			
 
				
				- *
			
 
				
				- * Unsigned integer division is expressed as solving 
			
 
				
				- *
			
 
				
				- *   x = d.q + r			0 <= q, 0 <= r < d
			
 
				
				- *
			
 
				
				- * given the dividend (x) and divisor (d), to find the quotient (q)
			
 
				
				- * and remainder (r).
			
 
				
				- *
			
 
				
				- * The x86 divl instruction will solve
			
 
				
				- *
			
 
				
				- *   x = d.q + r			0 <= q, 0 <= r < d
			
 
				
				- *
			
 
				
				- * given x in the range 0 <= x < 2^64 and 1 <= d < 2^32, and causing a
			
 
				
				- * hardware exception if the resulting q >= 2^32.
			
 
				
				- *
			
 
				
				- * We can therefore use divl only if we can prove that the conditions
			
 
				
				- *
			
 
				
				- *   0 <= x < 2^64
			
 
				
				- *   1 <= d < 2^32
			
 
				
				- *        q < 2^32
			
 
				
				- *
			
 
				
				- * are satisfied.
			
 
				
				- *
			
 
				
				- *
			
 
				
				- * Case 1 : 1 <= d < 2^32
			
 
				
				- * ======================
			
 
				
				- *
			
 
				
				- * We express x as
			
 
				
				- *
			
 
				
				- *   x = xh.2^32 + xl			0 <= xh < 2^32, 0 <= xl < 2^32	(1)
			
 
				
				- *
			
 
				
				- * i.e. split x into low and high dwords.  We then solve
			
 
				
				- *
			
 
				
				- *   xh = d.qh + r'			0 <= qh, 0 <= r' < d		(2)
			
 
				
				- *
			
 
				
				- * which we can do using a divl instruction since
			
 
				
				- *
			
 
				
				- *   0 <= xh < 2^64			since 0 <= xh < 2^32 from (1)	(3)
			
 
				
				- *
			
 
				
				- * and
			
 
				
				- *
			
 
				
				- *   1 <= d < 2^32			by definition of this Case	(4)
			
 
				
				- *
			
 
				
				- * and
			
 
				
				- *
			
 
				
				- *   d.qh = xh - r'			from (2)
			
 
				
				- *   d.qh <= xh				since r' >= 0 from (2)
			
 
				
				- *   qh <= xh				since d >= 1 from (2)
			
 
				
				- *   qh < 2^32				since xh < 2^32 from (1)	(5)
			
 
				
				- *
			
 
				
				- * Having obtained qh and r', we then solve
			
 
				
				- *
			
 
				
				- *   ( r'.2^32 + xl ) = d.ql + r	0 <= ql, 0 <= r < d		(6)
			
 
				
				- *
			
 
				
				- * which we can do using another divl instruction since
			
 
				
				- *
			
 
				
				- *   xl <= 2^32 - 1			from (1), so
			
 
				
				- *   r'.2^32 + xl <= ( r' + 1 ).2^32 - 1
			
 
				
				- *   r'.2^32 + xl <= d.2^32 - 1		since r' < d from (2)
			
 
				
				- *   r'.2^32 + xl < d.2^32						(7)
			
 
				
				- *   r'.2^32 + xl < 2^64		since d < 2^32 from (4)		(8)
			
 
				
				- *
			
 
				
				- * and
			
 
				
				- *
			
 
				
				- *   1 <= d < 2^32			by definition of this Case	(9)
			
 
				
				- *
			
 
				
				- * and
			
 
				
				- *
			
 
				
				- *   d.ql = ( r'.2^32 + xl ) - r	from (6)
			
 
				
				- *   d.ql <= r'.2^32 + xl		since r >= 0 from (6)
			
 
				
				- *   d.ql < d.2^32			from (7)
			
 
				
				- *   ql < 2^32				since d >= 1 from (2)		(10)
			
 
				
				- *
			
 
				
				- * This then gives us
			
 
				
				- *
			
 
				
				- *   x = xh.2^32 + xl			from (1)
			
 
				
				- *   x = ( d.qh + r' ).2^32 + xl	from (2)
			
 
				
				- *   x = d.qh.2^32 + ( r'.2^32 + xl )
			
 
				
				- *   x = d.qh.2^32 + d.ql + r		from (3)
			
 
				
				- *   x = d.( qh.2^32 + ql ) + r						(11)
			
 
				
				- *
			
 
				
				- * Letting
			
 
				
				- *
			
 
				
				- *   q = qh.2^32 + ql							(12)
			
 
				
				- *
			
 
				
				- * gives
			
 
				
				- *
			
 
				
				- *   x = d.q + r			from (11) and (12)
			
 
				
				- *
			
 
				
				- * which is the solution.
			
 
				
				- *
			
 
				
				- *
			
 
				
				- * This therefore gives us a two-step algorithm:
			
 
				
				- *
			
 
				
				- *   xh = d.qh + r'			0 <= qh, 0 <= r' < d		(2)
			
 
				
				- *   ( r'.2^32 + xl ) = d.ql + r	0 <= ql, 0 <= r < d		(6)
			
 
				
				- *
			
 
				
				- * which translates to
			
 
				
				- *
			
 
				
				- *   %edx:%eax = 0:xh
			
 
				
				- *   divl d
			
 
				
				- *   qh = %eax
			
 
				
				- *   r' = %edx
			
 
				
				- *
			
 
				
				- *   %edx:%eax = r':xl
			
 
				
				- *   divl d
			
 
				
				- *   ql = %eax
			
 
				
				- *   r = %edx
			
 
				
				- *
			
 
				
				- * Note that if
			
 
				
				- *
			
 
				
				- *   xh < d
			
 
				
				- *
			
 
				
				- * (which is a fast dword comparison) then the first divl instruction
			
 
				
				- * can be omitted, since the answer will be
			
 
				
				- *
			
 
				
				- *   qh = 0
			
 
				
				- *   r = xh
			
 
				
				- *
			
 
				
				- *
			
 
				
				- * Case 2 : 2^32 <= d < 2^64
			
 
				
				- * =========================
			
 
				
				- *
			
 
				
				- * We first express d as
			
 
				
				- *
			
 
				
				- *   d = dh.2^k + dl			2^31 <= dh < 2^32,
			
 
				
				- *					0 <= dl < 2^k, 1 <= k <= 32	(1)
			
 
				
				- *
			
 
				
				- * i.e. find the highest bit set in d, subtract 32, and split d into
			
 
				
				- * dh and dl at that point.
			
 
				
				- *
			
 
				
				- * We then express x as
			
 
				
				- *
			
 
				
				- *   x = xh.2^k + xl			0 <= xl < 2^k			(2)
			
 
				
				- *
			
 
				
				- * giving
			
 
				
				- *
			
 
				
				- *   xh.2^k = x - xl			from (2)
			
 
				
				- *   xh.2^k <= x			since xl >= 0 from (1)
			
 
				
				- *   xh.2^k < 2^64			since xh < 2^64 from (1)
			
 
				
				- *   xh < 2^(64-k)							(3)
			
 
				
				- *
			
 
				
				- * We then solve the division
			
 
				
				- *
			
 
				
				- *   xh = dh.q' + r'	            		0 <= r' < dh		(4)
			
 
				
				- *
			
 
				
				- * which we can do using a divl instruction since
			
 
				
				- *
			
 
				
				- *   0 <= xh < 2^64			since x < 2^64 and xh < x
			
 
				
				- *
			
 
				
				- * and
			
 
				
				- *
			
 
				
				- *   1 <= dh < 2^32			from (1)
			
 
				
				- *
			
 
				
				- * and
			
 
				
				- *
			
 
				
				- *   dh.q' = xh - r'			from (4)
			
 
				
				- *   dh.q' <= xh			since r' >= 0 from (4)
			
 
				
				- *   dh.q' < 2^(64-k)			from (3)			(5)
			
 
				
				- *   q'.2^31 <= dh.q'			since dh >= 2^31 from (1)	(6)
			
 
				
				- *   q'.2^31 < 2^(64-k)			from (5) and (6)
			
 
				
				- *   q' < 2^(33-k)
			
 
				
				- *   q' < 2^32				since k >= 1 from (1)		(7)
			
 
				
				- *
			
 
				
				- * This gives us
			
 
				
				- *
			
 
				
				- *   xh.2^k = dh.q'.2^k + r'.2^k	from (4)
			
 
				
				- *   x - xl = ( d - dl ).q' + r'.2^k	from (1) and (2)
			
 
				
				- *   x = d.q' + ( r'.2^k + xl ) - dl.q'					(8)
			
 
				
				- *
			
 
				
				- * Now
			
 
				
				- *
			
 
				
				- *  r'.2^k + xl < r'.2^k + 2^k		since xl < 2^k from (2)
			
 
				
				- *  r'.2^k + xl < ( r' + 1 ).2^k
			
 
				
				- *  r'.2^k + xl < dh.2^k		since r' < dh from (4)
			
 
				
				- *  r'.2^k + xl < ( d - dl )		from (1)			(9)
			
 
				
				- *
			
 
				
				- *
			
 
				
				- * (missing)
			
 
				
				- *
			
 
				
				- *
			
 
				
				- * This gives us two cases to consider:
			
 
				
				- *
			
 
				
				- * case (a):
			
 
				
				- *
			
 
				
				- *   dl.q' <= ( r'.2^k + xl )						(15a)
			
 
				
				- *
			
 
				
				- * in which case
			
 
				
				- *
			
 
				
				- *   x = d.q' + ( r'.2^k + xl - dl.q' )
			
 
				
				- *
			
 
				
				- * is a direct solution to the division, since
			
 
				
				- *
			
 
				
				- *   r'.2^k + xl < d			from (9)
			
 
				
				- *   ( r'.2^k + xl - dl.q' ) < d	since dl >= 0 and q' >= 0
			
 
				
				- *
			
 
				
				- * and
			
 
				
				- *
			
 
				
				- *   0 <= ( r'.2^k + xl - dl.q' )	from (15a)
			
 
				
				- *
			
 
				
				- * case (b):
			
 
				
				- *
			
 
				
				- *   dl.q' > ( r'.2^k + xl )						(15b)
			
 
				
				- *   
			
 
				
				- * Express
			
 
				
				- *
			
 
				
				- *  x = d.(q'-1) + ( r'.2^k + xl ) + ( d - dl.q' )
			
 
				
				- *  
			
 
				
				- *   
			
 
				
				- * (missing)
			
 
				
				- *
			
 
				
				- *
			
 
				
				- * special case: k = 32 cannot be handled with shifts
			
 
				
				- *
			
 
				
				- * (missing)
			
 
				
				- * 
			
 
				
				- */
			
 
				
				-
			
 
				
				-#include <stdint.h>
			
 
				
				-#include <assert.h>
			
 
				
				-
			
 
				
				-typedef uint64_t UDItype;
			
 
				
				-
			
 
				
				-struct uint64 {
			
 
				
				-	uint32_t l;
			
 
				
				-	uint32_t h;
			
 
				
				-};
			
 
				
				-
			
 
				
				-static inline void udivmod64_lo ( const struct uint64 *x,
			
 
				
				-				  const struct uint64 *d,
			
 
				
				-				  struct uint64 *q,
			
 
				
				-				  struct uint64 *r ) {
			
 
				
				-	uint32_t r_dash;
			
 
				
				-
			
 
				
				-	q->h = 0;
			
 
				
				-	r->h = 0;
			
 
				
				-	r_dash = x->h;
			
 
				
				-
			
 
				
				-	if ( x->h >= d->l ) {
			
 
				
				-		__asm__ ( "divl %2"
			
 
				
				-			  : "=&a" ( q->h ), "=&d" ( r_dash )
			
 
				
				-			  : "g" ( d->l ), "0" ( x->h ), "1" ( 0 ) );
			
 
				
				-	}
			
 
				
				-
			
 
				
				-	__asm__ ( "divl %2"
			
 
				
				-		  : "=&a" ( q->l ), "=&d" ( r->l )
			
 
				
				-		  : "g" ( d->l ), "0" ( x->l ), "1" ( r_dash ) );
			
 
				
				-}
			
 
				
				-
			
 
				
				-void udivmod64 ( const struct uint64 *x,
			
 
				
				-			const struct uint64 *d,
			
 
				
				-			struct uint64 *q,
			
 
				
				-			struct uint64 *r ) {
			
 
				
				-
			
 
				
				-	if ( d->h == 0 ) {
			
 
				
				-		udivmod64_lo ( x, d, q, r );
			
 
				
				-	} else {
			
 
				
				-		assert ( 0 );
			
 
				
				-		while ( 1 ) {};
			
 
				
				-	}	
			
 
				
				-}
			
 
				
				-
			
 
				
				-/**
			
 
				
				- * 64-bit division with remainder
			
 
				
				- *
			
 
				
				- * @v x			Dividend
			
 
				
				- * @v d			Divisor
			
 
				
				- * @ret r		Remainder
			
 
				
				- * @ret q		Quotient
			
 
				
				- */
			
 
				
				-UDItype __udivmoddi4 ( UDItype x, UDItype d, UDItype *r ) {
			
 
				
				-	UDItype q;
			
 
				
				-	UDItype *_x = &x;
			
 
				
				-	UDItype *_d = &d;
			
 
				
				-	UDItype *_q = &q;
			
 
				
				-	UDItype *_r = r;
			
 
				
				-
			
 
				
				-	udivmod64 ( ( struct uint64 * ) _x, ( struct uint64 * ) _d,
			
 
				
				-		    ( struct uint64 * ) _q, ( struct uint64 * ) _r );
			
 
				
				-
			
 
				
				-	assert ( ( x == ( ( d * q ) + (*r) ) ) );
			
 
				
				-	assert ( (*r) < d );
			
 
				
				-
			
 
				
				-	return q;
			
 
				
				-}
			
 
				
				-
			
 
				
				-/**
			
 
				
				- * 64-bit division
			
 
				
				- *
			
 
				
				- * @v x			Dividend
			
 
				
				- * @v d			Divisor
			
 
				
				- * @ret q		Quotient
			
 
				
				- */
			
 
				
				-UDItype __udivdi3 ( UDItype x, UDItype d ) {
			
 
				
				-	UDItype r;
			
 
				
				-	return __udivmoddi4 ( x, d, &r );
			
 
				
				-}
			
 
				
				-
			
 
				
				-/**
			
 
				
				- * 64-bit modulus
			
 
				
				- *
			
 
				
				- * @v x			Dividend
			
 
				
				- * @v d			Divisor
			
 
				
				- * @ret q		Quotient
			
 
				
				- */
			
 
				
				-UDItype __umoddi3 ( UDItype x, UDItype d ) {
			
 
				
				-	UDItype r;
			
 
				
				-	__udivmoddi4 ( x, d, &r );
			
 
				
				-	return r;
			
 
				
				-}
			
--- a/src/libgcc/__divdi3.c
+++ b/src/libgcc/__divdi3.c
@@ -0,0 +1,26 @@
 
				
				+/*
			
 
				
				+ * arch/i386/libgcc/__divdi3.c
			
 
				
				+ */
			
 
				
				+
			
 
				
				+#include "libgcc.h"
			
 
				
				+
			
 
				
				+LIBGCC int64_t __divdi3(int64_t num, int64_t den)
			
 
				
				+{
			
 
				
				+  int minus = 0;
			
 
				
				+  int64_t v;
			
 
				
				+
			
 
				
				+  if ( num < 0 ) {
			
 
				
				+    num = -num;
			
 
				
				+    minus = 1;
			
 
				
				+  }
			
 
				
				+  if ( den < 0 ) {
			
 
				
				+    den = -den;
			
 
				
				+    minus ^= 1;
			
 
				
				+  }
			
 
				
				+
			
 
				
				+  v = __udivmoddi4(num, den, NULL);
			
 
				
				+  if ( minus )
			
 
				
				+    v = -v;
			
 
				
				+
			
 
				
				+  return v;
			
 
				
				+}
			
--- a/src/libgcc/__moddi3.c
+++ b/src/libgcc/__moddi3.c
@@ -0,0 +1,26 @@
 
				
				+/*
			
 
				
				+ * arch/i386/libgcc/__moddi3.c
			
 
				
				+ */
			
 
				
				+
			
 
				
				+#include "libgcc.h"
			
 
				
				+
			
 
				
				+LIBGCC int64_t __moddi3(int64_t num, int64_t den)
			
 
				
				+{
			
 
				
				+  int minus = 0;
			
 
				
				+  int64_t v;
			
 
				
				+
			
 
				
				+  if ( num < 0 ) {
			
 
				
				+    num = -num;
			
 
				
				+    minus = 1;
			
 
				
				+  }
			
 
				
				+  if ( den < 0 ) {
			
 
				
				+    den = -den;
			
 
				
				+    minus ^= 1;
			
 
				
				+  }
			
 
				
				+
			
 
				
				+  (void) __udivmoddi4(num, den, (uint64_t *)&v);
			
 
				
				+  if ( minus )
			
 
				
				+    v = -v;
			
 
				
				+
			
 
				
				+  return v;
			
 
				
				+}
			
--- a/src/libgcc/__udivdi3.c
+++ b/src/libgcc/__udivdi3.c
@@ -0,0 +1,10 @@
 
				
				+/*
			
 
				
				+ * arch/i386/libgcc/__divdi3.c
			
 
				
				+ */
			
 
				
				+
			
 
				
				+#include "libgcc.h"
			
 
				
				+
			
 
				
				+LIBGCC uint64_t __udivdi3(uint64_t num, uint64_t den)
			
 
				
				+{
			
 
				
				+  return __udivmoddi4(num, den, NULL);
			
 
				
				+}
			
--- a/src/libgcc/__udivmoddi4.c
+++ b/src/libgcc/__udivmoddi4.c
@@ -0,0 +1,32 @@
 
				
				+#include "libgcc.h"
			
 
				
				+
			
 
				
				+LIBGCC uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *rem_p)
			
 
				
				+{
			
 
				
				+  uint64_t quot = 0, qbit = 1;
			
 
				
				+
			
 
				
				+  if ( den == 0 ) {
			
 
				
				+    return 1/((unsigned)den); /* Intentional divide by zero, without
			
 
				
				+				 triggering a compiler warning which
			
 
				
				+				 would abort the build */
			
 
				
				+  }
			
 
				
				+
			
 
				
				+  /* Left-justify denominator and count shift */
			
 
				
				+  while ( (int64_t)den >= 0 ) {
			
 
				
				+    den <<= 1;
			
 
				
				+    qbit <<= 1;
			
 
				
				+  }
			
 
				
				+
			
 
				
				+  while ( qbit ) {
			
 
				
				+    if ( den <= num ) {
			
 
				
				+      num -= den;
			
 
				
				+      quot += qbit;
			
 
				
				+    }
			
 
				
				+    den >>= 1;
			
 
				
				+    qbit >>= 1;
			
 
				
				+  }
			
 
				
				+
			
 
				
				+  if ( rem_p )
			
 
				
				+    *rem_p = num;
			
 
				
				+
			
 
				
				+  return quot;
			
 
				
				+}
			
--- a/src/libgcc/__umoddi3.c
+++ b/src/libgcc/__umoddi3.c
@@ -0,0 +1,13 @@
 
				
				+/*
			
 
				
				+ * arch/i386/libgcc/__umoddi3.c
			
 
				
				+ */
			
 
				
				+
			
 
				
				+#include "libgcc.h"
			
 
				
				+
			
 
				
				+LIBGCC uint64_t __umoddi3(uint64_t num, uint64_t den)
			
 
				
				+{
			
 
				
				+  uint64_t v;
			
 
				
				+
			
 
				
				+  (void) __udivmoddi4(num, den, &v);
			
 
				
				+  return v;
			
 
				
				+}
			
--- a/src/libgcc/libgcc.h
+++ b/src/libgcc/libgcc.h
@@ -0,0 +1,26 @@
 
				
				+#ifndef _LIBGCC_H
			
 
				
				+#define _LIBGCC_H
			
 
				
				+
			
 
				
				+#include <stdint.h>
			
 
				
				+#include <stddef.h>
			
 
				
				+
			
 
				
				+/*
			
 
				
				+ * It seems as though gcc expects its implicit arithmetic functions to
			
 
				
				+ * be cdecl, even if -mrtd is specified.  This is somewhat
			
 
				
				+ * inconsistent; for example, if -mregparm=3 is used then the implicit
			
 
				
				+ * functions do become regparm(3).
			
 
				
				+ *
			
 
				
				+ * The implicit calls to memcpy() and memset() which gcc can generate
			
 
				
				+ * do not seem to have this inconsistency; -mregparm and -mrtd affect
			
 
				
				+ * them in the same way as any other function.
			
 
				
				+ *
			
 
				
				+ */
			
 
				
				+#define LIBGCC __attribute__ (( cdecl ))
			
 
				
				+
			
 
				
				+extern LIBGCC uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *rem);
			
 
				
				+extern LIBGCC uint64_t __udivdi3(uint64_t num, uint64_t den);
			
 
				
				+extern LIBGCC uint64_t __umoddi3(uint64_t num, uint64_t den);
			
 
				
				+extern LIBGCC int64_t __divdi3(int64_t num, int64_t den);
			
 
				
				+extern LIBGCC int64_t __moddi3(int64_t num, int64_t den);
			
 
				
				+
			
 
				
				+#endif /* _LIBGCC_H */
			
--- a/src/core/gcc_implicit.c
+++ b/src/core/gcc_implicit.c
@@ -1,6 +1,4 @@
 
				
				 /** @file
			
 
				
				- *
			
 
				
				- * gcc implicit functions
			
 
				
				  *
			
 
				
				  * gcc sometimes likes to insert implicit calls to memcpy().
			
 
				
				  * Unfortunately, there doesn't seem to be any way to prevent it from