ソースを参照

Handle (64-bit) / (32-bit) = (64-bit), i.e. one step beyond that

provided by the divl instruction.
tags/v0.9.3
Michael Brown 19年前
コミット
b3b6b25aeb
1個のファイルの変更319行の追加0行の削除
  1. 319
    0
      src/arch/i386/core/udivmod64.c

+ 319
- 0
src/arch/i386/core/udivmod64.c ファイルの表示

@@ -0,0 +1,319 @@
1
+/*
2
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
+ */
18
+
19
+/** @file
20
+ *
21
+ * 64-bit division
22
+ *
23
+ * The x86 CPU (386 upwards) has a divl instruction which will perform
24
+ * unsigned division of a 64-bit dividend by a 32-bit divisor.  If the
25
+ * resulting quotient does not fit in 32 bits, then a CPU exception
26
+ * will occur.
27
+ *
28
+ * Unsigned integer division is expressed as solving 
29
+ *
30
+ *   x = d.q + r			0 <= q, 0 <= r < d
31
+ *
32
+ * given the dividend (x) and divisor (d), to find the quotient (q)
33
+ * and remainder (r).
34
+ *
35
+ * The x86 divl instruction will solve
36
+ *
37
+ *   x = d.q + r			0 <= q, 0 <= r < d
38
+ *
39
+ * given x in the range 0 <= x < 2^64 and 1 <= d < 2^32, and causing a
40
+ * hardware exception if the resulting q >= 2^32.
41
+ *
42
+ * We can therefore use divl only if we can prove that the conditions
43
+ *
44
+ *   0 <= x < 2^64
45
+ *   1 <= d < 2^32
46
+ *        q < 2^32
47
+ *
48
+ * are satisfied.
49
+ *
50
+ *
51
+ * Case 1 : 1 <= d < 2^32
52
+ * ======================
53
+ *
54
+ * We express x as
55
+ *
56
+ *   x = xh.2^32 + xl			0 <= xh < 2^32, 0 <= xl < 2^32	(1)
57
+ *
58
+ * i.e. split x into low and high dwords.  We then solve
59
+ *
60
+ *   xh = d.qh + r'			0 <= qh, 0 <= r' < d		(2)
61
+ *
62
+ * which we can do using a divl instruction since
63
+ *
64
+ *   0 <= xh < 2^64			since 0 <= xh < 2^32 from (1)	(3)
65
+ *
66
+ * and
67
+ *
68
+ *   1 <= d < 2^32			by definition of this Case	(4)
69
+ *
70
+ * and
71
+ *
72
+ *   d.qh = xh - r'			from (2)
73
+ *   d.qh <= xh				since r' >= 0 from (2)
74
+ *   qh <= xh				since d >= 1 from (2)
75
+ *   qh < 2^32				since xh < 2^32 from (1)	(5)
76
+ *
77
+ * Having obtained qh and r', we then solve
78
+ *
79
+ *   ( r'.2^32 + xl ) = d.ql + r	0 <= ql, 0 <= r < d		(6)
80
+ *
81
+ * which we can do using another divl instruction since
82
+ *
83
+ *   xl <= 2^32 - 1			from (1), so
84
+ *   r'.2^32 + xl <= ( r' + 1 ).2^32 - 1
85
+ *   r'.2^32 + xl <= d.2^32 - 1		since r' < d from (2)
86
+ *   r'.2^32 + xl < d.2^32						(7)
87
+ *   r'.2^32 + xl < 2^64		since d < 2^32 from (4)		(8)
88
+ *
89
+ * and
90
+ *
91
+ *   1 <= d < 2^32			by definition of this Case	(9)
92
+ *
93
+ * and
94
+ *
95
+ *   d.ql = ( r'.2^32 + xl ) - r	from (6)
96
+ *   d.ql <= r'.2^32 + xl		since r >= 0 from (6)
97
+ *   d.ql < d.2^32			from (7)
98
+ *   ql < 2^32				since d >= 1 from (2)		(10)
99
+ *
100
+ * This then gives us
101
+ *
102
+ *   x = xh.2^32 + xl			from (1)
103
+ *   x = ( d.qh + r' ).2^32 + xl	from (2)
104
+ *   x = d.qh.2^32 + ( r'.2^32 + xl )
105
+ *   x = d.qh.2^32 + d.ql + r		from (3)
106
+ *   x = d.( qh.2^32 + ql ) + r						(11)
107
+ *
108
+ * Letting
109
+ *
110
+ *   q = qh.2^32 + ql							(12)
111
+ *
112
+ * gives
113
+ *
114
+ *   x = d.q + r			from (11) and (12)
115
+ *
116
+ * which is the solution.
117
+ *
118
+ *
119
+ * This therefore gives us a two-step algorithm:
120
+ *
121
+ *   xh = d.qh + r'			0 <= qh, 0 <= r' < d		(2)
122
+ *   ( r'.2^32 + xl ) = d.ql + r	0 <= ql, 0 <= r < d		(6)
123
+ *
124
+ * which translates to
125
+ *
126
+ *   %edx:%eax = 0:xh
127
+ *   divl d
128
+ *   qh = %eax
129
+ *   r' = %edx
130
+ *
131
+ *   %edx:%eax = r':xl
132
+ *   divl d
133
+ *   ql = %eax
134
+ *   r = %edx
135
+ *
136
+ * Note that if
137
+ *
138
+ *   xh < d
139
+ *
140
+ * (which is a fast dword comparison) then the first divl instruction
141
+ * can be omitted, since the answer will be
142
+ *
143
+ *   qh = 0
144
+ *   r = xh
145
+ *
146
+ *
147
+ * Case 2 : 2^32 <= d < 2^64
148
+ * =========================
149
+ *
150
+ * We first express d as
151
+ *
152
+ *   d = dh.2^k + dl			2^31 <= dh < 2^32,
153
+ *					0 <= dl < 2^k, 1 <= k <= 32	(1)
154
+ *
155
+ * i.e. find the highest bit set in d, subtract 32, and split d into
156
+ * dh and dl at that point.
157
+ *
158
+ * We then express x as
159
+ *
160
+ *   x = xh.2^k + xl			0 <= xl < 2^k			(2)
161
+ *
162
+ * giving
163
+ *
164
+ *   xh.2^k = x - xl			from (2)
165
+ *   xh.2^k <= x			since xl >= 0 from (1)
166
+ *   xh.2^k < 2^64			since xh < 2^64 from (1)
167
+ *   xh < 2^(64-k)							(3)
168
+ *
169
+ * We then solve the division
170
+ *
171
+ *   xh = dh.q' + r'	            		0 <= r' < dh		(4)
172
+ *
173
+ * which we can do using a divl instruction since
174
+ *
175
+ *   0 <= xh < 2^64			since x < 2^64 and xh < x
176
+ *
177
+ * and
178
+ *
179
+ *   1 <= dh < 2^32			from (1)
180
+ *
181
+ * and
182
+ *
183
+ *   dh.q' = xh - r'			from (4)
184
+ *   dh.q' <= xh			since r' >= 0 from (4)
185
+ *   dh.q' < 2^(64-k)			from (3)			(5)
186
+ *   q'.2^31 <= dh.q'			since dh >= 2^31 from (1)	(6)
187
+ *   q'.2^31 < 2^(64-k)			from (5) and (6)
188
+ *   q' < 2^(33-k)
189
+ *   q' < 2^32				since k >= 1 from (1)		(7)
190
+ *
191
+ * This gives us
192
+ *
193
+ *   xh.2^k = dh.q'.2^k + r'.2^k	from (4)
194
+ *   x - xl = ( d - dl ).q' + r'.2^k	from (1) and (2)
195
+ *   x = d.q' + ( r'.2^k + xl ) - dl.q'					(8)
196
+ *
197
+ * Now
198
+ *
199
+ *  r'.2^k + xl < r'.2^k + 2^k		since xl < 2^k from (2)
200
+ *  r'.2^k + xl < ( r' + 1 ).2^k
201
+ *  r'.2^k + xl < dh.2^k		since r' < dh from (4)
202
+ *  r'.2^k + xl < ( d - dl )		from (1)			(9)
203
+ *
204
+ *
205
+ * (missing)
206
+ *
207
+ *
208
+ * This gives us two cases to consider:
209
+ *
210
+ * case (a):
211
+ *
212
+ *   dl.q' <= ( r'.2^k + xl )						(15a)
213
+ *
214
+ * in which case
215
+ *
216
+ *   x = d.q' + ( r'.2^k + xl - dl.q' )
217
+ *
218
+ * is a direct solution to the division, since
219
+ *
220
+ *   r'.2^k + xl < d			from (9)
221
+ *   ( r'.2^k + xl - dl.q' ) < d	since dl >= 0 and q' >= 0
222
+ *
223
+ * and
224
+ *
225
+ *   0 <= ( r'.2^k + xl - dl.q' )	from (15a)
226
+ *
227
+ * case (b):
228
+ *
229
+ *   dl.q' > ( r'.2^k + xl )						(15b)
230
+ *   
231
+ * Express
232
+ *
233
+ *  x = d.(q'-1) + ( r'.2^k + xl ) + ( d - dl.q' )
234
+ *  
235
+ *   
236
+ * (missing)
237
+ *
238
+ *
239
+ * special case: k = 32 cannot be handled with shifts
240
+ *
241
+ * (missing)
242
+ * 
243
+ */
244
+
245
+#include <stdint.h>
246
+#include <assert.h>
247
+
248
+typedef uint64_t UDItype;
249
+
250
+struct uint64 {
251
+	uint32_t l;
252
+	uint32_t h;
253
+};
254
+
255
+static inline void udivmod64_lo ( const struct uint64 *x,
256
+				  const struct uint64 *d,
257
+				  struct uint64 *q,
258
+				  struct uint64 *r ) {
259
+	uint32_t r_dash;
260
+
261
+	q->h = 0;
262
+	r->h = 0;
263
+	r_dash = x->h;
264
+
265
+	if ( x->h >= d->l ) {
266
+		__asm__ ( "divl %2"
267
+			  : "=&a" ( q->h ), "=&d" ( r_dash )
268
+			  : "g" ( d->l ), "0" ( x->h ), "1" ( 0 ) );
269
+	}
270
+
271
+	__asm__ ( "divl %2"
272
+		  : "=&a" ( q->l ), "=&d" ( r->l )
273
+		  : "g" ( d->l ), "0" ( x->l ), "1" ( r_dash ) );
274
+}
275
+
276
+static void udivmod64 ( const struct uint64 *x,
277
+			const struct uint64 *d,
278
+			struct uint64 *q,
279
+			struct uint64 *r ) {
280
+
281
+	if ( d->h == 0 ) {
282
+		udivmod64_lo ( x, d, q, r );
283
+	} else {
284
+		assert ( 0 );
285
+		while ( 1 ) {};
286
+	}	
287
+}
288
+
289
+/**
290
+ * 64-bit division with remainder
291
+ *
292
+ * @v x			Dividend
293
+ * @v d			Divisor
294
+ * @ret r		Remainder
295
+ * @ret q		Quotient
296
+ */
297
+UDItype __udivmoddi4 ( UDItype x, UDItype d, UDItype *r ) {
298
+	UDItype q;
299
+	UDItype *_x = &x;
300
+	UDItype *_d = &d;
301
+	UDItype *_q = &q;
302
+	UDItype *_r = r;
303
+
304
+	udivmod64 ( ( struct uint64 * ) _x, ( struct uint64 * ) _d,
305
+		    ( struct uint64 * ) _q, ( struct uint64 * ) _r );
306
+	return q;
307
+}
308
+
309
+/**
310
+ * 64-bit division
311
+ *
312
+ * @v x			Dividend
313
+ * @v d			Divisor
314
+ * @ret q		Quotient
315
+ */
316
+UDItype __udivdi3 ( UDItype x, UDItype d ) {
317
+	UDItype r;
318
+	return __udivmoddi4 ( x, d, &r );
319
+}

読み込み中…
キャンセル
保存