|
|
@@ -1,336 +0,0 @@
|
|
1
|
|
-/*
|
|
2
|
|
- * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
|
|
3
|
|
- *
|
|
4
|
|
- * This program is free software; you can redistribute it and/or
|
|
5
|
|
- * modify it under the terms of the GNU General Public License as
|
|
6
|
|
- * published by the Free Software Foundation; either version 2 of the
|
|
7
|
|
- * License, or any later version.
|
|
8
|
|
- *
|
|
9
|
|
- * This program is distributed in the hope that it will be useful, but
|
|
10
|
|
- * WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
|
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
12
|
|
- * General Public License for more details.
|
|
13
|
|
- *
|
|
14
|
|
- * You should have received a copy of the GNU General Public License
|
|
15
|
|
- * along with this program; if not, write to the Free Software
|
|
16
|
|
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
17
|
|
- */
|
|
18
|
|
-
|
|
19
|
|
-/** @file
|
|
20
|
|
- *
|
|
21
|
|
- * 64-bit division
|
|
22
|
|
- *
|
|
23
|
|
- * The x86 CPU (386 upwards) has a divl instruction which will perform
|
|
24
|
|
- * unsigned division of a 64-bit dividend by a 32-bit divisor. If the
|
|
25
|
|
- * resulting quotient does not fit in 32 bits, then a CPU exception
|
|
26
|
|
- * will occur.
|
|
27
|
|
- *
|
|
28
|
|
- * Unsigned integer division is expressed as solving
|
|
29
|
|
- *
|
|
30
|
|
- * x = d.q + r 0 <= q, 0 <= r < d
|
|
31
|
|
- *
|
|
32
|
|
- * given the dividend (x) and divisor (d), to find the quotient (q)
|
|
33
|
|
- * and remainder (r).
|
|
34
|
|
- *
|
|
35
|
|
- * The x86 divl instruction will solve
|
|
36
|
|
- *
|
|
37
|
|
- * x = d.q + r 0 <= q, 0 <= r < d
|
|
38
|
|
- *
|
|
39
|
|
- * given x in the range 0 <= x < 2^64 and 1 <= d < 2^32, and causing a
|
|
40
|
|
- * hardware exception if the resulting q >= 2^32.
|
|
41
|
|
- *
|
|
42
|
|
- * We can therefore use divl only if we can prove that the conditions
|
|
43
|
|
- *
|
|
44
|
|
- * 0 <= x < 2^64
|
|
45
|
|
- * 1 <= d < 2^32
|
|
46
|
|
- * q < 2^32
|
|
47
|
|
- *
|
|
48
|
|
- * are satisfied.
|
|
49
|
|
- *
|
|
50
|
|
- *
|
|
51
|
|
- * Case 1 : 1 <= d < 2^32
|
|
52
|
|
- * ======================
|
|
53
|
|
- *
|
|
54
|
|
- * We express x as
|
|
55
|
|
- *
|
|
56
|
|
- * x = xh.2^32 + xl 0 <= xh < 2^32, 0 <= xl < 2^32 (1)
|
|
57
|
|
- *
|
|
58
|
|
- * i.e. split x into low and high dwords. We then solve
|
|
59
|
|
- *
|
|
60
|
|
- * xh = d.qh + r' 0 <= qh, 0 <= r' < d (2)
|
|
61
|
|
- *
|
|
62
|
|
- * which we can do using a divl instruction since
|
|
63
|
|
- *
|
|
64
|
|
- * 0 <= xh < 2^64 since 0 <= xh < 2^32 from (1) (3)
|
|
65
|
|
- *
|
|
66
|
|
- * and
|
|
67
|
|
- *
|
|
68
|
|
- * 1 <= d < 2^32 by definition of this Case (4)
|
|
69
|
|
- *
|
|
70
|
|
- * and
|
|
71
|
|
- *
|
|
72
|
|
- * d.qh = xh - r' from (2)
|
|
73
|
|
- * d.qh <= xh since r' >= 0 from (2)
|
|
74
|
|
- * qh <= xh since d >= 1 from (2)
|
|
75
|
|
- * qh < 2^32 since xh < 2^32 from (1) (5)
|
|
76
|
|
- *
|
|
77
|
|
- * Having obtained qh and r', we then solve
|
|
78
|
|
- *
|
|
79
|
|
- * ( r'.2^32 + xl ) = d.ql + r 0 <= ql, 0 <= r < d (6)
|
|
80
|
|
- *
|
|
81
|
|
- * which we can do using another divl instruction since
|
|
82
|
|
- *
|
|
83
|
|
- * xl <= 2^32 - 1 from (1), so
|
|
84
|
|
- * r'.2^32 + xl <= ( r' + 1 ).2^32 - 1
|
|
85
|
|
- * r'.2^32 + xl <= d.2^32 - 1 since r' < d from (2)
|
|
86
|
|
- * r'.2^32 + xl < d.2^32 (7)
|
|
87
|
|
- * r'.2^32 + xl < 2^64 since d < 2^32 from (4) (8)
|
|
88
|
|
- *
|
|
89
|
|
- * and
|
|
90
|
|
- *
|
|
91
|
|
- * 1 <= d < 2^32 by definition of this Case (9)
|
|
92
|
|
- *
|
|
93
|
|
- * and
|
|
94
|
|
- *
|
|
95
|
|
- * d.ql = ( r'.2^32 + xl ) - r from (6)
|
|
96
|
|
- * d.ql <= r'.2^32 + xl since r >= 0 from (6)
|
|
97
|
|
- * d.ql < d.2^32 from (7)
|
|
98
|
|
- * ql < 2^32 since d >= 1 from (2) (10)
|
|
99
|
|
- *
|
|
100
|
|
- * This then gives us
|
|
101
|
|
- *
|
|
102
|
|
- * x = xh.2^32 + xl from (1)
|
|
103
|
|
- * x = ( d.qh + r' ).2^32 + xl from (2)
|
|
104
|
|
- * x = d.qh.2^32 + ( r'.2^32 + xl )
|
|
105
|
|
- * x = d.qh.2^32 + d.ql + r from (3)
|
|
106
|
|
- * x = d.( qh.2^32 + ql ) + r (11)
|
|
107
|
|
- *
|
|
108
|
|
- * Letting
|
|
109
|
|
- *
|
|
110
|
|
- * q = qh.2^32 + ql (12)
|
|
111
|
|
- *
|
|
112
|
|
- * gives
|
|
113
|
|
- *
|
|
114
|
|
- * x = d.q + r from (11) and (12)
|
|
115
|
|
- *
|
|
116
|
|
- * which is the solution.
|
|
117
|
|
- *
|
|
118
|
|
- *
|
|
119
|
|
- * This therefore gives us a two-step algorithm:
|
|
120
|
|
- *
|
|
121
|
|
- * xh = d.qh + r' 0 <= qh, 0 <= r' < d (2)
|
|
122
|
|
- * ( r'.2^32 + xl ) = d.ql + r 0 <= ql, 0 <= r < d (6)
|
|
123
|
|
- *
|
|
124
|
|
- * which translates to
|
|
125
|
|
- *
|
|
126
|
|
- * %edx:%eax = 0:xh
|
|
127
|
|
- * divl d
|
|
128
|
|
- * qh = %eax
|
|
129
|
|
- * r' = %edx
|
|
130
|
|
- *
|
|
131
|
|
- * %edx:%eax = r':xl
|
|
132
|
|
- * divl d
|
|
133
|
|
- * ql = %eax
|
|
134
|
|
- * r = %edx
|
|
135
|
|
- *
|
|
136
|
|
- * Note that if
|
|
137
|
|
- *
|
|
138
|
|
- * xh < d
|
|
139
|
|
- *
|
|
140
|
|
- * (which is a fast dword comparison) then the first divl instruction
|
|
141
|
|
- * can be omitted, since the answer will be
|
|
142
|
|
- *
|
|
143
|
|
- * qh = 0
|
|
144
|
|
- * r = xh
|
|
145
|
|
- *
|
|
146
|
|
- *
|
|
147
|
|
- * Case 2 : 2^32 <= d < 2^64
|
|
148
|
|
- * =========================
|
|
149
|
|
- *
|
|
150
|
|
- * We first express d as
|
|
151
|
|
- *
|
|
152
|
|
- * d = dh.2^k + dl 2^31 <= dh < 2^32,
|
|
153
|
|
- * 0 <= dl < 2^k, 1 <= k <= 32 (1)
|
|
154
|
|
- *
|
|
155
|
|
- * i.e. find the highest bit set in d, subtract 32, and split d into
|
|
156
|
|
- * dh and dl at that point.
|
|
157
|
|
- *
|
|
158
|
|
- * We then express x as
|
|
159
|
|
- *
|
|
160
|
|
- * x = xh.2^k + xl 0 <= xl < 2^k (2)
|
|
161
|
|
- *
|
|
162
|
|
- * giving
|
|
163
|
|
- *
|
|
164
|
|
- * xh.2^k = x - xl from (2)
|
|
165
|
|
- * xh.2^k <= x since xl >= 0 from (1)
|
|
166
|
|
- * xh.2^k < 2^64 since xh < 2^64 from (1)
|
|
167
|
|
- * xh < 2^(64-k) (3)
|
|
168
|
|
- *
|
|
169
|
|
- * We then solve the division
|
|
170
|
|
- *
|
|
171
|
|
- * xh = dh.q' + r' 0 <= r' < dh (4)
|
|
172
|
|
- *
|
|
173
|
|
- * which we can do using a divl instruction since
|
|
174
|
|
- *
|
|
175
|
|
- * 0 <= xh < 2^64 since x < 2^64 and xh < x
|
|
176
|
|
- *
|
|
177
|
|
- * and
|
|
178
|
|
- *
|
|
179
|
|
- * 1 <= dh < 2^32 from (1)
|
|
180
|
|
- *
|
|
181
|
|
- * and
|
|
182
|
|
- *
|
|
183
|
|
- * dh.q' = xh - r' from (4)
|
|
184
|
|
- * dh.q' <= xh since r' >= 0 from (4)
|
|
185
|
|
- * dh.q' < 2^(64-k) from (3) (5)
|
|
186
|
|
- * q'.2^31 <= dh.q' since dh >= 2^31 from (1) (6)
|
|
187
|
|
- * q'.2^31 < 2^(64-k) from (5) and (6)
|
|
188
|
|
- * q' < 2^(33-k)
|
|
189
|
|
- * q' < 2^32 since k >= 1 from (1) (7)
|
|
190
|
|
- *
|
|
191
|
|
- * This gives us
|
|
192
|
|
- *
|
|
193
|
|
- * xh.2^k = dh.q'.2^k + r'.2^k from (4)
|
|
194
|
|
- * x - xl = ( d - dl ).q' + r'.2^k from (1) and (2)
|
|
195
|
|
- * x = d.q' + ( r'.2^k + xl ) - dl.q' (8)
|
|
196
|
|
- *
|
|
197
|
|
- * Now
|
|
198
|
|
- *
|
|
199
|
|
- * r'.2^k + xl < r'.2^k + 2^k since xl < 2^k from (2)
|
|
200
|
|
- * r'.2^k + xl < ( r' + 1 ).2^k
|
|
201
|
|
- * r'.2^k + xl < dh.2^k since r' < dh from (4)
|
|
202
|
|
- * r'.2^k + xl < ( d - dl ) from (1) (9)
|
|
203
|
|
- *
|
|
204
|
|
- *
|
|
205
|
|
- * (missing)
|
|
206
|
|
- *
|
|
207
|
|
- *
|
|
208
|
|
- * This gives us two cases to consider:
|
|
209
|
|
- *
|
|
210
|
|
- * case (a):
|
|
211
|
|
- *
|
|
212
|
|
- * dl.q' <= ( r'.2^k + xl ) (15a)
|
|
213
|
|
- *
|
|
214
|
|
- * in which case
|
|
215
|
|
- *
|
|
216
|
|
- * x = d.q' + ( r'.2^k + xl - dl.q' )
|
|
217
|
|
- *
|
|
218
|
|
- * is a direct solution to the division, since
|
|
219
|
|
- *
|
|
220
|
|
- * r'.2^k + xl < d from (9)
|
|
221
|
|
- * ( r'.2^k + xl - dl.q' ) < d since dl >= 0 and q' >= 0
|
|
222
|
|
- *
|
|
223
|
|
- * and
|
|
224
|
|
- *
|
|
225
|
|
- * 0 <= ( r'.2^k + xl - dl.q' ) from (15a)
|
|
226
|
|
- *
|
|
227
|
|
- * case (b):
|
|
228
|
|
- *
|
|
229
|
|
- * dl.q' > ( r'.2^k + xl ) (15b)
|
|
230
|
|
- *
|
|
231
|
|
- * Express
|
|
232
|
|
- *
|
|
233
|
|
- * x = d.(q'-1) + ( r'.2^k + xl ) + ( d - dl.q' )
|
|
234
|
|
- *
|
|
235
|
|
- *
|
|
236
|
|
- * (missing)
|
|
237
|
|
- *
|
|
238
|
|
- *
|
|
239
|
|
- * special case: k = 32 cannot be handled with shifts
|
|
240
|
|
- *
|
|
241
|
|
- * (missing)
|
|
242
|
|
- *
|
|
243
|
|
- */
|
|
244
|
|
-
|
|
245
|
|
-#include <stdint.h>
|
|
246
|
|
-#include <assert.h>
|
|
247
|
|
-
|
|
248
|
|
-typedef uint64_t UDItype;
|
|
249
|
|
-
|
|
250
|
|
-struct uint64 {
|
|
251
|
|
- uint32_t l;
|
|
252
|
|
- uint32_t h;
|
|
253
|
|
-};
|
|
254
|
|
-
|
|
255
|
|
-static inline void udivmod64_lo ( const struct uint64 *x,
|
|
256
|
|
- const struct uint64 *d,
|
|
257
|
|
- struct uint64 *q,
|
|
258
|
|
- struct uint64 *r ) {
|
|
259
|
|
- uint32_t r_dash;
|
|
260
|
|
-
|
|
261
|
|
- q->h = 0;
|
|
262
|
|
- r->h = 0;
|
|
263
|
|
- r_dash = x->h;
|
|
264
|
|
-
|
|
265
|
|
- if ( x->h >= d->l ) {
|
|
266
|
|
- __asm__ ( "divl %2"
|
|
267
|
|
- : "=&a" ( q->h ), "=&d" ( r_dash )
|
|
268
|
|
- : "g" ( d->l ), "0" ( x->h ), "1" ( 0 ) );
|
|
269
|
|
- }
|
|
270
|
|
-
|
|
271
|
|
- __asm__ ( "divl %2"
|
|
272
|
|
- : "=&a" ( q->l ), "=&d" ( r->l )
|
|
273
|
|
- : "g" ( d->l ), "0" ( x->l ), "1" ( r_dash ) );
|
|
274
|
|
-}
|
|
275
|
|
-
|
|
276
|
|
-void udivmod64 ( const struct uint64 *x,
|
|
277
|
|
- const struct uint64 *d,
|
|
278
|
|
- struct uint64 *q,
|
|
279
|
|
- struct uint64 *r ) {
|
|
280
|
|
-
|
|
281
|
|
- if ( d->h == 0 ) {
|
|
282
|
|
- udivmod64_lo ( x, d, q, r );
|
|
283
|
|
- } else {
|
|
284
|
|
- assert ( 0 );
|
|
285
|
|
- while ( 1 ) {};
|
|
286
|
|
- }
|
|
287
|
|
-}
|
|
288
|
|
-
|
|
289
|
|
-/**
|
|
290
|
|
- * 64-bit division with remainder
|
|
291
|
|
- *
|
|
292
|
|
- * @v x Dividend
|
|
293
|
|
- * @v d Divisor
|
|
294
|
|
- * @ret r Remainder
|
|
295
|
|
- * @ret q Quotient
|
|
296
|
|
- */
|
|
297
|
|
-UDItype __udivmoddi4 ( UDItype x, UDItype d, UDItype *r ) {
|
|
298
|
|
- UDItype q;
|
|
299
|
|
- UDItype *_x = &x;
|
|
300
|
|
- UDItype *_d = &d;
|
|
301
|
|
- UDItype *_q = &q;
|
|
302
|
|
- UDItype *_r = r;
|
|
303
|
|
-
|
|
304
|
|
- udivmod64 ( ( struct uint64 * ) _x, ( struct uint64 * ) _d,
|
|
305
|
|
- ( struct uint64 * ) _q, ( struct uint64 * ) _r );
|
|
306
|
|
-
|
|
307
|
|
- assert ( ( x == ( ( d * q ) + (*r) ) ) );
|
|
308
|
|
- assert ( (*r) < d );
|
|
309
|
|
-
|
|
310
|
|
- return q;
|
|
311
|
|
-}
|
|
312
|
|
-
|
|
313
|
|
-/**
|
|
314
|
|
- * 64-bit division
|
|
315
|
|
- *
|
|
316
|
|
- * @v x Dividend
|
|
317
|
|
- * @v d Divisor
|
|
318
|
|
- * @ret q Quotient
|
|
319
|
|
- */
|
|
320
|
|
-UDItype __udivdi3 ( UDItype x, UDItype d ) {
|
|
321
|
|
- UDItype r;
|
|
322
|
|
- return __udivmoddi4 ( x, d, &r );
|
|
323
|
|
-}
|
|
324
|
|
-
|
|
325
|
|
-/**
|
|
326
|
|
- * 64-bit modulus
|
|
327
|
|
- *
|
|
328
|
|
- * @v x Dividend
|
|
329
|
|
- * @v d Divisor
|
|
330
|
|
- * @ret q Quotient
|
|
331
|
|
- */
|
|
332
|
|
-UDItype __umoddi3 ( UDItype x, UDItype d ) {
|
|
333
|
|
- UDItype r;
|
|
334
|
|
- __udivmoddi4 ( x, d, &r );
|
|
335
|
|
- return r;
|
|
336
|
|
-}
|