MODULE = quad_math




include $(RIOTBASE)/Makefile.base

/* $OpenBSD: adddi3.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


/*


* Copyright (c) 1992, 1993


* The Regents of the University of California. All rights reserved.


*


* This software was developed by the Computer Systems Engineering group


* at Lawrence Berkeley Laboratory under DARPA contract BG 9166 and


* contributed to Berkeley.


*


* Redistribution and use in source and binary forms, with or without


* modification, are permitted provided that the following conditions


* are met:


* 1. Redistributions of source code must retain the above copyright


* notice, this list of conditions and the following disclaimer.


* 2. Redistributions in binary form must reproduce the above copyright


* notice, this list of conditions and the following disclaimer in the


* documentation and/or other materials provided with the distribution.


* 3. Neither the name of the University nor the names of its contributors


* may be used to endorse or promote products derived from this software


* without specific prior written permission.


*


* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND


* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE


* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE


* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE


* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL


* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS


* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)


* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT


* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY


* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF


* SUCH DAMAGE.


*/




#include "quad.h"




/*
 * 64-bit addition built from two word-sized additions.
 *
 * The low words are added first.  A carry out of that addition occurred
 * exactly when the low sum compares less than one of its operands (here
 * the low word of b); that 0/1 carry is then folded into the sum of the
 * high words.
 */
quad_t __adddi3(quad_t a, quad_t b)
{
    union uu lhs, rhs, result;
    int carry;

    lhs.q = a;
    rhs.q = b;

    result.ul[L] = lhs.ul[L] + rhs.ul[L];
    carry = (result.ul[L] < rhs.ul[L]);
    result.ul[H] = lhs.ul[H] + rhs.ul[H] + carry;

    return result.q;
}

/* $OpenBSD: anddi3.c,v 1.5 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Bitwise AND of two quads, computed one word at a time.
 * Returns a & b.
 */
quad_t __anddi3(quad_t a, quad_t b)
{
    union uu lhs, rhs;

    lhs.q = a;
    rhs.q = b;

    /* Word order is irrelevant here: both halves get the same treatment. */
    lhs.ul[1] = lhs.ul[1] & rhs.ul[1];
    lhs.ul[0] = lhs.ul[0] & rhs.ul[0];

    return lhs.q;
}

/* $OpenBSD: ashldi3.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Shift a (signed) quad value left (arithmetic shift left).
 * This is the same as logical shift left!
 *
 * a:     value to shift
 * shift: number of bit positions to shift by
 *
 * Returns a << shift, assembled from word-sized shifts.
 */
quad_t __ashldi3(quad_t a, qshift_t shift)
{
    union uu aa;

    /*
     * Handle a zero shift up front: the general path below would
     * otherwise shift the low word right by INT_BITS.
     */
    if (shift == 0) {
        return a;
    }

    aa.q = a;

    if (shift >= INT_BITS) {
        /* Everything that survives comes from the low word. */
        aa.ul[H] = aa.ul[L] << (shift - INT_BITS);
        aa.ul[L] = 0;
    }
    else {
        /* High word keeps its own bits plus those shifted out of low. */
        aa.ul[H] = (aa.ul[H] << shift) | (aa.ul[L] >> (INT_BITS - shift));
        aa.ul[L] <<= shift;
    }

    return aa.q;
}

/* $OpenBSD: ashrdi3.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Shift a (signed) quad value right (arithmetic shift right).
 *
 * a:     value to shift
 * shift: number of bit positions to shift by
 *
 * Returns a >> shift, assembled from word-sized shifts.  The treatment of
 * the sign bit is inherited from the machine's signed right shift.
 */
quad_t __ashrdi3(quad_t a, qshift_t shift)
{
    union uu aa;

    /*
     * Handle a zero shift up front: the general path below would
     * otherwise shift the high word left by INT_BITS.
     */
    if (shift == 0) {
        return a;
    }

    aa.q = a;

    if (shift >= INT_BITS) {
        /*
         * Smear bits rightward using the machine's right-shift
         * method, whether that is sign extension or zero fill,
         * to get the `sign word' s.  Note that shifting by
         * INT_BITS is undefined, so we shift (INT_BITS-1),
         * then 1 more, to get our answer.
         */
        /* LINTED inherits machine dependency */
        int s = (aa.sl[H] >> (INT_BITS - 1)) >> 1;

        /* LINTED inherits machine dependency */
        aa.ul[L] = aa.sl[H] >> (shift - INT_BITS);
        aa.ul[H] = s;
    }
    else {
        /* Low word keeps its own bits plus those shifted out of high. */
        aa.ul[L] = (aa.ul[L] >> shift) | (aa.ul[H] << (INT_BITS - shift));
        /* LINTED inherits machine dependency */
        aa.sl[H] >>= shift;
    }

    return aa.q;
}

/* $OpenBSD: cmpdi2.c,v 1.5 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Three-way comparison of two signed quads.
 *
 * Returns 0 when a < b, 1 when a == b and 2 when a > b.
 * Only the high word carries the sign, so the high words are compared
 * as signed values and the low words as unsigned values.
 */
int __cmpdi2(quad_t a, quad_t b)
{
    union uu lhs, rhs;

    lhs.q = a;
    rhs.q = b;

    /* The high words decide unless they are equal. */
    if (lhs.sl[H] != rhs.sl[H]) {
        return (lhs.sl[H] < rhs.sl[H]) ? 0 : 2;
    }

    /* Same high word: fall back to the low words. */
    if (lhs.ul[L] != rhs.ul[L]) {
        return (lhs.ul[L] < rhs.ul[L]) ? 0 : 2;
    }

    return 1;
}

/* $OpenBSD: divdi3.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Divide two signed quads.
 * ??? if -1/2 should produce -1 on this machine, this code is wrong
 *
 * Both operands are reduced to their magnitudes, divided as unsigned
 * values by __qdivrem(), and the quotient is negated when exactly one
 * of the operands was negative.
 */
quad_t __divdi3(quad_t a, quad_t b)
{
    u_quad_t ua, ub, uq;
    int neg = 0;

    ua = a;
    ub = b;

    /* Negation is done on the unsigned copies. */
    if (a < 0) {
        ua = -ua;
        neg = !neg;
    }

    if (b < 0) {
        ub = -ub;
        neg = !neg;
    }

    /* Only the quotient is wanted, so no remainder pointer is passed. */
    uq = __qdivrem(ua, ub, NULL);

    if (neg) {
        uq = -uq;
    }

    return uq;
}

/* $OpenBSD: fixdfdi.c,v 1.5 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Convert double to (signed) quad.
 * We clamp anything that is out of range: values at or below QUAD_MIN
 * yield QUAD_MIN, values at or above QUAD_MAX yield QUAD_MAX.
 */
quad_t __fixdfdi(double x)
{
    if (x < 0) {
        if (x <= QUAD_MIN) {
            return QUAD_MIN;
        }
        else {
            /*
             * Convert the magnitude (-x is positive here) to unsigned,
             * then negate in unsigned arithmetic; converting a negative
             * floating value directly to an unsigned type is undefined.
             */
            return (quad_t) -(u_quad_t) -x;
        }
    }
    else {
        if (x >= QUAD_MAX) {
            return QUAD_MAX;
        }
        else {
            return (quad_t) (u_quad_t) x;
        }
    }
}

/* $OpenBSD: fixsfdi.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Convert float to (signed) quad.
 * We clamp anything that is out of range: values at or below QUAD_MIN
 * yield QUAD_MIN, values at or above QUAD_MAX yield QUAD_MAX.
 *
 * N.B.: must use new ANSI syntax (sorry).
 */
quad_t __fixsfdi(float x)
{
    if (x < 0) {
        if (x <= QUAD_MIN) {
            return QUAD_MIN;
        }
        else {
            /*
             * Convert the magnitude (-x is positive here) to unsigned,
             * then negate in unsigned arithmetic; converting a negative
             * floating value directly to an unsigned type is undefined.
             */
            return (quad_t) -(u_quad_t) -x;
        }
    }
    else {
        if (x >= QUAD_MAX) {
            return QUAD_MAX;
        }
        else {
            return (quad_t) (u_quad_t) x;
        }
    }
}

/* $OpenBSD: fixunsdfdi.c,v 1.7 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * ONE is 2^INT_BITS as a floating constant.  It is built from
 * 2^(INT_BITS-2), which still fits in an int, and scaled up in
 * floating point.
 */
#define ONE_FOURTH ((int) 1 << (INT_BITS - 2))
#define ONE_HALF (ONE_FOURTH * 2.0)
#define ONE (ONE_FOURTH * 4.0)




/*
 * Convert double to (unsigned) quad.
 * Not sure what to do with negative numbers -- for now, anything out
 * of range becomes UQUAD_MAX.
 */
u_quad_t __fixunsdfdi(double x)
{
    union uu t;
    unsigned int tmp;

    if (x < 0) {
        return (UQUAD_MAX); /* ??? should be 0? ERANGE??? */
    }

#ifdef notdef /* this falls afoul of a GCC bug */
    if (x >= UQUAD_MAX) {
        return (UQUAD_MAX);
    }
#else /* so we wire in 2^64-1 instead */
    if (x >= 18446744073709551615.0) { /* XXX */
        return (UQUAD_MAX);
    }
#endif

    /*
     * Now we know that 0 <= x <= 18446744073709549568.  The upper
     * limit is one ulp less than 18446744073709551615 tested for above.
     * Dividing this by 2^32 will *not* round irrespective of any
     * rounding modes (except if the result is an IEEE denorm).
     * Furthermore, the quotient will fit into a 32-bit integer.
     */
    tmp = x / ONE;
    /* What is left after removing the high word's contribution. */
    t.ul[L] = (unsigned int) (x - tmp * ONE);
    t.ul[H] = tmp;
    return (t.uq);
}

/* $OpenBSD: fixunssfdi.c,v 1.5 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * ONE is 2^INT_BITS as a floating constant.  It is built from
 * 2^(INT_BITS-2), which still fits in an int, and scaled up in
 * floating point.
 */
#define ONE_FOURTH ((int)1 << (INT_BITS - 2))
#define ONE_HALF (ONE_FOURTH * 2.0)
#define ONE (ONE_FOURTH * 4.0)




/*


* Convert float to (unsigned) quad. We do most of our work in double,


* out of sheer paranoia.


*


* Not sure what to do with negative numbersfor now, anything out


* of range becomes UQUAD_MAX.


*


* N.B.: must use new ANSI syntax (sorry).


*/


u_quad_t __fixunssfdi(float f)


{


double x, toppart;


union uu t;




if (f < 0) {


return (UQUAD_MAX); /* ??? should be 0? ERANGE??? */


}




#ifdef notdef /* this falls afoul of a GCC bug */


if (f >= UQUAD_MAX) {


return UQUAD_MAX;


}


#else /* so we wire in 2^641 instead */


if (f >= 18446744073709551615.0) { /* XXX */


return (UQUAD_MAX);


}




#endif




x = f;


/*


* Get the upper part of the result. Note that the divide


* may round up; we want to avoid this if possible, so we


* subtract `1/2' first.


*/


toppart = (x  ONE_HALF) / ONE;


/*


* Now build a u_quad_t out of the top part. The difference


* between x and this is the bottom part (this may introduce


* a few fuzzy bits, but what the heck). With any luck this


* difference will be nonnegative: x should wind up in the


* range [0..UINT_MAX]. For paranoia, we assume [INT_MIN..


* 2*UINT_MAX] instead.


*/


t.ul[H] = (unsigned int) toppart;


t.ul[L] = 0;


x = (double) t.uq;




if (x < 0) {


t.ul[H];


x += UINT_MAX;


}




if (x > UINT_MAX) {


t.ul[H]++;


x = UINT_MAX;


}




t.ul[L] = (u_int)x;


return t.uq;


}

/* $OpenBSD: floatdidf.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Convert (signed) quad to double.
 */
double __floatdidf(quad_t x)
{
    double d;
    union uu u;
    int neg;

    /*
     * Get an unsigned number first, by negating if necessary.
     * The negation is done in unsigned arithmetic so that the most
     * negative quad does not overflow.
     */
    if (x < 0) {
        u.uq = -(u_quad_t) x;
        neg = 1;
    }
    else {
        u.q = x;
        neg = 0;
    }

    /*
     * Now u.ul[H] has the factor of 2^32 (or whatever) and u.ul[L]
     * has the units.  Ideally we could just set d, add INT_BITS to
     * its exponent, and then add the units, but this is portable
     * code and does not know how to get at an exponent.  Machine-
     * specific code may be able to do this more efficiently.
     */
    d = (double) u.ul[H] * (((int) 1 << (INT_BITS - 2)) * 4.0);
    d += u.ul[L];

    return neg ? -d : d;
}

/* $OpenBSD: floatdisf.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Convert (signed) quad to float.
 */
float __floatdisf(quad_t x)
{
    float f;
    union uu u;
    int neg;

    /*
     * Get an unsigned number first, by negating if necessary.
     * The negation is done in unsigned arithmetic so that the most
     * negative quad does not overflow.
     */
    if (x < 0) {
        u.uq = -(u_quad_t) x;
        neg = 1;
    }
    else {
        u.q = x;
        neg = 0;
    }

    /*
     * Now u.ul[H] has the factor of 2^32 (or whatever) and u.ul[L]
     * has the units.  Ideally we could just set f, add INT_BITS to
     * its exponent, and then add the units, but this is portable
     * code and does not know how to get at an exponent.  Machine-
     * specific code may be able to do this more efficiently.
     *
     * Using double here may be excessive paranoia.
     */
    f = (double) u.ul[H] * (((int) 1 << (INT_BITS - 2)) * 4.0);
    f += u.ul[L];

    return neg ? -f : f;
}

/* $OpenBSD: floatunsdidf.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Convert (unsigned) quad to double.
 * This is exactly like floatdidf.c except that negatives never occur.
 */
double __floatunsdidf(u_quad_t x)
{
    double d;
    union uu u;

    u.uq = x;

    /*
     * Scale the high word by 2^INT_BITS (formed as 2^(INT_BITS-2) * 4.0
     * so the integer shift stays within an int), then add the low word.
     */
    d = (double) u.ul[H] * (((int) 1 << (INT_BITS - 2)) * 4.0);
    d += u.ul[L];

    return d;
}

/* $OpenBSD: iordi3.c,v 1.5 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Return a | b, in quad.
 * The inclusive OR is computed one word at a time.
 */
quad_t __iordi3(quad_t a, quad_t b)
{
    union uu aa, bb;

    aa.q = a;
    bb.q = b;

    /* OR into aa; a plain assignment here would just return b. */
    aa.ul[0] |= bb.ul[0];
    aa.ul[1] |= bb.ul[1];

    return aa.q;
}

/* $OpenBSD: lshldi3.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Shift an (unsigned) quad value left (logical shift left).
 * This is the same as arithmetic shift left!
 *
 * a:     value to shift
 * shift: number of bit positions to shift by
 *
 * Returns a << shift, assembled from word-sized shifts.
 */
quad_t __lshldi3(quad_t a, qshift_t shift)
{
    union uu aa;

    /*
     * Handle a zero shift up front: the general path below would
     * otherwise shift the low word right by INT_BITS.
     */
    if (shift == 0) {
        return a;
    }

    aa.q = a;

    if (shift >= INT_BITS) {
        /* Everything that survives comes from the low word. */
        aa.ul[H] = aa.ul[L] << (shift - INT_BITS);
        aa.ul[L] = 0;
    }
    else {
        /* High word keeps its own bits plus those shifted out of low. */
        aa.ul[H] = (aa.ul[H] << shift) | (aa.ul[L] >> (INT_BITS - shift));
        aa.ul[L] <<= shift;
    }

    return aa.q;
}

/* $OpenBSD: lshrdi3.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Shift an (unsigned) quad value right (logical shift right).
 *
 * a:     value to shift
 * shift: number of bit positions to shift by
 *
 * Returns a >> shift with zero fill, assembled from word-sized shifts
 * on the unsigned halves.
 */
quad_t __lshrdi3(quad_t a, qshift_t shift)
{
    union uu aa;

    /*
     * Handle a zero shift up front: the general path below would
     * otherwise shift the high word left by INT_BITS.
     */
    if (shift == 0) {
        return a;
    }

    aa.q = a;

    if (shift >= INT_BITS) {
        /* Everything that survives comes from the high word. */
        aa.ul[L] = aa.ul[H] >> (shift - INT_BITS);
        aa.ul[H] = 0;
    }
    else {
        /* Low word keeps its own bits plus those shifted out of high. */
        aa.ul[L] = (aa.ul[L] >> shift) | (aa.ul[H] << (INT_BITS - shift));
        aa.ul[H] >>= shift;
    }

    return aa.q;
}

/* $OpenBSD: moddi3.c,v 1.6 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*
 * Return remainder after dividing two signed quads.
 *
 * XXX we assume a % b < 0 iff a < 0, but this is actually machine-dependent.
 *
 * Both operands are reduced to their magnitudes and divided as unsigned
 * values by __qdivrem(); the remainder takes the sign of a.
 */
quad_t __moddi3(quad_t a, quad_t b)
{
    u_quad_t ua, ub, ur;
    int neg = 0;

    ua = a;
    ub = b;

    /* Negation is done on the unsigned copies. */
    if (a < 0) {
        ua = -ua;
        neg = 1;
    }

    /* The sign of b does not affect the sign of the result. */
    if (b < 0) {
        ub = -ub;
    }

    /* Only the remainder is wanted; the quotient is discarded. */
    (void) __qdivrem(ua, ub, &ur);

    if (neg) {
        ur = -ur;
    }

    return ur;
}

/* $OpenBSD: muldi3.c,v 1.5 2005/08/08 08:05:35 espie Exp $ */


#include "quad.h"




/*


* Multiply two quads.


*


* Our algorithm is based on the following. Split incoming quad values


* u and v (where u,v >= 0) into


*


* u = 2^n u1 + u0 (n = number of bits in `u_int', usu. 32)


*


* and


*


* v = 2^n v1 + v0


*


* Then


*


* uv = 2^2n u1 v1 + 2^n u1 v0 + 2^n v1 u0 + u0 v0


* = 2^2n u1 v1 + 2^n (u1 v0 + v1 u0) + u0 v0


*


* Now add 2^n u1 v1 to the first term and subtract it from the middle,


* and add 2^n u0 v0 to the last term and subtract it from the middle.


* This gives:


*


* uv = (2^2n + 2^n) (u1 v1) +


* (2^n) (u1 v0 - u1 v1 + u0 v1 - u0 v0) +


* (2^n + 1) (u0 v0)


*


* Factoring the middle a bit gives us:


*


* uv = (2^2n + 2^n) (u1 v1) + [u1v1 = high]


* (2^n) (u1 - u0) (v0 - v1) + [(u1-u0)... = mid]


* (2^n + 1) (u0 v0) [u0v0 = low]


*


* The terms (u1 v1), (u1 - u0) (v0 - v1), and (u0 v0) can all be done


* in just half the precision of the original. (Note that either or both


* of (u1 - u0) or (v0 - v1) may be negative.)


*


* This algorithm is from Knuth vol. 2 (2nd ed), section 4.3.3, p. 278.


*


* Since C does not give us a `int * int = quad' operator, we split


* our input quads into two ints, then split the two ints into two


* shorts. We can then calculate `short * short = int' in native


* arithmetic.


*


* Our product should, strictly speaking, be a `long quad', with 128


* bits, but we are going to discard the upper 64. In other words,


* we are not interested in uv, but rather in (uv mod 2^2n). This


* makes some of the terms above vanish, and we get:


*


* (2^n)(high) + (2^n)(mid) + (2^n + 1)(low)


*


* or


*


* (2^n)(high + mid + low) + low


*


* Furthermore, `high' and `mid' can be computed mod 2^n, as any factor


* of 2^n in either one will also vanish. Only `low' need be computed


* mod 2^2n, and only because of the final term above.


*/


static quad_t __lmulq(u_int, u_int);




quad_t __muldi3(quad_t a, quad_t b)
{
    /*
     * Multiply two quads, returning the low 64 bits of the product.
     * The operands are reduced to their magnitudes, multiplied using
     * three half-precision products (high, mid, low), and the sign is
     * re-applied at the end.
     */
    union uu u, v, low, prod;
    u_int high, mid, udiff, vdiff;
    int negall, negmid;
#define u1 u.ul[H]
#define u0 u.ul[L]
#define v1 v.ul[H]
#define v0 v.ul[L]

    /*
     * Get u and v such that u, v >= 0.  When this is finished,
     * u1, u0, v1, and v0 will be directly accessible through the
     * int fields.  Negation is done in unsigned arithmetic so that
     * the most negative quad does not overflow.
     */
    if (a >= 0) {
        u.q = a, negall = 0;
    }
    else {
        u.uq = -(u_quad_t) a, negall = 1;
    }

    if (b >= 0) {
        v.q = b;
    }
    else {
        v.uq = -(u_quad_t) b, negall ^= 1;
    }

    if (u1 == 0 && v1 == 0) {
        /*
         * An (I hope) important optimization occurs when u1 and v1
         * are both 0.  This should be common since most numbers
         * are small.  Here the product is just u0*v0.
         */
        prod.q = __lmulq(u0, v0);
    }
    else {
        /*
         * Compute the three intermediate products, remembering
         * whether the middle term is negative.  We can discard
         * any upper bits in high and mid, so we can use native
         * u_int * u_int => u_int arithmetic.
         */
        low.q = __lmulq(u0, v0);

        /* udiff = |u1 - u0|, with the sign tracked in negmid. */
        if (u1 >= u0) {
            negmid = 0, udiff = u1 - u0;
        }
        else {
            negmid = 1, udiff = u0 - u1;
        }

        /* vdiff = |v0 - v1|; a negative difference flips negmid. */
        if (v0 >= v1) {
            vdiff = v0 - v1;
        }
        else {
            vdiff = v1 - v0, negmid ^= 1;
        }

        mid = udiff * vdiff;

        high = u1 * v1;

        /*
         * Assemble the final product:
         * (2^n)(high + mid + low) + low, taken mod 2^2n.
         */
        prod.ul[H] = high + (negmid ? -mid : mid) + low.ul[L] + low.ul[H];
        prod.ul[L] = low.ul[L];
    }

    /* Re-apply the sign, again in unsigned arithmetic. */
    if (negall) {
        prod.uq = -prod.uq;
    }

    return prod.q;
#undef u1
#undef u0
#undef v1
#undef v0
}




/*


* Multiply two 2N-bit ints to produce a 4N-bit quad, where N is half


* the number of bits in an int (whatever that is -- the code below


* does not care as long as quad.h does its part of the bargain -- but


* typically N==16).


*


* We use the same algorithm from Knuth, but this time the modulo refinement


* does not apply. On the other hand, since N is half the size of an int,


* we can get away with native multiplication -- none of our input terms


* exceeds (UINT_MAX >> 1).


*


* Note that, for u_int l, the quad-precision result


*


* l << N


*


* splits into high and low ints as HHALF(l) and LHUP(l) respectively.


*/


static quad_t __lmulq(u_int u, u_int v)


{


u_int u1, u0, v1, v0, udiff, vdiff, high, mid, low;


u_int prodh, prodl, was;


union uu prod;


int neg;




u1 = HHALF(u);


u0 = LHALF(u);


v1 = HHALF(v);


v0 = LHALF(v);




low = u0 * v0;




/* This is the same smallnumber optimization as before. */


if (u1 == 0 && v1 == 0) {


return low;


}




if (u1 >= u0) {


udiff = u1  u0, neg = 0;


}


else {


udiff = u0  u1, neg = 1;


}




if (v0 >= v1) {


vdiff = v0  v1;


}


else {


vdiff = v1  v0, neg ^= 1;


}




mid = udiff * vdiff;




high = u1 * v1;




/* prod = (high << 2N) + (high << N); */


prodh = high + HHALF(high);


prodl = LHUP(high);




/* if (neg) prod = mid << N; else prod += mid << N; */


if (neg) {


was = prodl;

