[apple/security.git] / libsecurity_cryptkit / lib / giantFFT.c

/* Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
 *
 * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT
 * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE
 * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE COMPUTER, INC. AND THE
 * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE COMPUTER,
 * INC.  ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL
 * EXPOSE YOU TO LIABILITY.
 ***************************************************************************

   giantFFT.c
   Library for large-integer arithmetic via FFT. Currently unused
   in CryptKit.

   R. E. Crandall, Scientific Computation Group, NeXT Computer, Inc.

 Revision History
 ----------------
 19 Jan 1998	Doug Mitchell at Apple
 	Split off from NSGiantIntegers.c.

*/

/*
 * FIXME - make sure platform-specific math lib has floor(), fmod(),
 *         sin(), pow()
 */
#include <math.h>
#include "NSGiantIntegers.h"

/* Values for the file-scope 'mulmode' switch consulted by GiantAuxMul(). */
#define AUTO_MUL 	0
#define GRAMMAR_MUL 	1
#define FFT_MUL 	2

/* 2*pi; init_sincos() divides it by the run length to get the angle step. */
#define TWOPI 		(double)(2*3.1415926535897932384626433)
/* sqrt(2); not referenced by the code visible in this file. */
#define SQRT2 		(double)(1.414213562373095048801688724209)
/* sqrt(1/2); constant factor used by the FFT butterflies. */
#define SQRTHALF 	(double)(0.707106781186547524400844362104)
/* 2^16 and 2^-16: addsignal() uses them to split values into 16-bit digits. */
#define TWO16 		(double)(65536.0)
#define TWOM16 		(double)(0.0000152587890625)
/* In AUTO_MUL mode the FFT path is taken once sizea*sizeb reaches BREAK_SHORTS squared. */
#define BREAK_SHORTS 	400    // Number of shorts at which FFT breaks over.

/*
 * Forward declarations for the file-local helpers defined below.
 * GiantAuxMul() is listed because mulg() calls it before its definition.
 */
static void GiantAuxMul(giant a, giant b);
static int lpt(int n, int *lambda);
static void mul_hermitian(double *a, double *b, int n) ;
static void square_hermitian(double *b, int n);
static void addsignal(giant x, double *zs, int n);
static void scramble_real(double *x, int n);
static void fft_real_to_hermitian(double *zs, int n);
static void fftinv_hermitian_to_real(double *zs, int n);
static void GiantFFTSquare(giant gx);
static void GiantFFTMul(giant,giant);
static void giant_to_double(giant x, int sizex, double *zs, int L);

/* Multiply algorithm selector read by GiantAuxMul(); one of the xxx_MUL values. */
static int mulmode = AUTO_MUL;

/*
 * mulg - multiply entry point: on return b holds a*b.
 *
 * The arithmetic is delegated to GiantAuxMul(). This wrapper only adds the
 * PROF_xxx / INCR_MULGS instrumentation macros (defined outside this file;
 * presumably timers and call counters - confirm in the header) and, when
 * FEE_DEBUG is nonzero, a bitlen() call on the result.
 */
void mulg(giant a, giant b) {
	PROF_START;
	INCR_MULGS;
	GiantAuxMul(a,b);
#if	FEE_DEBUG
	(void)bitlen(b); // XXX
#endif	/* FEE_DEBUG */
	PROF_END(mulgTime);
	PROF_INCR(numMulg);
}

/*
 * GiantAuxMul - multiply dispatcher; on return b holds a*b.
 *
 * The file-scope 'mulmode' picks the algorithm:
 *   GRAMMAR_MUL  always GiantGrammarMul().
 *   FFT_MUL      always the floating point FFT routines.
 *   AUTO_MUL     GiantGrammarMul() while |a->sign| * |b->sign| is below
 *                BREAK_SHORTS squared, the FFT routines otherwise.
 * Any other mode value leaves b untouched.
 *
 * When a and b are the same giant the FFT path uses GiantFFTSquare().
 */
static void GiantAuxMul(giant a, giant b) {
    int same_operand = (a == b);
    int use_fft;

    /* A zero factor: b is already zero, or b takes a's zero value via gtog(). */
    if (isZero(b)) return;
    if (isZero(a)) {
        gtog(a, b);
        return;
    }

    if (mulmode == GRAMMAR_MUL) {
        use_fft = 0;
    }
    else if (mulmode == FFT_MUL) {
        use_fft = 1;
    }
    else if (mulmode == AUTO_MUL) {
        /* Cost estimate for the grammar-school method: product of the sizes. */
        float grammar_cost = (float)abs(a->sign);
        grammar_cost *= (float)abs(b->sign);
        use_fft = !(grammar_cost < BREAK_SHORTS*BREAK_SHORTS);
    }
    else {
        return;
    }

    if (!use_fft) {
        GiantGrammarMul(a, b);
    }
    else if (same_operand) {
        GiantFFTSquare(b);
    }
    else {
        GiantFFTMul(a, b);
    }
}

/***************** Commence FFT multiply routines ****************/

/*
 * Quarter-wave sine table shared by s_sin() and s_cos().
 *
 * CurrentRun is the run length the table was last built for (0 = no table
 * yet); sincos[j] holds sin(2*pi*j/CurrentRun) for j = 0 .. CurrentRun/4.
 * sincos keeps its original external linkage.
 */
static int CurrentRun = 0;
double *sincos = NULL;

/*
 * init_sincos - make sure the sine table covers run length n.
 *
 * Does nothing when the existing table was built for a run of at least n.
 * Otherwise a new table of 1 + n/4 entries is built and swapped in. The new
 * table is allocated before the old one is released, so a failed allocation
 * leaves sincos/CurrentRun describing the previous (still valid) table and
 * is reported through NSGiantRaise().
 * NOTE(review): NSGiantRaise() is assumed not to return normally; if it does,
 * the callers will continue with a table that is too small - confirm.
 */
static void init_sincos(int n) {
    int j;
    int quarter = n >> 2;
    double e;
    double *table;

    if (n <= CurrentRun) return;

    table = malloc(sizeof(double)*(1+quarter));
    if (table == NULL) {
        NSGiantRaise("init_sincos: out of memory");
        return;
    }

    e = TWOPI/n;
    for(j=0;j<=quarter;j++) {
        table[j] = sin(e*j);
    }

    free(sincos);
    sincos = table;
    CurrentRun = n;
}

/*
 * s_sin - table lookup of sin(2*pi*n/CurrentRun).
 *
 * Only a quarter wave is stored in sincos[]; the quadrant that n falls in
 * decides whether the table is read forwards or mirrored and whether the
 * value is negated. Anything outside quadrants 0..2 is handled like
 * quadrant 3.
 */
static double s_sin(int n) {
    const int quarter = CurrentRun >> 2;
    const int half = CurrentRun >> 1;
    const int quadrant = n / quarter;

    if (quadrant == 0) {
        return sincos[n];
    }
    if (quadrant == 1) {
        return sincos[half - n];
    }
    if (quadrant == 2) {
        return -sincos[n - half];
    }
    return -sincos[CurrentRun - n];
}

/*
 * s_cos - cosine via the sine table: the argument is shifted by a quarter
 * of the run (forward in the first quadrant, backward with a sign flip
 * elsewhere) and handed to s_sin().
 */
static double s_cos(int n) {
    const int quarter = CurrentRun >> 2;

    return (n >= quarter) ? -s_sin(n - quarter) : s_sin(n + quarter);
}


/*
 * lpt - least power of two that is >= n.
 *
 * Returns that power (1 when n <= 1) and stores its base-2 logarithm,
 * i.e. the number of doublings performed, in *lambda.
 */
static int lpt(int n, int *lambda) {
    int power = 1;
    int doublings = 0;

    for (; power < n; power <<= 1) {
        doublings++;
    }
    *lambda = doublings;
    return power;
}

/*
 * addsignal - turn the inverse-FFT output back into the digits of x.
 *
 * x   destination giant: x->n[] receives 16-bit digits, x->sign the digit
 *     count (always non-negative here; callers apply the sign).
 * zs  n doubles holding the (unrounded) product signal; used as scratch
 *     and clobbered.
 * n   number of entries in zs.
 *
 * Unlike a plain copy, this never writes outside zs[0..n-1] or outside
 * x->n[0..capacity-1]: digits that would not fit are dropped and the
 * condition is reported with NSGiantRaise("addsignal overflow"), as before.
 * NOTE(review): assumes x->n[] has room for x->capacity digits, which is
 * what the existing sign-vs-capacity check implies - confirm in the header.
 */
static void addsignal(giant x, double *zs, int n) {
   int j, k, m, car, top;
   double f, g;

   /*
    * Pass 1: round each entry to the nearest integer and push everything
    * above its low 16 bits into the following entries, still in doubles.
    */
   for(j=0;j<n;j++) {
	f = floor(zs[j]+0.5);
	zs[j] = 0;
	k = 0;
	do {
	   if(j+k >= n) {
		/* Carry ran off the end of zs (value too large or negative). */
		NSGiantRaise("addsignal overflow");
		return;
	   }
	   g = floor(f*TWOM16);
	   zs[j+k] += f-g*TWO16;
	   ++k;
	   f = g;
	} while(f != 0.0);
   }

   /*
    * Pass 2: integer carry sweep. 'top' tracks the highest nonzero digit so
    * the length is known without reading back from x->n[].
    */
   car = 0;
   top = -1;
   for(j=0;j<n;j++) {
	m = zs[j]+car;
	car = (m>>16);
	m &= 0xffff;
	if(m) top = j;
	if(j < x->capacity) x->n[j] = m;
   }
   if(car) {
	top = n;
	if(n < x->capacity) x->n[n] = car;
   }
   x->sign = top+1;	/* 0 when every digit is zero */
   if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow");
}

/*
 * GiantFFTSquare - gx becomes gx*gx, computed with the floating point FFT.
 *
 * Operands shorter than 4 digits go straight to GiantGrammarMul(). The
 * transform length L is the least power of two >= twice the operand size.
 * If the scratch buffer cannot be allocated the function falls back to
 * GiantGrammarMul() instead of dereferencing a NULL pointer.
 */
static void GiantFFTSquare(giant gx) {
    int lambda;
    int size = abs(gx->sign);
    int L;
    double *doubles;

    if(size<4) { GiantGrammarMul(gx,gx); return; }
    L = lpt(size+size, &lambda);

    doubles = malloc(sizeof(double) * L);
    if(doubles == NULL) {
        /* No scratch space: use the same routine the small case uses. */
        GiantGrammarMul(gx,gx);
        return;
    }

    /* digits -> signal -> spectrum -> squared spectrum -> signal -> digits */
    giant_to_double(gx, size, doubles, L);
    fft_real_to_hermitian(doubles, L);
    square_hermitian(doubles, L);
    fftinv_hermitian_to_real(doubles, L);
    addsignal(gx, doubles, L);
    free(doubles);

    /* A square is never negative. */
    gx->sign = abs(gx->sign);
    bitlen(gx); // XXX
    if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow");
}

/*
 * GiantFFTMul - x becomes y*x, computed with the floating point FFT.
 *
 * If either operand has 4 digits or fewer the work goes to
 * GiantGrammarMul(). The transform length L is the least power of two
 * >= twice the larger operand size. The sign of the result is the product
 * of the operand signs, captured before x is overwritten.
 * If either scratch buffer cannot be allocated the function falls back to
 * GiantGrammarMul() instead of dereferencing a NULL pointer.
 */
static void GiantFFTMul(giant y, giant x) {
    int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign);
    int finalsign = gsign(x)*gsign(y);
    int L;
    double *doubles1;
    double *doubles2;

    if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; }
    size = sizex; if(size<sizey) size=sizey;
    L = lpt(size+size, &lambda);

    doubles1 = malloc(sizeof(double) * L);
    doubles2 = malloc(sizeof(double) * L);
    if((doubles1 == NULL)||(doubles2 == NULL)) {
        /* No scratch space: release whichever buffer we did get and use
           the same routine the small case uses. */
        free(doubles1);
        free(doubles2);
        GiantGrammarMul(y,x);
        return;
    }

    /* Transform both operands, multiply the spectra into doubles1,
       transform back and convert to digits. */
    giant_to_double(x, sizex, doubles1, L);
    giant_to_double(y, sizey, doubles2, L);
    fft_real_to_hermitian(doubles1, L);
    fft_real_to_hermitian(doubles2, L);
    mul_hermitian(doubles2, doubles1, L);
    fftinv_hermitian_to_real(doubles1, L);
    addsignal(x, doubles1, L);

    free(doubles1);
    free(doubles2);

    x->sign = finalsign*abs(x->sign);
    bitlen(x); // XXX
    if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow");
}

/*
 * scramble_real - in-place reordering of x[0..n-1] used by the FFT routines.
 *
 * 'partner' is advanced like a counter whose carries run from the top bit
 * (n/2) downwards; each element is swapped with its partner once, when the
 * running index is the smaller of the two. For n = 8 the result order is
 * 0,4,2,6,1,5,3,7.
 */
static void scramble_real(double *x, int n) {
    int idx;
    int partner = 0;
    int bit;

    for (idx = 0; idx < n - 1; idx++) {
        if (idx < partner) {
            double held = x[partner];
            x[partner] = x[idx];
            x[idx] = held;
        }
        /* Clear set bits from the top down, then set the first clear one. */
        for (bit = n / 2; bit <= partner; bit >>= 1) {
            partner -= bit;
        }
        partner += bit;
    }
}

/*
 * fft_real_to_hermitian - in-place forward transform of a real signal.
 *
 * zs  n doubles of real input, overwritten with the transform.
 * n   transform length. NOTE(review): the stage count derived from n>>1
 *     and the "& nminus" wrap look like they require a power of two; the
 *     callers in this file pass the result of lpt().
 */
static void fft_real_to_hermitian(double *zs, int n) {
/* Output is {Re(z^[0]),...,Re(z^[n/2]),Im(z^[n/2-1]),...,Im(z^[1])}.
   This is a decimation-in-time, split-radix algorithm.
 */
	register double cc1, ss1, cc3, ss3;
	register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
		     a, a3, b, b3, nminus = n-1, dil, expand;
	register double *x, e;
	int nn = n>>1;
	double t1, t2, t3, t4, t5, t6;
	register int n2, n4, n8, i, j;

	/* Make sure the shared sine table covers a run of length n. */
        init_sincos(n);
	/* The table may have been built for a longer run; expand converts an
	   angle index in units of 2*pi/n into table units of 2*pi/CurrentRun. */
	expand = CurrentRun/n;
	/* Reorder the input in place before the butterfly passes. */
	scramble_real(zs, n);
	/* x[1..n] aliases zs[0..n-1]: all index arithmetic below is 1-based. */
	x = zs-1;  /* FORTRAN compatibility. */
	/* Length-2 butterflies: each selected pair (x[i0], x[i0+1]) is replaced
	   by its sum and difference. is/iD step through the pairs to process. */
	is = 1;
	iD = 4;
	do{
	   for(i0=is;i0<=n;i0+=iD) {
		i1 = i0+1;
		e = x[i0];
		x[i0] = e + x[i1];
		x[i1] = e - x[i1];
	   }
	   is = (iD<<1)-1;
	   iD <<= 2;
	} while(is<n);
	/* Remaining stages: n2 is the block length of the current stage and
	   doubles on every pass (4, 8, ...); n4 and n8 are a quarter and an
	   eighth of it. */
	n2 = 2;
	while(nn>>=1) {
		n2 <<= 1;
		n4 = n2>>2;
		n8 = n2>>3;
		is = 0;
		iD = n2<<1;
		/* Butterflies that need no table lookup: the first element of each
		   quarter, then (when n4 > 1) the element n8 further on, which only
		   needs the constant SQRTHALF. */
		do {
			for(i=is;i<n;i+=iD) {
				i1 = i+1;
				i2 = i1 + n4;
				i3 = i2 + n4;
				i4 = i3 + n4;
				t1 = x[i4]+x[i3];
				x[i4] -= x[i3];
				x[i3] = x[i1] - t1;
				x[i1] += t1;
				if(n4==1) continue;
				i1 += n8;
				i2 += n8;
				i3 += n8;
				i4 += n8;
				t1 = (x[i3]+x[i4])*SQRTHALF;
				t2 = (x[i3]-x[i4])*SQRTHALF;
				x[i4] = x[i2] - t1;
				x[i3] = -x[i2] - t1;
				x[i2] = x[i1] - t2;
				x[i1] += t2;
			}
			is = (iD<<1) - n2;
			iD <<= 2;
		} while(is<n);
		/* General butterflies. dil is the angle step of this stage in units
		   of 2*pi/n; at offset j the angle index is a = (j-1)*dil (mod n)
		   and a3 is three times that (mod n). */
		dil = n/n2;
		a = dil;
		for(j=2;j<=n8;j++) {
		    	a3 = (a+(a<<1))&nminus;
			b = a*expand;
			b3 = a3*expand;
			/* cos/sin of the angle and of three times the angle. */
			cc1 = s_cos(b);
			ss1 = s_sin(b);
			cc3 = s_cos(b3);
			ss3 = s_sin(b3);
			a = (a+dil)&nminus;
			is = 0;
			iD = n2<<1;
		        do {
				for(i=is;i<n;i+=iD) {
					/* i1..i4 sit at offset j in the four quarters of the
					   block, i5..i8 at the mirrored offset n4-j+2. */
					i1 = i+j;
					i2 = i1 + n4;
					i3 = i2 + n4;
					i4 = i3 + n4;
					i5 = i + n4 - j + 2;
					i6 = i5 + n4;
					i7 = i6 + n4;
					i8 = i7 + n4;
					/* Rotate the (i3,i7) pair by the angle and the (i4,i8)
					   pair by three times the angle. */
					t1 = x[i3]*cc1 + x[i7]*ss1;
					t2 = x[i7]*cc1 - x[i3]*ss1;
					t3 = x[i4]*cc3 + x[i8]*ss3;
					t4 = x[i8]*cc3 - x[i4]*ss3;
					t5 = t1 + t3;
					t6 = t2 + t4;
					t3 = t1 - t3;
					t4 = t2 - t4;
					/* Combine with the untouched half; the statement order
					   matters because every value is updated in place. */
					t2 = x[i6] + t6;
					x[i3] = t6 - x[i6];
					x[i8] = t2;
					t2 = x[i2] - t3;
					x[i7] = -x[i2] - t3;
					x[i4] = t2;
					t1 = x[i1] + t5;
					x[i6] = x[i1] - t5;
					x[i1] = t1;
					t1 = x[i5] + t4;
					x[i5] -= t4;
					x[i2] = t1;
				}
			        is = (iD<<1) - n2;
				iD <<= 2;
			} while(is<n);
		}
	}
}

/*
 * fftinv_hermitian_to_real - in-place inverse of fft_real_to_hermitian().
 *
 * zs  n doubles in the Hermitian layout described below, overwritten with
 *     the real signal; the result is scaled by 1/n before returning.
 * n   transform length. NOTE(review): the stage count derived from n>>1
 *     and the "& nminus" wrap look like they require a power of two; the
 *     callers in this file pass the result of lpt().
 */
static void fftinv_hermitian_to_real(double *zs, int n) {
/* Input is {Re(z^[0]),...,Re(z^[n/2]),Im(z^[n/2-1]),...,Im(z^[1])}.
   This is a decimation-in-frequency, split-radix algorithm.
 */
	register double cc1, ss1, cc3, ss3;
	register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
		 a, a3, b, b3, nminus = n-1, dil, expand;
	register double *x, e;
	int nn = n>>1;
	double t1, t2, t3, t4, t5;
	int n2, n4, n8, i, j;

	/* Make sure the shared sine table covers a run of length n. */
        init_sincos(n);
	/* expand converts an angle index in units of 2*pi/n into table units
	   of 2*pi/CurrentRun (the table may be built for a longer run). */
	expand = CurrentRun/n;
	/* x[1..n] aliases zs[0..n-1]: all index arithmetic below is 1-based. */
	x = zs-1;
	/* Stages run from the largest block down: n2 is halved at the top of
	   every pass (n, n/2, ...); n4 and n8 are a quarter and an eighth of
	   it. */
	n2 = n<<1;
	while(nn >>= 1) {
		is = 0;
		iD = n2;
		n2 >>= 1;
		n4 = n2>>2;
		n8 = n4>>1;
		/* Butterflies that need no table lookup: the first element of each
		   quarter, then (when n4 > 1) the element n8 further on, which only
		   needs the constant SQRTHALF. */
		do {
			for(i=is;i<n;i+=iD) {
				i1 = i+1;
				i2 = i1 + n4;
				i3 = i2 + n4;
				i4 = i3 + n4;
				t1 = x[i1] - x[i3];
				x[i1] += x[i3];
				x[i2] += x[i2];
				x[i3] = t1 - 2.0*x[i4];
				x[i4] = t1 + 2.0*x[i4];
				if(n4==1) continue;
				i1 += n8;
				i2 += n8;
				i3 += n8;
				i4 += n8;
				t1 = (x[i2]-x[i1])*SQRTHALF;
				t2 = (x[i4]+x[i3])*SQRTHALF;
				x[i1] += x[i2];
				x[i2] = x[i4]-x[i3];
				x[i3] = -2.0*(t2+t1);
				x[i4] = 2.0*(t1-t2);
			}
			is = (iD<<1) - n2;
			iD <<= 2;
		} while(is<n-1);
		/* General butterflies. dil is the angle step of this stage in units
		   of 2*pi/n; at offset j the angle index is a = (j-1)*dil (mod n)
		   and a3 is three times that (mod n). */
		dil = n/n2;
		a = dil;
		for(j=2;j<=n8;j++) {
		    	a3 = (a+(a<<1))&nminus;
			b = a*expand;
			b3 = a3*expand;
			/* cos/sin of the angle and of three times the angle. */
			cc1 = s_cos(b);
			ss1 = s_sin(b);
			cc3 = s_cos(b3);
			ss3 = s_sin(b3);
			a = (a+dil)&nminus;
			is = 0;
			iD = n2<<1;
			do {
			   for(i=is;i<n;i+=iD) {
				/* i1..i4 sit at offset j in the four quarters of the
				   block, i5..i8 at the mirrored offset n4-j+2. */
				i1 = i+j;
				i2 = i1+n4;
				i3 = i2+n4;
				i4 = i3+n4;
				i5 = i+n4-j+2;
				i6 = i5+n4;
				i7 = i6+n4;
				i8 = i7+n4;
				/* Sums are stored back at once; the differences are kept
				   in t1..t5 and rotated by the angle / three times the
				   angle at the end. Order matters: all updates are in
				   place. */
				t1 = x[i1] - x[i6];
				x[i1] += x[i6];
				t2 = x[i5] - x[i2];
				x[i5] += x[i2];
				t3 = x[i8] + x[i3];
				x[i6] = x[i8] - x[i3];
				t4 = x[i4] + x[i7];
				x[i2] = x[i4] - x[i7];
				t5 = t1 - t4;
				t1 += t4;
				t4 = t2 - t3;
				t2 += t3;
				x[i3] = t5*cc1 + t4*ss1;
				x[i7] = -t4*cc1 + t5*ss1;
				x[i4] = t1*cc3 - t2*ss3;
				x[i8] = t2*cc3 + t1*ss3;
			   }
			   is = (iD<<1) - n2;
			   iD <<= 2;
			} while(is<n-1);
		}
	}
	/* Length-2 butterflies: each selected pair (x[i0], x[i0+1]) is replaced
	   by its sum and difference. */
	is = 1;
	iD = 4;
	do {
	  for(i0=is;i0<=n;i0+=iD){
		i1 = i0+1;
		e = x[i0];
		x[i0] = e + x[i1];
		x[i1] = e - x[i1];
	  }
	  is = (iD<<1) - 1;
	  iD <<= 2;
	} while(is<n);
	/* Undo the input ordering, then normalise by 1/n. */
	scramble_real(zs, n);
	e = 1/(double)n;
	for(i=0;i<n;i++) zs[i] *= e;
}


/*
 * mul_hermitian - pointwise product of two spectra, b becomes a*b.
 *
 * Both arrays hold n doubles: entries 0 and n/2 are multiplied as plain
 * reals, and for 0 < k < n/2 the pair (v[k], v[n-k]) is multiplied as the
 * real and imaginary part of one complex number. a is only read.
 */
static void mul_hermitian(double *a, double *b, int n) {
	const int mid = n>>1;
	int lo, hi;

	b[0] *= a[0];
	b[mid] *= a[mid];
	for(lo = 1, hi = n-1; lo < mid; lo++, hi--) {
		const double a_re = a[lo], b_re = b[lo];
		const double a_im = a[hi], b_im = b[hi];

		b[lo] = a_re*b_re - a_im*b_im;
		b[hi] = a_re*b_im + a_im*b_re;
	}
}

/*
 * square_hermitian - pointwise square of a spectrum, in place.
 *
 * b holds n doubles: entries 0 and n/2 are squared as plain reals, and for
 * 0 < k < n/2 the pair (b[k], b[n-k]) is squared as the real and imaginary
 * part of one complex number.
 */
static void square_hermitian(double *b, int n) {
	const int mid = n>>1;
	int lo = 1;
	int hi = n-1;

	b[0] *= b[0];
	b[mid] *= b[mid];
	while(lo < mid) {
		const double re = b[lo];
		const double im = b[hi];

		b[hi] = 2.0*re*im;
		b[lo] = (re+im)*(re-im);
		lo++;
		hi--;
	}
}

/*
 * giant_to_double - load the digits of x into an FFT work array.
 *
 * zs[0..sizex-1] receive x->n[0..sizex-1] converted to double and
 * zs[sizex..L-1] are set to zero.
 */
static void giant_to_double(giant x, int sizex, double *zs, int L) {
	int idx = 0;

	/* Digits first ... */
	while(idx < sizex) {
		zs[idx] = x->n[idx];
		idx++;
	}
	/* ... then zero padding up to the transform length. */
	for(idx = sizex; idx < L; idx++) {
		zs[idx] = 0.0;
	}
}
Commit	Line	Data
b1ab9ed8 A	1	/* Copyright (c) 1998 Apple Computer, Inc. All rights reserved.
	2	*
	3	* NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT
	4	* TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE
	5	* SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE COMPUTER, INC. AND THE
	6	* ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE COMPUTER,
	7	* INC. ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL
	8	* EXPOSE YOU TO LIABILITY.
	9	***************************************************************************
	10
	11	giantFFT.c
	12	Library for large-integer arithmetic via FFT. Currently unused
	13	in CryptKit.
	14
	15	R. E. Crandall, Scientific Computation Group, NeXT Computer, Inc.
	16
	17	Revision History
	18	----------------
	19	19 Jan 1998 Doug Mitchell at Apple
	20	Split off from NSGiantIntegers.c.
	21
	22	*/
	23
	24	/*
	25	* FIXME - make sure platform-specific math lib has floor(), fmod(),
	26	* sin(), pow()
	27	*/
	28	#include <math.h>
	29	#include "NSGiantIntegers.h"
	30
	31	#define AUTO_MUL 0
	32	#define GRAMMAR_MUL 1
	33	#define FFT_MUL 2
	34
	35	#define TWOPI (double)(2*3.1415926535897932384626433)
	36	#define SQRT2 (double)(1.414213562373095048801688724209)
	37	#define SQRTHALF (double)(0.707106781186547524400844362104)
	38	#define TWO16 (double)(65536.0)
	39	#define TWOM16 (double)(0.0000152587890625)
	40	#define BREAK_SHORTS 400 // Number of shorts at which FFT breaks over.
	41
	42	static int lpt(int n, int *lambda);
	43	static void mul_hermitian(double *a, double *b, int n) ;
	44	static void square_hermitian(double *b, int n);
	45	static void addsignal(giant x, double *zs, int n);
	46	static void scramble_real(double *x, int n);
	47	static void fft_real_to_hermitian(double *zs, int n);
	48	static void fftinv_hermitian_to_real(double *zs, int n);
	49	static void GiantFFTSquare(giant gx);
	50	static void GiantFFTMul(giant,giant);
	51	static void giant_to_double(giant x, int sizex, double *zs, int L);
	52
	53	static int mulmode = AUTO_MUL;
	54
	55	void mulg(giant a, giant b) { /* b becomes a*b. */
	56	PROF_START;
	57	INCR_MULGS;
	58	GiantAuxMul(a,b);
	59	#if FEE_DEBUG
	60	(void)bitlen(b); // XXX
	61	#endif FEE_DEBUG
	62	PROF_END(mulgTime);
	63	PROF_INCR(numMulg);
	64	}
65
66	static void GiantAuxMul(giant a, giant b) {
67	/* Optimized general multiply, b becomes a*b. Modes are:
68	AUTO_MUL: switch according to empirical speed criteria.
69	GRAMMAR_MUL: force grammar-school algorithm.
70	FFT_MUL: force floating point FFT method.
71	*/
72	int square = (a==b);
73
74	if (isZero(b)) return;
75	if (isZero(a)) {
76	gtog(a, b);
77	return;
78	}
79	switch(mulmode) {
80	case GRAMMAR_MUL:
81	GiantGrammarMul(a,b);
82	break;
83	case FFT_MUL:
84	if (square) {
85	GiantFFTSquare(b);
86	}
87	else {
88	GiantFFTMul(a,b);
89	}
90	break;
91	case AUTO_MUL: {
92	int sizea, sizeb;
93	float grammartime;
94	sizea = abs(a->sign);
95	sizeb = abs(b->sign);
96	grammartime = sizea; grammartime *= sizeb;
97	if(grammartime < BREAK_SHORTS*BREAK_SHORTS) {
98	GiantGrammarMul(a,b);
99	}
100	else {
101	if (square) GiantFFTSquare(b);
102	else GiantFFTMul(a,b);
103	}
104	break;
105	}
106	}
107	}
108
109	/*************** Commence FFT multiply routines **************/
110
111	static int CurrentRun = 0;
112	double *sincos = NULL;
113	static void init_sincos(int n) {
114	int j;
115	double e = TWOPI/n;
116
117	if (n <= CurrentRun) return;
118	CurrentRun = n;
119	if (sincos) free(sincos);
	120	sincos = (double *)malloc(sizeof(double)*(1+(n>>2)));
121	for(j=0;j<=(n>>2);j++) {
122	sincos[j] = sin(e*j);
123	}
124	}
125
126	static double s_sin(int n) {
127	int seg = n/(CurrentRun>>2);
128
129	switch(seg) {
130	case 0: return(sincos[n]);
131	case 1: return(sincos[(CurrentRun>>1)-n]);
132	case 2: return(-sincos[n-(CurrentRun>>1)]);
133	case 3:
134	default: return(-sincos[CurrentRun-n]);
135	}
136	}
137
138	static double s_cos(int n) {
139	int quart = (CurrentRun>>2);
140
141	if (n < quart) return(s_sin(n+quart));
142	return(-s_sin(n-quart));
143	}
144
145
146	static int lpt(int n, int *lambda) {
147	/* returns least power of two greater than n */
148	register int i = 1;
149
150	*lambda = 0;
151	while(i<n) {
152	i<<=1;
153	++(*lambda);
154	}
155	return(i);
156	}
157
158	static void addsignal(giant x, double *zs, int n) {
159	register int j, k, m, car;
160	register double f, g;
	161	/*double err, maxerr = 0.0;*/
162
163	for(j=0;j<n;j++) {
164	f = floor(zs[j]+0.5);
165
166	/* err = fabs(zs[j]-f);
167	if(err>maxerr) maxerr = err;
168	*/
169
170	zs[j] =0;
171	k = 0;
172	do{
173	g = floor(f*TWOM16);
174	zs[j+k] += f-g*TWO16;
175	++k;
176	f=g;
177	} while(f != 0.0);
178	}
179	car = 0;
180	for(j=0;j<n;j++) {
181	m = zs[j]+car;
182	x->n[j] = m & 0xffff;
183	car = (m>>16);
184	}
185	if(car) x->n[j] = car;
186	else --j;
187	while(!(x->n[j])) --j;
188	x->sign = j+1;
189	if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow");
190	}
191
192	static void GiantFFTSquare(giant gx) {
193	int j,size = abs(gx->sign);
194	register int L;
195
196	if(size<4) { GiantGrammarMul(gx,gx); return; }
197	L = lpt(size+size, &j);
198	{
199	//was...double doubles[L];
200	//is...
	201	double *doubles = malloc(sizeof(double) * L);
202	// end
203	giant_to_double(gx, size, doubles, L);
204	fft_real_to_hermitian(doubles, L);
205	square_hermitian(doubles, L);
206	fftinv_hermitian_to_real(doubles, L);
207	addsignal(gx, doubles, L);
208	// new
209	free(doubles);
210	}
211	gx->sign = abs(gx->sign);
212	bitlen(gx); // XXX
213	if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow");
214	}
215
	216	static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */
217	int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign);
218	int finalsign = gsign(x)*gsign(y);
219	register int L;
220
	221	if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; }
222	size = sizex; if(size<sizey) size=sizey;
223	L = lpt(size+size, &lambda);
224	{
225	//double doubles1[L];
226	//double doubles2[L];
	227	double *doubles1 = malloc(sizeof(double) * L);
	228	double *doubles2 = malloc(sizeof(double) * L);
229
230	giant_to_double(x, sizex, doubles1, L);
231	giant_to_double(y, sizey, doubles2, L);
232	fft_real_to_hermitian(doubles1, L);
233	fft_real_to_hermitian(doubles2, L);
234	mul_hermitian(doubles2, doubles1, L);
235	fftinv_hermitian_to_real(doubles1, L);
236	addsignal(x, doubles1, L);
237
238	free(doubles1);
239	free(doubles2);
240	}
241	x->sign = finalsign*abs(x->sign);
242	bitlen(x); // XXX
243	if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow");
244	}
245
246	static void scramble_real(double *x, int n) {
247	register int i,j,k;
248	register double tmp;
249
250	for(i=0,j=0;i<n-1;i++) {
251	if(i<j) {
252	tmp = x[j];
253	x[j]=x[i];
254	x[i]=tmp;
255	}
256	k = n/2;
257	while(k<=j) {
258	j -= k;
259	k>>=1;
260	}
261	j += k;
262	}
263	}
264
265	static void fft_real_to_hermitian(double *zs, int n) {
266	/* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
267	This is a decimation-in-time, split-radix algorithm.
268	*/
269	register double cc1, ss1, cc3, ss3;
270	register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
271	a, a3, b, b3, nminus = n-1, dil, expand;
272	register double *x, e;
273	int nn = n>>1;
274	double t1, t2, t3, t4, t5, t6;
275	register int n2, n4, n8, i, j;
276
277	init_sincos(n);
278	expand = CurrentRun/n;
279	scramble_real(zs, n);
280	x = zs-1; /* FORTRAN compatibility. */
281	is = 1;
282	iD = 4;
283	do{
284	for(i0=is;i0<=n;i0+=iD) {
285	i1 = i0+1;
286	e = x[i0];
287	x[i0] = e + x[i1];
288	x[i1] = e - x[i1];
289	}
290	is = (iD<<1)-1;
291	iD <<= 2;
292	} while(is<n);
293	n2 = 2;
294	while(nn>>=1) {
295	n2 <<= 1;
296	n4 = n2>>2;
297	n8 = n2>>3;
298	is = 0;
299	iD = n2<<1;
300	do {
301	for(i=is;i<n;i+=iD) {
302	i1 = i+1;
303	i2 = i1 + n4;
304	i3 = i2 + n4;
305	i4 = i3 + n4;
306	t1 = x[i4]+x[i3];
307	x[i4] -= x[i3];
308	x[i3] = x[i1] - t1;
309	x[i1] += t1;
310	if(n4==1) continue;
311	i1 += n8;
312	i2 += n8;
313	i3 += n8;
314	i4 += n8;
315	t1 = (x[i3]+x[i4])*SQRTHALF;
316	t2 = (x[i3]-x[i4])*SQRTHALF;
317	x[i4] = x[i2] - t1;
318	x[i3] = -x[i2] - t1;
319	x[i2] = x[i1] - t2;
320	x[i1] += t2;
321	}
322	is = (iD<<1) - n2;
323	iD <<= 2;
324	} while(is<n);
325	dil = n/n2;
326	a = dil;
327	for(j=2;j<=n8;j++) {
328	a3 = (a+(a<<1))&nminus;
329	b = a*expand;
330	b3 = a3*expand;
331	cc1 = s_cos(b);
332	ss1 = s_sin(b);
333	cc3 = s_cos(b3);
334	ss3 = s_sin(b3);
335	a = (a+dil)&nminus;
336	is = 0;
337	iD = n2<<1;
338	do {
339	for(i=is;i<n;i+=iD) {
340	i1 = i+j;
341	i2 = i1 + n4;
342	i3 = i2 + n4;
343	i4 = i3 + n4;
344	i5 = i + n4 - j + 2;
345	i6 = i5 + n4;
346	i7 = i6 + n4;
347	i8 = i7 + n4;
	348	t1 = x[i3]*cc1 + x[i7]*ss1;
	349	t2 = x[i7]*cc1 - x[i3]*ss1;
	350	t3 = x[i4]*cc3 + x[i8]*ss3;
	351	t4 = x[i8]*cc3 - x[i4]*ss3;
352	t5 = t1 + t3;
353	t6 = t2 + t4;
354	t3 = t1 - t3;
355	t4 = t2 - t4;
356	t2 = x[i6] + t6;
357	x[i3] = t6 - x[i6];
358	x[i8] = t2;
359	t2 = x[i2] - t3;
360	x[i7] = -x[i2] - t3;
361	x[i4] = t2;
362	t1 = x[i1] + t5;
363	x[i6] = x[i1] - t5;
364	x[i1] = t1;
365	t1 = x[i5] + t4;
366	x[i5] -= t4;
367	x[i2] = t1;
368	}
369	is = (iD<<1) - n2;
370	iD <<= 2;
371	} while(is<n);
372	}
373	}
374	}
375
376	static void fftinv_hermitian_to_real(double *zs, int n) {
377	/* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
378	This is a decimation-in-frequency, split-radix algorithm.
379	*/
380	register double cc1, ss1, cc3, ss3;
381	register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
382	a, a3, b, b3, nminus = n-1, dil, expand;
383	register double *x, e;
384	int nn = n>>1;
385	double t1, t2, t3, t4, t5;
386	int n2, n4, n8, i, j;
387
388	init_sincos(n);
389	expand = CurrentRun/n;
390	x = zs-1;
391	n2 = n<<1;
392	while(nn >>= 1) {
393	is = 0;
394	iD = n2;
395	n2 >>= 1;
396	n4 = n2>>2;
397	n8 = n4>>1;
398	do {
399	for(i=is;i<n;i+=iD) {
400	i1 = i+1;
401	i2 = i1 + n4;
402	i3 = i2 + n4;
403	i4 = i3 + n4;
404	t1 = x[i1] - x[i3];
405	x[i1] += x[i3];
406	x[i2] += x[i2];
407	x[i3] = t1 - 2.0*x[i4];
408	x[i4] = t1 + 2.0*x[i4];
409	if(n4==1) continue;
410	i1 += n8;
411	i2 += n8;
412	i3 += n8;
413	i4 += n8;
414	t1 = (x[i2]-x[i1])*SQRTHALF;
415	t2 = (x[i4]+x[i3])*SQRTHALF;
416	x[i1] += x[i2];
417	x[i2] = x[i4]-x[i3];
418	x[i3] = -2.0*(t2+t1);
419	x[i4] = 2.0*(t1-t2);
420	}
421	is = (iD<<1) - n2;
422	iD <<= 2;
423	} while(is<n-1);
424	dil = n/n2;
425	a = dil;
426	for(j=2;j<=n8;j++) {
427	a3 = (a+(a<<1))&nminus;
428	b = a*expand;
429	b3 = a3*expand;
430	cc1 = s_cos(b);
431	ss1 = s_sin(b);
432	cc3 = s_cos(b3);
433	ss3 = s_sin(b3);
434	a = (a+dil)&nminus;
435	is = 0;
436	iD = n2<<1;
437	do {
438	for(i=is;i<n;i+=iD) {
439	i1 = i+j;
440	i2 = i1+n4;
441	i3 = i2+n4;
442	i4 = i3+n4;
443	i5 = i+n4-j+2;
444	i6 = i5+n4;
445	i7 = i6+n4;
446	i8 = i7+n4;
447	t1 = x[i1] - x[i6];
448	x[i1] += x[i6];
449	t2 = x[i5] - x[i2];
450	x[i5] += x[i2];
451	t3 = x[i8] + x[i3];
452	x[i6] = x[i8] - x[i3];
453	t4 = x[i4] + x[i7];
454	x[i2] = x[i4] - x[i7];
455	t5 = t1 - t4;
456	t1 += t4;
457	t4 = t2 - t3;
458	t2 += t3;
	459	x[i3] = t5*cc1 + t4*ss1;
	460	x[i7] = -t4*cc1 + t5*ss1;
	461	x[i4] = t1*cc3 - t2*ss3;
	462	x[i8] = t2*cc3 + t1*ss3;
463	}
464	is = (iD<<1) - n2;
465	iD <<= 2;
466	} while(is<n-1);
467	}
468	}
469	is = 1;
470	iD = 4;
471	do {
472	for(i0=is;i0<=n;i0+=iD){
473	i1 = i0+1;
474	e = x[i0];
475	x[i0] = e + x[i1];
476	x[i1] = e - x[i1];
477	}
478	is = (iD<<1) - 1;
479	iD <<= 2;
480	} while(is<n);
481	scramble_real(zs, n);
482	e = 1/(double)n;
483	for(i=0;i<n;i++) zs[i] *= e;
484	}
485
486
	487	static void mul_hermitian(double *a, double *b, int n) {
488	register int k, half = n>>1;
489	register double aa, bb, am, bm;
490
491	b[0] *= a[0];
492	b[half] *= a[half];
493	for(k=1;k<half;k++) {
494	aa = a[k]; bb = b[k];
495	am = a[n-k]; bm = b[n-k];
	496	b[k] = aa*bb - am*bm;
	497	b[n-k] = aa*bm + am*bb;
498	}
499	}
500
501	static void square_hermitian(double *b, int n) {
502	register int k, half = n>>1;
503	register double c, d;
504
505	b[0] *= b[0];
506	b[half] *= b[half];
507	for(k=1;k<half;k++) {
508	c = b[k]; d = b[n-k];
	509	b[n-k] = 2.0*c*d;
510	b[k] = (c+d)*(c-d);
511	}
512	}
513
514	static void giant_to_double(giant x, int sizex, double *zs, int L) {
515	register int j;
516	for(j=sizex;j<L;j++) zs[j]=0.0;
517	for(j=0;j<sizex;j++) {
518	zs[j] = x->n[j];
519	}
520	}