From lenstra@jordan.bellcore.com Thu May 14 09:17:48 1992
Received: from jordan.bellcore.com by scss3.cl.msu.edu (4.1/4.7)  id AA02637; Thu, 14 May 92 09:17:41 EDT
Received: by jordan.bellcore.com (5.61/1.34)
	id AA09370; Thu, 14 May 92 09:17:30 -0400
Date: Thu, 14 May 92 09:17:30 -0400
From: lenstra@jordan.bellcore.com (Arjen Lenstra)
Message-Id: <9205141317.AA09370@jordan.bellcore.com>
To: mrr@scss3.cl.msu.edu
Subject: bug fix in lenstra_3
Status: OR

/*
Signed arbitrary length long int package.

Note From lenstra@bellcore.com Wed Jun 16 09:24:11 1993:
For the latest version of this package (including some bug fixes,
new functions, and documentation), contact lenstra@bellcore.com

	Copyright Arjen K. Lenstra, 1989

Example: to read long ints a and b  of arbitrary length from stdin, put their
	product in c, and print c followed by new line, you can use the
	following program:

	main () { long *a=0, *b=0, *c=0;
		zread(&a); zread(&b);
		zmul(a,b,&c);
		zwriteln(c);
	}

	Notice the following:
		-always declare long int varname as long *varname=0;
		-output variables to functions get a & (because their
		 length might increase). (Exception: first argument of
		 zcomposite can be output, but never larger than is was
		 upon call, so it doesn't get a &).
	Furthermore:
		-if you know what you're doing you can allocate the
                 space by hand (use zsetlength), and make things slightly
                 faster by giving the -DALLOCATE flag.
		-if you don't forget, unlike me, to call zstart()
		 before you do anything else, you can use -DSTART.
		-you can see the reallocations by using -DREALLOC.
		-if you don't want to think about flags, allocations, and
		 other dirt, everything should work as in the above example,
		 and it is supposed to be easy: declare your long ints as
		 long *varname=0, and give them a & in a function call if
		 they are output. So, zadd(a,a,&a) adds a to a and puts
		 the result in a. But zmul(a,b,&a) is wrong, because input
		 cannot be output in zmul; use zmulin(&a,b) in this case.
		-Usually it's better to declare long ints in frequently
		 called routines as
			static long *varname=0;
		 instead of
			long *varname=0;
		 The first alternative avoids reallocation of space for
		 the long int varname (unless it needs more space than
		 was needed during all previous calls to the routine), in
		 the second alternative space will be allocated for
		 varname during each call where varname will get a value.
		-I plan to write a complete documentation, with among
		 others a complete list of what's available (and how it
		 works), but that may take a while. If someone made something
		 like that, I would be happy to include it. For the moment,
		 this might be helpful:
		 

	(long *a=0, *b=0, *c=0, *q=0, *r=0)
	zread(&a) reads signed a from stdin, \ breaks long lines
	zwriteln(a) prints a on stdout, \ breaks long lines
	zadd(a,b,&c) c=a+b
	zsub(a,b,&c) c=a-b
	zmul(a,b,&c) c=a*b, output cannot be input
	zsq(a,&c) c=a*a, output cannot be input
	zmulin(&a,b) a=a*b
	zsqin(&a) a=a*a
	zdiv(a,b,&q,&r) q=a/b, r=a mod b, r same sign as b, a=b*q+r
	zmod(a,b,&r) as above, but slightly faster, forgets about q
	
		-zcomposite allows input of first base, and recognizes
		 first base 2 (which makes things much faster; of course
		 for some numbers it's a bad idea to use base 2, like
		 for instance Fermat numbers). zcomposite now also includes
		 another nice feature: it can prove that the input is not
		 a prime power (if that's the case).
		-zmul(a,a,&c)=zsq(a,&c) uses squaring, which is much faster
		 than multiplication.
		-for multiplication (squaring) of large numbers there's 
		 a lazy (it uses + instead of -, I was too lazy) Karatsuba.
		-there are various new exponentiation toys, like a fast m-ary
		 method, both modular and montgomery. Didn't use it in
		 zcomposite (because the base is usually small), but in other
		 applications it's usually faster than normal exponentiation.
		   zmcomposite uses mixed arithmetic, and is faster than
		 zcomposite.

	Bugs, questions, suggestions, additions, whatever, to
		Arjen K. Lenstra
		Room 2Q334
		Bellcore
		445 South Street
		Morristown, NJ 07960

		email: lenstra@flash.bellcore.com
		tel: 201 829 4878
		fax: 201 829 2645

*/

/* basic.c.  Someone who knows gcc can probably make better versions */
/* of zdiv21 and zsmulmod (like zaddmulp contribution); be careful */
/* because zsmulmod is also referenced in the new mpqs.c.   */

#include <stdio.h>
#include <math.h>
#include <malloc.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/resource.h>

#define BYTEL		8	/* 8 bits per byte */
#define SIZEOFLONG	4	/* set this to SIZEOFLONG */
#define BITSOFLONG	(BYTEL*SIZEOFLONG)
#define NBITS		30	/* must be even and 0 < NBITS <BITSOFLONG */
#define NBITSH		(NBITS>>1)
#define RADIX		(1<<NBITS)
#define RADIXM		(RADIX-1)
#define RADIXROOT	(1<<NBITSH)
#define RADIXROOTM	(RADIXROOT-1)
#define LOG10RAD	(double)9.03089986991943585644 /* base 10 log(RADIX) */
#define MAXDEPTH	20
#define MULT_CROSS 	30
#define SQUARE_CROSS 	30

/* MULT_CROSS and SQUARE_CROSS: if number of blocks <= MULT_CROSS in
   multiplication, then normal multiplication will be used, Karatsuba
   otherwise. Similar SQRT_CROSS and squaring. Values depend on machine
   you're going to use. Sparcs typically get smaller optimal values
   than Decs (so sparcs go to Karatsuba earlier). The current values are
   not too far from optimal for a Dec 5000
*/

#define SIZE		20	/* SIZE*NBITS must be >= BITSOFLONG */
		/* SIZE is default and minimum allocation size for long ints. */
		/* any sufficiently large positive value should work. */
		/* Depending on the application smaller or larger values */
		/* might lead to (slightly) more efficient code (because */
		/* it uses less space (SIZE <20), or because it needs */
		/* fewer reallocations (SIZE >20)). */
		/* give -DREALLOC flag to see the reallocations */
		/* and recompile with other (larger) SIZE to prevent */
		/* them in future runs. Of course, you don't have to, */
		/* but you can. */

#if (!(NBITS&1)&&(NBITS>0)&&(NBITS<BITSOFLONG)&&(SIZE*NBITS>=BITSOFLONG))

#define LINE		68	/* approximate bound # digits per line */
#define	BUFSIZE		2048	/* at most 2048 characters per line */

#define	PRIMBND		5000	/* to generate primes <= (2*PRIMBND+1)^2 */
				/* for 5000 the last prime is 100019977 */
				/* there are 5762535 primes <= 100019977 */

/* it is a good idea to replace zaddmulp, zaddmul, zmmulp, zsmulmod, and */
/* zsubmul by assembly language versions */

/* zaddmulp(a,b,d,t): a = a+t+b*d % RADIX; t = b*d / RADIX; long a,b,d,t */
/* zaddmulpsq(a,b,t): a = a+b*b % RADIX; t = b*b / RADIX; long a,b,t */


#if 0
#define		zaddmulp(_a,_b,_d,_t) \
{ register long lb = (_b), ld = (_d), \
  		b1 = (_b) & RADIXROOTM, d1 = (_d) & RADIXROOTM; \
  register long aa = *(_a) + b1 * d1; \
  b1 = b1* ( ld >>= NBITSH ) + d1* ( lb >>= NBITSH ) + (aa >> NBITSH); \
  aa = (aa & RADIXROOTM) + ((b1 & RADIXROOTM) << NBITSH) + *(_t); \
  *(_t) = ld*lb + (b1 >> NBITSH) + (aa >> NBITS); \
  *(_a) = (aa & RADIXM); \
}

#define         zaddmulpsq(_a,_b,_t) \
{ register long lb = (_b), b1 = (_b) & RADIXROOTM; \
  register long aa = *(_a) + b1 * b1; \
  b1 = ( b1* ( lb >>= NBITSH ) << 1)  + (aa >> NBITSH); \
  aa = (aa & RADIXROOTM) + ((b1 & RADIXROOTM) << NBITSH); \
  *(_t) = lb*lb + (b1 >> NBITSH) + (aa >> NBITS); \
  *(_a) = (aa & RADIXM); \
}

#endif
#if 1
/*
	This definition of zaddmulp presumes a two's complement
	machine in which integer overflow is ignored
	and where double precision arithmetic is fast.
	The 0.25 allows round to nearest or round towards zero
	(value being rounded should be integer except for roundoff.)
*/
static double fradix_inv = 1.0/RADIX;	/* Global constant */

#define zaddmulp(_a,_b,_d,_t) \
{ register at = *(_a) + *(_t); \
  register long aa = (at + (_b)*(_d)) & RADIXM; \
  *(_t) = (long)(0.25 + fradix_inv*(  ((double)(at-aa)) \
		                    + ((double)(_b)) * ((double)(_d)) ) ); \
  *(_a) = aa; \
}

#define zaddmulpsq(_a,_b,_t) \
{ register at = *(_a); \
  register long aa = (at + (_b)*(_b)) & RADIXM; \
  *(_t) = (long)(0.25 + fradix_inv*(  ((double)(at-aa)) \
                                    + ((double)(_b)) * ((double)(_b)) ) ); \
  *(_a) = aa; \
}

#endif

#if 1
/* zaddmul(d,a,b): a += d*b, long d, *a, *b */
/* a and b not at overlapping addresses, except for d=1 */

#define 	zaddmulone(ama,amb) \
{ register long lami, lams = 0, *lama = (ama), *lamb = (amb); \
     lams = 0; \
     for (lami=(*lamb++);lami>0;lami--) \
	{  lams += (*lama + *lamb++); \
	  *lama++ = lams & RADIXM; \
	   lams >>= NBITS; } \
     *lama += lams; \
}

#define 	zaddmul(ams,ama,amb) \
{ register long lami, lams = (ams), *lama = (ama), *lamb = (amb); \
  long lamcarry=0; \
     for (lami=(*lamb++);lami>0;lami--) \
	{ zaddmulp(lama,*lamb,lams,&lamcarry); lama++; lamb++; } \
     *lama += lamcarry; \
}
/* Be careful, the last lama is unnormalized */

/* zaddmulsq(d,a,b): a += b[0]*b[rest, d entries], long d, *a, *b */
/* a and b not at overlapping addresses, except for d=1 */

#define         zaddmulsq(sql,sqa,sqb) \
{ register long lsqi=(sql), lsqs= *(sqb), *lsqa= (sqa), *lsqb= (sqb); \
  long lsqcarry=0; lsqb++;\
     for (;lsqi>0;lsqi--) \
        { zaddmulp(lsqa,*lsqb,lsqs,&lsqcarry); lsqa++; lsqb++; } \
     *lsqa += lsqcarry; \
}
/* Be careful, the last lama is unnormalized */

/* zmmulp(a,b): a[s:] += (a[s]*zminv)*b so that a[s] = 0 */
/* only in Montgomery multiplication zmont */
/* The meaning of s is clear from zmont, something like zntop-i +- 1 */

#define		zmmulp(mmpa,mmpb) \
{ register long *lmmpa = (mmpa), *lmmpb =(mmpb); \
  register long lmmi = (*lmmpa) >> NBITSH, lmmd = (*lmmpa) & RADIXROOTM; \
  long zmmtemp = 0; \
  lmmd=(zminv1*lmmd+(((zminv1*lmmi+zminv2*lmmd)&RADIXROOTM)<<NBITSH))&RADIXM; \
  for (lmmi = *lmmpb++;lmmi>0;lmmi--) \
	{ zaddmulp(lmmpa,*lmmpb,lmmd,&zmmtemp); lmmpa++; *lmmpb++; } \
  if (((*lmmpa += zmmtemp) & RADIX) > 0 ) { (*lmmpa++)&=RADIXM; (*lmmpa)++; } \
}

/* If you prefer you can replace the above macro's */
/* by the following C-routines (or better, */
/* replace them by assembly versions) */
#endif
#if 0

zaddmulp(a,b,d,t) long *a,b,d,*t; {
	register long b1 = b & RADIXROOTM, d1 = d & RADIXROOTM;
	register long aa = *a + b1 * d1;
	b1 = b1* ( d >>= NBITSH ) + d1* ( b >>= NBITSH ) + (aa >> NBITSH);
	aa = (aa & RADIXROOTM) + ((b1 & RADIXROOTM) << NBITSH) + *t;
	*t = d*b + (b1 >> NBITSH) + (aa >> NBITS);
	*a = (aa & RADIXM);
}

zaddmul(d,a,b) long d,*a,*b; {
	register long i;
	long carry=0;
	long *carrya= &carry;
	for (i=(*b++);i>0;i--) zaddmulp(a++,*b++,d,carrya);
	*a += carry;
}

zaddmulone(a,b) long *a,*b; { zaddmul(1,a,b); }

zmmulp(pa,pb) long *pa,*pb; {
	register long i = (*pa) >> NBITSH, d = (*pa) & RADIXROOTM;
	long temp = 0;
	extern long zminv1,zminv2;
	d=(zminv1*d+(((zminv1*i+zminv2*d)&RADIXROOTM)<<NBITSH))&RADIXM;
	for (i = *pb++;i>0;i--) zaddmulp(pa++,*pb++,d,&temp);
	if (((*pa += temp) & RADIX) > 0 ) { (*pa++)&=RADIXM; (*pa)++; }
}

#endif

/* return (a*b)%n */

long zsmulmod(a, b, n) long a,b,n; {
	register long lqmul = (long)(((double)a)*((double)b) /((double)n));
#if 0
	register long lr;
	long lprodhigh1=0, lprodhigh2=0, lprodlow1=0, lprodlow2=0;
	zaddmulp(&lprodlow1,  a, b, &lprodhigh1);
	zaddmulp(&lprodlow2, lqmul, n, &lprodhigh2);
	lr = RADIX*(lprodhigh1 - lprodhigh2) + (lprodlow1 - lprodlow2);
#else
/* Many machines compute the following modulo 2^32, which is OK */
	register long lr = a*b - lqmul*n;
#endif
	while (lr >= n) lr -= n;
	while (lr < 0) lr += n;
	return(lr);
}

/* a[s:] -= d*b, optimize this one, a and b not at overlapping addresses */

zsubmul(r,a,b)	long r,*a,*b; {
	register long rd = RADIX-r, i;
	long carry = RADIX;
	for (i=(*b++);i>0;i--) {
		zaddmulp(a,*b,rd,&carry); a++;
		carry += RADIXM-(*b++);
	}
	*a += carry-RADIX; /* unnormalized */
}

extern char *calloc();

/* global variables */

static int time_flag=0;		/* for pollard time outs */
static double epsilon, fradix= (double)RADIX, fudge = -1.0; 
				/* for long division */
static long *zseed = 0, *zranp=0, *zprroot=0;	/* for random */
static long *zn = 0, *zr = 0, *zrr = 0, *zrrr = 0, *znm = 0,
	    zntop, zminv1, zminv2; /* for Montgomery multiplication */
static short int *lowsieve = 0, *movesieve = 0;	/* to generate primes */
static long pindex, pshift = -1, lastp=0;
static long oner[] = {1,1,1}, glosho[] = {1,1,0};
static long *one = &oner[1];
static long ***exp_odd_powers=0; /* for m-ary exponentiation */
static long **kar_mem = 0; /* for karatsuba */

/* end global variables */

/* timing function */

double gettime() {
	struct rusage used;
	getrusage (RUSAGE_SELF, &used);
	return ( used.ru_utime.tv_sec + used.ru_stime.tv_sec +
		( used.ru_utime.tv_usec + used.ru_stime.tv_usec ) / 1e6 );
}

zhalt(e) long e; {
	printf("fatal error number %ld occurred:\n	",e);
	switch(e) {
		case 1:	printf("out of space in zsetlength\n"); break;
		case 2: printf("negative size allocation in zsetlength, bug\n");
				break;
		case 4: printf("division by zero in zsdiv\n"); break;
		case 5: printf("division by zero in zdiv\n"); break;
		case 6: printf("division by zero in zmod\n"); break;
		case 7: printf("modulus zero in zaddmod\n"); break;
		case 8: printf("modulus zero in zsubmod\n"); break;
		case 9: printf("modulus zero in zmul/sqmod\n"); break;
		case 10: printf("wrong modulus in zmstart\n"); break;
		case 11: printf("modulus zero in ztom\n"); break;
		case 12: printf("modulus zero in zmtoz\n"); break;
		case 13: printf("modulus zero in zmont/sq\n"); break;
		case 14: printf("modulus zero in zmexp\n"); break;
		case 15: printf("zmexp with e<0: a^(-e) and n not coprime\n");
				break;
		case 16: printf("modulus zero in zexpmod\n"); break;
		case 17: printf("zexpmod with e<0: a^(-e) and n not coprime\n");
				break;
		case 18: printf("negative exponent in zexp\n"); break;
		case 19: printf("arguments of zsodinv not coprime\n"); break;
		case 20: printf("wrong call to zsinv(n,p): p not odd >=3 ");
				printf("or not n>0\n"); break;
		case 21: printf("allocation failure in zfwrite\n"); break;
		case 22: printf("reallocation failure in zfwrite\n"); break;
		case 23: printf("negative argument in zsqrt\n"); break;
		case 24: printf("wrong factor in pollard, bug\n"); break;
		case 25: printf("non-positive argument to zinv\n"); break;
		case 26: printf("non-zero remainder in zexteucl, bug\n"); break;
		case 27: printf("wrong argument(s) in zchirem\n"); break;
		case 28: printf("zchirem(a,b,ma,mb,.): a=b but ma-mb!=0 mod a\n");
				break;
		case 29: printf("arguments not co-prime in zchirem\n"); break;
		case 30: printf("non-initialized argument(s) in zchirem\n");
				break;
		case 31: printf("non-initialized argument(s) in zexteucl\n");
				break;
		case 32: printf("non-initialized argument(s) in zgcd\n");
				break;
		case 33: printf("non-initialized argument(s) in zgcdeucl\n");
				break;
		case 34: printf("modulus zero in zmexp_m_ary\n"); break;
		case 35: printf("zmexp_m_ary with e<0: a^(-e) and n not coprime\n");
				break;
		case 36: printf("modulus zero in zexpmod_m_ary\n"); break;
		case 37: printf("zexpmod_m_ary with e<0: a^(-e) and n not coprime\n");
				break;
	}
	fflush(stdout);
	exit(0);
}

/* allocation routines */

zsetlength(v,len
#ifdef REALLOC
		, str
#endif
			) long **v; long len;
#ifdef REALLOC
						char *str;
#endif
							 {
	long *x= *v;
	if (x)	{
		if (len <= x[-1]) return;
#ifdef REALLOC
		printf("%s reallocating to %ld\n",str,len); fflush(stdout);
#endif
		x[-1] = len;
		if (!(x=(long*)realloc(&(x[-1]),((int)len+2)*SIZEOFLONG))) {
	 	 printf("%d bytes realloc failed\n",((int)len+2)*SIZEOFLONG);
			zhalt((long)1);
		}
	} else if (len >= 0) {
		if (len < SIZE) len = SIZE;
#ifdef REALLOC
		printf("%s allocating to %ld\n",str,len); fflush(stdout);
#endif
		if (!(x = (long *)calloc((int)len+2,SIZEOFLONG))) {
	 	 printf("%d bytes calloc failed\n",((int)len+2)*SIZEOFLONG);
			zhalt((long)1);
		}
		x[0] = len; x[1] = 1; x[2] = 0;
	} else zhalt((long)2);
	x++;
	*v=x;
}

/* return double approximation of n, no overflow check */

double zdoub(n) long *n; {
	double res;
	register long i;
#ifndef ALLOCATE
	if (!n) return (double)0;
#endif
	if ((i = n[0])<0) i = -i;
	res = (double)(n[i--]);
	for (;i;i--) res = res*fradix+(double)(n[i]);
	if (n[0]>0) return(res);
	return(-res);
}

zfree(x) long *x; {
        long *y=(&(x[-1]));
        free(y);
        return;
}

zstart() {
	/* set values global variables */
	double one = (double)1, half = 1/(double)2;
	epsilon = one; fudge = one + epsilon;
	/* the following loop is sick, but we have to: */
	/* comparing one to one + epsilon does not have */
	/* the desired effect on some machines, because */
	/* one + epsilon is Not cast to double (as it should) */
	while ( one != fudge ) {
		epsilon = epsilon * half;
		fudge = one + epsilon;
	}
	epsilon += epsilon;
	if ((epsilon*RADIX) > one) {
		printf("decrease RADIX and recompile\n");
		exit((long)0);
	} else epsilon *= 3;
	fudge = fradix+epsilon*fradix;
}

/* arithmetic routines */

/* a = 0 */

zzero(aa) long **aa; {
	zsetlength(aa,(long)SIZE
#ifdef REALLOC
			  ,"in zzero, first argument"
#endif
						       );
	(*aa)[0] = 1; (*aa)[1] = 0;
}

/* a = 1 */

zone(aa) long **aa; {
	zsetlength(aa,(long)SIZE
#ifdef REALLOC
			  ,"in zzero, first argument"
#endif
						       );
	(*aa)[0] = (long)1; (*aa)[1] = (long)1;
}

/* bb = a */

zcopy(a,bb) long *a, **bb; {
	register long i;
	long *b = *bb;
#ifndef ALLOCATE
	if (!a) { zzero(bb); return; }
#endif
	if (a!=b) {
		if ((i = *a)<0) i = (-i);
		zsetlength(&b,i
#ifdef REALLOC
				,"in zcopy, second argument"
#endif
							    ); *bb = b;
		for (; i>=0; i--) *b++ = *a++;
	}
}

/* a = d */

zintoz(d,aa) long d,**aa; {
	register long i=0, anegative = 0;
	long *a= *aa;
	zsetlength(&a,(long)SIZE
#ifdef REALLOC
			  ,"in zintoz, second argument"
#endif
						       ); *aa = a;
	if (d<0) { anegative = 1; d = -d; }
	a[1] = 0;
	while (d > 0) { a[++i] = d&RADIXM; d >>= NBITS; }
	if (i>0) a[0] = i;
	else a[0] = 1;
	if (anegative) a[0] = (-a[0]);
}

/* returns (long)a, no overflow check */

long ztoint(a) long *a; {
	register long d, sa;
#ifndef ALLOCATE
	if (!a) return (long)0;
#endif
	if ((sa = *a) < 0) sa = -sa;
	d = *(a += sa);
	while (--sa) { d <<= NBITS; d += *(--a); }
	if ((*(--a))<0) return(-d);
	return(d);
}

/* if a>b then 1 else if a==b then 0 else (if a<b) then -1 */

long zcompare(a,b) long *a, *b; {
	register long sa , sb ;
#ifndef ALLOCATE
	if (!(a)) {
		if (!b) return 0;
		if (b[0]<0) return 1;
		if (b[0]>1) return -1;
		if (b[1]) return -1;
		return 0;
	}
	if (!(b)) {
		if (a[0]<0) return -1;
		if (a[0]>1) return 1;
		if (a[1]) return 1;
		return 0;
	}
#endif
	/* if (a==b) return(0); but who is comparing identical addresses? */
	if ((sa = *a) > (sb = *b)) return(1);
	if (sa < sb ) return(-1);
	if (sa<0) sa = (-sa);
	a += sa; b += sa;
	for (;sa;sa--) {
		if (*a > *b) {
			if (sb<0) return(-1);
			return(1);
		}
	  	if (*a-- < *b-- ) {
			if (sb<0) return(1);
			return(-1);
		}
		/* depending on the relative speed of various operations
		   it might be better to do the following:
		if ((aux=(*a--)-(*b--))>0) {
			if (sb<0) return(-1);
			return(1);
		} else if (aux<0) {
			if (sb<0) return(1);
			return(-1);
		}
		where aux is a register long
		*/
	}
	return(0);
}

/* a = -a */

znegate(a) long *a; {
	if (!a) return;
	if (a[1] || a[0]!=1) a[0]=(-a[0]);
}

/* b = a+d, abs(d)<RADIX, output can be input */

zsadd(a,d,b) long *a,d,**b; {
	long *x = &glosho[1];
	if (d >= 0) { x[1] = d; zadd(a,x,b); }
	else { x[1] = -d; zsub(a,x,b); }
}

/* low level c = a+b, b shorter than a, output can be input */

static zaddls(a,b,c) long *a,*b,*c; {
	register long sa = *a , sb = *b , carry=0, i, *pc;
	/* we know that sa and sb are both >0 or both <0 */
	pc = &c[0];
	if (sa < 0) { sa = -sa; sb = -sb; }
	for (i=1; i<=sb; i++) {
		if (( *(++pc) = (*(++a))+(*(++b))+carry) < RADIX ) carry = 0;
		else { *pc -= RADIX; carry = 1; }
	}
	for (; i<=sa; i++) {
		if (( *(++pc) = (*(++a))+carry) < RADIX ) carry = 0;
		else { *pc -= RADIX; carry = 1; }
	}
	if (carry) { c[0] = sa+1; *(++pc) = 1; }
	else c[0] = sa;
}

/* c = a+b, output can be input */

zadd(a,b,cc) long *a,*b,**cc; {
	register long sa, sb, anegative;
	long *c = *cc;
#ifndef ALLOCATE
	if (!a) { if (b) zcopy(b,cc); else zzero(cc); return; }
	if (!b) { zcopy(a,cc); return; }
#endif
	if ( ( anegative = ((sa = a[0])<0) ) == ((sb = b[0])<0 ) ) {
		/* signs a and b are the same */
		register long i;
		if (anegative) { sa = -sa; sb = -sb; }
		zsetlength(&c,(sa>sb?sa:sb)+1
#ifdef REALLOC
				 ,"in zadd, third argument"
#endif
							   ); *cc=c;
		if ( sa == sb ) {
			register long *pc;
			pc = &c[0]; i = 0;
			for (; sa; sa-- ) {
				if ((*(++pc)=(*(++a))+(*(++b))+i)<RADIX ) i = 0;
		  		else { *pc -= RADIX; i = 1; }
			}
			if (i) { c[0] = sb+1; *(++pc) = 1; }
			else c[0] = sb;
		} else if ( sa > sb ) zaddls(a,b,c);
		else zaddls(b,a,c); 
		if (anegative) c[0] = -c[0];
		/* if anegative, then c cannot be zero */
	} else {
		/* signs a and b are different */
		long *old, *oldc;
		oldc = c;
		if (anegative) { a[0] = -a[0];  old = a; }
		else { b[0] = -b[0]; old = b; }
		/* the one that's negative cannot be zero */
		if (!(sa=zcompare(a,b))) {
			zzero(&c); *cc = c;
			if (anegative) {
				if (old!=oldc) a[0] = -a[0];
			} else if (old!=oldc) b[0] = -b[0];
		} else if (sa>0) {
			zsubpos(a,b,&c); *cc = c;
			if (anegative) {
				c[0] = -c[0];
				if (old!=oldc) a[0] = -a[0];
			} else if (old!=oldc) b[0] = -b[0];
		} else {
			zsubpos(b,a,&c); *cc = c;
			if (!anegative) {
				c[0] = -c[0];
				if (old!=oldc) b[0] = -b[0];
			} else if (old!=oldc) a[0] = -a[0];
		}
	}
}

/* c = a-b, a>=b>=0, output can be input */

zsubpos(a,b,cc) long *a,*b,**cc; {
	register long sa=a[0], sb=b[0], carry = 0, i, *pc;
	long *c = *cc;
#ifndef ALLOCATE
	if (!b) { if (a) zcopy(a,cc); else zzero(cc); return; }
	if (!a) { zzero(cc); return; }
#endif
	zsetlength(&c,sa
#ifdef REALLOC
			,"in zsubpos, third argument"
#endif
						     ); *cc = c;
	pc = &c[0];
	for (i=1; i<=sb; i++) {
		if ((*(++pc) = (*(++a))-(*(++b))-carry) >= 0 ) carry=0;
	  	else { *pc += RADIX; carry = 1; };
	}
	for (; i<=sa; i++) {
		if ((*(++pc) = (*(++a))-carry) >= 0 ) carry = 0;
	  	else { *pc += RADIX; carry = 1; };
	}
	i = sa;
	while ((i>1) && (!(*pc))) { i--; pc--; }
	c[0] = i;
}

/* c = a-b, output can be input */

zsub(a,b,cc) long *a,*b,**cc; {
	register long sa, sb, anegative;
	long *c = *cc;
#ifndef ALLOCATE
	if (!b) { if (a) zcopy(a,cc); else zzero(cc); return; }
	if (!a) { zcopy(b,cc); znegate(*cc); return; }
#endif
	if ( ( anegative = ((sa = a[0])<0) ) == ((sb = b[0])<0 ) ) {
		/* signs a and b are the same */
		register long carry=0, i, *pc, agrb;
		if (!(agrb=zcompare(a,b))) {
			zzero(cc); return;
		}
		if ((agrb>0 && anegative) || (agrb<0 && !anegative)) {
			pc = a; a = b; b = pc; sa = *a; sb = *b;
		}
		if (anegative) { sa = -sa; sb = -sb; }
		zsetlength(&c,sa
#ifdef REALLOC
				,"in zsub, third argument"
#endif
							  ); *cc = c;
		pc = &c[0];
		for (i=1; i<=sb; i++) {
			if ((*(++pc) = (*(++a))-(*(++b))-carry) >= 0 ) carry=0;
	  		else { *pc += RADIX; carry = 1; };
		}
		for (; i<=sa; i++) {
			if ((*(++pc) = (*(++a))-carry) >= 0 ) carry = 0;
	  		else { *pc += RADIX; carry = 1; };
		}
		i = sa;
		while ((i>1) && (!(*pc))) { i--; pc--; }
		if (agrb>0) c[0] = i; else c[0] = -i;
	} else {
		/* signs a and b are different */
		long *old, *oldc;
		oldc = c;
		if (anegative) { a[0] = -a[0];  old = a; }
		else { b[0] = -b[0]; old = b; }
		/* the one that's negative cannot be zero */
		zadd(a,b,&c); *cc = c;
		if (anegative) {
			c[0] = -c[0];
			if (old!=oldc) a[0] = -a[0];
		} else if (old!=oldc) b[0] = -b[0];
	}
}

/* b = d*a, d < RADIX, output can be input */

zsmul(a,d,bb) long *a,d,**bb; {
	register long sa, i, *pb, bnegative;
	long *b= *bb;
#ifndef ALLOCATE
	if (!a) { zzero(bb); return; }
#endif
	if (!d) { zzero(bb); return; }
	if ((sa = a[0])<0) {sa= (-sa); if (d<0) d = (-d);}
	else if (bnegative = (d<0)) d = (-d);
	zsetlength(&b,sa+1
#ifdef REALLOC
			  ,"in zsmul, third argument"
#endif
						     ); *bb = b;
	pb = &b[0];
	for (i=sa;i>= 0;i--) *pb++ = *a++;
	sa ++; *pb = 0;
	zaddmul(d-1,&b[1],&b[0]);
 	while ((sa>1) && (!(b[sa]))) sa --;
	b[0] = sa;
	if (bnegative && (b[1] || b[0] != 1)) b[0] = (-b[0]);
}

zmul(a,b,c) long *a, *b, **c; { /* output not input */
	register long aneg, bneg;
	long *olda, *oldb;
#ifndef ALLOCATE
	if ((!a) || (!b)) { zzero(c); return; }
#endif
	if (a==b) { zsq(a,c); return; }
	if (!kar_mem) {
		if ((kar_mem = (long**)malloc(5*MAXDEPTH*sizeof(long*))) == NULL)
			printf("initial malloc of kar_mem didn't work\n");
		for (aneg=(5*MAXDEPTH)-1;aneg>=0;aneg--) kar_mem[aneg]=(long*)0;
	}
	olda = a; oldb = b;
	if (aneg=(*a<0)) a[0] = -a[0];
	if (bneg=(*b<0)) b[0] = -b[0];
	if (*a > *b) kar_mul(a,b,c,(long)0);
	else kar_mul(b,a,c,(long)0);
	if (aneg != bneg && ((*c)[1] || (*c)[0]!=1)) (*c)[0] = -(*c)[0];
	if (aneg) olda[0] = -olda[0];
	if (bneg) oldb[0] = -oldb[0];
}

static kar_mul(a,b,c,shi) long *a, *b, **c, shi; {
	register long al, hal, i,restoreb0=b[0], *pc, bbig = 1;
	long **a0, **a1, **a2, **a3, **a4;
	zsetlength(c,(hal=(al=a[0])+(i=b[0]))
#ifdef REALLOC
                                      ,"in kar_mul, third argument"
#endif
								   );
	if ((shi>=(5*MAXDEPTH)) || (al<MULT_CROSS) || (i<MULT_CROSS)) {
		pc = &(*c)[1];
		for (i=hal;i>0;i--) *pc++ = 0;
		pc = &(*c)[1];
		if (al <= *b) for (i= al;i;i--) { zaddmul(*(++a),pc++,b); }
		else for (i= *b;i;i--) { zaddmul(*(++b),pc++,a); }
		while ((hal>1) && (!((*c)[hal]))) hal --;
		(*c)[0] = hal;
		return;
	}
	a0= &(kar_mem[shi]); a1= &(kar_mem[shi+1]); a2= &(kar_mem[shi+2]);
	a3= &(kar_mem[shi+3]); a4= &(kar_mem[shi+4]);
	hal = ((al+1)>>1);
	zsetlength(a0,al
#ifdef REALLOC
                                      ,"in kar_mul, locals\n"
#endif
							     );
	zsetlength(a1,al
#ifdef REALLOC
                                      ,""
#endif
					 );
	zsetlength(a2,al
#ifdef REALLOC
                                      ,""
#endif
					 );
	zsetlength(a3,al+hal
#ifdef REALLOC
                                      ,""
#endif
					 );
	zsetlength(a4,al+2
#ifdef REALLOC
                                      ,""
#endif
					 );
	i=hal; while ((i>1) && (!(a[i]))) i--; a[0] = i;
	if (hal>=b[0]) bbig = 0;
	else { i=hal; while ((i>1) && (!(b[i]))) i--; b[0] = i; }
	for (i=hal+1;i<=al;i++) (*a1)[i-hal]=a[i]; (*a1)[0] = al-hal;
	if (bbig) {
		for (i=hal+1;i<=restoreb0;i++) (*a3)[i-hal]=b[i];
		(*a3)[0] = restoreb0-hal;
	}
	kar_mul(a,b,a4,shi+5);
	zadd(a,(*a1),a0);
	a[0] = al;
	if (bbig) {
		kar_mul((*a1),(*a3),c,shi+5);
		zadd(b,(*a3),a2);
		b[0] = restoreb0;
		kar_mul((*a0),(*a2),a3,shi+5);
	} else kar_mul((*a0),b,a3,shi+5);
	zsubpos((*a3),(*a4),a3);
	if (bbig) zsubpos((*a3),*c,a3);
	zlshift((*a3),hal*NBITS,a3);
	hal <<= 1;
	if (bbig) {
		for (i=(*c)[0];i;i--) (*c)[i+hal] = (*c)[i];
		for (i=hal;i>(*a4)[0];i--) (*c)[i] = 0;
		for (;i;i--) (*c)[i] = (*a4)[i];
		(*c)[0] += hal;
	} else { for (i=(*a4)[0];i>=0;i--) (*c)[i] = (*a4)[i]; }
	zadd(*c,(*a3),c);
}

zsq(a,c) long *a, **c; { /* output is not input */
        register long aneg;
#ifndef ALLOCATE
        if (!a) { zzero(c); return; }
#endif
        if (!kar_mem) {
              if ((kar_mem = (long**)malloc(5*MAXDEPTH*sizeof(long*))) == NULL)
                      printf("initial malloc of kar_mem didn't work\n");
              for (aneg=(5*MAXDEPTH)-1;aneg>=0;aneg--) kar_mem[aneg]=(long*)0;
        }
        if (aneg=(*a<0)) a[0] = -a[0];
        kar_sq(a,c,(long)0);
        if (aneg) a[0] = -a[0];
}

static kar_sq(a,c,shi) long *a, **c, shi; {
        register long al, hal, i,*pc;
        long **a0, **a1, **a2;
	zsetlength(c,(i=((al=a[0])<<1))
#ifdef REALLOC
                                      ,"in kar_sq, second argument"
#endif
								   );
        if ((shi>=(3*MAXDEPTH)) || (al<SQUARE_CROSS)) {
                register unsigned long uncar;
                long carry = 0;
                pc = &(*c)[1];
                for (;i>0;i--) *pc++ = 0;
                for (hal=1;hal<=al;hal++) {
                        i += 2;
                        { zaddmulsq(al-hal,&((*c)[i]),&(a[hal])); }
                        uncar = ((*c)[i-1] <<1) + carry;
                        (*c)[i-1] = uncar&RADIXM;
                        uncar = ((*c)[i] << 1) + (uncar >> NBITS);
                        { zaddmulpsq(&(*c)[i-1],a[hal],&carry); }
                        uncar += carry;
                        carry = uncar >> NBITS;
                        (*c)[i] = uncar&RADIXM;
                }
                while ((i>1) && (!((*c)[i]))) i --;
                (*c)[0] = i;
                return;
        }
        a0= &(kar_mem[shi]); a1= &(kar_mem[shi+1]); a2= &(kar_mem[shi+2]);
        hal = ((al+1)>>1);
        zsetlength(a0,al+hal+2
#ifdef REALLOC
                                      ,"in kar_sq, locals\n"
#endif
							    );
        zsetlength(a1,al+2
#ifdef REALLOC
                                      ,""
#endif
					 );
        zsetlength(a2,al
#ifdef REALLOC
                                      ,""
#endif
					 );
        i=hal; while ((i>1) && (!(a[i]))) i--; a[0] = i;
        for (i=hal+1;i<=al;i++) (*a0)[i-hal]=a[i]; (*a0)[0] = al-hal;
        kar_sq(a,a1,shi+3);
	zadd(a,(*a0),a2);
        kar_sq((*a0),c,shi+3);
        a[0] = al;
	kar_sq((*a2),a0,shi+3);
	zsubpos((*a0),(*a1),a0);
	zsubpos((*a0),*c,a0);
        zlshift((*a0),hal*NBITS,a0);
        hal <<= 1;
        for (i=(*c)[0];i;i--) (*c)[i+hal] = (*c)[i];
        for (i=hal;i>(*a1)[0];i--) (*c)[i] = 0;
        for (;i;i--) (*c)[i] = (*a1)[i];
        (*c)[0] += hal;
        zadd(*c,(*a0),c);
}

/* c =a*b, output cannot be input */

zmul_plain(a,b,cc) long *a,*b,**cc; {
	register long i, *pc, sc;
	long *c = *cc, anegative, bnegative;
	long *olda, *oldb;
#ifndef ALLOCATE
	if ((!a) || (!b)) { zzero(cc); return; }
#endif
	olda = a; oldb = b;
	if (anegative=(*a<0)) a[0] = -a[0];
	if (olda == oldb) bnegative = anegative;
	else if (bnegative=(*b<0)) b[0] = -b[0];
	zsetlength(&c,(sc= *a + *b )
#ifdef REALLOC
				    ,"in zmul_plain, third argument"
#endif
							      ); *cc = c;
	pc = &c[1];
	for (i= sc; i>0; i--) *pc++ = 0;
	pc = &c[1];
	if (*a <= *b ) for (i= *a;i;i--) { zaddmul(*(++a),pc++,b); }
	else for (i= *b;i;i--) { zaddmul(*(++b),pc++,a); }
	while ((sc>1) && (!(c[sc]))) sc --;
	c[0] = sc;
	if (anegative!=bnegative && (c[1] || c[0]!=1)) c[0] = -c[0];
	if (anegative) olda[0] = -olda[0];
	if (bnegative && oldb != olda) oldb[0] = -oldb[0];
}

zmulin(a,b) long **a,*b; {
	static long *mem=0;
#ifndef ALLOCATE
	if ((!(*a)) || (!b)) { zzero(a); return; }
#endif
	zsetlength(&mem,(*a)[0]
#ifdef REALLOC
				,"in zmulin, local"
#endif
						   );
	zcopy(*a,&mem); zmul(mem,b,a);
}


/* c = a^2, output cannot be input */

zsq_plain(a,cc) long *a, **cc; {
        register long i, sc, *pc;
        register unsigned long uncar;
        long carry = 0, *c = *cc;
        long anegative;
#ifndef ALLOCATE
        if (!a) { zzero(cc); return; }
#endif
        if ((anegative=(*a))<0) anegative = - anegative;
        zsetlength(&c,(sc=2*anegative)
#ifdef REALLOC
                		      ,"in zsq_plain, second argument"
#endif
                						); *cc = c;
        pc = &c[1];
        for (i=sc;i>0;i--) *pc++=0;
        for (sc=1;sc<=anegative;sc++) {
                i += 2;
                { zaddmulsq(anegative-sc,&(c[i]),&(a[sc])); }
                uncar = (c[i-1] <<1) + carry;
                c[i-1] = uncar&RADIXM;
                uncar = (c[i] << 1) + (uncar >> NBITS);
                { zaddmulpsq(&c[i-1],a[sc],&carry); }
                uncar += carry;
                carry = uncar >> NBITS;
                c[i] = uncar&RADIXM;
        }
        while ((i>1) && (!(c[i]))) i --;
        c[0] = i;
}

zsqin(a) long **a; {
	static long *mem=0;
#ifndef ALLOCATE
	if (!(*a)) { zzero(a); return; }
#endif
	zsetlength(&mem,(*a)[0]
#ifdef REALLOC
			       ,"in zsqin, local"
#endif
						 );
	zcopy(*a,&mem);
	zsq(mem,a);
}

/*
	zdiv21 returns quot, rem so

	quot = (numhigh*RADIX + numlow)/denom;
	rem  = (numhigh*RADIX + numlow)%denom;

Assumes 0 <= numhigh < denom < RADIX and 0 <= numlow < RADIX.
*/

#define zdiv21(numhigh, numlow, denom, quot, rem) {\
    register long lr21, lq21 = (long)(((fradix*(double)(numhigh)) \
		              + (double)(numlow)) / (double)(denom)); \
    if (1) { \
      long lprodhigh = 0, lprodlow = 0; \
      zaddmulp(&lprodlow, lq21, denom, &lprodhigh); \
      lr21 = RADIX*(numhigh - lprodhigh) + (numlow - lprodlow); \
    } else { \
/* Following works in many two's complement architectures. */ \
      lr21 = (numhigh << NBITS) + numlow  - lq21*denom; \
    }\
    if (lr21 < 0) { do {lq21--;} while ((lr21 += denom) < 0); \
    } else      { while (lr21 >= denom) {lr21 -= denom; lq21++;}; \
    }  \
    *quot = lq21; *rem = lr21; \
}

/* b = a/d, returns a%ld, output can be input */

long zsdiv(a,d,bb) long *a,d,**bb; {
	register long sa;
	long *b= *bb;
#ifndef START
	if (fudge <0) zstart();
#endif
#ifndef ALLOCATE
	if (!a) { zzero(bb); return 0; }
#endif
	if (!d) zhalt((long)4);
	if ((sa=a[0])<0) sa = (-sa);
	zsetlength(&b,sa
#ifdef REALLOC
			,"in zsdiv, third argument"
#endif
						   ); *bb = b;
	if ((d >= RADIX) || ( d <= - RADIX )) {
		static long *zd = 0, *zb = 0;
		zintoz(d,&zb);
		zdiv(a,zb,&b,&zd);  *bb = b;
		return(ztoint(zd));
	} else {
		register long den = d, carry=0, i;
		int flag = (*a<0?2:0) | (den<0?1:0);
		if (den<0) den = -den;
		if (a[sa] < den && sa > 1) carry = a[sa--];
 		if (den < RADIXROOT) {
	    		register long rdivd = RADIX/den,
				      rmodd = RADIX - den*rdivd, temp;
  	    		for (i=sa; i; i--) {
				temp = a[i] + rmodd*carry;
	  			b[i] = rdivd*carry + temp/den;
	  			carry = temp % den;
			}
		} else {
	    		for (i = sa; i; i--) {
				long newcarry;
	    			zdiv21(carry, a[i], den, &b[i], &newcarry);
	    			carry = newcarry;
			}
        	}
  		while ((sa>1) && (!(b[sa]))) sa--;
  		b[0] = sa;
		if (flag) {
			if (flag<=2) {
				if (!carry) znegate(b);
				else {
					zadd(b,one,&b);
					b[0] = -b[0];
					if (flag==1) carry = carry - den;
					else carry = den - carry;
					*bb = b;
				}
			} else carry= -carry;
		}
  		return(carry);
	}
}

/* q = a/b, r = a%b, b can only be q or r if a and b positive */

zdiv(a,b,qqq,rrr) long *a,*b,**qqq,**rrr; {
	register long i, qq;
	long sa, sb, sq, *p, *pc, zsdiv(), sign;
	static long *c=0, *d=0;
	double btopinv, aux;
	long *q = *qqq, *r = *rrr, *olda, *oldb, *oldq, *oldr;
#ifndef START
	if (fudge <0) zstart();
#endif
#ifndef ALLOCATE
	if (!a) { zzero(qqq); zzero(rrr); return; }
#endif
	if ((!b) || (((sb=b[0])==1) && (!b[1]))) zhalt((long)5);
	olda = a; oldb = b; oldq = q; oldr = r;
	sign = (*a<0?2:0) | (*b<0?1:0);
	if (*a<0) (*a) = (-(*a)); sa = (*a);
	if (*b<0) (*b) = (-(*b)); sb = (*b);
	zsetlength(&c,(i = (sa>sb?sa:sb)+1)
#ifdef REALLOC
					   ,"in zdiv, locals\n"
#endif
							     );
	zsetlength(&d,i
#ifdef REALLOC
					   ,""
#endif
							     );
	zsetlength(&q,i
#ifdef REALLOC
			,"in zdiv, third argument"
#endif
						  ); *qqq=q;
  	zsetlength(&r,sb+1
#ifdef REALLOC
			  ,"in zdiv, fourth argument"
#endif
						     ); *rrr=r;
	p = &b[sb];
	if ((sb == 1) && (*p < RADIX)) zintoz(zsdiv(a,*p,&q),&r);
	else {
		sq = sa-sb; btopinv = (double)(*p)*fradix;
		if (sb > 1) btopinv += (*(--p));
		btopinv *= fradix;
		if (sb > 2) btopinv += (*(p-1));
		btopinv = fudge / btopinv;
		p = &a[1]; pc = &c[0];
		for (i=sa;i>0;i--) *pc++ = *p++;
		p = pc; sa = 1-sb;
		for (i=(-sq);i>0;i--) *p++ = 0;
		*pc = 0; d[1] = 0; p = &d[sq+1];
		for (i=sq;i>=0;i--) {
			aux = fradix*(fradix*(*pc)+(*(pc-1)))+1.0;
			if (i > sa) aux += (*(pc-2));
			qq = (long)(btopinv*aux+0.5);
		/* dirty, but safe. On most machines (long)(btopinv*aux) */
		/* is correct, or one too big; on some however it becomes*/
		/* too small. Could change zstart, but +0.5 and a while */
		/* instead of one single if is safer */
			if (qq > 0) {
				zsubmul(qq,&c[i],&b[0]);
				while (*pc < 0) {
					qq --;
					{ zaddmulone(&c[i],&b[0]); }
				}
			}
			pc--; *p-- = qq;
		}
		sb --;
		while ((sb > 0) && (!(c[sb]))) sb --;
		sb ++; r[0] = sb; p = &r[1]; pc = &c[0];
		for (i = sb;i>0;i--) *p++ = *pc++;
		if (sq < 0) { q[0] = 1; q[1] = d[1]; }
		else {
			sq ++;
			while ((sq > 1) && (!(d[sq]))) sq --;
			q[0] = sq; p = &q[1]; pc = &d[1];
			for (i=sq;i>0;i--) *p++ = *pc++;
		}
	} /* non-trivial case */

	if (sign) {
		if (sign <= 2) {
			if (!(r[1]) && (r[0] == 1)) q[0] = -(q[0]);
			else {
				zadd(q,one,&q);
				q[0] = -q[0];
				if (sign==1) zsub(r,b,&r);
				else zsub(b,r,&r);
			}
		} else znegate(r);
	}
	*qqq = q; *rrr = r;
	if (sign&2 && olda != oldq && olda != oldr) a[0] = (-a[0]);
	if (sign&1 && oldb != oldq && oldb != oldr && oldb != olda)
		b[0] = (-b[0]);
}

/* r = a%b, b can only be r if a and b positive */

zmod(a,b,rr) long *a,*b,**rr; {
	register long i, qq;
	long *r= *rr, *olda, *oldb, *oldr;
	static long *c=0;
	long  sa, sb, sq, *p, *pc, zsdiv(), sign;
	double btopinv, aux;
#ifndef START
	if (fudge <0) zstart();
#endif
#ifndef ALLOCATE
	if (!a) { zzero(rr); return; }
#endif
	if ((!b) || (((sb=b[0])==1) && (!b[1]))) zhalt((long)6);
	olda = a; oldb = b; oldr = r;
	sign = (*a<0?2:0) | (*b<0?1:0);
	if (*a<0) (*a) = (-(*a)); sa = (*a);
	if (*b<0) (*b) = (-(*b)); sb = (*b);
	zsetlength(&c,(sa>sb?sa:sb)+1
#ifdef REALLOC
					   ,"in zmod, local"
#endif
							    );
  	zsetlength(&r,sb+1
#ifdef REALLOC
			  ,"in zmod, third argument"
#endif
						    ); *rr=r;
	p = &b[sb];
	if ((sb == 1) && (*p < RADIX)) zintoz(zsdiv(a,*p,&c),&r);
	else {
		sq = sa-sb; btopinv = (double)(*p)*fradix;
		if (sb > 1) btopinv += (*(--p));
		btopinv *= fradix;
		if (sb > 2) btopinv += (*(p-1));
		btopinv = fudge / btopinv;
		p = &a[1]; pc = &c[0];
		for (i=sa;i>0;i--) *pc++ = *p++;
		p = pc; sa = 1-sb;
		for (i=(-sq);i>0;i--) *p++ = 0;
		*pc = 0;
		for (i=sq;i>=0;i--) {
			aux = fradix*(fradix*(*pc)+(*(pc-1)))+1.0;
			if (i > sa) aux += (*(pc-2));
			qq = (long)(btopinv*aux+0.5);
			/* see comment in zdiv */
			if (qq > 0) {
				zsubmul(qq,&c[i],&b[0]);
				while (*pc < 0) {
					{ zaddmulone(&c[i],&b[0]); }
				}
			}
			pc--;
		} /* loop on i */
		sb --;
		while ((sb > 0) && (!(c[sb]))) sb --;
		sb ++; r[0] = sb; p = &r[1]; pc = &c[0];
		for (i=sb;i>0;i--) *p++ = *pc++;
	} /* non-trivial case */
	if (sign) {
		if ((sign <= 2) &&(!((r[0]==1) && !(r[1])))){
				if (sign==1) zsub(r,b,&r);
				else zsub(b,r,&r);
		} else znegate(r); /* negating r==0 doesn't hurt */
	}
	*rr = r;
	if (sign&2 && olda != oldr) a[0] = (-a[0]);
	if (sign&1 && oldb != oldr && oldb != olda) b[0] = (-b[0]);
}

/* c = (a+b)%n, with 0 <= a,b < n, and n positive */

zaddmod(a,b,n,c) long *a,*b,*n,**c; {
	long zcompare();
#ifndef ALLOCATE
	if (!n) zhalt((long)7);
	if (!a) { if (b) zcopy(b,c); else zzero(c); return; }
	if (!b) { zcopy(a,c); return; }
#endif
	zadd(a,b,c);
	if (zcompare(*c,n) >= 0) zsubpos(*c,n,c);			
}

/* c = (a-b)%n, with 0 <= a,b < n, and n positive */

zsubmod(a,b,n,c) long *a,*b,*n,**c; {
	long zcompare();
#ifndef ALLOCATE
	if (!n) zhalt((long)8);
	if (!b) { if (a) zcopy(a,c); else zzero(c); return; }
	if (!a) { 
		if (b[1] || (b[0]!=1)) zsubpos(n,b,c);
		else zzero(c);
		return;
	}
#endif
	zsub(a,b,c);
	if ((*c)[0]<0) zadd(*c,n,c);
}

/* c = (a*b)%n, with 0 <= a,b < n, and n positive */

zmulmod(a,b,n,c) long *a,*b,*n,**c; {
	static long *mem=0;
#ifndef ALLOCATE
	if (!n) zhalt((long)9);
	if ((!a) || (!b)) { zzero(c); return; }
#endif
	zsetlength(&mem,a[0] + b[0] + 2
#ifdef REALLOC
					    ,"in zmulmod, local"
#endif
							        );
	zmul(a,b,&mem); zmod(mem,n,c);
}

zsqmod(a,n,c) long *a,*n,**c; {
	static long *mem=0;
#ifndef ALLOCATE
	if (!n) zhalt((long)9);
	if (!a) { zzero(c); return; }
#endif
	zsetlength(&mem, 2*a[0] + 2
#ifdef REALLOC
					,"in zsqmod, local"
#endif
							   );
	zsq(a,&mem);
	zmod(mem,n,c);
}

/* routines for Montgomery multiplication */

/* set up global values for Montgomery's multiplication */

zmstart(n) long *n; {
	static long *x = 0;
	long i;
	if (!n || !(n[1]&1) ||  ((zntop=n[0]) < 1)
		|| ((zntop == 1) && (n[1]<=1 )))
		zhalt((long)10);
	if (n[zntop] >= (RADIX >>1)) zntop ++;
	zsetlength(&x,(long)(zntop+1)
#ifdef REALLOC
				     ,"in zmstart, local"
#endif
							 );
	zsetlength(&zn,(long)(zntop+1)
#ifdef REALLOC
				     ,"in zmstart, globals\n"
#endif
							   );
	zsetlength(&zr,(long)(zntop+1)
#ifdef REALLOC
				     ,""
#endif
					 );
	zsetlength(&zrr,(long)(zntop+1)
#ifdef REALLOC
				     ,""
#endif
					 );
	zsetlength(&zrrr,(long)(zntop+1)
#ifdef REALLOC
				     ,""
#endif
					 );
	zsetlength(&znm,(long)(zntop+1)
#ifdef REALLOC
				     ,""
#endif
					 );
	if (zcompare(n,zn)) {
		zcopy(n,&zn);
		zadd(zn,one,&zrr);
		zrshift(zrr,(long)1,&x);	 /* x = (n+1)/2	*/
		zrr[0] = 1; zrr[1] = NBITS;
		zexpmod(x,zrr,zn,&x);		/* x = 2^(-NBITS) %n	*/
		for (i=x[0]; i; i--) x[i+1] = x[i];
		x[1] = 0; x[0] ++;
		zsubpos(x,one,&x);
		zdiv(x,zn,&x,&zrr);
		zminv1 = x[1];		/* zminv1 = n[1]^(-1) modulo 2^NBITS */
		zminv2 = zminv1 >> NBITSH;
		zminv1 &= RADIXROOTM;
		for (i=1; i<=zntop; i++) x[i] = 0;
		x[zntop+1] = 1; x[0] = zntop+1;
		zdiv(x,zn,&zrr,&zr);
		zsqmod(zr,zn,&zrr);
		zmulmod(zr,zrr,zn,&zrrr);
		zsubmod(zn,zr,zn,&znm);
	}
}

getzr(c) long **c; { if (zr) zcopy(zr,c); }

getzrrr(c) long **c; { if (zrrr) zcopy(zrrr,c); }

getznm(c) long **c; { if (znm) zcopy(znm,c); }

/* b is Montgomery representation of a */

ztom(a,bb) long *a,**bb; {
	long zcompare();
	if (!zn) zhalt((long)11);
#ifndef ALLOCATE
	if (!a) { zzero(bb); return; }
#endif
	if (zcompare(zn,a) < 0) { zmod(a,zn,bb); zmont(*bb,zrr,bb);}
	else zmont(a,zrr,bb);
}

/* Montgomery a is backtransformed to normal b */

zmtoz(a,bb) long *a,**bb; {
	if (!zn) zhalt((long)12);
#ifndef ALLOCATE
	if (!a) { zzero(bb); return; }
#endif
	zmont(a,one,bb);
}

/* c is Montgomery product of Mont's a and b, output can be input */

zmont(a,b,cc) long *a,*b,**cc; {
	register long i, j;
	long *c = *cc;
	static long *x = 0;
	long *px, *pc, zcompare();
	if (!zn) zhalt((long)13);
#ifndef ALLOCATE
	if ((!a) || (!b)) { zzero(cc); return; }
#endif
	zsetlength(&x,(i=(zntop << 1)+1)
#ifdef REALLOC
				       ,"in zmont, local"
#endif
							 );
	zsetlength(&c,zntop
#ifdef REALLOC
			   ,"in zmont, third argument"
#endif
						      ); *cc = c;
        if (!kar_mem) {
              if ((kar_mem = (long**)malloc(5*MAXDEPTH*sizeof(long*))) == NULL)
                      printf("initial malloc of kar_mem didn't work\n");
              for (j=(5*MAXDEPTH)-1;j>=0;j--) kar_mem[j]=(long*)0;
        }
	if (*a <= *b ) kar_mul(b,a,&x,(long)0);
	else kar_mul(a,b,&x,(long)0);
	for (;i>x[0];i--) x[i] = (long)0;
	px = &x[1];
	for (i = zntop; i>0; i--) zmmulp(px++,zn);
	pc = &c[1];
	for (i = zntop; i>0; i--) *pc++ = *px++;
	i = zntop;
	while ((i > 1) && (!(*--pc))) i --;
	c[0] = i;
	if (zcompare(c,zn) >= 0) zsubpos(c,zn,&c);
	*cc = c;
}

/* c is Montgomery square of Mont's a, output can be input */

zmontsq(a,cc) long *a,**cc; {
	register long i, j;
	long *c = *cc;
	static long *x = 0;
	long *px, *pc, zcompare();
	if (!zn) zhalt((long)13);
#ifndef ALLOCATE
	if (!a) { zzero(cc); return; }
#endif
	zsetlength(&x,(i=(zntop << 1)+1)
#ifdef REALLOC
				       ,"in zmontsq, local"
#endif
							 );
	zsetlength(&c,zntop
#ifdef REALLOC
			   ,"in zmontsq, third argument"
#endif
						      ); *cc = c;
        if (!kar_mem) {
              if ((kar_mem = (long**)malloc(5*MAXDEPTH*sizeof(long*))) == NULL)
                      printf("initial malloc of kar_mem didn't work\n");
              for (j=(5*MAXDEPTH)-1;j>=0;j--) kar_mem[j]=(long*)0;
        }
	kar_sq(a,&x,(long)0);
	for (;i>x[0];i--) x[i] = (long)0;
	px = &x[1];
	for (i = zntop; i>0; i--) zmmulp(px++,zn);
	pc = &c[1];
	for (i = zntop; i>0; i--) *pc++ = *px++;
	i = zntop;
	while ((i > 1) && (!(*--pc))) i --;
	c[0] = i;
	if (zcompare(c,zn) >= 0) zsubpos(c,zn,&c);
	*cc = c;
}

/* b = (a^e)&n, a and b Montgomery numbers, b and a can be identical */

zmexp(a,e,bb) long *a,*e,**bb; {
	register long i, j, k = 0;
	long *b = *bb;
	static long *loca = 0;
	if (!zn) zhalt((long)14);
#ifndef ALLOCATE
	if (!e) { zone(bb); return; }
	if (!a) { zzero(bb); return; }
#endif
	if ((i = e[0])<0) i = (-i);
	if ((i == 1) && (!(e[1]))) zone(bb);
	else {
		zcopy(a,&loca);
		zcopy(loca,&b);
		for (; i; i--) {
			j = e[i]; 
			if (!k) { 
				k = 1;
				while ((k << 1) <= j) k <<= 1;
			}
			while (k >>= 1) {
				zmontsq(b,&b);
				if (j & k) zmont(loca,b,&b);
			}
			k = RADIX;
		}
	}
        if (e[0]<0) {
		 if (zinv(b,zn,&b)) zhalt((long)15);
		zmont(b,zrrr,&b);
	}
	*bb = b;
}

/* b = (a^e) %n, long *a, **b, e, parameters cannot be the same  */

zexpsmod(a,e,n,bb) long *a,e,*n, **bb; {
	static long *le = 0;
	zintoz(e,&le); zexpmod(a,le,n,bb);
}

/* b = (a^e)&n, b and a can be identical, but both unequal to n and e */
			/* n is positive */

zexpmod(a,e,n,bb) long *a,*e,*n,**bb; {
	register long i, j, k = 0;
	long *b = *bb;
	static long *loca = 0;
#ifndef ALLOCATE
	if (!n) zhalt((long)16);
	if (!e) { zone(bb); return; }
	if (!a) { zzero(bb); return; }
#endif
	if ((i = e[0])<0) i = (-i);
	if ((i == 1) && (!(e[1]))) zone(&b);
	else {
		zmod(a,n,&loca); zcopy(loca,&b);
		for (; i; i--) {
			j = e[i]; 
			if (!k) { 
				k = 1;
				while ((k << 1) <= j) k <<= 1;
			}
			while (k >>= 1) {
				zsqmod(b,n,&b);
				if (j & k) zmulmod(loca,b,n,&b);
			}
			k = RADIX;
		}
	}
	if (e[0]<0 && zinv(b,n,&b)) zhalt((long)17);
	*bb = b;
}

/* b = (2^e)&n, b unequal to n and e, n odd and positive */

zexpmod2(e,n,bb) long *e,*n,**bb; {
	register long i, j, k = 0;
	long *b = *bb;
#ifndef ALLOCATE
	if (!n) zhalt((long)16);
	if (!e) { zone(bb); return; }
#endif
	if ((i = e[0])<0) i = (-i);
	if ((i == 1) && (!(e[1]))) zone(&b);
	else {
		if ((j=n[0])<0) j = -j;
		zsetlength(&b,j
#ifdef REALLOC
				,"in zexpmod2, third argument"
#endif
								); *bb=b;
		zintoz((long)2,&b);
		for (; i; i--) {
			j = e[i]; 
			if (!k) { 
				k = 1;
				while ((k << 1) <= j) k <<= 1;
			}
			while (k >>= 1) {
				zsqmod(b,n,&b);
				if (j & k) {
					ztimes2(b,&b);
					if (zcompare(b,n)>=0) zsubpos(b,n,&b);
				}
			}
			k = RADIX;
		}
	}
	if (e[0]<0 && zinv(b,n,&b)) zhalt((long)17);
	*bb = b;
}

/* m selector for m_ary exponentiation */

long default_m(l) long l; {
  if (l<3) return 2;
  if (l<5) return 3;
  if (l<11) return 4;
  if (l<26) return 5;
  if (l<65) return 6;
  if (l<160) return 7;
  if (l<393) return 8;
  if (l<800) return 9;
  return 10;
}

/* odd power setter for montgomery m-ary exponentiation */

static long mont_init_odd_power(a,a_sq,m,odd_a_power)
  long *a,**a_sq,m,***odd_a_power; {
  /* initialize odd_a_power with a^i mod n for odd positive i < 2^m */
  /* a on input is montgomery number */
  register long i,length= (1<<(m-1));
  if (m >= NBITS) {
    printf("wrong call to mont_init_odd_power, m= %ld, exit\n",m); exit(0);
  }
  if (!(*odd_a_power)) {
   /* printf("allocating exponentiation row for m=%ld\n",m); */
    *odd_a_power = (long**)malloc(length*sizeof(long*));
    for (i=0;i<length;i++) (*odd_a_power)[i] = (long*)0;
  }
  zcopy(a,&((*odd_a_power)[0]));
  zmontsq((*odd_a_power)[0],a_sq);
}

/* b = a^e, long *a, *e, **b, a and b montgomery numbers, use m_ary method */
/*		a and b can be the same */
/* uses default m if input m is <= 1, uses m = NBITS-1 if m >= NBITS */

zmexp_m_ary(a,e,bb,m) long *a,*e,**bb, m; {
        register long i, ei;
        static long *a_sq=0;
	if (!zn) zhalt((long)34);
#ifndef ALLOCATE
        if (!e) { zone(bb); return; }
        if (!a) { zzero(bb); return; }
#endif
        if ((i = e[0])<0) i = (-i);
        if (i == 1) { zmexp(a,e,bb); return; }
if (!exp_odd_powers) {
  exp_odd_powers = (long***)malloc(NBITS*sizeof(long**));
  for (ei=0;ei<NBITS;ei++) exp_odd_powers[ei] = (long**)0;
}
if (m <= 1) m = default_m(i); else if (m >= NBITS) m = (NBITS - 1);
{ register long k=0, od=0, odlen, left, zeros = 0, rem=0, first=1, reach = 1;
  long ***pow = &(exp_odd_powers[m]);
  mont_init_odd_power(a,&a_sq,m,pow);
  for (; i>0; i--) {
    ei = e[i];
    if (!k) {k=1; while (k<=ei) k <<= 1;}
    while (k >>= 1) {
      if (zeros) {
        if (ei&k) {
          odlen = od = 1; rem = zeros; left = zeros = 0;
        } else zeros ++;
      } else if (!od) {
        if (ei&k) {
          odlen = od = 1; left = zeros = 0;
        } else zeros = 1;
      } else {
        left ++;
        if (odlen+left == m) {
          if (ei&k) {
            od <<= left; odlen += left; left = 0; od ++;
            od >>= 1;
            for (;reach <= od; reach ++)
              zmont((*pow)[reach-1],a_sq,&((*pow)[reach]));
            if (first) {
              zcopy((*pow)[od],bb);
              /* printf("od %ld\n",od); */
              first = rem = odlen = od = 0;
            } else  {
              for (rem = rem+odlen; rem; rem--) zmontsq(*bb,bb);
              zmont(*bb,(*pow)[od],bb);
              /* printf("shift %ld, and od %ld\n",rem+odlen, od); */
              odlen = od = 0;
            }
          } else {
            if (first) {
              if (od==1) {
                zcopy(a_sq,bb);
                /* printf("use square\n"); */
                left --;
              } else {
                od >>= 1;
                for (;reach <= od; reach ++)
                  zmont((*pow)[reach-1],a_sq,&((*pow)[reach]));
                zcopy((*pow)[od],bb);
                /* printf("od %ld\n",od); */
              }
              zeros = left; first = left = odlen = od = rem = 0;
            } else {
              for (rem = rem+odlen; rem; rem--) zmontsq(*bb,bb);
              od >>= 1;
              for (;reach <= od; reach ++)
                zmont((*pow)[reach-1],a_sq,&((*pow)[reach]));
              zmont(*bb,(*pow)[od],bb);
              /* printf("shift %ld, and od %ld\n",rem+odlen, od); */
              zeros = left; left = odlen = od = 0;
            }
          }
        } else if (ei&k) {
          od <<= left; odlen += left; left = 0; od ++;
        }
      }
    }    
    k = RADIX;
  }
  if (od) {
    if (first) {
      if (od==1) {
        zcopy(a_sq,bb);
        /* printf("use square\n"); */
        left --;
      } else {
        od >>= 1;
        for (;reach < od; reach ++)
          zmont((*pow)[reach-1],a_sq,&((*pow)[reach]));
        zmont((*pow)[od-1],a_sq,bb);
        /* printf("od %ld\n",od); */
      }
    } else {
      for (rem = rem+odlen; rem; rem--) zmontsq(*bb,bb);
      od >>= 1;
      for (;reach <= od; reach ++)
        zmont((*pow)[reach-1],a_sq,&((*pow)[reach]));
      zmont(*bb,(*pow)[od],bb);
      /* printf("shift %ld, and od %ld\n",rem+odlen,od); */
    }
  }
  if (left || zeros) {
    for (rem = zeros+left; rem; rem--) zmontsq(*bb,bb);
    /* printf("shift %ld\n",zeros+left); */
  }
}

        if (e[0]<0) {
		 if (zinv(*bb,zn,bb)) zhalt((long)35);
		zmont(*bb,zrrr,bb);
	}
}

/* odd power setter for m-ary exponentiation */

static long init_odd_power(a,n,a_sq,m,odd_a_power)
  long *a,*n,**a_sq,m,***odd_a_power; {
  /* initialize odd_a_power with a^i mod n for odd positive i < 2^m */
  /* a on input is not necessarily reduced mod n */
  register long i,length= (1<<(m-1));
  if (m >= NBITS) {
    printf("wrong call to init_odd_power, m= %ld, exit\n",m); exit(0);
  }
  if (!(*odd_a_power)) {
   /* printf("allocating exponentiation row for m=%ld\n",m); */
    *odd_a_power = (long**)malloc(length*sizeof(long*));
    for (i=0;i<length;i++) (*odd_a_power)[i] = (long*)0;
  }
  zmod(a,n,&((*odd_a_power)[0]));
  zsqmod((*odd_a_power)[0],n,a_sq);
}

/* b = a^e, long *a, *e, **b, use m_ary method */
/*		a and b can be the identical, but both unequal to n and e */
/* uses default m if input m is <= 1, uses m = NBITS-1 if m >= NBITS */
/* n is positive */
/* better use zexpmod if a is small! */

zexpmod_m_ary(a,e,n,bb,m) long *a,*e,*n,**bb, m; {
        register long i, ei;
        static long *a_sq=0;
#ifndef ALLOCATE
        if (!n) zhalt((long)36);
        if (!e) { zone(bb); return; }
        if (!a) { zzero(bb); return; }
#endif
        if ((i = e[0])<0) i = (-i);
        if (i == 1) { zexpmod(a,e,n,bb); return; }
if (!exp_odd_powers) {
  exp_odd_powers = (long***)malloc(NBITS*sizeof(long**));
  for (ei=0;ei<NBITS;ei++) exp_odd_powers[ei] = (long**)0;
}
if (m <= 1) m = default_m(i); else if (m >= NBITS) m = (NBITS - 1);
{ register long k=0, od=0, odlen, left, zeros = 0, rem=0, first=1, reach = 1;
  long ***pow = &(exp_odd_powers[m]);
  init_odd_power(a,n,&a_sq,m,pow);
  for (; i>0; i--) {
    ei = e[i];
    if (!k) {k=1; while (k<=ei) k <<= 1;}
    while (k >>= 1) {
      if (zeros) {
        if (ei&k) {
          odlen = od = 1; rem = zeros; left = zeros = 0;
        } else zeros ++;
      } else if (!od) {
        if (ei&k) {
          odlen = od = 1; left = zeros = 0;
        } else zeros = 1;
      } else {
        left ++;
        if (odlen+left == m) {
          if (ei&k) {
            od <<= left; odlen += left; left = 0; od ++;
            od >>= 1;
            for (;reach <= od; reach ++)
              zmulmod((*pow)[reach-1],a_sq,n,&((*pow)[reach]));
            if (first) {
              zcopy((*pow)[od],bb);
              /* printf("od %ld\n",od); */
              first = rem = odlen = od = 0;
            } else  {
              for (rem = rem+odlen; rem; rem--) zsqmod(*bb,n,bb);
              zmulmod(*bb,(*pow)[od],n,bb);
              /* printf("shift %ld, and od %ld\n",rem+odlen, od); */
              odlen = od = 0;
            }
          } else {
            if (first) {
              if (od==1) {
                zcopy(a_sq,bb);
                /* printf("use square\n"); */
                left --;
              } else {
                od >>= 1;
                for (;reach <= od; reach ++)
                  zmulmod((*pow)[reach-1],a_sq,n,&((*pow)[reach]));
                zcopy((*pow)[od],bb);
                /* printf("od %ld\n",od); */
              }
              zeros = left; first = left = odlen = od = rem = 0;
            } else {
              for (rem = rem+odlen; rem; rem--) zsqmod(*bb,n,bb);
              od >>= 1;
              for (;reach <= od; reach ++)
                zmulmod((*pow)[reach-1],a_sq,n,&((*pow)[reach]));
              zmulmod(*bb,(*pow)[od],n,bb);
              /* printf("shift %ld, and od %ld\n",rem+odlen, od); */
              zeros = left; left = odlen = od = 0;
            }
          }
        } else if (ei&k) {
          od <<= left; odlen += left; left = 0; od ++;
        }
      }
    }    
    k = RADIX;
  }
  if (od) {
    if (first) {
      if (od==1) {
        zcopy(a_sq,bb);
        /* printf("use square\n"); */
        left --;
      } else {
        od >>= 1;
        for (;reach < od; reach ++)
          zmulmod((*pow)[reach-1],a_sq,n,&((*pow)[reach]));
        zmulmod((*pow)[od-1],a_sq,n,bb);
        /* printf("od %ld\n",od); */
      }
    } else {
      for (rem = rem+odlen; rem; rem--) zsqmod(*bb,n,bb);
      od >>= 1;
      for (;reach <= od; reach ++)
        zmulmod((*pow)[reach-1],a_sq,n,&((*pow)[reach]));
      zmulmod(*bb,(*pow)[od],n,bb);
      /* printf("shift %ld, and od %ld\n",rem+odlen,od); */
    }
  }
  if (left || zeros) {
    for (rem = zeros+left; rem; rem--) zsqmod(*bb,n,bb);
    /* printf("shift %ld\n",zeros+left); */
  }
}

        if (e[0]<0 && zinv(*bb,n,bb)) zhalt((long)37);
}

/* b = a^e, long *a, **b, e, parameters cannot be the same  */

zexps(a,e,bb) long *a,e,**bb; {
	static long *le = 0;
	zintoz(e,&le); zexp(a,le,bb);
}

/* b = a^e, long *a, **b, *e, parameters cannot be the same  */

zexp(a,e,bb) long *a,*e,**bb; {
	register long i,j,k = 0;
	int sa;
	long *b = (*bb);
	static long *temp= 0;
	extern double log10();
#ifndef ALLOCATE
	if (!e) { zone(bb); return; }
	if (!a) { zzero(bb); return; }
#endif
	if ((e[0] == 1) && (!(e[1]))) zone(bb);
	else if ((a[1] == 1) && (a[0] == 1)) zone(bb);
	else if ((a[1] == 1) && (a[0] == -1)) {
		if (e[1]&1) zintoz((long)(-1),bb); else zone(bb);
	} else if (e[0]<0) zhalt((long)18);
	else if (!(a[1]) && (a[0] == 1)) zzero(bb);
	else {
		if ((sa = a[0])<0) sa = (-sa);
	  	i = (long)((sa-1+log10((double)(a[sa]))/LOG10RAD) *ztoint(e)+4);
		zsetlength(&b,i
#ifdef REALLOC
				,"in zexp, third argument"
#endif
	  						  );
		zsetlength(&temp,i
#ifdef REALLOC
				  ,"in zexp, local"
#endif
						   );
		for (i=sa; i>=0; i--) b[i] = a[i];
		for (i=e[0]; i; i--) {
			j = e[i]; 
			if (!k) { 
				k = 1;
				while ((k << 1) <= j) k <<= 1;
			}
			while (k >>= 1) {
				zsqin(&b);
				if (j & k) zmulin(&b,a);
			}
			k = RADIX;
		}
		*bb = b;
	}
}

/* return (a^e) mod n  */

long zsexp(a,e,n) long a,e,n; {
	if (!e) return(1);
	else {
		register long aa = a%n, ipow2 = 1;
		long b = aa;
		while ((ipow2 << 1) <= e) ipow2 <<= 1;
		while (ipow2 >>= 1) {
			b = zsmulmod(b, b, n);
	     		if (e & ipow2) b = zsmulmod(b, aa, n);
		}
		return(b);
	}
}

/* initalize random generator, set zseed = s, zranp = 2^89-1, */
/* zprroot = 3^101 mod zranp */

zrstart(s) long s; {
	register long i, ones, res;
	static long *three=0;
	zintoz(s,&zseed);
	if (!three) {
		zintoz((long)3,&three);
		ones = 89/NBITS; res = 89%NBITS;
		zsetlength(&zranp,ones+1
#ifdef REALLOC
					,"in zrstart, zranp"
#endif
							    );
		for (i=ones; i; i--) zranp[i] = RADIXM;
		if (res) zranp[++ones] = ( RADIXM >> (NBITS - res) );
		zranp[0] = ones;
	}
	zexpsmod(three,(long)101,zranp,&zprroot);
}

/* returns random long in [0,b) */
/* if randomizer not initialized, initializes it with seed 7157891 */

long zrandom(b) long b; {
	long  ztoint();
	static long *zb=0, *bound=0;
	if (b <= 0) return 0;
	if (!zseed) zrstart((long)7157891); /* guess how random 7157891 is */
	zintoz(b,&zb);
	zmod(zranp,zb,&bound);
	zsubpos(zranp,bound,&bound);
	do {
		zmulmod(zseed,zprroot,zranp,&zseed);
	} while (zcompare(zseed,bound)>=0); /* I agree, this is sick */
	zmod(zseed,zb,&zb);
	return(ztoint(zb));
}

/* `special odd' inverse, only for odd p>=3 and n>0 */
/* returns inverse of n modulo p, only if n and p coprime */

long zsodinv(n,p) long n, p; {
	register long n1 = n, n2 = p, preg = p, m1 = 1, m2 = 0;
	/* m1*n == n1 mod p,  m2*n == n2 mod p */
	/* n2 is odd, n1 is odd after initialization */
	while (!(n1&1)) { n1 >>= 1; if (m1&1) m1 += preg; m1 >>= 1; }
	if (n1 == 1) return m1;
	while (n1 != n2) {
		if (n1 >n2) {
			n1 -= n2; m1 -= m2;
			if (m1 < 0) m1 += preg;
			do {
				n1 >>= 1;
				if (m1&1) m1 += preg;
				m1 >>= 1;
			} while (!(n1 & 1));
			if (n1 == 1) return m1;
		} else {
			n2 -= n1; m2 -= m1;
			if (m2 < 0) m2 += preg;
			do {
				n2 >>= 1;
				if (m2&1) m2 += preg;
				m2 >>= 1;
			} while (!(n2 & 1));
			if (n2 == 1) return m2;
		}
	}
	zhalt((long)19);
}

/* return inverse of n modulo p */
/* halts if non-trivial gcd or even p */

long zsinv (n,p) long n,p; {
	if (p < 3 || (!(p & 1)) || n <= 0) {
		if ((p==2) && (n&1)) return(1);
		zhalt((long)20);
	}
	return( zsodinv(n,p) );
}

/* returns 0 if ain and nin coprime, 1 otherwise */
/* uu returns gcd of ain and nin */
/* invv such that invv* (ain/uu) = 1 mod (nin/uu) */
/* allocated and positive arguments, output can be input */

static long zxxeucl(ain,nin,invv,uu) long *ain,*nin,**invv,**uu; {
	static long *a = 0, *n, *q, *w, *x, *y, *z;
	long *inv = *invv, *u = *uu;
	long diff, ilo, sa, sn, temp, e, fast, parity, gotthem, *pin, *p, i,
	    try11, try12, try21, try22, got11, got12, got21, got22;
	double hi, lo, dt, fhi, flo, num, den;
#ifndef START
	if (fudge <0) zstart();
#endif
	zsetlength(&a,(e = (ain[-1]>nin[-1]?ain[-1]:nin[-1]))
#ifdef REALLOC
			,"in zxxeucl, locals\n"
#endif
				    );
	zsetlength(&n,e
#ifdef REALLOC
			,""
#endif
				    );
	zsetlength(&q,e
#ifdef REALLOC
			,""
#endif
				    );
	zsetlength(&w,e
#ifdef REALLOC
			,""
#endif
				    );
	zsetlength(&x,e
#ifdef REALLOC
			,""
#endif
				    );
	zsetlength(&y,e
#ifdef REALLOC
			,""
#endif
				    );
	zsetlength(&z,e
#ifdef REALLOC
			,""
#endif
				    );
	zsetlength(&inv,e
#ifdef REALLOC
			 ,"in zxxeucl, third argument"
#endif
						      ); *invv = inv;
	zsetlength(&u,e
#ifdef REALLOC
		       ,"in zxxeucl, fourth argument"
#endif
						     ); *uu = u;
	fhi = 1.0 + epsilon; flo = 1.0 - epsilon;
	pin = &ain[0]; p = &a[0];
	for (i=(*pin); i>=0; i--) *p++ = *pin++;
	pin = &nin[0]; p = &n[0];
	for (i=(*pin); i>=0; i--) *p++ = *pin++;
	inv[0] = 1; inv[1] = 1; w[0] = 1; w[1] = 0;
	while (n[0] > 1 || n[1] > 0) {
		gotthem = 0; sa = a[0]; sn = n[0]; diff = sa-sn;
 		if (!diff || diff == 1 ) {
			sa = a[0]; p = &a[sa];
			num = (double)(*p)*fradix;
			if (sa > 1) num += (*(--p));
			num *= fradix;
			if (sa > 2) num += (*(p-1));
			sn = n[0]; p = &n[sn];
			den = (double)(*p)*fradix;
			if (sn > 1) den += (*(--p));
			den *= fradix;
			if (sn > 2) den += (*(p-1));
			hi = fhi*(num+1.0)/den;
			lo = flo*num/(den+1.0);
			if (diff > 0) { hi *= fradix; lo *= fradix; }
			try11 = 1; try12 = 0; try21 = 0; try22 = 1;
			parity = 1; fast = 1;
			while (fast > 0) {
				parity = 1 - parity;
				if (hi >= fradix ) fast = 0;
				else {
					ilo = (long)lo;
				  	if (!ilo || ilo < (long)hi) fast = 0;
				  	else {
						dt = lo; lo = flo/(hi-ilo);
						if (dt > ilo) hi = fhi/(dt-ilo);
						else hi = fradix;
						temp = try11; try11 = try21;
						if ((RADIX-temp)/ilo < try21)
							fast = 0;
					  	else try21 = temp+ilo*try21;
						temp = try12; try12 = try22;
						if ((RADIX-temp)/ilo < try22)
							fast = 0;
					  	else try22 = temp+ilo*try22;
						if ((fast > 0) && (parity >0)) {
							gotthem = 1;
							got11 = try11;
							got12 = try12;
							got21 = try21;
							got22 = try22;
						}
					}
				}
			}
		}
		if (gotthem) {
			zsmul(inv,got11,&x); zsmul(w,got12,&y);
			zsmul(inv,got21,&z); zsmul(w,got22,&w);
			zadd(x,y,&inv); zadd(z,w,&w);
			zsmul(a,got11,&x); zsmul(n,got12,&y);
			zsmul(a,got21,&z); zsmul(n,got22,&n);
			zsub(x,y,&a); zsub(n,z,&n);
		} else	{
			zdiv(a,n,&q,&a); zmul(q,w,&x); zadd(inv,x,&inv);
			if (a[0] > 1 || a[1] > 0) {
				zdiv(n,a,&q,&n); zmul(q,inv,&x); zadd(w,x,&w);
			} else {
				p = &a[0]; pin = &n[0];
				for (i=(*pin); i>=0; i--) *p++ = *pin++;
				n[0] = 1; n[1] = 0; zsub(nin,w,&inv);
			}
		}
	}
	if ((a[0] == 1) && (a[1] ==1)) e = 0; else e = 1;
	p = &u[0]; pin = &a[0];
	for (i=(*pin); i>=0; i--) *p++ = *pin++;
	*invv = inv; *uu = u;
	return(e);
}

/* return 0 if ain and nin coprime, invv such that ain*invv%nin=1 */
/* return 1 if ain and nin not coprime, invv = gcd(ain,nin) */
/* ain and nin must be non-negative, output can be input */

long zinv(ain,nin,invv) long *ain,*nin,**invv; {
	static long *u = 0;
	if ((!nin) || (!ain) || (ain[0]<0) || (nin[0]<0)) zhalt((long)25);
	if (!(zxxeucl(ain,nin,invv,&u))) return(0);
	zcopy(u,invv);
	return(1);
}

/* a*xa+b*xb=gcd(a,b)=d, arguments should be distinct */

zexteucl(a,xa,b,xb,d) long *a, **xa, *b, **xb, **d; {
	static long *modcon = 0;
	register long agrb, anegative = 0, bnegative = 0;
	if ((!a) || (!b))  zhalt((long)31);
	if (anegative = (a[0]<0)) a[0] = -a[0];
	if (bnegative = (b[0]<0)) b[0] = -b[0];
	if (!a[1] && (a[0]==1)) {
		zzero(xa);  zone(xb);
		zcopy(b,d);
		goto done;
	}
	if ((!b[1] && (b[0]==1)) || !(agrb=zcompare(a,b))) {
		zone(xa); zzero(xb);
		zcopy(a,d);
		goto done;
	}
	if (agrb>0) {
		zxxeucl(a,b,xa,d);
		zmul(a,*xa,xb);
		zsub(*d,*xb,xb);
		zdiv(*xb,b,xb,&modcon);
	} else {
		zxxeucl(b,a,xb,d);
		zmul(b,*xb,xa);
		zsub(*d,*xa,xa);
		zdiv(*xa,a,xa,&modcon);
	}
	if ((modcon[1]) || (modcon[0]!=1)) zhalt((long)26);
	done:
	if (anegative) { a[0] = -a[0]; znegate(*xa); }
	if (bnegative) { b[0] = -b[0]; znegate(*xb); }
}

/* chinese remaindering: d is ma mod a and mb mod b */
/* a and b coprime (if ma!=mb) and non-negative */
/* 0 <= d < a*b, except if a=b */

zchirem(aa,maa,bb,mbb,d) long *aa, *maa, *bb, *mbb, **d; {
	static long *u = 0, *v = 0, *w = 0;
	long *a, *b, *ma, *mb;
	register long agrb;
	if ((!aa) || (!maa) || (!bb) || (!mbb)) zhalt((long)30);
	if ((!aa[1] && aa[0]==1) || (!bb[1] && bb[0]==1) ||
		(aa[0] < 0) || (bb[0] < 0))
		zhalt((long)27);
	if ((agrb=aa[0])<bb[0]) agrb = bb[0];
	zsetlength(&u,agrb
#ifdef REALLOC
			  ,"in zchirem, locals"
#endif
						);
	zsetlength(&v,agrb
#ifdef REALLOC
			  ,""
#endif
			     );
	zsetlength(&w,agrb
#ifdef REALLOC
			  ,""
#endif
			     );
	if (!(agrb=zcompare(aa,bb))) {
		zsub(maa,mbb,&u);
		zmod(u,aa,&w);
		if (w[1] || w[0]!=1) zhalt((long)28);
		zcopy(maa,d);
		return;
	} else if (agrb>0) { a = aa; b = bb; ma = maa; mb = mbb;
	} else { a = bb; b = aa; ma = mbb; mb = maa; }
	if (zxxeucl(a,b,&u,&w)) zhalt((long)29);
	zmod(ma,a,&v);
	zsub(mb,v,&w);
	zmulmod(w,u,b,&w);
	zmul(w,a,&u);
	zadd(v,u,d);
}

/* res = 2*n, output can be input */

ztimes2(n,rres) long *n, **rres; {
	register long sn, i, carry = 0;
	long *res = *rres;
#ifndef ALLOCATE
	if (!n) { zzero(rres); return; }
#endif
	if ((!n[1]) && (n[0]==1)) { zzero(rres); return; }
	if ((sn=n[0])<0) sn = -sn;
	zsetlength(&res,sn+1
#ifdef REALLOC
			    ,"in ztimes2, second argument"
#endif
							  ); *rres = res;
	for (i=1;i<=sn;i++) {
		if ((res[i] = (n[i] << 1) + carry ) >= RADIX ) {
			res[i] -= RADIX; carry = 1;
		} else carry = 0;
	}
	if (carry) res[++sn] = 1;
	if (n[0]<0) res[0] = -sn; else res[0] = sn;
}

/* res = n shift right over one position, output can be input */
/* returns n mod 2 */
/* WARNING: for negative odd n res != n/2 */

long zdivide2(n,rres) long *n, **rres; {
	register long sn, i, result;
	long *res = *rres;
	if ((!n) || (!n[1]) && (n[0]==1)) { zzero(rres); return 0; }
	if ((sn=n[0])<0) sn = -sn;
	zsetlength(&res,sn
#ifdef REALLOC
			  ,"in zdivide2, second argument"
#endif
							 ); *rres = res;
	result = n[1]&1;
	for (i=1;i<sn;i++) {
		res[i] = (n[i] >> 1);
		if (n[i+1]&1) res[i] += (RADIX>>1);
	}
	if (res[sn] = (n[sn] >> 1) ) res[0] = n[0];
	else if (sn == 1) {
		res[0] = 1;
	} else if (n[0] < 0) res[0] = -sn+1;
	else res[0] = sn-1;
	return(result);
}

/* res = 2^k * n, i.e., shift left over k positions */
/* if k negative zrshift(n,-k,rres) */
/* output can be input */

zlshift(n,k,rres) long *n, k, **rres; {
	register long big, small, sn, i, cosmall;
	long *res = *rres;
#ifndef ALLOCATE
	if (!n) { zzero(rres); return; }
#endif
	if ((!n[1]) && (n[0]==1)) { zzero(rres); return; }
	if (!k) {
		if (n!=*rres) zcopy(n,rres);
		return;
	}
	if (k<0) { zrshift(n,-k,rres); return; }
	if (k==1) { ztimes2(n,rres); return; }
	if ((sn=n[0])<0) sn = -sn;
	i= sn + (big = k/NBITS);
	if (small = k - big*NBITS) {
		zsetlength(&res,i+1
#ifdef REALLOC
				   ,"in zlshift, third argument"
#endif
								); *rres = res;
		res[i+1] = n[sn] >> (cosmall = NBITS - small);
		for (i=sn;i>1;i--)
			res[i+big] = ((n[i]<<small)&RADIXM) + (n[i-1]>>cosmall);
		res[big+1] = (n[1]<<small)&RADIXM;
		for (i=big;i;i--) res[i] = 0;
		if (res[sn+big+1]) big++;
	} else {
		zsetlength(&res,i
#ifdef REALLOC
				 ,"in zlshift, third argument"
#endif
							      ); *rres = res;
		for (i=sn;i;i--) res[i+big]=n[i];
		for (i=big;i;i--) res[i] = 0;
	}
	if (n[0]>0) res[0] = n[0]+big; else res[0] = n[0]-big;
}

/* res = n/2^k, i.e., shift right over k positions */
/* if k negative zlshift(n,-krres) */
/* output can be input */

zrshift(n,k,rres) long *n, k, **rres; {
	register long big, small, sn, i, cosmall;
	long *res = *rres;
#ifndef ALLOCATE
	if (!n) { zzero(rres); return; }
#endif
	if ((!n[1]) && (n[0]==1)) { zzero(rres); return; }
	if (!k) {
		if (n!=*rres) zcopy(n,rres);
		return;
	}
	if (k<0) { zlshift(n,-k,rres); return; }
	if (k==1) { zdivide2(n,rres); return; }
	big = k/NBITS; small = k - big*NBITS;
	if ((sn=n[0])<0) sn = -sn;
	if ((big>=sn) || ((big==sn-1)&&(!(n[sn]>>small)))) {
		zzero(rres); return;
	}
	sn -= big;
	zsetlength(&res,sn
#ifdef REALLOC
			  ,"in zrshift, third argument"
#endif
							); *rres = res;
	if (small) {
		cosmall = NBITS - small;
		for (i=1; i<sn; i++)
			res[i] = (n[i+big]>>small)+
				 ((n[i+big+1]<<cosmall)&RADIXM);
		if (!(res[sn] = (n[sn+big]>>small))) sn --;
	} else for (i=1; i<=sn; i++) res[i] = n[i+big];
	if (n[0]>0) res[0] = sn; else res[0] = -sn;
}

/* return k such that n = 2^k * n0 with n0 odd, and set n to n0 */
/* return -1 if n = 0 */

long makeodd(n) long *n; {
	register long i;
	long shift = 1;
	if (!n || (!n[1] && (n[0]==1))) return -1;
	while (!(n[shift])) shift ++;
	i = n[shift];
	shift = NBITS * (shift - 1);
	while (!(i&1)) { shift ++; i >>= 1; }
	zrshift(n,shift,&n);
	return(shift);
}

/* return 1 if m < 2 or m composite,
/* return 0 if m >= 2 and compositeness could not be proved */
/* after |t| probabilistic compositeness tests */
/* a special feature for odd n > RADIXROOT :*/
/* if t<0, then return 1 if composite and proved to be not a prime power */
/* unless return -1: composite, no prime power, and */
/* gcd(a^m-a,m)=m, a returned in m, so that factor of m can be found by */
/* looking at factorization of a^m - a */
/* return -2 if factor found, factor is returned in m */
/* if firstbase == 0, then the bases to be tested will be random >2 */
/* and less than RADIX */
/* if firstbase !=0, then this value will be used as the first base */
/* (which allows slightly faster arithmetic if firstb == 2) */
/* and the other bases will be random >2 and < RADIX */

long zcomposite(m,t,firstbase) long *m,t,firstbase; {
	register long sm=0, i, j, s, rand;
	long zrandom(), zcompare();
	static long *u = 0, *m1 = 0, *a = 0;
	if (!m || m[0] <0) return(1);
	if ((m[0] == 1) && (m[1] <= RADIXROOT)) {
		sm = m[1];
		if (sm < 2) return(1);
		if (sm == 2) return(0);
		if (!(sm&1)) return(1);
		i = 3;
		while (i*i <= sm ) {
			if (!(sm%i)) return(1);
			i++; i++;
		}
		return(0);
	}
	if (!(m[1]&1)) return(1);
	zsetlength(&u,(i=m[0])
#ifdef REALLOC
				,"in zcomposite, locals\n"
#endif
							  );
	zsetlength(&m1,i
#ifdef REALLOC
			,""
#endif
			   );
	zsetlength(&a,i
#ifdef REALLOC
			,""
#endif
			   );
	zsubpos(m,one,&m1);
	zcopy(m1,&u);
	s = makeodd(u) - 1;
	if (!firstbase && (sm = m[1]) <= RADIXROOT ) sm = RADIXM-3;
	if (t<0) i= -t; else i = t;
	for (; i>0; i--) {
		if (sm) {
			zintoz((rand=(3+zrandom(sm))&RADIXM),&a);
			zexpmod(a,u,m,&a);
		} else {
			if (firstbase == 2) zexpmod2(u,m,&a);
			else {
				zintoz(firstbase,&a);
				zexpmod(a,u,m,&a);
			}
			if ((sm = m[1]) <= RADIXROOT ) sm = RADIXM-3;
		}
		if ( (a[0] != 1 || a[1] != 1) && zcompare(m1,a)) {
			for (j = s; j; j--) {
				zsqmod(a,m,&a);
				if (!zcompare(a,m1)) goto nexti;
			}
			if (t>0) return (1);
			else {
				zsqmod(a,m,&a);
				zsubmod(a,one,m,&a);
				zintoz(rand,&u);
				zmulmod(a,u,m,&a);
				if (!a[1] && a[0]==1) {
					zcopy(a,&m);
					return(-1);
				}
				zgcd(a,m,&u);
				if (u[1]==1 && u[0]==1) return(1);
				zcopy(u,&m);
				return(-2);
			}
		}
		nexti:;
	}
	return(0);
}

/* return 0 if n<2 or n proved composite by one of t probabilistic */
/*	compositeness tests */
/* return 1 otherwise */
/* firstbase == 0 gives random bases, otherwise firstbase used as first basis */

long zprime(n,t,firstbase) long *n, t, firstbase; {
	return ( 1-zcomposite(n,t,firstbase) );
}

/* mixed arithmetic exponentiation, only for in zmcomposite */

zsmexp(a,e,bb) long a,*e,**bb; {
	/* with mixed montgomery and ordinary arithmetic */
	register long i, j, k = 0;
	long *b = *bb;
#ifndef ALLOCATE
	if (!zn) zhalt((long)16); /* can't happen, aborts in zmstart */
	if (!e) { zcopy(zr,bb); return; }
	if (!a) { zzero(bb); return; }
#endif
	if ((i = e[0])<0) i = (-i); /* negative exponent made positive ... */
	if ((i == 1) && (!(e[1]))) zcopy(zr,&b);
	else {
		zintoz(a,&b); ztom(b,&b);
		for (; i; i--) {
			j = e[i]; 
			if (!k) { 
				k = 1;
				while ((k << 1) <= j) k <<= 1;
			}
			while (k >>= 1) {
				zmontsq(b,&b);
				if (j & k) {
					zsmul(b,a,&b); zmod(b,zn,&b);
				}
			}
			k = RADIX;
		}
	}
	*bb = b;
}

/* like zcomposite, but using montgomery */
/* t only positive, number of tests */
/* side effect: sets zn to m if zn was not set */

/* only intended for many compositeness tests on a probable prime, */
/* slighly faster than zcomposite */

long zmcomposite(m,t) long *m,t; {
	register long sm=0, i, j, s;
	long zrandom(), zcompare(), restore = 0;
	static long *u = 0, *m1 = 0, *a = 0, *oldzn=0;
	if (!m || m[0] <0) return(1);
	if ((m[0] == 1) && (m[1] <= RADIXROOT)) {
		sm = m[1];
		if (sm < 2) return(1);
		if (sm == 2) return(0);
		if (!(sm&1)) return(1);
		i = 3;
		while (i*i <= sm ) {
			if (!(sm%i)) return(1);
			i++; i++;
		}
		return(0);
	}
	if (!(m[1]&1)) return(1);
	zsetlength(&u,(i=m[0])
#ifdef REALLOC
				,"in zmcomposite, locals\n"
#endif
							  );
	zsetlength(&m1,i
#ifdef REALLOC
			,""
#endif
			   );
	zsetlength(&a,i
#ifdef REALLOC
			,""
#endif
			   );
	if (zn) { zcopy(zn,&oldzn); restore = 1; }
	zmstart(m);
	zsubpos(m,zr,&m1); /* zr is montgomery-one, m1 is montgomery-(n-1) */
	zsubpos(m,one,&u);
	s = makeodd(u) - 1;
	if ((sm = m[1]) <= RADIXROOT ) sm = RADIXM-3;
	for (i=t; i>0; i--) {
		zsmexp((3+zrandom(sm))&RADIXM,u,&a);
		if ( zcompare(zr,a) && zcompare(m1,a)) {
			for (j = s; j; j--) {
				zmontsq(a,&a);
				if (!zcompare(a,m1)) goto nexti;
			}
			if (restore) zmstart(oldzn);
			return(1);
		}
		nexti:;
	}
	if (restore) zmstart(oldzn);
	return(0);
}

/* return [n^{0.5}] */

long zssqrt(n) long n; {
	register long a, ndiva, newa;
	if (n <= 0) return (0);
	if (n <= 3) return (1);
	if (n <= 8) return (2);
	newa = 3 << (2 * (NBITSH -1));
	a = 1 << NBITSH;
	while (!(n & newa)) {
		newa >>= 2;
		a >>= 1;
	}
	while (1) {
		newa = ((ndiva = n/a) +a)/2;
		if (newa-ndiva <= 1) {
			if (newa*newa <= n) return(newa);
			else return(ndiva);
		}
		a = newa;
	}
}

/* returns 1 if n perfect square, 0 otherwise */
/* r = [n^{0.5}], dif = n-r^2 */
/* output cannot be input */

long zsqrt(n,rr,ddif) long *n, **rr, **ddif; {
	static long *a = 0, *ndiva = 0, *diff = 0;
	register long i;
	long  zssqrt(), zcompare();
	long *dif = *ddif;
	long *r = *rr;
#ifndef ALLOCATE
	if (!n) { zzero(rr); zzero(ddif); return 1; }
#endif
	if ((i=n[0])<0) zhalt((long)23);
	zsetlength(&a,i
#ifdef REALLOC
			,"in zsqrt, locals\n"
#endif
					     );
	zsetlength(&ndiva,i
#ifdef REALLOC
			,""
#endif
			   );
	zsetlength(&diff,i
#ifdef REALLOC
			,""
#endif
			   );
	if (i == 1) {
		zintoz(i=zssqrt(n[1]),&r);
		zintoz(i*i,&diff);
		goto done;
	}
	a[ (a[0]=(i+1)/2) ] = zssqrt(n[i]) + 1;
	if (!(i&1)) a[a[0]] <<= NBITSH;
	if (a[a[0]]&RADIX) {
		a[a[0]] = 0; a[0] ++; a[a[0]] = 1;
	}
	for (i=a[0]-1;i;i--) a[i]=0;
	while (1) {
		zdiv(n,a,&ndiva,&r);
		zadd(a,ndiva,&r);
		zrshift(r,(long)1,&r);
		if (zcompare(r,ndiva) <= 0) {
			zsq(r,&diff);
			goto done;
		}
		zsubpos(r,ndiva,&diff);
		if ((diff[0] == 1) && (diff[1] <= 1)) {
			zsq(r,&diff);
			if (zcompare(diff,n) > 0) {
				zcopy(ndiva,&r);
				zsq(r,&diff);
			}
			goto done;
		}
		zcopy(r,&a);
	}
done:
	*rr=r;
	zsubpos(n,diff,&dif);
	*ddif=dif;
	if (!(dif[1]) && (dif[0] == 1)) return(1);
	return(0);
}

/* start prime generator sieve */

zpstart() {
	register long i, j, jstep, jstart, ibnd;
	register short int *p;
	if (!lowsieve) {
		lowsieve = (short int *) calloc(PRIMBND,sizeof(short int));
		p = &lowsieve[0];
		for (i = PRIMBND; i; i--) *p++ = 1;
		jstep = 1;
		jstart = -1;
		ibnd = (zssqrt((long)(2*PRIMBND+1))-3)/2;
		for (i=0; i<=ibnd; i++) {
			jstart += 2*((jstep += 2) -1);
			if (lowsieve[i]) for (j = jstart; j<PRIMBND; j+=jstep)
				lowsieve[j] = 0;
		}
	}
	lastp = 0; pshift = 0; pindex = -1;
}

/* restarter for prime generator to get 2 first */

zpstart2() {
	lastp = 0; pshift = -1;
}

/* auxiliary routine for prime generator */

static zpshift() {
	register long i, j, jstep, jstart, ibound;
	register short int *p, *pi;
	if (!movesieve)
		movesieve = (short int *) calloc(PRIMBND,sizeof(short int));
	pi = &movesieve[0];
	if (!pshift) {
		p = &lowsieve[0];
		for (i=PRIMBND; i; i--) *pi++ = *p++;
	} else {
		for (i=PRIMBND; i; i--) *pi++ = 1;
		jstep = 3; ibound = pshift+2*PRIMBND+1;
		for (i=0; jstep*jstep<=ibound; i++) {
			if (lowsieve[i]) {
				if (!((jstart = (pshift+2)/jstep+1)&1)) jstart++;
				if (jstart <= jstep) jstart = jstep;
				jstart = (jstart*jstep-pshift-3)/2;
				for (j=jstart;j<PRIMBND; j+=jstep)
					movesieve[j] = 0;
			}
			jstep += 2;
		}
	}
}

/* returns next prime number, starting at 2 */
/* restart with zpstart2() (get 2 first) */
/* or with zpstart() (get 3 first) */
/* after returning the last prime (as defined by PRIMBND) */
/* zpnext starts all over again, at 2 */

long zpnext() {
	if (pshift < 0) { zpstart(); return(lastp=2); }
	if (pindex < 0) { pindex = 0; zpshift(); return(lastp=3); }
	do {
		while ((++pindex) < PRIMBND ) {
			if (movesieve[pindex]) return(lastp=pshift+2*pindex+3);
		}
		if ((pshift += 2*PRIMBND) > 2*PRIMBND*(2*PRIMBND+1))
			{ zpstart(); return(lastp=2); }
		zpshift();
		pindex = -1;
	} while (1);
}

/* return current prime number */

long zp() {
	return lastp;
}

/* res is greatest common divisor of m1 and m2, output can be input */
/* uses binary gcd algorithm; uses at most one mod to make same size */

zgcd(mm1,mm2,rres) long *mm1, *mm2, **rres; {
        register long agrb, shibl;
        static long *aa=0, *bb=0, *cc=0;
        long *a, *b, *c, *d, m1negative, m2negative;
	if ((!mm1) || (!mm2)) zhalt((long)32);
	if (mm1==mm2) { zcopy(mm1,rres); return; }
	if (m1negative = (mm1[0]<0)) mm1[0] = -mm1[0];
	if (m2negative = (mm2[0]<0)) mm2[0] = -mm2[0];
        if (!(mm1[1]) && (mm1[0]==1)) { a = mm2; goto done; }
        if (!(mm2[1]) && (mm2[0]==1)) { a = mm1; goto done; }
	if ((agrb=mm1[0])<mm2[0]) agrb=mm2[0];
        zsetlength(&aa,agrb
#ifdef REALLOC
			   ,"in zgcd, locals\n"
#endif
						);
        zsetlength(&bb,agrb
#ifdef REALLOC
			   ,""
#endif
			      );
        zsetlength(&cc,agrb
#ifdef REALLOC
			   ,""
#endif
			      );
	if (mm1[0] != mm2[0]) {
		if (mm1[0] > mm2[0]) {
			zcopy(mm2,&aa); zmod(mm1,aa,&bb);
		} else {
			zcopy(mm1,&aa); zmod(mm2,aa,&bb);
		}
		if (!(bb[1]) && (bb[0]==1)) { a = aa; goto done; }
	} else { zcopy(mm1,&aa); zcopy(mm2,&bb); }
        if ((agrb = makeodd(aa)) < (shibl = makeodd(bb))) shibl = agrb;
        if (!(agrb = zcompare(aa,bb))) { a = aa; goto endshift; }
        else if (agrb < 0) { a = bb; b = aa;
	} else { a = aa; b = bb; }
        c = cc; zsubpos(a,b,&c);
        do {
                makeodd(c);
                if (!(agrb = zcompare(b,c))) {
                        a = b; goto endshift;
                } else if (agrb > 0) { a = b; b = c; c = a;
                } else { d = a; a = c; c = d; }
                zsubpos(a,b,&c);
        } while (c[1] || c[0] != 1);
        endshift: zlshift(a,shibl,&a);
	done:
    	if (m1negative) mm1[0] = -mm1[0];
	if (m2negative) mm2[0] = -mm2[0];
	zcopy(a,rres);
}

/* res is greatest common divisor of m1 and m2, output can be input */
/* uses plain euclidean algorithm, no tricks ... */

zgcdeucl(mm1,mm2,rres) long *mm1, *mm2, **rres; {
	register long agrb;
        static long *aa=0, *bb=0;
        long *a, m1negative, m2negative;
	if ((!mm1) || (!mm2)) zhalt((long)33);
	if (mm1==mm2) {
		if (mm1 != *rres) zcopy(mm1,rres);
		return;
	}
	if (m1negative = (mm1[0]<0)) mm1[0] = -mm1[0];
	if (m2negative = (mm2[0]<0)) mm2[0] = -mm2[0];
        if (!(mm1[1]) && (mm1[0]==1)) { a = mm2; goto done; }
        if (!(mm2[1]) && (mm2[0]==1)) { a = mm1; goto done; }
	if ((agrb=mm1[0])<mm2[0]) agrb=mm2[0];
        zsetlength(&aa,agrb
#ifdef REALLOC
			   ,"in zgcd, locals\n"
#endif
						);
        zsetlength(&bb,agrb
#ifdef REALLOC
			   ,""
#endif
			      );
	zcopy(mm1,&aa); zcopy(mm2,&bb);
	while (bb[1] || (bb[0] != 1)) {
		zmod(aa,bb,&aa);
		if (!(aa[1]) && (aa[0] == 1)) { a = bb; goto done; }
		zmod(bb,aa,&bb);
	}
	a = aa;
	done:
    	if (m1negative) mm1[0] = -mm1[0];
	if (m2negative) mm2[0] = -mm2[0];
	zcopy(a,rres);
}

timer_handler() {
	time_flag=0;
}

/* returns 1 if factor of n found in pollard rho, puts factor in res */
/* returns 0 if no factor found; if n negative: set res = -1 */
/* cofactor in cof, res <= cof unless res=2, cof = 1 */
/* side effect: uses montgomery multiplication, does zmstart(n) */

long pollardrho(n,rres,ccof) long *n, **rres, **ccof; {
	register long i, j = 10;
	static long *xi = 0, *x2i = 0, *dif = 0;
	long *res = *rres, *cof = *ccof, *adder = &glosho[1];;
	time_flag=1;
	adder[1] = 1;
#ifndef ALLOCATE
	if (!n) { zzero(rres); zzero(ccof); return 0; }
#endif
	if (!n[1] && (n[0]==1)) {
		zzero(rres); zzero(ccof); return 0;
	}
	if (n[0]<0) {
		zintoz((long)(-1),rres); zcopy(n,ccof); return(1);
	}
	if (!(n[1]&1)) {
		zintoz((long)2,rres); zrshift(n,(long)1,ccof);
		if (((*ccof)[1]!=1) || ((*ccof)[0]!=1)) return(1);
		return(0);
	}
	zsetlength(&xi, (i= n[0] +1)
#ifdef REALLOC
				    ,"in pollardrho, locals\n"
#endif
							      );
	zsetlength(&x2i,i
#ifdef REALLOC
			 ,""
#endif
			    );
	zsetlength(&dif,i
#ifdef REALLOC
			 ,""
#endif
			    );
	i = 0;
	zmstart(n);
	zcopy(one,&dif);
	ztom(dif,&xi);
	zcopy(xi,&x2i);
	while (i<100000 && time_flag) {
		for (;i<j && time_flag;i++) {
			zmontsq(xi,&xi); zadd(xi,adder,&xi);
			zmontsq(x2i,&x2i); zadd(x2i,adder,&x2i);
			zmontsq(x2i,&x2i); zadd(x2i,adder,&x2i);
			zsub(xi,x2i,&res);
			if (res[0]<0) res[0] = -res[0];
			if ((res[1])||(res[0]!=1)) zmont(dif,res,&dif);
			else {
				zcopy(one,&dif);
				ztom(dif,&xi);
				zcopy(xi,&x2i);
				if ((adder[1] += 2) > RADIX) {
					(void)signal(SIGALRM,SIG_IGN);
					return 0;
				}
			}
		}
		if ((dif[1])||(dif[0]!=1)) {
	                zgcd(n,dif,&res); 
			if (((res[1]!=1) || (res[0]!=1)) && zcompare(res,n)) {
				zdiv(n,res,&cof,&xi);
				if (xi[1] || xi[0]!=1) {
					*rres=res; *ccof = cof;
					zhalt((long)24);
					(void)signal(SIGALRM,SIG_IGN);
					return 0;
				}
				if (zcompare(res,cof)>0) {
					*rres = cof; *ccof = res;
				} else { *rres = res; *ccof = cof; }
				(void)signal(SIGALRM,SIG_IGN);
				return (i+1);
			}
		} else zcopy(one,&dif);
		j=i+6;
	}
	if (!time_flag){
		fprintf(stderr,"timed out in pollard\n");
	} else {
		(void)signal(SIGALRM,SIG_IGN);
	}
	return(0);
}

/* attempts to find smallest prime divisor >=b1 and <=b2 of n */
/* returns divisor or first prime >b2 if nothing found */
/* side effect: restarts prime generator */
/* is Not clever about trial division > sqrt n */

long ztridiv(n,cof,b1,b2) long *n, **cof, b1, b2; {
	static long lasttried = 0;
#ifndef ALLOCATE
	if (!n) { zzero(cof); return 0; }
#endif
	if (lasttried > b1) {
		zpstart2();
		lasttried = 0;
	}
	while (lasttried < b1) { lasttried = zpnext(); }
	while (lasttried <= b2) {
		if (!zsdiv(n,lasttried,cof)) return(lasttried);
		lasttried = zpnext();
	}
	return(lasttried);
}

/* read a from file */

long zfread(f,aa) FILE *f; long **aa; {
	/* return 1 if success, 0 if not */
	static char *inmem = 0;
	char *in;
	register long d=0, anegative=0;
	long *a= *aa, *digit = &glosho[1];
	if (!inmem) inmem = (char *)calloc(BUFSIZE,sizeof(char));
	if (fscanf(f,"%s",inmem)==EOF) return 0;
	if (inmem[0]=='-') { anegative=1; in = (inmem+1); }
	else in = inmem;
	zsetlength(&a,(long)(strlen(in)/LOG10RAD)
#ifdef REALLOC
		             ,"in zfread, second argument"
#endif
							  );
	a[0] = 1; a[1] = 0;
	while (in[d] != '\0') {
		if (in[d] == '\\') {
			if (fscanf(f,"%s",in)==EOF) return 0;
			 d = 0;
			if ( strlen(in) > LOG10RAD*(a[-1]-a[0]) ) {
			/* to avoid multiple reallocations */
			   zsetlength(&a,(long)(a[0]+3+strlen(in)/LOG10RAD)
#ifdef REALLOC
			   			,"in zfread, second argument"
#endif
									     );
			}
		} else {
			zsmul(a,(long)10,&a);
			digit[1] = (long)(in[d++]-'0');
			zadd(a,digit,&a);
		};
	}
	if (anegative) znegate(a);
	*aa = a;
	return 1;
}

/* read a from standard input */

long zread(a) long **a; {
	return zfread(stdin,a);
}

/* write a to file f, return decimal length of a */

long zfwrite(f,a,linelen,str1,str2) FILE *f; long *a, linelen; char *str1, *str2; {
	static long *out = 0, *ca = 0, outsize = 0, div = 0, ldiv;
	register long i;
	long sa, result, zeros, strlen1 = strlen(str1), strlen2 = strlen(str2);
#ifndef ALLOCATE
	if (!a) { fprintf(f,"0"); return 1; }
#endif
	if (!div) {
		div = 10; ldiv = 1;
		while (div * 10 < RADIXROOT) { div *= 10; ldiv ++; }
	}
	if (linelen && linelen<ldiv) linelen=LINE;
	if (!out) out = (long *)calloc((outsize = SIZE<<1),SIZEOFLONG);
	if ((sa = a[0])<0) {
		sa = -sa;
		fprintf(f,"%s-",str1);
	} else fprintf(f,"%s",str1);
	if (ldiv*outsize < LOG10RAD*sa) {
	    free(out);
	    if (!(out=(long*)calloc((outsize=(LOG10RAD*sa)/ldiv+2),SIZEOFLONG)))
		zhalt((long)21);
	}
	zsetlength(&ca,sa
#ifdef REALLOC
			 ,"in zfwrite, local"
#endif
					     );
	for (i= (ca[0] = sa);i;i--) ca[i] = a[i];
	i = -1;
	do {
		if ((++i) >= outsize) {
		  if (!(out=(long*)realloc(out,(outsize+=SIZE)*SIZEOFLONG))) {
		  	zhalt((long)22);
		  }
		}
		out[i] = zsdiv(ca,div,&ca);
	} while (ca[1] || ca[0] != 1);
	sa = 0;
	result = out[i];
	do {
		sa ++;
	} while (result /= 10);
	result = sa + i*ldiv;;
	fprintf(f,"%ld",out[i--]);
	for (;i>=0;i--) {
		if (zeros = 10*out[i]) {
			while (zeros<div) {
				fprintf(f,"0");
				zeros *= 10;
			}
		} else { for (zeros=ldiv-1;zeros;zeros--) fprintf(f,"0"); }
		fprintf(f,"%ld",out[i]);
		sa += ldiv;
		if (linelen && (sa > linelen-strlen1) && i)
			{ fprintf(f,"\\\n%s",str2); sa = 0; strlen1= strlen2; }
	}
	return(result);
}

/* write a to standard output, return decimal length a */

long zwrite(a) long *a; {
	return zfwrite(stdout,a,(long)LINE,"","");
}

/* write a to standard output followed by newline, return decimal length a */

long zwriteln(a) long *a; {
	long ret = zfwrite(stdout,a,(long)LINE,"","");
	printf("\n");
	return ret;
}

char eulav(i) long i; {
	switch(i) {
	case 0 : return('0');
	case 1 : return('1');
	case 2 : return('2');
	case 3 : return('3');
	case 4 : return('4');
	case 5 : return('5');
	case 6 : return('6');
	case 7 : return('7');
	case 8 : return('8');
	case 9 : return('9');
	case 10 : return('a');
	case 11 : return('b');
	case 12 : return('c');
	case 13 : return('d');
	case 14 : return('e');
	case 15 : return('f');
	}
}

zhfwrite(fn,a) FILE *fn; long *a; {
	static char *b=0;
	static bl=0;
	static long *aa=0;
	register long i, cnt=0, mb=0, lab=0;
	if (!a) fprintf(fn,"0");
	zcopy(a,&aa); if (aa[0]<0) { aa[0] = -aa[0]; fprintf(fn,"-"); }
	if (!b || (bl<(aa[0]<<3))) b = (char *)malloc(bl=(aa[0]<<3));
	do {
		b[cnt] = eulav(aa[1]&15); cnt ++;
		zrshift(aa,4,&aa);
	} while ((aa[1] != 0) || (aa[0] != 1));
	for (i=cnt;i--;) {
		fprintf(fn,"%c",b[i]);
		mb ++;
		if (mb == 8) {
			lab ++;
			if (lab == 7) {
				if (i) fprintf(fn,"\\\n");
				lab = 0; mb = 0;
			} else {
				mb = 0; fprintf(fn," ");
			}
		}
	}

}

zhwrite(a) long *a; {
	zhfwrite(stdout,a);
}

zhwriteln(a) long *a; {
	zhfwrite(stdout,a); printf("\n");
}

long value(c) char c; {
	switch(c) {
	case '0' : return(0);
	case '1' : return(1);
	case '2' : return(2);
	case '3' : return(3);
	case '4' : return(4);
	case '5' : return(5);
	case '6' : return(6);
	case '7' : return(7);
	case '8' : return(8);
	case '9' : return(9);
	case 'A': case 'a' : return(10);
	case 'B': case 'b' : return(11);
	case 'C': case 'c' : return(12);
	case 'D': case 'd' : return(13);
	case 'E': case 'e' : return(14);
	case 'F': case 'f' : return(15);
	}
}

zhfread(fn,a) FILE *fn; long **a; {
	/* read hexadecimal from fn; skips newlines, continues only on \ */
	/* at end of line, accepts sign on first line */
	char prev, c;
	long anegative = 0;
	zintoz(0,a);
	do {
		prev = ' ';
		do { fscanf(fn,"%c",&c); } while (c == '\n');
		if ((c == '-') && (!anegative)) {
			anegative = 1; 
			fscanf(fn,"%c",&c);
		}
		while (c != '\n') {
			prev = c;
			fscanf(fn,"%c",&c);
			if ((prev != '\\') && (prev != ' ')) {
				zlshift(*a,4,a); zsadd(*a,value(prev),a);
			}
		}
		if (!anegative) anegative = -1;
	} while (prev == '\\');
	if (anegative>0) if (((*a)[1]) || ((*a)[0]!=1)) (*a)[0] = -((*a)[0]);
}

zhread(a) long **a; {
	zhfread(stdin,a);
}

#endif

