// Copyright Michael Monagan 2019-2021 // Compile with gcc -O3 -shared -o gcd6.so -fPIC gcd6.c // gcc -O3 -c gcd6.c // This file has my classical O(d^2) arithmetic library for Fp[x] // It supports 63 bit primes i.e., p < 2^63 // It eliminates the O(d^2) divisions by p using accumulators // It also uses Roman Pearce's mulrec64 routine for multiplication mod p from the file "int128g.c" // For 2021 I've added routines to support the multivarate GCD using Kronecker substitution // Fall 2021 I've added fast evaluation for Ayoola // Copyright Michael Monagan 2000--2021. // polyalg4.c inplace polLambda(R,n,L,W,p); #include #include #include #define LONG long long int #define ULONG unsigned long long int /******************************************************************************************/ /* Zp utilities */ /******************************************************************************************/ #define UINT64 unsigned long long #include "int128g.c" //typedef struct { // UINT64 s; /* shift */ // UINT64 v; /* reciprocal of d */ // UINT64 d0; /* divisor shifted up */ // UINT64 d1; //} recint; // //recint recip1(UINT64 p); //UINT64 mulrec64(UINT64 a, UINT64 b, recint v); ULONG seed; ULONG mult; LONG rand64s(LONG p) { LONG x,y; extern ULONG seed, mult; seed = mult*seed; x = seed >> 32; seed = mult*seed; y = seed >> 32; x = (x<<31) | y; x = x % p; return(x); } int min32s(int a, int b) { if( ab ) return a; else return b; } LONG add64s(LONG a, LONG b, LONG p) { LONG t; t = (a-p)+b; t += (t>>63) & p; return t; } LONG sub64s(LONG a, LONG b, LONG p) { LONG t; t = a-b; t += (t>>63) & p; return t; } LONG neg64s(LONG a, LONG p) { return (a==0) ? 0 : p-a; } LONG mul64s(LONG a, LONG b, LONG p) { LONG q, r; __asm__ __volatile__( \ " mulq %%rdx \n\t" \ " divq %4 \n\t" \ : "=a"(q), "=d"(r) : "0"(a), "1"(b), "rm"(p)); return r; } /* z += a1:a0 */ #define zadd(z,a0,a1) __asm__(\ " addq %4, %0 \n\t" \ " adcq %5, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "r"(a0), "r"(a1)) /* z -= a1:a0 */ #define zsub(z,a0,a1) __asm__(\ " subq %4, %0 \n\t" \ " sbbq %5, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "r"(a0), "r"(a1)) /* z = a*b */ #define zmul(z,a,b) __asm__(\ " mulq %%rdx \n\t" \ : "=a"(z[0]), "=d"(z[1]) : "a"(a), "d"(b)) /* z += a*b */ #define zfma(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " addq %%rax, %0 \n\t" \ " adcq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z -= a*b */ #define zfms(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " subq %%rax, %0 \n\t" \ " sbbq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z[0] = z % p */ /* z[1] = z / p */ /* quotient can overflow */ #define zdiv(z,p) __asm__(\ " divq %4 \n\t" \ : "=a"(z[1]), "=d"(z[0]) : "a"(z[0]), "d"(z[1]), "r"(p)) /* z = z % p safe */ #define zmod(z,p) __asm__(\ " divq %4 \n\t" \ " xorq %0, %0 \n\t" \ : "=a"(z[1]), "=d"(z[0]) : "a"(z[0]), "d"(z[1] < p ? z[1] : z[1] % p), "r"(p)) /* z = z << s */ #define zshl(z,s) __asm__(\ " shldq %%cl, %0, %1 \n\t" \ " shlq %%cl, %0 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "c"(s)) /* c^(-1) mod p assuming 0 < c < p < 2^63 */ LONG modinv64s( LONG c, LONG p ) { LONG d,r,q,r1,c1,d1; d = p; c1 = 1; d1 = 0; while( d != 0 ) { q = c / d; r = c - q*d; r1 = c1 - q*d1; c = d; c1 = d1; d = r; d1 = r1; } if( c!=1 ) return( 0 ); if( c1 < 0 ) c1 += p; return( c1 ); } /* a^n mod p assuming 0 <= a < p < 2^63 */ LONG powmod64s( LONG a, LONG n, LONG p ) { LONG r,s; a += (a>>63) & p; // protect from bad input if( n==0 ) return 1; if( n==1 ) return a; for( r=1, s=a; n>0; n /= 2 ) { if( n & 1 ) r = mul64s(r,s,p); s = mul64s(s,s,p); } return r; } /* a^n mod p assuming 0 <= a < p < 2^63 */ LONG powmodP64s( LONG a, LONG n, LONG p, recint P ) { LONG r,s; a += (a>>63) & p; // protect from bad input if( n==0 ) return 1; if( n==1 ) return a; //for( r=1, s=a; n>0; n /= 2 ) { if( n & 1 ) r = mul64s(r,s,p); s = mul64s(s,s,p); } for( r=1, s=a; n>0; n /= 2 ) { if( n & 1 ) r = mulrec64(r,s,P); s = mulrec64(s,s,P); } return r; } /******************************************************************************************/ /* Polynomial routines */ /******************************************************************************************/ void vecfill64s( LONG x, LONG *A, int n ) { int i; for( i=0; i0; i-- ) if( A[i]!=0 ) printf("%lld*x^%d+",A[i],i); printf("%lld;\n",A[0]); return; } int polequal64s( LONG *a, LONG *b, int d ) { int i,equal; for( equal=1,i=0; i<=d; i++ ) equal = equal && (a[i]==b[i]); return equal; } LONG poleval64s(LONG *a, int d, LONG x, LONG p) { int i; LONG r; recint P; P = recip1(p); if( d==-1 ) return 0; // a[0]+x(a[1]+x(a[2]+x(a[3]))) for( r=a[d],i=d-1; i>=0; i-- ) r = add64s(a[i], mulrec64(x,r,P), p); return r; } void polmultieval64s(LONG *a, int d, LONG *x, LONG *y, int n, LONG p) { int j; for( j=0; j=0 && c[da]==0 ) da--; return da; } if( da=0 && c[da]==0 ) da--; return da; } if( da>db ) { for ( i=db+1; i<=da; i++ ) c[i] = a[i]; return da; } else { for( i=da+1; i<=db; i++ ) c[i] = neg64s(b[i],p); return db; } } int polsubmul( LONG *A, LONG *B, LONG a, LONG b, int dA, int dB, LONG p ) { // compute A = A - (ax+b) B efficiently LONG t; int i; ULONG z[2]; if( dB==-1 ) return dA; // B = 0 z[0] = z[1] = 0LL; // if deg(A) <= deg(B) then pad A with zeroes while( dA<=dB ) A[++dA] = 0; // constant term is special t = mul64s(b,B[0],p) ; A[0] = sub64s(A[0],t,p); for( i=1; i<=dB; i++ ) { zmul(z,a,B[i-1]); zfma(z,b,B[i]); zmod(z,p); t = A[i]-z[0]; A[i] = t + ((t>>63)&p); } // update leading term from B t = mul64s(a,B[dB],p); A[dB+1] = sub64s(A[dB+1],t,p); // compute and return degree while( dA>=0 && (A[dA]==0 || A[dA]==p) ) dA--; return dA; } /* compute gcd(A,B) and put gcd in A and return it's degree */ int polsubmulP( LONG *A, LONG *B, LONG a, LONG b, int dA, int dB, LONG p, recint P ) { // compute A = A - (ax+b) B efficiently LONG s,t; int i, d; if( dB==-1 ) return dA; // B = 0 d = dA; // if deg(A) <= deg(B) then pad A with zeroes while( dA<=dB ) A[++dA] = 0; // constant term is special t = mulrec64(b,B[0],P); A[0] = sub64s(A[0],t,p); //for( i=1; i<=dB; i++ ) { t = mul64s(a,B[i-1],p); t = add64s(t,mul64s(b,B[i],p),p); A[i] = sub64s(A[i],t,p); } for( i=1; i<=dB; i++ ) { t = mulrec64(a,B[i-1],P); t = add64s(t,mulrec64(b,B[i],P),p); A[i] = sub64s(A[i],t,p); } // update leading term from B t = mulrec64(a,B[dB],P); A[dB+1] = sub64s(A[dB+1],t,p); // compute and return degree while( dA>=0 && (A[dA]==0 || A[dA]==p) ) dA--; if( dA==d ) printf("polsubmul failure: dAin=%d dAout=%d\n",d,dA); return dA; } int poldiff64s( LONG *f, int d, LONG *fp, LONG p ) { int i; recint P; P = recip1(p); for( i=1; i<=d; i++ ) fp[i-1] = mulrec64(f[i],(LONG) i,P); for( d--; d>=0 && fp[d]==0; d-- ); return d; } /* compute C(x) = A(x)^2 mod p and return deg(C) */ /* we allow C to overwrite A i.e. polsqr64s(A,A,d,p) */ int polsqr64s( LONG * A, LONG * C, int d, LONG p ) { int i,k,m,dc; ULONG z[2]; if( d<0 ) return d; for( k=2*d; k>=0; k-- ) { m = min32s(k,d); i = max32s(0,k-d); z[0] ^= z[0]; // = 0? z[1] ^= z[1]; // = 0? while( i=p ) z[1] -= p; zfma(z,A[i++],A[m--]); } if( i=p ) z[1] -= p; } zadd(z,z[0],z[1]); if( z[1]>=p ) z[1] -= p; if( i==m ) zfma(z,A[i],A[i]); zmod(z,p); C[k] = z[0]; } for( dc = 2*d; dc>=0 && C[dc]==0; dc-- ); // Why is this loop here? Z_p has no zero-divisors. // Because p may not be prime!! return( dc ); } /* compute C(x) = A(x) * B(x) mod p and return deg(C) */ /* we allow C to overwrite either A or B i.e. polmul64s(A,B,A,da,db,p) */ int polmul64s( LONG * A, LONG * B, LONG * C, int da, int db, LONG p) { int i,k,m; ULONG z[2]; if( da<0 || db<0 ) return da; int dc = da+db; for( k=dc; k>=0; k-- ) { i = max32s(0,k-db); m = min32s(k,da); z[0] ^= z[0]; // = 0? z[1] ^= z[1]; // = 0? while( i=p ) z[1] -= p; zfma(z,A[i],B[k-i]); i++; } if( i==m ) { zfma(z,A[i],B[k-i]); if( z[1]>=p ) z[1] -= p; } zmod(z,p); C[k] = z[0]; } for( ; dc>=0 && C[dc]==0; dc-- ); return( dc ); } /* divide A by B and put the remainder and quotient in A */ /* return the degree of the remainder */ int poldiv64s( LONG * A, LONG * B, int da, int db, LONG p ) { int dq,dr,k,j,m; LONG t,inv; ULONG z[2]; if( db<0 ) { printf("division by zero\n"); exit(1); } if( da=0; k-- ) { z[0] = 0ll; z[1] = 0ll; m = min32s(dr,k); j = max32s(0,k-dq); //for( j=max32s(0,k-dq); j<=m; j++ ) { t -= ((LONG) B[j])*A[k-j+db]; t += (t>>63) & M; } while( j=p ) z[1] -= p; zfma(z,B[j],A[k-j+db]); j++; } if( j==m ) zfma(z,B[j],A[k-j+db]); if( z[1]>=p ) z[1] -= p; zmod(z,p); t = A[k] - z[0]; t += (t>>63) & p; if( k>=db && inv!=1 ) t = mul64s(t,inv,p); A[k] = t; } while( dr>=0 && A[dr]==0 ) dr--; return( dr ); } void polscamul64s( LONG x, LONG *A, int d, LONG p ) { int i; if( x==1 ) return; if( x==-1 ) for( i=0; i<=d; i++ ) A[i] = neg64s(A[i],p); else for( i=0; i<=d; i++ ) A[i] = mul64s(x,A[i],p); return; } /* make polynomial in A monic */ void monic64s( LONG *A, int d, LONG p ) { int i; LONG inv; if( d<0 || A[d]==1 ) return; inv = modinv64s(A[d],p); for( i=0; i=0; i--,j++ ) { B[j+1] = B[j]; for( k=j; k>0; k-- ) B[k] = add64s(mulrec64(B[k],alpha,P),B[k-1],p); B[0] = add64s(mulrec64(B[0],alpha,P),A[i],p); } return; } /* compute gcd(A,B) and put gcd in A and return it's degree */ /* Both A and B are destroyed */ int polgcd64s( LONG * A, LONG * B, int da, int db, LONG p ) { int dr; LONG *C, *D, *R, u, a, b; recint P; if( db<0 ) { printf("division by zero\n"); exit(1); } P = recip1(p); C = A; D = B; if( da0 && da-db==1 ) { // normal case u = modinv64s(D[db],p); a = mulrec64(C[da],u,P); // a = mul64s(C[da],u,p); b = mulrec64(a,D[db-1],P); // b = mul64s(a,D[db-1],p); b = mulrec64(u,sub64s(C[da-1],b,p),P); // quotient = a x + b // b = mul64s(u,sub64s(C[da-1],b,p),p); // quotient = a x + b dr = polsubmulP(C,D,a,b,da,db,p,P); // C = C - (a x + b) D // dr = polsubmul(C,D,a,b,da,db,p); // C = C - (a x + b) D if( dr>=db ) printf("failure\n"); } else dr = poldiv64s(C,D,da,db,p); if( dr<0 ) { /* D|C so gcd(A,B)=D */ if( D!=A ) polcopy64s(D,db,A); monic64s( A, db, p ); return db; } R = C; C = D; D = R; da = db; db = dr; //printf("da=%d db=%d\n",da,db); } } void polgcdext64s( LONG *A, LONG *B, int da, int db, LONG *G, LONG *S, LONG *T, int *dG, int *dS, int *dT, LONG *W, LONG p ) { // Solve S A + T B = G = monic gcd(A,B) for G,S,T in Zp[x] // The arrays A and B are used for the remainder sequence so they are destroyed // G,S,T must all be of size max(da+1,db+1) // W is working storage of size 2max(da+1,db+1) // if S==0 or T==0 then S (and/or T) are not computed int m,dr,ds,dt,dq,ds1,ds2,dt1,dt2; LONG a,b,u; LONG *q,*r,*r1,*r2,*s,*s1,*s2,*t,*t1,*t2; recint P; P = recip1(p); if( da<0 || db<0 ) { printf("inputs must be non-zero\n"); exit(1); } m = max32s(da+1,db+1); r1 = A; r2 = B; if(S) { s1 = S; s2 = W; s1[0]=1; ds1=0; ds2=-1; } if(T) { t1 = T; t2 = W+m; t2[0]=1; dt2=0; dt1=-1; } while( 1 ) { if( db>0 && da-db==1 ) { // normal case u = modinv64s(r2[db],p); a = mul64s(r1[da],u,p); b = mul64s(a,r2[db-1],p); b = mul64s(u,sub64s(r1[da-1],b,p),p); // quotient = a x + b dr = polsubmul(r1,r2,a,b,da,db,p); // r1 = r1 - (a x + b) r2 if(S) ds = polsubmul(s1,s2,a,b,ds1,ds2,p); // s1 = s1 - (a x + b) s2 if(T) dt = polsubmul(t1,t2,a,b,dt1,dt2,p); // t1 = t1 - (a x + b) t2 //dr = polsubmulP(r1,r2,a,b,da,db,p,P); // r1 = r1 - (a x + b) r2 //if(S) ds = polsubmulP(s1,s2,a,b,ds1,ds2,p,P); // s1 = s1 - (a x + b) s2 //if(T) dt = polsubmulP(t1,t2,a,b,dt1,dt2,p,P); // t1 = t1 - (a x + b) t2 } else { dr = poldiv64s(r1,r2,da,db,p); // r1 = [remainder,quotient] q = r1+db; dq = da-db; if(S) ds = polmul64s(q,s2,G,dq,ds2,p); if(S) ds = polsub64s(s1,G,s1,ds1,ds,p); // s1 = s1 - q s2 if(T) dt = polmul64s(q,t2,G,dq,dt2,p); if(T) dt = polsub64s(t1,G,t1,dt1,dt,p); // t1 = t1 - q t2 } if( dr<0 ) { /* D|C so gcd(A,B)=D */ polcopy64s(r2,db,G); if(S) if( s2!=S ) polcopy64s(s2,ds2,S); if(T) if( t2!=T ) polcopy64s(t2,dt2,T); if( G[db]!=1 ) { u = modinv64s(G[db],p); polscamul64s(u,G,db,p); if(S) polscamul64s(u,S,ds2,p); if(T) polscamul64s(u,T,dt2,p); } dG[0] = db; if(S) dS[0] = ds2; if(T) dT[0] = dt2; return; } r = r1; r1 = r2; r2 = r; da = db; db = dr; if(S) { s = s1; s1 = s2; s2 = s; ds1 = ds2; ds2 = ds; } if(T) { t = t1; t1 = t2; t2 = t; dt1 = dt2; dt2 = dt; } } } /* C(x) := A(x)^n mod B(x) mod p; 0<=deg(A)=db ) da = poldiv64s(A,B,da,db,p); // reduce A mod B first for( k=0; n>0; k++ ) { b[k]=n&1; n=n/2; } polcopy64s(A,da,C); dc = da; k--; while( k>0 ) { k--; // Main step: compute C := C^2 mod B in Zp[x] //dc = polmul64s(C,C,R,dc,dc,p); //printf("deg(R) = %d; R = ",dc); polprint64s(R,dc); dc = polsqr64s(C,R,dc,p); //printf("deg(R) = %d; R = ",dc); polprint64s(R,dc); dc = poldiv64s(R,B,dc,db,p); polcopy64s(R,dc,C); //printf("deg(C) = %d; C = ",dc); polprint64s(C,dc); if( b[k]==1 ) { //printf(" b[%d]=%d \n", k, b[k] ); dc = polmul64s(A,C,R,da,dc,p); //printf("deg(R) = %d; R = ",dc); polprint64s(R,dc); dc = poldiv64s(R,B,dc,db,p); polcopy64s(R,dc,C); //printf("deg(C) = %d; C = ",dc); polprint64s(C,dc); } } return dc; } // Input f in Zp[x] of degree d > 0, a known product of d linear factors. // Output roots of f in R. // The input array f is destroyed. // W is a scratch array of size at least 3*d void polsplit64s( LONG *f, int d, LONG *R, LONG *W, LONG p ) { int da,dg; LONG alpha, A[2]; if( d==1 ) { alpha = p-f[0]; R[0] = alpha; return; } alpha = rand64s(p); A[1] = 1; A[0] = alpha; da = polpowmod64s( A, (p-1)/2, f, 1, d, W, W+d, p ); if( da==0 ) return polsplit64s(f,d,R,W,p); // alpha is unlucky, try again W[0] = add64s(W[0],1,p); // W = (x+alpha)^((p-1)/2) + 1 mod f polcopy64s( f, d, W+d ); dg = polgcd64s( W, W+d, da, d, p ); // g = gcd( W, f ) in W if( dg==0 ) return polsplit64s(f,d,R,W,p); // g = 1 ==> alpha is unlucky, try again poldiv64s(f,W,d,dg,p); // compute quotient q = f/g destroying f polcopy64s(W,dg-1,f); // f = [ g mod x^dg followed by q ] polsplit64s(f+dg,d-dg,R,W,p); f[dg] = 1; polsplit64s(f,dg,R+d-dg,W,p); return; } int polroots64s( LONG * f, int d, LONG * R, LONG *W, LONG p ) { int i, da, dg; LONG A[2]; extern ULONG seed,mult; clock_t st,et; //printf("roots: deg(f)=%d\n",d); // printf("f := "); polprint64s(f,d); for( i=0; i0 ) { R[0]=0; return( 1 + polroots64s(f+i,d-i,R+1,W,p) ); } if( f[d]!=1 ) monic64s(f,d,p); A[1] = 1; A[0] = 0; //st = clock(); da = polpowmod64s( A, p-1, f, 1, d, W, W+d, p ); // W = x^(p-1) mod f //et = clock(); //printf("Roots: powmod: x^(p-1) mod f = x^%d + ... time = %10d ms\n", da, (et-st)/1000 ); //printf("da = %d, a := ",da); polprint64s(W,da); if( da==0 && W[0]==1 ) dg = d; // f is all linear factors else { W[0] = sub64s(W[0],1,p); dg = polgcd64s( f, W, d, da, p ); } // f = gcd(f,W-1) //printf("g := "); polprint64s(f,dg); //printf("Roots: def(f)=%d #roots=%d\n",d,dg); if( dg==0 ) return 0; seed = 1; mult = 6364136223846793003ll; //st = clock(); polsplit64s( f, dg, R, W, p ); //et = clock(); //printf("Roots: split time=%10d ms\n", (et-st)/1000 ); return dg; // number of roots in R } int BerlekampMassey64s( LONG *a, int N, LONG *L, LONG *W, LONG p ) { // Input sequence a = [a1,a2,a3,...,aN] // Output polynomial Lambda(x) is written to L // Uses the half extended Euclidean algorithm int i,m,n,dr,dq,dr0,dr1,dv0,dv1,dt; LONG *r,*q,*r0,*r1,*v0,*v1,*t,u,A,b; //recint P; while( N>0 && a[N-1]==0 ) N--; // ignore leading zeroes n = N/2; N = 2*n; if( N==0 ) { L[0] = 1; return 0; } m = N-1; // W is space for r0 = x^N and r1 of degree m and v0 and v1 of degree at most n r0 = W; r1 = r0+N+1; v0 = r1+N; v1 = v0+n+1; vecfill64s(0,r0,N); r0[N] = 1; dr0 = N; // r0 = x^(2*n) for(i=0; i=0 && r1[dr1]==0; dr1--); // r1 = sum(a[m-i]*x^i,i=0..m) if( dr1==-1 ) return -1; dv0 = -1; // v0 = 0 v1[0] = 1; dv1 = 0; // v1 = 1 //P = recip1(p); while( n <= dr1 ) { if( dr1>0 && dr0-dr1==1 ) { // normal case u = modinv64s(r1[dr1],p); A = mul64s(r0[dr0],u,p); b = mul64s(A,r1[dr1-1],p); b = mul64s(u,sub64s(r0[dr0-1],b,p),p); // quotient q = A x + b //dr = polsubmulP(r0,r1,A,b,dr0,dr1,p,P); // r0 = r0 - (A x + b) r1 dr = polsubmul(r0,r1,A,b,dr0,dr1,p); // r0 = r0 - (A x + b) r1 // dt = polsubmulP(v0,v1,A,b,dv0,dv1,p,P); // v0 = v0 - (A x + b) v1 dt = polsubmul(v0,v1,A,b,dv0,dv1,p); // v0 = v0 - (A x + b) v1 } else { dr = poldiv64s(r0,r1,dr0,dr1,p); q = r0+dr1; dq = dr0-dr1; // q = quo(r0,r1) dt = polmul64s(q,v1,L,dq,dv1,p); dt = polsub64s(v0,L,v0,dv0,dt,p); } r = r0; r0 = r1; r1 = r; dr0 = dr1; dr1 = dr; // r0,r1 = r1,rem(r0,r1) t = v0; v0 = v1; v1 = t; dv0 = dv1; dv1 = dt; // v0,v1 = v1,v0 - q*v1 //printf("r0 = "); polprint64s(r0,dr0); //printf("r1 = "); polprint64s(r1,dr1); //printf("v0 = "); polprint64s(v0,dv0); //printf("v1 = "); polprint64s(v1,dv1); } if( dv1>=0 ) { polcopy64s(v1,dv1,L); monic64s(L,dv1,p); } return dv1; } void polLambda64s( LONG *R, int n, LONG *L, LONG *W, LONG p ) { // Compute L = proc_{i=0}^{n-1} (x-R[i]) // L must be of length n+1 and W must be of length n // You can use R for W as in polLambda64s( R, n, L, R, p ) which will destroy R int i,m,d; if( n==0 ) { L[0] = 1; return; } if( n==1 ) { L[0] = neg64s(R[0],p); L[1] = 1; return; } if( n==2 ) { L[0] = mul64s(R[0],R[1],p); L[1] = neg64s(add64s(R[0],R[1],p),p); L[2] = 1; return; } m = n/2; d = n-m; polLambda64s( R, m, L, W, p ); // L = [ x, x, 1, -, - ] if n=4 polLambda64s( R+m, d, L+m, W, p ); // L = [ x, x, y, y, 1 ] for( i=0; i=0 && B[k]==0; k-- ); return k; } #define MAXPOW 64 LONG powmodPnew( LONG x, LONG n, LONG *alpha, recint P ) { LONG t; if( n<0 ) {printf("powmod -ve\n"); exit(1); } if( n> 32) & 63; return id; } LONG getMapleLEN( LONG *a ) { LONG len; len = a[0] & ((1LL << 32)-1); return len; } void getdegs64s( LONG *f, int nf, int *D, int n ) { int i,j,numbits,id; LONG mask,z,d; // extract degrees of f in all n variables simulateneously id = getMapleID(f); if( id!=POLY ) { printf("getdeg64s: input is not a POLY DAG\n"); exit(1); } numbits = 64/(n+1); // printf("getdegs64s: n=%d\n",n); mask = ( 1LL << numbits ) - 1; for( j=0; j=0; j-- ) { d = z & mask; if( d>D[j] ) D[j] = d; z = z >> numbits; } } return; } void getterms64s( LONG *f, int nf, int *T, int dx, int x, int n ) { int i,j,d,numbits,shift; LONG mask,z; // Input f a POLY dag in n variables and nf terms and degree dx in x // Ouptut T[d] = the number of terms in f of degree d in x numbits = 64/(n+1); mask = ( 1LL << numbits ) - 1; shift = numbits*(n-x); for( j=0; j<=dx; j++ ) T[j] = 0; for( i=0; i> shift; d = z & mask; //printf("d=%d\n",d); if( d>dx ) { printf("dx too small\n"); exit(1); } T[d] = T[d]+1; } return; } void gettermshomo64s( LONG *f, int nf, int *T, int D, int n ) { int i,j,d,numbits,shift; LONG mask,z; // Input f a POLY dag in n variables and nf terms and total degree D. // Ouptut T[d] = the number of terms in f of degree total d for 0 <= d <= D // The terms in f are supposed to be sorted by total degree numbits = 64/(n+1); shift = numbits*n; for( j=0; j<=D; j++ ) T[j] = 0; for( i=0; i> shift; //printf("d=%d\n",d); if( d>D ) { printf("D too small\n"); exit(1); } T[d] = T[d]+1; } return; } int * array32s( LONG n ) { int *A; A = (int *) malloc( n*sizeof(int) ); if( A==0 ) { printf("out of memory\n"); exit(1); } return A; } void getsupport64s( LONG *f, int nf, LONG *alpha, int n, int x, int dx, int *T, LONG *C, LONG *M, LONG p ) { // f = 3*x^2*y^2+5*z^3*y+2*x^2+4*y*z // T = [#coeff(f,x,i), i=0..dx] // sort terms in f on x so that g = (5z^3y+4yz) x^0 + 0 x^1 + (3y^2+2)x^2 // C = [(5,4),(),(3,2)] and M = [(z^3*y,y*z),(),(y^2,z^3*y)] | y=alpha[1],z=alpha[2] int i,j,k,numbits,d,index; LONG mask,y,z,c,m; recint P; LONG *Powers[32], *W; int D[32], *K; //printf("getsupport: n=%d dx=%d\n",n,dx); getterms64s(f,nf,T,dx,x,n); //for( i=0; i<=dx; i++ ) printf("x^%d = %d ",i,T[i]); printf("\n"); K = array32s(dx+1); K[0] = 0; for( i=0; i32 ) { printf("too many variables\n"); exit(1); } if( x<1 || x>n ) { printf("x=%d out of range\n",d); exit(1); } numbits = 64/(n+1); mask = ( 1LL << numbits ) - 1; getdegs64s( f, nf, D, n ); P = recip1(p); for( i=0; i> 1; //if |c|<2^62 then store C = 2*c+1 = odd else create a GMP long integer (array) and store a pointer to it. //[POLY | [x,y,z] | m1 | 3 | m2 | c2 ] p<2^62 //printf("c=%lld z=%lld\n",c,z); m = 1; for( j=n; j>0; j-- ) { d = z & mask; if( j==x ) k = d; else { y = Powers[j-1][d]; //y = powmod64s(alpha[j-1],d,p); m = mulrec64(m,y,P); } z = z >> numbits; } index = T[k]+K[k]; //printf("k=%d index=%d\n",k,index); C[index] = c; M[index] = m; T[k] = T[k]+1; } free(K); for( i=0; i32 ) { printf("too many variables\n"); exit(1); } getdegs64s( f, nf, D, n ); numbits = 64/(n+1); mask = ( 1LL << numbits ) - 1; P = recip1(p); for( i=0; i> 1; printf("c=%lld z=%llu\n",c,z); m = 1; for( j=n; j>0; j-- ) { d = z & mask; y = Powers[j-1][d]; //y = powmod64s(alpha[j-1],d,p); m = mulrec64(m,y,P); z = z >> numbits; } d = z; if( d>deg ) { printf("deg too small\n"); exit(1); } C[i] = c; M[i] = m; T[d] = T[d]+1; } for( i=0; i x}) mod p and return deg(g) LONG mask,c,z,y; int numbits,k,d,i,j,id; LONG *Powers[32], *W; int D[32]; recint P; //printf("#f=%d n=%d dx=%d x=%d\n",nf,n,dx,x); id = getMapleID(f); if( id!=POLY ) { printf("evaldeg64s: input must be a POLY DAG\n"); exit(1); } k = getMapleLEN((LONG *)f[1]); if( k-1!=n ) { printf("evaldeg64s: input has %d variables not %d\n",k-1,n); exit(1); } //printf("alpha=["); for( i=0; i32 ) { printf("too many variables\n"); exit(1); } if( x<1 || x>n ) { printf("x out of range\n"); exit(1); } numbits = 64/(n+1); mask = ( 1LL << numbits ) - 1; P = recip1(p); getdegs64s(f,nf,D,n); for( i=0; i> 1; //printf("c=%lld z=%lld\n",c,z); // ^5 ^2 ^0 ^3 numbits=64/4=16 for( j=n; j>0; j-- ) { d = z & mask; if( j==x ) k = d; else { y = Powers[j-1][d]; //y = powmod64s(alpha[j-1],d,p); c = mulrec64(c,y,P); } z = z >> numbits; } g[k] = add64s(g[k],c,p); } while( dx>=0 && g[dx]==0 ) dx--; for( i=0; i> 1; // printf("x=%lld z=%lld\n",x,z); // z = z%p; // this is expensive if( z<-p || z>=p ) z = z%p; if( z<0 ) z = z+p; for( j=n-1; j>0; j-- ) { d = x & mask; //z = mulrec64(z,powmodP64s(alpha[j],d,p,P),P); //printf("x[%lld]^%lld=%lld\n",j,d,S[j][d]); if( d ) z = mulrec64(z,S[j][d],P); x = x >> numbits; } s = add64s(s,z,p); } // free(ALPHA); return s; } int indexofx( LONG *X, int n, LONG x ) { // X = [EXPSEQ|x1|x2|...|xn] int i; for( i=1; i> 1; x = x%p; if( x<0 ) x += p; return x; } id = getMapleID(f); if( id==SUM ) { L = getMapleLEN(f); x = 0; for( i=1; i> 1; if( d<0 ) b = -d; else b = d; j = indexofx( X, n, f[i] ); if( j ) a = T[j][b]; else { // e.g. x^2 (2xy^2 + 3y) and (x+y)/(x^2+y+2) a = evalrec( (LONG *) f[i], X, alpha, T, p, P ); if( a<0 ) return a; a = powmodP64s(a,b,p,P); } if( d<0 ) { if( a==0 ) { printf("division by zero\n"); return -2; } a = modinv64s(a,p); } x = mulrec64( x, a, P ); //a = evalrec( (LONG *) f[i], X, alpha, T, p, P ); //b = evalrec( (LONG *) f[i+1], X, alpha, T, p, P ); //printf("PROD: a=%lld b=%lld\n",a,b); //if( a<0 || b<0 ) return -1; // x = mul64s( x, powmod64s( a, b, p ), p ); } //printf("PROD: x=%lld\n",x); return x; } else if( id==NAME || id==TABLEREF ) { n = getMapleLEN(X); //for( i=1; i>1; x = x%p; if( x<0 ) x = x+p; beta[i] = x; } else return -6; //printf(" x[%d] = %lld\n", i, x); } if( getMapleID(D)!=LIST ) return -7; else D = (LONG *) D[1]; if( getMapleID(D)!=EXPSEQ ) return -8; if( getMapleLEN(D) != n ) return -9; for( space=0, i=1; i> 1; space += d+1; } else return -10; } A = array(space); // printf("allocating %lld words\n",space); B = A; // pointer to position in A for( i=1; i> 1; // printf("d[%lld]=%lld\n",i,d); x = beta[i]; //printf("x[%d] = %lld", i, x); B[0] = 1; if( d>0 ) B[1] = x; for( j=2; j<=d; j++ ) B[j] = mulrec64(B[j-1],x,P); //printf("x[%lld] = [",i); //for( j=0; j>1; x = x%p; if( x<0 ) x = x+p; beta[i] = x; } else return -4; } if( getMapleID(D)==LIST ) D = (LONG *) D[1]; else return -5; if( getMapleLEN(D) != n ) return -6; if( getMapleID(LL)==LIST ) LL = (LONG *) LL[1]; else return -7; r = getMapleLEN(LL); for( i=1; i0 ) { d = d >> 1; space += d+1; } else return -10; } //for( d=0, i=1; i> 1; //printf("d[%lld]=%lld\n",i,d); x = beta[i]; //printf("x[%d] = %lld", i, x); B[0] = 1; if( d>0 ) B[1] = x; for( j=2; j<=d; j++ ) B[j] = mulrec64(B[j-1],x,P); B = B + d+1; } //printf("evallists: evaluating ...\n"); for( i=1; i<=r; i++ ) { L = (LONG *) LL[i]; L = (LONG *) L[1]; for( j=1; j<=c; j++ ) { x = evalrec( (LONG *) L[j], X, beta, T, p, P ); if( x<0 ) return x; A[(i-1)*r+(j-1)] = x; } } free(W); return(1); } /* Test polgcd64s */ /* int main() { LONG *R, *W, *L, p, *g, *a, *b; int i,n,d; p = 5; p = (p<<55)+1; printf("p := %lld;\n",p); n = 10; R = array(n); for( i=0; i