/* > ecdl2K-108.32bit.c * Purpose: Fast arithmetic for computing discrete logs on elliptic curves. * Copyright: Robert J. Harley, 1997-1999. * Contact: Robert.Harley@inria.fr * Legalese: This source code is subject to the GNU General Public Licence v2. */ /* This is Rob's 32-bit ECDL program for Unix, version 1.1.0. * * Consult the following URL for more information: * http://cristal.inria.fr/~harley/ecdl7/ */ /*** Quickstart ***/ /* Do this: cc -O3 ecdl2K-108.32bit.c -o ecdl.exe echo Machine name is `hostname` mkdir `hostname` cd `hostname` nice ../ecdl.exe mail by my@email.address on `hostname` > ecdl.log & * * Then go sign up, using the exact same email address, at this form: * * http://cristal.inria.fr/bin/ecdldb * */ /*** Detailed instructions ***/ /** 1. Signing up ** * * Go to this form to sign up: * * http://cristal.inria.fr/bin/ecdldb * * Give your email address (or at least something that identifies you * uniquely) and a passphrase you just invented. Keep a safe copy of * the passphrase! * * Note: If you prefer to stay anonymous and not sign up, that's fine. * **/ /** 2. Compiling ** * * Compile with something like: * gcc -O3 ecdl2K-108.32bit.c -o ecdl.exe * * You can test the binary like this: * ./ecdl.exe test by me@here on bla * * The test output should match this sample except for the reported * times and rates: > ./ecdl.exe test by me@here on bla This is Rob's ECDL program for Unix32, version 1.1.0 MMX Mode ...... test Email ..... me@here Machine ... bla Starting at Sun Dec 12 15:34:13 1999 Generating 32 new starting points. Computing iterations... Test point found at: Sun Dec 12 15:34:18 1999 TEST-L0|Unix32|1.1.0 MMX| TEST-L1|EF7DC6C12ACAF01D48CE4D44EB6|5163373649C868AD3265FACCF99| TEST-L2|me@here| TEST-L3|000000004887| TEST-L4|bla| Iterations used = 18567. Total iterations = 594144. Rate = 150416 per second. Test point found at: Sun Dec 12 15:34:45 1999 TEST-L0|Unix32|1.1.0 MMX| TEST-L1|B0152E85E4E22B6EE3C7120FC46|026063744AFC9DB003098294BD6| TEST-L2|me@here| TEST-L3|00000001BACA| TEST-L4|bla| Iterations used = 113354. Total iterations = 3.62733e+06. Rate = 148600 per second. Test point found at: Sun Dec 12 15:34:54 1999 TEST-L0|Unix32|1.1.0 MMX| TEST-L1|607BB600DECAF410A7B62ED3DD3|F43D17AAF642BBF4EBFAA9C1984| TEST-L2|me@here| TEST-L3|000000023C69| TEST-L4|bla| Iterations used = 146537. Total iterations = 4.68918e+06. Rate = 147598 per second. [...and so on...] * If the program complains that it cannot find sendmail, you can * compile with -DSENDMAIL='"/usr/sbin/sendmail"' or something similar. * **/ /** 3. Optimising ** * * You can compile with various optimisation flags to maximise speed * before starting on the real calculations. Speeds to aim for are * roughly 150 k iterations per second for a 500 MHz x86 (80 k without * MMX), 100 k for a 450 MHz PowerPC G3 and 40 k for a 275 MHz StrongARM. * * Good flags to use with gcc are: * -fomit-frame-pointer -funroll-loops -fexpensive-optimizations * * You might also consider specifying "-march=pentiumpro" or similar. * * If you have a chip with MMX instructions, definitely use "-DMMX" as * the program will run about twice as fast! (Compiling the MMX * version requires a recent version of gcc). For non-MMX chips, try * adding -DPROD=2 (or 3, 4 or 5) to the compilation flags to use a * different implementation of speed critical functions. It might be * a little faster; then again, maybe not! * * You can also try tweaking the value of PARAL. Just add -DPARAL=40, * for instance. The default is 32 and useful values are roughly in * the range 20 to 50. Note that the test output will no longer * exactly match the sample given above. * **/ /** 4. Starting up ** * * When you're ready to start up: on each machine you want to run on * make a directory for that machine, "cd" into it and go. * * echo Machine name is `hostname` * mkdir `hostname` * cd `hostname` * unlimit cputime || (echo Using ulimit instead; ulimit -t unlimited) * nice +15 ../ecdl.exe [...arguments, see below...] > ecdl.log & * * Note: Some shells need "nice -15" instead. * Note: You can keep an eye on the output using "tail -f ecdl.log". * Note: If you have a machine with several CPUs, make a directory for * each CPU and start up an ecdl process in each one. * * The arguments have the following syntax: * ecdl by on * * : Either "test", "batch", "mail", "alt" or "http". * : Something to identify you uniquely e.g., your email address. * : The machine name e.g., as given by "hostname". * * Note: For give the exact same string you used when signing up, * since otherwise the Web pages will not be able to show correct * statistics for your contribution. * * Examples: * ecdl mail by John.Smith@example.com on avalon * ecdl batch by anonymous on 'Mystery machine' * * Test mode quickly produces some (useless) test output to stdout along * with info on iterations per second. This allows you to check that * your binary is OK, to compare compiler optimisation flags etc. * * The other modes produce things called "distinguished points" every * few hours. Each distinguished point looks something like this: Distinguished point found at: Wed Nov 24 03:52:02 1999 ECC-L0|Unix32|1.1.0| ECC-L1|6B445FC02D031AEA01F28B7AF92|ABD2FF8960D196A4AB43719A64F| ECC-L2|me@here| ECC-L3|000015AE4C35| ECC-L4|bla| * These points are valuable! The ECDL project needs to collect * roughly 1.3 million of them to solve Certicom's ECC2K-108 problem. * The points are always appended to a file called "dist.points" and * also written to stdout along with some verbose info. * * When running in "mail" mode each point is automatically emailed * (using "sendmail") to ecdl2K-108@cristal.inria.fr as well. INRIA's * excessively aggressive spam filters might refuse the email; in that * case switch to "alt" mode which sends to ecdl2K-108@rupture.net instead. * * When running in "http" mode each point is automatically sent (using * the HTTP POST protocol) to the Web server on cristal.inria.fr. If * your machine is behind a firewall, you'll need to specify a Web * proxy to use for sending the points via HTTP. Specify it on the * command line as follows: * * ecdl http by on proxy : * * where is the host name of the proxy, e.g. * "myproxy.mydomain.com", and is the port number of the * HTTP proxy on this host, e.g. "3128". * * For machines unable to send email or make HTTP connections, * for instance not permanently connected to the Net, use "batch" mode. * Then every few days do: * * mv dist.points dist.points.nov.25 * * or similar using the correct date, and email the "dist.points.nov.25" * file manually. When the next distinguished point is found, a new * "dist.points" file will be created for it and further points will * be appended to this new file until the next time you "mv" it. * * Mode: test batch mail alt http * ---------------------------------------------------------------------- * * Writes info on stdout x x x x x * * Writes points to "dist.points" x x x x * * Sends points via sendmail x x * * Sends points via http x * **/ /** 5. Shutting down (and saved states) ** * * When the program is running it saves its internal state in a file * called "saved.state" from time to time. If you want to stop the * program, send it a SIGINT or SIGTERM signal so it can save its * state before exiting. If it is running in the foreground, you can * send SIGINT by typing Ctrl-C. If it is running in the background * send SIGTERM by "killall ecdl.exe". If you don't have "killall" * installed, find the PID (first column of the "ps" output) and use "kill": * * ps x | grep ecdl | grep -v grep * kill * * When you restart in the same directory: * * nice +15 ../ecdl.exe [...arguments...] >> ecdl.log & * * the program will look for the saved state file. If no saved state * is found it will generate a new state and work from there. If one * is found, the program reads it and continues where it left off. * Note that it would be wasteful to use a given saved state on more * than one CPU because work would just be duplicated. * * The reason each machine should have its own directory is to avoid * the dist.points and saved.state files getting mixed up! Note that * test mode does not mess with these files. * **/ /*== #includes =============================================================*/ /** Ansi includes. **/ #include /* For strerror(), strcmp() and strlen(). */ #include /* For signal(), SIGINT and SIGTERM. */ #include /* For errno. */ #include /* For time and date: time() and ctime(). */ #include /* For malloc(). */ #include /** Unix includes. **/ /* For getpid(), access() and timing. */ #include /* For timing: getrusage() and struct rusage. */ #include #if !MACOS #include #endif /* Socket stuff */ #include #include #include #include /** Windows (Cygwin32) includes. **/ #if __CYGWIN__ /* For SetPriorityClass */ #include /* For times() */ #include #endif /** MacOS includes. **/ #if MACOS #include "main.h" #endif /*== Types =================================================================*/ /* Abbreviations used when bit-length does not matter. */ typedef unsigned int uint; /* Integers. */ typedef unsigned char u8; typedef unsigned int u32; typedef struct { u32 hi, lo; } u64; typedef struct { u32 t, h, m, l; } u128; /* Polynomials over the two-element field GF(2) = Z/2Z. */ typedef struct { u32 t, h, m, l; } poly128; /* MMX types. Endianness, alignment and packing matter here. */ #ifdef MMX typedef struct { u32 lo, hi; } __attribute__((packed)) __attribute__ ((aligned (8))) mmx64; typedef struct { mmx64 hi, lo; } mmx128; #endif /* Operating modes. */ typedef enum { Bad, Test, Batch, Mail, Alt, Http } modeType; /*== #defines ==============================================================*/ /*-= Stuff that you can change =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* The place to find sendmail. Often it's in /usr/sbin/sendmail but * then /usr/lib/sendmail is a link to it, so this works fairly universally. */ #ifndef SENDMAIL #define SENDMAIL "/usr/lib/sendmail" #endif /* Name of device for reading entropy from (it isn't strictly necessary). */ #ifndef RANDOM_DEV_FILENAME #define RANDOM_DEV_FILENAME "/dev/urandom" #endif /* Name of file for writing distinguished points to (as well as stdout). */ #ifndef DIST_POINTS_FILENAME #define DIST_POINTS_FILENAME "dist.points" #endif /* Name of file for saving state after each distinguished point or when * stopping due to a SIGINT or SIGTERM. It is read when restarting. */ #ifndef SAVED_STATE_FILENAME #define SAVED_STATE_FILENAME "saved.state" #endif /* The state is checked and saved every 2^SAVED_STATE_SHIFT iterations * and also after a distinguished point is found. */ #ifndef SAVED_STATE_SHIFT #define SAVED_STATE_SHIFT (26) #endif #if SAVED_STATE_SHIFT < 20 || SAVED_STATE_SHIFT > 31 #error Please define SAVED_STATE_SHIFT in the range 20..31. #endif /* Number of iterations to run "in parallel". This is done so that * PARAL inversions can be replaced by one inversion and 3*PARAL-3 mults. * Try something roughly in the range 20 to 50. */ #ifndef PARAL #define PARAL (32) #endif #if PARAL < 1 #error Please define PARAL to be greater than 0. #endif #ifdef MMX #ifdef PROD #warning PROD is ignored in MMX mode... #undef PROD #endif #else /* Define PROD to choose a different implementation of the critical * GF2ProductNNxNN() and product() functions. The default is 1. * Try 2, 3, 4 or 5. They might be faster. Then again maybe not! */ #ifndef PROD #define PROD 1 #endif #if PROD < 1 || PROD > 5 #error Please define PROD in the range 1..5. #endif #endif #ifdef MMX /* For good performance with MMX, variables of type mmx64 need 8-byte * alignment. On systems that do not align the stack sufficiently, it * is best to force such variables into the data area with "static". * Even if the stack is aligned this does not hurt performance, so we * do it by default. */ /*#define MMX_STATIC*/ #define MMX_STATIC static #endif /* Keyword for giving "inline" hint. * Adjust for your compiler or leave it out. */ #if defined(__GNUC__) #define INLINE __inline #else #define INLINE #endif /*-= Stuff that must not be changed =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ /* Version 0.9.0: 31 September 1999 * - Does 66 k iters/sec with gcc 2.95+Linux+500 MHz Pentium III (PARAL=20). * Version 0.99.0: 24 November 1999 * - Does 82 k iters/sec with gcc 2.95+Linux+500 MHz Pentium III (PARAL=32) * and 145 k when using MMX! * Version 1.0.0: 3 December 1999 * Version 1.0.1: 7 December 1999 * - Added HTTP mode (Xavier Leroy). * - Ported to MS Windows with Cygwin32 (Xavier Leroy). * Version 1.0.2: 9 December 1999 * - Put variables of type mmx64 in the data area (Jacques Garrigue). * - Added MacOS stuff (Damien Doligez). * - Gross hack to work around spurious error reports in http mode. * Version 1.1.0: 12 December 1999 * - Print time at start-up. * - Proper solution for http mode: use HTTP/1.0 (Roy T. Fielding). */ #if __CYGWIN__ #define VARIANT "Cygwin32" #elif MACOS #define VARIANT "MacOS" #else #define VARIANT "Unix32" #endif #ifdef MMX #define VERSION "1.1.0 MMX" #else #define VERSION "1.1.0" #endif /* These are used a few places for types poly128 and u128. */ #define ZERO { 0, 0, 0, 0 } #define ONE { 0, 0, 0, 1 } /* The number of cases used in pseudo-random function. */ #define CASES (7) /* Population count used to decide if a point is distinguished. * Expect a given popc to occur one time in 2^108/binomial(109, popc) * if it's odd (even ones never occur). */ /* 1 time in 1422267 or so (for testing). */ #define TEST_POP_COUNT (29) /* 1 time in 1407700775 or so (for real). */ #define DIST_POP_COUNT (23) /* For clarity. */ #define square(x) (squareNTimes((x), 1)) /*-= Stuff for MMX =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* Define this for a version that uses lots of MMX instructions. * It is roughly twice as fast with MMX as without. */ /*#define MMX*/ /* The MMX instructions are emitted using gcc-specific asm()s. */ #if defined(MMX) && !defined(__GNUC__) #error Compiling for MMX requires gcc. #endif #ifdef MMX #define STRNG(x) #x #define STRING(x) STRNG(x) extern mmx64 mmx_regs[8]; /* Dummy memory area. */ #define START_MMX() \ asm volatile ("/* Start of MMX section */" : : ) #define STR(MEM, RVAL) \ asm("movq %%mm" STRING(RVAL) ", %0" : "=m" (MEM) : "m" (mmx_regs[RVAL])) #define LOD(LVAL, MEM) \ asm("movq %1, %%mm" STRING(LVAL) : "=m" (mmx_regs[LVAL]) : "m" (MEM)) #define CPY(LVAL, RVAL) \ asm( "movq %%mm" STRING(RVAL) ", %%mm" STRING(LVAL) \ : "=m" (mmx_regs[LVAL]) : "m" (mmx_regs[RVAL]) \ ) #define SLL(LVAL, SH) \ asm("psllq $" STRING(SH) ", %%mm" STRING(LVAL) : "+m" (mmx_regs[LVAL]) : ) #define SRL(LVAL, SH) \ asm("psrlq $" STRING(SH) ", %%mm" STRING(LVAL) : "+m" (mmx_regs[LVAL]) : ) #define ORR(LVAL, RVAL) \ asm( "por %%mm" STRING(RVAL) ", %%mm" STRING(LVAL) \ : "+m" (mmx_regs[LVAL]) : "m" (mmx_regs[RVAL]) \ ) #define AND(LVAL, RVAL) \ asm( "pand %%mm" STRING(RVAL) ", %%mm" STRING(LVAL) \ : "+m" (mmx_regs[LVAL]) : "m" (mmx_regs[RVAL]) \ ) #define XOR(LVAL, RVAL) \ asm( "pxor %%mm" STRING(RVAL) ", %%mm" STRING(LVAL) \ : "+m" (mmx_regs[LVAL]) : "m" (mmx_regs[RVAL]) \ ) #define AND_MEM(LVAL, MEM) \ asm("pand %1, %%mm" STRING(LVAL) : "+m" (mmx_regs[LVAL]) : "m" (MEM)) #define ORR_MEM(LVAL, MEM) \ asm("por %1, %%mm" STRING(LVAL) : "+m" (mmx_regs[LVAL]) : "m" (MEM)) #define XOR_MEM(LVAL, MEM) \ asm("pxor %1, %%mm" STRING(LVAL) : "+m" (mmx_regs[LVAL]) : "m" (MEM)) #define END_MMX() \ asm volatile ("emms /* End of MMX section */" : : ) #endif /* defined(MMX) ... */ /*-= Data defining which ECDL problem to solve =-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* Field is GF(2^109) represented as (Z/2Z)[t] / (t^109+t^9+t^2+t+1) * (t^109+t^9+t^2+t+1 is irreducible in (Z/2Z)[t]). */ /* Curve from Certicom ECC2K-108 problem is y^2 + x*y = x^3 + x^2 + 1 * over GF(2^109). * * Group order is G = 2^109+1-w^109-conj(w)^109 where w = (1-sqrt(-7))/2. * Use subgroup of order G/2 = 324518553658426701487448656461467 (prime). * * Frobenius is multiplication by w i.e., by * 138423345589698157369693034392981 modulo g. */ #define A { 0, 0, 0, 1 } #define B { 0, 0, 0, 1 } #define ORDER { 0x0FFF, 0xFFFFFFFF, 0xFFA621B0, 0x2C383E9B } /* Points from Certicom ECC2K-108 problem: */ #define XP { 0x0478, 0xC46CC963, 0x38CED915, 0x65E17257 } #define YP { 0x1E79, 0x65E4A3AF, 0xB73A48FC, 0x9AB790E9 } #define XQ { 0x1FF0, 0xCE5EC618, 0x93F2119C, 0x3077C59E } #define YQ { 0x1F20, 0xE9B010AC, 0x691C9B87, 0xB438241D } /*== Function declarations =================================================*/ /*-= Short little functions =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ static INLINE int ge(u128 x, u128 y); static INLINE u128 diff(u128 x, u128 y); static INLINE u128 sumMod(u128 x, u128 y, u128 m); static INLINE u128 doubleMod(u128 x, u128 m); static INLINE u128 incMod(u128 x, u128 m); static INLINE int equal(poly128 x, poly128 y); static INLINE poly128 xor(poly128 x, poly128 y); static INLINE u32 topBit(u32 x); static INLINE u128 squareMod(u128 x, u128 m); /*-= Main and friends =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ static modeType parse(int argc, char *argv[]); void catchSignal(int sigNum); int main(int argc, char *argv[]); /*-= Stuff for checking that a point is distinguished =-=-=-=-=-=-=-=-=-=-=-*/ static uint popc(poly128 n); static poly128 changeBasis(poly128 x); /*-= Arithmetic in the field GF(2^109) -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ static poly128 squareNTimes(poly128 x, uint n); #if PROD == 1 static u32 GF2Product27x27(u32 a, u32 b, u32 *ph); #elif PROD == 3 static u32 GF2Product28x28(u32 a, u32 b, u32 *ph); #elif PROD == 5 static u32 GF2Product32x32(u32 a, u32 b, u32 *ph); #endif #ifdef MMX static void MMX_GF2Product56x56 (u32 xh, u32 xl, const mmx64 *py, mmx64 *ph, mmx64 *pl); #elif PROD == 1 || PROD == 2 static u32 GF2Product54x54 (u32 xh, u32 xl, u32 yh, u32 yl, u32 *pt, u32 *ph, u32 *pm); #else static u32 GF2Product56x56 (u32 xh, u32 xl, u32 yh, u32 yl, u32 *pt, u32 *ph, u32 *pm); #endif static poly128 product(poly128 x, poly128 y); static poly128 inverse(poly128 y); static INLINE poly128 quotient(poly128 x, poly128 y); /*-= Arithmetic mod the group order =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ static u128 productMod(u128 x, u128 y, u128 m); static u128 powerMod(u128 x, u64 y, u128 m); static u128 findMultiplier(u64 *cases); /*-= Arithmetic on elliptic curve over the field =-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int ellipticDouble (poly128 x, poly128 y, int z, poly128 *px2, poly128 *py2); static int ellipticSum ( poly128 x1, poly128 y1, int z1, poly128 x2, poly128 y2, int z2 , poly128 *px3, poly128 *py3 ); static int ellipticProduct (poly128 x, poly128 y, int z, u128 fac, poly128 *px2, poly128 *py2); /*-= Various file manipulation thingies =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ static int syncAndCheck(u64 *cases, u128 *pu, u128 *pv, poly128 x, poly128 y); static void reportDistinguished ( modeType mode, char *argv[] , u64 iters, u128 u, u128 v, u64 total, double dStart ); static void u32ToBytes(u32 data, u8 *p); static u32 bytesToU32(u8 *p); static void writeState (u64 *itersT, u128 *uT, u128 *vT, poly128 *xT, poly128 *yT); static uint readState (u64 *itersT, u128 *uT, u128 *vT, poly128 *xT, poly128 *yT); /*-= Stuff for http mode =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int httpPost(char * payload); static int httpParseProxy(char * proxy); static int httpInit(void); /*== Global variables ======================================================*/ int gotSignal = 0; /*== Function definitions ==================================================*/ /*-= Short little functions =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ /*-- ge --------------------------------------------------------------------*/ /* Return whether x >= y, for 0 <= x, y < 2^128. */ static INLINE int ge(u128 x, u128 y) { return x.t > y.t || x.t == y.t && ( x.h > y.h || x.h == y.h && (x.m > y.m || x.m == y.m && x.l >= y.l) ) ; } /* end function ge */ /*-- diff ------------------------------------------------------------------*/ /* Return x-y for 0 <= y <= x < 2^128. */ static INLINE u128 diff(u128 x, u128 y) { u32 c, d; c = x.l < y.l; x.l -= y.l; d = x.m < c; x.m -= c; c = d | x.m < y.m; x.m -= y.m; d = x.h < c; x.h -= c; c = d | x.h < y.h; x.h -= y.h; x.t -= c; x.t -= y.t; return x; } /* end function diff */ /*-- sumMod ----------------------------------------------------------------*/ /* Return sum x+y modulo m, 0 <= x, y < m <= 2^127. */ static INLINE u128 sumMod(u128 x, u128 y, u128 m) { u32 c; x.l += y.l; c = x.l < y.l; x.m += c; c = x.m < c; x.m += y.m; c |= x.m < y.m; x.h += c; c = x.h < c; x.h += y.h; c |= x.h < y.h; x.t += c; x.t += y.t; if (ge(x, m)) x = diff(x, m); return x; } /* end function sumMod */ /*-- doubleMod -------------------------------------------------------------*/ /* Return double x<<1 modulo m, where 0 <= x < m <= 2^127. */ static INLINE u128 doubleMod(u128 x, u128 m) { x.t <<= 1; x.t |= x.h>>31; x.h <<= 1; x.h |= x.m>>31; x.m <<= 1; x.m |= x.l>>31; x.l <<= 1; if (ge(x, m)) x = diff(x, m); return x; } /* end function doubleMod */ /*-- incMod ----------------------------------------------------------------*/ /* Increment x modulo m, for 0 <= x < m < 2^128. */ static INLINE u128 incMod(u128 x, u128 m) { const u128 intZero = ZERO; if (!++x.l) if (!++x.m) if (!++x.h) ++x.t; if (x.l == m.l && x.m == m.m && x.h == m.h && x.t == m.t) { x = intZero; } /* end if */ return x; } /* end function incMod */ /*-- equal -----------------------------------------------------------------*/ /* Return whether x == y, for polys over Z/2Z. */ static INLINE int equal(poly128 x, poly128 y) { return x.l == y.l && x.m == y.m && x.h == y.h && x.t == y.t; } /* end function equal */ /*-- xor -------------------------------------------------------------------*/ /* Return x XOR y, in other words add them as polys over Z/2Z. */ static INLINE poly128 xor(poly128 x, poly128 y) { x.l ^= y.l; x.m ^= y.m; x.h ^= y.h; x.t ^= y.t; return x; } /* end function xor */ /*-- topBit ----------------------------------------------------------------*/ /* Returns top bit of x, or 0 if x = 0. */ static INLINE u32 topBit(u32 x) { x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16; return x ^ x>>1; } /* end function topBit */ /*-- squareMod -------------------------------------------------------------*/ static INLINE u128 squareMod(u128 x, u128 m) { return productMod(x, x, m); } /* end function squareMod */ /*-= Main and friends =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ /*-- parse -----------------------------------------------------------------*/ /* Parse command line and return mode Bad, Batch, Test, Mail, Alt or Http. */ static modeType parse(int argc, char *argv[]) { char ch, *str; modeType mode; if (argc != 6 && argc != 8) return Bad; str = argv[1]; if (!strcmp(str, "test")) mode = Test; else if (!strcmp(str, "batch")) mode = Batch; else if (!strcmp(str, "mail")) mode = Mail; else if (!strcmp(str, "alt")) mode = Alt; else if (!strcmp(str, "http")) mode = Http; else return Bad; if (strcmp(argv[2], "by") ) return Bad; for (str = argv[3]; (ch = *str) != '\0'; ++str) if (ch == '|') return Bad; if (strcmp(argv[4], "on")) return Bad; for (str = argv[5]; (ch = *str) != '\0'; ++str) if (ch == '|') return Bad; if ( argc == 8 && ( mode != Http || strcmp(argv[6], "proxy") || httpParseProxy(argv[7]) == -1 ) ) { return Bad; } /* end if */ return mode; } /* end function parse */ /*-- catchSignal -----------------------------------------------------------*/ /* Signal catcher for SIGINT or SIGTERM. * Allows program to stop cleanly and save state. * User can send the signal by Ctrl-C or "kill ", for instance. */ void catchSignal(int sigNum) { gotSignal = 1; } /* end function catchSignal */ /*-- main ------------------------------------------------------------------*/ int main(int argc, char *argv[]) { /* Constant stuff. */ const int zP = 1, zQ = 1; const u128 order = ORDER; const poly128 xP = XP, yP = YP, xQ = XQ, yQ = YQ; /* These get initialised once and for all. */ uint distPopc; modeType mode; /* These are variable. */ u64 total; double dStart; /* For timing. */ uint pops[PARAL]; u64 casesT[PARAL][CASES], itersT[PARAL]; u128 uT[PARAL], vT[PARAL]; poly128 denT[PARAL], x1T[PARAL], x2T[PARAL], y1T[PARAL], y2T[PARAL]; /** Simple sanity check. **/ if (sizeof(u32) != 4) { puts("Fatal error: *** the size of u32 is wrong. ***"); return 1; } /* end if */ #if MACOS init_mac(&argc, &argv, &total); #endif /** Parse command line. **/ mode = parse(argc, argv); if (mode == Bad) { printf( "Syntax: %s by on " "[proxy :]\n" "See top of source code or README file for details. Stopping.\n" , argv[0] ); return 1; } /* end if */ /** See if we can find sendmail with execute permission. **/ #if !MACOS /* Note: Mac mailing is different... */ if ((mode == Mail || mode == Alt) && access(SENDMAIL, X_OK)) { printf( "Fatal error: *** sendmail program " SENDMAIL " does not exist\n" " or is not executable ***\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); return 1; } /* end if */ #endif /** Initialize HTTP stuff if http mode selected */ if (mode == Http && httpInit() == -1) { puts("Fatal error: *** http initialisation failed. ***"); return 1; } /* end if */ /** Put up banner. **/ printf( "This is Rob's ECDL program for " VARIANT ", version " VERSION "\n" "Mode ...... %s\n" "Email ..... %s\n" "Machine ... %s\n" , argv[1], argv[3], argv[5] ); { time_t now; /* For time and date. */ now = time(NULL); if (now == -1) puts("Warning: can't determine time and date."); else printf("Starting at %s", ctime(&now)); } /* end block */ fflush(stdout); /** Lower our priority (under Unix, this is done by the shell) */ #ifdef _WIN32 SetPriorityClass(GetCurrentProcess(), IDLE_PRIORITY_CLASS); #endif /** Initialisation. **/ /* Read starting points from the saved state or generate new random ones. */ { uint i; if (mode == Test) i = 0; else i = readState(itersT, uT, vT, x1T, y1T); if (i < PARAL) { u128 seed; printf("Generating %u new starting points.\n", (uint)PARAL-i); fflush(stdout); if (mode == Test) { /* Fixed seed for testing. */ seed.t = 0; seed.h = 0; seed.m = 0x01234567; seed.l = 0x89ABCDEF; } else { FILE *handle; /* Get really random seed. */ handle = fopen(RANDOM_DEV_FILENAME, "rb"); if (handle) { int status; fread((void *)&seed, sizeof(seed), (size_t)1, handle); status = fclose(handle); if (status == EOF) { printf( "Warning: fclose() of random device " RANDOM_DEV_FILENAME " failed\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); } /* end if */ } else { printf( "Warning: random device " RANDOM_DEV_FILENAME " not found\n" " (errno = %d: %s). Making do without...\n" , errno, strerror(errno) ); } /* end if */ /* Randomise seed some more (if random device was not found, * seed is stack junk which is OK but may not be random enough!) */ seed.h ^= getpid(); seed.l ^= time(NULL); /* Ensure seed < order. */ seed.t &= 0x00000FFF; seed.h &= 0xFFFFFFFE; } /* end if/else */ /* Generate random points. */ for (/* i was set above */ ; i < PARAL; ++i) { int z, zu, zv; poly128 xu, xv, yu, yv; /* Euler gamma times 2^108. */ const u128 gamma = { 0x0000093C, 0x467E37DB, 0xC7A4D1B, 0xE3F81015 }; itersT[i].hi = itersT[i].lo = 0; do { /* Not terribly random, but it will do! */ uT[i] = seed; zu = ellipticProduct(xP, yP, zP, seed, &xu, &yu); seed = sumMod(seed, gamma, order); vT[i] = seed; zv = ellipticProduct(xQ, yQ, zQ, seed, &xv, &yv); seed = sumMod(seed, gamma, order); z = ellipticSum(xu, yu, zu, xv, yv, zv, &x1T[i], &y1T[i]); } while (!z); } /* end for (i) */ } /* end if */ /* Initialise counts of cases to 0. */ for (i = 0; i < PARAL; ++i) { uint j; for (j = 0; j < CASES; ++j) casesT[i][j].hi = casesT[i][j].lo = 0; } /* end for (i) */ } /* end block */ /* Timing (measured in seconds of user time for this process). */ #if MACOS dStart = get_time(); #elif __CYGWIN__ { struct tms tm; times(&tm); dStart = (double)tm.tms_utime/(double)CLK_TCK; } /* end block */ #else { struct rusage ru; getrusage(RUSAGE_SELF, &ru); dStart = (double)ru.ru_utime.tv_sec+(double)ru.ru_utime.tv_usec*0.000001; } /* end block */ #endif /** Main loop. **/ /* Sketch of main loop structure: * for ever { * for (all PARAL points) { * while (this one is distinguished) { output it; replace by new one; } * } * for (all PARAL points) { * get denominator for this one; * } * get inverses of all PARAL denominators at once; * for (all PARAL points) { * apply pseudo-random function to this one using inverse(denominator); * } * } */ puts("Computing iterations..."); fflush(stdout); distPopc = mode == Test ? TEST_POP_COUNT : DIST_POP_COUNT; signal(SIGINT, catchSignal); signal(SIGTERM, catchSignal); total.hi = total.lo = 0; while (!gotSignal) { uint i; int someWereDistinguished = 0; #if MACOS ROTATECURSOR_MAGIC(); #endif /** Check for distinguished points. **/ for (i = 0; i < PARAL; ++i) { while ((pops[i] = popc(changeBasis(x1T[i]))) == distPopc) { /* Point is distinguished. */ int z; /* Check that the point is OK. */ if (!syncAndCheck(&casesT[i][0], &uT[i], &vT[i], x1T[i], y1T[i])) { puts("Fatal error: *** bad distinguished point detected! ***"); return 2; } /* end if */ /* Report it. */ reportDistinguished( mode, argv , itersT[i], uT[i], vT[i], total, dStart ); /* Restart this point by incrementing v. */ itersT[i].hi = itersT[i].lo = 0; do { vT[i] = incMod(vT[i], order); z = ellipticSum(x1T[i], y1T[i], 1, xQ, yQ, zQ, &x1T[i], &y1T[i]); } while (!z); /* Note this for use immediately below. */ someWereDistinguished = 1; } /* end while (distinguished point) */ } /* end for (i) */ /** Save state after a bunch of iterations or a distinguished point. **/ if ( (total.lo | total.hi) && (total.lo & ((1UL<>6); if (m >= 7) m -= 7; /* Reduce modulo 7. */ ++casesT[i][m].lo; if (!casesT[i][m].lo) ++casesT[i][m].hi; m += 2; if (m == 3) m = 1; x2T[i] = squareNTimes(x1T[i], m); denT[i] = xor(x1T[i], x2T[i]); y2T[i] = squareNTimes(y1T[i], m); } /* end for (i) */ /** Invert PARAL denominators with 1 inversion and 3*PARAL-3 mults. **/ { poly128 ix, prod, q, prodT[PARAL]; prod = denT[0]; for (i = 1; i < PARAL; ++i) { prodT[i] = prod; prod = product(prod, denT[i]); } /* end for */ q = inverse(prod); for (i = PARAL; --i; ) { ix = product(q, prodT[i]); q = product(q, denT[i]); denT[i] = ix; } /* end for */ denT[0] = q; } /* end block */ /** Get new points. **/ for (i = 0; i < PARAL; ++i) { /* Always in general case. */ poly128 nx, y1 = y1T[i]; poly128 lam = product(xor(y1, y2T[i]), denT[i]); poly128 t = xor(xor(square(lam), lam), x2T[i]); const poly128 a = A; t.l ^= a.l; /* t = xor(t, a) with a = 0 or 1. */ x1T[i] = nx = xor(t, x1T[i]); y1T[i] = xor(xor(product(lam, t), nx), y1); ++itersT[i].lo; if (!itersT[i].lo) ++itersT[i].hi; } /* end for (i) */ total.lo += PARAL; total.hi += total.lo < PARAL; } /* end main loop */ puts("Received signal."); /** Save state after receiving signal (except when testing). **/ { uint i; /* Check all points. */ for (i = 0; i < PARAL; ++i) { if (!syncAndCheck(&casesT[i][0], &uT[i], &vT[i], x1T[i], y1T[i])) { puts("Fatal error: *** bad point detected after signal! ***"); return 4; } /* end if */ } /* end for */ /* Save state. */ if (mode != Test) writeState(itersT, uT, vT, x1T, y1T); } /* end block */ puts("Bye!"); return 0; } /* end function main */ /*-= Stuff for checking that a point is distinguished =-=-=-=-=-=-=-=-=-=-=-*/ /*-- popc ------------------------------------------------------------------*/ /* Counts bits set in a 109-bit poly over Z/2Z. */ static uint popc(poly128 n) { u32 n1, n2, nt; const u32 mask1 = 0x55555555, mask2 = 0x33333333, mask3 = 0x0F0F0F0F; /* Full adder. */ n1 = n.l ^ n.m; n2 = n.l & n.m; n2 |= n1 & n.h; n1 ^= n.h; /* Partial popc. */ n2 -= n2>>1 & mask1; /* Count 2-by-2. */ n2 = (n2 & mask2)+(n2>>2 & mask2); /* Count 4-by-4. */ n2 += n2>>4; n2 &= mask3; /* Count 8-by-8. */ /* Partial popc. */ n1 -= n1>>1 & mask1; n1 = (n1 & mask2)+(n1>>2 & mask2); n1 += n1>>4; n1 &= mask3; nt = n.t; /* Partial popc. */ nt -= nt>>1 & mask1; nt = (nt & mask2)+(nt>>2 & mask2); nt += nt>>4; nt &= mask3; /* Accumulate pieces. */ n1 += n2<<1; n1 += nt; n1 += n1>> 8; n1 += n1>>16; return n1 & 255; } /* end function popc */ /*-- changeBasis -----------------------------------------------------------*/ /* Go from basis { t^k | 0 <= k < 109 } to basis { (t+1)^2^k | 0 <= k < 109 } * (both over Z/2Z). */ static poly128 changeBasis(poly128 x) { #ifdef MMX static const mmx128 matrix[218] = { { { 0, 0 }, { 0, 0 } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFFF, 0xFFFFFFFF } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFFE, 0xFFFFFFFF } } , { { 0x00000000, 0x0000 }, { 0x00000001, 0x00000000 } } , { { 0, 0 }, { 0, 0 } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFFD, 0xFFFFFFFF } } , { { 0x1A34BEEC, 0x09EA }, { 0xBDEDA6AB, 0x0A908559 } } , { { 0xE5CB4113, 0x1615 }, { 0x42125956, 0xF56F7AA6 } } , { { 0, 0 }, { 0, 0 } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFFB, 0xFFFFFFFF } } , { { 0x810828A6, 0x0536 }, { 0x2FC46FD6, 0x03436150 } } , { { 0x7EF7D759, 0x1AC9 }, { 0xD03B902D, 0xFCBC9EAF } } , { { 0, 0 }, { 0, 0 } } , { { 0x34697DD8, 0x13D4 }, { 0x7BDB4D56, 0x15210AB3 } } , { { 0xB0C11920, 0x1748 }, { 0x09A60AC2, 0x5DC6E0E9 } } , { { 0x84A864F8, 0x049C }, { 0x727D4794, 0x48E7EA5A } } , { { 0, 0 }, { 0, 0 } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFF7, 0xFFFFFFFF } } , { { 0xE07B050A, 0x1E31 }, { 0x30FAD343, 0x23A45550 } } , { { 0x1F84FAF5, 0x01CE }, { 0xCF052CB4, 0xDC5BAAAF } } , { { 0, 0 }, { 0, 0 } } , { { 0x0210514C, 0x0A6D }, { 0x5F88DFAC, 0x0686C2A0 } } , { { 0x94F5E66A, 0x02C7 }, { 0x6F735E1B, 0xAA1EE736 } } , { { 0x96E5B726, 0x08AA }, { 0x30FB81B7, 0xAC982596 } } , { { 0, 0 }, { 0, 0 } } , { { 0x68D2FBB0, 0x07A8 }, { 0xF7B69AAD, 0x2A421566 } } , { { 0x9E40B570, 0x0346 }, { 0x71B8CCC4, 0xCE15C900 } } , { { 0xF6924EC0, 0x04EE }, { 0x860E5669, 0xE457DC66 } } , { { 0, 0 }, { 0, 0 } } , { { 0x61823240, 0x0E91 }, { 0x134C1585, 0xBB8DC1D2 } } , { { 0x6CC8E146, 0x0A5E }, { 0x0ACD96E5, 0x486897C2 } } , { { 0x0D4AD306, 0x04CF }, { 0x19818360, 0xF3E55610 } } , { { 0, 0 }, { 0, 0 } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFEF, 0xFFFFFFFF } } , { { 0xDF8084BB, 0x1DE5 }, { 0xA3BF99C1, 0x9970280C } } , { { 0x207F7B44, 0x021A }, { 0x5C40662E, 0x668FD7F3 } } , { { 0, 0 }, { 0, 0 } } , { { 0xC0F60A14, 0x1C63 }, { 0x61F5A687, 0x4748AAA0 } } , { { 0x01B79A31, 0x1F55 }, { 0x9FBDDCC1, 0xD77E3E4D } } , { { 0xC1419025, 0x0336 }, { 0xFE487A46, 0x903694ED } } , { { 0, 0 }, { 0, 0 } } , { { 0x0420A298, 0x14DA }, { 0xBF11BF58, 0x0D0D8540 } } , { { 0x396CA3D7, 0x0952 }, { 0xF9CBB7EC, 0x5297C756 } } , { { 0x3D4C014F, 0x1D88 }, { 0x46DA08B4, 0x5F9A4216 } } , { { 0, 0 }, { 0, 0 } } , { { 0x29EBCCD5, 0x058F }, { 0xDEE6BC36, 0x543DCE6C } } , { { 0x18AA70FE, 0x1BFA }, { 0xCD057310, 0x45B2D97A } } , { { 0x3141BC2B, 0x1E75 }, { 0x13E3CF26, 0x118F1716 } } , { { 0, 0 }, { 0, 0 } } , { { 0xD1A5F760, 0x0F50 }, { 0xEF6D355A, 0x54842ACD } } , { { 0xAFD26253, 0x192A }, { 0x56604C4E, 0x0FD989FC } } , { { 0x7E779533, 0x167A }, { 0xB90D7914, 0x5B5DA331 } } , { { 0, 0 }, { 0, 0 } } , { { 0x3C816AE1, 0x068D }, { 0xE3719988, 0x9C2B9200 } } , { { 0xEAC2D27F, 0x0095 }, { 0xB6D89E6C, 0xAD933BEF } } , { { 0xD643B89E, 0x0618 }, { 0x55A907E4, 0x31B8A9EF } } , { { 0, 0 }, { 0, 0 } } , { { 0xC3046481, 0x1D22 }, { 0x26982B0A, 0x771B83A4 } } , { { 0x542E870A, 0x0A5E }, { 0x4E573666, 0x9F98AD6E } } , { { 0x972AE38B, 0x177C }, { 0x68CF1D6C, 0xE8832ECA } } , { { 0, 0 }, { 0, 0 } } , { { 0xD991C28C, 0x14BC }, { 0x159B2DCA, 0x90D12F84 } } , { { 0x27449B58, 0x09C5 }, { 0x2E783164, 0x9E98DABF } } , { { 0xFED559D4, 0x1D79 }, { 0x3BE31CAE, 0x0E49F53B } } , { { 0, 0 }, { 0, 0 } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFDF, 0xFFFFFFFF } } , { { 0xE648868C, 0x1C9F }, { 0x22FDFD76, 0xB844216D } } , { { 0x19B77973, 0x0360 }, { 0xDD0202A9, 0x47BBDE92 } } , { { 0, 0 }, { 0, 0 } } , { { 0xBF010977, 0x1BCB }, { 0x477F3383, 0x32E05019 } } , { { 0x4DACA9C6, 0x00C4 }, { 0x2356535F, 0xF0CE03AB } } , { { 0xF2ADA0B1, 0x1B0F }, { 0x642960DC, 0xC22E53B2 } } , { { 0, 0 }, { 0, 0 } } , { { 0x81EC1428, 0x18C7 }, { 0xC3EB4D0F, 0x8E915540 } } , { { 0x982774E6, 0x1003 }, { 0x52DD5203, 0xEC619F31 } } , { { 0x19CB60CE, 0x08C4 }, { 0x91361F0C, 0x62F0CA71 } } , { { 0, 0 }, { 0, 0 } } , { { 0x036F3463, 0x1EAA }, { 0x3F7BB983, 0xAEFC7C9B } } , { { 0xDA6A67E3, 0x0BE2 }, { 0x32ADBEF9, 0x9960FC56 } } , { { 0xD9055380, 0x1548 }, { 0x0DD6077A, 0x379C80CD } } , { { 0, 0 }, { 0, 0 } } , { { 0x08414530, 0x09B4 }, { 0x7E237EB1, 0x1A1B0A81 } } , { { 0x11FD1F88, 0x1CF9 }, { 0xD921E2F2, 0x9BF5DF89 } } , { { 0x19BC5AB8, 0x154D }, { 0xA7029C43, 0x81EED508 } } , { { 0, 0 }, { 0, 0 } } , { { 0x72D947AE, 0x12A4 }, { 0xF3976FD8, 0xA52F8EAD } } , { { 0x8176618A, 0x1CE4 }, { 0x362CFC89, 0x5063CF7B } } , { { 0xF3AF2624, 0x0E40 }, { 0xC5BB9351, 0xF54C41D6 } } , { { 0, 0 }, { 0, 0 } } , { { 0x53D799AA, 0x0B1E }, { 0xBDCD786C, 0xA87B9CD9 } } , { { 0xA61FFC6D, 0x0048 }, { 0xB0EC51FA, 0x7BBE85CF } } , { { 0xF5C865C7, 0x0B56 }, { 0x0D212996, 0xD3C51916 } } , { { 0, 0 }, { 0, 0 } } , { { 0x3154E1FC, 0x17F4 }, { 0x9A0AE621, 0x8B65B2F5 } } , { { 0xB34684B4, 0x19E3 }, { 0x14E90A5D, 0xAE959235 } } , { { 0x82126548, 0x0E17 }, { 0x8EE3EC7C, 0x25F020C0 } } , { { 0, 0 }, { 0, 0 } } , { { 0xA34BEEC0, 0x1EA1 }, { 0xDEDA6AB4, 0xA908559B } } , { { 0xFFAC3CF8, 0x128C }, { 0x4DB5CD19, 0x8C2931BE } } , { { 0x5CE7D238, 0x0C2D }, { 0x936FA7AD, 0x25216425 } } , { { 0, 0 }, { 0, 0 } } , { { 0x5FA4C4A6, 0x1255 }, { 0xACC0989D, 0x1FB313F8 } } , { { 0xF28924AA, 0x0EEF }, { 0x1C93544F, 0x2F887351 } } , { { 0xAD2DE00C, 0x1CBA }, { 0xB053CCD2, 0x303B60A9 } } , { { 0, 0 }, { 0, 0 } } , { { 0x7902D5C3, 0x0D1A }, { 0xC6E33310, 0x38572401 } } , { { 0x19AD60B0, 0x0C5C }, { 0x09CBC60F, 0x5E11225D } } , { { 0x60AFB573, 0x0146 }, { 0xCF28F51F, 0x6646065C } } , { { 0, 0 }, { 0, 0 } } , { { 0xD585A4FF, 0x012B }, { 0x6DB13CD8, 0x5B2677DF } } , { { 0x8C1277D0, 0x01C3 }, { 0xF132BC82, 0x060B23FC } } , { { 0x5997D32F, 0x00E8 }, { 0x9C83805A, 0x5D2D5423 } } , { { 0, 0 }, { 0, 0 } } , { { 0x8608C902, 0x1A45 }, { 0x4D305615, 0xEE370748 } } , { { 0xB2113F0F, 0x179D }, { 0x277A9EE3, 0x7814A56C } } , { { 0x3419F60D, 0x0DD8 }, { 0x6A4AC8F6, 0x9623A224 } } , { { 0, 0 }, { 0, 0 } } , { { 0xA85D0E15, 0x14BC }, { 0x9CAE6CCC, 0x3F315ADC } } , { { 0x5B345C1C, 0x157C }, { 0x30FA7A39, 0x643A6D33 } } , { { 0xF3695209, 0x01C0 }, { 0xAC5416F5, 0x5B0B37EF } } , { { 0, 0 }, { 0, 0 } } , { { 0xB3238519, 0x0979 }, { 0x2B365B95, 0x21A25F08 } } , { { 0x5D70D551, 0x0103 }, { 0x5B6FF9C9, 0xB4E6A8BE } } , { { 0xEE535048, 0x087A }, { 0x7059A25C, 0x9544F7B6 } } , { { 0, 0 }, { 0, 0 } } , { { 0x4E8936B1, 0x138A }, { 0x5CF062C8, 0x3D31B57E } } , { { 0x11203F3F, 0x1C2F }, { 0xDF433D07, 0xCAB69770 } } , { { 0x5FA9098E, 0x0FA5 }, { 0x83B35FCF, 0xF787220E } } , { { 0, 0 }, { 0, 0 } } , { { 0xFFFFFFFF, 0x1FFF }, { 0xFFFFFFBF, 0xFFFFFFFF } } , { { 0xE8DE6EB8, 0x01CD }, { 0x7FD9AA80, 0x69E4FFE2 } } , { { 0x17219147, 0x1E32 }, { 0x8026553F, 0x961B001D } } , { { 0, 0 }, { 0, 0 } } , { { 0xCC910D19, 0x193F }, { 0x45FBFAED, 0x708842DA } } , { { 0xE34869DD, 0x127C }, { 0x225B3C14, 0x0640B805 } } , { { 0x2FD964C4, 0x0B43 }, { 0x67A0C6F9, 0x76C8FADF } } , { { 0, 0 }, { 0, 0 } } , { { 0x7E0212EE, 0x1797 }, { 0x8EFE6707, 0x65C0A032 } } , { { 0xD4CA755E, 0x14C6 }, { 0x2567C9A5, 0x9F969267 } } , { { 0xAAC867B0, 0x0351 }, { 0xAB99AEA2, 0xFA563255 } } , { { 0, 0 }, { 0, 0 } } , { { 0x9B59538D, 0x0188 }, { 0x46ACA6BE, 0xE19C0756 } } , { { 0x331E30C9, 0x0A9A }, { 0x5AA1F939, 0xEFA2FE39 } } , { { 0xA8476344, 0x0B12 }, { 0x1C0D5F87, 0x0E3EF96F } } , { { 0, 0 }, { 0, 0 } } , { { 0x03D82851, 0x118F }, { 0x87D69A1F, 0x1D22AA81 } } , { { 0xB83B634D, 0x095F }, { 0xE280D9AC, 0x284C5684 } } , { { 0xBBE34B1C, 0x18D0 }, { 0x655643B3, 0x356EFC05 } } , { { 0, 0 }, { 0, 0 } } , { { 0x304EE9CD, 0x0007 }, { 0xA5BAA407, 0xD8C33E62 } } , { { 0x5EFB7E85, 0x0076 }, { 0xD82D749F, 0x388546D3 } } , { { 0x6EB59748, 0x0071 }, { 0x7D97D098, 0xE04678B1 } } , { { 0, 0 }, { 0, 0 } } , { { 0x06DE68C7, 0x1D54 }, { 0x7EF77307, 0x5DF8F936 } } , { { 0x78C41EED, 0x07BA }, { 0x29DF40AF, 0x02B46968 } } , { { 0x7E1A762A, 0x1AEE }, { 0x572833A8, 0x5F4C905E } } , { { 0, 0 }, { 0, 0 } } , { { 0xB4D4CFC7, 0x17C5 }, { 0x655B7DF2, 0x32C1F8AC } } , { { 0x7D6E6970, 0x1D45 }, { 0xB0A43683, 0xC19185E5 } } , { { 0xC9BAA6B7, 0x0A80 }, { 0xD5FF4B71, 0xF3507D49 } } , { { 0, 0 }, { 0, 0 } } , { { 0x10828A60, 0x1368 }, { 0xFC46FD62, 0x34361502 } } , { { 0x0759C297, 0x1F9F }, { 0x64DC124E, 0x1F06E180 } } , { { 0x17DB48F7, 0x0CF7 }, { 0x989AEF2C, 0x2B30F482 } } , { { 0, 0 }, { 0, 0 } } , { { 0x23FA3F11, 0x19F2 }, { 0xB243C5E5, 0x37EBBF13 } } , { { 0x46F4B00F, 0x17B1 }, { 0xE76AB97D, 0xA9CBE82C } } , { { 0x650E8F1E, 0x0E43 }, { 0x55297C98, 0x9E20573F } } , { { 0, 0 }, { 0, 0 } } , { { 0xE5B28F5D, 0x0548 }, { 0xE72EDFB1, 0x4A5F1D5B } } , { { 0x4CC047A9, 0x0797 }, { 0x4F38005C, 0x517EC6F3 } } , { { 0xA972C8F4, 0x02DF }, { 0xA816DFED, 0x1B21DBA8 } } , { { 0, 0 }, { 0, 0 } } , { { 0x02ECC314, 0x19C9 }, { 0x6C59F913, 0xA0C79EF6 } } , { { 0xFBA4F1D8, 0x0544 }, { 0xDEE1E43F, 0xE3F79DAF } } , { { 0xF94832CC, 0x1C8D }, { 0xB2B81D2C, 0x43300359 } } , { { 0, 0 }, { 0, 0 } } , { { 0xA7AF3355, 0x163C }, { 0x7B9AF0D8, 0x50F739B3 } } , { { 0xE42CECEE, 0x0408 }, { 0x2E18F5E8, 0xD1B4C9D2 } } , { { 0x4383DFBB, 0x1234 }, { 0x55820530, 0x8143F061 } } , { { 0, 0 }, { 0, 0 } } , { { 0x4C3FF8DA, 0x0091 }, { 0x61D8A3F4, 0xF77D0B9F } } , { { 0x7ABA658A, 0x186E }, { 0x16AA6668, 0x7FF0C893 } } , { { 0x36859D50, 0x18FF }, { 0x7772C59C, 0x888DC30C } } , { { 0, 0 }, { 0, 0 } } , { { 0x62A9C3F9, 0x0FE8 }, { 0x3415CC43, 0x16CB65EB } } , { { 0xD9C93DA7, 0x1A79 }, { 0xC83CF966, 0x2891C56B } } , { { 0xBB60FE5E, 0x1591 }, { 0xFC293525, 0x3E5AA080 } } , { { 0, 0 }, { 0, 0 } } , { { 0x668D0969, 0x13C7 }, { 0x29D214BB, 0x5D2B246A } } , { { 0x97345AA9, 0x08A6 }, { 0x2816E691, 0xBB2DDE90 } } , { { 0xF1B953C0, 0x1B61 }, { 0x01C4F22A, 0xE606FAFA } } , { { 0, 0 }, { 0, 0 } } , { { 0x4697DD81, 0x1D43 }, { 0xBDB4D569, 0x5210AB37 } } , { { 0xE982BE06, 0x146E }, { 0x6A2904E5, 0x27B2F0E0 } } , { { 0xAF156387, 0x092D }, { 0xD79DD18C, 0x75A25BD7 } } , { { 0, 0 }, { 0, 0 } } , { { 0xFF5879F1, 0x0519 }, { 0x9B6B9A33, 0x1852637C } } , { { 0x16F88427, 0x1CF7 }, { 0x6100D523, 0xA0B5B4D1 } } , { { 0xE9A0FDD6, 0x19EE }, { 0xFA6B4F10, 0xB8E7D7AD } } , { { 0, 0 }, { 0, 0 } } , { { 0xBF49894C, 0x04AA }, { 0x5981313B, 0x3F6627F1 } } , { { 0xE6B11399, 0x0E63 }, { 0xF86F51E9, 0xC8C3EC99 } } , { { 0x59F89AD5, 0x0AC9 }, { 0xA1EE60D2, 0xF7A5CB68 } } , { { 0, 0 }, { 0, 0 } } , { { 0xE5124954, 0x1DDF }, { 0x3926A89E, 0x5F10E6A2 } } , { { 0x19BC4158, 0x1A9C }, { 0xC1EAE3F5, 0x11BBB1FB } } , { { 0xFCAE080C, 0x0743 }, { 0xF8CC4B6B, 0x4EAB5759 } } , { { 0, 0 }, { 0, 0 } } , { { 0xF205AB86, 0x1A34 }, { 0x8DC66620, 0x70AE4803 } } , { { 0x811DFA1A, 0x0553 }, { 0xF16331C3, 0xC53F7E1C } } , { { 0x7318519C, 0x1F67 }, { 0x7CA557E3, 0xB591361F } } , { { 0, 0 }, { 0, 0 } } , { { 0x335AC160, 0x18B8 }, { 0x13978C1E, 0xBC2244BA } } , { { 0xB2233CDC, 0x0E4F }, { 0x9BFF7FF6, 0x78EC2FD5 } } , { { 0x8179FDBC, 0x16F7 }, { 0x8868F3E8, 0xC4CE6B6F } } , { { 0, 0 }, { 0, 0 } } , { { 0xAB0B49FE, 0x0257 }, { 0xDB6279B0, 0xB64CEFBE } } } ; uint i; u32 s, t; MMX_STATIC mmx64 yhi, ylo; const mmx128 *p; p = &matrix[0]; START_MMX(); #define mm_XHI 0 #define mm_XLO 1 #define mm_YHI 2 #define mm_YLO 3 XOR(mm_YHI, mm_YHI); XOR(mm_YLO, mm_YLO); #define FOR_BLOCK \ { s = t & 3; XOR_MEM(mm_YHI, p[s].hi); XOR_MEM(mm_YLO, p[s].lo); \ t >>= 2; p += 4; \ s = t & 3; XOR_MEM(mm_YHI, p[s].hi); XOR_MEM(mm_YLO, p[s].lo); \ t >>= 2; p += 4; \ } t = x.l; for (i = 8; i; --i) FOR_BLOCK t = x.m; for (i = 8; i; --i) FOR_BLOCK t = x.h; for (i = 8; i; --i) FOR_BLOCK t = x.t; for (i = 3; i; --i) FOR_BLOCK /* Last bit. */ s = t & 1; XOR_MEM(mm_YHI, p[s].hi); XOR_MEM(mm_YLO, p[s].lo); #undef FOR_BLOCK STR(yhi, mm_YHI); STR(ylo, mm_YLO); #undef mm_XHI #undef mm_XLO #undef mm_YHI #undef mm_YLO END_MMX(); { poly128 res; res.t = yhi.hi; res.h = yhi.lo; res.m = ylo.hi; res.l = ylo.lo; return res; } /* end block */ #else /* !defined(MMX) */ static const poly128 matrix[218] = { { 0, 0, 0, 0 } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE } , { 0x0000, 0x00000000, 0x00000000, 0x00000001 } , { 0, 0, 0, 0 } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFD } , { 0x09EA, 0x1A34BEEC, 0x0A908559, 0xBDEDA6AB } , { 0x1615, 0xE5CB4113, 0xF56F7AA6, 0x42125956 } , { 0, 0, 0, 0 } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFB } , { 0x0536, 0x810828A6, 0x03436150, 0x2FC46FD6 } , { 0x1AC9, 0x7EF7D759, 0xFCBC9EAF, 0xD03B902D } , { 0, 0, 0, 0 } , { 0x13D4, 0x34697DD8, 0x15210AB3, 0x7BDB4D56 } , { 0x1748, 0xB0C11920, 0x5DC6E0E9, 0x09A60AC2 } , { 0x049C, 0x84A864F8, 0x48E7EA5A, 0x727D4794 } , { 0, 0, 0, 0 } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF7 } , { 0x1E31, 0xE07B050A, 0x23A45550, 0x30FAD343 } , { 0x01CE, 0x1F84FAF5, 0xDC5BAAAF, 0xCF052CB4 } , { 0, 0, 0, 0 } , { 0x0A6D, 0x0210514C, 0x0686C2A0, 0x5F88DFAC } , { 0x02C7, 0x94F5E66A, 0xAA1EE736, 0x6F735E1B } , { 0x08AA, 0x96E5B726, 0xAC982596, 0x30FB81B7 } , { 0, 0, 0, 0 } , { 0x07A8, 0x68D2FBB0, 0x2A421566, 0xF7B69AAD } , { 0x0346, 0x9E40B570, 0xCE15C900, 0x71B8CCC4 } , { 0x04EE, 0xF6924EC0, 0xE457DC66, 0x860E5669 } , { 0, 0, 0, 0 } , { 0x0E91, 0x61823240, 0xBB8DC1D2, 0x134C1585 } , { 0x0A5E, 0x6CC8E146, 0x486897C2, 0x0ACD96E5 } , { 0x04CF, 0x0D4AD306, 0xF3E55610, 0x19818360 } , { 0, 0, 0, 0 } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFEF } , { 0x1DE5, 0xDF8084BB, 0x9970280C, 0xA3BF99C1 } , { 0x021A, 0x207F7B44, 0x668FD7F3, 0x5C40662E } , { 0, 0, 0, 0 } , { 0x1C63, 0xC0F60A14, 0x4748AAA0, 0x61F5A687 } , { 0x1F55, 0x01B79A31, 0xD77E3E4D, 0x9FBDDCC1 } , { 0x0336, 0xC1419025, 0x903694ED, 0xFE487A46 } , { 0, 0, 0, 0 } , { 0x14DA, 0x0420A298, 0x0D0D8540, 0xBF11BF58 } , { 0x0952, 0x396CA3D7, 0x5297C756, 0xF9CBB7EC } , { 0x1D88, 0x3D4C014F, 0x5F9A4216, 0x46DA08B4 } , { 0, 0, 0, 0 } , { 0x058F, 0x29EBCCD5, 0x543DCE6C, 0xDEE6BC36 } , { 0x1BFA, 0x18AA70FE, 0x45B2D97A, 0xCD057310 } , { 0x1E75, 0x3141BC2B, 0x118F1716, 0x13E3CF26 } , { 0, 0, 0, 0 } , { 0x0F50, 0xD1A5F760, 0x54842ACD, 0xEF6D355A } , { 0x192A, 0xAFD26253, 0x0FD989FC, 0x56604C4E } , { 0x167A, 0x7E779533, 0x5B5DA331, 0xB90D7914 } , { 0, 0, 0, 0 } , { 0x068D, 0x3C816AE1, 0x9C2B9200, 0xE3719988 } , { 0x0095, 0xEAC2D27F, 0xAD933BEF, 0xB6D89E6C } , { 0x0618, 0xD643B89E, 0x31B8A9EF, 0x55A907E4 } , { 0, 0, 0, 0 } , { 0x1D22, 0xC3046481, 0x771B83A4, 0x26982B0A } , { 0x0A5E, 0x542E870A, 0x9F98AD6E, 0x4E573666 } , { 0x177C, 0x972AE38B, 0xE8832ECA, 0x68CF1D6C } , { 0, 0, 0, 0 } , { 0x14BC, 0xD991C28C, 0x90D12F84, 0x159B2DCA } , { 0x09C5, 0x27449B58, 0x9E98DABF, 0x2E783164 } , { 0x1D79, 0xFED559D4, 0x0E49F53B, 0x3BE31CAE } , { 0, 0, 0, 0 } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFDF } , { 0x1C9F, 0xE648868C, 0xB844216D, 0x22FDFD76 } , { 0x0360, 0x19B77973, 0x47BBDE92, 0xDD0202A9 } , { 0, 0, 0, 0 } , { 0x1BCB, 0xBF010977, 0x32E05019, 0x477F3383 } , { 0x00C4, 0x4DACA9C6, 0xF0CE03AB, 0x2356535F } , { 0x1B0F, 0xF2ADA0B1, 0xC22E53B2, 0x642960DC } , { 0, 0, 0, 0 } , { 0x18C7, 0x81EC1428, 0x8E915540, 0xC3EB4D0F } , { 0x1003, 0x982774E6, 0xEC619F31, 0x52DD5203 } , { 0x08C4, 0x19CB60CE, 0x62F0CA71, 0x91361F0C } , { 0, 0, 0, 0 } , { 0x1EAA, 0x036F3463, 0xAEFC7C9B, 0x3F7BB983 } , { 0x0BE2, 0xDA6A67E3, 0x9960FC56, 0x32ADBEF9 } , { 0x1548, 0xD9055380, 0x379C80CD, 0x0DD6077A } , { 0, 0, 0, 0 } , { 0x09B4, 0x08414530, 0x1A1B0A81, 0x7E237EB1 } , { 0x1CF9, 0x11FD1F88, 0x9BF5DF89, 0xD921E2F2 } , { 0x154D, 0x19BC5AB8, 0x81EED508, 0xA7029C43 } , { 0, 0, 0, 0 } , { 0x12A4, 0x72D947AE, 0xA52F8EAD, 0xF3976FD8 } , { 0x1CE4, 0x8176618A, 0x5063CF7B, 0x362CFC89 } , { 0x0E40, 0xF3AF2624, 0xF54C41D6, 0xC5BB9351 } , { 0, 0, 0, 0 } , { 0x0B1E, 0x53D799AA, 0xA87B9CD9, 0xBDCD786C } , { 0x0048, 0xA61FFC6D, 0x7BBE85CF, 0xB0EC51FA } , { 0x0B56, 0xF5C865C7, 0xD3C51916, 0x0D212996 } , { 0, 0, 0, 0 } , { 0x17F4, 0x3154E1FC, 0x8B65B2F5, 0x9A0AE621 } , { 0x19E3, 0xB34684B4, 0xAE959235, 0x14E90A5D } , { 0x0E17, 0x82126548, 0x25F020C0, 0x8EE3EC7C } , { 0, 0, 0, 0 } , { 0x1EA1, 0xA34BEEC0, 0xA908559B, 0xDEDA6AB4 } , { 0x128C, 0xFFAC3CF8, 0x8C2931BE, 0x4DB5CD19 } , { 0x0C2D, 0x5CE7D238, 0x25216425, 0x936FA7AD } , { 0, 0, 0, 0 } , { 0x1255, 0x5FA4C4A6, 0x1FB313F8, 0xACC0989D } , { 0x0EEF, 0xF28924AA, 0x2F887351, 0x1C93544F } , { 0x1CBA, 0xAD2DE00C, 0x303B60A9, 0xB053CCD2 } , { 0, 0, 0, 0 } , { 0x0D1A, 0x7902D5C3, 0x38572401, 0xC6E33310 } , { 0x0C5C, 0x19AD60B0, 0x5E11225D, 0x09CBC60F } , { 0x0146, 0x60AFB573, 0x6646065C, 0xCF28F51F } , { 0, 0, 0, 0 } , { 0x012B, 0xD585A4FF, 0x5B2677DF, 0x6DB13CD8 } , { 0x01C3, 0x8C1277D0, 0x060B23FC, 0xF132BC82 } , { 0x00E8, 0x5997D32F, 0x5D2D5423, 0x9C83805A } , { 0, 0, 0, 0 } , { 0x1A45, 0x8608C902, 0xEE370748, 0x4D305615 } , { 0x179D, 0xB2113F0F, 0x7814A56C, 0x277A9EE3 } , { 0x0DD8, 0x3419F60D, 0x9623A224, 0x6A4AC8F6 } , { 0, 0, 0, 0 } , { 0x14BC, 0xA85D0E15, 0x3F315ADC, 0x9CAE6CCC } , { 0x157C, 0x5B345C1C, 0x643A6D33, 0x30FA7A39 } , { 0x01C0, 0xF3695209, 0x5B0B37EF, 0xAC5416F5 } , { 0, 0, 0, 0 } , { 0x0979, 0xB3238519, 0x21A25F08, 0x2B365B95 } , { 0x0103, 0x5D70D551, 0xB4E6A8BE, 0x5B6FF9C9 } , { 0x087A, 0xEE535048, 0x9544F7B6, 0x7059A25C } , { 0, 0, 0, 0 } , { 0x138A, 0x4E8936B1, 0x3D31B57E, 0x5CF062C8 } , { 0x1C2F, 0x11203F3F, 0xCAB69770, 0xDF433D07 } , { 0x0FA5, 0x5FA9098E, 0xF787220E, 0x83B35FCF } , { 0, 0, 0, 0 } , { 0x1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFBF } , { 0x01CD, 0xE8DE6EB8, 0x69E4FFE2, 0x7FD9AA80 } , { 0x1E32, 0x17219147, 0x961B001D, 0x8026553F } , { 0, 0, 0, 0 } , { 0x193F, 0xCC910D19, 0x708842DA, 0x45FBFAED } , { 0x127C, 0xE34869DD, 0x0640B805, 0x225B3C14 } , { 0x0B43, 0x2FD964C4, 0x76C8FADF, 0x67A0C6F9 } , { 0, 0, 0, 0 } , { 0x1797, 0x7E0212EE, 0x65C0A032, 0x8EFE6707 } , { 0x14C6, 0xD4CA755E, 0x9F969267, 0x2567C9A5 } , { 0x0351, 0xAAC867B0, 0xFA563255, 0xAB99AEA2 } , { 0, 0, 0, 0 } , { 0x0188, 0x9B59538D, 0xE19C0756, 0x46ACA6BE } , { 0x0A9A, 0x331E30C9, 0xEFA2FE39, 0x5AA1F939 } , { 0x0B12, 0xA8476344, 0x0E3EF96F, 0x1C0D5F87 } , { 0, 0, 0, 0 } , { 0x118F, 0x03D82851, 0x1D22AA81, 0x87D69A1F } , { 0x095F, 0xB83B634D, 0x284C5684, 0xE280D9AC } , { 0x18D0, 0xBBE34B1C, 0x356EFC05, 0x655643B3 } , { 0, 0, 0, 0 } , { 0x0007, 0x304EE9CD, 0xD8C33E62, 0xA5BAA407 } , { 0x0076, 0x5EFB7E85, 0x388546D3, 0xD82D749F } , { 0x0071, 0x6EB59748, 0xE04678B1, 0x7D97D098 } , { 0, 0, 0, 0 } , { 0x1D54, 0x06DE68C7, 0x5DF8F936, 0x7EF77307 } , { 0x07BA, 0x78C41EED, 0x02B46968, 0x29DF40AF } , { 0x1AEE, 0x7E1A762A, 0x5F4C905E, 0x572833A8 } , { 0, 0, 0, 0 } , { 0x17C5, 0xB4D4CFC7, 0x32C1F8AC, 0x655B7DF2 } , { 0x1D45, 0x7D6E6970, 0xC19185E5, 0xB0A43683 } , { 0x0A80, 0xC9BAA6B7, 0xF3507D49, 0xD5FF4B71 } , { 0, 0, 0, 0 } , { 0x1368, 0x10828A60, 0x34361502, 0xFC46FD62 } , { 0x1F9F, 0x0759C297, 0x1F06E180, 0x64DC124E } , { 0x0CF7, 0x17DB48F7, 0x2B30F482, 0x989AEF2C } , { 0, 0, 0, 0 } , { 0x19F2, 0x23FA3F11, 0x37EBBF13, 0xB243C5E5 } , { 0x17B1, 0x46F4B00F, 0xA9CBE82C, 0xE76AB97D } , { 0x0E43, 0x650E8F1E, 0x9E20573F, 0x55297C98 } , { 0, 0, 0, 0 } , { 0x0548, 0xE5B28F5D, 0x4A5F1D5B, 0xE72EDFB1 } , { 0x0797, 0x4CC047A9, 0x517EC6F3, 0x4F38005C } , { 0x02DF, 0xA972C8F4, 0x1B21DBA8, 0xA816DFED } , { 0, 0, 0, 0 } , { 0x19C9, 0x02ECC314, 0xA0C79EF6, 0x6C59F913 } , { 0x0544, 0xFBA4F1D8, 0xE3F79DAF, 0xDEE1E43F } , { 0x1C8D, 0xF94832CC, 0x43300359, 0xB2B81D2C } , { 0, 0, 0, 0 } , { 0x163C, 0xA7AF3355, 0x50F739B3, 0x7B9AF0D8 } , { 0x0408, 0xE42CECEE, 0xD1B4C9D2, 0x2E18F5E8 } , { 0x1234, 0x4383DFBB, 0x8143F061, 0x55820530 } , { 0, 0, 0, 0 } , { 0x0091, 0x4C3FF8DA, 0xF77D0B9F, 0x61D8A3F4 } , { 0x186E, 0x7ABA658A, 0x7FF0C893, 0x16AA6668 } , { 0x18FF, 0x36859D50, 0x888DC30C, 0x7772C59C } , { 0, 0, 0, 0 } , { 0x0FE8, 0x62A9C3F9, 0x16CB65EB, 0x3415CC43 } , { 0x1A79, 0xD9C93DA7, 0x2891C56B, 0xC83CF966 } , { 0x1591, 0xBB60FE5E, 0x3E5AA080, 0xFC293525 } , { 0, 0, 0, 0 } , { 0x13C7, 0x668D0969, 0x5D2B246A, 0x29D214BB } , { 0x08A6, 0x97345AA9, 0xBB2DDE90, 0x2816E691 } , { 0x1B61, 0xF1B953C0, 0xE606FAFA, 0x01C4F22A } , { 0, 0, 0, 0 } , { 0x1D43, 0x4697DD81, 0x5210AB37, 0xBDB4D569 } , { 0x146E, 0xE982BE06, 0x27B2F0E0, 0x6A2904E5 } , { 0x092D, 0xAF156387, 0x75A25BD7, 0xD79DD18C } , { 0, 0, 0, 0 } , { 0x0519, 0xFF5879F1, 0x1852637C, 0x9B6B9A33 } , { 0x1CF7, 0x16F88427, 0xA0B5B4D1, 0x6100D523 } , { 0x19EE, 0xE9A0FDD6, 0xB8E7D7AD, 0xFA6B4F10 } , { 0, 0, 0, 0 } , { 0x04AA, 0xBF49894C, 0x3F6627F1, 0x5981313B } , { 0x0E63, 0xE6B11399, 0xC8C3EC99, 0xF86F51E9 } , { 0x0AC9, 0x59F89AD5, 0xF7A5CB68, 0xA1EE60D2 } , { 0, 0, 0, 0 } , { 0x1DDF, 0xE5124954, 0x5F10E6A2, 0x3926A89E } , { 0x1A9C, 0x19BC4158, 0x11BBB1FB, 0xC1EAE3F5 } , { 0x0743, 0xFCAE080C, 0x4EAB5759, 0xF8CC4B6B } , { 0, 0, 0, 0 } , { 0x1A34, 0xF205AB86, 0x70AE4803, 0x8DC66620 } , { 0x0553, 0x811DFA1A, 0xC53F7E1C, 0xF16331C3 } , { 0x1F67, 0x7318519C, 0xB591361F, 0x7CA557E3 } , { 0, 0, 0, 0 } , { 0x18B8, 0x335AC160, 0xBC2244BA, 0x13978C1E } , { 0x0E4F, 0xB2233CDC, 0x78EC2FD5, 0x9BFF7FF6 } , { 0x16F7, 0x8179FDBC, 0xC4CE6B6F, 0x8868F3E8 } , { 0, 0, 0, 0 } , { 0x0257, 0xAB0B49FE, 0xB64CEFBE, 0xDB6279B0 } } ; uint i; u32 t, yt,yh,ym,yl; const poly128 *p, *q; yt = yh = ym = yl = 0; p = &matrix[0]; #define FOR_BLOCK \ { q = &p[t & 3]; yt ^= q->t; yh ^= q->h; ym ^= q->m; yl ^= q->l; \ t >>= 2; p += 4; \ q = &p[t & 3]; yt ^= q->t; yh ^= q->h; ym ^= q->m; yl ^= q->l; \ t >>= 2; p += 4; \ } t = x.l; for (i = 8; i; --i) FOR_BLOCK t = x.m; for (i = 8; i; --i) FOR_BLOCK t = x.h; for (i = 8; i; --i) FOR_BLOCK t = x.t; for (i = 3; i; --i) FOR_BLOCK /* Last bit. */ q = &p[t & 1]; yt ^= q->t; yh ^= q->h; ym ^= q->m; yl ^= q->l; #undef FOR_BLOCK { poly128 res; res.t = yt; res.h = yh; res.m = ym; res.l = yl; return res; } /* end block */ #endif /* defined(MMX) ... #else ... */ } /* end function changeBasis */ /*-= Arithmetic in the field GF(2^109) -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ /*-- squareNTimes ----------------------------------------------------------*/ /* Square x, n times, in the field GF(2^109) i.e., as a poly in (Z/2Z)[t] * reduced modulo t^109+t^9+t^2+t+1. Degree x < 109. */ static poly128 squareNTimes(poly128 x, uint n) { /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #ifdef MMX static const mmx64 tab2[256] = { { 0, 0 }, { 1, 0 }, { 4, 0 }, { 5, 0 }, { 16, 0 } , { 17, 0 }, { 20, 0 }, { 21, 0 }, { 64, 0 }, { 65, 0 } , { 68, 0 }, { 69, 0 }, { 80, 0 }, { 81, 0 }, { 84, 0 } , { 85, 0 }, { 256, 0 }, { 257, 0 }, { 260, 0 }, { 261, 0 } , { 272, 0 }, { 273, 0 }, { 276, 0 }, { 277, 0 }, { 320, 0 } , { 321, 0 }, { 324, 0 }, { 325, 0 }, { 336, 0 }, { 337, 0 } , { 340, 0 }, { 341, 0 }, { 1024, 0 }, { 1025, 0 }, { 1028, 0 } , { 1029, 0 }, { 1040, 0 }, { 1041, 0 }, { 1044, 0 }, { 1045, 0 } , { 1088, 0 }, { 1089, 0 }, { 1092, 0 }, { 1093, 0 }, { 1104, 0 } , { 1105, 0 }, { 1108, 0 }, { 1109, 0 }, { 1280, 0 }, { 1281, 0 } , { 1284, 0 }, { 1285, 0 }, { 1296, 0 }, { 1297, 0 }, { 1300, 0 } , { 1301, 0 }, { 1344, 0 }, { 1345, 0 }, { 1348, 0 }, { 1349, 0 } , { 1360, 0 }, { 1361, 0 }, { 1364, 0 }, { 1365, 0 }, { 4096, 0 } , { 4097, 0 }, { 4100, 0 }, { 4101, 0 }, { 4112, 0 }, { 4113, 0 } , { 4116, 0 }, { 4117, 0 }, { 4160, 0 }, { 4161, 0 }, { 4164, 0 } , { 4165, 0 }, { 4176, 0 }, { 4177, 0 }, { 4180, 0 }, { 4181, 0 } , { 4352, 0 }, { 4353, 0 }, { 4356, 0 }, { 4357, 0 }, { 4368, 0 } , { 4369, 0 }, { 4372, 0 }, { 4373, 0 }, { 4416, 0 }, { 4417, 0 } , { 4420, 0 }, { 4421, 0 }, { 4432, 0 }, { 4433, 0 }, { 4436, 0 } , { 4437, 0 }, { 5120, 0 }, { 5121, 0 }, { 5124, 0 }, { 5125, 0 } , { 5136, 0 }, { 5137, 0 }, { 5140, 0 }, { 5141, 0 }, { 5184, 0 } , { 5185, 0 }, { 5188, 0 }, { 5189, 0 }, { 5200, 0 }, { 5201, 0 } , { 5204, 0 }, { 5205, 0 }, { 5376, 0 }, { 5377, 0 }, { 5380, 0 } , { 5381, 0 }, { 5392, 0 }, { 5393, 0 }, { 5396, 0 }, { 5397, 0 } , { 5440, 0 }, { 5441, 0 }, { 5444, 0 }, { 5445, 0 }, { 5456, 0 } , { 5457, 0 }, { 5460, 0 }, { 5461, 0 }, { 16384, 0 }, { 16385, 0 } , { 16388, 0 }, { 16389, 0 }, { 16400, 0 }, { 16401, 0 }, { 16404, 0 } , { 16405, 0 }, { 16448, 0 }, { 16449, 0 }, { 16452, 0 }, { 16453, 0 } , { 16464, 0 }, { 16465, 0 }, { 16468, 0 }, { 16469, 0 }, { 16640, 0 } , { 16641, 0 }, { 16644, 0 }, { 16645, 0 }, { 16656, 0 }, { 16657, 0 } , { 16660, 0 }, { 16661, 0 }, { 16704, 0 }, { 16705, 0 }, { 16708, 0 } , { 16709, 0 }, { 16720, 0 }, { 16721, 0 }, { 16724, 0 }, { 16725, 0 } , { 17408, 0 }, { 17409, 0 }, { 17412, 0 }, { 17413, 0 }, { 17424, 0 } , { 17425, 0 }, { 17428, 0 }, { 17429, 0 }, { 17472, 0 }, { 17473, 0 } , { 17476, 0 }, { 17477, 0 }, { 17488, 0 }, { 17489, 0 }, { 17492, 0 } , { 17493, 0 }, { 17664, 0 }, { 17665, 0 }, { 17668, 0 }, { 17669, 0 } , { 17680, 0 }, { 17681, 0 }, { 17684, 0 }, { 17685, 0 }, { 17728, 0 } , { 17729, 0 }, { 17732, 0 }, { 17733, 0 }, { 17744, 0 }, { 17745, 0 } , { 17748, 0 }, { 17749, 0 }, { 20480, 0 }, { 20481, 0 }, { 20484, 0 } , { 20485, 0 }, { 20496, 0 }, { 20497, 0 }, { 20500, 0 }, { 20501, 0 } , { 20544, 0 }, { 20545, 0 }, { 20548, 0 }, { 20549, 0 }, { 20560, 0 } , { 20561, 0 }, { 20564, 0 }, { 20565, 0 }, { 20736, 0 }, { 20737, 0 } , { 20740, 0 }, { 20741, 0 }, { 20752, 0 }, { 20753, 0 }, { 20756, 0 } , { 20757, 0 }, { 20800, 0 }, { 20801, 0 }, { 20804, 0 }, { 20805, 0 } , { 20816, 0 }, { 20817, 0 }, { 20820, 0 }, { 20821, 0 }, { 21504, 0 } , { 21505, 0 }, { 21508, 0 }, { 21509, 0 }, { 21520, 0 }, { 21521, 0 } , { 21524, 0 }, { 21525, 0 }, { 21568, 0 }, { 21569, 0 }, { 21572, 0 } , { 21573, 0 }, { 21584, 0 }, { 21585, 0 }, { 21588, 0 }, { 21589, 0 } , { 21760, 0 }, { 21761, 0 }, { 21764, 0 }, { 21765, 0 }, { 21776, 0 } , { 21777, 0 }, { 21780, 0 }, { 21781, 0 }, { 21824, 0 }, { 21825, 0 } , { 21828, 0 }, { 21829, 0 }, { 21840, 0 }, { 21841, 0 }, { 21844, 0 } , { 21845, 0 } }; static const mmx64 tab4[256] = { { 0x00000000, 0 }, { 0x00000001, 0 }, { 0x00000010, 0 }, { 0x00000011, 0 } , { 0x00000100, 0 }, { 0x00000101, 0 }, { 0x00000110, 0 }, { 0x00000111, 0 } , { 0x00001000, 0 }, { 0x00001001, 0 }, { 0x00001010, 0 }, { 0x00001011, 0 } , { 0x00001100, 0 }, { 0x00001101, 0 }, { 0x00001110, 0 }, { 0x00001111, 0 } , { 0x00010000, 0 }, { 0x00010001, 0 }, { 0x00010010, 0 }, { 0x00010011, 0 } , { 0x00010100, 0 }, { 0x00010101, 0 }, { 0x00010110, 0 }, { 0x00010111, 0 } , { 0x00011000, 0 }, { 0x00011001, 0 }, { 0x00011010, 0 }, { 0x00011011, 0 } , { 0x00011100, 0 }, { 0x00011101, 0 }, { 0x00011110, 0 }, { 0x00011111, 0 } , { 0x00100000, 0 }, { 0x00100001, 0 }, { 0x00100010, 0 }, { 0x00100011, 0 } , { 0x00100100, 0 }, { 0x00100101, 0 }, { 0x00100110, 0 }, { 0x00100111, 0 } , { 0x00101000, 0 }, { 0x00101001, 0 }, { 0x00101010, 0 }, { 0x00101011, 0 } , { 0x00101100, 0 }, { 0x00101101, 0 }, { 0x00101110, 0 }, { 0x00101111, 0 } , { 0x00110000, 0 }, { 0x00110001, 0 }, { 0x00110010, 0 }, { 0x00110011, 0 } , { 0x00110100, 0 }, { 0x00110101, 0 }, { 0x00110110, 0 }, { 0x00110111, 0 } , { 0x00111000, 0 }, { 0x00111001, 0 }, { 0x00111010, 0 }, { 0x00111011, 0 } , { 0x00111100, 0 }, { 0x00111101, 0 }, { 0x00111110, 0 }, { 0x00111111, 0 } , { 0x01000000, 0 }, { 0x01000001, 0 }, { 0x01000010, 0 }, { 0x01000011, 0 } , { 0x01000100, 0 }, { 0x01000101, 0 }, { 0x01000110, 0 }, { 0x01000111, 0 } , { 0x01001000, 0 }, { 0x01001001, 0 }, { 0x01001010, 0 }, { 0x01001011, 0 } , { 0x01001100, 0 }, { 0x01001101, 0 }, { 0x01001110, 0 }, { 0x01001111, 0 } , { 0x01010000, 0 }, { 0x01010001, 0 }, { 0x01010010, 0 }, { 0x01010011, 0 } , { 0x01010100, 0 }, { 0x01010101, 0 }, { 0x01010110, 0 }, { 0x01010111, 0 } , { 0x01011000, 0 }, { 0x01011001, 0 }, { 0x01011010, 0 }, { 0x01011011, 0 } , { 0x01011100, 0 }, { 0x01011101, 0 }, { 0x01011110, 0 }, { 0x01011111, 0 } , { 0x01100000, 0 }, { 0x01100001, 0 }, { 0x01100010, 0 }, { 0x01100011, 0 } , { 0x01100100, 0 }, { 0x01100101, 0 }, { 0x01100110, 0 }, { 0x01100111, 0 } , { 0x01101000, 0 }, { 0x01101001, 0 }, { 0x01101010, 0 }, { 0x01101011, 0 } , { 0x01101100, 0 }, { 0x01101101, 0 }, { 0x01101110, 0 }, { 0x01101111, 0 } , { 0x01110000, 0 }, { 0x01110001, 0 }, { 0x01110010, 0 }, { 0x01110011, 0 } , { 0x01110100, 0 }, { 0x01110101, 0 }, { 0x01110110, 0 }, { 0x01110111, 0 } , { 0x01111000, 0 }, { 0x01111001, 0 }, { 0x01111010, 0 }, { 0x01111011, 0 } , { 0x01111100, 0 }, { 0x01111101, 0 }, { 0x01111110, 0 }, { 0x01111111, 0 } , { 0x10000000, 0 }, { 0x10000001, 0 }, { 0x10000010, 0 }, { 0x10000011, 0 } , { 0x10000100, 0 }, { 0x10000101, 0 }, { 0x10000110, 0 }, { 0x10000111, 0 } , { 0x10001000, 0 }, { 0x10001001, 0 }, { 0x10001010, 0 }, { 0x10001011, 0 } , { 0x10001100, 0 }, { 0x10001101, 0 }, { 0x10001110, 0 }, { 0x10001111, 0 } , { 0x10010000, 0 }, { 0x10010001, 0 }, { 0x10010010, 0 }, { 0x10010011, 0 } , { 0x10010100, 0 }, { 0x10010101, 0 }, { 0x10010110, 0 }, { 0x10010111, 0 } , { 0x10011000, 0 }, { 0x10011001, 0 }, { 0x10011010, 0 }, { 0x10011011, 0 } , { 0x10011100, 0 }, { 0x10011101, 0 }, { 0x10011110, 0 }, { 0x10011111, 0 } , { 0x10100000, 0 }, { 0x10100001, 0 }, { 0x10100010, 0 }, { 0x10100011, 0 } , { 0x10100100, 0 }, { 0x10100101, 0 }, { 0x10100110, 0 }, { 0x10100111, 0 } , { 0x10101000, 0 }, { 0x10101001, 0 }, { 0x10101010, 0 }, { 0x10101011, 0 } , { 0x10101100, 0 }, { 0x10101101, 0 }, { 0x10101110, 0 }, { 0x10101111, 0 } , { 0x10110000, 0 }, { 0x10110001, 0 }, { 0x10110010, 0 }, { 0x10110011, 0 } , { 0x10110100, 0 }, { 0x10110101, 0 }, { 0x10110110, 0 }, { 0x10110111, 0 } , { 0x10111000, 0 }, { 0x10111001, 0 }, { 0x10111010, 0 }, { 0x10111011, 0 } , { 0x10111100, 0 }, { 0x10111101, 0 }, { 0x10111110, 0 }, { 0x10111111, 0 } , { 0x11000000, 0 }, { 0x11000001, 0 }, { 0x11000010, 0 }, { 0x11000011, 0 } , { 0x11000100, 0 }, { 0x11000101, 0 }, { 0x11000110, 0 }, { 0x11000111, 0 } , { 0x11001000, 0 }, { 0x11001001, 0 }, { 0x11001010, 0 }, { 0x11001011, 0 } , { 0x11001100, 0 }, { 0x11001101, 0 }, { 0x11001110, 0 }, { 0x11001111, 0 } , { 0x11010000, 0 }, { 0x11010001, 0 }, { 0x11010010, 0 }, { 0x11010011, 0 } , { 0x11010100, 0 }, { 0x11010101, 0 }, { 0x11010110, 0 }, { 0x11010111, 0 } , { 0x11011000, 0 }, { 0x11011001, 0 }, { 0x11011010, 0 }, { 0x11011011, 0 } , { 0x11011100, 0 }, { 0x11011101, 0 }, { 0x11011110, 0 }, { 0x11011111, 0 } , { 0x11100000, 0 }, { 0x11100001, 0 }, { 0x11100010, 0 }, { 0x11100011, 0 } , { 0x11100100, 0 }, { 0x11100101, 0 }, { 0x11100110, 0 }, { 0x11100111, 0 } , { 0x11101000, 0 }, { 0x11101001, 0 }, { 0x11101010, 0 }, { 0x11101011, 0 } , { 0x11101100, 0 }, { 0x11101101, 0 }, { 0x11101110, 0 }, { 0x11101111, 0 } , { 0x11110000, 0 }, { 0x11110001, 0 }, { 0x11110010, 0 }, { 0x11110011, 0 } , { 0x11110100, 0 }, { 0x11110101, 0 }, { 0x11110110, 0 }, { 0x11110111, 0 } , { 0x11111000, 0 }, { 0x11111001, 0 }, { 0x11111010, 0 }, { 0x11111011, 0 } , { 0x11111100, 0 }, { 0x11111101, 0 }, { 0x11111110, 0 }, { 0x11111111, 0 } }; u32 xt,xh,xm,xl; xt = x.t; xh = x.h; xm = x.m; xl = x.l; START_MMX(); #define mm_YL 0 #define mm_YH 1 #define mm_T 2 #define mm_U 3 #define MUNGE \ CPY(mm_U, mm_T); SRL(mm_U, 36); SLL(mm_T, 19); \ XOR(mm_YH, mm_U); XOR(mm_YL, mm_T); \ SRL(mm_U, 7); SLL(mm_T, 1); \ XOR(mm_YH, mm_U); XOR(mm_YL, mm_T); \ SRL(mm_U, 1); SLL(mm_T, 1); \ XOR(mm_YH, mm_U); XOR(mm_YL, mm_T); \ SRL(mm_U, 1); SLL(mm_T, 7); \ XOR(mm_YH, mm_U); XOR(mm_YL, mm_T) /* ; */ /* Two at a time. */ for ( ; n >= 2; n -= 2) { MMX_STATIC mmx64 yh, yl; LOD(mm_T, tab4[xt>> 8 /* & 31 */ ]); LOD(mm_YH, tab4[xh>>24]); LOD(mm_YL, tab4[xh>> 8 & 255]); SLL(mm_T, 32); SLL(mm_YH, 32); SLL(mm_YL, 32); XOR_MEM(mm_T, tab4[xt & 255]); XOR_MEM(mm_YH, tab4[xh>>16 & 255]); XOR_MEM(mm_YL, tab4[xh & 255]); MUNGE; CPY(mm_T, mm_YH); CPY(mm_YH, mm_YL); LOD(mm_YL, tab4[xm>>24]); SLL(mm_YL, 32); XOR_MEM(mm_YL, tab4[xm>>16 & 255]); MUNGE; CPY(mm_T, mm_YH); CPY(mm_YH, mm_YL); LOD(mm_YL, tab4[xm>> 8 & 255]); SLL(mm_YL, 32); XOR_MEM(mm_YL, tab4[xm & 255]); MUNGE; CPY(mm_T, mm_YH); CPY(mm_YH, mm_YL); LOD(mm_YL, tab4[xl>>24]); SLL(mm_YL, 32); XOR_MEM(mm_YL, tab4[xl>>16 & 255]); MUNGE; CPY(mm_T, mm_YH); CPY(mm_YH, mm_YL); LOD(mm_YL, tab4[xl>> 8 & 255]); SLL(mm_YL, 32); XOR_MEM(mm_YL, tab4[xl & 255]); MUNGE; /* Reduce modulo t^109+t^9+t^2+t+1. */ CPY(mm_T, mm_YH); SRL(mm_T, 45); SLL(mm_YH, 19); SRL(mm_YH, 19); XOR(mm_YL, mm_T); SLL(mm_T, 1); XOR(mm_YL, mm_T); SLL(mm_T, 1); XOR(mm_YL, mm_T); SLL(mm_T, 7); XOR(mm_YL, mm_T); STR(yh, mm_YH); STR(yl, mm_YL); xt = yh.hi; xh = yh.lo; xm = yl.hi; xl = yl.lo; } /* end for */ #undef MUNGE #define mm_TMP 4 #define mm_TMP2 5 /* Last one (if n was odd). */ if (n) { MMX_STATIC mmx64 yh,yl; LOD(mm_U, tab2[xt>> 8 /* & 31 */ ]); LOD(mm_T, tab2[xh>>24]); LOD(mm_YH, tab2[xm>>24]); LOD(mm_YL, tab2[xl>>24]); SLL(mm_U, 16); ORR_MEM(mm_U, tab2[xt & 255]); SLL(mm_T, 16); ORR_MEM(mm_T, tab2[xh>>16 & 255]); SLL(mm_YH, 16); ORR_MEM(mm_YH, tab2[xm>>16 & 255]); SLL(mm_YL, 16); ORR_MEM(mm_YL, tab2[xl>>16 & 255]); SLL(mm_T, 16); ORR_MEM(mm_T, tab2[xh>> 8 & 255]); SLL(mm_YH, 16); ORR_MEM(mm_YH, tab2[xm>> 8 & 255]); SLL(mm_YL, 16); ORR_MEM(mm_YL, tab2[xl>> 8 & 255]); SLL(mm_T, 16); ORR_MEM(mm_T, tab2[xh & 255]); SLL(mm_YH, 16); ORR_MEM(mm_YH, tab2[xm & 255]); SLL(mm_YL, 16); ORR_MEM(mm_YL, tab2[xl & 255]); /* Reduce modulo t^109+t^9+t^2+t+1. */ CPY(mm_TMP, mm_T); SLL(mm_U, 19); XOR(mm_YH, mm_U); SRL(mm_TMP, 36); XOR(mm_YH, mm_TMP); SLL(mm_T, 19); XOR(mm_YL, mm_T); SLL(mm_U, 1); XOR(mm_YH, mm_U); SRL(mm_TMP, 7); XOR(mm_YH, mm_TMP); SLL(mm_T, 1); XOR(mm_YL, mm_T); SLL(mm_U, 1); XOR(mm_YH, mm_U); SRL(mm_TMP, 1); XOR(mm_YH, mm_TMP); SLL(mm_T, 1); XOR(mm_YL, mm_T); SLL(mm_U, 7); XOR(mm_YH, mm_U); SRL(mm_TMP, 1); XOR(mm_YH, mm_TMP); SLL(mm_T, 7); XOR(mm_YL, mm_T); CPY(mm_TMP, mm_YH); SRL(mm_TMP, 45); SLL(mm_YH, 19); SRL(mm_YH, 19); XOR(mm_YL, mm_TMP); SLL(mm_TMP, 1); XOR(mm_YL, mm_TMP); SLL(mm_TMP, 1); XOR(mm_YL, mm_TMP); SLL(mm_TMP, 7); XOR(mm_YL, mm_TMP); STR(yh, mm_YH); STR(yl, mm_YL); xt = yh.hi; xh = yh.lo; xm = yl.hi; xl = yl.lo; } /* end if */ #undef mm_YL #undef mm_YH #undef mm_T #undef mm_U #undef mm_TMP #undef mm_TMP2 END_MMX(); { poly128 r; r.t = xt; r.h = xh; r.m = xm; r.l = xl; return r; } /* end block */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #else /* !defined(MMX) */ static const u32 tab[256] = { 0, 1, 4, 5, 16, 17, 20, 21 , 64, 65, 68, 69, 80, 81, 84, 85 , 256, 257, 260, 261, 272, 273, 276, 277 , 320, 321, 324, 325, 336, 337, 340, 341 , 1024, 1025, 1028, 1029, 1040, 1041, 1044, 1045 , 1088, 1089, 1092, 1093, 1104, 1105, 1108, 1109 , 1280, 1281, 1284, 1285, 1296, 1297, 1300, 1301 , 1344, 1345, 1348, 1349, 1360, 1361, 1364, 1365 , 4096, 4097, 4100, 4101, 4112, 4113, 4116, 4117 , 4160, 4161, 4164, 4165, 4176, 4177, 4180, 4181 , 4352, 4353, 4356, 4357, 4368, 4369, 4372, 4373 , 4416, 4417, 4420, 4421, 4432, 4433, 4436, 4437 , 5120, 5121, 5124, 5125, 5136, 5137, 5140, 5141 , 5184, 5185, 5188, 5189, 5200, 5201, 5204, 5205 , 5376, 5377, 5380, 5381, 5392, 5393, 5396, 5397 , 5440, 5441, 5444, 5445, 5456, 5457, 5460, 5461 , 16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405 , 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469 , 16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661 , 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725 , 17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429 , 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493 , 17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685 , 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749 , 20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501 , 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565 , 20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757 , 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821 , 21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525 , 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589 , 21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781 , 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845 }; u32 xt,xh,xm,xl; xt = x.t; xh = x.h; xm = x.m; xl = x.l; for ( ; n; --n) { u32 t0,t1,t2,t3,t4,t5,t6, w; t6 = tab[xt & 255] | tab[xt>>8 & 255]<<16; t5 = tab[xh>>16 & 255] | tab[xh>>24]<<16; t4 = tab[xh & 255] | tab[xh>>8 & 255]<<16; t3 = tab[xm>>16 & 255] | tab[xm>>24]<<16; t2 = tab[xm & 255] | tab[xm>>8 & 255]<<16; t1 = tab[xl>>16 & 255] | tab[xl>>24]<<16; t0 = tab[xl & 255] | tab[xl>>8 & 255]<<16; /* Reduce modulo t^109+t^9+t^2+t+1. */ t3 ^= t6>>13; t3 ^= t6>>12; t3 ^= t6>>11; t3 ^= t6>>4; t2 ^= t6<<19; t2 ^= t6<<20; t2 ^= t6<<21; t2 ^= t6<<28; t2 ^= t5>>13; t2 ^= t5>>12; t2 ^= t5>>11; t2 ^= t5>>4; t1 ^= t5<<19; t1 ^= t5<<20; t1 ^= t5<<21; t1 ^= t5<<28; t1 ^= t4>>13; t1 ^= t4>>12; t1 ^= t4>>11; t1 ^= t4>>4; t0 ^= t4<<19; t0 ^= t4<<20; t0 ^= t4<<21; t0 ^= t4<<28; w = t3>>13; t3 ^= w<<13; t0 ^= w; t0 ^= w<<1; t0 ^= w<<2; t0 ^= w<<9; xt = t3; xh = t2; xm = t1; xl = t0; } /* end for */ { poly128 r; r.t = xt; r.h = xh; r.m = xm; r.l = xl; return r; } /* end block */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #endif /* defined(MMX) ... #else ... */ } /* end function squareNTimes */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #if PROD == 1 /*-- GF2Product27x27 -------------------------------------------------------*/ /* Computes 53-bit product of polynomials over Z/2Z with degree < 27. * Returns low 32 bits of result, and leaves top 21 bits in *ph. */ static u32 GF2Product27x27(u32 a, u32 b, u32 *ph) { u32 a2,a4, h, l; a2 = a<<1; a4 = a<<2; { u32 s, t, tab[8]; tab[0] = 0; tab[1] = a; tab[2] = a2; tab[3] = a ^ a2; tab[4] = a4; tab[5] = a ^ a4; tab[6] = a2 ^ a4; tab[7] = a ^ a2 ^ a4; #if 0 l = tab[b>>24]; l = l<<3 ^ tab[b>>21 & 7]; h = l>>29; l = l<<3 ^ tab[b>>18 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>15 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>12 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>9 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>6 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>3 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b & 7]; #else s = tab[b & 7]; t = tab[b>> 3 & 7]; l = s; s = tab[b>> 6 & 7]; l ^= t<< 3; t = tab[b>> 9 & 7]; l ^= s<< 6; h = s>>26; s = tab[b>>12 & 7]; l ^= t<< 9; h ^= t>>23; t = tab[b>>15 & 7]; l ^= s<<12; h ^= s>>20; s = tab[b>>18 & 7]; l ^= t<<15; h ^= t>>17; t = tab[b>>21 & 7]; l ^= s<<18; h ^= s>>14; s = tab[b>>24 ]; l ^= t<<21; h ^= t>>11; l ^= s<<24; h ^= s>> 8; #endif *ph = h; return l; } /* end block */ } /* end function GF2Product27x27 */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #elif PROD == 3 /*-- GF2Product28x28 -------------------------------------------------------*/ /* Computes 55-bit product of polynomials over Z/2Z with degree < 28. * Returns low 32 bits of result, and leaves top 23 bits in *ph. */ static u32 GF2Product28x28(u32 a, u32 b, u32 *ph) { u32 a2,a4, h, l; a2 = a<<1; a4 = a<<2; { u32 s, t, tab[8]; tab[0] = 0; tab[1] = a; tab[2] = a2; tab[3] = a ^ a2; tab[4] = a4; tab[5] = a ^ a4; tab[6] = a2 ^ a4; tab[7] = a ^ a2 ^ a4; #if 0 l = tab[b>>27]; l = l<<3 ^ tab[b>>24 & 7]; h = l>>29; l = l<<3 ^ tab[b>>21 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>18 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>15 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>12 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>9 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>6 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b>>3 & 7]; h = h<<3 | l>>29; l = l<<3 ^ tab[b & 7]; #else s = tab[b & 7]; t = tab[b>> 3 & 7]; l = s; s = tab[b>> 6 & 7]; l ^= t<< 3; h = t>>29; t = tab[b>> 9 & 7]; l ^= s<< 6; h ^= s>>26; s = tab[b>>12 & 7]; l ^= t<< 9; h ^= t>>23; t = tab[b>>15 & 7]; l ^= s<<12; h ^= s>>20; s = tab[b>>18 & 7]; l ^= t<<15; h ^= t>>17; t = tab[b>>21 & 7]; l ^= s<<18; h ^= s>>14; s = tab[b>>24 & 7]; l ^= t<<21; h ^= t>>11; t = tab[b>>27 ]; l ^= s<<24; h ^= s>> 8; l ^= t<<27; h ^= t>> 5; #endif *ph = h; return l; } /* end block */ } /* end function GF2Product28x28 */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #elif PROD == 5 /*-- GF2Product32x32 -------------------------------------------------------*/ /* Computes 63-bit product of polynomials over Z/2Z with degree < 32. * Returns low 32 bits of result, and leaves top 31 bits in *ph. */ static u32 GF2Product32x32(u32 a, u32 b, u32 *ph) { u32 a1,a2,a4, h, l; a4 = a<<2; a2 = a4>>1; a1 = a4>>2; { u32 s, t, tab[8]; tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1 ^ a2; tab[4] = a4; tab[5] = a1 ^ a4; tab[6] = a2 ^ a4; tab[7] = a1 ^ a2 ^ a4; #if 0 l = h = 0; if (a>>31 & 1) { l = b<<31; h = b>>1; } if (a>>30 & 1) { l ^= b<<30; h ^= b>>2; } s = tab[b & 7]; t = tab[b>> 3 & 7]; l ^= s; s = tab[b>> 6 & 7]; l ^= t<< 3; h ^= t>>29; t = tab[b>> 9 & 7]; l ^= s<< 6; h ^= s>>26; s = tab[b>>12 & 7]; l ^= t<< 9; h ^= t>>23; t = tab[b>>15 & 7]; l ^= s<<12; h ^= s>>20; s = tab[b>>18 & 7]; l ^= t<<15; h ^= t>>17; t = tab[b>>21 & 7]; l ^= s<<18; h ^= s>>14; s = tab[b>>24 & 7]; l ^= t<<21; h ^= t>>11; t = tab[b>>27 & 7]; l ^= s<<24; h ^= s>> 8; s = tab[b>>30 ]; l ^= t<<27; h ^= t>> 5; l ^= s<<30; h ^= s>> 2; #else s = tab[b & 7]; t = tab[b>> 3 & 7]; l = s; s = tab[b>> 6 & 7]; l ^= t<< 3; h = t>>29; t = tab[b>> 9 & 7]; l ^= s<< 6; h ^= s>>26; s = tab[b>>12 & 7]; l ^= t<< 9; h ^= t>>23; t = tab[b>>15 & 7]; l ^= s<<12; h ^= s>>20; s = tab[b>>18 & 7]; l ^= t<<15; h ^= t>>17; t = tab[b>>21 & 7]; l ^= s<<18; h ^= s>>14; s = tab[b>>24 & 7]; l ^= t<<21; h ^= t>>11; t = tab[b>>27 & 7]; l ^= s<<24; h ^= s>> 8; s = tab[b>>30 ]; l ^= t<<27; h ^= t>> 5; l ^= s<<30; h ^= s>> 2; if (a>>31 & 1) { l ^= b<<31; h ^= b>>1; } if (a>>30 & 1) { l ^= b<<30; h ^= b>>2; } #endif } /* end block */ *ph = h; return l; } /* end function GF2Product32x32 */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #endif /* PROD == 1 ... #elif PROD == 3 ... #elif PROD == 5 ... */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #ifdef MMX /*-- MMX_GF2Product56x56 ---------------------------------------------------*/ /* Computes 111-bit product of polynomials xh:xl and *py over Z/2Z * with degree < 56. Speed-critical auxiliary function used for product(). * Returns with low 64 bits of result in *pl and high 47 bits in *ph. */ static void MMX_GF2Product56x56 ( u32 xh, u32 xl, const mmx64 *py, mmx64 *ph, mmx64 *pl ) { u32 w; MMX_STATIC mmx64 tab[16]; /* Note: No START_MMX() here. It is done at start of product(). */ #define mm_E 0 #define mm_Y1 1 #define mm_Y2 2 #define mm_Y4 3 #define mm_Y8 4 LOD(mm_Y1, *py); CPY(mm_Y2, mm_Y1); SLL(mm_Y2, 1); CPY(mm_Y4, mm_Y1); SLL(mm_Y4, 2); CPY(mm_Y8, mm_Y1); SLL(mm_Y8, 3); /* Gray code walk through table. */ XOR(mm_E, mm_E); STR(tab[ 0], mm_E); CPY(mm_E, mm_Y1); STR(tab[ 1], mm_Y1); XOR(mm_E, mm_Y2); STR(tab[ 3], mm_E); CPY(mm_E, mm_Y2); STR(tab[ 2], mm_Y2); XOR(mm_E, mm_Y4); STR(tab[ 6], mm_E); XOR(mm_E, mm_Y1); STR(tab[ 7], mm_E); XOR(mm_E, mm_Y2); STR(tab[ 5], mm_E); CPY(mm_E, mm_Y4); STR(tab[ 4], mm_Y4); XOR(mm_E, mm_Y8); STR(tab[12], mm_E); XOR(mm_E, mm_Y1); STR(tab[13], mm_E); XOR(mm_E, mm_Y2); STR(tab[15], mm_E); XOR(mm_E, mm_Y1); STR(tab[14], mm_E); XOR(mm_E, mm_Y4); STR(tab[10], mm_E); XOR(mm_E, mm_Y1); STR(tab[11], mm_E); XOR(mm_E, mm_Y2); STR(tab[ 9], mm_E); STR(tab[ 8], mm_Y8); #undef mm_E #undef mm_Y1 #undef mm_Y2 #undef mm_Y4 #undef mm_Y8 #define mm_A 0 #define mm_B 1 #define mm_C 2 #define mm_D 3 #define mm_E 4 #define mm_F 5 #define mm_G 6 #define mm_T 7 w = xl; LOD(mm_A, tab[w>> 4 & 15]); LOD(mm_B, tab[w>>12 & 15]); SLL(mm_A, 4); SLL(mm_B, 4); XOR_MEM(mm_A, tab[w & 15]); XOR_MEM(mm_B, tab[w>> 8 & 15]); LOD(mm_C, tab[w>>20 & 15]); LOD(mm_D, tab[w>>28/* & 15*/]); SLL(mm_C, 4); SLL(mm_D, 4); XOR_MEM(mm_C, tab[w>>16 & 15]); XOR_MEM(mm_D, tab[w>>24 & 15]); w = xh; LOD(mm_E, tab[w>> 4 & 15]); LOD(mm_F, tab[w>>12 & 15]); SLL(mm_E, 4); SLL(mm_F, 4); XOR_MEM(mm_E, tab[w & 15]); XOR_MEM(mm_F, tab[w>> 8 & 15]); LOD(mm_G, tab[w>>20/* & 15*/]); SLL(mm_G, 4); XOR_MEM(mm_G, tab[w>>16 & 15]); CPY(mm_T, mm_B); SLL(mm_B, 8); SRL(mm_T, 8); XOR(mm_A, mm_B); XOR(mm_T, mm_C); SLL(mm_C, 16); SRL(mm_T, 8); XOR(mm_A, mm_C); XOR(mm_T, mm_D); SLL(mm_D, 24); SRL(mm_T, 8); XOR(mm_A, mm_D); XOR(mm_T, mm_E); SLL(mm_E, 32); SRL(mm_T, 8); XOR(mm_A, mm_E); XOR(mm_T, mm_F); SLL(mm_F, 40); SRL(mm_T, 8); XOR(mm_A, mm_F); XOR(mm_T, mm_G); SLL(mm_G, 48); SRL(mm_T, 16); XOR(mm_A, mm_G); STR(*ph, mm_T); STR(*pl, mm_A); #undef mm_A #undef mm_B #undef mm_C #undef mm_D #undef mm_E #undef mm_F #undef mm_G #undef mm_T /* Note: No END_MMX() here. It is done at end of product(). */ } /* end function MMX_GF2Product56x56 */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #elif PROD == 1 || PROD == 2 /*-- GF2Product54x54 -------------------------------------------------------*/ /* Computes 107-bit product of polynomials xh:xl and yh:yl over Z/2Z * with degree < 54. Speed-critical auxiliary function used for product(). * Returns low 32 bits of result, leaves middle 64 bits in *pm and *ph * and high 11 bits in *pt. */ static u32 GF2Product54x54 ( u32 xh, u32 xl, u32 yh, u32 yl, u32 *pt, u32 *ph, u32 *pm ) { #if PROD == 1 u32 l0,l1, m0,m1, h0,h1; const u32 mask = (1UL<<27)-1; xh = xh<<5 | xl>>27; xl &= mask; yh = yh<<5 | yl>>27; yl &= mask; h0 = GF2Product27x27(xh, yh, &h1); l0 = GF2Product27x27(xl, yl, &l1); m0 = GF2Product27x27(xl ^ xh, yl ^ yh, &m1); m0 ^= l0; m1 ^= l1; m0 ^= h0; m1 ^= h1; l1 ^= h0<<22; h0 = h0>>10 | h1<<22; h1 >>= 10; l0 ^= m0<<27; l1 ^= m0>>5 | m1<<27; h0 ^= m1>>5; *pt = h1; *ph = h0; *pm = l1; return l0; #else /* PROD == 2 */ u32 tab[32], w0, w1; { u32 yh2, yh4, yh8; yh2 = yh<<1 ^ yl>>31; yh4 = yh<<2 ^ yl>>30; yh8 = yh<<3 ^ yl>>29; #define STASH(t) do { tab[2*t] = w0; tab[2*t+1] = w1; } while (0) /* Gray code walk through table. */ /* 0000 */ w0 = 0; w1 = 0; STASH(0); /* 0001 */ w0 = yl; w1 = yh; STASH(1); /* 0011 */ w0 = yl ^ yl<<1; w1 = yh ^ yh2; STASH(3); /* 0010 */ w0 = yl<<1; w1 = yh2; STASH(2); /* 0110 */ w0 ^= yl<<2; w1 = yh2 ^ yh4; STASH(6); /* 0111 */ w0 ^= yl; w1 ^= yh; STASH(7); /* 0101 */ w0 = yl ^ yl<<2; w1 = yh ^ yh4; STASH(5); /* 0100 */ w0 = yl<<2; w1 = yh4; STASH(4); /* 1100 */ w0 ^= yl<<3; w1 = yh4 ^ yh8; STASH(12); /* 1101 */ w0 ^= yl; w1 ^= yh; STASH(13); /* 1111 */ w0 ^= yl<<1; w1 ^= yh2; STASH(15); /* 1110 */ w0 ^= yl; w1 ^= yh; STASH(14); /* 1010 */ w0 ^= yl<<2; w1 = yh2 ^ yh8; STASH(10); /* 1011 */ w0 ^= yl; w1 ^= yh; STASH(11); /* 1001 */ w0 = yl ^ yl<<3; w1 = yh ^ yh8; STASH(9); /* 1000 */ w0 = yl<<3; w1 = yh8; STASH(8); #undef STASH } /* end block */ { u32 a0,a1,a2,a3, t; #define GRAB do { w0 = tab[2*t]; w1 = tab[2*t+1]; } while (0) t = xl & 15; GRAB; a0 = w0; a1 = w1; t = xl>> 4 & 15; GRAB; a0 ^= w0<< 4; a1 ^= w0>>28; a1 ^= w1<< 4; t = xl>> 8 & 15; GRAB; a0 ^= w0<< 8; a1 ^= w0>>24; a1 ^= w1<< 8; a2 = w1>>24; t = xl>>12 & 15; GRAB; a0 ^= w0<<12; a1 ^= w0>>20; a1 ^= w1<<12; a2 ^= w1>>20; t = xl>>16 & 15; GRAB; a0 ^= w0<<16; a1 ^= w0>>16; a1 ^= w1<<16; a2 ^= w1>>16; t = xl>>20 & 15; GRAB; a0 ^= w0<<20; a1 ^= w0>>12; a1 ^= w1<<20; a2 ^= w1>>12; t = xl>>24 & 15; GRAB; a0 ^= w0<<24; a1 ^= w0>> 8; a1 ^= w1<<24; a2 ^= w1>>8; t = xl>>28; GRAB; a0 ^= w0<<28; a1 ^= w0>> 4; a1 ^= w1<<28; a2 ^= w1>>4; t = xh & 15; GRAB; a1 ^= w0; a2 ^= w1; t = xh>> 4 & 15; GRAB; a1 ^= w0<< 4; a2 ^= w0>>28; a2 ^= w1<< 4; t = xh>> 8 & 15; GRAB; a1 ^= w0<< 8; a2 ^= w0>>24; a2 ^= w1<< 8; a3 = w1>>24; t = xh>>12 & 15; GRAB; a1 ^= w0<<12; a2 ^= w0>>20; a2 ^= w1<<12; a3 ^= w1>>20; t = xh>>16 & 15; GRAB; a1 ^= w0<<16; a2 ^= w0>>16; a2 ^= w1<<16; a3 ^= w1>>16; t = xh>>20 /* & 3 */; GRAB; a1 ^= w0<<20; a2 ^= w0>>12; a2 ^= w1<<20; a3 ^= w1>>12; #undef GRAB *pt = a3; *ph = a2; *pm = a1; return a0; } /* end block */ #endif /* PROD == 1 ... #else ... */ } /* end function GF2Product54x54 */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #else /* PROD == 3 || PROD == 4 || PROD = 5 */ /*-- GF2Product56x56 -------------------------------------------------------*/ /* Computes 111-bit product of polynomials xh:xl and yh:yl over Z/2Z * with degree < 56. Speed-critical auxiliary function used for product(). * Returns low 32 bits of result, leaves middle 64 bits in *pm and *ph * and high 15 bits in *pt. */ static u32 GF2Product56x56 ( u32 xh, u32 xl, u32 yh, u32 yl, u32 *pt, u32 *ph, u32 *pm ) { #if PROD == 3 u32 l0,l1, m0,m1, h0,h1; const u32 mask = (1UL<<28)-1; xh = xh<<4 | xl>>28; xl &= mask; yh = yh<<4 | yl>>28; yl &= mask; h0 = GF2Product28x28(xh, yh, &h1); l0 = GF2Product28x28(xl, yl, &l1); m0 = GF2Product28x28(xl ^ xh, yl ^ yh, &m1); m0 ^= l0; m1 ^= l1; m0 ^= h0; m1 ^= h1; l1 ^= h0<<24; h0 = h0>>8 | h1<<24; h1 >>= 8; l0 ^= m0<<28; l1 ^= m0>>4 | m1<<28; h0 ^= m1>>4; *pt = h1; *ph = h0; *pm = l1; return l0; #elif PROD == 4 u32 tab[32], w0, w1; { u32 yh2, yh4, yh8; yh2 = yh<<1 ^ yl>>31; yh4 = yh<<2 ^ yl>>30; yh8 = yh<<3 ^ yl>>29; #define STASH(t) do { tab[2*t] = w0; tab[2*t+1] = w1; } while (0) /* Gray code walk through table. */ /* 0000 */ w0 = 0; w1 = 0; STASH(0); /* 0001 */ w0 = yl; w1 = yh; STASH(1); /* 0011 */ w0 = yl ^ yl<<1; w1 = yh ^ yh2; STASH(3); /* 0010 */ w0 = yl<<1; w1 = yh2; STASH(2); /* 0110 */ w0 ^= yl<<2; w1 = yh2 ^ yh4; STASH(6); /* 0111 */ w0 ^= yl; w1 ^= yh; STASH(7); /* 0101 */ w0 = yl ^ yl<<2; w1 = yh ^ yh4; STASH(5); /* 0100 */ w0 = yl<<2; w1 = yh4; STASH(4); /* 1100 */ w0 ^= yl<<3; w1 = yh4 ^ yh8; STASH(12); /* 1101 */ w0 ^= yl; w1 ^= yh; STASH(13); /* 1111 */ w0 ^= yl<<1; w1 ^= yh2; STASH(15); /* 1110 */ w0 ^= yl; w1 ^= yh; STASH(14); /* 1010 */ w0 ^= yl<<2; w1 = yh2 ^ yh8; STASH(10); /* 1011 */ w0 ^= yl; w1 ^= yh; STASH(11); /* 1001 */ w0 = yl ^ yl<<3; w1 = yh ^ yh8; STASH(9); /* 1000 */ w0 = yl<<3; w1 = yh8; STASH(8); #undef STASH } /* end block */ { u32 a0,a1,a2,a3, t; #define GRAB do { w0 = tab[2*t]; w1 = tab[2*t+1]; } while (0) t = xl & 15; GRAB; a0 = w0; a1 = w1; t = xl>> 4 & 15; GRAB; a0 ^= w0<< 4; a1 ^= w0>>28; a1 ^= w1<< 4; t = xl>> 8 & 15; GRAB; a0 ^= w0<< 8; a1 ^= w0>>24; a1 ^= w1<< 8; a2 = w1>>24; t = xl>>12 & 15; GRAB; a0 ^= w0<<12; a1 ^= w0>>20; a1 ^= w1<<12; a2 ^= w1>>20; t = xl>>16 & 15; GRAB; a0 ^= w0<<16; a1 ^= w0>>16; a1 ^= w1<<16; a2 ^= w1>>16; t = xl>>20 & 15; GRAB; a0 ^= w0<<20; a1 ^= w0>>12; a1 ^= w1<<20; a2 ^= w1>>12; t = xl>>24 & 15; GRAB; a0 ^= w0<<24; a1 ^= w0>> 8; a1 ^= w1<<24; a2 ^= w1>>8; t = xl>>28; GRAB; a0 ^= w0<<28; a1 ^= w0>> 4; a1 ^= w1<<28; a2 ^= w1>>4; t = xh & 15; GRAB; a1 ^= w0; a2 ^= w1; t = xh>> 4 & 15; GRAB; a1 ^= w0<< 4; a2 ^= w0>>28; a2 ^= w1<< 4; t = xh>> 8 & 15; GRAB; a1 ^= w0<< 8; a2 ^= w0>>24; a2 ^= w1<< 8; a3 = w1>>24; t = xh>>12 & 15; GRAB; a1 ^= w0<<12; a2 ^= w0>>20; a2 ^= w1<<12; a3 ^= w1>>20; t = xh>>16 & 15; GRAB; a1 ^= w0<<16; a2 ^= w0>>16; a2 ^= w1<<16; a3 ^= w1>>16; t = xh>>20 /* & 15 */; GRAB; a1 ^= w0<<20; a2 ^= w0>>12; a2 ^= w1<<20; a3 ^= w1>>12; #undef GRAB *pt = a3; *ph = a2; *pm = a1; return a0; } /* end block */ #else /* PROD == 5 */ u32 l0,l1, m0,m1, h0,h1; m0 = GF2Product32x32(xl ^ xh, yl ^ yh, &m1); l0 = GF2Product32x32(xl, yl, &l1); h0 = GF2Product32x32(xh, yh, &h1); { u32 t; t = l1 ^ h0; l1 = m0 ^ l0 ^ t; h0 = m1 ^ h1 ^ t; } *pt = h1; *ph = h0; *pm = l1; return l0; #endif /* PROD == 3 ... #elif PROD == 4 ... #else ... */ } /* end function GF2Product56x56 */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #endif /* defined(MMX) ... #elif PROD == 1 || PROD == 2 ... #else ... */ /*-- product ---------------------------------------------------------------*/ /* Product of x and y in the field GF(2^109) i.e., as polys in (Z/2Z)[t] * reduced modulo t^109+t^9+t^2+t+1. Degree x, y < 109. */ static poly128 product(poly128 x, poly128 y) { /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #ifdef MMX MMX_STATIC mmx64 hh,hl, lh,ll, mh,ml; START_MMX(); { const u32 mask = (1UL<<24)-1; u32 xt,xh, yt,yh; MMX_STATIC mmx64 t; xt = x.t<<8 | x.h>>24; xh = x.h<<8 | x.m>>24; yt = y.t<<8 | y.h>>24; yh = y.h<<8 | y.m>>24; t.hi = yt & mask; t.lo = yh; MMX_GF2Product56x56(xt & mask, xh, &t, &hh, &hl); t.hi = (yt ^ y.m) & mask; t.lo = yh ^ y.l; MMX_GF2Product56x56((xt ^ x.m) & mask, xh ^ x.l, &t, &mh, &ml); t.hi = y.m & mask; t.lo = y.l; MMX_GF2Product56x56(x.m & mask, x.l, &t, &lh, &ll); } /* end block */ #define mm_T0 0 #define mm_T1 1 #define mm_T2 2 #define mm_T3 3 #define mm_MH 4 #define mm_ML 5 #define mm_TMP 6 LOD(mm_T0, ll); LOD(mm_T1, lh); LOD(mm_T2, hl); LOD(mm_T3, hh); LOD(mm_ML, hl); LOD(mm_MH, hh); SRL(mm_T2, 16); SRL(mm_T3, 16); SLL(mm_ML, 48); SLL(mm_MH, 48); ORR(mm_T1, mm_ML); ORR(mm_T2, mm_MH); LOD(mm_ML, ml); LOD(mm_MH, mh); XOR_MEM(mm_ML, ll); XOR_MEM(mm_MH, lh); XOR_MEM(mm_ML, hl); XOR_MEM(mm_MH, hh); CPY(mm_TMP, mm_ML); SLL(mm_TMP, 56); XOR(mm_T0, mm_TMP); SRL(mm_ML, 8); XOR(mm_T1, mm_ML); CPY(mm_TMP, mm_MH); SLL(mm_TMP, 56); XOR(mm_T1, mm_TMP); SRL(mm_MH, 8); XOR(mm_T2, mm_MH); /* Reduce modulo t^109+t^9+t^2+t+1. */ CPY(mm_TMP, mm_T2); SLL(mm_T3, 19); XOR(mm_T1, mm_T3); SRL(mm_TMP, 36); XOR(mm_T1, mm_TMP); SLL(mm_T2, 19); XOR(mm_T0, mm_T2); SLL(mm_T3, 1); XOR(mm_T1, mm_T3); SRL(mm_TMP, 7); XOR(mm_T1, mm_TMP); SLL(mm_T2, 1); XOR(mm_T0, mm_T2); SLL(mm_T3, 1); XOR(mm_T1, mm_T3); SRL(mm_TMP, 1); XOR(mm_T1, mm_TMP); SLL(mm_T2, 1); XOR(mm_T0, mm_T2); SLL(mm_T3, 7); XOR(mm_T1, mm_T3); SRL(mm_TMP, 1); XOR(mm_T1, mm_TMP); SLL(mm_T2, 7); XOR(mm_T0, mm_T2); CPY(mm_TMP, mm_T1); SRL(mm_TMP, 45); SLL(mm_T1, 19); SRL(mm_T1, 19); XOR(mm_T0, mm_TMP); SLL(mm_TMP, 1); XOR(mm_T0, mm_TMP); SLL(mm_TMP, 1); XOR(mm_T0, mm_TMP); SLL(mm_TMP, 7); XOR(mm_T0, mm_TMP); { MMX_STATIC mmx64 rh, rl; poly128 r; STR(rh, mm_T1); STR(rl, mm_T0); #undef mm_T0 #undef mm_T1 #undef mm_T2 #undef mm_T3 #undef mm_MH #undef mm_ML #undef mm_TMP END_MMX(); r.t = rh.hi; r.h = rh.lo; r.m = rl.hi; r.l = rl.lo; return r; } /* end block */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #else /* !defined(MMX) */ u32 lt,lh,lm,ll, mt,mh,mm,ml, ht,hh,hm,hl, t0,t1,t2,t3,t4,t5,t6, w; #if PROD == 1 || PROD == 2 { const u32 mask = (1UL<<22)-1; u32 xt,xh, yt,yh; ll = GF2Product54x54(x.m & mask, x.l, y.m & mask, y.l, <, &lh, &lm); xt = x.t<<10 | x.h>>22; xh = x.h<<10 | x.m>>22; yt = y.t<<10 | y.h>>22; yh = y.h<<10 | y.m>>22; ml = GF2Product54x54( (xt ^ x.m) & mask, xh ^ x.l , (yt ^ y.m) & mask, yh ^ y.l, &mt, &mh, &mm ); hl = GF2Product54x54(xt & mask, xh, yt & mask, yh, &ht, &hh, &hm); } /* end block */ ml ^= hl; mm ^= hm; mh ^= lh; mt ^= lt; ml ^= ll; mm ^= lm; mh ^= hh; mt ^= ht; t0 = ll; t1 = lm; t2 = lh; t3 = lt | hl<<12; t4 = hl>>20 | hm<<12; t5 = hm>>20 | hh<<12; t6 = hh>>20 | ht<<12; t1 ^= ml<<22; t2 ^= ml>>10 | mm<<22; t3 ^= mm>>10 | mh<<22; t4 ^= mh>>10 | mt<<22; t5 ^= mt>>10; /* Handle 109th bit. */ /* TO DO: the "& 1" is not necessary */ if (x.t>>12 & 1) { t3 ^= y.l<<12; t4 ^= y.l>>20 | y.m<<12; t5 ^= y.m>>20 | y.h<<12; t6 ^= y.h>>20 | y.t<<12; } /* end if */ if (y.t>>12 & 1) { t3 ^= x.l<<12; t4 ^= x.l>>20 | x.m<<12; t5 ^= x.m>>20 | x.h<<12; t6 ^= x.h>>20 | x.t<<12; } /* end if */ if ((x.t & y.t)>>12 & 1) t6 ^= 1UL<<24; #else /* PROD == 3 || PROD == 4 || PROD == 5 */ { const u32 mask = (1UL<<24)-1; u32 xt,xh, yt,yh; xt = x.t<<8 | x.h>>24; xh = x.h<<8 | x.m>>24; yt = y.t<<8 | y.h>>24; yh = y.h<<8 | y.m>>24; ml = GF2Product56x56( (xt ^ x.m) & mask, xh ^ x.l , (yt ^ y.m) & mask, yh ^ y.l, &mt, &mh, &mm ); hl = GF2Product56x56(xt & mask, xh, yt & mask, yh, &ht, &hh, &hm); ll = GF2Product56x56(x.m & mask, x.l, y.m & mask, y.l, <, &lh, &lm); } /* end block */ ml ^= ll; mm ^= lm; mh ^= hh; mt ^= ht; ml ^= hl; mm ^= hm; mh ^= lh; mt ^= lt; t0 = ll; t1 = lm; t2 = lh; t3 = lt | hl<<16; t4 = hl>>16 | hm<<16; t5 = hm>>16 | hh<<16; t6 = hh>>16 | ht<<16; t1 ^= ml<<24; t2 ^= ml>>8 | mm<<24; t3 ^= mm>>8 | mh<<24; t4 ^= mh>>8 | mt<<24; t5 ^= mt>>8; #endif /* PROD == 1 || PROD == 2 ... #else ... */ /* Reduce modulo t^109+t^9+t^2+t+1. */ t3 ^= t6>>13; t3 ^= t6>>12; t3 ^= t6>>11; t3 ^= t6>>4; t2 ^= t6<<19; t2 ^= t6<<20; t2 ^= t6<<21; t2 ^= t6<<28; t2 ^= t5>>13; t2 ^= t5>>12; t2 ^= t5>>11; t2 ^= t5>>4; t1 ^= t5<<19; t1 ^= t5<<20; t1 ^= t5<<21; t1 ^= t5<<28; t1 ^= t4>>13; t1 ^= t4>>12; t1 ^= t4>>11; t1 ^= t4>>4; t0 ^= t4<<19; t0 ^= t4<<20; t0 ^= t4<<21; t0 ^= t4<<28; w = t3>>13; t3 ^= w<<13; t0 ^= w; t0 ^= w<<1; t0 ^= w<<2; t0 ^= w<<9; { poly128 r; r.t = t3; r.h = t2; r.m = t1; r.l = t0; return r; } /* end block */ /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */ #endif /* defined(MMX) ... #else ... */ } /* end function product */ /*-- inverse ---------------------------------------------------------------*/ /* Useful macros: * SHR(al, am, ah, at, sh, th) shifts at:ah:am:al right by sh bits. * (where 0 < sh < 32 and th = 32-sh). * SHR1(al, am, ah, at) shifts at:ah:am:al right by one bit. */ #define SHR(al, am, ah, at, sh, th) \ do { \ al = al>>sh | am<>sh | ah<>sh | at<>= sh; \ } while (0) #define SHR1(al, am, ah, at) \ do { \ al = al>>1 | am<<31; \ am = am>>1 | ah<<31; \ ah = ah>>1 | at<<31; \ at >>= 1; \ } while (0) /* Invert y in the field GF(2^109) i.e., as a poly in (Z/2Z)[t] * reduced modulo t^109+t^9+t^2+t+1. Degree y < 109, y != 0. * Note: 1/(1+t+t^2+t^9+O(t^10)) = 1+t+t^3+t^4+t^6+t^7+O(t^10) */ static poly128 inverse(poly128 y) { uint sh, th; u32 al,am,ah,at, bl,bm,bh,bt, t, ul,um,uh,ut, vl,vm,vh,vt; const u32 mt = 1UL<<13, ml = 519; /* Note: mm and mh are 0 */ static const u8 shiftTab[64] = { 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 , 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 }; static const u8 xorTab[64] = { 0, 27, 54, 45, 44, 55, 26, 1, 24, 3, 46, 53, 52, 47, 2, 25, 48, 43 , 6, 29, 28, 7, 42, 49, 40, 51, 30, 5, 4, 31, 50, 41, 32, 59, 22, 13 , 12, 23, 58, 33, 56, 35, 14, 21, 20, 15, 34, 57, 16, 11, 38, 61, 60, 39 , 10, 17, 8, 19, 62, 37, 36, 63, 18, 9 }; static const u32 xorTab2[64] = { 0, 13889, 27778, 23235, 22724, 28293, 13382, 519, 12360, 1545 , 23754, 27275, 26764, 24269, 1038, 12879, 24720, 22225, 3090, 14931 , 14420, 3605, 21718, 25239, 20696, 26265, 15450, 2587, 2076, 15965 , 25758, 21215, 16608, 30369, 11362, 6691, 6180, 11877, 29862, 17127 , 28840, 18153, 7210, 10859, 10348, 7725, 17646, 29359, 8304, 5681 , 19698, 31411, 30900, 20213, 5174, 8823, 4152, 9849, 31930, 19195 , 18684, 32445, 9342, 4671 }; /* Maintain: u.y = a and v.y = b modulo t^109+t^9+t^2+t+1. */ al = y.l; am = y.m; ah = y.h; at = y.t; ul = 1; um = uh = ut = 0; while (!(al & 1)) { t = ul & 1; ut ^= t<<13; if (t) ul ^= ml; SHR1(al, am, ah, at); SHR1(ul, um, uh, ut); } /* end while */ bl = ml; bm = bh = 0; bt = mt; vl = vm = vh = vt = 0; do { do { do { do { do { bl ^= al; bm ^= am; bh ^= ah; bt ^= at; vl ^= ul; vm ^= um; vh ^= uh; vt ^= ut; sh = shiftTab[bl & 63]; th = 32-sh; t = vl & 63; vt ^= (u32)xorTab[t]<<13; vl ^= xorTab2[t]; SHR(bl, bm, bh, bt, sh, th); SHR(vl, vm, vh, vt, sh, th); while (!(bl & 1)) { t = vl & 1; vt ^= t<<13; if (t) vl ^= ml; SHR1(bl, bm, bh, bt); SHR1(vl, vm, vh, vt); } /* end while */ } while ( at < bt || at == bt && ( ah < bh || ah == bh && (am < bm || am == bm && al < bl) ) ); if (al == bl && am == bm && ah == bh && at == bt) break; do { al ^= bl; am ^= bm; ah ^= bh; at ^= bt; ul ^= vl; um ^= vm; uh ^= vh; ut ^= vt; sh = shiftTab[al & 63]; th = 32-sh; t = ul & 63; ut ^= (u32)xorTab[t]<<13; ul ^= xorTab2[t]; SHR(al, am, ah, at, sh, th); SHR(ul, um, uh, ut, sh, th); while (!(al & 1)) { t = ul & 1; ut ^= t<<13; if (t) ul ^= ml; SHR1(al, am, ah, at); SHR1(ul, um, uh, ut); } /* end while */ } while ( at > bt || at == bt && ( ah > bh || ah == bh && (am > bm || am == bm && al > bl) ) ); } while (al != bl); } while (am != bm); } while (ah != bh); } while (at != bt); /* Now a (and b) must equal 1. */ /* Reduce u modulo t^109+t^9+t^2+t+1. */ t = ut>>13; ut ^= t<<13; ul ^= t; ul ^= t<<1; ul ^= t<<2; ul ^= t<<9; { poly128 r; r.t = ut; r.h = uh; r.m = um; r.l = ul; return r; } /* end block */ } /* end function inverse */ #undef SHR #undef SHR1 /*-- quotient --------------------------------------------------------------*/ /* Divide x by y in the field GF(2^109) i.e., as polys in (Z/2Z)[t] * reduced modulo t^109+t^9+t^2+t+1. Degree x, y < 109, y != 0. */ static INLINE poly128 quotient(poly128 x, poly128 y) { return product(x, inverse(y)); } /* end function quotient */ /*-= Arithmetic mod the group order =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ /*-- productMod ------------------------------------------------------------*/ /* Return product x*y modulo m, 0 <= x, y < m <= 2^127. * Could this possibly be any slower? =:-) */ static u128 productMod(u128 x, u128 y, u128 m) { uint i; u32 b, w, yy[3]; u128 z; const u128 uZero = ZERO; yy[0] = y.l; yy[1] = y.m; yy[2] = y.h; w = y.t; for (i = 3; !w && i; ) w = yy[--i]; if (!w) return uZero; b = topBit(w); z = x; while (b >>= 1) { z = doubleMod(z, m); if (w & b) z = sumMod(z, x, m); } /* end while */ while (i) { b = 1UL<<31; w = yy[--i]; do { z = doubleMod(z, m); if (w & b) z = sumMod(z, x, m); b >>= 1; } while (b); } /* end if */ return z; } /* end function productMod */ /*-- powerMod --------------------------------------------------------------*/ /* Return power x^y modulo m, 0 <= x < m <= 2^127, 0 <= y < 2^64. * Note that 0^0 returns 1 (even when m = 1). */ static u128 powerMod(u128 x, u64 y, u128 m) { u32 b, w; u128 z; const u128 uOne = ONE; w = y.hi; if (!w) { w = y.lo; if (!w) return uOne; } b = topBit(w); z = x; while (b >>= 1) { z = squareMod(z, m); if (w & b) z = productMod(z, x, m); } /* end while */ if (y.hi) { w = y.lo; b = 1UL<<31; do { z = squareMod(z, m); if (w & b) z = productMod(z, x, m); } while (b >>= 1); } /* end if */ return z; } /* end function powerMod */ /*-- findMultiplier --------------------------------------------------------*/ /* Computes multiplier Prod_i pow(multipliers[i], cases[i]) modulo order. */ static u128 findMultiplier(u64 *cases) { uint i; u128 f; const u128 order = ORDER; /* Here multiplier[i] = (Frobenius to power i) + 1 modulo group order. */ static const u128 multipliers[CASES] = { { 0x6D3, 0x25F3CBE9, 0x20245E26, 0x59822D94 } , { 0x6D3, 0x25F3CBE9, 0x20245E26, 0x59822D96 } , { 0xB86, 0x8E249C44, 0x9EDF28ED, 0x4BE9F47A } , { 0x92C, 0xDA0C3416, 0xDF81C389, 0xD2B6110D } , { 0x21F, 0xBDC2FB8D, 0xA169935F, 0x671A66B6 } , { 0xFC6, 0x09AA935F, 0xE1B24FAC, 0x1A1EC1D4 } , { 0xB86, 0x8E249C44, 0x9EDF28ED, 0x4BE9F46A } }; f = powerMod(multipliers[0], cases[0], order); cases[0].hi = cases[0].lo = 0; for (i = 1; i < CASES; ++i) { f = productMod(f, powerMod(multipliers[i], cases[i], order), order); cases[i].hi = cases[i].lo = 0; } /* end for */ return f; } /* end function findMultiplier */ /*-= Arithmetic on elliptic curve over the field =-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /*-- ellipticDouble --------------------------------------------------------*/ /* Given a point (x:y:z) on y^2 + x*y = x^3 + a*x^2 + b, this computes * its double (x2:y2:z2) by the group law. * It puts x2 in *px2, y2 in *py2 and returns z2. * The point at infinity is represented by (0:1:0). * Finite points are represented by (x:y:1). */ static int ellipticDouble ( poly128 x, poly128 y, int z, poly128 *px2, poly128 *py2 ) { poly128 lam, x2,y2; const poly128 a = A, one = ONE, zero = ZERO; if (!z || equal(x, zero)) { *px2 = zero; *py2 = one; return 0; } lam = xor(x, quotient(y, x)); x2 = xor(xor(square(lam), lam), a); y2 = xor(xor(product(lam, xor(x, x2)), x2), y); *px2 = x2; *py2 = y2; return 1; } /* end function ellipticDouble */ /*-- ellipticSum -----------------------------------------------------------*/ /* Given points (x1:y1:z1) and (x2:y2:z2) on y^2 + x*y = x^3 + a*x^2 + b, * this computes their sum (x3:y3:z3) by the group law. * It puts x3 in *px3, y3 in *py3 and returns z3. * The point at infinity is represented by (0:1:0). * Finite points are represented by (x:y:1). */ static int ellipticSum ( poly128 x1, poly128 y1, int z1, poly128 x2, poly128 y2, int z2 , poly128 *px3, poly128 *py3 ) { poly128 lam, t, x3,y3; const poly128 a = A; if (!z1) { *px3 = x2; *py3 = y2; return z2; } if (!z2) { *px3 = x1; *py3 = y1; return z1; } if (equal(x1, x2)) { const poly128 zero = ZERO, one = ONE; if (equal(y1, y2)) return ellipticDouble(x1, y1, z1, px3, py3); *px3 = zero; *py3 = one; return 0; } /* end if */ lam = quotient(xor(y1, y2), xor(x1, x2)); t = xor(xor(xor(square(lam), lam), x2), a); x3 = xor(t, x1); y3 = xor(xor(product(lam, t), x3), y1); *px3 = x3; *py3 = y3; return 1; } /* end function ellipticSum */ /*-- ellipticProduct -------------------------------------------------------*/ /* Given a point (x:y:z) on y^2 + x*y = x^3 + a*x^2 + b, this computes * its fac-th multiple (x2:y2:z2) by the group law. * It puts x2 in *px2, y2 in *py2 and returns z2. * The point at infinity is represented by (0:1:0). * Finite points are represented by (x:y:1). */ static int ellipticProduct ( poly128 x, poly128 y, int z, u128 fac, poly128 *px2, poly128 *py2 ) { int z2; u32 f, i, m, tab[4]; poly128 x2,y2; /* Get most significant 32-bit word of fac. */ tab[0] = fac.l; tab[1] = fac.m; tab[2] = fac.h; tab[3] = fac.t; for (i = 3; !(f = tab[i]) && i; --i) ; /* Quick exit if fac was 0. */ if (!f) { const poly128 zero = ZERO, one = ONE; *px2 = zero; *py2 = one; return 0; } /* end if */ /* Run through bits of most significant word. */ m = topBit(f); x2 = x; y2 = y; z2 = z; while (m >>= 1) { z2 = ellipticDouble(x2, y2, z2, &x2, &y2); if (f & m) z2 = ellipticSum(x, y, z, x2, y2, z2, &x2, &y2); } /* end while */ /* Run through bits of less significant words. */ while (i--) { f = tab[i]; m = 1U<<31; do { z2 = ellipticDouble(x2, y2, z2, &x2, &y2); if (f & m) z2 = ellipticSum(x, y, z, x2, y2, z2, &x2, &y2); m >>= 1; } while (m); } /* end while */ *px2 = x2; *py2 = y2; return z2; } /* end function ellipticProduct */ /*-= Various file manipulation thingies =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ /*-- syncAndCheck ----------------------------------------------------------*/ /* Check that *pu, *pv are in range, update them using cases[] and reset it. * Then check that x, y are in range and that [u]P+[v]Q = (x:y:1). * Note: used before reporting a distinguished point or saving state. */ static int syncAndCheck(u64 *cases, u128 *pu, u128 *pv, poly128 x, poly128 y) { int zc, zu, zv; u128 f; poly128 xc,yc, xu,yu, xv,yv; const int zP = 1, zQ = 1; const poly128 xP = XP, xQ = XQ, yP = YP, yQ = YQ; const u128 order = ORDER; if (ge(*pu, order)) return 0; if (ge(*pv, order)) return 0; f = findMultiplier(&cases[0]); /* Also resets cases[]. */ *pu = productMod(f, *pu, order); *pv = productMod(f, *pv, order); if (x.t > 0x00001FFF) return 0; if (y.t > 0x00001FFF) return 0; zu = ellipticProduct(xP, yP, zP, *pu, &xu, &yu); zv = ellipticProduct(xQ, yQ, zQ, *pv, &xv, &yv); zc = ellipticSum(xu, yu, zu, xv, yv, zv, &xc, &yc); if (!equal(xc, x) || !equal(yc, y) || zc != 1) return 0; return 1; } /* end function syncAndCheck */ /*-- reportDistinguished ---------------------------------------------------*/ /* Reporting of distinguished points back to headquarters. */ static void reportDistinguished ( modeType mode, char *argv[] , u64 iters, u128 u, u128 v, u64 total, double dStart ) { const char format[] = "ECC-L0|" VARIANT "|" VERSION "|\n" "ECC-L1|%03X%08X%08X%08X|%03X%08X%08X%08X|\n" "ECC-L2|%s|\n" "ECC-L3|%04X%08X|\n" "ECC-L4|%s|\n" , test_format[] = "TEST-L0|" VARIANT "|" VERSION "|\n" "TEST-L1|%03X%08X%08X%08X|%03X%08X%08X%08X|\n" "TEST-L2|%s|\n" "TEST-L3|%04X%08X|\n" "TEST-L4|%s|\n" ; int status; FILE *handle; time_t now; /* For time and date. */ static char *buf = NULL; /** Allocate buffer first time around. **/ if (!buf) { buf = malloc( strlen(format) /* Upper bound on size of fixed text. */ + 27 + 27 /* Two 27-digit hex numbers on line 1. */ + strlen(argv[3]) /* The user ID on line 2. */ + 12 /* One 12-digit hex number on line 3. */ + strlen(argv[5]) /* The machine ID on line 4. */ + 1 /* The terminating '\0'. */ ); } /* end if */ /** Output the distinguished point to stdout and the points file. **/ if (mode == Test) handle = NULL; else { handle = fopen(DIST_POINTS_FILENAME, "a"); if (!handle) { printf( "Warning: couldn't fopen() file " DIST_POINTS_FILENAME " for appending\n" " (errno = %d: %s). Following point is not saved!\n" , errno, strerror(errno) ); } /* end if */ } /* end if */ putchar('\n'); if (handle) putc('\n', handle); /* Time and date. */ now = time(NULL); if (now == -1) puts("Warning: can't determine time and date."); else { char *str1, *str2; str1 = mode == Test ? "Test" : "Distinguished"; str2 = ctime(&now); printf( "%s point found at: %s", str1, str2); if (handle) fprintf(handle, "%s point found at: %s", str1, str2); } /* end if/else */ /* The distinguished point. */ printf( mode == Test ? test_format : format , u.t, u.h, u.m, u.l, v.t, v.h, v.m, v.l , argv[3], iters.hi, iters.lo, argv[5] ); if (handle) { /* Note: mode != Test. */ fprintf( handle, format , u.t, u.h, u.m, u.l, v.t, v.h, v.m, v.l , argv[3], iters.hi, iters.lo, argv[5] ); } /* end if */ if (handle) { status = fclose(handle); if (status == EOF) { printf( "Warning: fclose() of file " DIST_POINTS_FILENAME " failed\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); } /* end if */ } /* end if */ /** Output extra verbosity to stdout. **/ { double dIters, dTotal , dNow, dUserTime /* For timing. */ ; /* Iteration counts. */ dIters = (double)iters.hi*4294967296.0+(double)iters.lo; dTotal = (double)total.hi*4294967296.0+(double)total.lo; printf( "Iterations used = %g.\n" "Total iterations = %g.\n" , dIters, dTotal ); /* Iteration rate. */ #if MACOS dNow = get_time(); #elif __CYGWIN__ { struct tms tm; times(&tm); dNow = (double)tm.tms_utime/(double)CLK_TCK; } /* end block */ #else { struct rusage ru; getrusage(RUSAGE_SELF, &ru); dNow = (double)ru.ru_utime.tv_sec+(double)ru.ru_utime.tv_usec*0.000001; } /* end block */ #endif dUserTime = dNow-dStart; if (dUserTime) printf("Rate = %g per second.\n", dTotal/dUserTime); } /* end block */ /** Send the distinguished point by email. **/ #if MACOS if (mode == Batch || mode == Mail || mode == Alt) { if (!buf) { puts( "Warning: couldn't allocate temp buffer for sending mail, " "send by hand!" ); } else { sprintf( buf, format , u.t, u.h, u.m, u.l, v.t, v.h, v.m, v.l , argv[3], iters.hi, iters.lo, argv[5] ); switch (mode){ case Mail: destination = "ecdl2K-108@cristal.inria.fr"; break; case Alt: destination = "ecdl2K-108@rupture.net"; break; default: destination = ""; break; } /* end switch */ resultsOut(buf); } /* end if/else */ } /* end if */ #else /* !MACOS */ if (mode == Mail || mode == Alt) { handle = popen(SENDMAIL " -t", "w"); if (handle == NULL) { puts("Warning: couldn't pipe to " SENDMAIL ", send by hand!"); } else { int status; fprintf( handle , "To: %s\n\n" , mode == Mail ? "ecdl2K-108@cristal.inria.fr" : "ecdl2K-108@rupture.net" ); /* The distinguished point. Note: mode != Test. */ fprintf( handle, format , u.t, u.h, u.m, u.l, v.t, v.h, v.m, v.l , argv[3], iters.hi, iters.lo, argv[5] ); status = fflush(handle); if (status == EOF) { printf( "Warning: fflush() of pipe failed\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); } /* end if */ status = pclose(handle); if (status) { if (status == -1) { printf( "Warning: pclose() failed\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); } else printf("Warning: " SENDMAIL " returned status %d.\n", status); } /* end if */ } /* end if/else */ } /* end if (mode == Mail || mode == Alt) */ #endif /* MACOS */ /** Send the distinguished point by HTTP. **/ if (mode == Http) { if (!buf) { puts( "Warning: couldn't allocate temp buffer for HTTP sending, " "send by hand!" ); } else { sprintf( buf, format , u.t, u.h, u.m, u.l, v.t, v.h, v.m, v.l , argv[3], iters.hi, iters.lo, argv[5] ); if (httpPost(buf) == -1) { puts( "Warning: an error occurred while sending point via HTTP, " "send by hand!" ); } #if MACOS else ++points_sent; #endif } /* end if/else */ } /* end if (mode == Http) */ fflush(stdout); } /* end function reportDistinguished */ /*-- u32ToBytes ------------------------------------------------------------*/ static void u32ToBytes(u32 data, u8 *p) { p[0] = data; p[1] = data>> 8; p[2] = data>>16; p[3] = data>>24; } /* end function u32ToBytes */ /*-- bytesToU32 ------------------------------------------------------------*/ static u32 bytesToU32(u8 *p) { return p[0] | (u32)p[1]<<8 | (u32)p[2]<<16 | (u32)p[3]<<24; } /* end function bytesToU32 */ /*-- writeState ------------------------------------------------------------*/ /* Write state out to file in a portable binary format. */ static void writeState ( u64 *itersT, u128 *uT, u128 *vT, poly128 *xT, poly128 *yT ) { FILE *handle; puts("Saving state to " SAVED_STATE_FILENAME "."); handle = fopen(SAVED_STATE_FILENAME, "wb"); if (handle) { uint i; int status; u8 buf[72]; for (i = 0; i < PARAL; ++i) { #define OUT128(offset, X) \ u32ToBytes(X.l, buf+offset); \ u32ToBytes(X.m, buf+offset+4); \ u32ToBytes(X.h, buf+offset+8); \ u32ToBytes(X.t, buf+offset+12) /* ; */ u32ToBytes(itersT[i].lo, buf); u32ToBytes(itersT[i].hi, buf+4); OUT128( 8, uT[i]); OUT128(24, vT[i]); OUT128(40, xT[i]); OUT128(56, yT[i]); #undef OUT128 if (fwrite((void *)buf, sizeof(buf), 1, handle) != 1) { printf( "Warning: couldn't fwrite() to saved state file\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); break; } /* end if */ } /* end for */ status = fclose(handle); if (status == EOF) { printf( "Warning: fclose() of saved state file failed\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); } /* end if */ } else printf( "Warning: couldn't fopen() saved state file for writing\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); fflush(stdout); } /* end function writeState */ /*-- readState -------------------------------------------------------------*/ /* Reads state back from file (if it exists) in a portable binary format * and returns number of valid items (i.e., u,v,x,y,z data) found. * Note: There is nothing to check for the itersT[] array. */ static uint readState ( u64 *itersT, u128 *uT, u128 *vT, poly128 *xT, poly128 *yT ) { uint i; FILE *handle; i = 0; handle = fopen(SAVED_STATE_FILENAME, "rb"); if (handle) { int status; u8 buf[72]; for (i = 0; i < PARAL; ++i) { int OK; if (fread((void *)buf, sizeof(buf), 1, handle) != 1) { if (feof(handle)) puts("Warning: short saved state file."); else { printf( "Warning: couldn't fread() from saved state file\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); } /* end if/else */ break; } /* end if */ #define IN128(offset, X) \ X.l = bytesToU32(buf+offset); \ X.m = bytesToU32(buf+offset+4); \ X.h = bytesToU32(buf+offset+8); \ X.t = bytesToU32(buf+offset+12) /* ; */ itersT[i].lo = bytesToU32(buf); itersT[i].hi = bytesToU32(buf+4); IN128( 8, uT[i]); IN128(24, vT[i]); IN128(40, xT[i]); IN128(56, yT[i]); #undef IN128 OK = 0; do { int z, zu, zv; poly128 x,y, xu,yu, xv,yv; const int zP = 1, zQ = 1; const poly128 xP = XP, xQ = XQ, yP = YP, yQ = YQ; const u128 order = ORDER; if (ge(uT[i], order)) break; if (ge(vT[i], order)) break; if (xT[i].t > 0x00001FFF) break; if (yT[i].t > 0x00001FFF) break; zu = ellipticProduct(xP, yP, zP, uT[i], &xu, &yu); zv = ellipticProduct(xQ, yQ, zQ, vT[i], &xv, &yv); z = ellipticSum(xu, yu, zu, xv, yv, zv, &x, &y); if (!equal(x, xT[i]) || !equal(y, yT[i]) || z != 1) break; OK = 1; } while (0); if (!OK) { puts("Warning: invalid starting point found in saved state!"); break; } /* end if */ } /* end for */ printf( "Read %u of %u starting points from saved state.\n" , i, (uint)PARAL ); status = fclose(handle); if (status == EOF) { printf( "Warning: fclose() of saved state file failed\n" " (errno = %d: %s).\n" , errno, strerror(errno) ); } /* end if */ } else printf( "Warning: couldn't fopen() saved state file for reading\n" " (errno = %d: %s).\n" "Note: 'No such file' is normal for the first run.\n" , errno, strerror(errno) ); fflush(stdout); return i; } /* end function readState */ /*-= Stuff for http mode =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /*-- httpPost --------------------------------------------------------------*/ /* Configuration */ #define HTTP_URL "http://cristal.inria.fr/bin/ecdl7" #define HTTP_SERVER_HOST "cristal.inria.fr" #define HTTP_SERVER_PORT 80 /* INET address of HTTP server or proxy */ static struct sockaddr_in http_addr; /* Name of HTTP server or proxy */ static char * http_host = HTTP_SERVER_HOST; /* Port number on HTTP server or proxy */ static int http_port = HTTP_SERVER_PORT; /* Send a string back to the base using the HTTP POST protocol */ static int httpPost(char * payload) { int s, retcode; FILE * f, * r; /* Connect to HTTP server or proxy */ s = socket(AF_INET, SOCK_STREAM, 0); if (s == -1) { printf("Warning: socket() failed (errno = %d: %s).\n", errno, strerror(errno)); return -1; } retcode = connect(s, (struct sockaddr *) &http_addr, sizeof(http_addr)); if (retcode == -1) { printf("Warning: couldn't connect to host %s port %d (errno = %d: %s).\n", http_host, http_port, errno, strerror(errno)); return -1; } /* Prepare to send data */ f = fdopen(s, "wb"); if (f == NULL) { fclose(f); printf("Warning: fdopen(socket, \"wb\") failed (errno = %d: %s).\n", errno, strerror(errno)); return -1; } /* Send POST request + data */ fprintf(f, "POST " HTTP_URL " HTTP/1.0\r\n"); fprintf(f, "Host: %s:%d\r\n", HTTP_SERVER_HOST, HTTP_SERVER_PORT); fprintf(f, "Content-length: %d\r\n", strlen(payload)); fprintf(f, "\r\n"); fputs(payload, f); fflush(f); if (ferror(f)) { fclose(f); printf("Warning: error while sending data via HTTP to %s.\n", http_host); return -1; } /* Say that we're done sending data */ shutdown(s, 1); /* Read response from server or proxy */ r = fdopen(s, "rb"); if (r == NULL) { close(s); printf("Warning: fdopen(socket, \"rb\") failed (errno = %d: %s).\n", errno, strerror(errno)); return -1; } /* First line of response is the status line, with format HTTP-Version SP Status-Code SP Reason-Phrase CRLF. */ retcode = -1; fscanf(r, "%*s %d ", &retcode); /* Discard remainder of response */ fclose(f); fclose(r); /* Check status code -- should be 2xx */ if (! (retcode >= 200 && retcode < 300)) { printf("Warning: Web server or proxy %s returned an error " "(status code = %d)\n", http_host, retcode); return -1; } return 0; } /*-- httpParseProxy --------------------------------------------------------*/ /* Parse proxy specification and store proxy host name and port number * in http_host and http_port */ static int httpParseProxy(char * proxy) { http_host = malloc(strlen(proxy)); if (http_host == NULL) { printf("Error: cannot allocate proxy host name\n"); return -1; } if (sscanf(proxy, "%[^:]:%d", http_host, &http_port) != 2) { printf("Error: bad proxy specification `%s'\n", proxy); return -1; } return 0; } /*-- httpInit --------------------------------------------------------------*/ /* Initialization of the HTTP machinery. Determine INET address of * server or proxy and store it in http_addr */ static int httpInit(void) { struct hostent * entry; entry = gethostbyname(http_host); if (entry == NULL) { printf("Error: unknown host %s\n", http_host); return -1; } memset(&http_addr, 0, sizeof(struct sockaddr_in)); http_addr.sin_family = AF_INET; memcpy(&http_addr.sin_addr.s_addr, entry->h_addr, entry->h_length); http_addr.sin_port = htons(http_port); return 0; } /*== end of file ecdl2K-108.32bit.c ========================================*/