00001 #if !defined(_XBOX) && !defined(X360)
00002
00003 #include "BigInt.h"
00004 #include <ctype.h>
00005 #include <string.h>
00006
00007 #include "RakAlloca.h"
00008 #include "RakMemoryOverride.h"
00009 #include "Rand.h"
00010
00011 #if defined(_MSC_VER) && !defined(_DEBUG) && _MSC_VER > 1310
00012 #include <intrin.h>
00013 #endif
00014
00015 namespace big
00016 {
00017 static const char Bits256[] = {
00018 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
00019 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00020 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00021 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00022 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
00023 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
00024 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
00025 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
00026 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
00027 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
00028 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
00029 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
00030 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
00031 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
00032 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
00033 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
00034 };
00035
00036
00037
00038
00039 uint32_t Degree(uint32_t v)
00040 {
00041
00042
00043
00044
00045 uint32_t r, t = v >> 16;
00046
00047 if (t) r = (r = t >> 8) ? 24 + Bits256[r] : 16 + Bits256[t];
00048 else r = (r = v >> 8) ? 8 + Bits256[r] : Bits256[v];
00049
00050 return r;
00051
00052 }
00053
00054
00055 int LimbDegree(const uint32_t *n, int limbs)
00056 {
00057 while (limbs--)
00058 if (n[limbs])
00059 return limbs + 1;
00060
00061 return 0;
00062 }
00063
00064
00065 uint32_t Degree(const uint32_t *n, int limbs)
00066 {
00067 uint32_t limb_degree = LimbDegree(n, limbs);
00068 if (!limb_degree) return 0;
00069 --limb_degree;
00070
00071 uint32_t msl_degree = Degree(n[limb_degree]);
00072
00073 return msl_degree + limb_degree*32;
00074 }
00075
00076 void Set(uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00077 {
00078 int min = lhs_limbs < rhs_limbs ? lhs_limbs : rhs_limbs;
00079
00080 memcpy(lhs, rhs, min*4);
00081 memset(&lhs[min], 0, (lhs_limbs - min)*4);
00082 }
00083 void Set(uint32_t *lhs, int limbs, const uint32_t *rhs)
00084 {
00085 memcpy(lhs, rhs, limbs*4);
00086 }
00087 void Set32(uint32_t *lhs, int lhs_limbs, const uint32_t rhs)
00088 {
00089 *lhs = rhs;
00090 memset(&lhs[1], 0, (lhs_limbs - 1)*4);
00091 }
00092
00093 #if defined(__BIG_ENDIAN__)
00094
00095
00096 void ToLittleEndian(uint32_t *n, int limbs)
00097 {
00098 for (int ii = 0; ii < limbs; ++ii)
00099 {
00100 swapLE(n[ii]);
00101 }
00102 }
00103
00104
00105 void FromLittleEndian(uint32_t *n, int limbs)
00106 {
00107
00108 ToLittleEndian(n, limbs);
00109 }
00110
00111 #endif // __BIG_ENDIAN__
00112
00113 bool Less(int limbs, const uint32_t *lhs, const uint32_t *rhs)
00114 {
00115 for (int ii = limbs-1; ii >= 0; --ii)
00116 if (lhs[ii] != rhs[ii])
00117 return lhs[ii] < rhs[ii];
00118
00119 return false;
00120 }
00121 bool Greater(int limbs, const uint32_t *lhs, const uint32_t *rhs)
00122 {
00123 for (int ii = limbs-1; ii >= 0; --ii)
00124 if (lhs[ii] != rhs[ii])
00125 return lhs[ii] > rhs[ii];
00126
00127 return false;
00128 }
00129 bool Equal(int limbs, const uint32_t *lhs, const uint32_t *rhs)
00130 {
00131 return 0 == memcmp(lhs, rhs, limbs*4);
00132 }
00133
00134 bool Less(const uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00135 {
00136 if (lhs_limbs > rhs_limbs)
00137 do if (lhs[--lhs_limbs] != 0) return false; while (lhs_limbs > rhs_limbs);
00138 else if (lhs_limbs < rhs_limbs)
00139 do if (rhs[--rhs_limbs] != 0) return true; while (lhs_limbs < rhs_limbs);
00140
00141 while (lhs_limbs--) if (lhs[lhs_limbs] != rhs[lhs_limbs]) return lhs[lhs_limbs] < rhs[lhs_limbs];
00142 return false;
00143 }
00144 bool Greater(const uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00145 {
00146 if (lhs_limbs > rhs_limbs)
00147 do if (lhs[--lhs_limbs] != 0) return true; while (lhs_limbs > rhs_limbs);
00148 else if (lhs_limbs < rhs_limbs)
00149 do if (rhs[--rhs_limbs] != 0) return false; while (lhs_limbs < rhs_limbs);
00150
00151 while (lhs_limbs--) if (lhs[lhs_limbs] != rhs[lhs_limbs]) return lhs[lhs_limbs] > rhs[lhs_limbs];
00152 return false;
00153 }
00154 bool Equal(const uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00155 {
00156 if (lhs_limbs > rhs_limbs)
00157 do if (lhs[--lhs_limbs] != 0) return false; while (lhs_limbs > rhs_limbs);
00158 else if (lhs_limbs < rhs_limbs)
00159 do if (rhs[--rhs_limbs] != 0) return false; while (lhs_limbs < rhs_limbs);
00160
00161 while (lhs_limbs--) if (lhs[lhs_limbs] != rhs[lhs_limbs]) return false;
00162 return true;
00163 }
00164
00165 bool Greater32(const uint32_t *lhs, int lhs_limbs, uint32_t rhs)
00166 {
00167 if (*lhs > rhs) return true;
00168 while (--lhs_limbs)
00169 if (*++lhs) return true;
00170 return false;
00171 }
00172 bool Equal32(const uint32_t *lhs, int lhs_limbs, uint32_t rhs)
00173 {
00174 if (*lhs != rhs) return false;
00175 while (--lhs_limbs)
00176 if (*++lhs) return false;
00177 return true;
00178 }
00179
00180
00181
00182 void ShiftRight(int limbs, uint32_t *out, const uint32_t *in, int shift)
00183 {
00184 if (!shift)
00185 {
00186 Set(out, limbs, in);
00187 return;
00188 }
00189
00190 uint32_t carry = 0;
00191
00192 for (int ii = limbs - 1; ii >= 0; --ii)
00193 {
00194 uint32_t r = in[ii];
00195
00196 out[ii] = (r >> shift) | carry;
00197
00198 carry = r << (32 - shift);
00199 }
00200 }
00201
00202
00203
00204 uint32_t ShiftLeft(int limbs, uint32_t *out, const uint32_t *in, int shift)
00205 {
00206 if (!shift)
00207 {
00208 Set(out, limbs, in);
00209 return 0;
00210 }
00211
00212 uint32_t carry = 0;
00213
00214 for (int ii = 0; ii < limbs; ++ii)
00215 {
00216 uint32_t r = in[ii];
00217
00218 out[ii] = (r << shift) | carry;
00219
00220 carry = r >> (32 - shift);
00221 }
00222
00223 return carry;
00224 }
00225
00226
00227
00228 uint32_t Add(uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00229 {
00230 int ii;
00231 uint64_t r = (uint64_t)lhs[0] + rhs[0];
00232 lhs[0] = (uint32_t)r;
00233
00234 for (ii = 1; ii < rhs_limbs; ++ii)
00235 {
00236 r = ((uint64_t)lhs[ii] + rhs[ii]) + (uint32_t)(r >> 32);
00237 lhs[ii] = (uint32_t)r;
00238 }
00239
00240 for (; ii < lhs_limbs && (uint32_t)(r >>= 32) != 0; ++ii)
00241 {
00242 r += lhs[ii];
00243 lhs[ii] = (uint32_t)r;
00244 }
00245
00246 return (uint32_t)(r >> 32);
00247 }
00248
00249
00250
00251 uint32_t Add(uint32_t *out, const uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00252 {
00253 int ii;
00254 uint64_t r = (uint64_t)lhs[0] + rhs[0];
00255 out[0] = (uint32_t)r;
00256
00257 for (ii = 1; ii < rhs_limbs; ++ii)
00258 {
00259 r = ((uint64_t)lhs[ii] + rhs[ii]) + (uint32_t)(r >> 32);
00260 out[ii] = (uint32_t)r;
00261 }
00262
00263 for (; ii < lhs_limbs && (uint32_t)(r >>= 32) != 0; ++ii)
00264 {
00265 r += lhs[ii];
00266 out[ii] = (uint32_t)r;
00267 }
00268
00269 return (uint32_t)(r >> 32);
00270 }
00271
00272
00273
00274 uint32_t Add32(uint32_t *lhs, int lhs_limbs, uint32_t rhs)
00275 {
00276 uint32_t n = lhs[0];
00277 uint32_t r = n + rhs;
00278 lhs[0] = r;
00279
00280 if (r >= n)
00281 return 0;
00282
00283 for (int ii = 1; ii < lhs_limbs; ++ii)
00284 if (++lhs[ii])
00285 return 0;
00286
00287 return 1;
00288 }
00289
00290
00291
00292 int32_t Subtract(uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00293 {
00294 int ii;
00295 int64_t r = (int64_t)lhs[0] - rhs[0];
00296 lhs[0] = (uint32_t)r;
00297
00298 for (ii = 1; ii < rhs_limbs; ++ii)
00299 {
00300 r = ((int64_t)lhs[ii] - rhs[ii]) + (int32_t)(r >> 32);
00301 lhs[ii] = (uint32_t)r;
00302 }
00303
00304 for (; ii < lhs_limbs && (int32_t)(r >>= 32) != 0; ++ii)
00305 {
00306 r += lhs[ii];
00307 lhs[ii] = (uint32_t)r;
00308 }
00309
00310 return (int32_t)(r >> 32);
00311 }
00312
00313
00314
00315 int32_t Subtract(uint32_t *out, const uint32_t *lhs, int lhs_limbs, const uint32_t *rhs, int rhs_limbs)
00316 {
00317 int ii;
00318 int64_t r = (int64_t)lhs[0] - rhs[0];
00319 out[0] = (uint32_t)r;
00320
00321 for (ii = 1; ii < rhs_limbs; ++ii)
00322 {
00323 r = ((int64_t)lhs[ii] - rhs[ii]) + (int32_t)(r >> 32);
00324 out[ii] = (uint32_t)r;
00325 }
00326
00327 for (; ii < lhs_limbs && (int32_t)(r >>= 32) != 0; ++ii)
00328 {
00329 r += lhs[ii];
00330 out[ii] = (uint32_t)r;
00331 }
00332
00333 return (int32_t)(r >> 32);
00334 }
00335
00336
00337
00338 int32_t Subtract32(uint32_t *lhs, int lhs_limbs, uint32_t rhs)
00339 {
00340 uint32_t n = lhs[0];
00341 uint32_t r = n - rhs;
00342 lhs[0] = r;
00343
00344 if (r <= n)
00345 return 0;
00346
00347 for (int ii = 1; ii < lhs_limbs; ++ii)
00348 if (lhs[ii]--)
00349 return 0;
00350
00351 return -1;
00352 }
00353
00354
00355 void Negate(int limbs, uint32_t *lhs, const uint32_t *rhs)
00356 {
00357
00358 while (limbs-- > 0 && !(*lhs++ = -(int32_t)(*rhs++)));
00359
00360
00361 while (limbs-- > 0) *lhs++ = ~(*rhs++);
00362 }
00363
00364
00365 void BitNot(uint32_t *n, int limbs)
00366 {
00367 limbs = LimbDegree(n, limbs);
00368 if (limbs)
00369 {
00370 uint32_t high = n[--limbs];
00371 uint32_t high_degree = 32 - Degree(high);
00372
00373 n[limbs] = ((uint32_t)(~high << high_degree) >> high_degree);
00374 while (limbs--) n[limbs] = ~n[limbs];
00375 }
00376 }
00377
00378
00379 void LimbNot(uint32_t *n, int limbs)
00380 {
00381 while (limbs--) *n++ = ~(*n);
00382 }
00383
00384
00385 void Xor(int limbs, uint32_t *lhs, const uint32_t *rhs)
00386 {
00387 while (limbs--) *lhs++ ^= *rhs++;
00388 }
00389
00390
00391 uint32_t AddLeftShift32(
00392 int limbs,
00393 uint32_t *A,
00394 const uint32_t *B,
00395 uint32_t S)
00396 {
00397 uint64_t sum = 0;
00398 uint32_t last = 0;
00399
00400 while (limbs--)
00401 {
00402 uint32_t b = *B++;
00403
00404 sum = (uint64_t)((b << S) | (last >> (32 - S))) + *A + (uint32_t)(sum >> 32);
00405
00406 last = b;
00407 *A++ = (uint32_t)sum;
00408 }
00409
00410 return (uint32_t)(sum >> 32) + (last >> (32 - S));
00411 }
00412
00413
00414 uint32_t Multiply32(
00415 int limbs,
00416 uint32_t *result,
00417 const uint32_t *A,
00418 uint32_t B)
00419 {
00420 uint64_t p = (uint64_t)A[0] * B;
00421 result[0] = (uint32_t)p;
00422
00423 while (--limbs)
00424 {
00425 p = (uint64_t)*(++A) * B + (uint32_t)(p >> 32);
00426 *(++result) = (uint32_t)p;
00427 }
00428
00429 return (uint32_t)(p >> 32);
00430 }
00431
00432
00433 uint32_t MultiplyAdd32(
00434 int limbs,
00435 uint32_t *X,
00436 uint32_t M,
00437 uint32_t A)
00438 {
00439 uint64_t p = (uint64_t)X[0] * M + A;
00440 X[0] = (uint32_t)p;
00441
00442 while (--limbs)
00443 {
00444 p = (uint64_t)*(++X) * M + (uint32_t)(p >> 32);
00445 *X = (uint32_t)p;
00446 }
00447
00448 return (uint32_t)(p >> 32);
00449 }
00450
00451
00452 uint32_t AddMultiply32(
00453 int limbs,
00454 uint32_t *A,
00455 const uint32_t *B,
00456 uint32_t M)
00457 {
00458
00459 #if defined(ASSEMBLY_INTEL_SYNTAX) && !defined (_XBOX) && !defined(X360)
00460 ASSEMBLY_BLOCK
00461 {
00462 mov esi, [B]
00463 mov edi, [A]
00464 mov eax, [esi]
00465 mul [M] ; (edx,eax) = [M]*[esi]
00466 add eax, [edi] ; (edx,eax) += [edi]
00467 adc edx, 0
00468 ; (edx,eax) = [B]*[M] + [A]
00469
00470 mov [edi], eax
00471 ; [A] = eax
00472
00473 mov ecx, [limbs]
00474 sub ecx, 1
00475 jz loop_done
00476 loop_head:
00477 lea esi, [esi + 4] ; ++B
00478 mov eax, [esi] ; eax = [B]
00479 mov ebx, edx ; ebx = last carry
00480 lea edi, [edi + 4] ; ++A
00481 mul [M] ; (edx,eax) = [M]*[esi]
00482 add eax, [edi] ; (edx,eax) += [edi]
00483 adc edx, 0
00484 add eax, ebx ; (edx,eax) += ebx
00485 adc edx, 0
00486 ; (edx,eax) = [esi]*[M] + [edi] + (ebx=last carry)
00487
00488 mov [edi], eax
00489 ; [A] = eax
00490
00491 sub ecx, 1
00492 jnz loop_head
00493 loop_done:
00494 mov [M], edx ; Use [M] to copy the carry into C++ land
00495 }
00496
00497 return M;
00498 #else
00499
00500 uint64_t p = B[0] * (uint64_t)M + A[0];
00501 A[0] = (uint32_t)p;
00502
00503 while (--limbs)
00504 {
00505 p = (*(++B) * (uint64_t)M + *(++A)) + (uint32_t)(p >> 32);
00506 A[0] = (uint32_t)p;
00507 }
00508
00509 return (uint32_t)(p >> 32);
00510 #endif
00511 }
00512
00513
00514 void SimpleMultiply(
00515 int limbs,
00516 uint32_t *product,
00517 const uint32_t *x,
00518 const uint32_t *y)
00519 {
00520
00521 product[limbs] = Multiply32(limbs, product, x, y[0]);
00522
00523 uint32_t ctr = limbs;
00524 while (--ctr)
00525 {
00526 ++product;
00527 product[limbs] = AddMultiply32(limbs, product, x, (++y)[0]);
00528 }
00529 }
00530
00531
00532 void SimpleMultiplyLowHalf(
00533 int limbs,
00534 uint32_t *product,
00535 const uint32_t *x,
00536 const uint32_t *y)
00537 {
00538 Multiply32(limbs, product, x, y[0]);
00539
00540 while (--limbs)
00541 {
00542 ++product;
00543 ++y;
00544 AddMultiply32(limbs, product, x, y[0]);
00545 }
00546 }
00547
00548
00549 void SimpleSquare(
00550 int limbs,
00551 uint32_t *product,
00552 const uint32_t *x)
00553 {
00554
00555 uint32_t *cross_product = (uint32_t*)alloca(limbs*2*4);
00556
00557
00558 cross_product[limbs] = Multiply32(limbs - 1, cross_product + 1, x + 1, x[0]);
00559 for (int ii = 1; ii < limbs - 1; ++ii)
00560 {
00561 cross_product[limbs + ii] = AddMultiply32(limbs - ii - 1, cross_product + ii*2 + 1, x + ii + 1, x[ii]);
00562 }
00563
00564
00565 for (int ii = 0; ii < limbs; ++ii)
00566 {
00567 uint32_t xi = x[ii];
00568 uint64_t si = (uint64_t)xi * xi;
00569 product[ii*2] = (uint32_t)si;
00570 product[ii*2+1] = (uint32_t)(si >> 32);
00571 }
00572
00573
00574 product[limbs*2 - 1] += AddLeftShift32(limbs*2 - 2, product + 1, cross_product + 1, 1);
00575 }
00576
00577
00578
00579 void Multiply(
00580 int limbs,
00581 uint32_t *product,
00582 const uint32_t *x,
00583 const uint32_t *y)
00584 {
00585
00586 if (limbs < 30 || (limbs & 1))
00587 {
00588 SimpleMultiply(limbs, product, x, y);
00589 return;
00590 }
00591
00592
00593 Multiply(limbs/2, product, x, y);
00594 Multiply(limbs/2, product + limbs, x + limbs/2, y + limbs/2);
00595
00596
00597 uint32_t *xsum = (uint32_t*)alloca((limbs/2)*4);
00598 uint32_t xcarry = Add(xsum, x, limbs/2, x + limbs/2, limbs/2);
00599
00600
00601 uint32_t *ysum = (uint32_t*)alloca((limbs/2)*4);
00602 uint32_t ycarry = Add(ysum, y, limbs/2, y + limbs/2, limbs/2);
00603
00604
00605 uint32_t *cross_product = (uint32_t*)alloca(limbs*4);
00606 Multiply(limbs/2, cross_product, xsum, ysum);
00607
00608
00609 int32_t cross_carry = Subtract(cross_product, limbs, product, limbs);
00610 cross_carry += Subtract(cross_product, limbs, product + limbs, limbs);
00611
00612
00613 if (ycarry) cross_carry += Add(cross_product + limbs/2, limbs/2, xsum, limbs/2);
00614 if (xcarry) cross_carry += Add(cross_product + limbs/2, limbs/2, ysum, limbs/2);
00615 cross_carry += (xcarry & ycarry);
00616
00617
00618 cross_carry += Add(product + limbs/2, limbs*3/2, cross_product, limbs);
00619
00620
00621 if (cross_carry) Add32(product + limbs*3/2, limbs/2, cross_carry);
00622 }
00623
00624
00625
00626 void Square(
00627 int limbs,
00628 uint32_t *product,
00629 const uint32_t *x)
00630 {
00631
00632 if (limbs < 40 || (limbs & 1))
00633 {
00634 SimpleSquare(limbs, product, x);
00635 return;
00636 }
00637
00638
00639 Square(limbs/2, product, x);
00640 Square(limbs/2, product + limbs, x + limbs/2);
00641
00642
00643 uint32_t *cross_product = (uint32_t*)alloca(limbs*4);
00644 Multiply(limbs/2, cross_product, x, x + limbs/2);
00645
00646
00647 uint32_t cross_carry = AddLeftShift32(limbs, product + limbs/2, cross_product, 1);
00648
00649
00650 if (cross_carry) Add32(product + limbs*3/2, limbs/2, cross_carry);
00651 }
00652
00653
00654 uint32_t Modulus32(
00655 int limbs,
00656 const uint32_t *N,
00657 uint32_t divisor)
00658 {
00659 uint32_t remainder = N[limbs-1] < divisor ? N[limbs-1] : 0;
00660 uint32_t counter = N[limbs-1] < divisor ? limbs-1 : limbs;
00661
00662 while (counter--) remainder = (uint32_t)((((uint64_t)remainder << 32) | N[counter]) % divisor);
00663
00664 return remainder;
00665 }
00666
00667
00668
00669
00670
00671
00672
00673 uint32_t Divide32(
00674 int limbs,
00675 uint32_t *A,
00676 uint32_t divisor)
00677 {
00678 uint64_t r = 0;
00679 for (int ii = limbs-1; ii >= 0; --ii)
00680 {
00681 uint64_t n = (r << 32) | A[ii];
00682 A[ii] = (uint32_t)(n / divisor);
00683 r = n % divisor;
00684 }
00685
00686 return (uint32_t)r;
00687 }
00688
00689
00690 uint32_t MulInverse32(uint32_t n)
00691 {
00692
00693 uint32_t hb = (~(n - 1) >> 31);
00694 uint32_t u1 = -(int32_t)(0xFFFFFFFF / n + hb);
00695 uint32_t g1 = ((-(int32_t)hb) & (0xFFFFFFFF % n + 1)) - n;
00696
00697 if (!g1) {
00698 if (n != 1) return 0;
00699 else return 1;
00700 }
00701
00702 uint32_t q, u = 1, g = n;
00703
00704 for (;;) {
00705 q = g / g1;
00706 g %= g1;
00707
00708 if (!g) {
00709 if (g1 != 1) return 0;
00710 else return u1;
00711 }
00712
00713 u -= q*u1;
00714 q = g1 / g;
00715 g1 %= g;
00716
00717 if (!g1) {
00718 if (g != 1) return 0;
00719 else return u;
00720 }
00721
00722 u1 -= q*u;
00723 }
00724 }
00725
00726
00727
00728
00729
00730
00731
00732
00733 bool MulInverse(
00734 int limbs,
00735 const uint32_t *u,
00736 uint32_t *result)
00737 {
00738 uint32_t *u1 = (uint32_t*)alloca(limbs*4);
00739 uint32_t *u3 = (uint32_t*)alloca(limbs*4);
00740 uint32_t *v1 = (uint32_t*)alloca(limbs*4);
00741 uint32_t *v3 = (uint32_t*)alloca(limbs*4);
00742 uint32_t *t1 = (uint32_t*)alloca(limbs*4);
00743 uint32_t *t3 = (uint32_t*)alloca(limbs*4);
00744 uint32_t *q = (uint32_t*)alloca((limbs+1)*4);
00745 uint32_t *w = (uint32_t*)alloca((limbs+1)*4);
00746
00747
00748 {
00749 Set32(u1, limbs, 0);
00750 Set32(v1, limbs, 1);
00751 Set(v3, limbs, u);
00752 }
00753
00754
00755 if (!LimbDegree(v3, limbs))
00756 return false;
00757
00758
00759 Set32(w, limbs, 0);
00760 w[limbs] = 1;
00761 Divide(w, limbs+1, v3, limbs, q, t3);
00762
00763 SimpleMultiplyLowHalf(limbs, t1, q, v1);
00764 Add(t1, limbs, u1, limbs);
00765
00766 for (;;)
00767 {
00768 if (!LimbDegree(t3, limbs))
00769 {
00770 Set(result, limbs, v1);
00771 return Equal32(v3, limbs, 1);
00772 }
00773
00774 Divide(v3, limbs, t3, limbs, q, u3);
00775 SimpleMultiplyLowHalf(limbs, u1, q, t1);
00776 Add(u1, limbs, v1, limbs);
00777
00778 if (!LimbDegree(u3, limbs))
00779 {
00780 Negate(limbs, result, t1);
00781 return Equal32(t3, limbs, 1);
00782 }
00783
00784 Divide(t3, limbs, u3, limbs, q, v3);
00785 SimpleMultiplyLowHalf(limbs, v1, q, u1);
00786 Add(v1, limbs, t1, limbs);
00787
00788 if (!LimbDegree(v3, limbs))
00789 {
00790 Set(result, limbs, u1);
00791 return Equal32(u3, limbs, 1);
00792 }
00793
00794 Divide(u3, limbs, v3, limbs, q, t3);
00795 SimpleMultiplyLowHalf(limbs, t1, q, v1);
00796 Add(t1, limbs, u1, limbs);
00797
00798 if (!LimbDegree(t3, limbs))
00799 {
00800 Negate(limbs, result, v1);
00801 return Equal32(v3, limbs, 1);
00802 }
00803
00804 Divide(v3, limbs, t3, limbs, q, u3);
00805 SimpleMultiplyLowHalf(limbs, u1, q, t1);
00806 Add(u1, limbs, v1, limbs);
00807
00808 if (!LimbDegree(u3, limbs))
00809 {
00810 Set(result, limbs, t1);
00811 return Equal32(t3, limbs, 1);
00812 }
00813
00814 Divide(t3, limbs, u3, limbs, q, v3);
00815 SimpleMultiplyLowHalf(limbs, v1, q, u1);
00816 Add(v1, limbs, t1, limbs);
00817
00818 if (!LimbDegree(v3, limbs))
00819 {
00820 Negate(limbs, result, u1);
00821 return Equal32(u3, limbs, 1);
00822 }
00823
00824 Divide(u3, limbs, v3, limbs, q, t3);
00825 SimpleMultiplyLowHalf(limbs, t1, q, v1);
00826 Add(t1, limbs, u1, limbs);
00827 }
00828 }
00829
00830
00831
00832
00833 bool Divide(
00834 const uint32_t *u,
00835 int u_limbs,
00836 const uint32_t *v,
00837 int v_limbs,
00838 uint32_t *q,
00839 uint32_t *r)
00840 {
00841
00842 int v_used = LimbDegree(v, v_limbs);
00843 if (!v_used) return false;
00844
00845 int u_used = LimbDegree(u, u_limbs);
00846
00847
00848 if (u_used <= v_used && Less(u, u_used, v, v_used))
00849 {
00850
00851 Set(r, v_limbs, u, u_used);
00852 Set32(q, u_limbs, 0);
00853 return true;
00854 }
00855
00856
00857 if (v_used == 1)
00858 {
00859
00860 Set(q, u_limbs, u);
00861 Set32(r, v_limbs, Divide32(u_limbs, q, v[0]));
00862 return true;
00863 }
00864
00865
00866 int shift = 32 - Degree(v[v_used - 1]);
00867 int uu_used = u_used;
00868 if (shift > 0) uu_used++;
00869
00870 uint32_t *uu = (uint32_t*)alloca(uu_used*4);
00871 uint32_t *vv = (uint32_t*)alloca(v_used*4);
00872
00873
00874 if (shift > 0)
00875 {
00876 ShiftLeft(v_used, vv, v, shift);
00877 uu[u_used] = ShiftLeft(u_used, uu, u, shift);
00878 }
00879 else
00880 {
00881 Set(uu, u_used, u);
00882 Set(vv, v_used, v);
00883 }
00884
00885 int q_high_index = uu_used - v_used;
00886
00887 if (GreaterOrEqual(uu + q_high_index, v_used, vv, v_used))
00888 {
00889 Subtract(uu + q_high_index, v_used, vv, v_used);
00890 Set32(q + q_high_index, u_used - q_high_index, 1);
00891 }
00892 else
00893 {
00894 Set32(q + q_high_index, u_used - q_high_index, 0);
00895 }
00896
00897 uint32_t *vq_product = (uint32_t*)alloca((v_used+1)*4);
00898
00899
00900 for (int ii = q_high_index - 1; ii >= 0; --ii)
00901 {
00902 uint64_t q_full = *(uint64_t*)(uu + ii + v_used - 1) / vv[v_used - 1];
00903 uint32_t q_low = (uint32_t)q_full;
00904 uint32_t q_high = (uint32_t)(q_full >> 32);
00905
00906 vq_product[v_used] = Multiply32(v_used, vq_product, vv, q_low);
00907
00908 if (q_high)
00909 Add(vq_product + 1, v_used, vv, v_used);
00910
00911 if (Subtract(uu + ii, v_used + 1, vq_product, v_used + 1))
00912 {
00913 --q_low;
00914 if (Add(uu + ii, v_used + 1, vv, v_used) == 0)
00915 {
00916 --q_low;
00917 Add(uu + ii, v_used + 1, vv, v_used);
00918 }
00919 }
00920
00921 q[ii] = q_low;
00922 }
00923
00924 memset(r + v_used, 0, (v_limbs - v_used)*4);
00925 ShiftRight(v_used, r, uu, shift);
00926
00927 return true;
00928 }
00929
00930
00931
00932 bool Modulus(
00933 const uint32_t *u,
00934 int u_limbs,
00935 const uint32_t *v,
00936 int v_limbs,
00937 uint32_t *r)
00938 {
00939
00940 int v_used = LimbDegree(v, v_limbs);
00941 if (!v_used) return false;
00942
00943 int u_used = LimbDegree(u, u_limbs);
00944
00945
00946 if (u_used <= v_used && Less(u, u_used, v, v_used))
00947 {
00948
00949 Set(r, v_limbs, u, u_used);
00950
00951 return true;
00952 }
00953
00954
00955 if (v_used == 1)
00956 {
00957
00958
00959 Set32(r, v_limbs, Modulus32(u_limbs, u, v[0]));
00960 return true;
00961 }
00962
00963
00964 int shift = 32 - Degree(v[v_used - 1]);
00965 int uu_used = u_used;
00966 if (shift > 0) uu_used++;
00967
00968 uint32_t *uu = (uint32_t*)alloca(uu_used*4);
00969 uint32_t *vv = (uint32_t*)alloca(v_used*4);
00970
00971
00972 if (shift > 0)
00973 {
00974 ShiftLeft(v_used, vv, v, shift);
00975 uu[u_used] = ShiftLeft(u_used, uu, u, shift);
00976 }
00977 else
00978 {
00979 Set(uu, u_used, u);
00980 Set(vv, v_used, v);
00981 }
00982
00983 int q_high_index = uu_used - v_used;
00984
00985 if (GreaterOrEqual(uu + q_high_index, v_used, vv, v_used))
00986 {
00987 Subtract(uu + q_high_index, v_used, vv, v_used);
00988
00989 }
00990 else
00991 {
00992
00993 }
00994
00995 uint32_t *vq_product = (uint32_t*)alloca((v_used+1)*4);
00996
00997
00998 for (int ii = q_high_index - 1; ii >= 0; --ii)
00999 {
01000 uint64_t q_full = *(uint64_t*)(uu + ii + v_used - 1) / vv[v_used - 1];
01001 uint32_t q_low = (uint32_t)q_full;
01002 uint32_t q_high = (uint32_t)(q_full >> 32);
01003
01004 vq_product[v_used] = Multiply32(v_used, vq_product, vv, q_low);
01005
01006 if (q_high)
01007 Add(vq_product + 1, v_used, vv, v_used);
01008
01009 if (Subtract(uu + ii, v_used + 1, vq_product, v_used + 1))
01010 {
01011
01012 if (Add(uu + ii, v_used + 1, vv, v_used) == 0)
01013 {
01014
01015 Add(uu + ii, v_used + 1, vv, v_used);
01016 }
01017 }
01018
01019
01020 }
01021
01022 memset(r + v_used, 0, (v_limbs - v_used)*4);
01023 ShiftRight(v_used, r, uu, shift);
01024
01025 return true;
01026 }
01027
01028
01029
01030
01031
01032 void BarrettModulusPrecomp(
01033 int limbs,
01034 const uint32_t *m,
01035 uint32_t *m_inv)
01036 {
01037 uint32_t *q = (uint32_t*)alloca((limbs*2+1)*4);
01038
01039
01040 big::Set32(q, limbs*2, 0);
01041 q[limbs*2] = 1;
01042
01043
01044 big::Divide(q, limbs*2+1, m, limbs, q, m_inv);
01045
01046
01047 Set(m_inv, limbs, q);
01048 }
01049
01050
01051
01052 void BarrettModulus(
01053 int limbs,
01054 const uint32_t *x,
01055 const uint32_t *m,
01056 const uint32_t *m_inv,
01057 uint32_t *result)
01058 {
01059
01060
01061
01062 uint32_t *q2 = (uint32_t*)alloca((limbs+3)*4);
01063 int ii, jj = limbs - 1;
01064
01065
01066 *(uint64_t*)q2 = (uint64_t)m_inv[jj] * x[jj];
01067 *(uint64_t*)(q2 + 1) = (uint64_t)q2[1] + x[jj];
01068
01069 for (ii = 1; ii < limbs; ++ii)
01070 *(uint64_t*)(q2 + ii + 1) = ((uint64_t)q2[ii + 1] + x[jj + ii]) + AddMultiply32(ii + 1, q2, m_inv + jj - ii, x[jj + ii]);
01071
01072 *(uint64_t*)(q2 + ii + 1) = ((uint64_t)q2[ii + 1] + x[jj + ii]) + AddMultiply32(ii, q2 + 1, m_inv, x[jj + ii]);
01073
01074 q2 += 2;
01075
01076
01077 uint32_t *r2 = (uint32_t*)alloca((limbs+1)*4);
01078
01079
01080 Multiply32(limbs + 1, r2, q2, m[0]);
01081 for (int ii = 1; ii < limbs; ++ii)
01082 AddMultiply32(limbs + 1 - ii, r2 + ii, q2, m[ii]);
01083
01084
01085 uint32_t *r = (uint32_t*)alloca((limbs+1)*4);
01086 if (Subtract(r, x, limbs+1, r2, limbs+1))
01087 {
01088 while (!Subtract(r, limbs+1, m, limbs));
01089 }
01090 else
01091 {
01092 while (GreaterOrEqual(r, limbs+1, m, limbs))
01093 Subtract(r, limbs+1, m, limbs);
01094 }
01095
01096 Set(result, limbs, r);
01097 }
01098
01099
01100 bool MulMod(
01101 int limbs,
01102 const uint32_t *x,
01103 const uint32_t *y,
01104 const uint32_t *modulus,
01105 uint32_t *result)
01106 {
01107 uint32_t *product = (uint32_t*)alloca(limbs*2*4);
01108
01109 Multiply(limbs, product, x, y);
01110
01111 return Modulus(product, limbs * 2, modulus, limbs, result);
01112 }
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142 int ToInt(uint32_t *lhs, int max_limbs, const char *rhs, uint32_t base)
01143 {
01144 if (max_limbs < 2) return 0;
01145
01146 lhs[0] = 0;
01147 int used = 1;
01148
01149 char ch;
01150 while ((ch = *rhs++))
01151 {
01152 uint32_t mod;
01153 if (ch >= '0' && ch <= '9') mod = ch - '0';
01154 else mod = toupper(ch) - 'A' + 10;
01155 if (mod >= base) return 0;
01156
01157
01158 uint32_t carry = MultiplyAdd32(used, lhs, base, mod);
01159
01160
01161 if (carry)
01162 {
01163 if (used >= max_limbs)
01164 return 0;
01165
01166 lhs[used++] = carry;
01167 }
01168 }
01169
01170 if (used < max_limbs)
01171 Set32(lhs+used, max_limbs-used, 0);
01172
01173 return used;
01174 }
01175
01176
01177
01178
01179
01180
01181 void GCD(
01182 const uint32_t *a,
01183 int a_limbs,
01184 const uint32_t *b,
01185 int b_limbs,
01186 uint32_t *result)
01187 {
01188 int limbs = (a_limbs <= b_limbs) ? a_limbs : b_limbs;
01189
01190 uint32_t *g = (uint32_t*)alloca(limbs*4);
01191 uint32_t *g1 = (uint32_t*)alloca(limbs*4);
01192
01193 if (a_limbs <= b_limbs)
01194 {
01195
01196 Set(g, limbs, a, a_limbs);
01197 Modulus(b, b_limbs, a, a_limbs, g1);
01198 }
01199 else
01200 {
01201
01202 Set(g, limbs, b, b_limbs);
01203 Modulus(a, a_limbs, b, b_limbs, g1);
01204 }
01205
01206 for (;;) {
01207
01208 Modulus(g, limbs, g1, limbs, g);
01209
01210 if (!LimbDegree(g, limbs)) {
01211 Set(result, limbs, g1, limbs);
01212 return;
01213 }
01214
01215
01216 Modulus(g1, limbs, g, limbs, g1);
01217
01218 if (!LimbDegree(g1, limbs)) {
01219 Set(result, limbs, g, limbs);
01220 return;
01221 }
01222 }
01223 }
01224
01225
01226
01227
01228
01229
01230
01231
01232 bool InvMod(
01233 const uint32_t *u,
01234 int u_limbs,
01235 const uint32_t *v,
01236 int limbs,
01237 uint32_t *result)
01238 {
01239 uint32_t *u1 = (uint32_t*)alloca(limbs*4);
01240 uint32_t *u3 = (uint32_t*)alloca(limbs*4);
01241 uint32_t *v1 = (uint32_t*)alloca(limbs*4);
01242 uint32_t *v3 = (uint32_t*)alloca(limbs*4);
01243 uint32_t *t1 = (uint32_t*)alloca(limbs*4);
01244 uint32_t *t3 = (uint32_t*)alloca(limbs*4);
01245 uint32_t *q = (uint32_t*)alloca((limbs + u_limbs)*4);
01246
01247
01248 {
01249 Set32(u1, limbs, 0);
01250 Set32(v1, limbs, 1);
01251 Set(u3, limbs, v);
01252
01253
01254 Modulus(u, u_limbs, v, limbs, v3);
01255 }
01256
01257 for (;;)
01258 {
01259 if (!LimbDegree(v3, limbs))
01260 {
01261 Subtract(result, v, limbs, u1, limbs);
01262 return Equal32(u3, limbs, 1);
01263 }
01264
01265 Divide(u3, limbs, v3, limbs, q, t3);
01266 SimpleMultiplyLowHalf(limbs, t1, q, v1);
01267 Add(t1, limbs, u1, limbs);
01268
01269 if (!LimbDegree(t3, limbs))
01270 {
01271 Set(result, limbs, v1);
01272 return Equal32(v3, limbs, 1);
01273 }
01274
01275 Divide(v3, limbs, t3, limbs, q, u3);
01276 SimpleMultiplyLowHalf(limbs, u1, q, t1);
01277 Add(u1, limbs, v1, limbs);
01278
01279 if (!LimbDegree(u3, limbs))
01280 {
01281 Subtract(result, v, limbs, t1, limbs);
01282 return Equal32(t3, limbs, 1);
01283 }
01284
01285 Divide(t3, limbs, u3, limbs, q, v3);
01286 SimpleMultiplyLowHalf(limbs, v1, q, u1);
01287 Add(v1, limbs, t1, limbs);
01288
01289 if (!LimbDegree(v3, limbs))
01290 {
01291 Set(result, limbs, u1);
01292 return Equal32(u3, limbs, 1);
01293 }
01294
01295 Divide(u3, limbs, v3, limbs, q, t3);
01296 SimpleMultiplyLowHalf(limbs, t1, q, v1);
01297 Add(t1, limbs, u1, limbs);
01298
01299 if (!LimbDegree(t3, limbs))
01300 {
01301 Subtract(result, v, limbs, v1, limbs);
01302 return Equal32(v3, limbs, 1);
01303 }
01304
01305 Divide(v3, limbs, t3, limbs, q, u3);
01306 SimpleMultiplyLowHalf(limbs, u1, q, t1);
01307 Add(u1, limbs, v1, limbs);
01308
01309 if (!LimbDegree(u3, limbs))
01310 {
01311 Set(result, limbs, t1);
01312 return Equal32(t3, limbs, 1);
01313 }
01314
01315 Divide(t3, limbs, u3, limbs, q, v3);
01316 SimpleMultiplyLowHalf(limbs, v1, q, u1);
01317 Add(v1, limbs, t1, limbs);
01318 }
01319 }
01320
01321
01322
01323
01324
01325
01326 bool SquareRoot(
01327 int limbs,
01328 const uint32_t *square,
01329 uint32_t *root)
01330 {
01331 uint32_t *q = (uint32_t*)alloca(limbs*2*4);
01332 uint32_t *r = (uint32_t*)alloca((limbs+1)*4);
01333
01334
01335 Set(root, limbs, square + limbs);
01336
01337 int ctr = 64;
01338 while (ctr--)
01339 {
01340
01341 Divide(square, limbs*2, root, limbs, q, r);
01342
01343
01344 Add(q, limbs+1, root, limbs);
01345
01346
01347
01348 if (q[0] & 1) Add32(q, limbs+1, 2);
01349
01350 ShiftRight(limbs+1, q, q, 1);
01351
01352
01353 if (Equal(limbs, q, root))
01354 return true;
01355
01356
01357 Set(root, limbs, q);
01358 }
01359
01360
01361
01362 return false;
01363 }
01364
01365
01366 uint32_t MonReducePrecomp(uint32_t modulus0)
01367 {
01368
01369 return MulInverse32(-(int32_t)modulus0);
01370 }
01371
01372
01373 void MonInputResidue(
01374 const uint32_t *n,
01375 int n_limbs,
01376 const uint32_t *modulus,
01377 int m_limbs,
01378 uint32_t *n_residue)
01379 {
01380
01381 uint32_t *p = (uint32_t*)alloca((n_limbs+m_limbs)*4);
01382 Set(p+m_limbs, n_limbs, n, n_limbs);
01383 Set32(p, m_limbs, 0);
01384
01385
01386 Modulus(p, n_limbs+m_limbs, modulus, m_limbs, n_residue);
01387 }
01388
01389
01390 void MonPro(
01391 int limbs,
01392 const uint32_t *a_residue,
01393 const uint32_t *b_residue,
01394 const uint32_t *modulus,
01395 uint32_t mod_inv,
01396 uint32_t *result)
01397 {
01398 uint32_t *t = (uint32_t*)alloca(limbs*2*4);
01399
01400 Multiply(limbs, t, a_residue, b_residue);
01401 MonReduce(limbs, t, modulus, mod_inv, result);
01402 }
01403
01404
01405 void MonInverse(
01406 int limbs,
01407 const uint32_t *a_residue,
01408 const uint32_t *modulus,
01409 uint32_t mod_inv,
01410 uint32_t *result)
01411 {
01412 Set(result, limbs, a_residue);
01413 MonFinish(limbs, result, modulus, mod_inv);
01414 InvMod(result, limbs, modulus, limbs, result);
01415 MonInputResidue(result, limbs, modulus, limbs, result);
01416 }
01417
01418
01419
01420
01421 void MonReduce(
01422 int limbs,
01423 uint32_t *s,
01424 const uint32_t *modulus,
01425 uint32_t mod_inv,
01426 uint32_t *result)
01427 {
01428
01429 for (int ii = 0; ii < limbs; ++ii)
01430 {
01431 uint32_t q = s[0] * mod_inv;
01432 s[0] = AddMultiply32(limbs, s, modulus, q);
01433 ++s;
01434 }
01435
01436
01437 if (Add(result, s, limbs, s - limbs, limbs))
01438 {
01439
01440 Subtract(result, limbs, modulus, limbs);
01441 }
01442 }
01443
01444
01445 void MonFinish(
01446 int limbs,
01447 uint32_t *n,
01448 const uint32_t *modulus,
01449 uint32_t mod_inv)
01450 {
01451 uint32_t *t = (uint32_t*)alloca(limbs*2*4);
01452 memcpy(t, n, limbs*4);
01453 memset(t + limbs, 0, limbs*4);
01454
01455
01456 MonReduce(limbs, t, modulus, mod_inv, n);
01457
01458
01459 if (!Less(limbs, n, modulus))
01460 Subtract(n, limbs, modulus, limbs);
01461 }
01462
01463
01464 static void SimpleMonExpMod(
01465 const uint32_t *base,
01466 const uint32_t *exponent,
01467 int exponent_limbs,
01468 const uint32_t *modulus,
01469 int mod_limbs,
01470 uint32_t mod_inv,
01471 uint32_t *result)
01472 {
01473 bool set = false;
01474
01475 uint32_t *temp = (uint32_t*)alloca((mod_limbs*2)*4);
01476
01477
01478 for (int ii = exponent_limbs - 1; ii >= 0; --ii)
01479 {
01480 uint32_t e_i = exponent[ii];
01481
01482 for (uint32_t mask = 0x80000000; mask; mask >>= 1)
01483 {
01484 if (set)
01485 {
01486
01487 Square(mod_limbs, temp, result);
01488 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01489
01490 if (e_i & mask)
01491 {
01492
01493 Multiply(mod_limbs, temp, result, base);
01494 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01495 }
01496 }
01497 else
01498 {
01499 if (e_i & mask)
01500 {
01501
01502 Set(result, mod_limbs, base, mod_limbs);
01503 set = true;
01504 }
01505 }
01506 }
01507 }
01508 }
01509
01510
01511
01512 uint32_t *PrecomputeWindow(const uint32_t *base, const uint32_t *modulus, int limbs, uint32_t mod_inv, int window_bits)
01513 {
01514 uint32_t *temp = (uint32_t*)alloca(limbs*2*4);
01515
01516 uint32_t *base_squared = (uint32_t*)alloca(limbs*4);
01517 Square(limbs, temp, base);
01518 MonReduce(limbs, temp, modulus, mod_inv, base_squared);
01519
01520
01521 uint32_t k = (1 << (window_bits - 1));
01522
01523 uint32_t *window = RakNet::OP_NEW_ARRAY<uint32_t>(limbs * k, __FILE__, __LINE__ );
01524
01525 uint32_t *cw = window;
01526 Set(window, limbs, base);
01527
01528 while (--k)
01529 {
01530
01531 Multiply(limbs, temp, cw, base_squared);
01532 MonReduce(limbs, temp, modulus, mod_inv, cw + limbs);
01533 cw += limbs;
01534 }
01535
01536 return window;
01537 };
01538
01539
01540
01541
01542 void MonExpMod(
01543 const uint32_t *base,
01544 const uint32_t *exponent,
01545 int exponent_limbs,
01546 const uint32_t *modulus,
01547 int mod_limbs,
01548 uint32_t mod_inv,
01549 uint32_t *result)
01550 {
01551
01552 int window_bits = Degree(exponent_limbs);
01553
01554
01555 if (window_bits < 4)
01556 {
01557 SimpleMonExpMod(base, exponent, exponent_limbs, modulus, mod_limbs, mod_inv, result);
01558 return;
01559 }
01560
01561
01562 uint32_t *window = PrecomputeWindow(base, modulus, mod_limbs, mod_inv, window_bits);
01563
01564 bool seen_bits = false;
01565 uint32_t e_bits=0, trailing_zeroes=0, used_bits = 0;
01566
01567 uint32_t *temp = (uint32_t*)alloca((mod_limbs*2)*4);
01568
01569 for (int ii = exponent_limbs - 1; ii >= 0; --ii)
01570 {
01571 uint32_t e_i = exponent[ii];
01572
01573 int wordbits = 32;
01574 while (wordbits--)
01575 {
01576
01577 if (used_bits)
01578 {
01579
01580 if (e_i >> 31)
01581 {
01582 e_bits <<= 1;
01583 e_bits |= 1;
01584
01585 trailing_zeroes = 0;
01586 }
01587 else
01588 {
01589 e_bits <<= 1;
01590
01591 ++trailing_zeroes;
01592 }
01593
01594 ++used_bits;
01595
01596
01597 if (used_bits == (uint32_t) window_bits)
01598 {
01599
01600 uint32_t window_index = e_bits >> (trailing_zeroes + 1);
01601
01602 if (seen_bits)
01603 {
01604 uint32_t ctr = used_bits - trailing_zeroes;
01605 while (ctr--)
01606 {
01607
01608 Square(mod_limbs, temp, result);
01609 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01610 }
01611
01612
01613 Multiply(mod_limbs, temp, result, &window[window_index * mod_limbs]);
01614 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01615 }
01616 else
01617 {
01618
01619 Set(result, mod_limbs, &window[window_index * mod_limbs]);
01620 seen_bits = true;
01621 }
01622
01623 while (trailing_zeroes--)
01624 {
01625
01626 Square(mod_limbs, temp, result);
01627 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01628 }
01629
01630 used_bits = 0;
01631 }
01632 }
01633 else
01634 {
01635
01636 if (e_i >> 31)
01637 {
01638 used_bits = 1;
01639 e_bits = 1;
01640 trailing_zeroes = 0;
01641 }
01642 else
01643 {
01644
01645 if (seen_bits)
01646 {
01647
01648 Square(mod_limbs, temp, result);
01649 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01650 }
01651 }
01652 }
01653
01654 e_i <<= 1;
01655 }
01656 }
01657
01658 if (used_bits)
01659 {
01660
01661 uint32_t window_index = e_bits >> (trailing_zeroes + 1);
01662
01663 if (seen_bits)
01664 {
01665 uint32_t ctr = used_bits - trailing_zeroes;
01666 while (ctr--)
01667 {
01668
01669 Square(mod_limbs, temp, result);
01670 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01671 }
01672
01673
01674 Multiply(mod_limbs, temp, result, &window[window_index * mod_limbs]);
01675 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01676 }
01677 else
01678 {
01679
01680 Set(result, mod_limbs, &window[window_index * mod_limbs]);
01681
01682 }
01683
01684 while (trailing_zeroes--)
01685 {
01686
01687 Square(mod_limbs, temp, result);
01688 MonReduce(mod_limbs, temp, modulus, mod_inv, result);
01689 }
01690
01691
01692 }
01693
01694 RakNet::OP_DELETE_ARRAY(window, __FILE__, __LINE__);
01695 }
01696
01697
01698
01699 void ExpMod(
01700 const uint32_t *base,
01701 int base_limbs,
01702 const uint32_t *exponent,
01703 int exponent_limbs,
01704 const uint32_t *modulus,
01705 int mod_limbs,
01706 uint32_t mod_inv,
01707 uint32_t *result)
01708 {
01709 uint32_t *mon_base = (uint32_t*)alloca(mod_limbs*4);
01710 MonInputResidue(base, base_limbs, modulus, mod_limbs, mon_base);
01711
01712 MonExpMod(mon_base, exponent, exponent_limbs, modulus, mod_limbs, mod_inv, result);
01713
01714 MonFinish(mod_limbs, result, modulus, mod_inv);
01715 }
01716
01717
01718 uint32_t ExpMod(uint32_t b, uint32_t e, uint32_t m)
01719 {
01720
01721 if (b == 0 || m <= 1) return 0;
01722 if (e == 0) return 1;
01723
01724
01725 uint32_t mask = 0x80000000;
01726 while ((e & mask) == 0) mask >>= 1;
01727
01728
01729 uint32_t r = b;
01730
01731 while (mask >>= 1)
01732 {
01733
01734
01735
01736
01737 r = (uint32_t)(((uint64_t)r * r) % m);
01738
01739
01740 if (e & mask) r = (uint32_t)(((uint64_t)r * b) % m);
01741 }
01742
01743 return r;
01744 }
01745
01746
01747
01748 bool RabinMillerPrimeTest(
01749 const uint32_t *n,
01750 int limbs,
01751 uint32_t k)
01752 {
01753
01754 uint32_t *n1 = (uint32_t *)alloca(limbs*4);
01755 Set(n1, limbs, n);
01756 Subtract32(n1, limbs, 1);
01757
01758
01759 uint32_t *d = (uint32_t *)alloca(limbs*4);
01760 Set(d, limbs, n1);
01761
01762
01763 while (!(d[0] & 1))
01764 ShiftRight(limbs, d, d, 1);
01765
01766 uint32_t *a = (uint32_t *)alloca(limbs*4);
01767 uint32_t *t = (uint32_t *)alloca(limbs*4);
01768 uint32_t *p = (uint32_t *)alloca((limbs*2)*4);
01769 uint32_t n_inv = MonReducePrecomp(n[0]);
01770
01771
01772 while (k--)
01773 {
01774
01775 do fillBufferMT(a,limbs*4);
01776 while (GreaterOrEqual(a, limbs, n, limbs));
01777
01778
01779 ExpMod(a, limbs, d, limbs, n, limbs, n_inv, a);
01780
01781 Set(t, limbs, d);
01782 while (!Equal(limbs, t, n1) &&
01783 !Equal32(a, limbs, 1) &&
01784 !Equal(limbs, a, n1))
01785 {
01786
01787 Square(limbs, p, a);
01788 Modulus(p, limbs*2, n, limbs, a);
01789
01790
01791 ShiftLeft(limbs, t, t, 1);
01792 }
01793
01794 if (!Equal(limbs, a, n1) && !(t[0] & 1)) return false;
01795 }
01796
01797 return true;
01798 }
01799
01800
01801 void GenerateStrongPseudoPrime(
01802 uint32_t *n,
01803 int limbs)
01804 {
01805 do {
01806 fillBufferMT(n,limbs*4);
01807 n[limbs-1] |= 0x80000000;
01808 n[0] |= 1;
01809 } while (!RabinMillerPrimeTest(n, limbs, 40));
01810 }
01811 }
01812
01813 #endif