00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017
00018 #include <iostream>
00019
00020 #ifdef SSE2_INTRINSICS_AVAILABLE
00021 #ifdef __GNUC__
00022 #include <xmmintrin.h>
00023 #include <signal.h>
00024 #include <setjmp.h>
00025 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00026 #include <malloc.h>
00027 #else
00028 #include <stdlib.h>
00029 #endif
00030 #else
00031 #include <emmintrin.h>
00032 #endif
00033 #elif defined(_MSC_VER) && defined(_M_IX86)
00034 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00035 #elif defined(__GNUC__) && defined(__i386__)
00036 #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00037 #endif
00038
00039 NAMESPACE_BEGIN(CryptoPP)
00040
00041 bool FunctionAssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00042 {
00043 if (valueType != typeid(Integer))
00044 return false;
00045 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00046 return true;
00047 }
00048
00049 static const char s_RunAtStartup = (AssignIntToInteger = FunctionAssignIntToInteger, 0);
00050
00051 #ifdef SSE2_INTRINSICS_AVAILABLE
00052 template <class T>
00053 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00054 {
00055 CheckSize(n);
00056 if (n == 0)
00057 return NULL;
00058 if (n >= 4)
00059 {
00060 void *p;
00061 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00062 while (!(p = _mm_malloc(sizeof(T)*n, 16)))
00063 #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00064 while (!(p = memalign(16, sizeof(T)*n)))
00065 #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00066 while (!(p = malloc(sizeof(T)*n)))
00067 #else
00068 while (!(p = (byte *)malloc(sizeof(T)*n + 8)))
00069 #endif
00070 CallNewHandler();
00071
00072 #ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00073 assert(m_pBlock == NULL);
00074 m_pBlock = p;
00075 if (!IsAlignedOn(p, 16))
00076 {
00077 assert(IsAlignedOn(p, 8));
00078 p = (byte *)p + 8;
00079 }
00080 #endif
00081
00082 assert(IsAlignedOn(p, 16));
00083 return (T*)p;
00084 }
00085 return new T[n];
00086 }
00087
00088 template <class T>
00089 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00090 {
00091 memset(p, 0, n*sizeof(T));
00092 if (n >= 4)
00093 {
00094 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00095 _mm_free(p);
00096 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00097 assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00098 free(m_pBlock);
00099 m_pBlock = NULL;
00100 #else
00101 free(p);
00102 #endif
00103 }
00104 else
00105 delete [] (T *)p;
00106 }
00107 #endif
00108
00109 static int Compare(const word *A, const word *B, unsigned int N)
00110 {
00111 while (N--)
00112 if (A[N] > B[N])
00113 return 1;
00114 else if (A[N] < B[N])
00115 return -1;
00116
00117 return 0;
00118 }
00119
00120 static word Increment(word *A, unsigned int N, word B=1)
00121 {
00122 assert(N);
00123 word t = A[0];
00124 A[0] = t+B;
00125 if (A[0] >= t)
00126 return 0;
00127 for (unsigned i=1; i<N; i++)
00128 if (++A[i])
00129 return 0;
00130 return 1;
00131 }
00132
00133 static word Decrement(word *A, unsigned int N, word B=1)
00134 {
00135 assert(N);
00136 word t = A[0];
00137 A[0] = t-B;
00138 if (A[0] <= t)
00139 return 0;
00140 for (unsigned i=1; i<N; i++)
00141 if (A[i]--)
00142 return 0;
00143 return 1;
00144 }
00145
00146 static void TwosComplement(word *A, unsigned int N)
00147 {
00148 Decrement(A, N);
00149 for (unsigned i=0; i<N; i++)
00150 A[i] = ~A[i];
00151 }
00152
00153 static word AtomicInverseModPower2(word A)
00154 {
00155 assert(A%2==1);
00156
00157 word R=A%8;
00158
00159 for (unsigned i=3; i<WORD_BITS; i*=2)
00160 R = R*(2-R*A);
00161
00162 assert(R*A==1);
00163 return R;
00164 }
00165
00166
00167
00168 class DWord
00169 {
00170 public:
00171 DWord() {}
00172
00173 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00174 explicit DWord(word low)
00175 {
00176 m_whole = low;
00177 }
00178 #else
00179 explicit DWord(word low)
00180 {
00181 m_halfs.low = low;
00182 m_halfs.high = 0;
00183 }
00184 #endif
00185
00186 DWord(word low, word high)
00187 {
00188 m_halfs.low = low;
00189 m_halfs.high = high;
00190 }
00191
00192 static DWord Multiply(word a, word b)
00193 {
00194 DWord r;
00195 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00196 r.m_whole = (dword)a * b;
00197 #elif defined(__alpha__)
00198 r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b));
00199 #elif defined(__ia64__)
00200 r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b));
00201 #elif defined(_ARCH_PPC64)
00202 r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc");
00203 #elif defined(__x86_64__)
00204 __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc");
00205 #elif defined(__mips64)
00206 __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b));
00207 #elif defined(_M_IX86)
00208
00209 word64 t = (word64)a * b;
00210 r.m_halfs.high = ((word32 *)(&t))[1];
00211 r.m_halfs.low = (word32)t;
00212 #else
00213 #error can not implement DWord
00214 #endif
00215 return r;
00216 }
00217
00218 static DWord MultiplyAndAdd(word a, word b, word c)
00219 {
00220 DWord r = Multiply(a, b);
00221 return r += c;
00222 }
00223
00224 DWord & operator+=(word a)
00225 {
00226 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00227 m_whole = m_whole + a;
00228 #else
00229 m_halfs.low += a;
00230 m_halfs.high += (m_halfs.low < a);
00231 #endif
00232 return *this;
00233 }
00234
00235 DWord operator+(word a)
00236 {
00237 DWord r;
00238 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00239 r.m_whole = m_whole + a;
00240 #else
00241 r.m_halfs.low = m_halfs.low + a;
00242 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00243 #endif
00244 return r;
00245 }
00246
00247 DWord operator-(DWord a)
00248 {
00249 DWord r;
00250 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00251 r.m_whole = m_whole - a.m_whole;
00252 #else
00253 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00254 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00255 #endif
00256 return r;
00257 }
00258
00259 DWord operator-(word a)
00260 {
00261 DWord r;
00262 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00263 r.m_whole = m_whole - a;
00264 #else
00265 r.m_halfs.low = m_halfs.low - a;
00266 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00267 #endif
00268 return r;
00269 }
00270
00271
00272 word operator/(word divisor);
00273
00274 word operator%(word a);
00275
00276 bool operator!() const
00277 {
00278 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279 return !m_whole;
00280 #else
00281 return !m_halfs.high && !m_halfs.low;
00282 #endif
00283 }
00284
00285 word GetLowHalf() const {return m_halfs.low;}
00286 word GetHighHalf() const {return m_halfs.high;}
00287 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00288
00289 private:
00290 union
00291 {
00292 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00293 dword m_whole;
00294 #endif
00295 struct
00296 {
00297 #ifdef IS_LITTLE_ENDIAN
00298 word low;
00299 word high;
00300 #else
00301 word high;
00302 word low;
00303 #endif
00304 } m_halfs;
00305 };
00306 };
00307
00308 class Word
00309 {
00310 public:
00311 Word() {}
00312
00313 Word(word value)
00314 {
00315 m_whole = value;
00316 }
00317
00318 Word(hword low, hword high)
00319 {
00320 m_whole = low | (word(high) << (WORD_BITS/2));
00321 }
00322
00323 static Word Multiply(hword a, hword b)
00324 {
00325 Word r;
00326 r.m_whole = (word)a * b;
00327 return r;
00328 }
00329
00330 Word operator-(Word a)
00331 {
00332 Word r;
00333 r.m_whole = m_whole - a.m_whole;
00334 return r;
00335 }
00336
00337 Word operator-(hword a)
00338 {
00339 Word r;
00340 r.m_whole = m_whole - a;
00341 return r;
00342 }
00343
00344
00345 hword operator/(hword divisor)
00346 {
00347 return hword(m_whole / divisor);
00348 }
00349
00350 bool operator!() const
00351 {
00352 return !m_whole;
00353 }
00354
00355 word GetWhole() const {return m_whole;}
00356 hword GetLowHalf() const {return hword(m_whole);}
00357 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00358 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00359
00360 private:
00361 word m_whole;
00362 };
00363
00364
00365 template <class S, class D>
00366 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00367 {
00368
00369 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00370
00371
00372 S Q;
00373 if (S(B1+1) == 0)
00374 Q = A[2];
00375 else
00376 Q = D(A[1], A[2]) / S(B1+1);
00377
00378
00379 D p = D::Multiply(B0, Q);
00380 D u = (D) A[0] - p.GetLowHalf();
00381 A[0] = u.GetLowHalf();
00382 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00383 A[1] = u.GetLowHalf();
00384 A[2] += u.GetHighHalf();
00385
00386
00387 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00388 {
00389 u = (D) A[0] - B0;
00390 A[0] = u.GetLowHalf();
00391 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00392 A[1] = u.GetLowHalf();
00393 A[2] += u.GetHighHalf();
00394 Q++;
00395 assert(Q);
00396 }
00397
00398 return Q;
00399 }
00400
00401
00402 template <class S, class D>
00403 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00404 {
00405 if (!B)
00406 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00407 else
00408 {
00409 S Q[2];
00410 T[0] = Al.GetLowHalf();
00411 T[1] = Al.GetHighHalf();
00412 T[2] = Ah.GetLowHalf();
00413 T[3] = Ah.GetHighHalf();
00414 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00415 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00416 return D(Q[0], Q[1]);
00417 }
00418 }
00419
00420
00421 inline word DWord::operator/(word a)
00422 {
00423 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00424 return word(m_whole / a);
00425 #else
00426 hword r[4];
00427 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00428 #endif
00429 }
00430
00431 inline word DWord::operator%(word a)
00432 {
00433 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00434 return word(m_whole % a);
00435 #else
00436 if (a < (word(1) << (WORD_BITS/2)))
00437 {
00438 hword h = hword(a);
00439 word r = m_halfs.high % h;
00440 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00441 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00442 }
00443 else
00444 {
00445 hword r[4];
00446 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00447 return Word(r[0], r[1]).GetWhole();
00448 }
00449 #endif
00450 }
00451
00452
00453
00454 class Portable
00455 {
00456 public:
00457 static word Add(word *C, const word *A, const word *B, unsigned int N);
00458 static word Subtract(word *C, const word *A, const word *B, unsigned int N);
00459
00460 static inline void Multiply2(word *C, const word *A, const word *B);
00461 static inline word Multiply2Add(word *C, const word *A, const word *B);
00462 static void Multiply4(word *C, const word *A, const word *B);
00463 static void Multiply8(word *C, const word *A, const word *B);
00464 static inline unsigned int MultiplyRecursionLimit() {return 8;}
00465
00466 static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00467 static void Multiply4Bottom(word *C, const word *A, const word *B);
00468 static void Multiply8Bottom(word *C, const word *A, const word *B);
00469 static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00470
00471 static void Square2(word *R, const word *A);
00472 static void Square4(word *R, const word *A);
00473 static void Square8(word *R, const word *A) {assert(false);}
00474 static inline unsigned int SquareRecursionLimit() {return 4;}
00475 };
00476
00477 word Portable::Add(word *C, const word *A, const word *B, unsigned int N)
00478 {
00479 assert (N%2 == 0);
00480
00481 DWord u(0, 0);
00482 for (unsigned int i = 0; i < N; i+=2)
00483 {
00484 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00485 C[i] = u.GetLowHalf();
00486 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00487 C[i+1] = u.GetLowHalf();
00488 }
00489 return u.GetHighHalf();
00490 }
00491
00492 word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N)
00493 {
00494 assert (N%2 == 0);
00495
00496 DWord u(0, 0);
00497 for (unsigned int i = 0; i < N; i+=2)
00498 {
00499 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00500 C[i] = u.GetLowHalf();
00501 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00502 C[i+1] = u.GetLowHalf();
00503 }
00504 return 0-u.GetHighHalf();
00505 }
00506
00507 void Portable::Multiply2(word *C, const word *A, const word *B)
00508 {
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00538 unsigned int ai = A[1] < A[0];
00539 unsigned int bi = B[0] < B[1];
00540 unsigned int di = ai & bi;
00541 DWord d = DWord::Multiply(D[di], D[di+2]);
00542 D[1] = D[3] = 0;
00543 unsigned int si = ai + !bi;
00544 word s = D[si];
00545
00546 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00547 C[0] = A0B0.GetLowHalf();
00548
00549 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00550 DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00551 C[1] = t.GetLowHalf();
00552
00553 t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00554 C[2] = t.GetLowHalf();
00555 C[3] = t.GetHighHalf();
00556 }
00557
00558 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00559 {
00560 DWord t = DWord::Multiply(A[0], B[0]);
00561 C[0] = t.GetLowHalf();
00562 C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00563 }
00564
00565 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00566 {
00567 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00568 unsigned int ai = A[1] < A[0];
00569 unsigned int bi = B[0] < B[1];
00570 unsigned int di = ai & bi;
00571 DWord d = DWord::Multiply(D[di], D[di+2]);
00572 D[1] = D[3] = 0;
00573 unsigned int si = ai + !bi;
00574 word s = D[si];
00575
00576 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00577 DWord t = A0B0 + C[0];
00578 C[0] = t.GetLowHalf();
00579
00580 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00581 t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00582 C[1] = t.GetLowHalf();
00583
00584 t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00585 C[2] = t.GetLowHalf();
00586
00587 t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00588 C[3] = t.GetLowHalf();
00589 return t.GetHighHalf();
00590 }
00591
00592 #define MulAcc(x, y) \
00593 p = DWord::MultiplyAndAdd(A[x], B[y], c); \
00594 c = p.GetLowHalf(); \
00595 p = (DWord) d + p.GetHighHalf(); \
00596 d = p.GetLowHalf(); \
00597 e += p.GetHighHalf();
00598
00599 #define SaveMulAcc(s, x, y) \
00600 R[s] = c; \
00601 p = DWord::MultiplyAndAdd(A[x], B[y], d); \
00602 c = p.GetLowHalf(); \
00603 p = (DWord) e + p.GetHighHalf(); \
00604 d = p.GetLowHalf(); \
00605 e = p.GetHighHalf();
00606
00607 #define SquAcc(x, y) \
00608 q = DWord::Multiply(A[x], A[y]); \
00609 p = q + c; \
00610 c = p.GetLowHalf(); \
00611 p = (DWord) d + p.GetHighHalf(); \
00612 d = p.GetLowHalf(); \
00613 e += p.GetHighHalf(); \
00614 p = q + c; \
00615 c = p.GetLowHalf(); \
00616 p = (DWord) d + p.GetHighHalf(); \
00617 d = p.GetLowHalf(); \
00618 e += p.GetHighHalf();
00619
00620 #define SaveSquAcc(s, x, y) \
00621 R[s] = c; \
00622 q = DWord::Multiply(A[x], A[y]); \
00623 p = q + d; \
00624 c = p.GetLowHalf(); \
00625 p = (DWord) e + p.GetHighHalf(); \
00626 d = p.GetLowHalf(); \
00627 e = p.GetHighHalf(); \
00628 p = q + c; \
00629 c = p.GetLowHalf(); \
00630 p = (DWord) d + p.GetHighHalf(); \
00631 d = p.GetLowHalf(); \
00632 e += p.GetHighHalf();
00633
00634 void Portable::Multiply4(word *R, const word *A, const word *B)
00635 {
00636 DWord p;
00637 word c, d, e;
00638
00639 p = DWord::Multiply(A[0], B[0]);
00640 R[0] = p.GetLowHalf();
00641 c = p.GetHighHalf();
00642 d = e = 0;
00643
00644 MulAcc(0, 1);
00645 MulAcc(1, 0);
00646
00647 SaveMulAcc(1, 2, 0);
00648 MulAcc(1, 1);
00649 MulAcc(0, 2);
00650
00651 SaveMulAcc(2, 0, 3);
00652 MulAcc(1, 2);
00653 MulAcc(2, 1);
00654 MulAcc(3, 0);
00655
00656 SaveMulAcc(3, 3, 1);
00657 MulAcc(2, 2);
00658 MulAcc(1, 3);
00659
00660 SaveMulAcc(4, 2, 3);
00661 MulAcc(3, 2);
00662
00663 R[5] = c;
00664 p = DWord::MultiplyAndAdd(A[3], B[3], d);
00665 R[6] = p.GetLowHalf();
00666 R[7] = e + p.GetHighHalf();
00667 }
00668
00669 void Portable::Square2(word *R, const word *A)
00670 {
00671 DWord p, q;
00672 word c, d, e;
00673
00674 p = DWord::Multiply(A[0], A[0]);
00675 R[0] = p.GetLowHalf();
00676 c = p.GetHighHalf();
00677 d = e = 0;
00678
00679 SquAcc(0, 1);
00680
00681 R[1] = c;
00682 p = DWord::MultiplyAndAdd(A[1], A[1], d);
00683 R[2] = p.GetLowHalf();
00684 R[3] = e + p.GetHighHalf();
00685 }
00686
00687 void Portable::Square4(word *R, const word *A)
00688 {
00689 #ifdef _MSC_VER
00690
00691
00692
00693
00694 Multiply4(R, A, A);
00695 #else
00696 const word *B = A;
00697 DWord p, q;
00698 word c, d, e;
00699
00700 p = DWord::Multiply(A[0], A[0]);
00701 R[0] = p.GetLowHalf();
00702 c = p.GetHighHalf();
00703 d = e = 0;
00704
00705 SquAcc(0, 1);
00706
00707 SaveSquAcc(1, 2, 0);
00708 MulAcc(1, 1);
00709
00710 SaveSquAcc(2, 0, 3);
00711 SquAcc(1, 2);
00712
00713 SaveSquAcc(3, 3, 1);
00714 MulAcc(2, 2);
00715
00716 SaveSquAcc(4, 2, 3);
00717
00718 R[5] = c;
00719 p = DWord::MultiplyAndAdd(A[3], A[3], d);
00720 R[6] = p.GetLowHalf();
00721 R[7] = e + p.GetHighHalf();
00722 #endif
00723 }
00724
00725 void Portable::Multiply8(word *R, const word *A, const word *B)
00726 {
00727 DWord p;
00728 word c, d, e;
00729
00730 p = DWord::Multiply(A[0], B[0]);
00731 R[0] = p.GetLowHalf();
00732 c = p.GetHighHalf();
00733 d = e = 0;
00734
00735 MulAcc(0, 1);
00736 MulAcc(1, 0);
00737
00738 SaveMulAcc(1, 2, 0);
00739 MulAcc(1, 1);
00740 MulAcc(0, 2);
00741
00742 SaveMulAcc(2, 0, 3);
00743 MulAcc(1, 2);
00744 MulAcc(2, 1);
00745 MulAcc(3, 0);
00746
00747 SaveMulAcc(3, 0, 4);
00748 MulAcc(1, 3);
00749 MulAcc(2, 2);
00750 MulAcc(3, 1);
00751 MulAcc(4, 0);
00752
00753 SaveMulAcc(4, 0, 5);
00754 MulAcc(1, 4);
00755 MulAcc(2, 3);
00756 MulAcc(3, 2);
00757 MulAcc(4, 1);
00758 MulAcc(5, 0);
00759
00760 SaveMulAcc(5, 0, 6);
00761 MulAcc(1, 5);
00762 MulAcc(2, 4);
00763 MulAcc(3, 3);
00764 MulAcc(4, 2);
00765 MulAcc(5, 1);
00766 MulAcc(6, 0);
00767
00768 SaveMulAcc(6, 0, 7);
00769 MulAcc(1, 6);
00770 MulAcc(2, 5);
00771 MulAcc(3, 4);
00772 MulAcc(4, 3);
00773 MulAcc(5, 2);
00774 MulAcc(6, 1);
00775 MulAcc(7, 0);
00776
00777 SaveMulAcc(7, 1, 7);
00778 MulAcc(2, 6);
00779 MulAcc(3, 5);
00780 MulAcc(4, 4);
00781 MulAcc(5, 3);
00782 MulAcc(6, 2);
00783 MulAcc(7, 1);
00784
00785 SaveMulAcc(8, 2, 7);
00786 MulAcc(3, 6);
00787 MulAcc(4, 5);
00788 MulAcc(5, 4);
00789 MulAcc(6, 3);
00790 MulAcc(7, 2);
00791
00792 SaveMulAcc(9, 3, 7);
00793 MulAcc(4, 6);
00794 MulAcc(5, 5);
00795 MulAcc(6, 4);
00796 MulAcc(7, 3);
00797
00798 SaveMulAcc(10, 4, 7);
00799 MulAcc(5, 6);
00800 MulAcc(6, 5);
00801 MulAcc(7, 4);
00802
00803 SaveMulAcc(11, 5, 7);
00804 MulAcc(6, 6);
00805 MulAcc(7, 5);
00806
00807 SaveMulAcc(12, 6, 7);
00808 MulAcc(7, 6);
00809
00810 R[13] = c;
00811 p = DWord::MultiplyAndAdd(A[7], B[7], d);
00812 R[14] = p.GetLowHalf();
00813 R[15] = e + p.GetHighHalf();
00814 }
00815
00816 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00817 {
00818 DWord p;
00819 word c, d, e;
00820
00821 p = DWord::Multiply(A[0], B[0]);
00822 R[0] = p.GetLowHalf();
00823 c = p.GetHighHalf();
00824 d = e = 0;
00825
00826 MulAcc(0, 1);
00827 MulAcc(1, 0);
00828
00829 SaveMulAcc(1, 2, 0);
00830 MulAcc(1, 1);
00831 MulAcc(0, 2);
00832
00833 R[2] = c;
00834 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00835 }
00836
00837 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00838 {
00839 DWord p;
00840 word c, d, e;
00841
00842 p = DWord::Multiply(A[0], B[0]);
00843 R[0] = p.GetLowHalf();
00844 c = p.GetHighHalf();
00845 d = e = 0;
00846
00847 MulAcc(0, 1);
00848 MulAcc(1, 0);
00849
00850 SaveMulAcc(1, 2, 0);
00851 MulAcc(1, 1);
00852 MulAcc(0, 2);
00853
00854 SaveMulAcc(2, 0, 3);
00855 MulAcc(1, 2);
00856 MulAcc(2, 1);
00857 MulAcc(3, 0);
00858
00859 SaveMulAcc(3, 0, 4);
00860 MulAcc(1, 3);
00861 MulAcc(2, 2);
00862 MulAcc(3, 1);
00863 MulAcc(4, 0);
00864
00865 SaveMulAcc(4, 0, 5);
00866 MulAcc(1, 4);
00867 MulAcc(2, 3);
00868 MulAcc(3, 2);
00869 MulAcc(4, 1);
00870 MulAcc(5, 0);
00871
00872 SaveMulAcc(5, 0, 6);
00873 MulAcc(1, 5);
00874 MulAcc(2, 4);
00875 MulAcc(3, 3);
00876 MulAcc(4, 2);
00877 MulAcc(5, 1);
00878 MulAcc(6, 0);
00879
00880 R[6] = c;
00881 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00882 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00883 }
00884
00885 #undef MulAcc
00886 #undef SaveMulAcc
00887 #undef SquAcc
00888 #undef SaveSquAcc
00889
00890 #ifdef CRYPTOPP_X86ASM_AVAILABLE
00891
00892
00893
00894 static bool s_sse2Enabled = true;
00895
00896 static void CpuId(word32 input, word32 *output)
00897 {
00898 #ifdef __GNUC__
00899 __asm__
00900 (
00901
00902 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00903 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
00904 : "a" (input)
00905 );
00906 #else
00907 __asm
00908 {
00909 mov eax, input
00910 cpuid
00911 mov edi, output
00912 mov [edi], eax
00913 mov [edi+4], ebx
00914 mov [edi+8], ecx
00915 mov [edi+12], edx
00916 }
00917 #endif
00918 }
00919
00920 #ifdef SSE2_INTRINSICS_AVAILABLE
00921 #ifndef _MSC_VER
00922 static jmp_buf s_env;
00923 static void SigIllHandler(int)
00924 {
00925 longjmp(s_env, 1);
00926 }
00927 #endif
00928
00929 static bool HasSSE2()
00930 {
00931 if (!s_sse2Enabled)
00932 return false;
00933
00934 word32 cpuid[4];
00935 CpuId(1, cpuid);
00936 if ((cpuid[3] & (1 << 26)) == 0)
00937 return false;
00938
00939 #ifdef _MSC_VER
00940 __try
00941 {
00942 __asm xorpd xmm0, xmm0
00943 }
00944 __except (1)
00945 {
00946 return false;
00947 }
00948 return true;
00949 #else
00950 typedef void (*SigHandler)(int);
00951
00952 SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00953 if (oldHandler == SIG_ERR)
00954 return false;
00955
00956 bool result = true;
00957 if (setjmp(s_env))
00958 result = false;
00959 else
00960 __asm __volatile ("xorps %xmm0, %xmm0");
00961
00962 signal(SIGILL, oldHandler);
00963 return result;
00964 #endif
00965 }
00966 #endif
00967
00968 static bool IsP4()
00969 {
00970 word32 cpuid[4];
00971
00972 CpuId(0, cpuid);
00973 std::swap(cpuid[2], cpuid[3]);
00974 if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
00975 return false;
00976
00977 CpuId(1, cpuid);
00978 return ((cpuid[0] >> 8) & 0xf) == 0xf;
00979 }
00980
00981
00982
00983 class PentiumOptimized : public Portable
00984 {
00985 public:
00986 static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
00987 static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
00988 static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
00989 static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
00990 static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
00991 };
00992
00993 class P4Optimized
00994 {
00995 public:
00996 static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
00997 static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
00998 #ifdef SSE2_INTRINSICS_AVAILABLE
00999 static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
01000 static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
01001 static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
01002 #endif
01003 };
01004
01005 typedef word (CRYPTOPP_CDECL * PAddSub)(word *C, const word *A, const word *B, unsigned int N);
01006 typedef void (CRYPTOPP_CDECL * PMul)(word *C, const word *A, const word *B);
01007
01008 static PAddSub s_pAdd, s_pSub;
01009 #ifdef SSE2_INTRINSICS_AVAILABLE
01010 static PMul s_pMul4, s_pMul8, s_pMul8B;
01011 #endif
01012
01013 static void SetPentiumFunctionPointers()
01014 {
01015 if (IsP4())
01016 {
01017 s_pAdd = &P4Optimized::Add;
01018 s_pSub = &P4Optimized::Subtract;
01019 }
01020 else
01021 {
01022 s_pAdd = &PentiumOptimized::Add;
01023 s_pSub = &PentiumOptimized::Subtract;
01024 }
01025
01026 #ifdef SSE2_INTRINSICS_AVAILABLE
01027 if (HasSSE2())
01028 {
01029 s_pMul4 = &P4Optimized::Multiply4;
01030 s_pMul8 = &P4Optimized::Multiply8;
01031 s_pMul8B = &P4Optimized::Multiply8Bottom;
01032 }
01033 else
01034 {
01035 s_pMul4 = &PentiumOptimized::Multiply4;
01036 s_pMul8 = &PentiumOptimized::Multiply8;
01037 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01038 }
01039 #endif
01040 }
01041
01042 static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);
01043
01044 void DisableSSE2()
01045 {
01046 s_sse2Enabled = false;
01047 SetPentiumFunctionPointers();
01048 }
01049
01050 class LowLevel : public PentiumOptimized
01051 {
01052 public:
01053 inline static word Add(word *C, const word *A, const word *B, unsigned int N)
01054 {return s_pAdd(C, A, B, N);}
01055 inline static word Subtract(word *C, const word *A, const word *B, unsigned int N)
01056 {return s_pSub(C, A, B, N);}
01057 inline static void Square4(word *R, const word *A)
01058 {Multiply4(R, A, A);}
01059 #ifdef SSE2_INTRINSICS_AVAILABLE
01060 inline static void Multiply4(word *C, const word *A, const word *B)
01061 {s_pMul4(C, A, B);}
01062 inline static void Multiply8(word *C, const word *A, const word *B)
01063 {s_pMul8(C, A, B);}
01064 inline static void Multiply8Bottom(word *C, const word *A, const word *B)
01065 {s_pMul8B(C, A, B);}
01066 #endif
01067 };
01068
01069
01070 #ifdef _MSC_VER
01071 #define CRYPTOPP_NAKED __declspec(naked)
01072 #define AS1(x) __asm x
01073 #define AS2(x, y) __asm x, y
01074 #define AddPrologue \
01075 __asm push ebp \
01076 __asm push ebx \
01077 __asm push esi \
01078 __asm push edi \
01079 __asm mov ecx, [esp+20] \
01080 __asm mov edx, [esp+24] \
01081 __asm mov ebx, [esp+28] \
01082 __asm mov esi, [esp+32]
01083 #define AddEpilogue \
01084 __asm pop edi \
01085 __asm pop esi \
01086 __asm pop ebx \
01087 __asm pop ebp \
01088 __asm ret
01089 #define MulPrologue \
01090 __asm push ebp \
01091 __asm push ebx \
01092 __asm push esi \
01093 __asm push edi \
01094 __asm mov ecx, [esp+28] \
01095 __asm mov esi, [esp+24] \
01096 __asm push [esp+20]
01097 #define MulEpilogue \
01098 __asm add esp, 4 \
01099 __asm pop edi \
01100 __asm pop esi \
01101 __asm pop ebx \
01102 __asm pop ebp \
01103 __asm ret
01104 #else
01105 #define CRYPTOPP_NAKED
01106 #define AS1(x) #x ";"
01107 #define AS2(x, y) #x ", " #y ";"
01108 #define AddPrologue \
01109 __asm__ __volatile__ \
01110 ( \
01111 "push %%ebx;" \
01112 "mov %2, %%ebx;" \
01113 ".intel_syntax noprefix;" \
01114 "push ebp;"
01115 #define AddEpilogue \
01116 "pop ebp;" \
01117 ".att_syntax prefix;" \
01118 "pop %%ebx;" \
01119 : \
01120 : "c" (C), "d" (A), "m" (B), "S" (N) \
01121 : "%edi", "memory", "cc" \
01122 );
01123 #define MulPrologue \
01124 __asm__ __volatile__ \
01125 ( \
01126 "push %%ebx;" \
01127 "push %%ebp;" \
01128 "push %0;" \
01129 ".intel_syntax noprefix;"
01130 #define MulEpilogue \
01131 "add esp, 4;" \
01132 "pop ebp;" \
01133 "pop ebx;" \
01134 ".att_syntax prefix;" \
01135 : \
01136 : "rm" (Z), "S" (X), "c" (Y) \
01137 : "%eax", "%edx", "%edi", "memory", "cc" \
01138 );
01139 #endif
01140
01141 CRYPTOPP_NAKED word PentiumOptimized::Add(word *C, const word *A, const word *B, unsigned int N)
01142 {
01143 AddPrologue
01144
01145
01146 AS2( sub ecx, edx)
01147 AS2( xor eax, eax)
01148
01149 AS2( sub eax, esi)
01150 AS2( lea ebx, [ebx+4*esi])
01151
01152 AS2( sar eax, 1)
01153 AS1( jz loopendAdd)
01154
01155 AS1(loopstartAdd:)
01156 AS2( mov esi,[edx])
01157 AS2( mov ebp,[edx+4])
01158
01159 AS2( mov edi,[ebx+8*eax])
01160 AS2( lea edx,[edx+8])
01161
01162 AS2( adc esi,edi)
01163 AS2( mov edi,[ebx+8*eax+4])
01164
01165 AS2( adc ebp,edi)
01166 AS1( inc eax)
01167
01168 AS2( mov [edx+ecx-8],esi)
01169 AS2( mov [edx+ecx-4],ebp)
01170
01171 AS1( jnz loopstartAdd)
01172
01173 AS1(loopendAdd:)
01174 AS2( adc eax, 0)
01175
01176 AddEpilogue
01177 }
01178
01179 CRYPTOPP_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01180 {
01181 AddPrologue
01182
01183
01184 AS2( sub ecx, edx)
01185 AS2( xor eax, eax)
01186
01187 AS2( sub eax, esi)
01188 AS2( lea ebx, [ebx+4*esi])
01189
01190 AS2( sar eax, 1)
01191 AS1( jz loopendSub)
01192
01193 AS1(loopstartSub:)
01194 AS2( mov esi,[edx])
01195 AS2( mov ebp,[edx+4])
01196
01197 AS2( mov edi,[ebx+8*eax])
01198 AS2( lea edx,[edx+8])
01199
01200 AS2( sbb esi,edi)
01201 AS2( mov edi,[ebx+8*eax+4])
01202
01203 AS2( sbb ebp,edi)
01204 AS1( inc eax)
01205
01206 AS2( mov [edx+ecx-8],esi)
01207 AS2( mov [edx+ecx-4],ebp)
01208
01209 AS1( jnz loopstartSub)
01210
01211 AS1(loopendSub:)
01212 AS2( adc eax, 0)
01213
01214 AddEpilogue
01215 }
01216
01217
01218
01219 CRYPTOPP_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, unsigned int N)
01220 {
01221 AddPrologue
01222
01223
01224 AS2( xor eax, eax)
01225 AS1( neg esi)
01226 AS1( jz loopendAddP4)
01227
01228 AS2( mov edi, [edx])
01229 AS2( mov ebp, [ebx])
01230 AS1( jmp carry1AddP4)
01231
01232 AS1(loopstartAddP4:)
01233 AS2( mov edi, [edx+8])
01234 AS2( add ecx, 8)
01235 AS2( add edx, 8)
01236 AS2( mov ebp, [ebx])
01237 AS2( add edi, eax)
01238 AS1( jc carry1AddP4)
01239 AS2( xor eax, eax)
01240
01241 AS1(carry1AddP4:)
01242 AS2( add edi, ebp)
01243 AS2( mov ebp, 1)
01244 AS2( mov [ecx], edi)
01245 AS2( mov edi, [edx+4])
01246 AS2( cmovc eax, ebp)
01247 AS2( mov ebp, [ebx+4])
01248 AS2( add ebx, 8)
01249 AS2( add edi, eax)
01250 AS1( jc carry2AddP4)
01251 AS2( xor eax, eax)
01252
01253 AS1(carry2AddP4:)
01254 AS2( add edi, ebp)
01255 AS2( mov ebp, 1)
01256 AS2( cmovc eax, ebp)
01257 AS2( mov [ecx+4], edi)
01258 AS2( add esi, 2)
01259 AS1( jnz loopstartAddP4)
01260
01261 AS1(loopendAddP4:)
01262
01263 AddEpilogue
01264 }
01265
01266 CRYPTOPP_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01267 {
01268 AddPrologue
01269
01270
01271 AS2( xor eax, eax)
01272 AS1( neg esi)
01273 AS1( jz loopendSubP4)
01274
01275 AS2( mov edi, [edx])
01276 AS2( mov ebp, [ebx])
01277 AS1( jmp carry1SubP4)
01278
01279 AS1(loopstartSubP4:)
01280 AS2( mov edi, [edx+8])
01281 AS2( add edx, 8)
01282 AS2( add ecx, 8)
01283 AS2( mov ebp, [ebx])
01284 AS2( sub edi, eax)
01285 AS1( jc carry1SubP4)
01286 AS2( xor eax, eax)
01287
01288 AS1(carry1SubP4:)
01289 AS2( sub edi, ebp)
01290 AS2( mov ebp, 1)
01291 AS2( mov [ecx], edi)
01292 AS2( mov edi, [edx+4])
01293 AS2( cmovc eax, ebp)
01294 AS2( mov ebp, [ebx+4])
01295 AS2( add ebx, 8)
01296 AS2( sub edi, eax)
01297 AS1( jc carry2SubP4)
01298 AS2( xor eax, eax)
01299
01300 AS1(carry2SubP4:)
01301 AS2( sub edi, ebp)
01302 AS2( mov ebp, 1)
01303 AS2( cmovc eax, ebp)
01304 AS2( mov [ecx+4], edi)
01305 AS2( add esi, 2)
01306 AS1( jnz loopstartSubP4)
01307
01308 AS1(loopendSubP4:)
01309
01310 AddEpilogue
01311 }
01312
01313
01314
01315 #define MulStartup \
01316 AS2(xor ebp, ebp) \
01317 AS2(xor edi, edi) \
01318 AS2(xor ebx, ebx)
01319
01320 #define MulShiftCarry \
01321 AS2(mov ebp, edx) \
01322 AS2(mov edi, ebx) \
01323 AS2(xor ebx, ebx)
01324
01325 #define MulAccumulateBottom(i,j) \
01326 AS2(mov eax, [ecx+4*j]) \
01327 AS2(imul eax, dword ptr [esi+4*i]) \
01328 AS2(add ebp, eax)
01329
01330 #define MulAccumulate(i,j) \
01331 AS2(mov eax, [ecx+4*j]) \
01332 AS1(mul dword ptr [esi+4*i]) \
01333 AS2(add ebp, eax) \
01334 AS2(adc edi, edx) \
01335 AS2(adc bl, bh)
01336
01337 #define MulStoreDigit(i) \
01338 AS2(mov edx, edi) \
01339 AS2(mov edi, [esp]) \
01340 AS2(mov [edi+4*i], ebp)
01341
01342 #define MulLastDiagonal(digits) \
01343 AS2(mov eax, [ecx+4*(digits-1)]) \
01344 AS1(mul dword ptr [esi+4*(digits-1)]) \
01345 AS2(add ebp, eax) \
01346 AS2(adc edx, edi) \
01347 AS2(mov edi, [esp]) \
01348 AS2(mov [edi+4*(2*digits-2)], ebp) \
01349 AS2(mov [edi+4*(2*digits-1)], edx)
01350
01351 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01352 {
01353 MulPrologue
01354
01355 MulStartup
01356 MulAccumulate(0,0)
01357 MulStoreDigit(0)
01358 MulShiftCarry
01359
01360 MulAccumulate(1,0)
01361 MulAccumulate(0,1)
01362 MulStoreDigit(1)
01363 MulShiftCarry
01364
01365 MulAccumulate(2,0)
01366 MulAccumulate(1,1)
01367 MulAccumulate(0,2)
01368 MulStoreDigit(2)
01369 MulShiftCarry
01370
01371 MulAccumulate(3,0)
01372 MulAccumulate(2,1)
01373 MulAccumulate(1,2)
01374 MulAccumulate(0,3)
01375 MulStoreDigit(3)
01376 MulShiftCarry
01377
01378 MulAccumulate(3,1)
01379 MulAccumulate(2,2)
01380 MulAccumulate(1,3)
01381 MulStoreDigit(4)
01382 MulShiftCarry
01383
01384 MulAccumulate(3,2)
01385 MulAccumulate(2,3)
01386 MulStoreDigit(5)
01387 MulShiftCarry
01388
01389 MulLastDiagonal(4)
01390 MulEpilogue
01391 }
01392
01393 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01394 {
01395 MulPrologue
01396
01397 MulStartup
01398 MulAccumulate(0,0)
01399 MulStoreDigit(0)
01400 MulShiftCarry
01401
01402 MulAccumulate(1,0)
01403 MulAccumulate(0,1)
01404 MulStoreDigit(1)
01405 MulShiftCarry
01406
01407 MulAccumulate(2,0)
01408 MulAccumulate(1,1)
01409 MulAccumulate(0,2)
01410 MulStoreDigit(2)
01411 MulShiftCarry
01412
01413 MulAccumulate(3,0)
01414 MulAccumulate(2,1)
01415 MulAccumulate(1,2)
01416 MulAccumulate(0,3)
01417 MulStoreDigit(3)
01418 MulShiftCarry
01419
01420 MulAccumulate(4,0)
01421 MulAccumulate(3,1)
01422 MulAccumulate(2,2)
01423 MulAccumulate(1,3)
01424 MulAccumulate(0,4)
01425 MulStoreDigit(4)
01426 MulShiftCarry
01427
01428 MulAccumulate(5,0)
01429 MulAccumulate(4,1)
01430 MulAccumulate(3,2)
01431 MulAccumulate(2,3)
01432 MulAccumulate(1,4)
01433 MulAccumulate(0,5)
01434 MulStoreDigit(5)
01435 MulShiftCarry
01436
01437 MulAccumulate(6,0)
01438 MulAccumulate(5,1)
01439 MulAccumulate(4,2)
01440 MulAccumulate(3,3)
01441 MulAccumulate(2,4)
01442 MulAccumulate(1,5)
01443 MulAccumulate(0,6)
01444 MulStoreDigit(6)
01445 MulShiftCarry
01446
01447 MulAccumulate(7,0)
01448 MulAccumulate(6,1)
01449 MulAccumulate(5,2)
01450 MulAccumulate(4,3)
01451 MulAccumulate(3,4)
01452 MulAccumulate(2,5)
01453 MulAccumulate(1,6)
01454 MulAccumulate(0,7)
01455 MulStoreDigit(7)
01456 MulShiftCarry
01457
01458 MulAccumulate(7,1)
01459 MulAccumulate(6,2)
01460 MulAccumulate(5,3)
01461 MulAccumulate(4,4)
01462 MulAccumulate(3,5)
01463 MulAccumulate(2,6)
01464 MulAccumulate(1,7)
01465 MulStoreDigit(8)
01466 MulShiftCarry
01467
01468 MulAccumulate(7,2)
01469 MulAccumulate(6,3)
01470 MulAccumulate(5,4)
01471 MulAccumulate(4,5)
01472 MulAccumulate(3,6)
01473 MulAccumulate(2,7)
01474 MulStoreDigit(9)
01475 MulShiftCarry
01476
01477 MulAccumulate(7,3)
01478 MulAccumulate(6,4)
01479 MulAccumulate(5,5)
01480 MulAccumulate(4,6)
01481 MulAccumulate(3,7)
01482 MulStoreDigit(10)
01483 MulShiftCarry
01484
01485 MulAccumulate(7,4)
01486 MulAccumulate(6,5)
01487 MulAccumulate(5,6)
01488 MulAccumulate(4,7)
01489 MulStoreDigit(11)
01490 MulShiftCarry
01491
01492 MulAccumulate(7,5)
01493 MulAccumulate(6,6)
01494 MulAccumulate(5,7)
01495 MulStoreDigit(12)
01496 MulShiftCarry
01497
01498 MulAccumulate(7,6)
01499 MulAccumulate(6,7)
01500 MulStoreDigit(13)
01501 MulShiftCarry
01502
01503 MulLastDiagonal(8)
01504 MulEpilogue
01505 }
01506
01507 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01508 {
01509 MulPrologue
01510
01511 MulStartup
01512 MulAccumulate(0,0)
01513 MulStoreDigit(0)
01514 MulShiftCarry
01515
01516 MulAccumulate(1,0)
01517 MulAccumulate(0,1)
01518 MulStoreDigit(1)
01519 MulShiftCarry
01520
01521 MulAccumulate(2,0)
01522 MulAccumulate(1,1)
01523 MulAccumulate(0,2)
01524 MulStoreDigit(2)
01525 MulShiftCarry
01526
01527 MulAccumulate(3,0)
01528 MulAccumulate(2,1)
01529 MulAccumulate(1,2)
01530 MulAccumulate(0,3)
01531 MulStoreDigit(3)
01532 MulShiftCarry
01533
01534 MulAccumulate(4,0)
01535 MulAccumulate(3,1)
01536 MulAccumulate(2,2)
01537 MulAccumulate(1,3)
01538 MulAccumulate(0,4)
01539 MulStoreDigit(4)
01540 MulShiftCarry
01541
01542 MulAccumulate(5,0)
01543 MulAccumulate(4,1)
01544 MulAccumulate(3,2)
01545 MulAccumulate(2,3)
01546 MulAccumulate(1,4)
01547 MulAccumulate(0,5)
01548 MulStoreDigit(5)
01549 MulShiftCarry
01550
01551 MulAccumulate(6,0)
01552 MulAccumulate(5,1)
01553 MulAccumulate(4,2)
01554 MulAccumulate(3,3)
01555 MulAccumulate(2,4)
01556 MulAccumulate(1,5)
01557 MulAccumulate(0,6)
01558 MulStoreDigit(6)
01559 MulShiftCarry
01560
01561 MulAccumulateBottom(7,0)
01562 MulAccumulateBottom(6,1)
01563 MulAccumulateBottom(5,2)
01564 MulAccumulateBottom(4,3)
01565 MulAccumulateBottom(3,4)
01566 MulAccumulateBottom(2,5)
01567 MulAccumulateBottom(1,6)
01568 MulAccumulateBottom(0,7)
01569 MulStoreDigit(7)
01570 MulEpilogue
01571 }
01572
01573 #undef AS1
01574 #undef AS2
01575
01576 #else
01577
01578 typedef Portable LowLevel;
01579
01580 #endif
01581
01582 #ifdef SSE2_INTRINSICS_AVAILABLE
01583
01584 #ifdef __GNUC__
01585 #define CRYPTOPP_FASTCALL
01586 #else
01587 #define CRYPTOPP_FASTCALL __fastcall
01588 #endif
01589
01590 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01591 {
01592 __m128i a3210 = _mm_load_si128(A);
01593 __m128i b3210 = _mm_load_si128(B);
01594
01595 __m128i sum;
01596
01597 __m128i z = _mm_setzero_si128();
01598 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01599 C[0] = a2b2_a0b0;
01600
01601 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01602 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01603 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01604 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01605 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01606 C[1] = _mm_add_epi64(a1b0, a0b1);
01607
01608 __m128i a31 = _mm_srli_epi64(a3210, 32);
01609 __m128i b31 = _mm_srli_epi64(b3210, 32);
01610 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01611 C[6] = a3b3_a1b1;
01612
01613 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01614 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01615 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01616 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01617 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01618 sum = _mm_add_epi64(a1b1, a0b2);
01619 C[2] = _mm_add_epi64(sum, a2b0);
01620
01621 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01622 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01623 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01624 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01625 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01626 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01627 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01628 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01629 __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01630 sum = _mm_add_epi64(a2b1, a0b3);
01631 C[3] = _mm_add_epi64(sum, sum1);
01632
01633 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01634 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01635 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01636 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01637 sum = _mm_add_epi64(a2b2, a3b1);
01638 C[4] = _mm_add_epi64(sum, a1b3);
01639
01640 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01641 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01642 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01643 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01644 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01645 C[5] = _mm_add_epi64(a3b2, a2b3);
01646 }
01647
01648 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
01649 {
01650 __m128i temp[7];
01651 const word *w = (word *)temp;
01652 const __m64 *mw = (__m64 *)w;
01653
01654 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01655
01656 C[0] = w[0];
01657
01658 __m64 s1, s2;
01659
01660 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01661 __m64 w4 = mw[2];
01662 __m64 w6 = mw[3];
01663 __m64 w8 = mw[4];
01664 __m64 w10 = mw[5];
01665 __m64 w12 = mw[6];
01666 __m64 w14 = mw[7];
01667 __m64 w16 = mw[8];
01668 __m64 w18 = mw[9];
01669 __m64 w20 = mw[10];
01670 __m64 w22 = mw[11];
01671 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01672
01673 s1 = _mm_add_si64(w1, w4);
01674 C[1] = _mm_cvtsi64_si32(s1);
01675 s1 = _mm_srli_si64(s1, 32);
01676
01677 s2 = _mm_add_si64(w6, w8);
01678 s1 = _mm_add_si64(s1, s2);
01679 C[2] = _mm_cvtsi64_si32(s1);
01680 s1 = _mm_srli_si64(s1, 32);
01681
01682 s2 = _mm_add_si64(w10, w12);
01683 s1 = _mm_add_si64(s1, s2);
01684 C[3] = _mm_cvtsi64_si32(s1);
01685 s1 = _mm_srli_si64(s1, 32);
01686
01687 s2 = _mm_add_si64(w14, w16);
01688 s1 = _mm_add_si64(s1, s2);
01689 C[4] = _mm_cvtsi64_si32(s1);
01690 s1 = _mm_srli_si64(s1, 32);
01691
01692 s2 = _mm_add_si64(w18, w20);
01693 s1 = _mm_add_si64(s1, s2);
01694 C[5] = _mm_cvtsi64_si32(s1);
01695 s1 = _mm_srli_si64(s1, 32);
01696
01697 s2 = _mm_add_si64(w22, w26);
01698 s1 = _mm_add_si64(s1, s2);
01699 C[6] = _mm_cvtsi64_si32(s1);
01700 s1 = _mm_srli_si64(s1, 32);
01701
01702 C[7] = _mm_cvtsi64_si32(s1) + w[27];
01703 _mm_empty();
01704 }
01705
01706 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
01707 {
01708 __m128i temp[28];
01709 const word *w = (word *)temp;
01710 const __m64 *mw = (__m64 *)w;
01711 const word *x = (word *)temp+7*4;
01712 const __m64 *mx = (__m64 *)x;
01713 const word *y = (word *)temp+7*4*2;
01714 const __m64 *my = (__m64 *)y;
01715 const word *z = (word *)temp+7*4*3;
01716 const __m64 *mz = (__m64 *)z;
01717
01718 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01719
01720 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01721
01722 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01723
01724 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01725
01726 C[0] = w[0];
01727
01728 __m64 s1, s2, s3, s4;
01729
01730 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01731 __m64 w4 = mw[2];
01732 __m64 w6 = mw[3];
01733 __m64 w8 = mw[4];
01734 __m64 w10 = mw[5];
01735 __m64 w12 = mw[6];
01736 __m64 w14 = mw[7];
01737 __m64 w16 = mw[8];
01738 __m64 w18 = mw[9];
01739 __m64 w20 = mw[10];
01740 __m64 w22 = mw[11];
01741 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01742 __m64 w27 = _mm_cvtsi32_si64(w[27]);
01743
01744 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01745 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01746 __m64 x4 = mx[2];
01747 __m64 x6 = mx[3];
01748 __m64 x8 = mx[4];
01749 __m64 x10 = mx[5];
01750 __m64 x12 = mx[6];
01751 __m64 x14 = mx[7];
01752 __m64 x16 = mx[8];
01753 __m64 x18 = mx[9];
01754 __m64 x20 = mx[10];
01755 __m64 x22 = mx[11];
01756 __m64 x26 = _mm_cvtsi32_si64(x[26]);
01757 __m64 x27 = _mm_cvtsi32_si64(x[27]);
01758
01759 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01760 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01761 __m64 y4 = my[2];
01762 __m64 y6 = my[3];
01763 __m64 y8 = my[4];
01764 __m64 y10 = my[5];
01765 __m64 y12 = my[6];
01766 __m64 y14 = my[7];
01767 __m64 y16 = my[8];
01768 __m64 y18 = my[9];
01769 __m64 y20 = my[10];
01770 __m64 y22 = my[11];
01771 __m64 y26 = _mm_cvtsi32_si64(y[26]);
01772 __m64 y27 = _mm_cvtsi32_si64(y[27]);
01773
01774 __m64 z0 = _mm_cvtsi32_si64(z[0]);
01775 __m64 z1 = _mm_cvtsi32_si64(z[1]);
01776 __m64 z4 = mz[2];
01777 __m64 z6 = mz[3];
01778 __m64 z8 = mz[4];
01779 __m64 z10 = mz[5];
01780 __m64 z12 = mz[6];
01781 __m64 z14 = mz[7];
01782 __m64 z16 = mz[8];
01783 __m64 z18 = mz[9];
01784 __m64 z20 = mz[10];
01785 __m64 z22 = mz[11];
01786 __m64 z26 = _mm_cvtsi32_si64(z[26]);
01787
01788 s1 = _mm_add_si64(w1, w4);
01789 C[1] = _mm_cvtsi64_si32(s1);
01790 s1 = _mm_srli_si64(s1, 32);
01791
01792 s2 = _mm_add_si64(w6, w8);
01793 s1 = _mm_add_si64(s1, s2);
01794 C[2] = _mm_cvtsi64_si32(s1);
01795 s1 = _mm_srli_si64(s1, 32);
01796
01797 s2 = _mm_add_si64(w10, w12);
01798 s1 = _mm_add_si64(s1, s2);
01799 C[3] = _mm_cvtsi64_si32(s1);
01800 s1 = _mm_srli_si64(s1, 32);
01801
01802 s3 = _mm_add_si64(x0, y0);
01803 s2 = _mm_add_si64(w14, w16);
01804 s1 = _mm_add_si64(s1, s3);
01805 s1 = _mm_add_si64(s1, s2);
01806 C[4] = _mm_cvtsi64_si32(s1);
01807 s1 = _mm_srli_si64(s1, 32);
01808
01809 s3 = _mm_add_si64(x1, y1);
01810 s4 = _mm_add_si64(x4, y4);
01811 s1 = _mm_add_si64(s1, w18);
01812 s3 = _mm_add_si64(s3, s4);
01813 s1 = _mm_add_si64(s1, w20);
01814 s1 = _mm_add_si64(s1, s3);
01815 C[5] = _mm_cvtsi64_si32(s1);
01816 s1 = _mm_srli_si64(s1, 32);
01817
01818 s3 = _mm_add_si64(x6, y6);
01819 s4 = _mm_add_si64(x8, y8);
01820 s1 = _mm_add_si64(s1, w22);
01821 s3 = _mm_add_si64(s3, s4);
01822 s1 = _mm_add_si64(s1, w26);
01823 s1 = _mm_add_si64(s1, s3);
01824 C[6] = _mm_cvtsi64_si32(s1);
01825 s1 = _mm_srli_si64(s1, 32);
01826
01827 s3 = _mm_add_si64(x10, y10);
01828 s4 = _mm_add_si64(x12, y12);
01829 s1 = _mm_add_si64(s1, w27);
01830 s3 = _mm_add_si64(s3, s4);
01831 s1 = _mm_add_si64(s1, s3);
01832 C[7] = _mm_cvtsi64_si32(s1);
01833 s1 = _mm_srli_si64(s1, 32);
01834
01835 s3 = _mm_add_si64(x14, y14);
01836 s4 = _mm_add_si64(x16, y16);
01837 s1 = _mm_add_si64(s1, z0);
01838 s3 = _mm_add_si64(s3, s4);
01839 s1 = _mm_add_si64(s1, s3);
01840 C[8] = _mm_cvtsi64_si32(s1);
01841 s1 = _mm_srli_si64(s1, 32);
01842
01843 s3 = _mm_add_si64(x18, y18);
01844 s4 = _mm_add_si64(x20, y20);
01845 s1 = _mm_add_si64(s1, z1);
01846 s3 = _mm_add_si64(s3, s4);
01847 s1 = _mm_add_si64(s1, z4);
01848 s1 = _mm_add_si64(s1, s3);
01849 C[9] = _mm_cvtsi64_si32(s1);
01850 s1 = _mm_srli_si64(s1, 32);
01851
01852 s3 = _mm_add_si64(x22, y22);
01853 s4 = _mm_add_si64(x26, y26);
01854 s1 = _mm_add_si64(s1, z6);
01855 s3 = _mm_add_si64(s3, s4);
01856 s1 = _mm_add_si64(s1, z8);
01857 s1 = _mm_add_si64(s1, s3);
01858 C[10] = _mm_cvtsi64_si32(s1);
01859 s1 = _mm_srli_si64(s1, 32);
01860
01861 s3 = _mm_add_si64(x27, y27);
01862 s1 = _mm_add_si64(s1, z10);
01863 s1 = _mm_add_si64(s1, z12);
01864 s1 = _mm_add_si64(s1, s3);
01865 C[11] = _mm_cvtsi64_si32(s1);
01866 s1 = _mm_srli_si64(s1, 32);
01867
01868 s3 = _mm_add_si64(z14, z16);
01869 s1 = _mm_add_si64(s1, s3);
01870 C[12] = _mm_cvtsi64_si32(s1);
01871 s1 = _mm_srli_si64(s1, 32);
01872
01873 s3 = _mm_add_si64(z18, z20);
01874 s1 = _mm_add_si64(s1, s3);
01875 C[13] = _mm_cvtsi64_si32(s1);
01876 s1 = _mm_srli_si64(s1, 32);
01877
01878 s3 = _mm_add_si64(z22, z26);
01879 s1 = _mm_add_si64(s1, s3);
01880 C[14] = _mm_cvtsi64_si32(s1);
01881 s1 = _mm_srli_si64(s1, 32);
01882
01883 C[15] = z[27] + _mm_cvtsi64_si32(s1);
01884 _mm_empty();
01885 }
01886
01887 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01888 {
01889 __m128i temp[21];
01890 const word *w = (word *)temp;
01891 const __m64 *mw = (__m64 *)w;
01892 const word *x = (word *)temp+7*4;
01893 const __m64 *mx = (__m64 *)x;
01894 const word *y = (word *)temp+7*4*2;
01895 const __m64 *my = (__m64 *)y;
01896
01897 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01898
01899 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01900
01901 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01902
01903 C[0] = w[0];
01904
01905 __m64 s1, s2, s3, s4;
01906
01907 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01908 __m64 w4 = mw[2];
01909 __m64 w6 = mw[3];
01910 __m64 w8 = mw[4];
01911 __m64 w10 = mw[5];
01912 __m64 w12 = mw[6];
01913 __m64 w14 = mw[7];
01914 __m64 w16 = mw[8];
01915 __m64 w18 = mw[9];
01916 __m64 w20 = mw[10];
01917 __m64 w22 = mw[11];
01918 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01919
01920 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01921 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01922 __m64 x4 = mx[2];
01923 __m64 x6 = mx[3];
01924 __m64 x8 = mx[4];
01925
01926 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01927 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01928 __m64 y4 = my[2];
01929 __m64 y6 = my[3];
01930 __m64 y8 = my[4];
01931
01932 s1 = _mm_add_si64(w1, w4);
01933 C[1] = _mm_cvtsi64_si32(s1);
01934 s1 = _mm_srli_si64(s1, 32);
01935
01936 s2 = _mm_add_si64(w6, w8);
01937 s1 = _mm_add_si64(s1, s2);
01938 C[2] = _mm_cvtsi64_si32(s1);
01939 s1 = _mm_srli_si64(s1, 32);
01940
01941 s2 = _mm_add_si64(w10, w12);
01942 s1 = _mm_add_si64(s1, s2);
01943 C[3] = _mm_cvtsi64_si32(s1);
01944 s1 = _mm_srli_si64(s1, 32);
01945
01946 s3 = _mm_add_si64(x0, y0);
01947 s2 = _mm_add_si64(w14, w16);
01948 s1 = _mm_add_si64(s1, s3);
01949 s1 = _mm_add_si64(s1, s2);
01950 C[4] = _mm_cvtsi64_si32(s1);
01951 s1 = _mm_srli_si64(s1, 32);
01952
01953 s3 = _mm_add_si64(x1, y1);
01954 s4 = _mm_add_si64(x4, y4);
01955 s1 = _mm_add_si64(s1, w18);
01956 s3 = _mm_add_si64(s3, s4);
01957 s1 = _mm_add_si64(s1, w20);
01958 s1 = _mm_add_si64(s1, s3);
01959 C[5] = _mm_cvtsi64_si32(s1);
01960 s1 = _mm_srli_si64(s1, 32);
01961
01962 s3 = _mm_add_si64(x6, y6);
01963 s4 = _mm_add_si64(x8, y8);
01964 s1 = _mm_add_si64(s1, w22);
01965 s3 = _mm_add_si64(s3, s4);
01966 s1 = _mm_add_si64(s1, w26);
01967 s1 = _mm_add_si64(s1, s3);
01968 C[6] = _mm_cvtsi64_si32(s1);
01969 s1 = _mm_srli_si64(s1, 32);
01970
01971 C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01972 _mm_empty();
01973 }
01974
01975 #endif // #ifdef SSE2_INTRINSICS_AVAILABLE
01976
01977
01978
01979 #define A0 A
01980 #define A1 (A+N2)
01981 #define B0 B
01982 #define B1 (B+N2)
01983
01984 #define T0 T
01985 #define T1 (T+N2)
01986 #define T2 (T+N)
01987 #define T3 (T+N+N2)
01988
01989 #define R0 R
01990 #define R1 (R+N2)
01991 #define R2 (R+N)
01992 #define R3 (R+N+N2)
01993
01994
01995
01996
01997
01998
01999 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N)
02000 {
02001 assert(N>=2 && N%2==0);
02002
02003 if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02004 LowLevel::Multiply8(R, A, B);
02005 else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02006 LowLevel::Multiply4(R, A, B);
02007 else if (N==2)
02008 LowLevel::Multiply2(R, A, B);
02009 else
02010 {
02011 const unsigned int N2 = N/2;
02012 int carry;
02013
02014 int aComp = Compare(A0, A1, N2);
02015 int bComp = Compare(B0, B1, N2);
02016
02017 switch (2*aComp + aComp + bComp)
02018 {
02019 case -4:
02020 LowLevel::Subtract(R0, A1, A0, N2);
02021 LowLevel::Subtract(R1, B0, B1, N2);
02022 RecursiveMultiply(T0, T2, R0, R1, N2);
02023 LowLevel::Subtract(T1, T1, R0, N2);
02024 carry = -1;
02025 break;
02026 case -2:
02027 LowLevel::Subtract(R0, A1, A0, N2);
02028 LowLevel::Subtract(R1, B0, B1, N2);
02029 RecursiveMultiply(T0, T2, R0, R1, N2);
02030 carry = 0;
02031 break;
02032 case 2:
02033 LowLevel::Subtract(R0, A0, A1, N2);
02034 LowLevel::Subtract(R1, B1, B0, N2);
02035 RecursiveMultiply(T0, T2, R0, R1, N2);
02036 carry = 0;
02037 break;
02038 case 4:
02039 LowLevel::Subtract(R0, A1, A0, N2);
02040 LowLevel::Subtract(R1, B0, B1, N2);
02041 RecursiveMultiply(T0, T2, R0, R1, N2);
02042 LowLevel::Subtract(T1, T1, R1, N2);
02043 carry = -1;
02044 break;
02045 default:
02046 SetWords(T0, 0, N);
02047 carry = 0;
02048 }
02049
02050 RecursiveMultiply(R0, T2, A0, B0, N2);
02051 RecursiveMultiply(R2, T2, A1, B1, N2);
02052
02053
02054
02055 carry += LowLevel::Add(T0, T0, R0, N);
02056 carry += LowLevel::Add(T0, T0, R2, N);
02057 carry += LowLevel::Add(R1, R1, T0, N);
02058
02059 assert (carry >= 0 && carry <= 2);
02060 Increment(R3, N2, carry);
02061 }
02062 }
02063
02064
02065
02066
02067
02068 void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)
02069 {
02070 assert(N && N%2==0);
02071 if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02072 LowLevel::Square8(R, A);
02073 if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02074 LowLevel::Square4(R, A);
02075 else if (N==2)
02076 LowLevel::Square2(R, A);
02077 else
02078 {
02079 const unsigned int N2 = N/2;
02080
02081 RecursiveSquare(R0, T2, A0, N2);
02082 RecursiveSquare(R2, T2, A1, N2);
02083 RecursiveMultiply(T0, T2, A0, A1, N2);
02084
02085 word carry = LowLevel::Add(R1, R1, T0, N);
02086 carry += LowLevel::Add(R1, R1, T0, N);
02087 Increment(R3, N2, carry);
02088 }
02089 }
02090
02091
02092
02093
02094
02095
02096 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
02097 {
02098 assert(N>=2 && N%2==0);
02099 if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02100 LowLevel::Multiply8Bottom(R, A, B);
02101 else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02102 LowLevel::Multiply4Bottom(R, A, B);
02103 else if (N==2)
02104 LowLevel::Multiply2Bottom(R, A, B);
02105 else
02106 {
02107 const unsigned int N2 = N/2;
02108
02109 RecursiveMultiply(R, T, A0, B0, N2);
02110 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02111 LowLevel::Add(R1, R1, T0, N2);
02112 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02113 LowLevel::Add(R1, R1, T0, N2);
02114 }
02115 }
02116
02117
02118
02119
02120
02121
02122
02123 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
02124 {
02125 assert(N>=2 && N%2==0);
02126
02127 if (N==4)
02128 {
02129 LowLevel::Multiply4(T, A, B);
02130 memcpy(R, T+4, 4*WORD_SIZE);
02131 }
02132 else if (N==2)
02133 {
02134 LowLevel::Multiply2(T, A, B);
02135 memcpy(R, T+2, 2*WORD_SIZE);
02136 }
02137 else
02138 {
02139 const unsigned int N2 = N/2;
02140 int carry;
02141
02142 int aComp = Compare(A0, A1, N2);
02143 int bComp = Compare(B0, B1, N2);
02144
02145 switch (2*aComp + aComp + bComp)
02146 {
02147 case -4:
02148 LowLevel::Subtract(R0, A1, A0, N2);
02149 LowLevel::Subtract(R1, B0, B1, N2);
02150 RecursiveMultiply(T0, T2, R0, R1, N2);
02151 LowLevel::Subtract(T1, T1, R0, N2);
02152 carry = -1;
02153 break;
02154 case -2:
02155 LowLevel::Subtract(R0, A1, A0, N2);
02156 LowLevel::Subtract(R1, B0, B1, N2);
02157 RecursiveMultiply(T0, T2, R0, R1, N2);
02158 carry = 0;
02159 break;
02160 case 2:
02161 LowLevel::Subtract(R0, A0, A1, N2);
02162 LowLevel::Subtract(R1, B1, B0, N2);
02163 RecursiveMultiply(T0, T2, R0, R1, N2);
02164 carry = 0;
02165 break;
02166 case 4:
02167 LowLevel::Subtract(R0, A1, A0, N2);
02168 LowLevel::Subtract(R1, B0, B1, N2);
02169 RecursiveMultiply(T0, T2, R0, R1, N2);
02170 LowLevel::Subtract(T1, T1, R1, N2);
02171 carry = -1;
02172 break;
02173 default:
02174 SetWords(T0, 0, N);
02175 carry = 0;
02176 }
02177
02178 RecursiveMultiply(T2, R0, A1, B1, N2);
02179
02180
02181
02182 word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02183 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02184 word t = (Compare(R0, T2, N2) == -1);
02185
02186 carry += t;
02187 carry += Increment(R0, N2, c2+t);
02188 carry += LowLevel::Add(R0, R0, T1, N2);
02189 carry += LowLevel::Add(R0, R0, T3, N2);
02190 assert (carry >= 0 && carry <= 2);
02191
02192 CopyWords(R1, T3, N2);
02193 Increment(R1, N2, carry);
02194 }
02195 }
02196
02197 inline word Add(word *C, const word *A, const word *B, unsigned int N)
02198 {
02199 return LowLevel::Add(C, A, B, N);
02200 }
02201
02202 inline word Subtract(word *C, const word *A, const word *B, unsigned int N)
02203 {
02204 return LowLevel::Subtract(C, A, B, N);
02205 }
02206
02207 inline void Multiply(word *R, word *T, const word *A, const word *B, unsigned int N)
02208 {
02209 RecursiveMultiply(R, T, A, B, N);
02210 }
02211
02212 inline void Square(word *R, word *T, const word *A, unsigned int N)
02213 {
02214 RecursiveSquare(R, T, A, N);
02215 }
02216
02217 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
02218 {
02219 RecursiveMultiplyBottom(R, T, A, B, N);
02220 }
02221
02222 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
02223 {
02224 RecursiveMultiplyTop(R, T, L, A, B, N);
02225 }
02226
02227 static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
02228 {
02229 word carry=0;
02230 for(unsigned i=0; i<N; i++)
02231 {
02232 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02233 C[i] = p.GetLowHalf();
02234 carry = p.GetHighHalf();
02235 }
02236 return carry;
02237 }
02238
02239
02240
02241
02242
02243
02244 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02245 {
02246 if (NA == NB)
02247 {
02248 if (A == B)
02249 Square(R, T, A, NA);
02250 else
02251 Multiply(R, T, A, B, NA);
02252
02253 return;
02254 }
02255
02256 if (NA > NB)
02257 {
02258 std::swap(A, B);
02259 std::swap(NA, NB);
02260 }
02261
02262 assert(NB % NA == 0);
02263 assert((NB/NA)%2 == 0);
02264
02265 if (NA==2 && !A[1])
02266 {
02267 switch (A[0])
02268 {
02269 case 0:
02270 SetWords(R, 0, NB+2);
02271 return;
02272 case 1:
02273 CopyWords(R, B, NB);
02274 R[NB] = R[NB+1] = 0;
02275 return;
02276 default:
02277 R[NB] = LinearMultiply(R, B, A[0], NB);
02278 R[NB+1] = 0;
02279 return;
02280 }
02281 }
02282
02283 Multiply(R, T, A, B, NA);
02284 CopyWords(T+2*NA, R+NA, NA);
02285
02286 unsigned i;
02287
02288 for (i=2*NA; i<NB; i+=2*NA)
02289 Multiply(T+NA+i, T, A, B+i, NA);
02290 for (i=NA; i<NB; i+=2*NA)
02291 Multiply(R+i, T, A, B+i, NA);
02292
02293 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02294 Increment(R+NB, NA);
02295 }
02296
02297
02298
02299
02300
02301 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)
02302 {
02303 if (N==2)
02304 {
02305 T[0] = AtomicInverseModPower2(A[0]);
02306 T[1] = 0;
02307 LowLevel::Multiply2Bottom(T+2, T, A);
02308 TwosComplement(T+2, 2);
02309 Increment(T+2, 2, 2);
02310 LowLevel::Multiply2Bottom(R, T, T+2);
02311 }
02312 else
02313 {
02314 const unsigned int N2 = N/2;
02315 RecursiveInverseModPower2(R0, T0, A0, N2);
02316 T0[0] = 1;
02317 SetWords(T0+1, 0, N2-1);
02318 MultiplyTop(R1, T1, T0, R0, A0, N2);
02319 MultiplyBottom(T0, T1, R0, A1, N2);
02320 Add(T0, R1, T0, N2);
02321 TwosComplement(T0, N2);
02322 MultiplyBottom(R1, T1, R0, T0, N2);
02323 }
02324 }
02325
02326
02327
02328
02329
02330
02331
02332 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N)
02333 {
02334 MultiplyBottom(R, T, X, U, N);
02335 MultiplyTop(T, T+N, X, R, M, N);
02336 word borrow = Subtract(T, X+N, T, N);
02337
02338 word carry = Add(T+N, T, M, N);
02339 assert(carry || !borrow);
02340 CopyWords(R, T + (borrow ? N : 0), N);
02341 }
02342
02343
02344
02345
02346
02347
02348
02349
02350 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, unsigned int N)
02351 {
02352 assert(N%2==0 && N>=4);
02353
02354 #define M0 M
02355 #define M1 (M+N2)
02356 #define V0 V
02357 #define V1 (V+N2)
02358
02359 #define X0 X
02360 #define X1 (X+N2)
02361 #define X2 (X+N)
02362 #define X3 (X+N+N2)
02363
02364 const unsigned int N2 = N/2;
02365 Multiply(T0, T2, V0, X3, N2);
02366 int c2 = Add(T0, T0, X0, N);
02367 MultiplyBottom(T3, T2, T0, U, N2);
02368 MultiplyTop(T2, R, T0, T3, M0, N2);
02369 c2 -= Subtract(T2, T1, T2, N2);
02370 Multiply(T0, R, T3, M1, N2);
02371 c2 -= Subtract(T0, T2, T0, N2);
02372 int c3 = -(int)Subtract(T1, X2, T1, N2);
02373 Multiply(R0, T2, V1, X3, N2);
02374 c3 += Add(R, R, T, N);
02375
02376 if (c2>0)
02377 c3 += Increment(R1, N2);
02378 else if (c2<0)
02379 c3 -= Decrement(R1, N2, -c2);
02380
02381 assert(c3>=-1 && c3<=1);
02382 if (c3>0)
02383 Subtract(R, R, M, N);
02384 else if (c3<0)
02385 Add(R, R, M, N);
02386
02387 #undef M0
02388 #undef M1
02389 #undef V0
02390 #undef V1
02391
02392 #undef X0
02393 #undef X1
02394 #undef X2
02395 #undef X3
02396 }
02397
02398 #undef A0
02399 #undef A1
02400 #undef B0
02401 #undef B1
02402
02403 #undef T0
02404 #undef T1
02405 #undef T2
02406 #undef T3
02407
02408 #undef R0
02409 #undef R1
02410 #undef R2
02411 #undef R3
02412
02413
02414
02415
02416
02417
02418
02419
02420
02421
02422
02423
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02478 {
02479 word T[4];
02480 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02481 Q[0] = q.GetLowHalf();
02482 Q[1] = q.GetHighHalf();
02483
02484 #ifndef NDEBUG
02485 if (B[0] || B[1])
02486 {
02487
02488 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02489 word P[4];
02490 Portable::Multiply2(P, Q, B);
02491 Add(P, P, T, 4);
02492 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02493 }
02494 #endif
02495 }
02496
02497
02498 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N)
02499 {
02500 assert(N && N%2==0);
02501
02502 if (Q[1])
02503 {
02504 T[N] = T[N+1] = 0;
02505 unsigned i;
02506 for (i=0; i<N; i+=4)
02507 LowLevel::Multiply2(T+i, Q, B+i);
02508 for (i=2; i<N; i+=4)
02509 if (LowLevel::Multiply2Add(T+i, Q, B+i))
02510 T[i+5] += (++T[i+4]==0);
02511 }
02512 else
02513 {
02514 T[N] = LinearMultiply(T, B, Q[0], N);
02515 T[N+1] = 0;
02516 }
02517
02518 word borrow = Subtract(R, R, T, N+2);
02519 assert(!borrow && !R[N+1]);
02520
02521 while (R[N] || Compare(R, B, N) >= 0)
02522 {
02523 R[N] -= Subtract(R, R, B, N);
02524 Q[1] += (++Q[0]==0);
02525 assert(Q[0] || Q[1]);
02526 }
02527 }
02528
02529
02530
02531
02532
02533
02534
02535 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02536 {
02537 assert(NA && NB && NA%2==0 && NB%2==0);
02538 assert(B[NB-1] || B[NB-2]);
02539 assert(NB <= NA);
02540
02541
02542 word *const TA=T;
02543 word *const TB=T+NA+2;
02544 word *const TP=T+NA+2+NB;
02545
02546
02547 unsigned shiftWords = (B[NB-1]==0);
02548 TB[0] = TB[NB-1] = 0;
02549 CopyWords(TB+shiftWords, B, NB-shiftWords);
02550 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02551 assert(shiftBits < WORD_BITS);
02552 ShiftWordsLeftByBits(TB, NB, shiftBits);
02553
02554
02555 TA[0] = TA[NA] = TA[NA+1] = 0;
02556 CopyWords(TA+shiftWords, A, NA);
02557 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02558
02559 if (TA[NA+1]==0 && TA[NA] <= 1)
02560 {
02561 Q[NA-NB+1] = Q[NA-NB] = 0;
02562 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02563 {
02564 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02565 ++Q[NA-NB];
02566 }
02567 }
02568 else
02569 {
02570 NA+=2;
02571 assert(Compare(TA+NA-NB, TB, NB) < 0);
02572 }
02573
02574 word BT[2];
02575 BT[0] = TB[NB-2] + 1;
02576 BT[1] = TB[NB-1] + (BT[0]==0);
02577
02578
02579 for (unsigned i=NA-2; i>=NB; i-=2)
02580 {
02581 AtomicDivide(Q+i-NB, TA+i-2, BT);
02582 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02583 }
02584
02585
02586 CopyWords(R, TA+shiftWords, NB);
02587 ShiftWordsRightByBits(R, NB, shiftBits);
02588 }
02589
02590 static inline unsigned int EvenWordCount(const word *X, unsigned int N)
02591 {
02592 while (N && X[N-2]==0 && X[N-1]==0)
02593 N-=2;
02594 return N;
02595 }
02596
02597
02598
02599
02600
02601
02602
02603 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N)
02604 {
02605 assert(NA<=N && N && N%2==0);
02606
02607 word *b = T;
02608 word *c = T+N;
02609 word *f = T+2*N;
02610 word *g = T+3*N;
02611 unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
02612 unsigned int k=0, s=0;
02613
02614 SetWords(T, 0, 3*N);
02615 b[0]=1;
02616 CopyWords(f, A, NA);
02617 CopyWords(g, M, N);
02618
02619 while (1)
02620 {
02621 word t=f[0];
02622 while (!t)
02623 {
02624 if (EvenWordCount(f, fgLen)==0)
02625 {
02626 SetWords(R, 0, N);
02627 return 0;
02628 }
02629
02630 ShiftWordsRightByWords(f, fgLen, 1);
02631 if (c[bcLen-1]) bcLen+=2;
02632 assert(bcLen <= N);
02633 ShiftWordsLeftByWords(c, bcLen, 1);
02634 k+=WORD_BITS;
02635 t=f[0];
02636 }
02637
02638 unsigned int i=0;
02639 while (t%2 == 0)
02640 {
02641 t>>=1;
02642 i++;
02643 }
02644 k+=i;
02645
02646 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02647 {
02648 if (s%2==0)
02649 CopyWords(R, b, N);
02650 else
02651 Subtract(R, M, b, N);
02652 return k;
02653 }
02654
02655 ShiftWordsRightByBits(f, fgLen, i);
02656 t=ShiftWordsLeftByBits(c, bcLen, i);
02657 if (t)
02658 {
02659 c[bcLen] = t;
02660 bcLen+=2;
02661 assert(bcLen <= N);
02662 }
02663
02664 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02665 fgLen-=2;
02666
02667 if (Compare(f, g, fgLen)==-1)
02668 {
02669 std::swap(f, g);
02670 std::swap(b, c);
02671 s++;
02672 }
02673
02674 Subtract(f, f, g, fgLen);
02675
02676 if (Add(b, b, c, bcLen))
02677 {
02678 b[bcLen] = 1;
02679 bcLen+=2;
02680 assert(bcLen <= N);
02681 }
02682 }
02683 }
02684
02685
02686
02687
02688
02689 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02690 {
02691 CopyWords(R, A, N);
02692
02693 while (k--)
02694 {
02695 if (R[0]%2==0)
02696 ShiftWordsRightByBits(R, N, 1);
02697 else
02698 {
02699 word carry = Add(R, R, M, N);
02700 ShiftWordsRightByBits(R, N, 1);
02701 R[N-1] += carry<<(WORD_BITS-1);
02702 }
02703 }
02704 }
02705
02706
02707
02708
02709
02710 void MultiplyByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02711 {
02712 CopyWords(R, A, N);
02713
02714 while (k--)
02715 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02716 Subtract(R, R, M, N);
02717 }
02718
02719
02720
02721 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02722
02723 static inline unsigned int RoundupSize(unsigned int n)
02724 {
02725 if (n<=8)
02726 return RoundupSizeTable[n];
02727 else if (n<=16)
02728 return 16;
02729 else if (n<=32)
02730 return 32;
02731 else if (n<=64)
02732 return 64;
02733 else return 1U << BitPrecision(n-1);
02734 }
02735
02736 Integer::Integer()
02737 : reg(2), sign(POSITIVE)
02738 {
02739 reg[0] = reg[1] = 0;
02740 }
02741
02742 Integer::Integer(const Integer& t)
02743 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02744 {
02745 CopyWords(reg, t.reg, reg.size());
02746 }
02747
02748 Integer::Integer(Sign s, lword value)
02749 : reg(2), sign(s)
02750 {
02751 reg[0] = word(value);
02752 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02753 }
02754
02755 Integer::Integer(signed long value)
02756 : reg(2)
02757 {
02758 if (value >= 0)
02759 sign = POSITIVE;
02760 else
02761 {
02762 sign = NEGATIVE;
02763 value = -value;
02764 }
02765 reg[0] = word(value);
02766 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02767 }
02768
02769 Integer::Integer(Sign s, word high, word low)
02770 : reg(2), sign(s)
02771 {
02772 reg[0] = low;
02773 reg[1] = high;
02774 }
02775
02776 bool Integer::IsConvertableToLong() const
02777 {
02778 if (ByteCount() > sizeof(long))
02779 return false;
02780
02781 unsigned long value = reg[0];
02782 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02783
02784 if (sign==POSITIVE)
02785 return (signed long)value >= 0;
02786 else
02787 return -(signed long)value < 0;
02788 }
02789
02790 signed long Integer::ConvertToLong() const
02791 {
02792 assert(IsConvertableToLong());
02793
02794 unsigned long value = reg[0];
02795 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02796 return sign==POSITIVE ? value : -(signed long)value;
02797 }
02798
02799 Integer::Integer(BufferedTransformation &encodedInteger, unsigned int byteCount, Signedness s)
02800 {
02801 Decode(encodedInteger, byteCount, s);
02802 }
02803
02804 Integer::Integer(const byte *encodedInteger, unsigned int byteCount, Signedness s)
02805 {
02806 Decode(encodedInteger, byteCount, s);
02807 }
02808
02809 Integer::Integer(BufferedTransformation &bt)
02810 {
02811 BERDecode(bt);
02812 }
02813
02814 Integer::Integer(RandomNumberGenerator &rng, unsigned int bitcount)
02815 {
02816 Randomize(rng, bitcount);
02817 }
02818
02819 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02820 {
02821 if (!Randomize(rng, min, max, rnType, equiv, mod))
02822 throw Integer::RandomNumberNotFound();
02823 }
02824
02825 Integer Integer::Power2(unsigned int e)
02826 {
02827 Integer r((word)0, BitsToWords(e+1));
02828 r.SetBit(e);
02829 return r;
02830 }
02831
02832 template <long i>
02833 struct NewInteger
02834 {
02835 Integer * operator()() const
02836 {
02837 return new Integer(i);
02838 }
02839 };
02840
02841 const Integer &Integer::Zero()
02842 {
02843 return Singleton<Integer>().Ref();
02844 }
02845
02846 const Integer &Integer::One()
02847 {
02848 return Singleton<Integer, NewInteger<1> >().Ref();
02849 }
02850
02851 const Integer &Integer::Two()
02852 {
02853 return Singleton<Integer, NewInteger<2> >().Ref();
02854 }
02855
02856 bool Integer::operator!() const
02857 {
02858 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02859 }
02860
02861 Integer& Integer::operator=(const Integer& t)
02862 {
02863 if (this != &t)
02864 {
02865 reg.New(RoundupSize(t.WordCount()));
02866 CopyWords(reg, t.reg, reg.size());
02867 sign = t.sign;
02868 }
02869 return *this;
02870 }
02871
02872 bool Integer::GetBit(unsigned int n) const
02873 {
02874 if (n/WORD_BITS >= reg.size())
02875 return 0;
02876 else
02877 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02878 }
02879
02880 void Integer::SetBit(unsigned int n, bool value)
02881 {
02882 if (value)
02883 {
02884 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02885 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02886 }
02887 else
02888 {
02889 if (n/WORD_BITS < reg.size())
02890 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02891 }
02892 }
02893
02894 byte Integer::GetByte(unsigned int n) const
02895 {
02896 if (n/WORD_SIZE >= reg.size())
02897 return 0;
02898 else
02899 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02900 }
02901
02902 void Integer::SetByte(unsigned int n, byte value)
02903 {
02904 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02905 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02906 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02907 }
02908
02909 unsigned long Integer::GetBits(unsigned int i, unsigned int n) const
02910 {
02911 assert(n <= sizeof(unsigned long)*8);
02912 unsigned long v = 0;
02913 for (unsigned int j=0; j<n; j++)
02914 v |= GetBit(i+j) << j;
02915 return v;
02916 }
02917
02918 Integer Integer::operator-() const
02919 {
02920 Integer result(*this);
02921 result.Negate();
02922 return result;
02923 }
02924
02925 Integer Integer::AbsoluteValue() const
02926 {
02927 Integer result(*this);
02928 result.sign = POSITIVE;
02929 return result;
02930 }
02931
02932 void Integer::swap(Integer &a)
02933 {
02934 reg.swap(a.reg);
02935 std::swap(sign, a.sign);
02936 }
02937
02938 Integer::Integer(word value, unsigned int length)
02939 : reg(RoundupSize(length)), sign(POSITIVE)
02940 {
02941 reg[0] = value;
02942 SetWords(reg+1, 0, reg.size()-1);
02943 }
02944
02945 template <class T>
02946 static Integer StringToInteger(const T *str)
02947 {
02948 word radix;
02949
02950
02951
02952 unsigned int length;
02953 for (length = 0; str[length] != 0; length++) {}
02954
02955 Integer v;
02956
02957 if (length == 0)
02958 return v;
02959
02960 switch (str[length-1])
02961 {
02962 case 'h':
02963 case 'H':
02964 radix=16;
02965 break;
02966 case 'o':
02967 case 'O':
02968 radix=8;
02969 break;
02970 case 'b':
02971 case 'B':
02972 radix=2;
02973 break;
02974 default:
02975 radix=10;
02976 }
02977
02978 if (length > 2 && str[0] == '0' && str[1] == 'x')
02979 radix = 16;
02980
02981 for (unsigned i=0; i<length; i++)
02982 {
02983 word digit;
02984
02985 if (str[i] >= '0' && str[i] <= '9')
02986 digit = str[i] - '0';
02987 else if (str[i] >= 'A' && str[i] <= 'F')
02988 digit = str[i] - 'A' + 10;
02989 else if (str[i] >= 'a' && str[i] <= 'f')
02990 digit = str[i] - 'a' + 10;
02991 else
02992 digit = radix;
02993
02994 if (digit < radix)
02995 {
02996 v *= radix;
02997 v += digit;
02998 }
02999 }
03000
03001 if (str[0] == '-')
03002 v.Negate();
03003
03004 return v;
03005 }
03006
03007 Integer::Integer(const char *str)
03008 : reg(2), sign(POSITIVE)
03009 {
03010 *this = StringToInteger(str);
03011 }
03012
03013 Integer::Integer(const wchar_t *str)
03014 : reg(2), sign(POSITIVE)
03015 {
03016 *this = StringToInteger(str);
03017 }
03018
03019 unsigned int Integer::WordCount() const
03020 {
03021 return CountWords(reg, reg.size());
03022 }
03023
03024 unsigned int Integer::ByteCount() const
03025 {
03026 unsigned wordCount = WordCount();
03027 if (wordCount)
03028 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03029 else
03030 return 0;
03031 }
03032
03033 unsigned int Integer::BitCount() const
03034 {
03035 unsigned wordCount = WordCount();
03036 if (wordCount)
03037 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03038 else
03039 return 0;
03040 }
03041
03042 void Integer::Decode(const byte *input, unsigned int inputLen, Signedness s)
03043 {
03044 StringStore store(input, inputLen);
03045 Decode(store, inputLen, s);
03046 }
03047
03048 void Integer::Decode(BufferedTransformation &bt, unsigned int inputLen, Signedness s)
03049 {
03050 assert(bt.MaxRetrievable() >= inputLen);
03051
03052 byte b;
03053 bt.Peek(b);
03054 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03055
03056 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03057 {
03058 bt.Skip(1);
03059 inputLen--;
03060 bt.Peek(b);
03061 }
03062
03063 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03064
03065 for (unsigned int i=inputLen; i > 0; i--)
03066 {
03067 bt.Get(b);
03068 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03069 }
03070
03071 if (sign == NEGATIVE)
03072 {
03073 for (unsigned i=inputLen; i<reg.size()*WORD_SIZE; i++)
03074 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03075 TwosComplement(reg, reg.size());
03076 }
03077 }
03078
03079 unsigned int Integer::MinEncodedSize(Signedness signedness) const
03080 {
03081 unsigned int outputLen = STDMAX(1U, ByteCount());
03082 if (signedness == UNSIGNED)
03083 return outputLen;
03084 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03085 outputLen++;
03086 if (IsNegative() && *this < -Power2(outputLen*8-1))
03087 outputLen++;
03088 return outputLen;
03089 }
03090
03091 unsigned int Integer::Encode(byte *output, unsigned int outputLen, Signedness signedness) const
03092 {
03093 ArraySink sink(output, outputLen);
03094 return Encode(sink, outputLen, signedness);
03095 }
03096
03097 unsigned int Integer::Encode(BufferedTransformation &bt, unsigned int outputLen, Signedness signedness) const
03098 {
03099 if (signedness == UNSIGNED || NotNegative())
03100 {
03101 for (unsigned int i=outputLen; i > 0; i--)
03102 bt.Put(GetByte(i-1));
03103 }
03104 else
03105 {
03106
03107 Integer temp = Integer::Power2(8*STDMAX(ByteCount(), outputLen)) + *this;
03108 for (unsigned i=0; i<outputLen; i++)
03109 bt.Put(temp.GetByte(outputLen-i-1));
03110 }
03111 return outputLen;
03112 }
03113
03114 void Integer::DEREncode(BufferedTransformation &bt) const
03115 {
03116 DERGeneralEncoder enc(bt, INTEGER);
03117 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03118 enc.MessageEnd();
03119 }
03120
03121 void Integer::BERDecode(const byte *input, unsigned int len)
03122 {
03123 StringStore store(input, len);
03124 BERDecode(store);
03125 }
03126
03127 void Integer::BERDecode(BufferedTransformation &bt)
03128 {
03129 BERGeneralDecoder dec(bt, INTEGER);
03130 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03131 BERDecodeError();
03132 Decode(dec, dec.RemainingLength(), SIGNED);
03133 dec.MessageEnd();
03134 }
03135
03136 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, unsigned int length) const
03137 {
03138 DERGeneralEncoder enc(bt, OCTET_STRING);
03139 Encode(enc, length);
03140 enc.MessageEnd();
03141 }
03142
03143 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, unsigned int length)
03144 {
03145 BERGeneralDecoder dec(bt, OCTET_STRING);
03146 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03147 BERDecodeError();
03148 Decode(dec, length);
03149 dec.MessageEnd();
03150 }
03151
03152 unsigned int Integer::OpenPGPEncode(byte *output, unsigned int len) const
03153 {
03154 ArraySink sink(output, len);
03155 return OpenPGPEncode(sink);
03156 }
03157
03158 unsigned int Integer::OpenPGPEncode(BufferedTransformation &bt) const
03159 {
03160 word16 bitCount = BitCount();
03161 bt.PutWord16(bitCount);
03162 return 2 + Encode(bt, BitsToBytes(bitCount));
03163 }
03164
03165 void Integer::OpenPGPDecode(const byte *input, unsigned int len)
03166 {
03167 StringStore store(input, len);
03168 OpenPGPDecode(store);
03169 }
03170
03171 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03172 {
03173 word16 bitCount;
03174 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03175 throw OpenPGPDecodeErr();
03176 Decode(bt, BitsToBytes(bitCount));
03177 }
03178
03179 void Integer::Randomize(RandomNumberGenerator &rng, unsigned int nbits)
03180 {
03181 const unsigned int nbytes = nbits/8 + 1;
03182 SecByteBlock buf(nbytes);
03183 rng.GenerateBlock(buf, nbytes);
03184 if (nbytes)
03185 buf[0] = (byte)Crop(buf[0], nbits % 8);
03186 Decode(buf, nbytes, UNSIGNED);
03187 }
03188
03189 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03190 {
03191 if (min > max)
03192 throw InvalidArgument("Integer: Min must be no greater than Max");
03193
03194 Integer range = max - min;
03195 const unsigned int nbits = range.BitCount();
03196
03197 do
03198 {
03199 Randomize(rng, nbits);
03200 }
03201 while (*this > range);
03202
03203 *this += min;
03204 }
03205
03206 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03207 {
03208 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03209 }
03210
03211 class KDF2_RNG : public RandomNumberGenerator
03212 {
03213 public:
03214 KDF2_RNG(const byte *seed, unsigned int seedSize)
03215 : m_counter(0), m_counterAndSeed(seedSize + 4)
03216 {
03217 memcpy(m_counterAndSeed + 4, seed, seedSize);
03218 }
03219
03220 byte GenerateByte()
03221 {
03222 byte b;
03223 GenerateBlock(&b, 1);
03224 return b;
03225 }
03226
03227 void GenerateBlock(byte *output, unsigned int size)
03228 {
03229 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03230 ++m_counter;
03231 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03232 }
03233
03234 private:
03235 word32 m_counter;
03236 SecByteBlock m_counterAndSeed;
03237 };
03238
03239 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03240 {
03241 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03242 Integer max;
03243 if (!params.GetValue("Max", max))
03244 {
03245 int bitLength;
03246 if (params.GetIntValue("BitLength", bitLength))
03247 max = Integer::Power2(bitLength);
03248 else
03249 throw InvalidArgument("Integer: missing Max argument");
03250 }
03251 if (min > max)
03252 throw InvalidArgument("Integer: Min must be no greater than Max");
03253
03254 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03255 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03256
03257 if (equiv.IsNegative() || equiv >= mod)
03258 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03259
03260 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03261
03262 member_ptr<KDF2_RNG> kdf2Rng;
03263 ConstByteArrayParameter seed;
03264 if (params.GetValue("Seed", seed))
03265 {
03266 ByteQueue bq;
03267 DERSequenceEncoder seq(bq);
03268 min.DEREncode(seq);
03269 max.DEREncode(seq);
03270 equiv.DEREncode(seq);
03271 mod.DEREncode(seq);
03272 DEREncodeUnsigned(seq, rnType);
03273 DEREncodeOctetString(seq, seed.begin(), seed.size());
03274 seq.MessageEnd();
03275
03276 SecByteBlock finalSeed(bq.MaxRetrievable());
03277 bq.Get(finalSeed, finalSeed.size());
03278 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03279 }
03280 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03281
03282 switch (rnType)
03283 {
03284 case ANY:
03285 if (mod == One())
03286 Randomize(rng, min, max);
03287 else
03288 {
03289 Integer min1 = min + (equiv-min)%mod;
03290 if (max < min1)
03291 return false;
03292 Randomize(rng, Zero(), (max - min1) / mod);
03293 *this *= mod;
03294 *this += min1;
03295 }
03296 return true;
03297
03298 case PRIME:
03299 {
03300 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03301
03302 int i;
03303 i = 0;
03304 while (1)
03305 {
03306 if (++i==16)
03307 {
03308
03309 Integer first = min;
03310 if (FirstPrime(first, max, equiv, mod, pSelector))
03311 {
03312
03313 *this = first;
03314 if (!FirstPrime(first, max, equiv, mod, pSelector))
03315 return true;
03316 }
03317 else
03318 return false;
03319 }
03320
03321 Randomize(rng, min, max);
03322 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03323 return true;
03324 }
03325 }
03326
03327 default:
03328 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03329 }
03330 }
03331
03332 std::istream& operator>>(std::istream& in, Integer &a)
03333 {
03334 char c;
03335 unsigned int length = 0;
03336 SecBlock<char> str(length + 16);
03337
03338 std::ws(in);
03339
03340 do
03341 {
03342 in.read(&c, 1);
03343 str[length++] = c;
03344 if (length >= str.size())
03345 str.Grow(length + 16);
03346 }
03347 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03348
03349 if (in.gcount())
03350 in.putback(c);
03351 str[length-1] = '\0';
03352 a = Integer(str);
03353
03354 return in;
03355 }
03356
03357 std::ostream& operator<<(std::ostream& out, const Integer &a)
03358 {
03359
03360 long f = out.flags() & std::ios::basefield;
03361 int base, block;
03362 char suffix;
03363 switch(f)
03364 {
03365 case std::ios::oct :
03366 base = 8;
03367 block = 8;
03368 suffix = 'o';
03369 break;
03370 case std::ios::hex :
03371 base = 16;
03372 block = 4;
03373 suffix = 'h';
03374 break;
03375 default :
03376 base = 10;
03377 block = 3;
03378 suffix = '.';
03379 }
03380
03381 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03382 Integer temp1=a, temp2;
03383 unsigned i=0;
03384 const char vec[]="0123456789ABCDEF";
03385
03386 if (a.IsNegative())
03387 {
03388 out << '-';
03389 temp1.Negate();
03390 }
03391
03392 if (!a)
03393 out << '0';
03394
03395 while (!!temp1)
03396 {
03397 word digit;
03398 Integer::Divide(digit, temp2, temp1, base);
03399 s[i++]=vec[digit];
03400 temp1=temp2;
03401 }
03402
03403 while (i--)
03404 {
03405 out << s[i];
03406
03407
03408 }
03409 return out << suffix;
03410 }
03411
03412 Integer& Integer::operator++()
03413 {
03414 if (NotNegative())
03415 {
03416 if (Increment(reg, reg.size()))
03417 {
03418 reg.CleanGrow(2*reg.size());
03419 reg[reg.size()/2]=1;
03420 }
03421 }
03422 else
03423 {
03424 word borrow = Decrement(reg, reg.size());
03425 assert(!borrow);
03426 if (WordCount()==0)
03427 *this = Zero();
03428 }
03429 return *this;
03430 }
03431
03432 Integer& Integer::operator--()
03433 {
03434 if (IsNegative())
03435 {
03436 if (Increment(reg, reg.size()))
03437 {
03438 reg.CleanGrow(2*reg.size());
03439 reg[reg.size()/2]=1;
03440 }
03441 }
03442 else
03443 {
03444 if (Decrement(reg, reg.size()))
03445 *this = -One();
03446 }
03447 return *this;
03448 }
03449
03450 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03451 {
03452 word carry;
03453 if (a.reg.size() == b.reg.size())
03454 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03455 else if (a.reg.size() > b.reg.size())
03456 {
03457 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03458 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03459 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03460 }
03461 else
03462 {
03463 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03464 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03465 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03466 }
03467
03468 if (carry)
03469 {
03470 sum.reg.CleanGrow(2*sum.reg.size());
03471 sum.reg[sum.reg.size()/2] = 1;
03472 }
03473 sum.sign = Integer::POSITIVE;
03474 }
03475
03476 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03477 {
03478 unsigned aSize = a.WordCount();
03479 aSize += aSize%2;
03480 unsigned bSize = b.WordCount();
03481 bSize += bSize%2;
03482
03483 if (aSize == bSize)
03484 {
03485 if (Compare(a.reg, b.reg, aSize) >= 0)
03486 {
03487 Subtract(diff.reg, a.reg, b.reg, aSize);
03488 diff.sign = Integer::POSITIVE;
03489 }
03490 else
03491 {
03492 Subtract(diff.reg, b.reg, a.reg, aSize);
03493 diff.sign = Integer::NEGATIVE;
03494 }
03495 }
03496 else if (aSize > bSize)
03497 {
03498 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03499 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03500 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03501 assert(!borrow);
03502 diff.sign = Integer::POSITIVE;
03503 }
03504 else
03505 {
03506 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03507 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03508 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03509 assert(!borrow);
03510 diff.sign = Integer::NEGATIVE;
03511 }
03512 }
03513
03514 Integer Integer::Plus(const Integer& b) const
03515 {
03516 Integer sum((word)0, STDMAX(reg.size(), b.reg.size()));
03517 if (NotNegative())
03518 {
03519 if (b.NotNegative())
03520 PositiveAdd(sum, *this, b);
03521 else
03522 PositiveSubtract(sum, *this, b);
03523 }
03524 else
03525 {
03526 if (b.NotNegative())
03527 PositiveSubtract(sum, b, *this);
03528 else
03529 {
03530 PositiveAdd(sum, *this, b);
03531 sum.sign = Integer::NEGATIVE;
03532 }
03533 }
03534 return sum;
03535 }
03536
03537 Integer& Integer::operator+=(const Integer& t)
03538 {
03539 reg.CleanGrow(t.reg.size());
03540 if (NotNegative())
03541 {
03542 if (t.NotNegative())
03543 PositiveAdd(*this, *this, t);
03544 else
03545 PositiveSubtract(*this, *this, t);
03546 }
03547 else
03548 {
03549 if (t.NotNegative())
03550 PositiveSubtract(*this, t, *this);
03551 else
03552 {
03553 PositiveAdd(*this, *this, t);
03554 sign = Integer::NEGATIVE;
03555 }
03556 }
03557 return *this;
03558 }
03559
03560 Integer Integer::Minus(const Integer& b) const
03561 {
03562 Integer diff((word)0, STDMAX(reg.size(), b.reg.size()));
03563 if (NotNegative())
03564 {
03565 if (b.NotNegative())
03566 PositiveSubtract(diff, *this, b);
03567 else
03568 PositiveAdd(diff, *this, b);
03569 }
03570 else
03571 {
03572 if (b.NotNegative())
03573 {
03574 PositiveAdd(diff, *this, b);
03575 diff.sign = Integer::NEGATIVE;
03576 }
03577 else
03578 PositiveSubtract(diff, b, *this);
03579 }
03580 return diff;
03581 }
03582
03583 Integer& Integer::operator-=(const Integer& t)
03584 {
03585 reg.CleanGrow(t.reg.size());
03586 if (NotNegative())
03587 {
03588 if (t.NotNegative())
03589 PositiveSubtract(*this, *this, t);
03590 else
03591 PositiveAdd(*this, *this, t);
03592 }
03593 else
03594 {
03595 if (t.NotNegative())
03596 {
03597 PositiveAdd(*this, *this, t);
03598 sign = Integer::NEGATIVE;
03599 }
03600 else
03601 PositiveSubtract(*this, t, *this);
03602 }
03603 return *this;
03604 }
03605
03606 Integer& Integer::operator<<=(unsigned int n)
03607 {
03608 const unsigned int wordCount = WordCount();
03609 const unsigned int shiftWords = n / WORD_BITS;
03610 const unsigned int shiftBits = n % WORD_BITS;
03611
03612 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03613 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03614 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03615 return *this;
03616 }
03617
03618 Integer& Integer::operator>>=(unsigned int n)
03619 {
03620 const unsigned int wordCount = WordCount();
03621 const unsigned int shiftWords = n / WORD_BITS;
03622 const unsigned int shiftBits = n % WORD_BITS;
03623
03624 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03625 if (wordCount > shiftWords)
03626 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03627 if (IsNegative() && WordCount()==0)
03628 *this = Zero();
03629 return *this;
03630 }
03631
03632 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03633 {
03634 unsigned aSize = RoundupSize(a.WordCount());
03635 unsigned bSize = RoundupSize(b.WordCount());
03636
03637 product.reg.CleanNew(RoundupSize(aSize+bSize));
03638 product.sign = Integer::POSITIVE;
03639
03640 SecAlignedWordBlock workspace(aSize + bSize);
03641 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03642 }
03643
03644 void Multiply(Integer &product, const Integer &a, const Integer &b)
03645 {
03646 PositiveMultiply(product, a, b);
03647
03648 if (a.NotNegative() != b.NotNegative())
03649 product.Negate();
03650 }
03651
03652 Integer Integer::Times(const Integer &b) const
03653 {
03654 Integer product;
03655 Multiply(product, *this, b);
03656 return product;
03657 }
03658
03659
03660
03661
03662
03663
03664
03665
03666
03667
03668
03669
03670
03671
03672
03673
03674
03675
03676
03677
03678
03679
03680
03681 void PositiveDivide(Integer &remainder, Integer "ient,
03682 const Integer &a, const Integer &b)
03683 {
03684 unsigned aSize = a.WordCount();
03685 unsigned bSize = b.WordCount();
03686
03687 if (!bSize)
03688 throw Integer::DivideByZero();
03689
03690 if (a.PositiveCompare(b) == -1)
03691 {
03692 remainder = a;
03693 remainder.sign = Integer::POSITIVE;
03694 quotient = Integer::Zero();
03695 return;
03696 }
03697
03698 aSize += aSize%2;
03699 bSize += bSize%2;
03700
03701 remainder.reg.CleanNew(RoundupSize(bSize));
03702 remainder.sign = Integer::POSITIVE;
03703 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03704 quotient.sign = Integer::POSITIVE;
03705
03706 SecAlignedWordBlock T(aSize+2*bSize+4);
03707 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03708 }
03709
03710 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03711 {
03712 PositiveDivide(remainder, quotient, dividend, divisor);
03713
03714 if (dividend.IsNegative())
03715 {
03716 quotient.Negate();
03717 if (remainder.NotZero())
03718 {
03719 --quotient;
03720 remainder = divisor.AbsoluteValue() - remainder;
03721 }
03722 }
03723
03724 if (divisor.IsNegative())
03725 quotient.Negate();
03726 }
03727
03728 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03729 {
03730 q = a;
03731 q >>= n;
03732
03733 const unsigned int wordCount = BitsToWords(n);
03734 if (wordCount <= a.WordCount())
03735 {
03736 r.reg.resize(RoundupSize(wordCount));
03737 CopyWords(r.reg, a.reg, wordCount);
03738 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03739 if (n % WORD_BITS != 0)
03740 r.reg[wordCount-1] %= (1 << (n % WORD_BITS));
03741 }
03742 else
03743 {
03744 r.reg.resize(RoundupSize(a.WordCount()));
03745 CopyWords(r.reg, a.reg, r.reg.size());
03746 }
03747 r.sign = POSITIVE;
03748
03749 if (a.IsNegative() && r.NotZero())
03750 {
03751 --q;
03752 r = Power2(n) - r;
03753 }
03754 }
03755
03756 Integer Integer::DividedBy(const Integer &b) const
03757 {
03758 Integer remainder, quotient;
03759 Integer::Divide(remainder, quotient, *this, b);
03760 return quotient;
03761 }
03762
03763 Integer Integer::Modulo(const Integer &b) const
03764 {
03765 Integer remainder, quotient;
03766 Integer::Divide(remainder, quotient, *this, b);
03767 return remainder;
03768 }
03769
03770 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03771 {
03772 if (!divisor)
03773 throw Integer::DivideByZero();
03774
03775 assert(divisor);
03776
03777 if ((divisor & (divisor-1)) == 0)
03778 {
03779 quotient = dividend >> (BitPrecision(divisor)-1);
03780 remainder = dividend.reg[0] & (divisor-1);
03781 return;
03782 }
03783
03784 unsigned int i = dividend.WordCount();
03785 quotient.reg.CleanNew(RoundupSize(i));
03786 remainder = 0;
03787 while (i--)
03788 {
03789 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03790 remainder = DWord(dividend.reg[i], remainder) % divisor;
03791 }
03792
03793 if (dividend.NotNegative())
03794 quotient.sign = POSITIVE;
03795 else
03796 {
03797 quotient.sign = NEGATIVE;
03798 if (remainder)
03799 {
03800 --quotient;
03801 remainder = divisor - remainder;
03802 }
03803 }
03804 }
03805
03806 Integer Integer::DividedBy(word b) const
03807 {
03808 word remainder;
03809 Integer quotient;
03810 Integer::Divide(remainder, quotient, *this, b);
03811 return quotient;
03812 }
03813
03814 word Integer::Modulo(word divisor) const
03815 {
03816 if (!divisor)
03817 throw Integer::DivideByZero();
03818
03819 assert(divisor);
03820
03821 word remainder;
03822
03823 if ((divisor & (divisor-1)) == 0)
03824 remainder = reg[0] & (divisor-1);
03825 else
03826 {
03827 unsigned int i = WordCount();
03828
03829 if (divisor <= 5)
03830 {
03831 DWord sum(0, 0);
03832 while (i--)
03833 sum += reg[i];
03834 remainder = sum % divisor;
03835 }
03836 else
03837 {
03838 remainder = 0;
03839 while (i--)
03840 remainder = DWord(reg[i], remainder) % divisor;
03841 }
03842 }
03843
03844 if (IsNegative() && remainder)
03845 remainder = divisor - remainder;
03846
03847 return remainder;
03848 }
03849
03850 void Integer::Negate()
03851 {
03852 if (!!(*this))
03853 sign = Sign(1-sign);
03854 }
03855
03856 int Integer::PositiveCompare(const Integer& t) const
03857 {
03858 unsigned size = WordCount(), tSize = t.WordCount();
03859
03860 if (size == tSize)
03861 return CryptoPP::Compare(reg, t.reg, size);
03862 else
03863 return size > tSize ? 1 : -1;
03864 }
03865
03866 int Integer::Compare(const Integer& t) const
03867 {
03868 if (NotNegative())
03869 {
03870 if (t.NotNegative())
03871 return PositiveCompare(t);
03872 else
03873 return 1;
03874 }
03875 else
03876 {
03877 if (t.NotNegative())
03878 return -1;
03879 else
03880 return -PositiveCompare(t);
03881 }
03882 }
03883
03884 Integer Integer::SquareRoot() const
03885 {
03886 if (!IsPositive())
03887 return Zero();
03888
03889
03890 Integer x, y = Power2((BitCount()+1)/2);
03891 assert(y*y >= *this);
03892
03893 do
03894 {
03895 x = y;
03896 y = (x + *this/x) >> 1;
03897 } while (y<x);
03898
03899 return x;
03900 }
03901
03902 bool Integer::IsSquare() const
03903 {
03904 Integer r = SquareRoot();
03905 return *this == r.Squared();
03906 }
03907
03908 bool Integer::IsUnit() const
03909 {
03910 return (WordCount() == 1) && (reg[0] == 1);
03911 }
03912
03913 Integer Integer::MultiplicativeInverse() const
03914 {
03915 return IsUnit() ? *this : Zero();
03916 }
03917
03918 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03919 {
03920 return x*y%m;
03921 }
03922
03923 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03924 {
03925 ModularArithmetic mr(m);
03926 return mr.Exponentiate(x, e);
03927 }
03928
03929 Integer Integer::Gcd(const Integer &a, const Integer &b)
03930 {
03931 return EuclideanDomainOf<Integer>().Gcd(a, b);
03932 }
03933
03934 Integer Integer::InverseMod(const Integer &m) const
03935 {
03936 assert(m.NotNegative());
03937
03938 if (IsNegative() || *this>=m)
03939 return (*this%m).InverseMod(m);
03940
03941 if (m.IsEven())
03942 {
03943 if (!m || IsEven())
03944 return Zero();
03945 if (*this == One())
03946 return One();
03947
03948 Integer u = m.InverseMod(*this);
03949 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03950 }
03951
03952 SecBlock<word> T(m.reg.size() * 4);
03953 Integer r((word)0, m.reg.size());
03954 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03955 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03956 return r;
03957 }
03958
03959 word Integer::InverseMod(const word mod) const
03960 {
03961 word g0 = mod, g1 = *this % mod;
03962 word v0 = 0, v1 = 1;
03963 word y;
03964
03965 while (g1)
03966 {
03967 if (g1 == 1)
03968 return v1;
03969 y = g0 / g1;
03970 g0 = g0 % g1;
03971 v0 += y * v1;
03972
03973 if (!g0)
03974 break;
03975 if (g0 == 1)
03976 return mod-v0;
03977 y = g1 / g0;
03978 g1 = g1 % g0;
03979 v1 += y * v0;
03980 }
03981 return 0;
03982 }
03983
03984
03985
03986 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
03987 {
03988 BERSequenceDecoder seq(bt);
03989 OID oid(seq);
03990 if (oid != ASN1::prime_field())
03991 BERDecodeError();
03992 modulus.BERDecode(seq);
03993 seq.MessageEnd();
03994 result.reg.resize(modulus.reg.size());
03995 }
03996
03997 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
03998 {
03999 DERSequenceEncoder seq(bt);
04000 ASN1::prime_field().DEREncode(seq);
04001 modulus.DEREncode(seq);
04002 seq.MessageEnd();
04003 }
04004
04005 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04006 {
04007 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04008 }
04009
04010 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04011 {
04012 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04013 }
04014
04015 const Integer& ModularArithmetic::Half(const Integer &a) const
04016 {
04017 if (a.reg.size()==modulus.reg.size())
04018 {
04019 CryptoPP::DivideByPower2Mod(result.reg.begin(), a.reg, 1, modulus.reg, a.reg.size());
04020 return result;
04021 }
04022 else
04023 return result1 = (a.IsEven() ? (a >> 1) : ((a+modulus) >> 1));
04024 }
04025
04026 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04027 {
04028 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04029 {
04030 if (CryptoPP::Add(result.reg.begin(), a.reg, b.reg, a.reg.size())
04031 || Compare(result.reg, modulus.reg, a.reg.size()) >= 0)
04032 {
04033 CryptoPP::Subtract(result.reg.begin(), result.reg, modulus.reg, a.reg.size());
04034 }
04035 return result;
04036 }
04037 else
04038 {
04039 result1 = a+b;
04040 if (result1 >= modulus)
04041 result1 -= modulus;
04042 return result1;
04043 }
04044 }
04045
04046 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04047 {
04048 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04049 {
04050 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04051 || Compare(a.reg, modulus.reg, a.reg.size()) >= 0)
04052 {
04053 CryptoPP::Subtract(a.reg, a.reg, modulus.reg, a.reg.size());
04054 }
04055 }
04056 else
04057 {
04058 a+=b;
04059 if (a>=modulus)
04060 a-=modulus;
04061 }
04062
04063 return a;
04064 }
04065
04066 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04067 {
04068 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04069 {
04070 if (CryptoPP::Subtract(result.reg.begin(), a.reg, b.reg, a.reg.size()))
04071 CryptoPP::Add(result.reg.begin(), result.reg, modulus.reg, a.reg.size());
04072 return result;
04073 }
04074 else
04075 {
04076 result1 = a-b;
04077 if (result1.IsNegative())
04078 result1 += modulus;
04079 return result1;
04080 }
04081 }
04082
04083 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04084 {
04085 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
04086 {
04087 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04088 CryptoPP::Add(a.reg, a.reg, modulus.reg, a.reg.size());
04089 }
04090 else
04091 {
04092 a-=b;
04093 if (a.IsNegative())
04094 a+=modulus;
04095 }
04096
04097 return a;
04098 }
04099
04100 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04101 {
04102 if (!a)
04103 return a;
04104
04105 CopyWords(result.reg.begin(), modulus.reg, modulus.reg.size());
04106 if (CryptoPP::Subtract(result.reg.begin(), result.reg, a.reg, a.reg.size()))
04107 Decrement(result.reg.begin()+a.reg.size(), 1, modulus.reg.size()-a.reg.size());
04108
04109 return result;
04110 }
04111
04112 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04113 {
04114 if (modulus.IsOdd())
04115 {
04116 MontgomeryRepresentation dr(modulus);
04117 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04118 }
04119 else
04120 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04121 }
04122
04123 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04124 {
04125 if (modulus.IsOdd())
04126 {
04127 MontgomeryRepresentation dr(modulus);
04128 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04129 for (unsigned int i=0; i<exponentsCount; i++)
04130 results[i] = dr.ConvertOut(results[i]);
04131 }
04132 else
04133 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04134 }
04135
04136 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04137 : ModularArithmetic(m),
04138 u((word)0, modulus.reg.size()),
04139 workspace(5*modulus.reg.size())
04140 {
04141 if (!modulus.IsOdd())
04142 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04143
04144 RecursiveInverseModPower2(u.reg, workspace, modulus.reg, modulus.reg.size());
04145 }
04146
04147 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04148 {
04149 word *const T = workspace.begin();
04150 word *const R = result.reg.begin();
04151 const unsigned int N = modulus.reg.size();
04152 assert(a.reg.size()<=N && b.reg.size()<=N);
04153
04154 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04155 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04156 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04157 return result;
04158 }
04159
04160 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04161 {
04162 word *const T = workspace.begin();
04163 word *const R = result.reg.begin();
04164 const unsigned int N = modulus.reg.size();
04165 assert(a.reg.size()<=N);
04166
04167 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04168 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04169 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04170 return result;
04171 }
04172
04173 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04174 {
04175 word *const T = workspace.begin();
04176 word *const R = result.reg.begin();
04177 const unsigned int N = modulus.reg.size();
04178 assert(a.reg.size()<=N);
04179
04180 CopyWords(T, a.reg, a.reg.size());
04181 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04182 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04183 return result;
04184 }
04185
04186 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04187 {
04188
04189 word *const T = workspace.begin();
04190 word *const R = result.reg.begin();
04191 const unsigned int N = modulus.reg.size();
04192 assert(a.reg.size()<=N);
04193
04194 CopyWords(T, a.reg, a.reg.size());
04195 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04196 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
04197 unsigned k = AlmostInverse(R, T, R, N, modulus.reg, N);
04198
04199
04200
04201 if (k>N*WORD_BITS)
04202 DivideByPower2Mod(R, R, k-N*WORD_BITS, modulus.reg, N);
04203 else
04204 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, modulus.reg, N);
04205
04206 return result;
04207 }
04208
04209 NAMESPACE_END
04210
04211 #endif