Crypto++  8.6
Free C++ class library of cryptographic schemes
gcm.cpp
1 // gcm.cpp - originally written and placed in the public domain by Wei Dai.
2 // ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3 // multiply routines are less efficient because they shadow x86.
4 // The precomputed key table integration makes it tricky to use the
5 // more efficient ARMv8 implementation of the multiply and reduce.
6 
7 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
8 
9 #include "pch.h"
10 #include "config.h"
11 
12 #ifndef CRYPTOPP_IMPORTS
13 #ifndef CRYPTOPP_GENERATE_X64_MASM
14 
15 // Visual Studio .Net 2003 compiler crash
16 #if defined(_MSC_VER) && (_MSC_VER < 1400)
17 # pragma optimize("", off)
18 #endif
19 
20 #include "gcm.h"
21 #include "cpu.h"
22 
23 #if defined(CRYPTOPP_DISABLE_GCM_ASM)
24 # undef CRYPTOPP_X86_ASM_AVAILABLE
25 # undef CRYPTOPP_X32_ASM_AVAILABLE
26 # undef CRYPTOPP_X64_ASM_AVAILABLE
27 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
28 #endif
29 
30 NAMESPACE_BEGIN(CryptoPP)
31 
32 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
33 // Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
34 // 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
35 #if defined(CRYPTOPP_DISABLE_MIXED_ASM)
36 // 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path.
37 # define USE_MOVD_REG32 1
38 #elif defined(__GNUC__) || defined(_MSC_VER)
39 // 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
40 # define USE_MOVD_REG32_OR_REG64 1
41 #else
42 // 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
43 # define USE_MOV_REG32_OR_REG64 1
44 #endif
45 #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
46 
47 word16 GCM_Base::s_reductionTable[256];
48 volatile bool GCM_Base::s_reductionTableInitialized = false;
49 
50 void GCM_Base::GCTR::IncrementCounterBy256()
51 {
52  IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
53 }
54 
55 static inline void Xor16(byte *a, const byte *b, const byte *c)
56 {
57  CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
58  CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
59  CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
60  ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
61  ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
62 }
63 
64 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
65 // SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
66 // a source file with a SSE architecture switch. Also see GH #226 and GH #284.
67 extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
68 #endif // SSE2
69 
70 #if CRYPTOPP_ARM_NEON_AVAILABLE
71 extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
72 #endif
73 
74 #if CRYPTOPP_POWER8_AVAILABLE
75 extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
76 #endif
77 
78 #if CRYPTOPP_CLMUL_AVAILABLE
79 extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
80 extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
81 const unsigned int s_cltableSizeInBlocks = 8;
82 extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
83 #endif // CRYPTOPP_CLMUL_AVAILABLE
84 
85 #if CRYPTOPP_ARM_PMULL_AVAILABLE
86 extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
87 extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
88 const unsigned int s_cltableSizeInBlocks = 8;
89 extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
90 #endif // CRYPTOPP_ARM_PMULL_AVAILABLE
91 
92 #if CRYPTOPP_POWER8_VMULL_AVAILABLE
93 extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
94 extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
95 const unsigned int s_cltableSizeInBlocks = 8;
96 extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer);
97 #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
98 
// Keys the underlying block cipher, derives the GHASH subkey H = E_K(0^128),
// and precomputes a multiplication table for GF(2^128) multiplication by H.
// Table size is 8 blocks when a carryless-multiply ISA (CLMUL/PMULL/VMULL)
// is present, otherwise 2KB or 64KB for the portable/SSE2 code paths.
99 void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
100 {
101  BlockCipher &blockCipher = AccessBlockCipher();
102  blockCipher.SetKey(userKey, keylength, params);
103 
104  // GCM is only defined for 16-byte block ciphers at the moment.
105  // However, variable blocksize support means we have to defer
106  // blocksize checks to runtime after the key is set. Also see
107  // https://github.com/weidai11/cryptopp/issues/408.
108  const unsigned int blockSize = blockCipher.BlockSize();
109  CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
110  if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
111  throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
112 
113  int tableSize, i, j, k;
114 
// With a carryless-multiply ISA the table size is fixed at
// s_cltableSizeInBlocks blocks; Name::TableSize() is read only so the
// parameter counts as consumed, then the result is overwritten.
115 #if CRYPTOPP_CLMUL_AVAILABLE
116  if (HasCLMUL())
117  {
118  // Avoid "parameter not used" error and suppress Coverity finding
119  (void)params.GetIntValue(Name::TableSize(), tableSize);
120  tableSize = s_cltableSizeInBlocks * blockSize;
121  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
122  }
123  else
124 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
125  if (HasPMULL())
126  {
127  // Avoid "parameter not used" error and suppress Coverity finding
128  (void)params.GetIntValue(Name::TableSize(), tableSize);
129  tableSize = s_cltableSizeInBlocks * blockSize;
130  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
131  }
132  else
133 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
134  if (HasPMULL())
135  {
136  // Avoid "parameter not used" error and suppress Coverity finding
137  (void)params.GetIntValue(Name::TableSize(), tableSize);
138  tableSize = s_cltableSizeInBlocks * blockSize;
139  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
140  }
141  else
142 #endif
143  {
// Portable/SSE2 paths: caller may request a table size; anything >= 64KB
// selects the 64KB table, otherwise the 2KB table. With no explicit request,
// the choice comes from the GCM_64K_Tables template/option.
144  if (params.GetIntValue(Name::TableSize(), tableSize))
145  tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
146  else
147  tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;
148 
149  //#if defined(_MSC_VER) && (_MSC_VER < 1400)
150  // VC 2003 workaround: compiler generates bad code for 64K tables
151  //tableSize = 2*1024;
152  //#endif
153  }
154 
// NOTE(review): m_buffer holds 3 blocks of working state followed by the
// multiplication table; the exact layout is defined by the HashBuffer()/
// HashKey()/MulTable() accessors, which are not visible in this listing.
155  m_buffer.resize(3*blockSize + tableSize);
156  byte *mulTable = MulTable();
157  byte *hashKey = HashKey();
// H = E_K(0^128): encrypt the all-zero block in place to derive the subkey.
158  memset(hashKey, 0, REQUIRED_BLOCKSIZE);
159  blockCipher.ProcessBlock(hashKey);
160 
161 #if CRYPTOPP_CLMUL_AVAILABLE
162  if (HasCLMUL())
163  {
164  GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
165  return;
166  }
167 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
168  if (HasPMULL())
169  {
170  GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
171  return;
172  }
173 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
174  if (HasPMULL())
175  {
176  GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
177  return;
178  }
179 #endif
180 
// Portable table construction. V starts as H; each iteration of the loops
// below stores the current V into the table and then halves it: shift right
// one bit and, if a bit fell off, XOR in 0xe1 << 56 (the GCM reduction
// polynomial in the reflected bit order used by GHASH).
181  word64 V0, V1;
183  Block::Get(hashKey)(V0)(V1);
184 
185  if (tableSize == 64*1024)
186  {
// 64KB table: 16 sub-tables of 256 entries x 16 bytes, one sub-table per
// byte position of the 128-bit operand; entries for powers of two are
// written here, the remaining entries are filled in by XOR below.
187  for (i=0; i<128; i++)
188  {
189  k = i%8;
190  Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
191 
192  int x = (int)V1 & 1;
193  V1 = (V1>>1) | (V0<<63);
194  V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
195  }
196 
// Complete each sub-table: entry (j+k) = entry j XOR entry k, using the
// fastest available 16-byte XOR primitive.
197  for (i=0; i<16; i++)
198  {
199  memset(mulTable+i*256*16, 0, 16);
200 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
201  if (HasSSE2())
202  for (j=2; j<=0x80; j*=2)
203  for (k=1; k<j; k++)
204  GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
205  else
206 #elif CRYPTOPP_ARM_NEON_AVAILABLE
207  if (HasNEON())
208  for (j=2; j<=0x80; j*=2)
209  for (k=1; k<j; k++)
210  GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
211  else
212 #elif CRYPTOPP_POWER8_AVAILABLE
213  if (HasPower8())
214  for (j=2; j<=0x80; j*=2)
215  for (k=1; k<j; k++)
216  GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
217  else
218 #endif
219  for (j=2; j<=0x80; j*=2)
220  for (k=1; k<j; k++)
221  Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
222  }
223  }
224  else
225  {
// 2KB table path: a shared 256-entry byte-reduction table is built once per
// process. NOTE(review): s_reductionTableInitialized is a volatile bool, not
// an atomic; concurrent first-time keying could race here. The writes are
// idempotent, but confirm the project's thread-safety expectations.
226  if (!s_reductionTableInitialized)
227  {
228  s_reductionTable[0] = 0;
229  word16 x = 0x01c2;
230  s_reductionTable[1] = ByteReverse(x);
231  for (unsigned int ii=2; ii<=0x80; ii*=2)
232  {
233  x <<= 1;
234  s_reductionTable[ii] = ByteReverse(x);
235  for (unsigned int jj=1; jj<ii; jj++)
236  s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
237  }
238  s_reductionTableInitialized = true;
239  }
240 
// 2KB table: 8 sub-tables of 16 entries x 16 bytes (nibble lookups),
// split across two 1KB halves; only selected bit positions are stored.
241  for (i=0; i<128-24; i++)
242  {
243  k = i%32;
244  if (k < 4)
245  Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
246  else if (k < 8)
247  Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
248 
249  int x = (int)V1 & 1;
250  V1 = (V1>>1) | (V0<<63);
251  V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
252  }
253 
// Fill in the non-power-of-two entries of both halves by XOR combination.
254  for (i=0; i<4; i++)
255  {
256  memset(mulTable+i*256, 0, 16);
257  memset(mulTable+1024+i*256, 0, 16);
258 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
259  if (HasSSE2())
260  for (j=2; j<=8; j*=2)
261  for (k=1; k<j; k++)
262  {
263  GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
264  GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
265  }
266  else
267 #elif CRYPTOPP_ARM_NEON_AVAILABLE
268  if (HasNEON())
269  for (j=2; j<=8; j*=2)
270  for (k=1; k<j; k++)
271  {
272  GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
273  GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
274  }
275  else
276 #elif CRYPTOPP_POWER8_AVAILABLE
277  if (HasPower8())
278  for (j=2; j<=8; j*=2)
279  for (k=1; k<j; k++)
280  {
281  GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
282  GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
283  }
284  else
285 #endif
286  for (j=2; j<=8; j*=2)
287  for (k=1; k<j; k++)
288  {
289  Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
290  Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
291  }
292  }
293  }
294 }
295 
// Byte-reverses the hash buffer when a carryless-multiply implementation is
// in use (those paths keep GHASH state in a different byte order than the
// portable code). On the portable/SSE2 paths this is a no-op.
296 inline void GCM_Base::ReverseHashBufferIfNeeded()
297 {
298 #if CRYPTOPP_CLMUL_AVAILABLE
299  if (HasCLMUL())
300  {
301  GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
302  }
303 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
304  if (HasPMULL())
305  {
306  GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
307  }
308 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
309  if (HasPMULL())
310  {
311  GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
312  }
313 #endif
314 }
315 
// Computes the initial counter block J0 from the IV and re-keys the CTR
// stage. A 12-byte IV is used directly with a 32-bit counter of 1 appended;
// any other length is run through GHASH together with its bit length.
316 void GCM_Base::Resync(const byte *iv, size_t len)
317 {
318  BlockCipher &cipher = AccessBlockCipher();
319  byte *hashBuffer = HashBuffer();
320 
321  if (len == 12)
322  {
// J0 = IV || 0^31 || 1 for the recommended 96-bit IV.
323  memcpy(hashBuffer, iv, len);
324  memset(hashBuffer+len, 0, 3);
325  hashBuffer[len+3] = 1;
326  }
327  else
328  {
// J0 = GHASH(IV padded to a block boundary || 0^64 || [len(IV) in bits]).
329  size_t origLen = len;
330  memset(hashBuffer, 0, HASH_BLOCKSIZE);
331 
// Hash all whole blocks of the IV directly from the caller's buffer;
// AuthenticateBlocks returns the number of leftover bytes.
332  if (len >= HASH_BLOCKSIZE)
333  {
334  len = GCM_Base::AuthenticateBlocks(iv, len);
335  iv += (origLen - len);
336  }
337 
// Zero-pad and hash the final partial block, if any.
338  if (len > 0)
339  {
340  memcpy(m_buffer, iv, len);
341  memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
342  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
343  }
344 
// Hash the length block: 64 zero bits followed by the IV bit length.
345  PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
346  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
347 
348  ReverseHashBufferIfNeeded();
349  }
350 
351  if (m_state >= State_IVSet)
352  m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
353  else
354  m_ctr.SetCipherWithIV(cipher, hashBuffer);
355 
// Skip the first keystream block: E_K(J0) is reserved for the final tag.
356  m_ctr.Seek(HASH_BLOCKSIZE);
357 
358  memset(hashBuffer, 0, HASH_BLOCKSIZE);
359 }
360 
// Reports the preferred input/output alignment: 16 bytes when the SSE2 or
// POWER8 code paths will run, 4 bytes for NEON, otherwise whatever the
// underlying block cipher prefers.
361 unsigned int GCM_Base::OptimalDataAlignment() const
362 {
363  return
364 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
365  HasSSE2() ? 16 :
366 #elif CRYPTOPP_ARM_NEON_AVAILABLE
367  HasNEON() ? 4 :
368 #elif CRYPTOPP_POWER8_AVAILABLE
369  HasPower8() ? 16 :
370 #endif
371  GetBlockCipher().OptimalDataAlignment();
372 }
373 
374 #if CRYPTOPP_MSC_VERSION
375 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
376 #endif
377 
378 #endif // Not CRYPTOPP_GENERATE_X64_MASM
379 
380 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
381 extern "C" {
382 void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
383 void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer);
384 }
385 #endif
386 
387 #ifndef CRYPTOPP_GENERATE_X64_MASM
388 
// GHASH core: folds len bytes of data into the hash buffer, one 16-byte
// block at a time, and returns the number of unprocessed trailing bytes
// (len % 16). Dispatches to a carryless-multiply routine when available;
// otherwise selects among portable C++ and SSE2 inline-asm paths.
389 size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
390 {
391 #if CRYPTOPP_CLMUL_AVAILABLE
392  if (HasCLMUL())
393  {
394  return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
395  }
396 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
397  if (HasPMULL())
398  {
399  return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
400  }
401 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
402  if (HasPMULL())
403  {
404  return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
405  }
406 #endif
407 
409  word64 *hashBuffer = (word64 *)(void *)HashBuffer();
410  CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));
411 
// Path selector: bit 0 = SSE2 available, bit 1 = 64K tables in use.
// case 0/2 are portable C++; case 1/3 are SSE2 (inline asm or MASM).
412  switch (2*(m_buffer.size()>=64*1024)
413 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
414  + HasSSE2()
415 //#elif CRYPTOPP_ARM_NEON_AVAILABLE
416 // + HasNEON()
417 #endif
418  )
419  {
// Portable 2KB-table path: multiply via 4-bit nibble table lookups, then
// reduce byte-by-byte using s_reductionTable.
420  case 0: // non-SSE2 and 2K tables
421  {
422  byte *mulTable = MulTable();
423  word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
424 
425  do
426  {
427  word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
428  Block::Get(data)(y0)(y1);
429  x0 ^= y0;
430  x1 ^= y1;
431 
432  data += HASH_BLOCKSIZE;
433  len -= HASH_BLOCKSIZE;
434 
  // Table-entry accessors; the endian-specific variants below extract the
  // appropriate nibble of the accumulated state as the table index.
435  #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
436 
437  #if (CRYPTOPP_LITTLE_ENDIAN)
438  #if CRYPTOPP_BOOL_SLOW_WORD64
439  word32 z0 = (word32)x0;
440  word32 z1 = (word32)(x0>>32);
441  word32 z2 = (word32)x1;
442  word32 z3 = (word32)(x1>>32);
443  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
444  #else
445  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
446  #endif
447  #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
448  #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
449  #else
450  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
451  #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
452  #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
453  #endif
454 
  // Accumulate four byte-shifted partial products (a, b, c, d) of the
  // 128-bit state times H, 32 input bits per invocation.
455  #define GF_MUL_32BY128(op, a, b, c) \
456  a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
457  a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
458  b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
459  b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
460  c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
461  c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
462  d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
463  d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \
464 
465  GF_MUL_32BY128(=, 0, 0, 0)
466  GF_MUL_32BY128(^=, 0, 1, 1)
467  GF_MUL_32BY128(^=, 1, 0, 2)
468  GF_MUL_32BY128(^=, 1, 1, 3)
469 
  // Fold the byte-shifted partial products together, accumulating the
  // reduction bits for each shifted-out byte in r.
470  word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
471  GF_SHIFT_8(d)
472  c0 ^= d0; c1 ^= d1;
473  r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
474  GF_SHIFT_8(c)
475  b0 ^= c0; b1 ^= c1;
476  r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
477  GF_SHIFT_8(b)
478  a0 ^= b0; a1 ^= b1;
  // NOTE(review): a line (original 479) is missing from this listing — it
  // appears to have folded the reduction value r into a0 (the stripped
  // cross-references mention ConditionalByteReverse/LITTLE_ENDIAN_ORDER).
  // Recover the exact statement from the upstream gcm.cpp before building.
480  x0 = a0; x1 = a1;
481  }
482  while (len >= HASH_BLOCKSIZE);
483 
484  hashBuffer[0] = x0; hashBuffer[1] = x1;
485  return len;
486  }
487 
// Portable 64KB-table path: one 256-entry sub-table per input byte, so no
// separate reduction step is needed.
488  case 2: // non-SSE2 and 64K tables
489  {
490  byte *mulTable = MulTable();
491  word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
492 
493  do
494  {
495  word64 y0, y1, a0, a1;
496  Block::Get(data)(y0)(y1);
497  x0 ^= y0;
498  x1 ^= y1;
499 
500  data += HASH_BLOCKSIZE;
501  len -= HASH_BLOCKSIZE;
502 
503  #undef READ_TABLE_WORD64_COMMON
504  #undef READ_TABLE_WORD64
505 
506  #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
507 
508  #if (CRYPTOPP_LITTLE_ENDIAN)
509  #if CRYPTOPP_BOOL_SLOW_WORD64
510  word32 z0 = (word32)x0;
511  word32 z1 = (word32)(x0>>32);
512  word32 z2 = (word32)x1;
513  word32 z3 = (word32)(x1>>32);
514  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
515  #else
516  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
517  #endif
518  #else
519  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
520  #endif
521 
522  #define GF_MUL_8BY128(op, b, c, d) \
523  a0 op READ_TABLE_WORD64(b, c, d, 0);\
524  a1 op READ_TABLE_WORD64(b, c, d, 1);\
525 
  // XOR together the 16 per-byte table entries for the full 128-bit product.
526  GF_MUL_8BY128(=, 0, 0, 0)
527  GF_MUL_8BY128(^=, 0, 0, 1)
528  GF_MUL_8BY128(^=, 0, 0, 2)
529  GF_MUL_8BY128(^=, 0, 0, 3)
530  GF_MUL_8BY128(^=, 0, 1, 0)
531  GF_MUL_8BY128(^=, 0, 1, 1)
532  GF_MUL_8BY128(^=, 0, 1, 2)
533  GF_MUL_8BY128(^=, 0, 1, 3)
534  GF_MUL_8BY128(^=, 1, 2, 0)
535  GF_MUL_8BY128(^=, 1, 2, 1)
536  GF_MUL_8BY128(^=, 1, 2, 2)
537  GF_MUL_8BY128(^=, 1, 2, 3)
538  GF_MUL_8BY128(^=, 1, 3, 0)
539  GF_MUL_8BY128(^=, 1, 3, 1)
540  GF_MUL_8BY128(^=, 1, 3, 2)
541  GF_MUL_8BY128(^=, 1, 3, 3)
542 
543  x0 = a0; x1 = a1;
544  }
545  while (len >= HASH_BLOCKSIZE);
546 
547  hashBuffer[0] = x0; hashBuffer[1] = x1;
548  return len;
549  }
550 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
551 
// x64 MASM build: the SSE2 cases dispatch to routines assembled separately
// from the CRYPTOPP_GENERATE_X64_MASM output of this very file.
552 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
553  case 1: // SSE2 and 2K tables
554  GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
555  return len % 16;
556  case 3: // SSE2 and 64K tables
557  GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
558  return len % 16;
559 #endif
560 
// SSE2 inline-assembly implementations of the same two algorithms. The text
// doubles as the source for the MASM generation pass.
561 #if CRYPTOPP_SSE2_ASM_AVAILABLE
562  case 1: // SSE2 and 2K tables
563  {
564  #ifdef __GNUC__
565  __asm__ __volatile__
566  (
567  INTEL_NOPREFIX
568  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
569  ALIGN 8
570  GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
571  rex_push_reg rsi
572  push_reg rdi
573  push_reg rbx
574  .endprolog
575  mov rsi, r8
576  mov r11, r9
577  #else
578  AS2( mov WORD_REG(cx), data )
579  AS2( mov WORD_REG(dx), len )
580  AS2( mov WORD_REG(si), hashBuffer )
581  AS2( shr WORD_REG(dx), 4 )
582  #endif
583 
  // NOTE(review): a conditional line (original 584, presumably
  // "#if CRYPTOPP_BOOL_X32") is missing from this listing; this #if/#else
  // block is unbalanced as shown. Recover it from the upstream gcm.cpp.
585  AS1(push rbx)
586  AS1(push rbp)
587  #else
588  AS_PUSH_IF86( bx)
589  AS_PUSH_IF86( bp)
590  #endif
591 
592  #ifdef __GNUC__
593  AS2( mov AS_REG_7, WORD_REG(di))
594  #elif CRYPTOPP_BOOL_X86
595  AS2( lea AS_REG_7, s_reductionTable)
596  #endif
597 
598  AS2( movdqa xmm0, [WORD_REG(si)] )
599 
600  #define MUL_TABLE_0 WORD_REG(si) + 32
601  #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
602  #define RED_TABLE AS_REG_7
603 
  // Main loop: XOR in the next data block, then perform the nibble-table
  // multiply in xmm2..xmm5 followed by the byte-wise reduction.
604  ASL(0)
605  AS2( movdqu xmm4, [WORD_REG(cx)] )
606  AS2( pxor xmm0, xmm4 )
607 
608  AS2( movd ebx, xmm0 )
609  AS2( mov eax, AS_HEX(f0f0f0f0) )
610  AS2( and eax, ebx )
611  AS2( shl ebx, 4 )
612  AS2( and ebx, AS_HEX(f0f0f0f0) )
613  AS2( movzx edi, ah )
614  AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
615  AS2( movzx edi, al )
616  AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
617  AS2( shr eax, 16 )
618  AS2( movzx edi, ah )
619  AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
620  AS2( movzx edi, al )
621  AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
622 
623  #define SSE2_MUL_32BITS(i) \
624  AS2( psrldq xmm0, 4 )\
625  AS2( movd eax, xmm0 )\
626  AS2( and eax, AS_HEX(f0f0f0f0) )\
627  AS2( movzx edi, bh )\
628  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
629  AS2( movzx edi, bl )\
630  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
631  AS2( shr ebx, 16 )\
632  AS2( movzx edi, bh )\
633  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
634  AS2( movzx edi, bl )\
635  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
636  AS2( movd ebx, xmm0 )\
637  AS2( shl ebx, 4 )\
638  AS2( and ebx, AS_HEX(f0f0f0f0) )\
639  AS2( movzx edi, ah )\
640  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
641  AS2( movzx edi, al )\
642  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
643  AS2( shr eax, 16 )\
644  AS2( movzx edi, ah )\
645  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
646  AS2( movzx edi, al )\
647  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
648 
649  SSE2_MUL_32BITS(1)
650  SSE2_MUL_32BITS(2)
651  SSE2_MUL_32BITS(3)
652 
653  AS2( movzx edi, bh )
654  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
655  AS2( movzx edi, bl )
656  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
657  AS2( shr ebx, 16 )
658  AS2( movzx edi, bh )
659  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
660  AS2( movzx edi, bl )
661  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
662 
  // Reduction: fold the byte-shifted partial products, looking up the
  // reduction word for each shifted-out byte in RED_TABLE.
663  AS2( movdqa xmm0, xmm3 )
664  AS2( pslldq xmm3, 1 )
665  AS2( pxor xmm2, xmm3 )
666  AS2( movdqa xmm1, xmm2 )
667  AS2( pslldq xmm2, 1 )
668  AS2( pxor xmm5, xmm2 )
669 
670  AS2( psrldq xmm0, 15 )
671 #if USE_MOVD_REG32
672  AS2( movd edi, xmm0 )
673 #elif USE_MOV_REG32_OR_REG64
674  AS2( mov WORD_REG(di), xmm0 )
675 #else // GNU Assembler
676  AS2( movd WORD_REG(di), xmm0 )
677 #endif
678  AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
679  AS2( shl eax, 8 )
680 
681  AS2( movdqa xmm0, xmm5 )
682  AS2( pslldq xmm5, 1 )
683  AS2( pxor xmm4, xmm5 )
684 
685  AS2( psrldq xmm1, 15 )
686 #if USE_MOVD_REG32
687  AS2( movd edi, xmm1 )
688 #elif USE_MOV_REG32_OR_REG64
689  AS2( mov WORD_REG(di), xmm1 )
690 #else
691  AS2( movd WORD_REG(di), xmm1 )
692 #endif
693  AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
694  AS2( shl eax, 8 )
695 
696  AS2( psrldq xmm0, 15 )
697 #if USE_MOVD_REG32
698  AS2( movd edi, xmm0 )
699 #elif USE_MOV_REG32_OR_REG64
700  AS2( mov WORD_REG(di), xmm0 )
701 #else
702  AS2( movd WORD_REG(di), xmm0 )
703 #endif
704  AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
705 
706  AS2( movd xmm0, eax )
707  AS2( pxor xmm0, xmm4 )
708 
709  AS2( add WORD_REG(cx), 16 )
710  AS2( sub WORD_REG(dx), 1 )
711  // ATT_NOPREFIX
712  ASJ( jnz, 0, b )
713  INTEL_NOPREFIX
714  AS2( movdqa [WORD_REG(si)], xmm0 )
715 
  // NOTE(review): a conditional line (original 716, presumably
  // "#if CRYPTOPP_BOOL_X32") is missing from this listing; recover it from
  // the upstream gcm.cpp before building.
717  AS1(pop rbp)
718  AS1(pop rbx)
719  #else
720  AS_POP_IF86( bp)
721  AS_POP_IF86( bx)
722  #endif
723 
724  #ifdef __GNUC__
725  ATT_PREFIX
726  :
727  : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
728  : "memory", "cc", "%eax"
  // NOTE(review): original line 729 (presumably "#if CRYPTOPP_BOOL_X64")
  // is missing here; the clobber list below is conditional in upstream.
730  , "%ebx", "%r11"
731  #endif
732  );
733  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
734  pop rbx
735  pop rdi
736  pop rsi
737  ret
738  GCM_AuthenticateBlocks_2K_SSE2 ENDP
739  #endif
740 
741  return len%16;
742  }
// 64K-table SSE2 path: one aligned XMM XOR per input byte via per-byte
// 256-entry tables; no reduction table needed.
743  case 3: // SSE2 and 64K tables
744  {
745  #ifdef __GNUC__
746  __asm__ __volatile__
747  (
748  INTEL_NOPREFIX
749  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
750  ALIGN 8
751  GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
752  rex_push_reg rsi
753  push_reg rdi
754  .endprolog
755  mov rsi, r8
756  #else
757  AS2( mov WORD_REG(cx), data )
758  AS2( mov WORD_REG(dx), len )
759  AS2( mov WORD_REG(si), hashBuffer )
760  AS2( shr WORD_REG(dx), 4 )
761  #endif
762 
763  AS2( movdqa xmm0, [WORD_REG(si)] )
764 
765  #undef MUL_TABLE
766  #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16
767 
768  ASL(1)
769  AS2( movdqu xmm1, [WORD_REG(cx)] )
770  AS2( pxor xmm1, xmm0 )
771  AS2( pxor xmm0, xmm0 )
772 
773  #undef SSE2_MUL_32BITS
774  #define SSE2_MUL_32BITS(i) \
775  AS2( movd eax, xmm1 )\
776  AS2( psrldq xmm1, 4 )\
777  AS2( movzx edi, al )\
778  AS2( add WORD_REG(di), WORD_REG(di) )\
779  AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
780  AS2( movzx edi, ah )\
781  AS2( add WORD_REG(di), WORD_REG(di) )\
782  AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
783  AS2( shr eax, 16 )\
784  AS2( movzx edi, al )\
785  AS2( add WORD_REG(di), WORD_REG(di) )\
786  AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
787  AS2( movzx edi, ah )\
788  AS2( add WORD_REG(di), WORD_REG(di) )\
789  AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\
790 
791  SSE2_MUL_32BITS(0)
792  SSE2_MUL_32BITS(1)
793  SSE2_MUL_32BITS(2)
794  SSE2_MUL_32BITS(3)
795 
796  AS2( add WORD_REG(cx), 16 )
797  AS2( sub WORD_REG(dx), 1 )
798  // ATT_NOPREFIX
799  ASJ( jnz, 1, b )
800  INTEL_NOPREFIX
801  AS2( movdqa [WORD_REG(si)], xmm0 )
802 
803  #ifdef __GNUC__
804  ATT_PREFIX
805  :
806  : "c" (data), "d" (len/16), "S" (hashBuffer)
807  : "memory", "cc", "%edi", "%eax"
808  );
809  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
810  pop rdi
811  pop rsi
812  ret
813  GCM_AuthenticateBlocks_64K_SSE2 ENDP
814  #endif
815 
816  return len%16;
817  }
818 #endif
819 #ifndef CRYPTOPP_GENERATE_X64_MASM
820  }
821 
822  return len%16;
823 }
824 
825 void GCM_Base::AuthenticateLastHeaderBlock()
826 {
827  if (m_bufferedDataLength > 0)
828  {
829  memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
830  m_bufferedDataLength = 0;
831  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
832  }
833 }
834 
// Finishes GHASH over the message: flushes any buffered partial block, then
// hashes the final length block [len(AAD) in bits || len(ciphertext) in bits].
835 void GCM_Base::AuthenticateLastConfidentialBlock()
836 {
837  GCM_Base::AuthenticateLastHeaderBlock();
838  PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
839  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
840 }
841 
// Produces the authentication tag: seeks the CTR stream back to the first
// keystream block E_K(J0) (reserved in Resync), normalizes the hash buffer's
// byte order, and encrypts the GHASH result into mac (truncated to macSize).
842 void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
843 {
844  m_ctr.Seek(0);
845  ReverseHashBufferIfNeeded();
846  m_ctr.ProcessData(mac, HashBuffer(), macSize);
847 }
848 
849 NAMESPACE_END
850 
851 #endif // Not CRYPTOPP_GENERATE_X64_MASM
852 #endif
SimpleKeyingInterface::SetKey
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
W64LIT
#define W64LIT(x)
Declare an unsigned word64.
Definition: config_int.h:119
BlockTransformation::ProcessBlock
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:879
IsAlignedOn
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1226
CRYPTOPP_ASSERT
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68
LITTLE_ENDIAN_ORDER
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
word64
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
BlockTransformation::BlockSize
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
pch.h
Precompiled header file.
CRYPTOPP_BOOL_X86
#define CRYPTOPP_BOOL_X86
32-bit x86 platform
Definition: config_cpu.h:52
Name::BlockSize
const char * BlockSize()
int, in bytes
Definition: argnames.h:27
word32
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
CRYPTOPP_BOOL_X64
#define CRYPTOPP_BOOL_X64
64-bit x64 platform
Definition: config_cpu.h:48
IncrementCounterByOne
void IncrementCounterByOne(byte *inout, unsigned int size)
Performs an addition with carry on a block of bytes.
Definition: misc.h:1298
PutBlock
Access a block of memory.
Definition: misc.h:2570
BlockGetAndPut
Access a block of memory.
Definition: misc.h:2607
cpu.h
Functions for CPU features and intrinsics.
NameValuePairs::GetIntValue
CRYPTOPP_DLL bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:415
ConditionalByteReverse
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2187
GCM_Base::AlgorithmName
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition: gcm.h:36
Name::TableSize
const char * TableSize()
int, in bytes
Definition: argnames.h:81
CRYPTOPP_BOOL_X32
#define CRYPTOPP_BOOL_X32
32-bit x32 platform
Definition: config_cpu.h:44
gcm.h
GCM block cipher mode of operation.
InvalidArgument
An invalid argument was detected.
Definition: cryptlib.h:203
ByteReverse
byte ByteReverse(byte value)
Reverses bytes in an 8-bit value.
Definition: misc.h:2021
CryptoPP
Crypto++ library namespace.
config.h
Library configuration file.
BlockCipher
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1283
GCM_64K_Tables
@ GCM_64K_Tables
Use a table with 64K entries.
Definition: gcm.h:27
BlockTransformation::OptimalDataAlignment
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
NameValuePairs
Interface for retrieving values given their names.
Definition: cryptlib.h:322
word16
unsigned short word16
16-bit unsigned datatype
Definition: config_int.h:59
GCM_Base::OptimalDataAlignment
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.