  1. /* ====================================================================
  2. * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in
  13. * the documentation and/or other materials provided with the
  14. * distribution.
  15. *
  16. * 3. All advertising materials mentioning features or use of this
  17. * software must display the following acknowledgment:
  18. * "This product includes software developed by the OpenSSL Project
  19. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  20. *
  21. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  22. * endorse or promote products derived from this software without
  23. * prior written permission. For written permission, please contact
  24. * openssl-core@openssl.org.
  25. *
  26. * 5. Products derived from this software may not be called "OpenSSL"
  27. * nor may "OpenSSL" appear in their names without prior written
  28. * permission of the OpenSSL Project.
  29. *
  30. * 6. Redistributions of any form whatsoever must retain the following
  31. * acknowledgment:
  32. * "This product includes software developed by the OpenSSL Project
  33. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  34. *
  35. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  36. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  37. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  38. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  39. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  41. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  42. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  43. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  44. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  45. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  46. * OF THE POSSIBILITY OF SUCH DAMAGE.
  47. * ==================================================================== */
  48. #include <openssl/base.h>
  49. #include <assert.h>
  50. #include <string.h>
  51. #include <openssl/mem.h>
  52. #include <openssl/cpu.h>
  53. #include "internal.h"
  54. #include "../internal.h"
  55. #if !defined(OPENSSL_NO_ASM) && \
  56. (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
  57. defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
  58. defined(OPENSSL_PPC64LE))
  59. #define GHASH_ASM
  60. #endif
  61. #if defined(BSWAP4) && STRICT_ALIGNMENT == 1
  62. /* redefine, because alignment is ensured */
  63. #undef GETU32
  64. #define GETU32(p) BSWAP4(*(const uint32_t *)(p))
  65. #undef PUTU32
  66. #define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
  67. #endif
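/* For reference, GETU32/PUTU32 are a big-endian 32-bit load and store; the
 * BSWAP4 fast path above simply replaces their byte-wise portable forms.
 * A minimal sketch of those byte-wise equivalents (illustrative only; the
 * real definitions live in the shared modes headers):
 *
 *   uint32_t getu32(const uint8_t *p) {
 *     return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
 *            ((uint32_t)p[2] << 8) | (uint32_t)p[3];
 *   }
 *
 *   void putu32(uint8_t *p, uint32_t v) {
 *     p[0] = (uint8_t)(v >> 24); p[1] = (uint8_t)(v >> 16);
 *     p[2] = (uint8_t)(v >> 8);  p[3] = (uint8_t)v;
 *   }
 */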
  68. #define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
  69. #define REDUCE1BIT(V) \
  70. do { \
  71. if (sizeof(size_t) == 8) { \
  72. uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
  73. (V).lo = ((V).hi << 63) | ((V).lo >> 1); \
  74. (V).hi = ((V).hi >> 1) ^ T; \
  75. } else { \
  76. uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1)); \
  77. (V).lo = ((V).hi << 63) | ((V).lo >> 1); \
  78. (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32); \
  79. } \
  80. } while (0)
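/* Mechanically, REDUCE1BIT shifts the 128-bit value V right by one bit and,
 * if the bit shifted out of the low end was set, XORs the constant
 * 0xe1 || 0^120 (0xe100000000000000 in the high word) back in. In GCM's
 * bit-reflected representation this is one step of multiplication by x
 * modulo the field polynomial x^128 + x^7 + x^2 + x + 1; gcm_init_4bit
 * below uses it to derive H*x, H*x^2 and H*x^3 from H. */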
  81. // kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
  82. // bits of a |size_t|.
  83. static const size_t kSizeTWithoutLower4Bits = (size_t) -16;
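/* Worked example: for len = 35 (0x23), len & kSizeTWithoutLower4Bits is 32
 * (0x20), i.e. len rounded down to a whole number of 16-byte blocks; the
 * trailing 3 bytes are then handled as a partial block. */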
  84. static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  85. u128 V;
  86. Htable[0].hi = 0;
  87. Htable[0].lo = 0;
  88. V.hi = H[0];
  89. V.lo = H[1];
  90. Htable[8] = V;
  91. REDUCE1BIT(V);
  92. Htable[4] = V;
  93. REDUCE1BIT(V);
  94. Htable[2] = V;
  95. REDUCE1BIT(V);
  96. Htable[1] = V;
  97. Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  98. V = Htable[4];
  99. Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  100. Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  101. Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  102. V = Htable[8];
  103. Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  104. Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  105. Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  106. Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  107. Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  108. Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  109. Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
  110. #if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  111. /* ARM assembler expects specific dword order in Htable. */
  112. {
  113. int j;
  114. const union {
  115. long one;
  116. char little;
  117. } is_endian = {1};
  118. if (is_endian.little) {
  119. for (j = 0; j < 16; ++j) {
  120. V = Htable[j];
  121. Htable[j].hi = V.lo;
  122. Htable[j].lo = V.hi;
  123. }
  124. } else {
  125. for (j = 0; j < 16; ++j) {
  126. V = Htable[j];
  127. Htable[j].hi = V.lo << 32 | V.lo >> 32;
  128. Htable[j].lo = V.hi << 32 | V.hi >> 32;
  129. }
  130. }
  131. }
  132. #endif
  133. }
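/* The table built above contains every 4-bit multiple of H: Htable[8] = H,
 * and successive REDUCE1BIT calls yield Htable[4] = H*x, Htable[2] = H*x^2
 * and Htable[1] = H*x^3 (in GCM's reflected bit order); the remaining
 * entries are XOR combinations of those four, so Htable[n] is the product
 * of H with the nibble n. gcm_gmult_4bit below consumes Xi four bits at a
 * time against this table. */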
  134. #if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
  135. static const size_t rem_4bit[16] = {
  136. PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
  137. PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
  138. PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
  139. PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};
  140. static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  141. u128 Z;
  142. int cnt = 15;
  143. size_t rem, nlo, nhi;
  144. const union {
  145. long one;
  146. char little;
  147. } is_endian = {1};
  148. nlo = ((const uint8_t *)Xi)[15];
  149. nhi = nlo >> 4;
  150. nlo &= 0xf;
  151. Z.hi = Htable[nlo].hi;
  152. Z.lo = Htable[nlo].lo;
  153. while (1) {
  154. rem = (size_t)Z.lo & 0xf;
  155. Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  156. Z.hi = (Z.hi >> 4);
  157. if (sizeof(size_t) == 8) {
  158. Z.hi ^= rem_4bit[rem];
  159. } else {
  160. Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
  161. }
  162. Z.hi ^= Htable[nhi].hi;
  163. Z.lo ^= Htable[nhi].lo;
  164. if (--cnt < 0) {
  165. break;
  166. }
  167. nlo = ((const uint8_t *)Xi)[cnt];
  168. nhi = nlo >> 4;
  169. nlo &= 0xf;
  170. rem = (size_t)Z.lo & 0xf;
  171. Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  172. Z.hi = (Z.hi >> 4);
  173. if (sizeof(size_t) == 8) {
  174. Z.hi ^= rem_4bit[rem];
  175. } else {
  176. Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
  177. }
  178. Z.hi ^= Htable[nlo].hi;
  179. Z.lo ^= Htable[nlo].lo;
  180. }
  181. if (is_endian.little) {
  182. #ifdef BSWAP8
  183. Xi[0] = BSWAP8(Z.hi);
  184. Xi[1] = BSWAP8(Z.lo);
  185. #else
  186. uint8_t *p = (uint8_t *)Xi;
  187. uint32_t v;
  188. v = (uint32_t)(Z.hi >> 32);
  189. PUTU32(p, v);
  190. v = (uint32_t)(Z.hi);
  191. PUTU32(p + 4, v);
  192. v = (uint32_t)(Z.lo >> 32);
  193. PUTU32(p + 8, v);
  194. v = (uint32_t)(Z.lo);
  195. PUTU32(p + 12, v);
  196. #endif
  197. } else {
  198. Xi[0] = Z.hi;
  199. Xi[1] = Z.lo;
  200. }
  201. }
  202. /* Streamed version of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
  203. * details. Compiler-generated code doesn't seem to give any
  204. * performance improvement, at least not on x86[_64]. It's here
  205. * mostly as a reference and a placeholder for possible future
  206. * non-trivial optimizations. */
  207. static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  208. size_t len) {
  209. u128 Z;
  210. int cnt;
  211. size_t rem, nlo, nhi;
  212. const union {
  213. long one;
  214. char little;
  215. } is_endian = {1};
  216. do {
  217. cnt = 15;
  218. nlo = ((const uint8_t *)Xi)[15];
  219. nlo ^= inp[15];
  220. nhi = nlo >> 4;
  221. nlo &= 0xf;
  222. Z.hi = Htable[nlo].hi;
  223. Z.lo = Htable[nlo].lo;
  224. while (1) {
  225. rem = (size_t)Z.lo & 0xf;
  226. Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  227. Z.hi = (Z.hi >> 4);
  228. if (sizeof(size_t) == 8) {
  229. Z.hi ^= rem_4bit[rem];
  230. } else {
  231. Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
  232. }
  233. Z.hi ^= Htable[nhi].hi;
  234. Z.lo ^= Htable[nhi].lo;
  235. if (--cnt < 0) {
  236. break;
  237. }
  238. nlo = ((const uint8_t *)Xi)[cnt];
  239. nlo ^= inp[cnt];
  240. nhi = nlo >> 4;
  241. nlo &= 0xf;
  242. rem = (size_t)Z.lo & 0xf;
  243. Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  244. Z.hi = (Z.hi >> 4);
  245. if (sizeof(size_t) == 8) {
  246. Z.hi ^= rem_4bit[rem];
  247. } else {
  248. Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
  249. }
  250. Z.hi ^= Htable[nlo].hi;
  251. Z.lo ^= Htable[nlo].lo;
  252. }
  253. if (is_endian.little) {
  254. #ifdef BSWAP8
  255. Xi[0] = BSWAP8(Z.hi);
  256. Xi[1] = BSWAP8(Z.lo);
  257. #else
  258. uint8_t *p = (uint8_t *)Xi;
  259. uint32_t v;
  260. v = (uint32_t)(Z.hi >> 32);
  261. PUTU32(p, v);
  262. v = (uint32_t)(Z.hi);
  263. PUTU32(p + 4, v);
  264. v = (uint32_t)(Z.lo >> 32);
  265. PUTU32(p + 8, v);
  266. v = (uint32_t)(Z.lo);
  267. PUTU32(p + 12, v);
  268. #endif
  269. } else {
  270. Xi[0] = Z.hi;
  271. Xi[1] = Z.lo;
  272. }
  273. } while (inp += 16, len -= 16);
  274. }
  275. #else /* GHASH_ASM */
  276. void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
  277. void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  278. size_t len);
  279. #endif
  280. #define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
  281. #if defined(GHASH_ASM)
  282. #define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
  283. /* GHASH_CHUNK is a "stride parameter" intended to mitigate the cache-
  284. * thrashing effect. In other words, the idea is to hash data while it is
  285. * still in the L1 cache after the encryption pass... */
  286. #define GHASH_CHUNK (3 * 1024)
  287. #endif
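/* GHASH_CHUNK is 3 * 1024 bytes, i.e. 192 blocks: the bulk loops in
 * CRYPTO_gcm128_encrypt/decrypt below CTR-process one chunk and then GHASH
 * that same chunk while it is still resident in the L1 cache, rather than
 * hashing the whole message in one pass at the end. */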
  288. #if defined(GHASH_ASM)
  289. #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
  290. #define GHASH_ASM_X86_OR_64
  291. #define GCM_FUNCREF_4BIT
  292. void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
  293. void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
  294. void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  295. size_t len);
  296. #if defined(OPENSSL_X86)
  297. #define gcm_init_avx gcm_init_clmul
  298. #define gcm_gmult_avx gcm_gmult_clmul
  299. #define gcm_ghash_avx gcm_ghash_clmul
  300. #else
  301. void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
  302. void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
  303. void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
  304. size_t len);
  305. #define AESNI_GCM
  306. static int aesni_gcm_enabled(GCM128_CONTEXT *ctx, ctr128_f stream) {
  307. return stream == aesni_ctr32_encrypt_blocks &&
  308. ctx->ghash == gcm_ghash_avx;
  309. }
  310. size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
  311. const void *key, uint8_t ivec[16], uint64_t *Xi);
  312. size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
  313. const void *key, uint8_t ivec[16], uint64_t *Xi);
  314. #endif
  315. #if defined(OPENSSL_X86)
  316. #define GHASH_ASM_X86
  317. void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
  318. void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  319. size_t len);
  320. void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
  321. void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  322. size_t len);
  323. #endif
  324. #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
  325. #include <openssl/arm_arch.h>
  326. #if __ARM_ARCH__ >= 7
  327. #define GHASH_ASM_ARM
  328. #define GCM_FUNCREF_4BIT
  329. static int pmull_capable(void) {
  330. return CRYPTO_is_ARMv8_PMULL_capable();
  331. }
  332. void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
  333. void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
  334. void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  335. size_t len);
  336. #if defined(OPENSSL_ARM)
  337. /* 32-bit ARM also has support for doing GCM with NEON instructions. */
  338. static int neon_capable(void) {
  339. return CRYPTO_is_NEON_capable();
  340. }
  341. void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
  342. void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
  343. void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  344. size_t len);
  345. #else
  346. /* AArch64 only has the ARMv8 versions of functions. */
  347. static int neon_capable(void) {
  348. return 0;
  349. }
  350. static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  351. abort();
  352. }
  353. static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  354. abort();
  355. }
  356. static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
  357. const uint8_t *inp, size_t len) {
  358. abort();
  359. }
  360. #endif
  361. #endif
  362. #elif defined(OPENSSL_PPC64LE)
  363. #define GHASH_ASM_PPC64LE
  364. #define GCM_FUNCREF_4BIT
  365. void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
  366. void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
  367. void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  368. size_t len);
  369. #endif
  370. #endif
  371. #ifdef GCM_FUNCREF_4BIT
  372. #undef GCM_MUL
  373. #define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
  374. #ifdef GHASH
  375. #undef GHASH
  376. #define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
  377. #endif
  378. #endif
  379. void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *key,
  380. block128_f block) {
  381. const union {
  382. long one;
  383. char little;
  384. } is_endian = {1};
  385. memset(ctx, 0, sizeof(*ctx));
  386. ctx->block = block;
  387. (*block)(ctx->H.c, ctx->H.c, key);
  388. if (is_endian.little) {
  389. /* H is stored in host byte order */
  390. #ifdef BSWAP8
  391. ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
  392. ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
  393. #else
  394. uint8_t *p = ctx->H.c;
  395. uint64_t hi, lo;
  396. hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
  397. lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
  398. ctx->H.u[0] = hi;
  399. ctx->H.u[1] = lo;
  400. #endif
  401. }
  402. #if defined(GHASH_ASM_X86_OR_64)
  403. if (crypto_gcm_clmul_enabled()) {
  404. if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
  405. gcm_init_avx(ctx->Htable, ctx->H.u);
  406. ctx->gmult = gcm_gmult_avx;
  407. ctx->ghash = gcm_ghash_avx;
  408. } else {
  409. gcm_init_clmul(ctx->Htable, ctx->H.u);
  410. ctx->gmult = gcm_gmult_clmul;
  411. ctx->ghash = gcm_ghash_clmul;
  412. }
  413. return;
  414. }
  415. gcm_init_4bit(ctx->Htable, ctx->H.u);
  416. #if defined(GHASH_ASM_X86) /* x86 only */
  417. if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
  418. ctx->gmult = gcm_gmult_4bit_mmx;
  419. ctx->ghash = gcm_ghash_4bit_mmx;
  420. } else {
  421. ctx->gmult = gcm_gmult_4bit_x86;
  422. ctx->ghash = gcm_ghash_4bit_x86;
  423. }
  424. #else
  425. ctx->gmult = gcm_gmult_4bit;
  426. ctx->ghash = gcm_ghash_4bit;
  427. #endif
  428. #elif defined(GHASH_ASM_ARM)
  429. if (pmull_capable()) {
  430. gcm_init_v8(ctx->Htable, ctx->H.u);
  431. ctx->gmult = gcm_gmult_v8;
  432. ctx->ghash = gcm_ghash_v8;
  433. } else if (neon_capable()) {
  434. gcm_init_neon(ctx->Htable, ctx->H.u);
  435. ctx->gmult = gcm_gmult_neon;
  436. ctx->ghash = gcm_ghash_neon;
  437. } else {
  438. gcm_init_4bit(ctx->Htable, ctx->H.u);
  439. ctx->gmult = gcm_gmult_4bit;
  440. ctx->ghash = gcm_ghash_4bit;
  441. }
  442. #elif defined(GHASH_ASM_PPC64LE)
  443. if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
  444. gcm_init_p8(ctx->Htable, ctx->H.u);
  445. ctx->gmult = gcm_gmult_p8;
  446. ctx->ghash = gcm_ghash_p8;
  447. } else {
  448. gcm_init_4bit(ctx->Htable, ctx->H.u);
  449. ctx->gmult = gcm_gmult_4bit;
  450. ctx->ghash = gcm_ghash_4bit;
  451. }
  452. #else
  453. gcm_init_4bit(ctx->Htable, ctx->H.u);
  454. ctx->gmult = gcm_gmult_4bit;
  455. ctx->ghash = gcm_ghash_4bit;
  456. #endif
  457. }
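/* Illustrative sketch (not compiled): how a caller typically supplies a
 * block cipher to this context. AES_set_encrypt_key/AES_encrypt from
 * <openssl/aes.h> are assumed here; any cipher matching block128_f works
 * the same way. */
#if 0
#include <openssl/aes.h>

static void example_gcm128_init(GCM128_CONTEXT *gcm, AES_KEY *aes,
                                const uint8_t key[16]) {
  AES_set_encrypt_key(key, 128, aes);
  /* AES_encrypt has the block128_f shape: 16-byte in, 16-byte out, key. */
  CRYPTO_gcm128_init(gcm, aes, (block128_f)AES_encrypt);
}
#endif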
  458. void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
  459. const uint8_t *iv, size_t len) {
  460. const union {
  461. long one;
  462. char little;
  463. } is_endian = {1};
  464. unsigned int ctr;
  465. #ifdef GCM_FUNCREF_4BIT
  466. void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
  467. #endif
  468. ctx->Yi.u[0] = 0;
  469. ctx->Yi.u[1] = 0;
  470. ctx->Xi.u[0] = 0;
  471. ctx->Xi.u[1] = 0;
  472. ctx->len.u[0] = 0; /* AAD length */
  473. ctx->len.u[1] = 0; /* message length */
  474. ctx->ares = 0;
  475. ctx->mres = 0;
  476. if (len == 12) {
  477. memcpy(ctx->Yi.c, iv, 12);
  478. ctx->Yi.c[15] = 1;
  479. ctr = 1;
  480. } else {
  481. uint64_t len0 = len;
  482. while (len >= 16) {
  483. for (size_t i = 0; i < 16; ++i) {
  484. ctx->Yi.c[i] ^= iv[i];
  485. }
  486. GCM_MUL(ctx, Yi);
  487. iv += 16;
  488. len -= 16;
  489. }
  490. if (len) {
  491. for (size_t i = 0; i < len; ++i) {
  492. ctx->Yi.c[i] ^= iv[i];
  493. }
  494. GCM_MUL(ctx, Yi);
  495. }
  496. len0 <<= 3;
  497. if (is_endian.little) {
  498. #ifdef BSWAP8
  499. ctx->Yi.u[1] ^= BSWAP8(len0);
  500. #else
  501. ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
  502. ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
  503. ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
  504. ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
  505. ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
  506. ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
  507. ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
  508. ctx->Yi.c[15] ^= (uint8_t)(len0);
  509. #endif
  510. } else {
  511. ctx->Yi.u[1] ^= len0;
  512. }
  513. GCM_MUL(ctx, Yi);
  514. if (is_endian.little) {
  515. ctr = GETU32(ctx->Yi.c + 12);
  516. } else {
  517. ctr = ctx->Yi.d[3];
  518. }
  519. }
  520. (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  521. ++ctr;
  522. if (is_endian.little) {
  523. PUTU32(ctx->Yi.c + 12, ctr);
  524. } else {
  525. ctx->Yi.d[3] = ctr;
  526. }
  527. }
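/* For reference, the IV handling above matches NIST SP 800-38D: a 96-bit IV
 * becomes the pre-counter block IV || 0^31 || 1 directly, while any other
 * length is run through GHASH (the GCM_MUL loop over Yi) together with a
 * final block holding the 64-bit IV bit length, and the low 32 bits of the
 * result seed the counter. */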
  528. int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  529. unsigned int n;
  530. uint64_t alen = ctx->len.u[0];
  531. #ifdef GCM_FUNCREF_4BIT
  532. void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
  533. #ifdef GHASH
  534. void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  535. size_t len) = ctx->ghash;
  536. #endif
  537. #endif
  538. if (ctx->len.u[1]) {
  539. return 0;
  540. }
  541. alen += len;
  542. if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
  543. return 0;
  544. }
  545. ctx->len.u[0] = alen;
  546. n = ctx->ares;
  547. if (n) {
  548. while (n && len) {
  549. ctx->Xi.c[n] ^= *(aad++);
  550. --len;
  551. n = (n + 1) % 16;
  552. }
  553. if (n == 0) {
  554. GCM_MUL(ctx, Xi);
  555. } else {
  556. ctx->ares = n;
  557. return 1;
  558. }
  559. }
  560. /* Process a whole number of blocks. */
  561. #ifdef GHASH
  562. size_t len_blocks = len & kSizeTWithoutLower4Bits;
  563. if (len_blocks != 0) {
  564. GHASH(ctx, aad, len_blocks);
  565. aad += len_blocks;
  566. len -= len_blocks;
  567. }
  568. #else
  569. while (len >= 16) {
  570. for (size_t i = 0; i < 16; ++i) {
  571. ctx->Xi.c[i] ^= aad[i];
  572. }
  573. GCM_MUL(ctx, Xi);
  574. aad += 16;
  575. len -= 16;
  576. }
  577. #endif
  578. /* Process the remainder. */
  579. if (len != 0) {
  580. n = (unsigned int)len;
  581. for (size_t i = 0; i < len; ++i) {
  582. ctx->Xi.c[i] ^= aad[i];
  583. }
  584. }
  585. ctx->ares = n;
  586. return 1;
  587. }
  588. int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
  589. const unsigned char *in, unsigned char *out,
  590. size_t len) {
  591. const union {
  592. long one;
  593. char little;
  594. } is_endian = {1};
  595. unsigned int n, ctr;
  596. uint64_t mlen = ctx->len.u[1];
  597. block128_f block = ctx->block;
  598. #ifdef GCM_FUNCREF_4BIT
  599. void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
  600. #ifdef GHASH
  601. void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  602. size_t len) = ctx->ghash;
  603. #endif
  604. #endif
  605. mlen += len;
  606. if (mlen > ((UINT64_C(1) << 36) - 32) ||
  607. (sizeof(len) == 8 && mlen < len)) {
  608. return 0;
  609. }
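/* The bound above is the SP 800-38D per-invocation plaintext limit of
 * 2^39 - 256 bits, i.e. 2^36 - 32 bytes. */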
  610. ctx->len.u[1] = mlen;
  611. if (ctx->ares) {
  612. /* First call to encrypt finalizes GHASH(AAD) */
  613. GCM_MUL(ctx, Xi);
  614. ctx->ares = 0;
  615. }
  616. if (is_endian.little) {
  617. ctr = GETU32(ctx->Yi.c + 12);
  618. } else {
  619. ctr = ctx->Yi.d[3];
  620. }
  621. n = ctx->mres;
  622. if (n) {
  623. while (n && len) {
  624. ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
  625. --len;
  626. n = (n + 1) % 16;
  627. }
  628. if (n == 0) {
  629. GCM_MUL(ctx, Xi);
  630. } else {
  631. ctx->mres = n;
  632. return 1;
  633. }
  634. }
  635. if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
  636. for (size_t i = 0; i < len; ++i) {
  637. if (n == 0) {
  638. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  639. ++ctr;
  640. if (is_endian.little) {
  641. PUTU32(ctx->Yi.c + 12, ctr);
  642. } else {
  643. ctx->Yi.d[3] = ctr;
  644. }
  645. }
  646. ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
  647. n = (n + 1) % 16;
  648. if (n == 0) {
  649. GCM_MUL(ctx, Xi);
  650. }
  651. }
  652. ctx->mres = n;
  653. return 1;
  654. }
  655. #if defined(GHASH) && defined(GHASH_CHUNK)
  656. while (len >= GHASH_CHUNK) {
  657. size_t j = GHASH_CHUNK;
  658. while (j) {
  659. size_t *out_t = (size_t *)out;
  660. const size_t *in_t = (const size_t *)in;
  661. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  662. ++ctr;
  663. if (is_endian.little) {
  664. PUTU32(ctx->Yi.c + 12, ctr);
  665. } else {
  666. ctx->Yi.d[3] = ctr;
  667. }
  668. for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
  669. out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  670. }
  671. out += 16;
  672. in += 16;
  673. j -= 16;
  674. }
  675. GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
  676. len -= GHASH_CHUNK;
  677. }
  678. size_t len_blocks = len & kSizeTWithoutLower4Bits;
  679. if (len_blocks != 0) {
  680. while (len >= 16) {
  681. size_t *out_t = (size_t *)out;
  682. const size_t *in_t = (const size_t *)in;
  683. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  684. ++ctr;
  685. if (is_endian.little) {
  686. PUTU32(ctx->Yi.c + 12, ctr);
  687. } else {
  688. ctx->Yi.d[3] = ctr;
  689. }
  690. for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
  691. out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  692. }
  693. out += 16;
  694. in += 16;
  695. len -= 16;
  696. }
  697. GHASH(ctx, out - len_blocks, len_blocks);
  698. }
  699. #else
  700. while (len >= 16) {
  701. size_t *out_t = (size_t *)out;
  702. const size_t *in_t = (const size_t *)in;
  703. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  704. ++ctr;
  705. if (is_endian.little) {
  706. PUTU32(ctx->Yi.c + 12, ctr);
  707. } else {
  708. ctx->Yi.d[3] = ctr;
  709. }
  710. for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
  711. ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  712. }
  713. GCM_MUL(ctx, Xi);
  714. out += 16;
  715. in += 16;
  716. len -= 16;
  717. }
  718. #endif
  719. if (len) {
  720. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  721. ++ctr;
  722. if (is_endian.little) {
  723. PUTU32(ctx->Yi.c + 12, ctr);
  724. } else {
  725. ctx->Yi.d[3] = ctr;
  726. }
  727. while (len--) {
  728. ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
  729. ++n;
  730. }
  731. }
  732. ctx->mres = n;
  733. return 1;
  734. }
  735. int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
  736. const unsigned char *in, unsigned char *out,
  737. size_t len) {
  738. const union {
  739. long one;
  740. char little;
  741. } is_endian = {1};
  742. unsigned int n, ctr;
  743. uint64_t mlen = ctx->len.u[1];
  744. block128_f block = ctx->block;
  745. #ifdef GCM_FUNCREF_4BIT
  746. void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
  747. #ifdef GHASH
  748. void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  749. size_t len) = ctx->ghash;
  750. #endif
  751. #endif
  752. mlen += len;
  753. if (mlen > ((UINT64_C(1) << 36) - 32) ||
  754. (sizeof(len) == 8 && mlen < len)) {
  755. return 0;
  756. }
  757. ctx->len.u[1] = mlen;
  758. if (ctx->ares) {
  759. /* First call to decrypt finalizes GHASH(AAD) */
  760. GCM_MUL(ctx, Xi);
  761. ctx->ares = 0;
  762. }
  763. if (is_endian.little) {
  764. ctr = GETU32(ctx->Yi.c + 12);
  765. } else {
  766. ctr = ctx->Yi.d[3];
  767. }
  768. n = ctx->mres;
  769. if (n) {
  770. while (n && len) {
  771. uint8_t c = *(in++);
  772. *(out++) = c ^ ctx->EKi.c[n];
  773. ctx->Xi.c[n] ^= c;
  774. --len;
  775. n = (n + 1) % 16;
  776. }
  777. if (n == 0) {
  778. GCM_MUL(ctx, Xi);
  779. } else {
  780. ctx->mres = n;
  781. return 1;
  782. }
  783. }
  784. if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
  785. for (size_t i = 0; i < len; ++i) {
  786. uint8_t c;
  787. if (n == 0) {
  788. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  789. ++ctr;
  790. if (is_endian.little) {
  791. PUTU32(ctx->Yi.c + 12, ctr);
  792. } else {
  793. ctx->Yi.d[3] = ctr;
  794. }
  795. }
  796. c = in[i];
  797. out[i] = c ^ ctx->EKi.c[n];
  798. ctx->Xi.c[n] ^= c;
  799. n = (n + 1) % 16;
  800. if (n == 0) {
  801. GCM_MUL(ctx, Xi);
  802. }
  803. }
  804. ctx->mres = n;
  805. return 1;
  806. }
  807. #if defined(GHASH) && defined(GHASH_CHUNK)
  808. while (len >= GHASH_CHUNK) {
  809. size_t j = GHASH_CHUNK;
  810. GHASH(ctx, in, GHASH_CHUNK);
  811. while (j) {
  812. size_t *out_t = (size_t *)out;
  813. const size_t *in_t = (const size_t *)in;
  814. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  815. ++ctr;
  816. if (is_endian.little) {
  817. PUTU32(ctx->Yi.c + 12, ctr);
  818. } else {
  819. ctx->Yi.d[3] = ctr;
  820. }
  821. for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
  822. out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  823. }
  824. out += 16;
  825. in += 16;
  826. j -= 16;
  827. }
  828. len -= GHASH_CHUNK;
  829. }
  830. size_t len_blocks = len & kSizeTWithoutLower4Bits;
  831. if (len_blocks != 0) {
  832. GHASH(ctx, in, len_blocks);
  833. while (len >= 16) {
  834. size_t *out_t = (size_t *)out;
  835. const size_t *in_t = (const size_t *)in;
  836. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  837. ++ctr;
  838. if (is_endian.little) {
  839. PUTU32(ctx->Yi.c + 12, ctr);
  840. } else {
  841. ctx->Yi.d[3] = ctr;
  842. }
  843. for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
  844. out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  845. }
  846. out += 16;
  847. in += 16;
  848. len -= 16;
  849. }
  850. }
  851. #else
  852. while (len >= 16) {
  853. size_t *out_t = (size_t *)out;
  854. const size_t *in_t = (const size_t *)in;
  855. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  856. ++ctr;
  857. if (is_endian.little) {
  858. PUTU32(ctx->Yi.c + 12, ctr);
  859. } else {
  860. ctx->Yi.d[3] = ctr;
  861. }
  862. for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
  863. size_t c = in_t[i];
  864. out_t[i] = c ^ ctx->EKi.t[i];
  865. ctx->Xi.t[i] ^= c;
  866. }
  867. GCM_MUL(ctx, Xi);
  868. out += 16;
  869. in += 16;
  870. len -= 16;
  871. }
  872. #endif
  873. if (len) {
  874. (*block)(ctx->Yi.c, ctx->EKi.c, key);
  875. ++ctr;
  876. if (is_endian.little) {
  877. PUTU32(ctx->Yi.c + 12, ctr);
  878. } else {
  879. ctx->Yi.d[3] = ctr;
  880. }
  881. while (len--) {
  882. uint8_t c = in[n];
  883. ctx->Xi.c[n] ^= c;
  884. out[n] = c ^ ctx->EKi.c[n];
  885. ++n;
  886. }
  887. }
  888. ctx->mres = n;
  889. return 1;
  890. }
  891. int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
  892. const uint8_t *in, uint8_t *out, size_t len,
  893. ctr128_f stream) {
  894. const union {
  895. long one;
  896. char little;
  897. } is_endian = {1};
  898. unsigned int n, ctr;
  899. uint64_t mlen = ctx->len.u[1];
  900. #ifdef GCM_FUNCREF_4BIT
  901. void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
  902. #ifdef GHASH
  903. void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  904. size_t len) = ctx->ghash;
  905. #endif
  906. #endif
  907. mlen += len;
  908. if (mlen > ((UINT64_C(1) << 36) - 32) ||
  909. (sizeof(len) == 8 && mlen < len)) {
  910. return 0;
  911. }
  912. ctx->len.u[1] = mlen;
  913. if (ctx->ares) {
  914. /* First call to encrypt finalizes GHASH(AAD) */
  915. GCM_MUL(ctx, Xi);
  916. ctx->ares = 0;
  917. }
  918. n = ctx->mres;
  919. if (n) {
  920. while (n && len) {
  921. ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
  922. --len;
  923. n = (n + 1) % 16;
  924. }
  925. if (n == 0) {
  926. GCM_MUL(ctx, Xi);
  927. } else {
  928. ctx->mres = n;
  929. return 1;
  930. }
  931. }
  932. #if defined(AESNI_GCM)
  933. if (aesni_gcm_enabled(ctx, stream)) {
  934. /* |aesni_gcm_encrypt| may not process all the input given to it. It may
  935. * not process *any* of its input if it is deemed too small. */
  936. size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
  937. in += bulk;
  938. out += bulk;
  939. len -= bulk;
  940. }
  941. #endif
  942. if (is_endian.little) {
  943. ctr = GETU32(ctx->Yi.c + 12);
  944. } else {
  945. ctr = ctx->Yi.d[3];
  946. }
  947. #if defined(GHASH)
  948. while (len >= GHASH_CHUNK) {
  949. (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
  950. ctr += GHASH_CHUNK / 16;
  951. if (is_endian.little) {
  952. PUTU32(ctx->Yi.c + 12, ctr);
  953. } else {
  954. ctx->Yi.d[3] = ctr;
  955. }
  956. GHASH(ctx, out, GHASH_CHUNK);
  957. out += GHASH_CHUNK;
  958. in += GHASH_CHUNK;
  959. len -= GHASH_CHUNK;
  960. }
  961. #endif
  962. size_t i = len & kSizeTWithoutLower4Bits;
  963. if (i != 0) {
  964. size_t j = i / 16;
  965. (*stream)(in, out, j, key, ctx->Yi.c);
  966. ctr += (unsigned int)j;
  967. if (is_endian.little) {
  968. PUTU32(ctx->Yi.c + 12, ctr);
  969. } else {
  970. ctx->Yi.d[3] = ctr;
  971. }
  972. in += i;
  973. len -= i;
  974. #if defined(GHASH)
  975. GHASH(ctx, out, i);
  976. out += i;
  977. #else
  978. while (j--) {
  979. for (i = 0; i < 16; ++i) {
  980. ctx->Xi.c[i] ^= out[i];
  981. }
  982. GCM_MUL(ctx, Xi);
  983. out += 16;
  984. }
  985. #endif
  986. }
  987. if (len) {
  988. (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
  989. ++ctr;
  990. if (is_endian.little) {
  991. PUTU32(ctx->Yi.c + 12, ctr);
  992. } else {
  993. ctx->Yi.d[3] = ctr;
  994. }
  995. while (len--) {
  996. ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
  997. ++n;
  998. }
  999. }
  1000. ctx->mres = n;
  1001. return 1;
  1002. }
  1003. int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
  1004. const uint8_t *in, uint8_t *out, size_t len,
  1005. ctr128_f stream) {
  1006. const union {
  1007. long one;
  1008. char little;
  1009. } is_endian = {1};
  1010. unsigned int n, ctr;
  1011. uint64_t mlen = ctx->len.u[1];
  1012. #ifdef GCM_FUNCREF_4BIT
  1013. void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
  1014. #ifdef GHASH
  1015. void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
  1016. size_t len) = ctx->ghash;
  1017. #endif
  1018. #endif
  1019. mlen += len;
  1020. if (mlen > ((UINT64_C(1) << 36) - 32) ||
  1021. (sizeof(len) == 8 && mlen < len)) {
  1022. return 0;
  1023. }
  1024. ctx->len.u[1] = mlen;
  1025. if (ctx->ares) {
  1026. /* First call to decrypt finalizes GHASH(AAD) */
  1027. GCM_MUL(ctx, Xi);
  1028. ctx->ares = 0;
  1029. }
  1030. n = ctx->mres;
  1031. if (n) {
  1032. while (n && len) {
  1033. uint8_t c = *(in++);
  1034. *(out++) = c ^ ctx->EKi.c[n];
  1035. ctx->Xi.c[n] ^= c;
  1036. --len;
  1037. n = (n + 1) % 16;
  1038. }
  1039. if (n == 0) {
  1040. GCM_MUL(ctx, Xi);
  1041. } else {
  1042. ctx->mres = n;
  1043. return 1;
  1044. }
  1045. }
  1046. #if defined(AESNI_GCM)
  1047. if (aesni_gcm_enabled(ctx, stream)) {
  1048. /* |aesni_gcm_decrypt| may not process all the input given to it. It may
  1049. * not process *any* of its input if it is deemed too small. */
  1050. size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
  1051. in += bulk;
  1052. out += bulk;
  1053. len -= bulk;
  1054. }
  1055. #endif
  1056. if (is_endian.little) {
  1057. ctr = GETU32(ctx->Yi.c + 12);
  1058. } else {
  1059. ctr = ctx->Yi.d[3];
  1060. }
  1061. #if defined(GHASH)
  1062. while (len >= GHASH_CHUNK) {
  1063. GHASH(ctx, in, GHASH_CHUNK);
  1064. (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
  1065. ctr += GHASH_CHUNK / 16;
  1066. if (is_endian.little) {
  1067. PUTU32(ctx->Yi.c + 12, ctr);
  1068. } else {
  1069. ctx->Yi.d[3] = ctr;
  1070. }
  1071. out += GHASH_CHUNK;
  1072. in += GHASH_CHUNK;
  1073. len -= GHASH_CHUNK;
  1074. }
  1075. #endif
  1076. size_t i = len & kSizeTWithoutLower4Bits;
  1077. if (i != 0) {
  1078. size_t j = i / 16;
  1079. #if defined(GHASH)
  1080. GHASH(ctx, in, i);
  1081. #else
  1082. while (j--) {
  1083. size_t k;
  1084. for (k = 0; k < 16; ++k) {
  1085. ctx->Xi.c[k] ^= in[k];
  1086. }
  1087. GCM_MUL(ctx, Xi);
  1088. in += 16;
  1089. }
  1090. j = i / 16;
  1091. in -= i;
  1092. #endif
  1093. (*stream)(in, out, j, key, ctx->Yi.c);
  1094. ctr += (unsigned int)j;
  1095. if (is_endian.little) {
  1096. PUTU32(ctx->Yi.c + 12, ctr);
  1097. } else {
  1098. ctx->Yi.d[3] = ctr;
  1099. }
  1100. out += i;
  1101. in += i;
  1102. len -= i;
  1103. }
  1104. if (len) {
  1105. (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
  1106. ++ctr;
  1107. if (is_endian.little) {
  1108. PUTU32(ctx->Yi.c + 12, ctr);
  1109. } else {
  1110. ctx->Yi.d[3] = ctr;
  1111. }
  1112. while (len--) {
  1113. uint8_t c = in[n];
  1114. ctx->Xi.c[n] ^= c;
  1115. out[n] = c ^ ctx->EKi.c[n];
  1116. ++n;
  1117. }
  1118. }
  1119. ctx->mres = n;
  1120. return 1;
  1121. }
  1122. int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  1123. const union {
  1124. long one;
  1125. char little;
  1126. } is_endian = {1};
  1127. uint64_t alen = ctx->len.u[0] << 3;
  1128. uint64_t clen = ctx->len.u[1] << 3;
  1129. #ifdef GCM_FUNCREF_4BIT
  1130. void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
  1131. #endif
  1132. if (ctx->mres || ctx->ares) {
  1133. GCM_MUL(ctx, Xi);
  1134. }
  1135. if (is_endian.little) {
  1136. #ifdef BSWAP8
  1137. alen = BSWAP8(alen);
  1138. clen = BSWAP8(clen);
  1139. #else
  1140. uint8_t *p = ctx->len.c;
  1141. ctx->len.u[0] = alen;
  1142. ctx->len.u[1] = clen;
  1143. alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
  1144. clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
  1145. #endif
  1146. }
  1147. ctx->Xi.u[0] ^= alen;
  1148. ctx->Xi.u[1] ^= clen;
  1149. GCM_MUL(ctx, Xi);
  1150. ctx->Xi.u[0] ^= ctx->EK0.u[0];
  1151. ctx->Xi.u[1] ^= ctx->EK0.u[1];
  1152. if (tag && len <= sizeof(ctx->Xi)) {
  1153. return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  1154. } else {
  1155. return 0;
  1156. }
  1157. }
  1158. void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  1159. CRYPTO_gcm128_finish(ctx, NULL, 0);
  1160. memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
  1161. }
  1162. #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
  1163. int crypto_gcm_clmul_enabled(void) {
  1164. #ifdef GHASH_ASM
  1165. return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
  1166. OPENSSL_ia32cap_P[1] & (1 << 1); /* check PCLMULQDQ bit */
  1167. #else
  1168. return 0;
  1169. #endif
  1170. }
  1171. #endif
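/* Illustrative end-to-end sketch (not compiled): one-shot AES-128-GCM
 * sealing with the functions above. The helper name and fixed tag size are
 * hypothetical; real callers should normally go through the higher-level
 * AEAD interfaces instead of using GCM128_CONTEXT directly. */
#if 0
#include <openssl/aes.h>

static int example_gcm128_seal(uint8_t *out, uint8_t tag[16],
                               const uint8_t key[16], const uint8_t *iv,
                               size_t iv_len, const uint8_t *aad,
                               size_t aad_len, const uint8_t *in,
                               size_t in_len) {
  AES_KEY aes;
  GCM128_CONTEXT gcm;
  if (AES_set_encrypt_key(key, 128, &aes) != 0) {
    return 0;
  }
  CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
  CRYPTO_gcm128_setiv(&gcm, &aes, iv, iv_len);
  if (!CRYPTO_gcm128_aad(&gcm, aad, aad_len) ||
      !CRYPTO_gcm128_encrypt(&gcm, &aes, in, out, in_len)) {
    return 0;
  }
  CRYPTO_gcm128_tag(&gcm, tag, 16);
  return 1;
}
#endif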