/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../internal.h"


#if !defined(OPENSSL_NO_ASM) &&                          \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) ||  \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
     defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))

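/* REDUCE1BIT shifts the 128-bit value |V| right by one bit and, when a bit
 * falls off the low end, folds in the constant 0xe1 followed by zeros, which
 * is the GCM reduction polynomial x^128 + x^7 + x^2 + x + 1 in the
 * bit-reflected representation used by GHASH. */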
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

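/* gcm_init_4bit precomputes the products of |H| with all sixteen 4-bit
 * values, in GCM's bit-reflected representation, so the generic routines
 * below can consume |Xi| one nibble at a time (Shoup's 4-bit table-driven
 * method). */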
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}

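/* The portable GHASH implementation below is compiled when no assembly
 * version is available, and on AArch64 and PPC64LE as a fallback. rem_4bit
 * holds the sixteen possible reduction values for the four bits shifted out
 * of |Z| on each iteration, pre-shifted into position by PACK. */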
#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

/* Streamed version of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details. Compiler-generated code doesn't seem to give any performance
 * improvement, at least not on x86[_64]. It's here mostly as a reference and
 * a placeholder for possible future non-trivial optimization[s]... */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}

#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)

#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" meant to mitigate cache-trashing
 * effects. The idea is to hash data while it is still in the L1 cache after
 * the encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif

#if defined(GHASH_ASM)

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                   size_t len);

#define AESNI_GCM
static int aesni_gcm_enabled(GCM128_CONTEXT *ctx, ctr128_f stream) {
  return stream == aesni_ctr32_encrypt_blocks &&
         ctx->ghash == gcm_ghash_avx;
}

size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif

#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
/* AArch64 only has the ARMv8 versions of functions. */
static int neon_capable(void) {
  return 0;
}
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  abort();
}
#endif

#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif

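/* CRYPTO_ghash_init selects the fastest GHASH implementation available at
 * run time (CLMUL/AVX on x86 and x86_64, PMULL or NEON on ARM, vcrypto on
 * PPC64LE) and falls back to the generic 4-bit table code otherwise. */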
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 out_table[16], const uint8_t *gcm_key) {
  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  memcpy(H.c, gcm_key, 16);

  /* H is stored in host byte order */
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    gcm_init_4bit(out_table, H.u);
    *out_mult = gcm_gmult_4bit_mmx;
    *out_hash = gcm_ghash_4bit_mmx;
    return;
  }
#endif
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_x86;
  *out_hash = gcm_ghash_4bit_x86;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}

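/* A typical caller drives this API roughly as follows. This is only a
 * sketch: |aes_key| and |aes_encrypt_block| stand in for the caller's own
 * AES key schedule and single-block encryption function (a |block128_f|).
 *
 *   GCM128_CONTEXT gcm;
 *   CRYPTO_gcm128_init(&gcm, &aes_key, aes_encrypt_block);
 *   CRYPTO_gcm128_setiv(&gcm, &aes_key, iv, iv_len);
 *   CRYPTO_gcm128_aad(&gcm, aad, aad_len);
 *   CRYPTO_gcm128_encrypt(&gcm, &aes_key, plaintext, ciphertext, len);
 *   CRYPTO_gcm128_tag(&gcm, tag, 16);
 */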
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
                        block128_f block) {
  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  uint8_t gcm_key[16];
  memset(gcm_key, 0, sizeof(gcm_key));
  (*block)(gcm_key, gcm_key, aes_key);

  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, ctx->Htable, gcm_key);
}

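/* CRYPTO_gcm128_setiv derives the pre-counter block: a 96-bit IV is used
 * directly with the counter set to one, while any other IV length is
 * absorbed through GHASH together with its bit length, as GCM specifies. */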
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = GETU32_aligned(ctx->Yi.c + 12);
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  PUTU32_aligned(ctx->Yi.c + 12, ctr);
}

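/* AAD must be supplied before any plaintext (the call fails once a message
 * length has been recorded) and is capped at 2^61 bytes, i.e. 2^64 bits. */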
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  /* Process a whole number of blocks. */
#ifdef GHASH
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }
#else
  while (len >= 16) {
    for (size_t i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  /* Process the remainder. */
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

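/* CRYPTO_gcm128_encrypt CTR-encrypts |len| bytes and folds the resulting
 * ciphertext into the running GHASH state. The total message length is
 * capped at 2^36 - 32 bytes, the GCM limit. */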
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = GETU32_aligned(ctx->Yi.c + 12);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        PUTU32_aligned(ctx->Yi.c + 12, ctr);
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif

  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

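/* CRYPTO_gcm128_decrypt mirrors the encrypt path, except that GHASH is
 * computed over the ciphertext input, so the bulk paths hash |in| before
 * (or as) it is decrypted. */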
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = GETU32_aligned(ctx->Yi.c + 12);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        PUTU32_aligned(ctx->Yi.c + 12, ctr);
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif

  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

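/* The *_ctr32 variants take a |ctr128_f| counter-mode function (for example
 * a hardware-accelerated one such as |aesni_ctr32_encrypt_blocks|) and hand
 * it many blocks at a time instead of calling the block cipher per block. */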
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (aesni_gcm_enabled(ctx, stream)) {
    /* |aesni_gcm_encrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = GETU32_aligned(ctx->Yi.c + 12);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (aesni_gcm_enabled(ctx, stream)) {
    /* |aesni_gcm_decrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = GETU32_aligned(ctx->Yi.c + 12);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

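/* CRYPTO_gcm128_finish folds the AAD and ciphertext bit lengths into the
 * GHASH state, XORs in EK0 to form the tag, and compares it against the
 * caller's tag with |CRYPTO_memcmp| (constant time). */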
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  alen = CRYPTO_bswap8(alen);
  clen = CRYPTO_bswap8(clen);

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif
  958. #endif