/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/modes.h>

#include <assert.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"


#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* redefine, because alignment is ensured */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))

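/* REDUCE1BIT performs one shift-and-reduce step in GF(2^128): V is shifted
 * right by one bit and, if the bit shifted out was set, the result is folded
 * back with the GCM reduction constant (the 0xe1... value). gcm_init_4bit
 * below uses it to derive the smaller Htable entries from H. */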
#define REDUCE1BIT(V)                                                  \
  do {                                                                 \
    if (sizeof(size_t) == 8) {                                         \
      uint64_t T = U64(0xe100000000000000) & (0 - (V.lo & 1));         \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ T;                                          \
    } else {                                                           \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));           \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                        \
    }                                                                  \
  } while (0)

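/* gcm_init_4bit precomputes Htable[i], the product of H with each possible
 * 4-bit value i (in GHASH's reflected bit order), so that a GHASH
 * multiplication can be performed one nibble of input at a time using
 * table lookups. */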
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* ARM assembler expects specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}

#if !defined(GHASH_ASM)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

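/* gcm_gmult_4bit computes Xi = Xi * H using the precomputed Htable,
 * consuming Xi one nibble at a time from the last byte to the first;
 * rem_4bit supplies the reduction term for the bits shifted out at each
 * 4-bit step. */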
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}

/* Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]... */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
 * trashing effect. In other words idea is to hash data while it's
 * still in L1 cache after encryption pass... */
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM)
#include "../arm_arch.h"
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif

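/* CRYPTO_gcm128_new allocates a GCM128_CONTEXT for |key| and |block| and
 * initializes it with CRYPTO_gcm128_init; it returns NULL if allocation
 * fails. */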
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}

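/* CRYPTO_gcm128_init derives the hash key H = E_K(0^128), converts it to
 * host byte order, and selects gmult/ghash implementations (CLMUL/AVX,
 * MMX, NEON or the generic 4-bit code) based on the CPU detected at
 * runtime. */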
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;
  ctx->key = key;

  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
      OPENSSL_ia32cap_P[1] & (1 << 1)) {  /* check PCLMULQDQ bit */
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (CRYPTO_is_NEON_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#endif
}

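/* CRYPTO_gcm128_setiv resets the per-message state and computes the initial
 * counter block Y0: for a 96-bit IV it is IV || 0^31 || 1, otherwise it is
 * the GHASH of the padded IV and its bit length, per the GCM spec. It also
 * saves EK0 = E_K(Y0), which masks the tag in CRYPTO_gcm128_finish. */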
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}

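/* CRYPTO_gcm128_aad absorbs additional authenticated data into Xi. It must
 * be called before any encrypt/decrypt call for the message (it fails once
 * ctx->len.u[1] is non-zero); ctx->ares carries a partially filled block
 * across calls. */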
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  if ((i = (len & (size_t) - 16))) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

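/* CRYPTO_gcm128_encrypt CTR-encrypts |len| bytes from |in| to |out| and
 * accumulates the resulting ciphertext into the GHASH state Xi. A partial
 * block left over from a previous call is carried in ctx->mres, so the
 * function may be called repeatedly on a stream. */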
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) - 16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif

  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

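/* CRYPTO_gcm128_decrypt is the mirror image of CRYPTO_gcm128_encrypt: the
 * ciphertext is hashed into Xi as it is CTR-decrypted, so the tag is
 * computed over exactly the bytes that were received. */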
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) - 16))) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif

  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

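/* CRYPTO_gcm128_encrypt_ctr32 behaves like CRYPTO_gcm128_encrypt but hands
 * whole blocks to the caller-supplied ctr128_f |stream| routine (typically a
 * hardware-accelerated CTR implementation with a 32-bit counter), falling
 * back to ctx->block only for the final partial block. */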
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) - 16))) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

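/* CRYPTO_gcm128_decrypt_ctr32 is the decryption counterpart of
 * CRYPTO_gcm128_encrypt_ctr32: the ciphertext is hashed into Xi first, then
 * decrypted in bulk through the ctr128_f |stream| routine. */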
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) - 16))) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

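/* CRYPTO_gcm128_finish folds the AAD and message bit lengths into Xi,
 * performs the final GHASH multiplication, XORs in EK0 to produce the tag,
 * and compares it against |tag| in constant time. It returns 1 if the tags
 * match and 0 otherwise. */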
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

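/* CRYPTO_gcm128_tag computes the tag exactly as CRYPTO_gcm128_finish does,
 * but copies up to |len| bytes of it into |tag| instead of comparing. */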
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}