No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 
 
 

347 líneas
14 KiB

  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.] */
  56. /* Altivec-optimized SHA1 in C. This is tested on ppc64le only.
  57. *
  58. * References:
  59. * https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
  60. * http://arctic.org/~dean/crypto/sha1.html
  61. *
  62. * This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
  63. * optimisations were added on top. */
  64. #include <openssl/sha.h>
  65. #if defined(OPENSSL_PPC64LE)
  66. #include <altivec.h>
/* Forward declaration of the SHA-1 block function defined at the end of this
 * file. |state| is the five-word chaining value (indices 0..4 are read and
 * written), |data| points at the input, and |num| is the number of 64-byte
 * blocks to process. */
void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
  68. static uint32_t rotate(uint32_t a, int n) { return (a << n) | (a >> (32 - n)); }
/* Shorthand for the two AltiVec vector types used in this file: four 32-bit
 * unsigned lanes and sixteen unsigned bytes. */
typedef vector unsigned int vec_uint32_t;
typedef vector unsigned char vec_uint8_t;

/* Vector constants */

/* Permute control for vec_perm: the indices reverse the byte order inside each
 * of the four 32-bit words (3,2,1,0 / 7,6,5,4 / ...). sched_00_15 uses it to
 * byte-swap the message words it loads. */
static const vec_uint8_t k_swap_endianness = {3, 2, 1, 0, 7, 6, 5, 4,
                                              11, 10, 9, 8, 15, 14, 13, 12};

/* Shift amounts for byte and bit shifts and rotations.
 *
 * These are passed to vec_sro / vec_slo in sched_16_31. The values are the
 * byte counts expressed in bits (32 = 4 * 8, 96 = 12 * 8) and are replicated
 * across all sixteen elements.
 * NOTE(review): the exact bit field the octet-shift instructions read is
 * defined by the AltiVec specification, not by anything in this file. */
static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
                                      32, 32, 32, 32, 32, 32, 32, 32};
static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
                                       96, 96, 96, 96, 96, 96, 96, 96};

/* Additive round constants; the suffix names the range of rounds in which
 * sha1_block_data_order applies each one. */
#define K_00_19 0x5a827999UL
#define K_20_39 0x6ed9eba1UL
#define K_40_59 0x8f1bbcdcUL
#define K_60_79 0xca62c1d6UL

/* Vector versions of the above: the same constant in all four lanes, so that
 * the scheduling helpers can add it to four message words at once. */
static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
  88. /* vector message scheduling: compute message schedule for round i..i+3 where i
  89. * is divisible by 4. We return the schedule w[i..i+3] as a vector. In
  90. * addition, we also precompute sum w[i..+3] and an additive constant K. This
  91. * is done to offload some computation of f() in the integer execution units.
  92. *
  93. * Byte shifting code below may not be correct for big-endian systems. */
  94. static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
  95. vec_uint32_t k) {
  96. const vec_uint32_t v = *((const vec_uint32_t *)data);
  97. const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
  98. vec_st(w + k, 0, pre_added);
  99. return w;
  100. }
  101. /* Compute w[i..i+3] using these steps for i in [16, 20, 24, 28]
  102. *
  103. * w'[i ] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
  104. * w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
  105. * w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
  106. * w'[i+3] = ( 0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
  107. *
  108. * w[ i] = w'[ i]
  109. * w[i+1] = w'[i+1]
  110. * w[i+2] = w'[i+2]
  111. * w[i+3] = w'[i+3] ^ (w'[i] <<< 1) */
  112. static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
  113. vec_uint32_t minus_8, vec_uint32_t minus_12,
  114. vec_uint32_t minus_16, vec_uint32_t k) {
  115. const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes);
  116. const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8);
  117. const vec_uint32_t k_1_bit = vec_splat_u32(1);
  118. const vec_uint32_t w_prime =
  119. vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit);
  120. const vec_uint32_t w =
  121. w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit);
  122. vec_st(w + k, 0, pre_added);
  123. return w;
  124. }
  125. /* Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76]
  126. * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]), 2) <<< 2 */
  127. static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
  128. vec_uint32_t minus_8, vec_uint32_t minus_16,
  129. vec_uint32_t minus_28, vec_uint32_t minus_32,
  130. vec_uint32_t k) {
  131. const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8);
  132. const vec_uint32_t k_2_bits = vec_splat_u32(2);
  133. const vec_uint32_t w =
  134. vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits);
  135. vec_st(w + k, 0, pre_added);
  136. return w;
  137. }
  138. /* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified
  139. * to the code in F_00_19. Wei attributes these optimisations to Peter
  140. * Gutmann's SHS code, and he attributes it to Rich Schroeppel. #define
  141. * F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) I've just become aware of another
  142. * tweak to be made, again from Wei Dai, in F_40_59, (x&a)|(y&a) -> (x|y)&a */
  143. #define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d))
  144. #define F_20_39(b, c, d) ((b) ^ (c) ^ (d))
  145. #define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d)))
  146. #define F_60_79(b, c, d) F_20_39(b, c, d)
  147. /* We pre-added the K constants during message scheduling. */
  148. #define BODY_00_19(i, a, b, c, d, e, f) \
  149. do { \
  150. (f) = w[i] + (e) + rotate((a), 5) + F_00_19((b), (c), (d)); \
  151. (b) = rotate((b), 30); \
  152. } while (0)
  153. #define BODY_20_39(i, a, b, c, d, e, f) \
  154. do { \
  155. (f) = w[i] + (e) + rotate((a), 5) + F_20_39((b), (c), (d)); \
  156. (b) = rotate((b), 30); \
  157. } while (0)
  158. #define BODY_40_59(i, a, b, c, d, e, f) \
  159. do { \
  160. (f) = w[i] + (e) + rotate((a), 5) + F_40_59((b), (c), (d)); \
  161. (b) = rotate((b), 30); \
  162. } while (0)
  163. #define BODY_60_79(i, a, b, c, d, e, f) \
  164. do { \
  165. (f) = w[i] + (e) + rotate((a), 5) + F_60_79((b), (c), (d)); \
  166. (b) = rotate((b), 30); \
  167. } while (0)
  168. void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num) {
  169. uint32_t A, B, C, D, E, T;
  170. A = state[0];
  171. B = state[1];
  172. C = state[2];
  173. D = state[3];
  174. E = state[4];
  175. for (;;) {
  176. vec_uint32_t vw[20];
  177. const uint32_t *w = (const uint32_t *)&vw;
  178. vec_uint32_t k = K_00_19_x_4;
  179. const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
  180. BODY_00_19(0, A, B, C, D, E, T);
  181. BODY_00_19(1, T, A, B, C, D, E);
  182. BODY_00_19(2, E, T, A, B, C, D);
  183. BODY_00_19(3, D, E, T, A, B, C);
  184. const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
  185. BODY_00_19(4, C, D, E, T, A, B);
  186. BODY_00_19(5, B, C, D, E, T, A);
  187. BODY_00_19(6, A, B, C, D, E, T);
  188. BODY_00_19(7, T, A, B, C, D, E);
  189. const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
  190. BODY_00_19(8, E, T, A, B, C, D);
  191. BODY_00_19(9, D, E, T, A, B, C);
  192. BODY_00_19(10, C, D, E, T, A, B);
  193. BODY_00_19(11, B, C, D, E, T, A);
  194. const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
  195. BODY_00_19(12, A, B, C, D, E, T);
  196. BODY_00_19(13, T, A, B, C, D, E);
  197. BODY_00_19(14, E, T, A, B, C, D);
  198. BODY_00_19(15, D, E, T, A, B, C);
  199. const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
  200. BODY_00_19(16, C, D, E, T, A, B);
  201. BODY_00_19(17, B, C, D, E, T, A);
  202. BODY_00_19(18, A, B, C, D, E, T);
  203. BODY_00_19(19, T, A, B, C, D, E);
  204. k = K_20_39_x_4;
  205. const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
  206. BODY_20_39(20, E, T, A, B, C, D);
  207. BODY_20_39(21, D, E, T, A, B, C);
  208. BODY_20_39(22, C, D, E, T, A, B);
  209. BODY_20_39(23, B, C, D, E, T, A);
  210. const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
  211. BODY_20_39(24, A, B, C, D, E, T);
  212. BODY_20_39(25, T, A, B, C, D, E);
  213. BODY_20_39(26, E, T, A, B, C, D);
  214. BODY_20_39(27, D, E, T, A, B, C);
  215. const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
  216. BODY_20_39(28, C, D, E, T, A, B);
  217. BODY_20_39(29, B, C, D, E, T, A);
  218. BODY_20_39(30, A, B, C, D, E, T);
  219. BODY_20_39(31, T, A, B, C, D, E);
  220. const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
  221. BODY_20_39(32, E, T, A, B, C, D);
  222. BODY_20_39(33, D, E, T, A, B, C);
  223. BODY_20_39(34, C, D, E, T, A, B);
  224. BODY_20_39(35, B, C, D, E, T, A);
  225. const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
  226. BODY_20_39(36, A, B, C, D, E, T);
  227. BODY_20_39(37, T, A, B, C, D, E);
  228. BODY_20_39(38, E, T, A, B, C, D);
  229. BODY_20_39(39, D, E, T, A, B, C);
  230. k = K_40_59_x_4;
  231. const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
  232. BODY_40_59(40, C, D, E, T, A, B);
  233. BODY_40_59(41, B, C, D, E, T, A);
  234. BODY_40_59(42, A, B, C, D, E, T);
  235. BODY_40_59(43, T, A, B, C, D, E);
  236. const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
  237. BODY_40_59(44, E, T, A, B, C, D);
  238. BODY_40_59(45, D, E, T, A, B, C);
  239. BODY_40_59(46, C, D, E, T, A, B);
  240. BODY_40_59(47, B, C, D, E, T, A);
  241. const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
  242. BODY_40_59(48, A, B, C, D, E, T);
  243. BODY_40_59(49, T, A, B, C, D, E);
  244. BODY_40_59(50, E, T, A, B, C, D);
  245. BODY_40_59(51, D, E, T, A, B, C);
  246. const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
  247. BODY_40_59(52, C, D, E, T, A, B);
  248. BODY_40_59(53, B, C, D, E, T, A);
  249. BODY_40_59(54, A, B, C, D, E, T);
  250. BODY_40_59(55, T, A, B, C, D, E);
  251. const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
  252. BODY_40_59(56, E, T, A, B, C, D);
  253. BODY_40_59(57, D, E, T, A, B, C);
  254. BODY_40_59(58, C, D, E, T, A, B);
  255. BODY_40_59(59, B, C, D, E, T, A);
  256. k = K_60_79_x_4;
  257. const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
  258. BODY_60_79(60, A, B, C, D, E, T);
  259. BODY_60_79(61, T, A, B, C, D, E);
  260. BODY_60_79(62, E, T, A, B, C, D);
  261. BODY_60_79(63, D, E, T, A, B, C);
  262. const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
  263. BODY_60_79(64, C, D, E, T, A, B);
  264. BODY_60_79(65, B, C, D, E, T, A);
  265. BODY_60_79(66, A, B, C, D, E, T);
  266. BODY_60_79(67, T, A, B, C, D, E);
  267. const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
  268. BODY_60_79(68, E, T, A, B, C, D);
  269. BODY_60_79(69, D, E, T, A, B, C);
  270. BODY_60_79(70, C, D, E, T, A, B);
  271. BODY_60_79(71, B, C, D, E, T, A);
  272. const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
  273. BODY_60_79(72, A, B, C, D, E, T);
  274. BODY_60_79(73, T, A, B, C, D, E);
  275. BODY_60_79(74, E, T, A, B, C, D);
  276. BODY_60_79(75, D, E, T, A, B, C);
  277. /* We don't use the last value */
  278. (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
  279. BODY_60_79(76, C, D, E, T, A, B);
  280. BODY_60_79(77, B, C, D, E, T, A);
  281. BODY_60_79(78, A, B, C, D, E, T);
  282. BODY_60_79(79, T, A, B, C, D, E);
  283. const uint32_t mask = 0xffffffffUL;
  284. state[0] = (state[0] + E) & mask;
  285. state[1] = (state[1] + T) & mask;
  286. state[2] = (state[2] + A) & mask;
  287. state[3] = (state[3] + B) & mask;
  288. state[4] = (state[4] + C) & mask;
  289. data += 64;
  290. if (--num == 0) {
  291. break;
  292. }
  293. A = state[0];
  294. B = state[1];
  295. C = state[2];
  296. D = state[3];
  297. E = state[4];
  298. }
  299. }
  300. #endif /* OPENSSL_PPC64LE */