Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

886 linhas
17 KiB

  1. # Copyright (c) 2014, Google Inc.
  2. #
  3. # Permission to use, copy, modify, and/or distribute this software for any
  4. # purpose with or without fee is hereby granted, provided that the above
  5. # copyright notice and this permission notice appear in all copies.
  6. #
  7. # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  10. # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  12. # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  13. # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. # This file contains a pre-compiled version of chacha_vec.c for ARM. This is
  15. # needed to support switching on NEON code at runtime. If the whole of OpenSSL
  16. # were to be compiled with the needed flags to build chacha_vec.c, then it
  17. # wouldn't be possible to run on non-NEON systems.
  18. #
  19. # This file was generated by:
  20. #
  21. # /opt/gcc-linaro-arm-linux-gnueabihf-4.7-2012.10-20121022_linux/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -S chacha_vec.c -I ../../include -fpic -o chacha_vec_arm.S
  @ Assembler setup for the whole file: unified ARM/Thumb syntax, Cortex-A8 as
  @ the target CPU, NEON as the FPU, Thumb instruction encoding, code in .text.
  @ The .eabi_attribute lines are the build attributes the compiler emitted
  @ for this translation unit.
  @ NOTE(review): the meaning of each tag number is defined by the ARM EABI
  @ build-attributes addendum and is not spelled out here - look the tags up
  @ before changing any value.
  22. .syntax unified
  23. .cpu cortex-a8
  24. .eabi_attribute 27, 3
  25. # EABI attribute 28 sets whether VFP register arguments were used to build this
  26. # file. If object files are inconsistent on this point, the linker will refuse
  27. # to link them. Thus we report whatever the compiler expects since we don't use
  28. # VFP arguments.
  29. #if defined(__ARM_PCS_VFP)
  30. .eabi_attribute 28, 1
  31. #else
  32. .eabi_attribute 28, 0
  33. #endif
  34. .fpu neon
  35. .eabi_attribute 20, 1
  36. .eabi_attribute 21, 1
  37. .eabi_attribute 23, 3
  38. .eabi_attribute 24, 1
  39. .eabi_attribute 25, 1
  40. .eabi_attribute 26, 2
  41. .eabi_attribute 30, 2
  42. .eabi_attribute 34, 1
  43. .eabi_attribute 18, 4
  44. .thumb
  45. .file "chacha_vec.c"
  46. .text
  @ ----------------------------------------------------------------------
  @ CRYPTO_chacha_20_neon  (Thumb-2 + NEON, compiler-generated)
  @
  @ Arguments, described by how the code below uses them:
  @   r0        pointer the result bytes are stored through   (kept at [r7, #188])
  @   r1        pointer the bytes to XOR against are read from (kept at [r7, #184])
  @   r2        byte count                                     (kept at [r7, #196])
  @   r3        pointer to 32 bytes that are loaded into q12/q13 (kept at [r7, #244])
  @   stack +0  pointer to two 32-bit words  (visible as [r7, #400] after the prologue)
  @   stack +4  32-bit value placed in lane 0 of q10 (visible as [r7, #404])
  @ NOTE(review): these presumably map to out, in, in_len, key, nonce and
  @ counter of the C function in chacha_vec.c - confirm against the prototype.
  @
  @ Register contract: r4-r11 and d8-d15 are saved on entry and restored at
  @ .L1. LR is not saved; the function returns with bx lr.
  @
  @ Vector values that stay fixed until the final partial block:
  @   q11 = the four constant words at .LANCHOR0
  @   q12, q13 = the 32 bytes at r3
  @   q10 = { stack +4 value, 0, word 0, word 1 of the stack +0 data };
  @         lane 0 is advanced by one per 64-byte block produced.
  @
  @ Structure:
  @   .L4   one iteration per 192 bytes: two 64-byte blocks in NEON registers
  @         and a third in core registers, instruction-interleaved
  @   .L6   one iteration per remaining whole 64-byte block (NEON only)
  @   .L5   final partial block (byte count & 63), staged through a 16-byte
  @         aligned scratch buffer whose address is kept at [r7, #200]
  @
  @ Alignment: several vld1.64 / vst1.64 accesses through the r0, r1 and r3
  @ pointers carry a :64 alignment qualifier.
  @ NOTE(review): this implies the caller must pass pointers that are at
  @ least 8-byte aligned - confirm that every call site guarantees it.
  @ ----------------------------------------------------------------------
  47. .align 2
  48. .global CRYPTO_chacha_20_neon
  49. .thumb
  50. .thumb_func
  51. .type CRYPTO_chacha_20_neon, %function
  52. CRYPTO_chacha_20_neon:
  53. @ args = 8, pretend = 0, frame = 304
  54. @ frame_needed = 1, uses_anonymous_args = 0
  55. @ link register save eliminated.
  @ Prologue: save r4-r11 (32 bytes) and d8-d15 (64 bytes), then reserve the
  @ 304-byte frame addressed through r7. The two stack arguments therefore
  @ sit at [r7, #400] and [r7, #404].
  56. push {r4, r5, r6, r7, r8, r9, sl, fp}
  57. fstmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15}
  58. sub sp, sp, #304
  59. add r7, sp, #0
  @ ip = 0xAAAAAAAB. Together with the umull and the lsrs #7 below this
  @ computes byte count / 192 without a divide instruction.
  60. movw ip, #43691
  61. movt ip, 43690
  62. str r2, [r7, #196]
  @ Reserve 96 more bytes below the frame. A 16-byte aligned pointer into
  @ this area is computed below (add #15 / bic #15) and kept at [r7, #200].
  63. sub sp, sp, #96
  64. ldr r4, [r7, #196]
  65. ldr r6, [r7, #400]
  @ r2 = offset of .LANCHOR0 relative to .LPIC24; the add at .LPIC24 turns it
  @ into the absolute address of the constant table.
  66. ldr r2, .L38+16
  67. umull r4, ip, ip, r4
  68. ldr r6, [r6, #0]
  69. ldr r8, [r7, #400]
  70. .LPIC24:
  71. add r2, pc
  72. add r4, sp, #15
  73. str r3, [r7, #244]
  74. str r6, [r7, #176]
  75. bic r4, r4, #15
  76. str r0, [r7, #188]
  77. str r4, [r7, #200]
  @ ip = byte count / 192 = number of 192-byte iterations. The Z flag set
  @ here is consumed by beq .L20 further down; no instruction in between
  @ updates the flags.
  78. lsrs ip, ip, #7
  79. str r1, [r7, #184]
  @ r0-r3 = the four constant words at .LANCHOR0.
  80. ldmia r2, {r0, r1, r2, r3}
  81. ldr r4, [r8, #4]
  @ q12, q13 = the 32 bytes at the r3 argument.
  82. ldr r5, [r7, #244]
  83. vld1.64 {d24-d25}, [r5:64]
  84. vldr d26, [r5, #16]
  85. vldr d27, [r5, #24]
  @ Build q10 in memory at [r7, #288..303] as
  @   { [r7,#404] value, 0, word 0, word 1 of the data at [r7,#400] }
  @ and copy the constants to scratch+64 so they can be loaded as q11.
  86. ldr r9, [r7, #200]
  87. ldr r8, [r7, #404]
  88. ldr r5, [r7, #176]
  89. add r6, r9, #64
  90. str r4, [r7, #300]
  91. mov r4, #0
  92. str r8, [r7, #288]
  93. str r5, [r7, #296]
  94. str r4, [r7, #292]
  95. stmia r6, {r0, r1, r2, r3}
  96. vldr d22, [r9, #64]
  97. vldr d23, [r9, #72]
  98. vldr d20, [r7, #288]
  99. vldr d21, [r7, #296]
  100. str ip, [r7, #192]
  @ Fewer than 192 bytes in total: skip the three-block loop.
  101. beq .L20
  @ Loop setup:
  @   [r7,#180] = 2 * iterations, [r7,#204] = 3 * iterations (loop bound)
  @   [r7,#240] = blocks done so far (starts at 0)
  @   [r7,#208] = [r7,#404] value + 2
  @   [r7,#224], [r7,#220], [r7,#216], [r7,#212] = constant words 0, 1, 2, 3
  @   [r7,#252] / [r7,#248] = running load / store pointers
  @   q9 = {1, 0, 0, 0} from the literal pool at .L38
  102. lsl r6, ip, #1
  103. ldr r1, [r9, #68]
  104. add r3, r6, ip
  105. str r6, [r7, #180]
  106. ldr r2, [r9, #72]
  107. add r8, r8, #2
  108. ldr r5, [r9, #76]
  109. vldr d18, .L38
  110. vldr d19, .L38+8
  111. str r4, [r7, #240]
  112. ldr r6, [r7, #184]
  113. ldr r4, [r7, #188]
  114. str r0, [r7, #224]
  115. str r1, [r7, #220]
  116. str r8, [r7, #208]
  117. str r2, [r7, #216]
  118. str r3, [r7, #204]
  119. str r5, [r7, #212]
  120. str r6, [r7, #252]
  121. str r4, [r7, #248]
  @ ---- Main loop: 192 bytes per iteration --------------------------------
  @ NEON block A uses rows q4, q0, q1, q5  initialised from q11, q12, q13, q10.
  @ NEON block B uses rows q2, q3, q15, q8 initialised from q11, q12, q13,
  @ q10 + q9. The third block lives in core registers; its first state word
  @ after the r3 data is [r7,#240] + [r7,#208], kept at [r7,#228].
  122. .L4:
  123. ldr r2, [r7, #244]
  124. add r9, r7, #216
  125. ldr r3, [r7, #244]
  126. vadd.i32 q8, q10, q9
  127. ldr r6, [r7, #208]
  128. vmov q15, q13 @ v4si
  129. ldr r5, [r7, #240]
  130. vmov q3, q12 @ v4si
  131. ldr r4, [r7, #244]
  132. vmov q2, q11 @ v4si
  133. adds r5, r5, r6
  134. ldr r2, [r2, #8]
  135. ldr r6, [r7, #400]
  136. vmov q5, q10 @ v4si
  137. ldr r3, [r3, #12]
  138. vmov q1, q13 @ v4si
  139. ldr r0, [r7, #244]
  140. vmov q0, q12 @ v4si
  141. ldr r1, [r7, #244]
  142. vmov q4, q11 @ v4si
  143. ldmia r9, {r9, sl, fp}
  144. str r5, [r7, #228]
  145. ldr r5, [r4, #24]
  146. ldr r0, [r0, #0]
  147. ldr r1, [r1, #4]
  148. str r2, [r7, #264]
  149. str r3, [r7, #236]
  150. ldr r2, [r6, #4]
  151. ldr r3, [r4, #28]
  152. str r5, [r7, #280]
  153. ldr r5, [r6, #0]
  154. movs r6, #0
  155. ldr ip, [r7, #228]
  156. ldr r8, [r7, #212]
  157. str r0, [r7, #232]
  158. str r1, [r7, #268]
  159. ldr r0, [r4, #16]
  160. ldr r1, [r4, #20]
  @ r4 = 10 is stored to [r7,#256]: the trip count of the inner loop .L3.
  161. movs r4, #10
  162. str r2, [r7, #24]
  163. str r3, [r7, #284]
  164. str r4, [r7, #256]
  165. ldr r2, [r7, #264]
  166. str r9, [r7, #276]
  167. mov r9, r6
  168. ldr r6, [r7, #280]
  169. str r8, [r7, #260]
  170. mov r8, sl
  171. str r1, [r7, #272]
  172. mov sl, ip
  173. str r6, [r7, #264]
  174. mov r6, r5
  175. ldr r3, [r7, #236]
  176. mov r5, r0
  177. ldr ip, [r7, #24]
  178. ldr r1, [r7, #268]
  179. ldr r0, [r7, #232]
  @ Jump over the literal pool that sits in the middle of the function.
  180. b .L39
  181. .L40:
  182. .align 3
  @ Literal pool: {1, 0, 0, 0} (loaded into q9) followed by the PC-relative
  @ offset of .LANCHOR0 used at .LPIC24.
  183. .L38:
  184. .word 1
  185. .word 0
  186. .word 0
  187. .word 0
  188. .word .LANCHOR0-(.LPIC24+4)
  189. .L39:
  @ Inner loop, 10 iterations. Each iteration runs the add / xor / rotate
  @ sequence twice for both NEON blocks (rotate-left by 16 via vrev32.16,
  @ then by 12, 8 and 7 via vshl + vsri, with vext lane rotations after each
  @ pass), interleaved with the same arithmetic for the third block in core
  @ registers (ror #16, #20, #24, #25). Core-register state that does not
  @ fit in registers is spilled to frame slots.
  190. .L3:
  191. vadd.i32 q4, q4, q0
  192. add r8, r8, r1
  193. vadd.i32 q2, q2, q3
  194. str r8, [r7, #268]
  195. veor q5, q5, q4
  196. ldr r8, [r7, #276]
  197. veor q8, q8, q2
  198. add fp, fp, r0
  199. str fp, [r7, #280]
  200. add r8, r8, r2
  201. vrev32.16 q5, q5
  202. str r8, [r7, #276]
  203. vrev32.16 q8, q8
  204. vadd.i32 q1, q1, q5
  205. vadd.i32 q15, q15, q8
  206. ldr r8, [r7, #280]
  207. veor q0, q1, q0
  208. ldr r4, [r7, #260]
  209. veor q3, q15, q3
  210. eor sl, sl, r8
  211. ldr r8, [r7, #276]
  212. add fp, r4, r3
  213. vshl.i32 q7, q0, #12
  214. ldr r4, [r7, #268]
  215. vshl.i32 q6, q3, #12
  216. eor r6, r6, r8
  217. eor r9, r9, r4
  218. ldr r4, [r7, #272]
  219. vsri.32 q7, q0, #20
  220. ror r8, r6, #16
  221. ldr r6, [r7, #264]
  222. eor ip, ip, fp
  223. vsri.32 q6, q3, #20
  224. ror sl, sl, #16
  225. ror r9, r9, #16
  226. add r5, r5, sl
  227. vadd.i32 q4, q4, q7
  228. str r5, [r7, #236]
  229. vadd.i32 q2, q2, q6
  230. add r5, r4, r9
  231. add r4, r6, r8
  232. ldr r6, [r7, #284]
  233. ror ip, ip, #16
  234. veor q5, q4, q5
  235. veor q8, q2, q8
  236. add r6, r6, ip
  237. str r6, [r7, #264]
  238. eors r1, r1, r5
  239. ldr r6, [r7, #236]
  240. vshl.i32 q3, q5, #8
  241. vshl.i32 q14, q8, #8
  242. eors r2, r2, r4
  243. eors r0, r0, r6
  244. ldr r6, [r7, #264]
  245. vsri.32 q3, q5, #24
  246. ror r1, r1, #20
  247. eors r3, r3, r6
  248. ldr r6, [r7, #280]
  249. ror r0, r0, #20
  250. vsri.32 q14, q8, #24
  251. adds r6, r0, r6
  252. str r6, [r7, #284]
  253. ldr r6, [r7, #268]
  254. vadd.i32 q1, q1, q3
  255. vadd.i32 q15, q15, q14
  256. ror r2, r2, #20
  257. adds r6, r1, r6
  258. str r6, [r7, #260]
  259. ldr r6, [r7, #276]
  260. veor q6, q15, q6
  261. veor q7, q1, q7
  262. ror r3, r3, #20
  263. adds r6, r2, r6
  264. str r6, [r7, #280]
  265. ldr r6, [r7, #284]
  266. vshl.i32 q0, q6, #7
  267. vshl.i32 q5, q7, #7
  268. add fp, r3, fp
  269. eor sl, r6, sl
  270. ldr r6, [r7, #260]
  271. eor ip, fp, ip
  272. vsri.32 q0, q6, #25
  273. eor r9, r6, r9
  274. ldr r6, [r7, #280]
  275. ror sl, sl, #24
  276. vsri.32 q5, q7, #25
  277. eor r8, r6, r8
  278. ldr r6, [r7, #236]
  279. ror r9, r9, #24
  280. ror ip, ip, #24
  281. add r6, sl, r6
  282. str r6, [r7, #276]
  283. ldr r6, [r7, #264]
  284. add r5, r9, r5
  285. str r5, [r7, #272]
  286. vext.32 q5, q5, q5, #1
  287. add r5, ip, r6
  288. ldr r6, [r7, #276]
  289. vext.32 q0, q0, q0, #1
  290. vadd.i32 q4, q4, q5
  291. eors r0, r0, r6
  292. ldr r6, [r7, #272]
  293. vadd.i32 q2, q2, q0
  294. vext.32 q3, q3, q3, #3
  295. ror r8, r8, #24
  296. eors r1, r1, r6
  297. vext.32 q14, q14, q14, #3
  298. add r4, r8, r4
  299. ldr r6, [r7, #284]
  300. veor q3, q4, q3
  301. veor q14, q2, q14
  302. eors r2, r2, r4
  303. ror r1, r1, #25
  304. vext.32 q1, q1, q1, #2
  305. adds r6, r1, r6
  306. str r6, [r7, #284]
  307. vext.32 q15, q15, q15, #2
  308. ldr r6, [r7, #260]
  309. eors r3, r3, r5
  310. ror r2, r2, #25
  311. vrev32.16 q8, q14
  312. adds r6, r2, r6
  313. vrev32.16 q3, q3
  314. str r6, [r7, #268]
  315. vadd.i32 q1, q1, q3
  316. ldr r6, [r7, #280]
  317. vadd.i32 q15, q15, q8
  318. ror r3, r3, #25
  319. veor q5, q1, q5
  320. adds r6, r3, r6
  321. veor q0, q15, q0
  322. str r6, [r7, #264]
  323. ldr r6, [r7, #268]
  324. ror r0, r0, #25
  325. add fp, r0, fp
  326. vshl.i32 q6, q5, #12
  327. eor sl, r6, sl
  328. ldr r6, [r7, #284]
  329. vshl.i32 q14, q0, #12
  330. eor r8, fp, r8
  331. eor ip, r6, ip
  332. ldr r6, [r7, #264]
  333. vsri.32 q6, q5, #20
  334. ror sl, sl, #16
  335. eor r9, r6, r9
  336. ror r6, r8, #16
  337. vsri.32 q14, q0, #20
  338. ldr r8, [r7, #272]
  339. ror ip, ip, #16
  340. add r5, sl, r5
  341. add r8, r6, r8
  342. add r4, ip, r4
  343. str r4, [r7, #236]
  344. eor r0, r8, r0
  345. str r5, [r7, #280]
  346. vadd.i32 q4, q4, q6
  347. ldr r5, [r7, #236]
  348. vadd.i32 q2, q2, q14
  349. ldr r4, [r7, #276]
  350. ror r0, r0, #20
  351. veor q3, q4, q3
  352. eors r1, r1, r5
  353. veor q0, q2, q8
  354. str r8, [r7, #272]
  355. str r0, [r7, #24]
  356. add fp, r0, fp
  357. ldr r8, [r7, #280]
  358. ror r9, r9, #16
  359. ldr r0, [r7, #284]
  360. add r4, r9, r4
  361. str fp, [r7, #260]
  362. ror r1, r1, #20
  363. add fp, r1, r0
  364. eor r2, r8, r2
  365. ldr r0, [r7, #260]
  366. eors r3, r3, r4
  367. vshl.i32 q5, q3, #8
  368. str r4, [r7, #232]
  369. vshl.i32 q8, q0, #8
  370. ldr r4, [r7, #268]
  371. ldr r5, [r7, #264]
  372. ror r2, r2, #20
  373. ror r3, r3, #20
  374. eors r6, r6, r0
  375. adds r5, r3, r5
  376. add r8, r2, r4
  377. vsri.32 q5, q3, #24
  378. ldr r4, [r7, #272]
  379. eor r9, r5, r9
  380. eor ip, fp, ip
  381. vsri.32 q8, q0, #24
  382. eor sl, r8, sl
  383. ror r6, r6, #24
  384. ldr r0, [r7, #280]
  385. str r5, [r7, #276]
  386. adds r4, r6, r4
  387. ldr r5, [r7, #236]
  388. vadd.i32 q1, q1, q5
  389. str r4, [r7, #272]
  390. vadd.i32 q15, q15, q8
  391. ldr r4, [r7, #232]
  392. ror ip, ip, #24
  393. ror sl, sl, #24
  394. ror r9, r9, #24
  395. add r5, ip, r5
  396. add r0, sl, r0
  397. str r5, [r7, #264]
  398. add r5, r9, r4
  399. str r0, [r7, #284]
  400. veor q6, q1, q6
  401. ldr r4, [r7, #24]
  402. veor q14, q15, q14
  403. ldr r0, [r7, #272]
  404. eors r3, r3, r5
  405. vshl.i32 q0, q6, #7
  406. vext.32 q1, q1, q1, #2
  407. eors r0, r0, r4
  408. ldr r4, [r7, #284]
  409. str r0, [r7, #280]
  410. vshl.i32 q3, q14, #7
  411. eors r2, r2, r4
  412. ldr r4, [r7, #280]
  413. ldr r0, [r7, #264]
  414. vsri.32 q0, q6, #25
  415. ror r2, r2, #25
  416. ror r3, r3, #25
  417. eors r1, r1, r0
  418. vsri.32 q3, q14, #25
  419. ror r0, r4, #25
  420. ldr r4, [r7, #256]
  421. ror r1, r1, #25
  422. vext.32 q5, q5, q5, #1
  @ Decrement the trip count. The flags set here are consumed by bne .L3;
  @ only a str and NEON vext instructions sit in between.
  423. subs r4, r4, #1
  424. str r4, [r7, #256]
  425. vext.32 q15, q15, q15, #2
  426. vext.32 q8, q8, q8, #1
  427. vext.32 q0, q0, q0, #3
  428. vext.32 q3, q3, q3, #3
  429. bne .L3
  @ Rounds done. Add the starting state back in, XOR with 192 bytes loaded
  @ through the pointer at [r7,#252] and store through the pointer at
  @ [r7,#248]. Bytes 0-63 come from NEON block A, 64-127 from NEON block B,
  @ 128-191 from the core-register block. q14 = q10 + q9 is the last row of
  @ the starting state of block B.
  430. ldr r4, [r7, #264]
  431. vadd.i32 q14, q10, q9
  432. str r2, [r7, #264]
  433. vadd.i32 q10, q10, q5
  434. ldr r2, [r7, #252]
  435. vld1.64 {d12-d13}, [r2:64]
  436. ldr r2, [r7, #220]
  437. vadd.i32 q4, q11, q4
  438. str ip, [r7, #24]
  439. mov ip, sl
  440. mov sl, r8
  441. ldr r8, [r7, #260]
  442. add sl, sl, r2
  443. ldr r2, [r7, #212]
  444. str r4, [r7, #280]
  445. vadd.i32 q0, q12, q0
  446. ldr r4, [r7, #224]
  447. add r8, r8, r2
  448. ldr r2, [r7, #240]
  449. vadd.i32 q1, q13, q1
  450. str r0, [r7, #232]
  451. add fp, fp, r4
  452. mov r0, r5
  453. ldr r4, [r7, #216]
  454. mov r5, r6
  455. mov r6, r9
  456. ldr r9, [r7, #276]
  @ Blocks done so far += 3.
  457. adds r2, r2, #3
  458. str r2, [r7, #240]
  459. vadd.i32 q2, q11, q2
  460. ldr r2, [r7, #252]
  461. add r9, r9, r4
  462. vadd.i32 q3, q12, q3
  463. ldr r4, [r7, #228]
  464. vadd.i32 q15, q13, q15
  465. str r1, [r7, #268]
  466. vadd.i32 q8, q14, q8
  467. str r3, [r7, #236]
  468. veor q4, q4, q6
  469. ldr r3, [r7, #284]
  470. ldr r1, [r7, #272]
  471. add ip, r4, ip
  472. ldr r4, [r7, #248]
  473. vst1.64 {d8-d9}, [r4:64]
  474. vldr d8, [r2, #16]
  475. vldr d9, [r2, #24]
  476. veor q0, q0, q4
  477. vstr d0, [r4, #16]
  478. vstr d1, [r4, #24]
  479. vldr d0, [r2, #32]
  480. vldr d1, [r2, #40]
  481. veor q1, q1, q0
  482. vstr d2, [r4, #32]
  483. vstr d3, [r4, #40]
  484. vldr d2, [r2, #48]
  485. vldr d3, [r2, #56]
  486. veor q10, q10, q1
  487. vstr d20, [r4, #48]
  488. vstr d21, [r4, #56]
  489. vldr d8, [r2, #64]
  490. vldr d9, [r2, #72]
  491. veor q2, q2, q4
  492. vstr d4, [r4, #64]
  493. vstr d5, [r4, #72]
  494. vldr d10, [r2, #80]
  495. vldr d11, [r2, #88]
  496. veor q3, q3, q5
  497. vstr d6, [r4, #80]
  498. vstr d7, [r4, #88]
  499. vldr d12, [r2, #96]
  500. vldr d13, [r2, #104]
  501. veor q15, q15, q6
  502. vstr d30, [r4, #96]
  503. vstr d31, [r4, #104]
  504. vldr d20, [r2, #112]
  505. vldr d21, [r2, #120]
  506. veor q8, q8, q10
  507. vstr d16, [r4, #112]
  508. vstr d17, [r4, #120]
  509. ldr r4, [r2, #128]
  510. ldr r2, [r7, #248]
  @ Rebuild q10 for the next iteration: q14 + q9 + q9 equals the q10 this
  @ iteration started with plus 3 * q9, i.e. lane 0 advanced by 3.
  511. vadd.i32 q10, q14, q9
  512. eor r4, fp, r4
  513. vadd.i32 q10, q10, q9
  514. str r4, [r2, #128]
  515. ldr r4, [r7, #252]
  516. ldr r2, [r4, #132]
  517. eor r2, sl, r2
  518. ldr sl, [r7, #248]
  519. str r2, [sl, #132]
  520. ldr r2, [r4, #136]
  521. eor r2, r9, r2
  522. str r2, [sl, #136]
  523. ldr r2, [r4, #140]
  524. eor r2, r8, r2
  525. str r2, [sl, #140]
  526. ldr r2, [r7, #244]
  527. ldr r4, [r4, #144]
  528. ldr r2, [r2, #0]
  529. str r4, [r7, #44]
  530. ldr r4, [r7, #232]
  531. add r8, r4, r2
  532. ldr r2, [r7, #44]
  533. ldr r4, [r7, #244]
  534. eor r8, r8, r2
  535. ldr r2, [r7, #252]
  536. str r8, [sl, #144]
  537. ldr r4, [r4, #4]
  538. ldr r2, [r2, #148]
  539. str r2, [r7, #40]
  540. ldr r2, [r7, #268]
  541. add r8, r2, r4
  542. ldr r4, [r7, #40]
  543. ldr r2, [r7, #244]
  544. eor r8, r8, r4
  545. ldr r4, [r7, #252]
  546. str r8, [sl, #148]
  547. ldr r2, [r2, #8]
  548. ldr r4, [r4, #152]
  549. str r4, [r7, #36]
  550. ldr r4, [r7, #264]
  551. add r8, r4, r2
  552. ldr r2, [r7, #36]
  553. eor r8, r8, r2
  554. str r8, [sl, #152]
  555. ldr r2, [r7, #252]
  556. ldr r4, [r7, #244]
  557. ldr r2, [r2, #156]
  558. ldr r4, [r4, #12]
  559. str r2, [r7, #32]
  560. ldr r2, [r7, #236]
  561. add r8, r2, r4
  562. ldr r4, [r7, #32]
  563. ldr r2, [r7, #252]
  564. eor r8, r8, r4
  565. str r8, [sl, #156]
  566. ldr r8, [r7, #244]
  567. ldr r2, [r2, #160]
  568. ldr r4, [r8, #16]
  569. adds r0, r0, r4
  570. ldr r4, [r7, #252]
  571. eors r0, r0, r2
  572. str r0, [sl, #160]
  573. ldr r0, [r8, #20]
  574. ldr r2, [r4, #164]
  575. adds r1, r1, r0
  576. ldr r0, [r7, #280]
  577. eors r1, r1, r2
  578. str r1, [sl, #164]
  579. ldr r2, [r8, #24]
  580. ldr r1, [r4, #168]
  581. adds r2, r0, r2
  582. eors r2, r2, r1
  583. str r2, [sl, #168]
  584. ldr r1, [r8, #28]
  585. ldr r2, [r4, #172]
  586. adds r3, r3, r1
  587. eors r3, r3, r2
  588. str r3, [sl, #172]
  589. ldr r3, [r4, #176]
  590. eor r3, ip, r3
  591. str r3, [sl, #176]
  592. ldr r3, [r4, #180]
  593. ldr r4, [r7, #400]
  594. eors r6, r6, r3
  595. str r6, [sl, #180]
  596. ldr r6, [r7, #252]
  597. ldr r2, [r4, #0]
  598. ldr r3, [r6, #184]
  599. adds r5, r5, r2
  600. eors r5, r5, r3
  601. str r5, [sl, #184]
  602. ldr r2, [r6, #188]
  @ Advance the load pointer by 192 bytes.
  603. adds r6, r6, #192
  604. ldr r3, [r4, #4]
  605. str r6, [r7, #252]
  606. ldr r0, [r7, #24]
  607. ldr r1, [r7, #240]
  608. adds r4, r0, r3
  609. eors r4, r4, r2
  610. ldr r2, [r7, #204]
  611. str r4, [sl, #188]
  @ Advance the store pointer by 192 bytes, then loop until the number of
  @ blocks done equals 3 * iterations ([r7,#204]).
  612. add sl, sl, #192
  613. cmp r1, r2
  614. str sl, [r7, #248]
  615. bne .L4
  @ r4 / r5 = original store / load pointers advanced by
  @ (2 * iterations + iterations) << 6 = 192 * iterations bytes.
  616. ldr r4, [r7, #192]
  617. ldr r3, [r7, #180]
  618. ldr r6, [r7, #188]
  619. adds r5, r3, r4
  620. ldr r8, [r7, #184]
  621. lsls r5, r5, #6
  622. adds r4, r6, r5
  623. add r5, r8, r5
  @ ---- Whole 64-byte blocks left after the 192-byte loop -----------------
  @ r3 = byte count mod 192 (same reciprocal multiply as at function entry,
  @ then count - 3 * quotient * 64); r6 = r3 / 64 = blocks to process here.
  624. .L2:
  625. ldr r9, [r7, #196]
  626. movw r3, #43691
  627. movt r3, 43690
  628. ldr sl, [r7, #196]
  629. umull r9, r3, r3, r9
  630. lsrs r3, r3, #7
  631. add r3, r3, r3, lsl #1
  632. sub r3, sl, r3, lsl #6
  633. lsrs r6, r3, #6
  634. beq .L5
  @ r1 / r2 = load / store pointers biased by +16, r0 = block count,
  @ q15 = {1, 0, 0, 0} from the literal pool at .L41.
  635. add r1, r5, #16
  636. add r2, r4, #16
  637. mov r0, r6
  638. vldr d30, .L41
  639. vldr d31, .L41+8
  @ One 64-byte block per iteration. Working rows q3, q14, q1, q8 start
  @ from q11, q12, q13, q10; r3 = 10 is the trip count of .L7.
  640. .L6:
  641. vmov q8, q10 @ v4si
  642. movs r3, #10
  643. vmov q1, q13 @ v4si
  644. vmov q14, q12 @ v4si
  645. vmov q3, q11 @ v4si
  @ 10 iterations of the same two-pass round sequence, NEON only. The flags
  @ from subs r3 survive until bne .L7 because NEON data-processing
  @ instructions do not write the core condition flags.
  646. .L7:
  647. vadd.i32 q3, q3, q14
  648. subs r3, r3, #1
  649. veor q2, q8, q3
  650. vrev32.16 q2, q2
  651. vadd.i32 q8, q1, q2
  652. veor q9, q8, q14
  653. vshl.i32 q14, q9, #12
  654. vsri.32 q14, q9, #20
  655. vadd.i32 q3, q3, q14
  656. veor q2, q3, q2
  657. vshl.i32 q9, q2, #8
  658. vsri.32 q9, q2, #24
  659. vadd.i32 q8, q8, q9
  660. vext.32 q9, q9, q9, #3
  661. veor q14, q8, q14
  662. vext.32 q1, q8, q8, #2
  663. vshl.i32 q8, q14, #7
  664. vsri.32 q8, q14, #25
  665. vext.32 q8, q8, q8, #1
  666. vadd.i32 q3, q3, q8
  667. veor q2, q3, q9
  668. vrev32.16 q2, q2
  669. vadd.i32 q9, q1, q2
  670. veor q8, q9, q8
  671. vshl.i32 q14, q8, #12
  672. vsri.32 q14, q8, #20
  673. vadd.i32 q3, q3, q14
  674. veor q2, q3, q2
  675. vshl.i32 q8, q2, #8
  676. vsri.32 q8, q2, #24
  677. vadd.i32 q9, q9, q8
  678. vext.32 q8, q8, q8, #1
  679. veor q14, q9, q14
  680. vext.32 q1, q9, q9, #2
  681. vshl.i32 q9, q14, #7
  682. vsri.32 q9, q14, #25
  683. vext.32 q14, q9, q9, #3
  684. bne .L7
  @ Add the starting state, XOR with 64 loaded bytes and store them.
  @ q10 += q15 advances lane 0 by one for the next block. The flags from
  @ subs r0 are consumed by bne .L6 (the add instructions in between do not
  @ set flags).
  685. vadd.i32 q8, q10, q8
  686. subs r0, r0, #1
  687. vadd.i32 q3, q11, q3
  688. vldr d0, [r1, #-16]
  689. vldr d1, [r1, #-8]
  690. vadd.i32 q14, q12, q14
  691. vadd.i32 q1, q13, q1
  692. veor q3, q3, q0
  693. vstr d6, [r2, #-16]
  694. vstr d7, [r2, #-8]
  695. vadd.i32 q10, q10, q15
  696. vld1.64 {d8-d9}, [r1:64]
  697. veor q14, q14, q4
  698. vst1.64 {d28-d29}, [r2:64]
  699. vldr d10, [r1, #16]
  700. vldr d11, [r1, #24]
  701. veor q1, q1, q5
  702. vstr d2, [r2, #16]
  703. vstr d3, [r2, #24]
  704. vldr d18, [r1, #32]
  705. vldr d19, [r1, #40]
  706. add r1, r1, #64
  707. veor q8, q8, q9
  708. vstr d16, [r2, #32]
  709. vstr d17, [r2, #40]
  710. add r2, r2, #64
  711. bne .L6
  @ Advance r4 / r5 past the blocks just processed (block count * 64).
  712. lsls r6, r6, #6
  713. adds r4, r4, r6
  714. adds r5, r5, r6
  @ ---- Final partial block -----------------------------------------------
  @ ip = byte count & 63; nothing left to do when it is zero.
  715. .L5:
  716. ldr r6, [r7, #196]
  717. ands ip, r6, #63
  718. beq .L1
  @ Produce one more block: working rows q15, q9, q14, q8 start from
  @ q11, q12, q13, q10.
  719. vmov q8, q10 @ v4si
  720. movs r3, #10
  721. vmov q14, q13 @ v4si
  722. vmov q9, q12 @ v4si
  723. vmov q15, q11 @ v4si
  724. .L10:
  725. vadd.i32 q15, q15, q9
  726. subs r3, r3, #1
  727. veor q8, q8, q15
  728. vrev32.16 q8, q8
  729. vadd.i32 q3, q14, q8
  730. veor q9, q3, q9
  731. vshl.i32 q14, q9, #12
  732. vsri.32 q14, q9, #20
  733. vadd.i32 q15, q15, q14
  734. veor q9, q15, q8
  735. vshl.i32 q8, q9, #8
  736. vsri.32 q8, q9, #24
  737. vadd.i32 q9, q3, q8
  738. vext.32 q8, q8, q8, #3
  739. veor q2, q9, q14
  740. vext.32 q14, q9, q9, #2
  741. vshl.i32 q9, q2, #7
  742. vsri.32 q9, q2, #25
  743. vext.32 q9, q9, q9, #1
  744. vadd.i32 q15, q15, q9
  745. veor q3, q15, q8
  746. vrev32.16 q3, q3
  747. vadd.i32 q14, q14, q3
  748. veor q8, q14, q9
  749. vshl.i32 q9, q8, #12
  750. vsri.32 q9, q8, #20
  751. vadd.i32 q15, q15, q9
  752. veor q3, q15, q3
  753. vshl.i32 q8, q3, #8
  754. vsri.32 q8, q3, #24
  755. vadd.i32 q14, q14, q8
  756. vext.32 q8, q8, q8, #1
  757. veor q3, q14, q9
  758. vext.32 q14, q14, q14, #2
  759. vshl.i32 q9, q3, #7
  760. vsri.32 q9, q3, #25
  761. vext.32 q9, q9, q9, #3
  762. bne .L10
  @ q11 + q15 = bytes 0-15 of this block. With more than 15 bytes left they
  @ are XORed and stored directly at .L37; otherwise they are written to the
  @ start of the scratch buffer for the byte loop.
  763. cmp ip, #15
  764. vadd.i32 q11, q11, q15
  765. bhi .L37
  766. ldr r9, [r7, #200]
  767. vst1.64 {d22-d23}, [r9:128]
  @ r3 = byte count & 48 = bytes of the partial block already written in
  @ 16-byte pieces at .L37. The remaining ip - r3 bytes are XORed against
  @ the scratch buffer at the same offset.
  768. .L14:
  769. ldr sl, [r7, #196]
  770. and r3, sl, #48
  771. cmp ip, r3
  772. bls .L1
  @ The sequence below chooses between a 16-bytes-at-a-time loop (.L17) and
  @ a byte loop (.L35) based on buffer overlap and on r9 = ip - r3.
  @ Since ip = count & 63 and r3 = count & 48, r9 = count & 15 and never
  @ exceeds 15: r2 is forced to 0 by movls, becomes 1 after the eor, and the
  @ cbnz always branches to .L35. The .L17 loop is therefore not reached.
  773. adds r0, r5, r3
  774. adds r1, r4, r3
  775. add r2, r0, #16
  776. add r6, r1, #16
  777. cmp r1, r2
  778. it cc
  779. cmpcc r0, r6
  780. rsb r9, r3, ip
  781. ite cc
  782. movcc r2, #0
  783. movcs r2, #1
  784. cmp r9, #15
  785. ite ls
  786. movls r2, #0
  787. andhi r2, r2, #1
  788. lsr r8, r9, #4
  789. eor r2, r2, #1
  790. cmp r8, #0
  791. it eq
  792. orreq r2, r2, #1
  793. lsl sl, r8, #4
  794. cbnz r2, .L35
  795. ldr fp, [r7, #200]
  796. add r6, fp, r3
  @ 16 bytes per iteration: store (scratch bytes XOR loaded bytes).
  797. .L17:
  798. vld1.8 {q8}, [r0]!
  799. adds r2, r2, #1
  800. cmp r8, r2
  801. vld1.8 {q9}, [r6]!
  802. veor q8, q9, q8
  803. vst1.8 {q8}, [r1]!
  804. bhi .L17
  805. cmp r9, sl
  806. add r3, r3, sl
  807. beq .L1
  @ Byte loop: for r3 up to ip - 1, store byte [r4 + r3] =
  @ byte [r5 + r3] XOR scratch byte [r0 + r3].
  808. .L35:
  809. ldr r0, [r7, #200]
  810. .L25:
  811. ldrb r2, [r5, r3] @ zero_extendqisi2
  812. ldrb r1, [r3, r0] @ zero_extendqisi2
  813. eors r2, r2, r1
  814. strb r2, [r4, r3]
  815. adds r3, r3, #1
  816. cmp ip, r3
  817. bhi .L25
  @ Epilogue: drop the frame (this also discards the extra 96 bytes, since
  @ sp is reloaded from r7), restore d8-d15 and r4-r11, return.
  818. .L1:
  819. add r7, r7, #304
  820. mov sp, r7
  821. fldmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15}
  822. pop {r4, r5, r6, r7, r8, r9, sl, fp}
  823. bx lr
  @ More than 15 bytes remain: XOR bytes 0-15 directly into the destination.
  @ q9 = q12 + q9 is bytes 16-31 of the block.
  824. .L37:
  825. cmp ip, #31
  826. vld1.64 {d0-d1}, [r5:64]
  827. vadd.i32 q9, q12, q9
  828. veor q11, q11, q0
  829. vst1.64 {d22-d23}, [r4:64]
  830. bls .L12
  @ More than 31 bytes remain: XOR bytes 16-31 directly.
  @ q13 = q13 + q14 is bytes 32-47 of the block.
  831. cmp ip, #47
  832. vldr d2, [r5, #16]
  833. vldr d3, [r5, #24]
  834. vadd.i32 q13, q13, q14
  835. veor q9, q9, q1
  836. vstr d18, [r4, #16]
  837. vstr d19, [r4, #24]
  838. bls .L13
  @ More than 47 bytes remain: q8 + q10 (bytes 48-63) goes to scratch+48 for
  @ the byte loop, and bytes 32-47 are XORed directly.
  839. vadd.i32 q8, q8, q10
  840. vldr d0, [r5, #32]
  841. vldr d1, [r5, #40]
  842. ldr r6, [r7, #200]
  843. vstr d16, [r6, #48]
  844. vstr d17, [r6, #56]
  845. veor q8, q13, q0
  846. vstr d16, [r4, #32]
  847. vstr d17, [r4, #40]
  848. b .L14
  @ 16 to 31 bytes remain: bytes 16-31 of the block go to scratch+16.
  849. .L12:
  850. ldr r8, [r7, #200]
  851. vstr d18, [r8, #16]
  852. vstr d19, [r8, #24]
  853. b .L14
  @ Entry to .L2 when the 192-byte loop is skipped: r5 / r4 are the original
  @ load / store pointers.
  854. .L20:
  855. ldr r5, [r7, #184]
  856. ldr r4, [r7, #188]
  857. b .L2
  @ 32 to 47 bytes remain: bytes 32-47 of the block go to scratch+32.
  858. .L13:
  859. ldr r6, [r7, #200]
  860. vstr d26, [r6, #32]
  861. vstr d27, [r6, #40]
  862. b .L14
  863. .L42:
  864. .align 3
  @ Literal pool: {1, 0, 0, 0}, loaded into q15 before the .L6 loop.
  865. .L41:
  866. .word 1
  867. .word 0
  868. .word 0
  869. .word 0
  870. .size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon
  @ Read-only constant table. .LANCHOR0 is the anchor whose address the
  @ function computes PC-relatively (via the offset word at .L38+16) before
  @ loading these four words with ldmia.
  871. .section .rodata
  872. .align 3
  873. .LANCHOR0 = . + 0
  874. .LC0:
  @ In hex: 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574. Stored
  @ little-endian, the 16 bytes read as the ASCII text: expand 32-byte k
  875. .word 1634760805
  876. .word 857760878
  877. .word 2036477234
  878. .word 1797285236
  @ Compiler identification string recorded by the toolchain.
  879. .ident "GCC: (crosstool-NG linaro-1.13.1-4.7-2012.10-20121022 - Linaro GCC 2012.10) 4.7.3 20121001 (prerelease)"
  @ NOTE(review): an empty .note.GNU-stack section is the GNU toolchain
  @ convention for declaring that this object does not need an executable
  @ stack - confirm against the linker documentation before removing it.
  880. .section .note.GNU-stack,"",%progbits