From 6dc994265edb2967462e88b2678035a870a2233f Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Fri, 2 Feb 2018 15:12:22 -0500 Subject: [PATCH] Sync up some perlasm license headers and easy fixes. These files are otherwise up-to-date with OpenSSL master as of 50ea9d2b3521467a11559be41dcf05ee05feabd6, modulo a couple of spelling fixes which I've imported. I've also reverted the same-line label and instruction patch to x86_64-mont*.pl. The new delocate parser handles that fine. Change-Id: Ife35c671a8104c3cc2fb6c5a03127376fccc4402 Reviewed-on: https://boringssl-review.googlesource.com/25644 Reviewed-by: Adam Langley --- crypto/chacha/asm/chacha-armv8.pl | 1 + crypto/chacha/asm/chacha-x86.pl | 9 ++++++- crypto/fipsmodule/aes/asm/aes-586.pl | 29 +++++++++++++-------- crypto/fipsmodule/aes/asm/aesni-x86.pl | 31 +++++++++++++++-------- crypto/fipsmodule/aes/asm/bsaes-armv7.pl | 8 ++---- crypto/fipsmodule/aes/asm/vpaes-x86.pl | 9 ++++++- crypto/fipsmodule/aes/asm/vpaes-x86_64.pl | 9 ++++++- crypto/fipsmodule/bn/asm/bn-586.pl | 9 ++++++- crypto/fipsmodule/bn/asm/co-586.pl | 8 +++++- crypto/fipsmodule/bn/asm/x86-mont.pl | 13 +++++++--- crypto/fipsmodule/bn/asm/x86_64-mont.pl | 16 ++++++++---- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 16 ++++++++---- crypto/fipsmodule/modes/asm/ghash-x86.pl | 16 ++++++++---- crypto/fipsmodule/sha/asm/sha1-586.pl | 22 ++++++++++------ crypto/fipsmodule/sha/asm/sha1-armv8.pl | 1 + crypto/fipsmodule/sha/asm/sha256-586.pl | 11 ++++++-- crypto/fipsmodule/sha/asm/sha512-586.pl | 11 ++++++-- crypto/perlasm/x86_64-xlate.pl | 6 ++--- 18 files changed, 160 insertions(+), 65 deletions(-) diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl index 5a660753..0a1c415f 100755 --- a/crypto/chacha/asm/chacha-armv8.pl +++ b/crypto/chacha/asm/chacha-armv8.pl @@ -28,6 +28,7 @@ # Denver 4.50/+82% 2.63 2.67(*) # X-Gene 9.50/+46% 8.82 8.89(*) # Mongoose 8.00/+44% 3.64 3.25 +# Kryo 8.17/+50% 4.83 4.65 # # (*) it's expected that doubling interleave factor doesn't help # all processors, only those with higher NEON latency and diff --git a/crypto/chacha/asm/chacha-x86.pl b/crypto/chacha/asm/chacha-x86.pl index a776b267..1d6a4e2b 100755 --- a/crypto/chacha/asm/chacha-x86.pl +++ b/crypto/chacha/asm/chacha-x86.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov for the OpenSSL diff --git a/crypto/fipsmodule/aes/asm/aes-586.pl b/crypto/fipsmodule/aes/asm/aes-586.pl index f015762f..09483d9b 100755 --- a/crypto/fipsmodule/aes/asm/aes-586.pl +++ b/crypto/fipsmodule/aes/asm/aes-586.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -32,7 +39,7 @@ # for scaling too, I [try to] avoid the latter by favoring off-by-2 # shifts and masking the result with 0xFF<<2 instead of "boring" 0xFF. # -# As was shown by Dean Gaudet , the above note turned +# As was shown by Dean Gaudet, the above note turned out to be # void. Performance improvement with off-by-2 shifts was observed on # intermediate implementation, which was spilling yet another register # to stack... Final offset*4 code below runs just a tad faster on P4, @@ -48,8 +55,8 @@ # better performance on most recent µ-archs... # # Third version adds AES_cbc_encrypt implementation, which resulted in -# up to 40% performance imrovement of CBC benchmark results. 40% was -# observed on P4 core, where "overall" imrovement coefficient, i.e. if +# up to 40% performance improvement of CBC benchmark results. 40% was +# observed on P4 core, where "overall" improvement coefficient, i.e. if # compared to PIC generated by GCC and in CBC mode, was observed to be # as large as 4x:-) CBC performance is virtually identical to ECB now # and on some platforms even better, e.g. 17.6 "small" cycles/byte on @@ -152,7 +159,7 @@ # combinations then attack becomes infeasible. This is why revised # AES_cbc_encrypt "dares" to switch to larger S-box when larger chunk # of data is to be processed in one stroke. The current size limit of -# 512 bytes is chosen to provide same [diminishigly low] probability +# 512 bytes is chosen to provide same [diminishingly low] probability # for cache-line to remain untouched in large chunk operation with # large S-box as for single block operation with compact S-box and # surely needs more careful consideration... @@ -164,12 +171,12 @@ # yield execution to process performing AES just before timer fires # off the scheduler, immediately regain control of CPU and analyze the # cache state. For this attack to be efficient attacker would have to -# effectively slow down the operation by several *orders* of magnitute, +# effectively slow down the operation by several *orders* of magnitude, # by ratio of time slice to duration of handful of AES rounds, which # unlikely to remain unnoticed. Not to mention that this also means -# that he would spend correspondigly more time to collect enough +# that he would spend correspondingly more time to collect enough # statistical data to mount the attack. It's probably appropriate to -# say that if adeversary reckons that this attack is beneficial and +# say that if adversary reckons that this attack is beneficial and # risks to be noticed, you probably have larger problems having him # mere opportunity. In other words suggested code design expects you # to preclude/mitigate this attack by overall system security design. @@ -233,7 +240,7 @@ $small_footprint=1; # $small_footprint=1 code is ~5% slower [on # contention and in hope to "collect" 5% back # in real-life applications... -$vertical_spin=0; # shift "verticaly" defaults to 0, because of +$vertical_spin=0; # shift "vertically" defaults to 0, because of # its proof-of-concept status... # Note that there is no decvert(), as well as last encryption round is # performed with "horizontal" shifts. This is because this "vertical" @@ -1599,7 +1606,7 @@ sub decstep() # no instructions are reordered, as performance appears # optimal... or rather that all attempts to reorder didn't # result in better performance [which by the way is not a - # bit lower than ecryption]. + # bit lower than encryption]. if($i==3) { &mov ($key,$__key); } else { &mov ($out,$s[0]); } &and ($out,0xFF); diff --git a/crypto/fipsmodule/aes/asm/aesni-x86.pl b/crypto/fipsmodule/aes/asm/aesni-x86.pl index ca4e57c9..cf1a51e0 100644 --- a/crypto/fipsmodule/aes/asm/aesni-x86.pl +++ b/crypto/fipsmodule/aes/asm/aesni-x86.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -43,18 +50,22 @@ # Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing # one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09. +# November 2015 +# +# Add aesni_ocb_[en|de]crypt. [Removed in BoringSSL] + ###################################################################### # Current large-block performance in cycles per byte processed with # 128-bit key (less is better). # -# CBC en-/decrypt CTR XTS ECB +# CBC en-/decrypt CTR XTS ECB OCB # Westmere 3.77/1.37 1.37 1.52 1.27 -# * Bridge 5.07/0.98 0.99 1.09 0.91 -# Haswell 4.44/0.80 0.97 1.03 0.72 -# Skylake 2.68/0.65 0.65 0.66 0.64 -# Silvermont 5.77/3.56 3.67 4.03 3.46 -# Goldmont 3.84/1.39 1.39 1.63 1.31 -# Bulldozer 5.80/0.98 1.05 1.24 0.93 +# * Bridge 5.07/0.98 0.99 1.09 0.91 1.10 +# Haswell 4.44/0.80 0.97 1.03 0.72 0.76 +# Skylake 2.68/0.65 0.65 0.66 0.64 0.66 +# Silvermont 5.77/3.56 3.67 4.03 3.46 4.03 +# Goldmont 3.84/1.39 1.39 1.63 1.31 1.70 +# Bulldozer 5.80/0.98 1.05 1.24 0.93 1.23 $PREFIX="aesni"; # if $PREFIX is set to "AES", the script # generates drop-in replacement for @@ -228,7 +239,7 @@ sub aesni_generate1 # fully unrolled loop # can schedule aes[enc|dec] every cycle optimal interleave factor # equals to corresponding instructions latency. 8x is optimal for # * Bridge, but it's unfeasible to accommodate such implementation -# in XMM registers addreassable in 32-bit mode and therefore maximum +# in XMM registers addressable in 32-bit mode and therefore maximum # of 6x is used instead... sub aesni_generate2 @@ -2425,7 +2436,7 @@ if ($PREFIX eq "aesni") { &pxor ("xmm3","xmm3"); &aesenclast ("xmm2","xmm3"); - &movdqa ("xmm3","xmm1") + &movdqa ("xmm3","xmm1"); &pslldq ("xmm1",4); &pxor ("xmm3","xmm1"); &pslldq ("xmm1",4); diff --git a/crypto/fipsmodule/aes/asm/bsaes-armv7.pl b/crypto/fipsmodule/aes/asm/bsaes-armv7.pl index 1ff890aa..fa1e434b 100644 --- a/crypto/fipsmodule/aes/asm/bsaes-armv7.pl +++ b/crypto/fipsmodule/aes/asm/bsaes-armv7.pl @@ -14,8 +14,7 @@ # details see http://www.openssl.org/~appro/cryptogams/. # # Specific modes and adaptation for Linux kernel by Ard Biesheuvel -# . Permission to use under GPL terms is -# granted. +# of Linaro. Permission to use under GPL terms is granted. # ==================================================================== # Bit-sliced AES for ARM NEON @@ -49,10 +48,7 @@ # # April-August 2013 -# -# Add CBC, CTR and XTS subroutines, adapt for kernel use. -# -# +# Add CBC, CTR and XTS subroutines and adapt for kernel use; courtesy of Ard. $flavour = shift; if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86.pl b/crypto/fipsmodule/aes/asm/vpaes-x86.pl index 5a5e1f2e..5f4b208d 100644 --- a/crypto/fipsmodule/aes/asm/vpaes-x86.pl +++ b/crypto/fipsmodule/aes/asm/vpaes-x86.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + ###################################################################### ## Constant-time SSSE3 AES core implementation. diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl index eb4c7244..a583ca43 100644 --- a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + ###################################################################### ## Constant-time SSSE3 AES core implementation. diff --git a/crypto/fipsmodule/bn/asm/bn-586.pl b/crypto/fipsmodule/bn/asm/bn-586.pl index 2a202fcd..16818d55 100644 --- a/crypto/fipsmodule/bn/asm/bn-586.pl +++ b/crypto/fipsmodule/bn/asm/bn-586.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../../perlasm"); diff --git a/crypto/fipsmodule/bn/asm/co-586.pl b/crypto/fipsmodule/bn/asm/co-586.pl index 10167ae9..5eeeef97 100644 --- a/crypto/fipsmodule/bn/asm/co-586.pl +++ b/crypto/fipsmodule/bn/asm/co-586.pl @@ -1,4 +1,10 @@ -#!/usr/local/bin/perl +#! /usr/bin/env perl +# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../../perlasm"); diff --git a/crypto/fipsmodule/bn/asm/x86-mont.pl b/crypto/fipsmodule/bn/asm/x86-mont.pl index dfdf56ea..d15726bc 100755 --- a/crypto/fipsmodule/bn/asm/x86-mont.pl +++ b/crypto/fipsmodule/bn/asm/x86-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -71,7 +78,7 @@ $frame=32; # size of above frame rounded up to 16n &lea ("ebp",&DWP(-$frame,"esp","edi",4)); # future alloca($frame+4*(num+2)) &neg ("edi"); - # minimize cache contention by arraning 2K window between stack + # minimize cache contention by arranging 2K window between stack # pointer and ap argument [np is also position sensitive vector, # but it's assumed to be near ap, as it's allocated at ~same # time]. diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl index b57537ed..faa5cb02 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -296,12 +303,11 @@ $code.=<<___; mov $num,$j # j=num jmp .Lsub .align 16 -.Lsub: - sbb ($np,$i,8),%rax +.Lsub: sbb ($np,$i,8),%rax mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] mov 8($ap,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub sbb \$0,%rax # handle upmost overflow bit @@ -732,7 +738,7 @@ $code.=<<___; mov 56($ap,$i,8),@ri[3] sbb 40($np,$i,8),@ri[1] lea 4($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub4x mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 69b8e011..2119826b 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -396,12 +403,11 @@ $code.=<<___; mov $num,$j # j=num jmp .Lsub .align 16 -.Lsub: - sbb ($np,$i,8),%rax +.Lsub: sbb ($np,$i,8),%rax mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] mov 8($ap,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub sbb \$0,%rax # handle upmost overflow bit @@ -2405,7 +2411,7 @@ my $N=$STRIDE/4; # should match cache line size $code.=<<___; movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 - lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimizaton) + lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimization) lea 128($bp),$bptr # size optimization pshufd \$0,%xmm5,%xmm5 # broadcast index diff --git a/crypto/fipsmodule/modes/asm/ghash-x86.pl b/crypto/fipsmodule/modes/asm/ghash-x86.pl index 5c3aead9..02edf033 100644 --- a/crypto/fipsmodule/modes/asm/ghash-x86.pl +++ b/crypto/fipsmodule/modes/asm/ghash-x86.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -96,14 +103,13 @@ # # Does it make sense to increase Naggr? To start with it's virtually # impossible in 32-bit mode, because of limited register bank -# capacity. Otherwise improvement has to be weighed agiainst slower +# capacity. Otherwise improvement has to be weighed against slower # setup, as well as code size and complexity increase. As even # optimistic estimate doesn't promise 30% performance improvement, # there are currently no plans to increase Naggr. # -# Special thanks to David Woodhouse for -# providing access to a Westmere-based system on behalf of Intel -# Open Source Technology Centre. +# Special thanks to David Woodhouse for providing access to a +# Westmere-based system on behalf of Intel Open Source Technology Centre. # January 2010 # diff --git a/crypto/fipsmodule/sha/asm/sha1-586.pl b/crypto/fipsmodule/sha/asm/sha1-586.pl index d0f6b8f7..87fd361a 100644 --- a/crypto/fipsmodule/sha/asm/sha1-586.pl +++ b/crypto/fipsmodule/sha/asm/sha1-586.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # [Re]written by Andy Polyakov for the OpenSSL @@ -28,10 +35,9 @@ # P4 +85%(!) +45% # # As you can see Pentium came out as looser:-( Yet I reckoned that -# improvement on P4 outweights the loss and incorporate this +# improvement on P4 outweighs the loss and incorporate this # re-tuned code to 0.9.7 and later. # ---------------------------------------------------------------- -# # August 2009. # @@ -538,7 +544,7 @@ for($i=0;$i<20-4;$i+=2) { # being implemented in SSSE3). Once 8 quadruples or 32 elements are # collected, it switches to routine proposed by Max Locktyukhin. # -# Calculations inevitably require temporary reqisters, and there are +# Calculations inevitably require temporary registers, and there are # no %xmm registers left to spare. For this reason part of the ring # buffer, X[2..4] to be specific, is offloaded to 3 quadriples ring # buffer on the stack. Keep in mind that X[2] is alias X[-6], X[3] - @@ -649,7 +655,7 @@ my $_ror=sub { &ror(@_) }; &jmp (&label("loop")); ###################################################################### -# SSE instruction sequence is first broken to groups of indepentent +# SSE instruction sequence is first broken to groups of independent # instructions, independent in respect to their inputs and shifter # (not all architectures have more than one). Then IALU instructions # are "knitted in" between the SSE groups. Distance is maintained for @@ -658,14 +664,14 @@ my $_ror=sub { &ror(@_) }; # # Temporary registers usage. X[2] is volatile at the entry and at the # end is restored from backtrace ring buffer. X[3] is expected to -# contain current K_XX_XX constant and is used to caclulate X[-1]+K +# contain current K_XX_XX constant and is used to calculate X[-1]+K # from previous round, it becomes volatile the moment the value is # saved to stack for transfer to IALU. X[4] becomes volatile whenever # X[-4] is accumulated and offloaded to backtrace ring buffer, at the # end it is loaded with next K_XX_XX [which becomes X[3] in next # round]... # -sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 +sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4 { use integer; my $body = shift; my @insns = (&$body,&$body,&$body,&$body); # 40 instructions @@ -1188,7 +1194,7 @@ my $_ror=sub { &shrd(@_[0],@_) }; &and (@T[0],@T[1]); &jmp (&label("loop")); -sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 +sub Xupdate_avx_16_31() # recall that $Xi starts with 4 { use integer; my $body = shift; my @insns = (&$body,&$body,&$body,&$body); # 40 instructions diff --git a/crypto/fipsmodule/sha/asm/sha1-armv8.pl b/crypto/fipsmodule/sha/asm/sha1-armv8.pl index 1fd0263b..cec3669f 100644 --- a/crypto/fipsmodule/sha/asm/sha1-armv8.pl +++ b/crypto/fipsmodule/sha/asm/sha1-armv8.pl @@ -26,6 +26,7 @@ # Denver 2.13 3.97 (+0%)(**) # X-Gene 8.80 (+200%) # Mongoose 2.05 6.50 (+160%) +# Kryo 1.88 8.00 (+90%) # # (*) Software results are presented mostly for reference purposes. # (**) Keep in mind that Denver relies on binary translation, which diff --git a/crypto/fipsmodule/sha/asm/sha256-586.pl b/crypto/fipsmodule/sha/asm/sha256-586.pl index 644683e1..129a9f42 100644 --- a/crypto/fipsmodule/sha/asm/sha256-586.pl +++ b/crypto/fipsmodule/sha/asm/sha256-586.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -11,7 +18,7 @@ # # Performance improvement over compiler generated code varies from # 10% to 40% [see below]. Not very impressive on some µ-archs, but -# it's 5 times smaller and optimizies amount of writes. +# it's 5 times smaller and optimizes amount of writes. # # May 2012. # diff --git a/crypto/fipsmodule/sha/asm/sha512-586.pl b/crypto/fipsmodule/sha/asm/sha512-586.pl index cd12a739..25a5f255 100644 --- a/crypto/fipsmodule/sha/asm/sha512-586.pl +++ b/crypto/fipsmodule/sha/asm/sha512-586.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -35,7 +42,7 @@ # (*) whichever best applicable. # (**) x86_64 assembler performance is presented for reference # purposes, the results are for integer-only code. -# (***) paddq is increadibly slow on Atom. +# (***) paddq is incredibly slow on Atom. # # IALU code-path is optimized for elder Pentiums. On vanilla Pentium # performance improvement over compiler generated code reaches ~60%, diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index 37240b28..e2ea0d27 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl @@ -526,7 +526,7 @@ my %globals; ); # Following constants are defined in x86_64 ABI supplement, for - # example avaiable at https://www.uclibc.org/docs/psABI-x86_64.pdf, + # example available at https://www.uclibc.org/docs/psABI-x86_64.pdf, # see section 3.7 "Stack Unwind Algorithm". my %DW_reg_idx = ( "%rax"=>0, "%rdx"=>1, "%rcx"=>2, "%rbx"=>3, @@ -539,7 +539,7 @@ my %globals; # [us]leb128 format is variable-length integer representation base # 2^128, with most significant bit of each byte being 0 denoting - # *last* most significat digit. See "Variable Length Data" in the + # *last* most significant digit. See "Variable Length Data" in the # DWARF specification, numbered 7.6 at least in versions 3 and 4. sub sleb128 { use integer; # get right shift extend sign @@ -1436,6 +1436,6 @@ close STDOUT; # # (*) Note that we're talking about run-time, not debug-time. Lack of # unwind information makes debugging hard on both Windows and -# Unix. "Unlike" referes to the fact that on Unix signal handler +# Unix. "Unlike" refers to the fact that on Unix signal handler # will always be invoked, core dumped and appropriate exit code # returned to parent (for user notification).