2 # SPDX-License-Identifier: GPL-2.0
4 # This code is taken from the OpenSSL project but the author (Andy Polyakov)
5 # has relicensed it under the GPLv2. Therefore this program is free software;
6 # you can redistribute it and/or modify it under the terms of the GNU General
7 # Public License version 2 as published by the Free Software Foundation.
9 # The original headers, including the original license headers, are
10 # included below for completeness.
12 # ====================================================================
13 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
14 # project. The module is, however, dual licensed under OpenSSL and
15 # CRYPTOGAMS licenses depending on where you obtain it. For further
16 # details see http://www.openssl.org/~appro/cryptogams/.
17 # ====================================================================
19 # GHASH for PowerISA v2.07.
23 # Accurate performance measurements are problematic, because it's
24 # always a virtualized setup with a possibly throttled processor.
25 # Relative comparison is therefore more informative. This initial
26 # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
27 # faster than "4-bit" integer-only compiler-generated 64-bit code.
28 # "Initial version" means that there is room for further improvement.
33 if ($flavour =~ /64/) {
39 } elsif ($flavour =~ /32/) {
45 } else { die "nonsense $flavour"; }
# Locate the ppc-xlate.pl perlasm translator: first in the directory this
# script lives in, then in ../../perlasm/ relative to it.  The captured $dir
# ends with its path separator, so it can be prepended to a file name as-is.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Send everything printed on STDOUT through the translator.  The
# low-precedence "or" is deliberate: with "||" the die() binds to the command
# string (always true), so a failed open would never be reported.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
# Symbolic names for the registers referenced by the assembly in this file.
# General-purpose registers r3..r6, in this order:
54 my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
# Vector registers v0..v3: $Xl/$Xm/$Xh are the vpmsumd destinations,
# $IN is an input operand of those multiplications.
56 my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
# Vector registers v4..v12: a zero vector, temporaries, the 0xc2 constant,
# the H-derived values and the permute mask used by the le?-prefixed code.
57 my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
72 lvx_u
$H,0,r4
# load H
74 le?addi r7
,r7
,0x8 # need a vperm start with 08
77 le?vxor
5,5,6 # set a b-endian mask
80 vspltisb
$xC2,-16 # 0xf0
82 vaddubm
$xC2,$xC2,$xC2 # 0xe0
83 vxor
$zero,$zero,$zero
84 vor
$xC2,$xC2,$t0 # 0xe1
85 vsldoi
$xC2,$xC2,$zero,15 # 0xe1...
86 vsldoi
$t1,$zero,$t0,1 # ...1
87 vaddubm
$xC2,$xC2,$xC2 # 0xc2...
89 vor
$xC2,$xC2,$t1 # 0xc2....01
90 vspltb
$t1,$H,0 # most significant byte
92 vsrab
$t1,$t1,$t2 # broadcast carry bit
94 vxor
$H,$H,$t1 # twisted H
96 vsldoi
$H,$H,$H,8 # twist even more ...
97 vsldoi
$xC2,$zero,$xC2,8 # 0xc2.0
98 vsldoi
$Hl,$zero,$H,8 # ... and split
101 stvx_u
$xC2,0,r3
# save pre-computed table
109 .byte
0,12,0x14,0,0,0,2,0
111 .size
.gcm_init_p8
,.-.gcm_init_p8
120 lvx_u
$IN,0,$Xip # load Xi
122 lvx_u
$Hl,r8
,$Htbl # load pre-computed table
123 le?lvsl
$lemask,r0
,r0
127 le?vxor
$lemask,$lemask,$t0
129 le?vperm
$IN,$IN,$IN,$lemask
130 vxor
$zero,$zero,$zero
132 vpmsumd
$Xl,$IN,$Hl # H.lo·Xi.lo
133 vpmsumd
$Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
134 vpmsumd
$Xh,$IN,$Hh # H.hi·Xi.hi
136 vpmsumd
$t2,$Xl,$xC2 # 1st phase
138 vsldoi
$t0,$Xm,$zero,8
139 vsldoi
$t1,$zero,$Xm,8
146 vsldoi
$t1,$Xl,$Xl,8 # 2nd phase
151 le?vperm
$Xl,$Xl,$Xl,$lemask
152 stvx_u
$Xl,0,$Xip # write out Xi
157 .byte
0,12,0x14,0,0,0,2,0
159 .size
.gcm_gmult_p8
,.-.gcm_gmult_p8
168 lvx_u
$Xl,0,$Xip # load Xi
170 lvx_u
$Hl,r8
,$Htbl # load pre-computed table
171 le?lvsl
$lemask,r0
,r0
175 le?vxor
$lemask,$lemask,$t0
177 le?vperm
$Xl,$Xl,$Xl,$lemask
178 vxor
$zero,$zero,$zero
183 le?vperm
$IN,$IN,$IN,$lemask
190 vpmsumd
$Xl,$IN,$Hl # H.lo·Xi.lo
191 subfe
. r0
,r0
,r0
# borrow?-1:0
192 vpmsumd
$Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
194 vpmsumd
$Xh,$IN,$Hh # H.hi·Xi.hi
197 vpmsumd
$t2,$Xl,$xC2 # 1st phase
199 vsldoi
$t0,$Xm,$zero,8
200 vsldoi
$t1,$zero,$Xm,8
209 vsldoi
$t1,$Xl,$Xl,8 # 2nd phase
211 le?vperm
$IN,$IN,$IN,$lemask
215 beq Loop
# did $len-=16 borrow?
218 le?vperm
$Xl,$Xl,$Xl,$lemask
219 stvx_u
$Xl,0,$Xip # write out Xi
224 .byte
0,12,0x14,0,0,0,4,0
226 .size
.gcm_ghash_p8
,.-.gcm_ghash_p8
228 .asciz
"GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
232 foreach (split("\n",$code)) {
233 if ($flavour =~ /le$/o) { # little-endian
# Closing the pipe flushes any buffered output and waits for the translator
# process.  A false return means the flush failed or the child exited with a
# non-zero status, so report it instead of silently leaving truncated output.
close STDOUT or die "error closing STDOUT: $! (child status $?)";