# Script preamble: work out the directory the script was started from, make
# the perlasm helper modules loadable, start the translator, and record
# whether SSE2 support was requested on the command line.
# NOTE(review): the leading integers on each line skip values and look like
# line-number residue from an extraction step rather than program text, and
# the match pattern below is broken across two physical lines inside its
# character class. Confirm both against the pristine file.
3 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
# Search "<script dir>perlasm" first, then a relative "perlasm" directory.
4 push(@INC, "${dir}perlasm", "perlasm");
# Initialise perlasm with the first command-line argument and the module
# name "x86cpuid".
7 &asm_init
($ARGV[0],"x86cpuid");
# $sse2 becomes 1 when any argument contains -DOPENSSL_IA32_SSE2.
9 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
# OPENSSL_ia32_cpuid: perlasm calls that emit the CPU capability probe.
# Visible flow: leave early via "nocpuid" when carry is clear, compare the
# vendor registers against "GenuineIntel" and "AuthenticAMD", run the AMD
# extended-leaf path or the Intel path, then in "generic" merge the AMD XOP
# flag into ebp and test OSXSAVE / xgetbv before masking capability bits.
# NOTE(review): the numeric prefixes jump (e.g. 11 -> 24), so instructions
# are missing from this listing -- presumably the cpuid invocations, the
# flag-producing compares and the "intel"/"done" labels. The comments below
# describe only what the visible lines do; confirm against the full file.
11 &function_begin
("OPENSSL_ia32_cpuid");
# Carry clear here means skip the whole probe (the instruction that sets
# carry is not visible in this listing).
24 &jnc
(&label
("nocpuid"));
26 &mov
("edi","eax"); # max value for standard query level
# Vendor check, part 1: ebx/edx/ecx against the three dwords of "GenuineIntel".
29 &cmp ("ebx",0x756e6547); # "Genu"
32 &cmp ("edx",0x49656e69); # "ineI"
35 &cmp ("ecx",0x6c65746e); # "ntel"
37 &or ("ebp","eax"); # 0 indicates Intel CPU
38 &jz
(&label
("intel"));
# Vendor check, part 2: the same registers against "AuthenticAMD".
40 &cmp ("ebx",0x68747541); # "Auth"
43 &cmp ("edx",0x69746E65); # "enti"
46 &cmp ("ecx",0x444D4163); # "cAMD"
48 &or ("esi","eax"); # 0 indicates AMD CPU
# Anything that is not AMD continues on the "intel" path.
49 &jnz
(&label
("intel"));
# AMD path: eax is loaded with 0x80000000 and the resulting eax is compared
# with 0x80000001; below that, fall back to the "intel" path.
# NOTE(review): presumably a cpuid sits between the mov and the cmp -- confirm.
52 &mov
("eax",0x80000000);
54 &cmp ("eax",0x80000001);
55 &jb
(&label
("intel"));
57 &mov
("eax",0x80000001);
59 &and ("ecx","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP bit
60 &or ("ecx",1); # make sure ecx is not zero
# esi is compared with 0x80000008; below that, use the "intel" path.
63 &cmp ("esi",0x80000008);
64 &jb
(&label
("intel"));
66 &mov
("eax",0x80000008);
# Zero-extend the low byte of ecx into esi, then add one.
68 &movz
("esi",&LB
("ecx")); # number of cores - 1
69 &inc
("esi"); # number of cores
# If the hyper-threading bit in edx is clear there is nothing to adjust.
74 &bt
("edx","\$IA32CAP_BIT0_HT");
75 &jnc
(&label
("generic"));
# The compare feeding this ja is not visible in this listing.
79 &ja
(&label
("generic"));
# The bit was found set by the bt above, so the xor turns it off.
80 &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit
81 &jmp
(&label
("generic"));
# NOTE(review): the "intel" label is presumably defined near here in the
# full file; the compare feeding this jb is not visible either.
86 &jb
(&label
("nocacheinfo"));
89 &mov
("ecx",0); # query L1D
93 &and ("edi",0xfff); # number of cores -1 per L1D
95 &set_label
("nocacheinfo");
99 # force reserved bits to 0.
100 &and ("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)");
# The compare feeding this jne is not visible; not-equal skips the
# Intel-only markers below.
102 &jne
(&label
("notintel"));
103 # set reserved bit#30 on Intel CPUs
104 &or ("edx","\$IA32CAP_MASK0_INTEL");
# Keep the low four bits of the high byte of ax and compare them with 15.
105 &and (&HB
("eax"),15); # family ID
106 &cmp (&HB
("eax"),15); # P4?
107 &jne
(&label
("notintel"));
108 # set reserved bit#20 to engage RC4_CHAR
109 &or ("edx","\$IA32CAP_MASK0_INTELP4");
110 &set_label
("notintel");
111 &bt
("edx","\$IA32CAP_BIT0_HT"); # test hyper-threading bit
112 &jnc
(&label
("generic"));
# The bit is known to be set at this point, so the xor turns it off; it is
# turned back on below unless the je is taken.
113 &xor ("edx","\$IA32CAP_MASK0_HT");
# The compare feeding this je is not visible in this listing.
115 &je
(&label
("generic"));
117 &or ("edx","\$IA32CAP_MASK0_HT");
119 &cmp (&LB
("ebx"),1); # see if cache is shared
120 &ja
(&label
("generic"));
121 &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not
# Common tail for all vendors.
123 &set_label
("generic");
124 &and ("ebp","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP flag
125 # force reserved bits to 0.
126 &and ("ecx","\$~IA32CAP_MASK1_AMD_XOP");
128 &or ("ebp","ecx"); # merge AMD XOP flag
# Without OSXSAVE the AVX-class bits are cleared.
130 &bt
("ecx","\$IA32CAP_BIT1_OSXSAVE"); # check OSXSAVE bit
131 &jnc
(&label
("clear_avx"));
# xgetbv is emitted as raw bytes rather than through a mnemonic.
133 &data_byte
(0x0f,0x01,0xd0); # xgetbv
# NOTE(review): the compares that feed the next two je instructions, and the
# "done" label, are not visible in this listing.
136 &je
(&label
("done"));
138 &je
(&label
("clear_avx"));
139 &set_label
("clear_xmm");
140 # clear AESNI and PCLMULQDQ bits.
141 &and ("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)");
# Also drop the FXSR bit from esi.
143 &and ("esi","\$~IA32CAP_MASK0_FXSR");
# The clear_xmm path falls through into clear_avx.
144 &set_label
("clear_avx");
145 # clear AVX, FMA3 and AMD XOP bits.
146 &and ("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)");
# Exit point for the early jnc at the top of the function.
150 &set_label
("nocpuid");
151 &function_end
("OPENSSL_ia32_cpuid");
# Register OPENSSL_ia32cap_P with perlasm as an external label; the
# OPENSSL_wipe_cpu function that follows reads it through picmeup.
153 &external_label
("OPENSSL_ia32cap_P");
# OPENSSL_wipe_cpu: perlasm calls that emit a routine scrubbing CPU register
# state. It loads the first dword of OPENSSL_ia32cap_P, skips everything when
# the FPU bit is clear, zeroes xmm0-xmm7 when both the FXSR and SSE2 bits are
# set, then floods the x87 stack with fldz and reinitialises the FPU.
# eax is left holding esp+4.
# NOTE(review): the numeric prefixes skip values, so some instructions
# (presumably register clears and the final ret) are missing from this
# listing -- confirm against the full file.
155 &function_begin_B
("OPENSSL_wipe_cpu","");
# ecx = address of OPENSSL_ia32cap_P ...
158 &picmeup
("ecx","OPENSSL_ia32cap_P");
# ... then ecx = the capability word stored there.
159 &mov
("ecx",&DWP
(0,"ecx"));
# ecx now holds the capability word, not a pointer, so the FPU bit is tested
# in the register. The previous memory form bt [ecx],bit dereferenced the
# feature bits as if they were an address.
160 &bt
("ecx","\$IA32CAP_BIT0_FPU");
161 &jnc
(&label
("no_x87"));
163 # Check SSE2 and FXSR bits.
164 &and ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)");
165 &cmp ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)");
166 &jne
(&label
("no_sse2"));
# Both bits are set: zero the eight XMM registers.
167 &pxor
("xmm0","xmm0");
168 &pxor
("xmm1","xmm1");
169 &pxor
("xmm2","xmm2");
170 &pxor
("xmm3","xmm3");
171 &pxor
("xmm4","xmm4");
172 &pxor
("xmm5","xmm5");
173 &pxor
("xmm6","xmm6");
174 &pxor
("xmm7","xmm7");
175 &set_label
("no_sse2");
177 # just a bunch of fldz to zap the fp/mm bank followed by finit...
# Each 0xeed9eed9 dword is two fldz (d9 ee) in little-endian byte order; the
# last dword encodes wait (9b), fninit (db e3) and nop (90).
178 &data_word
(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
179 &set_label
("no_x87");
# eax = esp + 4.
180 &lea
("eax",&DWP
(4,"esp"));
182 &function_end_B
("OPENSSL_wipe_cpu");
# OPENSSL_atomic_add: perlasm calls that emit an atomic add built on a
# lock cmpxchg retry loop. The pointer is read from 4(esp) and the increment
# from 8(esp); the candidate new value is formed in ebx, and on success it is
# copied to eax as the result.
# NOTE(review): the "spin" label targeted by the jne, any save/restore of
# ebx, and the ret are not present in the visible lines; the gaps in the
# numeric prefixes (186 -> 189, 195 -> 198) suggest they were dropped from
# this listing -- confirm against the full file.
184 &function_begin_B
("OPENSSL_atomic_add");
185 &mov
("edx",&DWP
(4,"esp")); # fetch the pointer, 1st arg
186 &mov
("ecx",&DWP
(8,"esp")); # fetch the increment, 2nd arg
# eax = current value at the pointer; it is the comparand for cmpxchg.
189 &mov
("eax",&DWP
(0,"edx"));
# ebx = eax + ecx, i.e. old value plus increment, computed without touching
# flags.
191 &lea
("ebx",&DWP
(0,"eax","ecx"));
# The locked compare-and-exchange is emitted as a raw dword
# (bytes f0 0f b1 1a).
193 &data_word
(0x1ab10ff0); # lock; cmpxchg %ebx,(%edx) # %eax is involved and is always reloaded
# Not-equal means the exchange did not happen; retry.
194 &jne
(&label
("spin"));
195 &mov
("eax","ebx"); # OpenSSL expects the new value
198 &function_end_B
("OPENSSL_atomic_add");
# Hand OPENSSL_cpuid_setup to perlasm's initseg helper.
# NOTE(review): presumably this arranges for the function to run from an
# initialisation segment at load time; the helper is defined outside this
# file, so confirm its exact effect there.
200 &initseg
("OPENSSL_cpuid_setup");