llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(cos|exp|log|sin|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|trunc)" --version 2
   2
   3 ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON
   4 ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE
   5 ; RUN: opt -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=ARMPL-NEON
   6 ; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=ARMPL-SVE
   7
   8 target triple = "aarch64-unknown-linux-gnu"
   9
  10 ; Check that loops containing intrinsic calls can be vectorized when the
  11 ; compiler provides TLI mappings to the intrinsics' vector variants. The
  12 ; tests cover fixed-width vectorization with NEON and scalable vectorization
  13 ; with SVE.
  14
  15 declare double @llvm.ceil.f64(double)
  16 declare float @llvm.ceil.f32(float)
  17
  18 define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.ceil.f64(in[i]) for i in [0, 1000)
  19 ; SLEEF-NEON-LABEL: define void @ceil_f64
  20 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
  21 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
  22 ;
  23 ; SLEEF-SVE-LABEL: define void @ceil_f64
  24 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
  25 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
  26 ;
  27 ; ARMPL-NEON-LABEL: define void @ceil_f64
  28 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
  29 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
  30 ;
  31 ; ARMPL-SVE-LABEL: define void @ceil_f64
  32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
  33 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
  34 ;
  35   entry:
  36   br label %for.body
  37
  38   for.body:
  39   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
  40   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  41   %in = load double, ptr %in.gep, align 8
  42   %call = tail call double @llvm.ceil.f64(double %in) ; all four configurations expect the vector form of this same intrinsic
  43   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  44   store double %call, ptr %out.gep, align 8
  45   %iv.next = add nuw nsw i64 %iv, 1
  46   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
  47   br i1 %exitcond, label %for.end, label %for.body
  48
  49   for.end:
  50   ret void
  51 }
  52
  53 define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.ceil.f32(in[i]) for i in [0, 1000)
  54 ; SLEEF-NEON-LABEL: define void @ceil_f32
  55 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
  56 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
  57 ;
  58 ; SLEEF-SVE-LABEL: define void @ceil_f32
  59 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
  60 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
  61 ;
  62 ; ARMPL-NEON-LABEL: define void @ceil_f32
  63 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
  64 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
  65 ;
  66 ; ARMPL-SVE-LABEL: define void @ceil_f32
  67 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
  68 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
  69 ;
  70   entry:
  71   br label %for.body
  72
  73   for.body:
  74   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
  75   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  76   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
  77   %call = tail call float @llvm.ceil.f32(float %in) ; all four configurations expect the vector form of this same intrinsic
  78   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  79   store float %call, ptr %out.gep, align 4
  80   %iv.next = add nuw nsw i64 %iv, 1
  81   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
  82   br i1 %exitcond, label %for.end, label %for.body
  83
  84   for.end:
  85   ret void
  86 }
  87
  88 declare double @llvm.copysign.f64(double, double)
  89 declare float @llvm.copysign.f32(float, float)
  90
  91 define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.copysign.f64(in[i], in[i]) for i in [0, 1000)
  92 ; SLEEF-NEON-LABEL: define void @copysign_f64
  93 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
  94 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
  95 ;
  96 ; SLEEF-SVE-LABEL: define void @copysign_f64
  97 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
  98 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
  99 ;
 100 ; ARMPL-NEON-LABEL: define void @copysign_f64
 101 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 102 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 103 ;
 104 ; ARMPL-SVE-LABEL: define void @copysign_f64
 105 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 106 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 107 ;
 108   entry:
 109   br label %for.body
 110
 111   for.body:
 112   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 113   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 114   %in = load double, ptr %in.gep, align 8
 115   %call = tail call double @llvm.copysign.f64(double %in, double %in) ; same loaded value feeds both operands; vector form of the intrinsic is expected
 116   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 117   store double %call, ptr %out.gep, align 8
 118   %iv.next = add nuw nsw i64 %iv, 1
 119   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 120   br i1 %exitcond, label %for.end, label %for.body
 121
 122   for.end:
 123   ret void
 124 }
 125
 126 define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.copysign.f32(in[i], in[i]) for i in [0, 1000)
 127 ; SLEEF-NEON-LABEL: define void @copysign_f32
 128 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 129 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 130 ;
 131 ; SLEEF-SVE-LABEL: define void @copysign_f32
 132 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 133 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 134 ;
 135 ; ARMPL-NEON-LABEL: define void @copysign_f32
 136 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 137 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 138 ;
 139 ; ARMPL-SVE-LABEL: define void @copysign_f32
 140 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 141 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 142 ;
 143   entry:
 144   br label %for.body
 145
 146   for.body:
 147   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 148   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 149   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
 150   %call = tail call float @llvm.copysign.f32(float %in, float %in) ; same loaded value feeds both operands; vector form of the intrinsic is expected
 151   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 152   store float %call, ptr %out.gep, align 4
 153   %iv.next = add nuw nsw i64 %iv, 1
 154   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 155   br i1 %exitcond, label %for.end, label %for.body
 156
 157   for.end:
 158   ret void
 159 }
 160
 161 declare double @llvm.cos.f64(double)
 162 declare float @llvm.cos.f32(float)
 163
 164 define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.cos.f64(in[i]) for i in [0, 1000)
 165 ; SLEEF-NEON-LABEL: define void @cos_f64
 166 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 167 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
 168 ;
 169 ; SLEEF-SVE-LABEL: define void @cos_f64
 170 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 171 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 172 ;
 173 ; ARMPL-NEON-LABEL: define void @cos_f64
 174 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 175 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 176 ;
 177 ; ARMPL-SVE-LABEL: define void @cos_f64
 178 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 179 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 180 ;
 181   entry:
 182   br label %for.body
 183
 184   for.body:
 185   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 186   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 187   %in = load double, ptr %in.gep, align 8
 188   %call = tail call double @llvm.cos.f64(double %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 189   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 190   store double %call, ptr %out.gep, align 8
 191   %iv.next = add nuw nsw i64 %iv, 1
 192   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 193   br i1 %exitcond, label %for.end, label %for.body
 194
 195   for.end:
 196   ret void
 197 }
 198
 199 define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.cos.f32(in[i]) for i in [0, 1000)
 200 ; SLEEF-NEON-LABEL: define void @cos_f32
 201 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 202 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
 203 ;
 204 ; SLEEF-SVE-LABEL: define void @cos_f32
 205 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 206 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 207 ;
 208 ; ARMPL-NEON-LABEL: define void @cos_f32
 209 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 210 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 211 ;
 212 ; ARMPL-SVE-LABEL: define void @cos_f32
 213 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 214 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 215 ;
 216   entry:
 217   br label %for.body
 218
 219   for.body:
 220   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 221   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 222   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
 223   %call = tail call float @llvm.cos.f32(float %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 224   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 225   store float %call, ptr %out.gep, align 4
 226   %iv.next = add nuw nsw i64 %iv, 1
 227   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 228   br i1 %exitcond, label %for.end, label %for.body
 229
 230   for.end:
 231   ret void
 232 }
 233
 234 declare double @llvm.exp.f64(double)
 235 declare float @llvm.exp.f32(float)
 236
 237 define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.exp.f64(in[i]) for i in [0, 1000)
 238 ; SLEEF-NEON-LABEL: define void @exp_f64
 239 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 240 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
 241 ;
 242 ; SLEEF-SVE-LABEL: define void @exp_f64
 243 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 244 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 245 ;
 246 ; ARMPL-NEON-LABEL: define void @exp_f64
 247 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 248 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 249 ;
 250 ; ARMPL-SVE-LABEL: define void @exp_f64
 251 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 252 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 253 ;
 254   entry:
 255   br label %for.body
 256
 257   for.body:
 258   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 259   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 260   %in = load double, ptr %in.gep, align 8
 261   %call = tail call double @llvm.exp.f64(double %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 262   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 263   store double %call, ptr %out.gep, align 8
 264   %iv.next = add nuw nsw i64 %iv, 1
 265   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 266   br i1 %exitcond, label %for.end, label %for.body
 267
 268   for.end:
 269   ret void
 270 }
 271
 272 define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.exp.f32(in[i]) for i in [0, 1000)
 273 ; SLEEF-NEON-LABEL: define void @exp_f32
 274 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 275 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
 276 ;
 277 ; SLEEF-SVE-LABEL: define void @exp_f32
 278 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 279 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 280 ;
 281 ; ARMPL-NEON-LABEL: define void @exp_f32
 282 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 283 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 284 ;
 285 ; ARMPL-SVE-LABEL: define void @exp_f32
 286 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 287 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 288 ;
 289   entry:
 290   br label %for.body
 291
 292   for.body:
 293   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 294   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 295   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
 296   %call = tail call float @llvm.exp.f32(float %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 297   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 298   store float %call, ptr %out.gep, align 4
 299   %iv.next = add nuw nsw i64 %iv, 1
 300   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 301   br i1 %exitcond, label %for.end, label %for.body
 302
 303   for.end:
 304   ret void
 305 }
 306
 307 declare double @llvm.exp10.f64(double)
 308 declare float @llvm.exp10.f32(float)
 309
 310 define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.exp10.f64(in[i]) for i in [0, 1000)
 311 ; SLEEF-NEON-LABEL: define void @exp10_f64
 312 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 313 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
 314 ;
 315 ; SLEEF-SVE-LABEL: define void @exp10_f64
 316 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 317 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 318 ;
 319 ; ARMPL-NEON-LABEL: define void @exp10_f64
 320 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 321 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 322 ;
 323 ; ARMPL-SVE-LABEL: define void @exp10_f64
 324 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 325 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 326 ;
 327   entry:
 328   br label %for.body
 329
 330   for.body:
 331   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 332   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 333   %in = load double, ptr %in.gep, align 8
 334   %call = tail call double @llvm.exp10.f64(double %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 335   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 336   store double %call, ptr %out.gep, align 8
 337   %iv.next = add nuw nsw i64 %iv, 1
 338   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 339   br i1 %exitcond, label %for.end, label %for.body
 340
 341   for.end:
 342   ret void
 343 }
 344
 345 define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.exp10.f32(in[i]) for i in [0, 1000)
 346 ; SLEEF-NEON-LABEL: define void @exp10_f32
 347 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 348 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
 349 ;
 350 ; SLEEF-SVE-LABEL: define void @exp10_f32
 351 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 352 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 353 ;
 354 ; ARMPL-NEON-LABEL: define void @exp10_f32
 355 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 356 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 357 ;
 358 ; ARMPL-SVE-LABEL: define void @exp10_f32
 359 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 360 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 361 ;
 362   entry:
 363   br label %for.body
 364
 365   for.body:
 366   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 367   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 368   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
 369   %call = tail call float @llvm.exp10.f32(float %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 370   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 371   store float %call, ptr %out.gep, align 4
 372   %iv.next = add nuw nsw i64 %iv, 1
 373   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 374   br i1 %exitcond, label %for.end, label %for.body
 375
 376   for.end:
 377   ret void
 378 }
 379
 380 declare double @llvm.exp2.f64(double)
 381 declare float @llvm.exp2.f32(float)
 382
 383 define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.exp2.f64(in[i]) for i in [0, 1000)
 384 ; SLEEF-NEON-LABEL: define void @exp2_f64
 385 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 386 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
 387 ;
 388 ; SLEEF-SVE-LABEL: define void @exp2_f64
 389 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 390 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 391 ;
 392 ; ARMPL-NEON-LABEL: define void @exp2_f64
 393 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 394 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 395 ;
 396 ; ARMPL-SVE-LABEL: define void @exp2_f64
 397 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 398 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 399 ;
 400   entry:
 401   br label %for.body
 402
 403   for.body:
 404   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 405   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 406   %in = load double, ptr %in.gep, align 8
 407   %call = tail call double @llvm.exp2.f64(double %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 408   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 409   store double %call, ptr %out.gep, align 8
 410   %iv.next = add nuw nsw i64 %iv, 1
 411   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 412   br i1 %exitcond, label %for.end, label %for.body
 413
 414   for.end:
 415   ret void
 416 }
 417
 418 define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.exp2.f32(in[i]) for i in [0, 1000)
 419 ; SLEEF-NEON-LABEL: define void @exp2_f32
 420 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 421 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
 422 ;
 423 ; SLEEF-SVE-LABEL: define void @exp2_f32
 424 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 425 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 426 ;
 427 ; ARMPL-NEON-LABEL: define void @exp2_f32
 428 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 429 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 430 ;
 431 ; ARMPL-SVE-LABEL: define void @exp2_f32
 432 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 433 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 434 ;
 435   entry:
 436   br label %for.body
 437
 438   for.body:
 439   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 440   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 441   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
 442   %call = tail call float @llvm.exp2.f32(float %in) ; expected to become a vector-library call (masked with the active-lane mask under SVE)
 443   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 444   store float %call, ptr %out.gep, align 4
 445   %iv.next = add nuw nsw i64 %iv, 1
 446   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 447   br i1 %exitcond, label %for.end, label %for.body
 448
 449   for.end:
 450   ret void
 451 }
 452
 453 declare double @llvm.fabs.f64(double)
 454 declare float @llvm.fabs.f32(float)
 455
 456 define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.fabs.f64(in[i]) for i in [0, 1000)
 457 ; SLEEF-NEON-LABEL: define void @fabs_f64
 458 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 459 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 460 ;
 461 ; SLEEF-SVE-LABEL: define void @fabs_f64
 462 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 463 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 464 ;
 465 ; ARMPL-NEON-LABEL: define void @fabs_f64
 466 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 467 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 468 ;
 469 ; ARMPL-SVE-LABEL: define void @fabs_f64
 470 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 471 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 472 ;
 473   entry:
 474   br label %for.body
 475
 476   for.body:
 477   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 478   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 479   %in = load double, ptr %in.gep, align 8
 480   %call = tail call double @llvm.fabs.f64(double %in) ; all four configurations expect the vector form of this same intrinsic
 481   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 482   store double %call, ptr %out.gep, align 8
 483   %iv.next = add nuw nsw i64 %iv, 1
 484   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 485   br i1 %exitcond, label %for.end, label %for.body
 486
 487   for.end:
 488   ret void
 489 }
 490
 491 define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.fabs.f32(in[i]) for i in [0, 1000)
 492 ; SLEEF-NEON-LABEL: define void @fabs_f32
 493 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 494 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 495 ;
 496 ; SLEEF-SVE-LABEL: define void @fabs_f32
 497 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 498 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 499 ;
 500 ; ARMPL-NEON-LABEL: define void @fabs_f32
 501 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 502 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 503 ;
 504 ; ARMPL-SVE-LABEL: define void @fabs_f32
 505 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 506 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 507 ;
 508   entry:
 509   br label %for.body
 510
 511   for.body:
 512   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 513   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 514   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
 515   %call = tail call float @llvm.fabs.f32(float %in) ; all four configurations expect the vector form of this same intrinsic
 516   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 517   store float %call, ptr %out.gep, align 4
 518   %iv.next = add nuw nsw i64 %iv, 1
 519   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 520   br i1 %exitcond, label %for.end, label %for.body
 521
 522   for.end:
 523   ret void
 524 }
 525
 526 declare double @llvm.floor.f64(double)
 527 declare float @llvm.floor.f32(float)
 528
 529 define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.floor.f64(in[i]) for i in [0, 1000)
 530 ; SLEEF-NEON-LABEL: define void @floor_f64
 531 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 532 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 533 ;
 534 ; SLEEF-SVE-LABEL: define void @floor_f64
 535 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 536 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 537 ;
 538 ; ARMPL-NEON-LABEL: define void @floor_f64
 539 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 540 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 541 ;
 542 ; ARMPL-SVE-LABEL: define void @floor_f64
 543 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 544 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 545 ;
 546   entry:
 547   br label %for.body
 548
 549   for.body:
 550   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 551   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 552   %in = load double, ptr %in.gep, align 8
 553   %call = tail call double @llvm.floor.f64(double %in) ; all four configurations expect the vector form of this same intrinsic
 554   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 555   store double %call, ptr %out.gep, align 8
 556   %iv.next = add nuw nsw i64 %iv, 1
 557   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 558   br i1 %exitcond, label %for.end, label %for.body
 559
 560   for.end:
 561   ret void
 562 }
 563
 564 define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.floor.f32(in[i]) for i in [0, 1000)
 565 ; SLEEF-NEON-LABEL: define void @floor_f32
 566 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 567 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 568 ;
 569 ; SLEEF-SVE-LABEL: define void @floor_f32
 570 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 571 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 572 ;
 573 ; ARMPL-NEON-LABEL: define void @floor_f32
 574 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 575 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 576 ;
 577 ; ARMPL-SVE-LABEL: define void @floor_f32
 578 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 579 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 580 ;
 581   entry:
 582   br label %for.body
 583
 584   for.body:
 585   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 586   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 587   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so 4 is the largest alignment valid for every %iv
 588   %call = tail call float @llvm.floor.f32(float %in) ; all four configurations expect the vector form of this same intrinsic
 589   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 590   store float %call, ptr %out.gep, align 4
 591   %iv.next = add nuw nsw i64 %iv, 1
 592   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 593   br i1 %exitcond, label %for.end, label %for.body
 594
 595   for.end:
 596   ret void
 597 }
 598
 599 declare double @llvm.fma.f64(double, double, double)
 600 declare float @llvm.fma.f32(float, float, float)
 601
 602 define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.fma.f64(in[i], in[i], in[i]) for i in [0, 1000)
 603 ; SLEEF-NEON-LABEL: define void @fma_f64
 604 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 605 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
 606 ;
 607 ; SLEEF-SVE-LABEL: define void @fma_f64
 608 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 609 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 610 ;
 611 ; ARMPL-NEON-LABEL: define void @fma_f64
 612 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 613 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
 614 ;
 615 ; ARMPL-SVE-LABEL: define void @fma_f64
 616 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 617 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 618 ;
 619   entry:
 620   br label %for.body
 621
 622   for.body:
 623   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
 624   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
 625   %in = load double, ptr %in.gep, align 8
 626   %call = tail call double @llvm.fma.f64(double %in, double %in, double %in) ; same loaded value feeds all three operands; vector form of the intrinsic is expected
 627   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
 628   store double %call, ptr %out.gep, align 8
 629   %iv.next = add nuw nsw i64 %iv, 1
 630   %exitcond = icmp eq i64 %iv.next, 1000 ; loop runs exactly 1000 iterations
 631   br i1 %exitcond, label %for.end, label %for.body
 632
 633   for.end:
 634   ret void
 635 }
 636
 637 define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) {
 638 ; SLEEF-NEON-LABEL: define void @fma_f32
 639 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 640 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
 641 ;
 642 ; SLEEF-SVE-LABEL: define void @fma_f32
 643 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 644 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 645 ;
 646 ; ARMPL-NEON-LABEL: define void @fma_f32
 647 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 648 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
 649 ;
 650 ; ARMPL-SVE-LABEL: define void @fma_f32
 651 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 652 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 653 ;
 654   entry:  ; scalar loop: out[i] = fma(in[i], in[i], in[i]) for i in [0, 1000)
 655   br label %for.body
 656
 657   for.body:  ; single-block loop; %iv is the element index
 658   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 659   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 660   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
 661   %call = tail call float @llvm.fma.f32(float %in, float %in, float %in)  ; the same loaded value feeds all three operands
 662   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 663   store float %call, ptr %out.gep, align 4
 664   %iv.next = add nuw nsw i64 %iv, 1
 665   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
 666   br i1 %exitcond, label %for.end, label %for.body
 667
 668   for.end:
 669   ret void
 670 }
 671
 672 declare double @llvm.log.f64(double)
 673 declare float @llvm.log.f32(float)
 674
 675 define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 676 ; SLEEF-NEON-LABEL: define void @log_f64
 677 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 678 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
 679 ;
 680 ; SLEEF-SVE-LABEL: define void @log_f64
 681 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 682 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 683 ;
 684 ; ARMPL-NEON-LABEL: define void @log_f64
 685 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 686 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 687 ;
 688 ; ARMPL-SVE-LABEL: define void @log_f64
 689 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 690 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 691 ;
 692   start:  ; scalar loop: out[i] = log(in[i]) for i in [0, 1000)
 693   br label %loop
 694
 695   loop:  ; single-block loop; %idx is the element index
 696   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 697   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
 698   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
 699   %x = load double, ptr %src, align 8
 700   %res = tail call double @llvm.log.f64(double %x)
 701   store double %res, ptr %dst, align 8
 702   %idx.next = add nuw nsw i64 %idx, 1
 703   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
 704   br i1 %done, label %exit, label %loop
 705
 706   exit:
 707   ret void
 708 }
 709
 710 define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
 711 ; SLEEF-NEON-LABEL: define void @log_f32
 712 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 713 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
 714 ;
 715 ; SLEEF-SVE-LABEL: define void @log_f32
 716 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 717 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 718 ;
 719 ; ARMPL-NEON-LABEL: define void @log_f32
 720 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 721 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 722 ;
 723 ; ARMPL-SVE-LABEL: define void @log_f32
 724 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 725 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 726 ;
 727   entry:  ; scalar loop: out[i] = log(in[i]) for i in [0, 1000)
 728   br label %for.body
 729
 730   for.body:  ; single-block loop; %iv is the element index
 731   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 732   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 733   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
 734   %call = tail call float @llvm.log.f32(float %in)
 735   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 736   store float %call, ptr %out.gep, align 4
 737   %iv.next = add nuw nsw i64 %iv, 1
 738   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
 739   br i1 %exitcond, label %for.end, label %for.body
 740
 741   for.end:
 742   ret void
 743 }
 744
 745 declare double @llvm.log10.f64(double)
 746 declare float @llvm.log10.f32(float)
 747
 748 define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 749 ; SLEEF-NEON-LABEL: define void @log10_f64
 750 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 751 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
 752 ;
 753 ; SLEEF-SVE-LABEL: define void @log10_f64
 754 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 755 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 756 ;
 757 ; ARMPL-NEON-LABEL: define void @log10_f64
 758 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 759 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 760 ;
 761 ; ARMPL-SVE-LABEL: define void @log10_f64
 762 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 763 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 764 ;
 765   start:  ; scalar loop: out[i] = log10(in[i]) for i in [0, 1000)
 766   br label %loop
 767
 768   loop:  ; single-block loop; %idx is the element index
 769   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 770   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
 771   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
 772   %x = load double, ptr %src, align 8
 773   %res = tail call double @llvm.log10.f64(double %x)
 774   store double %res, ptr %dst, align 8
 775   %idx.next = add nuw nsw i64 %idx, 1
 776   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
 777   br i1 %done, label %exit, label %loop
 778
 779   exit:
 780   ret void
 781 }
 782
 783 define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
 784 ; SLEEF-NEON-LABEL: define void @log10_f32
 785 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 786 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
 787 ;
 788 ; SLEEF-SVE-LABEL: define void @log10_f32
 789 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 790 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 791 ;
 792 ; ARMPL-NEON-LABEL: define void @log10_f32
 793 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 794 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 795 ;
 796 ; ARMPL-SVE-LABEL: define void @log10_f32
 797 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 798 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 799 ;
 800   entry:  ; scalar loop: out[i] = log10(in[i]) for i in [0, 1000)
 801   br label %for.body
 802
 803   for.body:  ; single-block loop; %iv is the element index
 804   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 805   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 806   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
 807   %call = tail call float @llvm.log10.f32(float %in)
 808   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 809   store float %call, ptr %out.gep, align 4
 810   %iv.next = add nuw nsw i64 %iv, 1
 811   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
 812   br i1 %exitcond, label %for.end, label %for.body
 813
 814   for.end:
 815   ret void
 816 }
 817
 818 declare double @llvm.log2.f64(double)
 819 declare float @llvm.log2.f32(float)
 820
 821 define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 822 ; SLEEF-NEON-LABEL: define void @log2_f64
 823 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 824 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
 825 ;
 826 ; SLEEF-SVE-LABEL: define void @log2_f64
 827 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 828 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 829 ;
 830 ; ARMPL-NEON-LABEL: define void @log2_f64
 831 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 832 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 833 ;
 834 ; ARMPL-SVE-LABEL: define void @log2_f64
 835 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 836 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 837 ;
 838   start:  ; scalar loop: out[i] = log2(in[i]) for i in [0, 1000)
 839   br label %loop
 840
 841   loop:  ; single-block loop; %idx is the element index
 842   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 843   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
 844   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
 845   %x = load double, ptr %src, align 8
 846   %res = tail call double @llvm.log2.f64(double %x)
 847   store double %res, ptr %dst, align 8
 848   %idx.next = add nuw nsw i64 %idx, 1
 849   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
 850   br i1 %done, label %exit, label %loop
 851
 852   exit:
 853   ret void
 854 }
 855
 856 define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
 857 ; SLEEF-NEON-LABEL: define void @log2_f32
 858 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 859 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
 860 ;
 861 ; SLEEF-SVE-LABEL: define void @log2_f32
 862 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 863 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 864 ;
 865 ; ARMPL-NEON-LABEL: define void @log2_f32
 866 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 867 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 868 ;
 869 ; ARMPL-SVE-LABEL: define void @log2_f32
 870 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 871 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 872 ;
 873   entry:  ; scalar loop: out[i] = log2(in[i]) for i in [0, 1000)
 874   br label %for.body
 875
 876   for.body:  ; single-block loop; %iv is the element index
 877   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 878   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 879   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
 880   %call = tail call float @llvm.log2.f32(float %in)
 881   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 882   store float %call, ptr %out.gep, align 4
 883   %iv.next = add nuw nsw i64 %iv, 1
 884   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
 885   br i1 %exitcond, label %for.end, label %for.body
 886
 887   for.end:
 888   ret void
 889 }
 890
 891 declare double @llvm.maxnum.f64(double, double)
 892 declare float @llvm.maxnum.f32(float, float)
 893
 894 define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 895 ; SLEEF-NEON-LABEL: define void @maxnum_f64
 896 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 897 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 898 ;
 899 ; SLEEF-SVE-LABEL: define void @maxnum_f64
 900 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 901 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 902 ;
 903 ; ARMPL-NEON-LABEL: define void @maxnum_f64
 904 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 905 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 906 ;
 907 ; ARMPL-SVE-LABEL: define void @maxnum_f64
 908 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 909 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 910 ;
 911   start:  ; scalar loop: out[i] = maxnum(in[i], in[i]) for i in [0, 1000)
 912   br label %loop
 913
 914   loop:  ; single-block loop; %idx is the element index
 915   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 916   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
 917   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
 918   %x = load double, ptr %src, align 8
 919   %res = tail call double @llvm.maxnum.f64(double %x, double %x)  ; the same loaded value feeds both operands
 920   store double %res, ptr %dst, align 8
 921   %idx.next = add nuw nsw i64 %idx, 1
 922   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
 923   br i1 %done, label %exit, label %loop
 924
 925   exit:
 926   ret void
 927 }
 928
 929 define void @maxnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
 930 ; SLEEF-NEON-LABEL: define void @maxnum_f32
 931 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 932 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 933 ;
 934 ; SLEEF-SVE-LABEL: define void @maxnum_f32
 935 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 936 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 937 ;
 938 ; ARMPL-NEON-LABEL: define void @maxnum_f32
 939 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 940 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 941 ;
 942 ; ARMPL-SVE-LABEL: define void @maxnum_f32
 943 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 944 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 945 ;
 946   entry:  ; scalar loop: out[i] = maxnum(in[i], in[i]) for i in [0, 1000)
 947   br label %for.body
 948
 949   for.body:  ; single-block loop; %iv is the element index
 950   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 951   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
 952   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
 953   %call = tail call float @llvm.maxnum.f32(float %in, float %in)  ; the same loaded value feeds both operands
 954   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
 955   store float %call, ptr %out.gep, align 4
 956   %iv.next = add nuw nsw i64 %iv, 1
 957   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
 958   br i1 %exitcond, label %for.end, label %for.body
 959
 960   for.end:
 961   ret void
 962 }
 963
 964 declare double @llvm.minnum.f64(double, double)
 965 declare float @llvm.minnum.f32(float, float)
 966
 967 define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 968 ; SLEEF-NEON-LABEL: define void @minnum_f64
 969 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 970 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 971 ;
 972 ; SLEEF-SVE-LABEL: define void @minnum_f64
 973 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 974 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 975 ;
 976 ; ARMPL-NEON-LABEL: define void @minnum_f64
 977 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 978 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 979 ;
 980 ; ARMPL-SVE-LABEL: define void @minnum_f64
 981 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 982 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 983 ;
 984   start:  ; scalar loop: out[i] = minnum(in[i], in[i]) for i in [0, 1000)
 985   br label %loop
 986
 987   loop:  ; single-block loop; %idx is the element index
 988   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 989   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
 990   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
 991   %x = load double, ptr %src, align 8
 992   %res = tail call double @llvm.minnum.f64(double %x, double %x)  ; the same loaded value feeds both operands
 993   store double %res, ptr %dst, align 8
 994   %idx.next = add nuw nsw i64 %idx, 1
 995   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
 996   br i1 %done, label %exit, label %loop
 997
 998   exit:
 999   ret void
1000 }
1001
1002 define void @minnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1003 ; SLEEF-NEON-LABEL: define void @minnum_f32
1004 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1005 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1006 ;
1007 ; SLEEF-SVE-LABEL: define void @minnum_f32
1008 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1009 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
1010 ;
1011 ; ARMPL-NEON-LABEL: define void @minnum_f32
1012 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1013 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1014 ;
1015 ; ARMPL-SVE-LABEL: define void @minnum_f32
1016 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1017 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
1018 ;
1019   entry:  ; scalar loop: out[i] = minnum(in[i], in[i]) for i in [0, 1000)
1020   br label %for.body
1021
1022   for.body:  ; single-block loop; %iv is the element index
1023   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1024   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1025   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
1026   %call = tail call float @llvm.minnum.f32(float %in, float %in)  ; the same loaded value feeds both operands
1027   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1028   store float %call, ptr %out.gep, align 4
1029   %iv.next = add nuw nsw i64 %iv, 1
1030   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
1031   br i1 %exitcond, label %for.end, label %for.body
1032
1033   for.end:
1034   ret void
1035 }
1036
1037 declare double @llvm.nearbyint.f64(double)
1038 declare float @llvm.nearbyint.f32(float)
1039
1040 define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1041 ; SLEEF-NEON-LABEL: define void @nearbyint_f64
1042 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1043 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1044 ;
1045 ; SLEEF-SVE-LABEL: define void @nearbyint_f64
1046 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1047 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1048 ;
1049 ; ARMPL-NEON-LABEL: define void @nearbyint_f64
1050 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1051 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1052 ;
1053 ; ARMPL-SVE-LABEL: define void @nearbyint_f64
1054 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1055 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1056 ;
1057   start:  ; scalar loop: out[i] = nearbyint(in[i]) for i in [0, 1000)
1058   br label %loop
1059
1060   loop:  ; single-block loop; %idx is the element index
1061   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1062   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
1063   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
1064   %x = load double, ptr %src, align 8
1065   %res = tail call double @llvm.nearbyint.f64(double %x)
1066   store double %res, ptr %dst, align 8
1067   %idx.next = add nuw nsw i64 %idx, 1
1068   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
1069   br i1 %done, label %exit, label %loop
1070
1071   exit:
1072   ret void
1073 }
1074
1075 define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1076 ; SLEEF-NEON-LABEL: define void @nearbyint_f32
1077 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1078 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1079 ;
1080 ; SLEEF-SVE-LABEL: define void @nearbyint_f32
1081 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1082 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1083 ;
1084 ; ARMPL-NEON-LABEL: define void @nearbyint_f32
1085 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1086 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1087 ;
1088 ; ARMPL-SVE-LABEL: define void @nearbyint_f32
1089 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1090 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1091 ;
1092   entry:  ; scalar loop: out[i] = nearbyint(in[i]) for i in [0, 1000)
1093   br label %for.body
1094
1095   for.body:  ; single-block loop; %iv is the element index
1096   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1097   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1098   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
1099   %call = tail call float @llvm.nearbyint.f32(float %in)
1100   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1101   store float %call, ptr %out.gep, align 4
1102   %iv.next = add nuw nsw i64 %iv, 1
1103   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
1104   br i1 %exitcond, label %for.end, label %for.body
1105
1106   for.end:
1107   ret void
1108 }
1109
1110 declare double @llvm.pow.f64(double, double)
1111 declare float @llvm.pow.f32(float, float)
1112
1113 define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1114 ; SLEEF-NEON-LABEL: define void @pow_f64
1115 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1116 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
1117 ;
1118 ; SLEEF-SVE-LABEL: define void @pow_f64
1119 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1120 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1121 ;
1122 ; ARMPL-NEON-LABEL: define void @pow_f64
1123 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1124 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
1125 ;
1126 ; ARMPL-SVE-LABEL: define void @pow_f64
1127 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1128 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1129 ;
1130   start:  ; scalar loop: out[i] = pow(in[i], in[i]) for i in [0, 1000)
1131   br label %loop
1132
1133   loop:  ; single-block loop; %idx is the element index
1134   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1135   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
1136   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
1137   %x = load double, ptr %src, align 8
1138   %res = tail call double @llvm.pow.f64(double %x, double %x)  ; the same loaded value is both base and exponent
1139   store double %res, ptr %dst, align 8
1140   %idx.next = add nuw nsw i64 %idx, 1
1141   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
1142   br i1 %done, label %exit, label %loop
1143
1144   exit:
1145   ret void
1146 }
1147
1148 define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1149 ; SLEEF-NEON-LABEL: define void @pow_f32
1150 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1151 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1152 ;
1153 ; SLEEF-SVE-LABEL: define void @pow_f32
1154 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1155 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1156 ;
1157 ; ARMPL-NEON-LABEL: define void @pow_f32
1158 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1159 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1160 ;
1161 ; ARMPL-SVE-LABEL: define void @pow_f32
1162 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1163 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1164 ;
1165   entry:  ; scalar loop: out[i] = pow(in[i], in[i]) for i in [0, 1000)
1166   br label %for.body
1167
1168   for.body:  ; single-block loop; %iv is the element index
1169   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1170   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1171   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
1172   %call = tail call float @llvm.pow.f32(float %in, float %in)  ; the same loaded value is both base and exponent
1173   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1174   store float %call, ptr %out.gep, align 4
1175   %iv.next = add nuw nsw i64 %iv, 1
1176   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
1177   br i1 %exitcond, label %for.end, label %for.body
1178
1179   for.end:
1180   ret void
1181 }
1182
1183 declare double @llvm.rint.f64(double)
1184 declare float @llvm.rint.f32(float)
1185
1186 define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1187 ; SLEEF-NEON-LABEL: define void @rint_f64
1188 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1189 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1190 ;
1191 ; SLEEF-SVE-LABEL: define void @rint_f64
1192 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1193 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1194 ;
1195 ; ARMPL-NEON-LABEL: define void @rint_f64
1196 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1197 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1198 ;
1199 ; ARMPL-SVE-LABEL: define void @rint_f64
1200 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1201 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1202 ;
1203   start:  ; scalar loop: out[i] = rint(in[i]) for i in [0, 1000)
1204   br label %loop
1205
1206   loop:  ; single-block loop; %idx is the element index
1207   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1208   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
1209   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
1210   %x = load double, ptr %src, align 8
1211   %res = tail call double @llvm.rint.f64(double %x)
1212   store double %res, ptr %dst, align 8
1213   %idx.next = add nuw nsw i64 %idx, 1
1214   %done = icmp eq i64 %idx.next, 1000  ; constant trip count of 1000
1215   br i1 %done, label %exit, label %loop
1216
1217   exit:
1218   ret void
1219 }
1220
1221 define void @rint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1222 ; SLEEF-NEON-LABEL: define void @rint_f32
1223 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1224 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1225 ;
1226 ; SLEEF-SVE-LABEL: define void @rint_f32
1227 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1228 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1229 ;
1230 ; ARMPL-NEON-LABEL: define void @rint_f32
1231 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1232 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1233 ;
1234 ; ARMPL-SVE-LABEL: define void @rint_f32
1235 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1236 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1237 ;
1238   entry:  ; scalar loop: out[i] = rint(in[i]) for i in [0, 1000)
1239   br label %for.body
1240
1241   for.body:  ; single-block loop; %iv is the element index
1242   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1243   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1244   %in = load float, ptr %in.gep, align 4  ; natural f32 alignment: a 4-byte-stride address cannot be 8-aligned on consecutive iterations
1245   %call = tail call float @llvm.rint.f32(float %in)
1246   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1247   store float %call, ptr %out.gep, align 4
1248   %iv.next = add nuw nsw i64 %iv, 1
1249   %exitcond = icmp eq i64 %iv.next, 1000  ; constant trip count of 1000
1250   br i1 %exitcond, label %for.end, label %for.body
1251
1252   for.end:
1253   ret void
1254 }
1255
1256 declare double @llvm.round.f64(double)
1257 declare float @llvm.round.f32(float)
1258
1259 define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.round.f64(in[i]) over 1000 doubles
1260 ; SLEEF-NEON-LABEL: define void @round_f64
1261 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1262 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1263 ;
1264 ; SLEEF-SVE-LABEL: define void @round_f64
1265 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1266 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1267 ;
1268 ; ARMPL-NEON-LABEL: define void @round_f64
1269 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1270 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1271 ;
1272 ; ARMPL-SVE-LABEL: define void @round_f64
1273 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1274 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1275 ;
1276   entry:
1277   br label %loop
1278
1279   loop:
1280   %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] ; element index, starts at 0
1281   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
1282   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
1283   %val = load double, ptr %src, align 8
1284   %rounded = tail call double @llvm.round.f64(double %val) ; all four runs expect the widened llvm.round intrinsic
1285   store double %rounded, ptr %dst, align 8
1286   %idx.next = add nuw nsw i64 %idx, 1
1287   %done = icmp eq i64 %idx.next, 1000 ; constant trip count of 1000
1288   br i1 %done, label %exit, label %loop
1289
1290   exit:
1291   ret void
1292 }
1293
1294 define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.round.f32(in[i]) for i in [0, 1000)
1295 ; SLEEF-NEON-LABEL: define void @round_f32
1296 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1297 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1298 ;
1299 ; SLEEF-SVE-LABEL: define void @round_f32
1300 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1301 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1302 ;
1303 ; ARMPL-NEON-LABEL: define void @round_f32
1304 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1305 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1306 ;
1307 ; ARMPL-SVE-LABEL: define void @round_f32
1308 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1309 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1310 ;
1311   entry:
1312   br label %for.body
1313
1314   for.body:
1315   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
1316   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1317   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so only 4-byte alignment can hold on every iteration
1318   %call = tail call float @llvm.round.f32(float %in) ; all four runs expect this widened to the vector llvm.round intrinsic
1319   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1320   store float %call, ptr %out.gep, align 4
1321   %iv.next = add nuw nsw i64 %iv, 1
1322   %exitcond = icmp eq i64 %iv.next, 1000 ; constant trip count of 1000
1323   br i1 %exitcond, label %for.end, label %for.body
1324
1325   for.end:
1326   ret void
1327 }
1328
1329 declare double @llvm.sin.f64(double)
1330 declare float @llvm.sin.f32(float)
1331
1332 define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.sin.f64(in[i]) over 1000 doubles
1333 ; SLEEF-NEON-LABEL: define void @sin_f64
1334 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1335 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
1336 ;
1337 ; SLEEF-SVE-LABEL: define void @sin_f64
1338 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1339 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1340 ;
1341 ; ARMPL-NEON-LABEL: define void @sin_f64
1342 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1343 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
1344 ;
1345 ; ARMPL-SVE-LABEL: define void @sin_f64
1346 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1347 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1348 ;
1349   entry:
1350   br label %loop
1351
1352   loop:
1353   %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] ; element index, starts at 0
1354   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
1355   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
1356   %val = load double, ptr %src, align 8
1357   %sine = tail call double @llvm.sin.f64(double %val) ; runs expect a vector-library sin call here (SLEEF _ZGV* or ArmPL armpl_* symbols)
1358   store double %sine, ptr %dst, align 8
1359   %idx.next = add nuw nsw i64 %idx, 1
1360   %done = icmp eq i64 %idx.next, 1000 ; constant trip count of 1000
1361   br i1 %done, label %exit, label %loop
1362
1363   exit:
1364   ret void
1365 }
1366
1367 define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.sin.f32(in[i]) for i in [0, 1000)
1368 ; SLEEF-NEON-LABEL: define void @sin_f32
1369 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1370 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
1371 ;
1372 ; SLEEF-SVE-LABEL: define void @sin_f32
1373 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1374 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1375 ;
1376 ; ARMPL-NEON-LABEL: define void @sin_f32
1377 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1378 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
1379 ;
1380 ; ARMPL-SVE-LABEL: define void @sin_f32
1381 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1382 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1383 ;
1384   entry:
1385   br label %for.body
1386
1387   for.body:
1388   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
1389   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1390   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so only 4-byte alignment can hold on every iteration
1391   %call = tail call float @llvm.sin.f32(float %in) ; runs expect a vector-library sinf call here (SLEEF _ZGV* or ArmPL armpl_* symbols)
1392   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1393   store float %call, ptr %out.gep, align 4
1394   %iv.next = add nuw nsw i64 %iv, 1
1395   %exitcond = icmp eq i64 %iv.next, 1000 ; constant trip count of 1000
1396   br i1 %exitcond, label %for.end, label %for.body
1397
1398   for.end:
1399   ret void
1400 }
1401
1402 declare double @llvm.sqrt.f64(double)
1403 declare float @llvm.sqrt.f32(float)
1404
1405 define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.sqrt.f64(in[i]) over 1000 doubles
1406 ; SLEEF-NEON-LABEL: define void @sqrt_f64
1407 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1408 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1409 ;
1410 ; SLEEF-SVE-LABEL: define void @sqrt_f64
1411 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1412 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1413 ;
1414 ; ARMPL-NEON-LABEL: define void @sqrt_f64
1415 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1416 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1417 ;
1418 ; ARMPL-SVE-LABEL: define void @sqrt_f64
1419 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1420 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1421 ;
1422   entry:
1423   br label %loop
1424
1425   loop:
1426   %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] ; element index, starts at 0
1427   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
1428   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
1429   %val = load double, ptr %src, align 8
1430   %root = tail call double @llvm.sqrt.f64(double %val) ; all four runs expect the widened llvm.sqrt intrinsic
1431   store double %root, ptr %dst, align 8
1432   %idx.next = add nuw nsw i64 %idx, 1
1433   %done = icmp eq i64 %idx.next, 1000 ; constant trip count of 1000
1434   br i1 %done, label %exit, label %loop
1435
1436   exit:
1437   ret void
1438 }
1439
1440 define void @sqrt_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.sqrt.f32(in[i]) for i in [0, 1000)
1441 ; SLEEF-NEON-LABEL: define void @sqrt_f32
1442 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1443 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1444 ;
1445 ; SLEEF-SVE-LABEL: define void @sqrt_f32
1446 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1447 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1448 ;
1449 ; ARMPL-NEON-LABEL: define void @sqrt_f32
1450 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1451 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1452 ;
1453 ; ARMPL-SVE-LABEL: define void @sqrt_f32
1454 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1455 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1456 ;
1457   entry:
1458   br label %for.body
1459
1460   for.body:
1461   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
1462   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1463   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so only 4-byte alignment can hold on every iteration
1464   %call = tail call float @llvm.sqrt.f32(float %in) ; all four runs expect this widened to the vector llvm.sqrt intrinsic
1465   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1466   store float %call, ptr %out.gep, align 4
1467   %iv.next = add nuw nsw i64 %iv, 1
1468   %exitcond = icmp eq i64 %iv.next, 1000 ; constant trip count of 1000
1469   br i1 %exitcond, label %for.end, label %for.body
1470
1471   for.end:
1472   ret void
1473 }
1474
1475 declare double @llvm.trunc.f64(double)
1476 declare float @llvm.trunc.f32(float)
1477
1478 define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.trunc.f64(in[i]) over 1000 doubles
1479 ; SLEEF-NEON-LABEL: define void @trunc_f64
1480 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1481 ; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1482 ;
1483 ; SLEEF-SVE-LABEL: define void @trunc_f64
1484 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1485 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1486 ;
1487 ; ARMPL-NEON-LABEL: define void @trunc_f64
1488 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1489 ; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1490 ;
1491 ; ARMPL-SVE-LABEL: define void @trunc_f64
1492 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1493 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1494 ;
1495   entry:
1496   br label %loop
1497
1498   loop:
1499   %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] ; element index, starts at 0
1500   %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
1501   %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
1502   %val = load double, ptr %src, align 8
1503   %whole = tail call double @llvm.trunc.f64(double %val) ; all four runs expect the widened llvm.trunc intrinsic
1504   store double %whole, ptr %dst, align 8
1505   %idx.next = add nuw nsw i64 %idx, 1
1506   %done = icmp eq i64 %idx.next, 1000 ; constant trip count of 1000
1507   br i1 %done, label %exit, label %loop
1508
1509   exit:
1510   ret void
1511 }
1512
1513 define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; out[i] = llvm.trunc.f32(in[i]) for i in [0, 1000)
1514 ; SLEEF-NEON-LABEL: define void @trunc_f32
1515 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1516 ; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1517 ;
1518 ; SLEEF-SVE-LABEL: define void @trunc_f32
1519 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1520 ; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1521 ;
1522 ; ARMPL-NEON-LABEL: define void @trunc_f32
1523 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1524 ; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1525 ;
1526 ; ARMPL-SVE-LABEL: define void @trunc_f32
1527 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1528 ; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1529 ;
1530   entry:
1531   br label %for.body
1532
1533   for.body:
1534   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; element index, starts at 0
1535   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1536   %in = load float, ptr %in.gep, align 4 ; floats are 4 bytes apart, so only 4-byte alignment can hold on every iteration
1537   %call = tail call float @llvm.trunc.f32(float %in) ; all four runs expect this widened to the vector llvm.trunc intrinsic
1538   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1539   store float %call, ptr %out.gep, align 4
1540   %iv.next = add nuw nsw i64 %iv, 1
1541   %exitcond = icmp eq i64 %iv.next, 1000 ; constant trip count of 1000
1542   br i1 %exitcond, label %for.end, label %for.body
1543
1544   for.end:
1545   ret void
1546 }