test/CodeGen/AArch64/arm64-vmax.ll

   1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
   2
   3 define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit smax.8b for the <8 x i8> smax intrinsic
   4 ;CHECK-LABEL: smax_8b:
   5 ;CHECK: smax.8b
   6   %lhs = load <8 x i8>, <8 x i8>* %A
   7   %rhs = load <8 x i8>, <8 x i8>* %B
   8   %res = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
   9   ret <8 x i8> %res
  10 }
  11
  12 define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit smax.16b for the <16 x i8> smax intrinsic
  13 ;CHECK-LABEL: smax_16b:
  14 ;CHECK: smax.16b
  15   %lhs = load <16 x i8>, <16 x i8>* %A
  16   %rhs = load <16 x i8>, <16 x i8>* %B
  17   %res = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  18   ret <16 x i8> %res
  19 }
  20
  21 define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit smax.4h for the <4 x i16> smax intrinsic
  22 ;CHECK-LABEL: smax_4h:
  23 ;CHECK: smax.4h
  24   %lhs = load <4 x i16>, <4 x i16>* %A
  25   %rhs = load <4 x i16>, <4 x i16>* %B
  26   %res = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  27   ret <4 x i16> %res
  28 }
  29
  30 define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit smax.8h for the <8 x i16> smax intrinsic
  31 ;CHECK-LABEL: smax_8h:
  32 ;CHECK: smax.8h
  33   %lhs = load <8 x i16>, <8 x i16>* %A
  34   %rhs = load <8 x i16>, <8 x i16>* %B
  35   %res = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  36   ret <8 x i16> %res
  37 }
  38
  39 define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit smax.2s for the <2 x i32> smax intrinsic
  40 ;CHECK-LABEL: smax_2s:
  41 ;CHECK: smax.2s
  42   %lhs = load <2 x i32>, <2 x i32>* %A
  43   %rhs = load <2 x i32>, <2 x i32>* %B
  44   %res = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  45   ret <2 x i32> %res
  46 }
  47
  48 define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit smax.4s for the <4 x i32> smax intrinsic
  49 ;CHECK-LABEL: smax_4s:
  50 ;CHECK: smax.4s
  51   %lhs = load <4 x i32>, <4 x i32>* %A
  52   %rhs = load <4 x i32>, <4 x i32>* %B
  53   %res = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  54   ret <4 x i32> %res
  55 }
  56
  57 declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
  58 declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
  59 declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
  60 declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
  61 declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
  62 declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
  63
  64 define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit umax.8b for the <8 x i8> umax intrinsic
  65 ;CHECK-LABEL: umax_8b:
  66 ;CHECK: umax.8b
  67   %lhs = load <8 x i8>, <8 x i8>* %A
  68   %rhs = load <8 x i8>, <8 x i8>* %B
  69   %res = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  70   ret <8 x i8> %res
  71 }
  72
  73 define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit umax.16b for the <16 x i8> umax intrinsic
  74 ;CHECK-LABEL: umax_16b:
  75 ;CHECK: umax.16b
  76   %lhs = load <16 x i8>, <16 x i8>* %A
  77   %rhs = load <16 x i8>, <16 x i8>* %B
  78   %res = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  79   ret <16 x i8> %res
  80 }
  81
  82 define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit umax.4h for the <4 x i16> umax intrinsic
  83 ;CHECK-LABEL: umax_4h:
  84 ;CHECK: umax.4h
  85   %lhs = load <4 x i16>, <4 x i16>* %A
  86   %rhs = load <4 x i16>, <4 x i16>* %B
  87   %res = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  88   ret <4 x i16> %res
  89 }
  90
  91 define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit umax.8h for the <8 x i16> umax intrinsic
  92 ;CHECK-LABEL: umax_8h:
  93 ;CHECK: umax.8h
  94   %lhs = load <8 x i16>, <8 x i16>* %A
  95   %rhs = load <8 x i16>, <8 x i16>* %B
  96   %res = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  97   ret <8 x i16> %res
  98 }
  99
 100 define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit umax.2s for the <2 x i32> umax intrinsic
 101 ;CHECK-LABEL: umax_2s:
 102 ;CHECK: umax.2s
 103   %lhs = load <2 x i32>, <2 x i32>* %A
 104   %rhs = load <2 x i32>, <2 x i32>* %B
 105   %res = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 106   ret <2 x i32> %res
 107 }
 108
 109 define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit umax.4s for the <4 x i32> umax intrinsic
 110 ;CHECK-LABEL: umax_4s:
 111 ;CHECK: umax.4s
 112   %lhs = load <4 x i32>, <4 x i32>* %A
 113   %rhs = load <4 x i32>, <4 x i32>* %B
 114   %res = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 115   ret <4 x i32> %res
 116 }
 117
 118 declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 119 declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 120 declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 121 declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 122 declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 123 declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 124
 125 define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit smin.8b for the <8 x i8> smin intrinsic
 126 ;CHECK-LABEL: smin_8b:
 127 ;CHECK: smin.8b
 128   %lhs = load <8 x i8>, <8 x i8>* %A
 129   %rhs = load <8 x i8>, <8 x i8>* %B
 130   %res = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
 131   ret <8 x i8> %res
 132 }
 133
 134 define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit smin.16b for the <16 x i8> smin intrinsic
 135 ;CHECK-LABEL: smin_16b:
 136 ;CHECK: smin.16b
 137   %lhs = load <16 x i8>, <16 x i8>* %A
 138   %rhs = load <16 x i8>, <16 x i8>* %B
 139   %res = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
 140   ret <16 x i8> %res
 141 }
 142
 143 define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit smin.4h for the <4 x i16> smin intrinsic
 144 ;CHECK-LABEL: smin_4h:
 145 ;CHECK: smin.4h
 146   %lhs = load <4 x i16>, <4 x i16>* %A
 147   %rhs = load <4 x i16>, <4 x i16>* %B
 148   %res = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 149   ret <4 x i16> %res
 150 }
 151
 152 define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit smin.8h for the <8 x i16> smin intrinsic
 153 ;CHECK-LABEL: smin_8h:
 154 ;CHECK: smin.8h
 155   %lhs = load <8 x i16>, <8 x i16>* %A
 156   %rhs = load <8 x i16>, <8 x i16>* %B
 157   %res = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 158   ret <8 x i16> %res
 159 }
 160
 161 define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit smin.2s for the <2 x i32> smin intrinsic
 162 ;CHECK-LABEL: smin_2s:
 163 ;CHECK: smin.2s
 164   %lhs = load <2 x i32>, <2 x i32>* %A
 165   %rhs = load <2 x i32>, <2 x i32>* %B
 166   %res = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 167   ret <2 x i32> %res
 168 }
 169
 170 define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit smin.4s for the <4 x i32> smin intrinsic
 171 ;CHECK-LABEL: smin_4s:
 172 ;CHECK: smin.4s
 173   %lhs = load <4 x i32>, <4 x i32>* %A
 174   %rhs = load <4 x i32>, <4 x i32>* %B
 175   %res = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 176   ret <4 x i32> %res
 177 }
 178
 179 declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 180 declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 181 declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 182 declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 183 declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 184 declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 185
 186 define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit umin.8b for the <8 x i8> umin intrinsic
 187 ;CHECK-LABEL: umin_8b:
 188 ;CHECK: umin.8b
 189   %lhs = load <8 x i8>, <8 x i8>* %A
 190   %rhs = load <8 x i8>, <8 x i8>* %B
 191   %res = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
 192   ret <8 x i8> %res
 193 }
 194
 195 define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit umin.16b for the <16 x i8> umin intrinsic
 196 ;CHECK-LABEL: umin_16b:
 197 ;CHECK: umin.16b
 198   %lhs = load <16 x i8>, <16 x i8>* %A
 199   %rhs = load <16 x i8>, <16 x i8>* %B
 200   %res = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
 201   ret <16 x i8> %res
 202 }
 203
 204 define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit umin.4h for the <4 x i16> umin intrinsic
 205 ;CHECK-LABEL: umin_4h:
 206 ;CHECK: umin.4h
 207   %lhs = load <4 x i16>, <4 x i16>* %A
 208   %rhs = load <4 x i16>, <4 x i16>* %B
 209   %res = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 210   ret <4 x i16> %res
 211 }
 212
 213 define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit umin.8h for the <8 x i16> umin intrinsic
 214 ;CHECK-LABEL: umin_8h:
 215 ;CHECK: umin.8h
 216   %lhs = load <8 x i16>, <8 x i16>* %A
 217   %rhs = load <8 x i16>, <8 x i16>* %B
 218   %res = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 219   ret <8 x i16> %res
 220 }
 221
 222 define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit umin.2s for the <2 x i32> umin intrinsic
 223 ;CHECK-LABEL: umin_2s:
 224 ;CHECK: umin.2s
 225   %lhs = load <2 x i32>, <2 x i32>* %A
 226   %rhs = load <2 x i32>, <2 x i32>* %B
 227   %res = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 228   ret <2 x i32> %res
 229 }
 230
 231 define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit umin.4s for the <4 x i32> umin intrinsic
 232 ;CHECK-LABEL: umin_4s:
 233 ;CHECK: umin.4s
 234   %lhs = load <4 x i32>, <4 x i32>* %A
 235   %rhs = load <4 x i32>, <4 x i32>* %B
 236   %res = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 237   ret <4 x i32> %res
 238 }
 239
 240 declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 241 declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 242 declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 243 declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 244 declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 245 declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 246
 247 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 248
 249 define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit smaxp.8b for the <8 x i8> smaxp intrinsic
 250 ;CHECK-LABEL: smaxp_8b:
 251 ;CHECK: smaxp.8b
 252   %lhs = load <8 x i8>, <8 x i8>* %A
 253   %rhs = load <8 x i8>, <8 x i8>* %B
 254   %res = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
 255   ret <8 x i8> %res
 256 }
 257
 258 define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit smaxp.16b for the <16 x i8> smaxp intrinsic
 259 ;CHECK-LABEL: smaxp_16b:
 260 ;CHECK: smaxp.16b
 261   %lhs = load <16 x i8>, <16 x i8>* %A
 262   %rhs = load <16 x i8>, <16 x i8>* %B
 263   %res = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
 264   ret <16 x i8> %res
 265 }
 266
 267 define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit smaxp.4h for the <4 x i16> smaxp intrinsic
 268 ;CHECK-LABEL: smaxp_4h:
 269 ;CHECK: smaxp.4h
 270   %lhs = load <4 x i16>, <4 x i16>* %A
 271   %rhs = load <4 x i16>, <4 x i16>* %B
 272   %res = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 273   ret <4 x i16> %res
 274 }
 275
 276 define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit smaxp.8h for the <8 x i16> smaxp intrinsic
 277 ;CHECK-LABEL: smaxp_8h:
 278 ;CHECK: smaxp.8h
 279   %lhs = load <8 x i16>, <8 x i16>* %A
 280   %rhs = load <8 x i16>, <8 x i16>* %B
 281   %res = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 282   ret <8 x i16> %res
 283 }
 284
 285 define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit smaxp.2s for the <2 x i32> smaxp intrinsic
 286 ;CHECK-LABEL: smaxp_2s:
 287 ;CHECK: smaxp.2s
 288   %lhs = load <2 x i32>, <2 x i32>* %A
 289   %rhs = load <2 x i32>, <2 x i32>* %B
 290   %res = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 291   ret <2 x i32> %res
 292 }
 293
 294 define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit smaxp.4s for the <4 x i32> smaxp intrinsic
 295 ;CHECK-LABEL: smaxp_4s:
 296 ;CHECK: smaxp.4s
 297   %lhs = load <4 x i32>, <4 x i32>* %A
 298   %rhs = load <4 x i32>, <4 x i32>* %B
 299   %res = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 300   ret <4 x i32> %res
 301 }
 302
 303 declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 304 declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 305 declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 306 declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 307 declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 308 declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 309
 310 define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit umaxp.8b for the <8 x i8> umaxp intrinsic
 311 ;CHECK-LABEL: umaxp_8b:
 312 ;CHECK: umaxp.8b
 313   %lhs = load <8 x i8>, <8 x i8>* %A
 314   %rhs = load <8 x i8>, <8 x i8>* %B
 315   %res = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
 316   ret <8 x i8> %res
 317 }
 318
 319 define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit umaxp.16b for the <16 x i8> umaxp intrinsic
 320 ;CHECK-LABEL: umaxp_16b:
 321 ;CHECK: umaxp.16b
 322   %lhs = load <16 x i8>, <16 x i8>* %A
 323   %rhs = load <16 x i8>, <16 x i8>* %B
 324   %res = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
 325   ret <16 x i8> %res
 326 }
 327
 328 define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit umaxp.4h for the <4 x i16> umaxp intrinsic
 329 ;CHECK-LABEL: umaxp_4h:
 330 ;CHECK: umaxp.4h
 331   %lhs = load <4 x i16>, <4 x i16>* %A
 332   %rhs = load <4 x i16>, <4 x i16>* %B
 333   %res = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 334   ret <4 x i16> %res
 335 }
 336
 337 define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit umaxp.8h for the <8 x i16> umaxp intrinsic
 338 ;CHECK-LABEL: umaxp_8h:
 339 ;CHECK: umaxp.8h
 340   %lhs = load <8 x i16>, <8 x i16>* %A
 341   %rhs = load <8 x i16>, <8 x i16>* %B
 342   %res = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 343   ret <8 x i16> %res
 344 }
 345
 346 define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit umaxp.2s for the <2 x i32> umaxp intrinsic
 347 ;CHECK-LABEL: umaxp_2s:
 348 ;CHECK: umaxp.2s
 349   %lhs = load <2 x i32>, <2 x i32>* %A
 350   %rhs = load <2 x i32>, <2 x i32>* %B
 351   %res = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 352   ret <2 x i32> %res
 353 }
 354
 355 define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit umaxp.4s for the <4 x i32> umaxp intrinsic
 356 ;CHECK-LABEL: umaxp_4s:
 357 ;CHECK: umaxp.4s
 358   %lhs = load <4 x i32>, <4 x i32>* %A
 359   %rhs = load <4 x i32>, <4 x i32>* %B
 360   %res = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 361   ret <4 x i32> %res
 362 }
 363
 364 declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 365 declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 366 declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 367 declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 368 declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 369 declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 370
 371 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 372
 373 define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit sminp.8b for the <8 x i8> sminp intrinsic
 374 ;CHECK-LABEL: sminp_8b:
 375 ;CHECK: sminp.8b
 376   %lhs = load <8 x i8>, <8 x i8>* %A
 377   %rhs = load <8 x i8>, <8 x i8>* %B
 378   %res = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
 379   ret <8 x i8> %res
 380 }
 381
 382 define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit sminp.16b for the <16 x i8> sminp intrinsic
 383 ;CHECK-LABEL: sminp_16b:
 384 ;CHECK: sminp.16b
 385   %lhs = load <16 x i8>, <16 x i8>* %A
 386   %rhs = load <16 x i8>, <16 x i8>* %B
 387   %res = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
 388   ret <16 x i8> %res
 389 }
 390
 391 define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit sminp.4h for the <4 x i16> sminp intrinsic
 392 ;CHECK-LABEL: sminp_4h:
 393 ;CHECK: sminp.4h
 394   %lhs = load <4 x i16>, <4 x i16>* %A
 395   %rhs = load <4 x i16>, <4 x i16>* %B
 396   %res = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 397   ret <4 x i16> %res
 398 }
 399
 400 define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit sminp.8h for the <8 x i16> sminp intrinsic
 401 ;CHECK-LABEL: sminp_8h:
 402 ;CHECK: sminp.8h
 403   %lhs = load <8 x i16>, <8 x i16>* %A
 404   %rhs = load <8 x i16>, <8 x i16>* %B
 405   %res = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 406   ret <8 x i16> %res
 407 }
 408
 409 define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit sminp.2s for the <2 x i32> sminp intrinsic
 410 ;CHECK-LABEL: sminp_2s:
 411 ;CHECK: sminp.2s
 412   %lhs = load <2 x i32>, <2 x i32>* %A
 413   %rhs = load <2 x i32>, <2 x i32>* %B
 414   %res = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 415   ret <2 x i32> %res
 416 }
 417
 418 define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit sminp.4s for the <4 x i32> sminp intrinsic
 419 ;CHECK-LABEL: sminp_4s:
 420 ;CHECK: sminp.4s
 421   %lhs = load <4 x i32>, <4 x i32>* %A
 422   %rhs = load <4 x i32>, <4 x i32>* %B
 423   %res = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 424   ret <4 x i32> %res
 425 }
 426
 427 declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 428 declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 429 declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 430 declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 431 declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 432 declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 433
 434 define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; llc must emit uminp.8b for the <8 x i8> uminp intrinsic
 435 ;CHECK-LABEL: uminp_8b:
 436 ;CHECK: uminp.8b
 437   %lhs = load <8 x i8>, <8 x i8>* %A
 438   %rhs = load <8 x i8>, <8 x i8>* %B
 439   %res = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
 440   ret <8 x i8> %res
 441 }
 442
 443 define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; llc must emit uminp.16b for the <16 x i8> uminp intrinsic
 444 ;CHECK-LABEL: uminp_16b:
 445 ;CHECK: uminp.16b
 446   %lhs = load <16 x i8>, <16 x i8>* %A
 447   %rhs = load <16 x i8>, <16 x i8>* %B
 448   %res = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
 449   ret <16 x i8> %res
 450 }
 451
 452 define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; llc must emit uminp.4h for the <4 x i16> uminp intrinsic
 453 ;CHECK-LABEL: uminp_4h:
 454 ;CHECK: uminp.4h
 455   %lhs = load <4 x i16>, <4 x i16>* %A
 456   %rhs = load <4 x i16>, <4 x i16>* %B
 457   %res = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 458   ret <4 x i16> %res
 459 }
 460
 461 define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; llc must emit uminp.8h for the <8 x i16> uminp intrinsic
 462 ;CHECK-LABEL: uminp_8h:
 463 ;CHECK: uminp.8h
 464   %lhs = load <8 x i16>, <8 x i16>* %A
 465   %rhs = load <8 x i16>, <8 x i16>* %B
 466   %res = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 467   ret <8 x i16> %res
 468 }
 469
 470 define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; llc must emit uminp.2s for the <2 x i32> uminp intrinsic
 471 ;CHECK-LABEL: uminp_2s:
 472 ;CHECK: uminp.2s
 473   %lhs = load <2 x i32>, <2 x i32>* %A
 474   %rhs = load <2 x i32>, <2 x i32>* %B
 475   %res = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 476   ret <2 x i32> %res
 477 }
 478
 479 define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; llc must emit uminp.4s for the <4 x i32> uminp intrinsic
 480 ;CHECK-LABEL: uminp_4s:
 481 ;CHECK: uminp.4s
 482   %lhs = load <4 x i32>, <4 x i32>* %A
 483   %rhs = load <4 x i32>, <4 x i32>* %B
 484   %res = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 485   ret <4 x i32> %res
 486 }
 487
 488 declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 489 declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 490 declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 491 declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 492 declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 493 declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 494
 495 define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ; llc must emit fmax.2s for the <2 x float> fmax intrinsic
 496 ;CHECK-LABEL: fmax_2s:
 497 ;CHECK: fmax.2s
 498   %lhs = load <2 x float>, <2 x float>* %A
 499   %rhs = load <2 x float>, <2 x float>* %B
 500   %res = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %lhs, <2 x float> %rhs)
 501   ret <2 x float> %res
 502 }
 503
 504 define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ; llc must emit fmax.4s for the <4 x float> fmax intrinsic
 505 ;CHECK-LABEL: fmax_4s:
 506 ;CHECK: fmax.4s
 507   %lhs = load <4 x float>, <4 x float>* %A
 508   %rhs = load <4 x float>, <4 x float>* %B
 509   %res = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %lhs, <4 x float> %rhs)
 510   ret <4 x float> %res
 511 }
 512
 513 define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ; llc must emit fmax.2d for the <2 x double> fmax intrinsic
 514 ;CHECK-LABEL: fmax_2d:
 515 ;CHECK: fmax.2d
 516   %lhs = load <2 x double>, <2 x double>* %A
 517   %rhs = load <2 x double>, <2 x double>* %B
 518   %res = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %lhs, <2 x double> %rhs)
 519   ret <2 x double> %res
 520 }
 521
 522 declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
 523 declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
 524 declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
 525
 526 define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ; llc must emit fmaxp.2s for the <2 x float> fmaxp intrinsic
 527 ;CHECK-LABEL: fmaxp_2s:
 528 ;CHECK: fmaxp.2s
 529   %lhs = load <2 x float>, <2 x float>* %A
 530   %rhs = load <2 x float>, <2 x float>* %B
 531   %res = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
 532   ret <2 x float> %res
 533 }
 534
 535 define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ; llc must emit fmaxp.4s for the <4 x float> fmaxp intrinsic
 536 ;CHECK-LABEL: fmaxp_4s:
 537 ;CHECK: fmaxp.4s
 538   %lhs = load <4 x float>, <4 x float>* %A
 539   %rhs = load <4 x float>, <4 x float>* %B
 540   %res = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
 541   ret <4 x float> %res
 542 }
 543
 544 define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ; llc must emit fmaxp.2d for the <2 x double> fmaxp intrinsic
 545 ;CHECK-LABEL: fmaxp_2d:
 546 ;CHECK: fmaxp.2d
 547   %lhs = load <2 x double>, <2 x double>* %A
 548   %rhs = load <2 x double>, <2 x double>* %B
 549   %res = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
 550   ret <2 x double> %res
 551 }
 552
 553 declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
 554 declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
 555 declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
 556
 557 define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ; llc must emit fmin.2s for the <2 x float> fmin intrinsic
 558 ;CHECK-LABEL: fmin_2s:
 559 ;CHECK: fmin.2s
 560   %lhs = load <2 x float>, <2 x float>* %A
 561   %rhs = load <2 x float>, <2 x float>* %B
 562   %res = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %lhs, <2 x float> %rhs)
 563   ret <2 x float> %res
 564 }
 565
 566 define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ; llc must emit fmin.4s for the <4 x float> fmin intrinsic
 567 ;CHECK-LABEL: fmin_4s:
 568 ;CHECK: fmin.4s
 569   %lhs = load <4 x float>, <4 x float>* %A
 570   %rhs = load <4 x float>, <4 x float>* %B
 571   %res = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %lhs, <4 x float> %rhs)
 572   ret <4 x float> %res
 573 }
 574
 575 define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ; llc must emit fmin.2d for the <2 x double> fmin intrinsic
 576 ;CHECK-LABEL: fmin_2d:
 577 ;CHECK: fmin.2d
 578   %lhs = load <2 x double>, <2 x double>* %A
 579   %rhs = load <2 x double>, <2 x double>* %B
 580   %res = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %lhs, <2 x double> %rhs)
 581   ret <2 x double> %res
 582 }
 583
 584 declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
 585 declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
 586 declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
 587
 588 define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ; llc must emit fminp.2s for the <2 x float> fminp intrinsic
 589 ;CHECK-LABEL: fminp_2s:
 590 ;CHECK: fminp.2s
 591   %lhs = load <2 x float>, <2 x float>* %A
 592   %rhs = load <2 x float>, <2 x float>* %B
 593   %res = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
 594   ret <2 x float> %res
 595 }
 596
 597 define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ; llc must emit fminp.4s for the <4 x float> fminp intrinsic
 598 ;CHECK-LABEL: fminp_4s:
 599 ;CHECK: fminp.4s
 600   %lhs = load <4 x float>, <4 x float>* %A
 601   %rhs = load <4 x float>, <4 x float>* %B
 602   %res = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
 603   ret <4 x float> %res
 604 }
 605
 606 define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ; llc must emit fminp.2d for the <2 x double> fminp intrinsic
 607 ;CHECK-LABEL: fminp_2d:
 608 ;CHECK: fminp.2d
 609   %lhs = load <2 x double>, <2 x double>* %A
 610   %rhs = load <2 x double>, <2 x double>* %B
 611   %res = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
 612   ret <2 x double> %res
 613 }
 614
 615 declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
 616 declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
 617 declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
 618
 619 define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ; llc must emit fminnmp.2s for the <2 x float> fminnmp intrinsic
 620 ;CHECK-LABEL: fminnmp_2s:
 621 ;CHECK: fminnmp.2s
 622   %lhs = load <2 x float>, <2 x float>* %A
 623   %rhs = load <2 x float>, <2 x float>* %B
 624   %res = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
 625   ret <2 x float> %res
 626 }
 627
 628 define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ; llc must emit fminnmp.4s for the <4 x float> fminnmp intrinsic
 629 ;CHECK-LABEL: fminnmp_4s:
 630 ;CHECK: fminnmp.4s
 631   %lhs = load <4 x float>, <4 x float>* %A
 632   %rhs = load <4 x float>, <4 x float>* %B
 633   %res = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
 634   ret <4 x float> %res
 635 }
 636
 637 define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ; llc must emit fminnmp.2d for the <2 x double> fminnmp intrinsic
 638 ;CHECK-LABEL: fminnmp_2d:
 639 ;CHECK: fminnmp.2d
 640   %lhs = load <2 x double>, <2 x double>* %A
 641   %rhs = load <2 x double>, <2 x double>* %B
 642   %res = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
 643   ret <2 x double> %res
 644 }
 645
 646 declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
 647 declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
 648 declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
 649
 650 define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ; llc must emit fmaxnmp.2s for the <2 x float> fmaxnmp intrinsic
 651 ;CHECK-LABEL: fmaxnmp_2s:
 652 ;CHECK: fmaxnmp.2s
 653   %lhs = load <2 x float>, <2 x float>* %A
 654   %rhs = load <2 x float>, <2 x float>* %B
 655   %res = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
 656   ret <2 x float> %res
 657 }
 658
 659 define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ; llc must emit fmaxnmp.4s for the <4 x float> fmaxnmp intrinsic
 660 ;CHECK-LABEL: fmaxnmp_4s:
 661 ;CHECK: fmaxnmp.4s
 662   %lhs = load <4 x float>, <4 x float>* %A
 663   %rhs = load <4 x float>, <4 x float>* %B
 664   %res = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
 665   ret <4 x float> %res
 666 }
 667
 668 define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ; llc must emit fmaxnmp.2d for the <2 x double> fmaxnmp intrinsic
 669 ;CHECK-LABEL: fmaxnmp_2d:
 670 ;CHECK: fmaxnmp.2d
 671   %lhs = load <2 x double>, <2 x double>* %A
 672   %rhs = load <2 x double>, <2 x double>* %B
 673   %res = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
 674   ret <2 x double> %res
 675 }
 676
 677 declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
 678 declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
 679 declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone